author     Dimitry Andric <dim@FreeBSD.org>    2016-01-13 19:58:01 +0000
committer  Dimitry Andric <dim@FreeBSD.org>    2016-01-13 19:58:01 +0000
commit     050e163ae8b4bb6eb252b59e2f8f36e68ae9239d (patch)
tree       7376a0c71aad05d327e5b1dcbceb3311a10f9f29
parent     8a6c1c25bce0267ee4072bd7b786b921e8a66a35 (diff)

Vendor import of llvm trunk r257626 (tag: vendor/llvm/llvm-trunk-r257626)

Notes:
    svn path=/vendor/llvm/dist/; revision=293838
    svn path=/vendor/llvm/llvm-trunk-r257626/; revision=293839; tag=vendor/llvm/llvm-trunk-r257626
-rwxr-xr-xcmake/modules/AddLLVM.cmake6
-rw-r--r--cmake/modules/HandleLLVMOptions.cmake30
-rw-r--r--docs/AliasAnalysis.rst4
-rw-r--r--docs/CommandGuide/llvm-symbolizer.rst6
-rw-r--r--docs/ExceptionHandling.rst64
-rw-r--r--docs/GettingStartedVS.rst34
-rw-r--r--docs/LangRef.rst214
-rw-r--r--docs/Phabricator.rst8
-rw-r--r--docs/ProgrammersManual.rst6
-rw-r--r--examples/Kaleidoscope/Orc/fully_lazy/toy.cpp2
-rw-r--r--include/llvm/ADT/IntEqClasses.h8
-rw-r--r--include/llvm/ADT/PointerEmbeddedInt.h103
-rw-r--r--include/llvm/ADT/PointerIntPair.h11
-rw-r--r--include/llvm/ADT/PointerSumType.h205
-rw-r--r--include/llvm/ADT/Twine.h8
-rw-r--r--include/llvm/Analysis/LazyCallGraph.h91
-rw-r--r--include/llvm/Analysis/LoopInfo.h250
-rw-r--r--include/llvm/CodeGen/AsmPrinter.h2
-rw-r--r--include/llvm/CodeGen/DIE.h42
-rw-r--r--include/llvm/CodeGen/LiveInterval.h6
-rw-r--r--include/llvm/CodeGen/RegisterPressure.h39
-rw-r--r--include/llvm/CodeGen/WinEHFuncInfo.h5
-rw-r--r--include/llvm/DebugInfo/Symbolize/DIPrinter.h9
-rw-r--r--include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h106
-rw-r--r--include/llvm/ExecutionEngine/Orc/IndirectionUtils.h10
-rw-r--r--include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h4
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h (renamed from include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h)77
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcError.h37
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h784
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h185
-rw-r--r--include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h432
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCChannel.h179
-rw-r--r--include/llvm/ExecutionEngine/Orc/RPCUtils.h266
-rw-r--r--include/llvm/ExecutionEngine/RTDyldMemoryManager.h4
-rw-r--r--include/llvm/ExecutionEngine/RuntimeDyld.h44
-rw-r--r--include/llvm/IR/Attributes.td5
-rw-r--r--include/llvm/IR/Function.h11
-rw-r--r--include/llvm/IR/IRBuilder.h4
-rw-r--r--include/llvm/IR/Intrinsics.td2
-rw-r--r--include/llvm/IR/IntrinsicsX86.td384
-rw-r--r--include/llvm/IR/LLVMContext.h11
-rw-r--r--include/llvm/IR/Metadata.h21
-rw-r--r--include/llvm/InitializePasses.h3
-rw-r--r--include/llvm/LinkAllPasses.h3
-rw-r--r--include/llvm/Linker/Linker.h7
-rw-r--r--include/llvm/MC/MCExpr.h3
-rw-r--r--include/llvm/MC/MCObjectFileInfo.h2
-rw-r--r--include/llvm/MC/MCStreamer.h4
-rw-r--r--include/llvm/Object/COFF.h3
-rw-r--r--include/llvm/Object/ELFObjectFile.h10
-rw-r--r--include/llvm/Pass.h4
-rw-r--r--include/llvm/ProfileData/CoverageMapping.h113
-rw-r--r--include/llvm/ProfileData/InstrProf.h78
-rw-r--r--include/llvm/ProfileData/InstrProfData.inc67
-rw-r--r--include/llvm/ProfileData/SampleProf.h55
-rw-r--r--include/llvm/Support/ARMTargetParser.def2
-rw-r--r--include/llvm/Support/Allocator.h3
-rw-r--r--include/llvm/Support/COFF.h9
-rw-r--r--include/llvm/Support/ELF.h17
-rw-r--r--include/llvm/Support/ELFRelocs/WebAssembly.def8
-rw-r--r--include/llvm/Support/GenericDomTree.h12
-rw-r--r--include/llvm/Support/MathExtras.h19
-rw-r--r--include/llvm/Transforms/IPO.h18
-rw-r--r--include/llvm/Transforms/Utils/Cloning.h36
-rw-r--r--include/llvm/Transforms/Utils/Local.h3
-rw-r--r--include/llvm/module.modulemap1
-rw-r--r--lib/Analysis/BasicAliasAnalysis.cpp30
-rw-r--r--lib/Analysis/CallGraphSCCPass.cpp7
-rw-r--r--lib/Analysis/GlobalsModRef.cpp15
-rw-r--r--lib/Analysis/InstructionSimplify.cpp199
-rw-r--r--lib/Analysis/LoopAccessAnalysis.cpp1
-rw-r--r--lib/Analysis/LoopInfo.cpp6
-rw-r--r--lib/Analysis/LoopPass.cpp27
-rw-r--r--lib/Analysis/ScopedNoAliasAA.cpp2
-rw-r--r--lib/Analysis/TypeBasedAliasAnalysis.cpp2
-rw-r--r--lib/Analysis/ValueTracking.cpp12
-rw-r--r--lib/Bitcode/Reader/BitcodeReader.cpp23
-rw-r--r--lib/Bitcode/Writer/BitcodeWriter.cpp15
-rw-r--r--lib/CodeGen/AsmPrinter/AsmPrinter.cpp32
-rw-r--r--lib/CodeGen/AsmPrinter/DIE.cpp33
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.cpp80
-rw-r--r--lib/CodeGen/AsmPrinter/DwarfDebug.h16
-rw-r--r--lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp23
-rw-r--r--lib/CodeGen/BranchFolding.cpp15
-rw-r--r--lib/CodeGen/CodeGenPrepare.cpp16
-rw-r--r--lib/CodeGen/LiveDebugValues.cpp151
-rw-r--r--lib/CodeGen/LiveInterval.cpp10
-rw-r--r--lib/CodeGen/LiveIntervalAnalysis.cpp2
-rw-r--r--lib/CodeGen/MachineBasicBlock.cpp2
-rw-r--r--lib/CodeGen/MachineFunctionPrinterPass.cpp4
-rw-r--r--lib/CodeGen/MachineInstr.cpp38
-rw-r--r--lib/CodeGen/MachineLICM.cpp87
-rw-r--r--lib/CodeGen/MachineVerifier.cpp2
-rw-r--r--lib/CodeGen/RegisterCoalescer.cpp2
-rw-r--r--lib/CodeGen/RegisterPressure.cpp117
-rw-r--r--lib/CodeGen/ScheduleDAGInstrs.cpp28
-rw-r--r--lib/CodeGen/SelectionDAG/DAGCombiner.cpp26
-rw-r--r--lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp2
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeDAG.cpp14
-rw-r--r--lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp8
-rw-r--r--lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp9
-rw-r--r--lib/CodeGen/SelectionDAG/StatepointLowering.cpp18
-rw-r--r--lib/CodeGen/ShrinkWrap.cpp57
-rw-r--r--lib/CodeGen/StackColoring.cpp9
-rw-r--r--lib/CodeGen/WinEHPrepare.cpp25
-rw-r--r--lib/DebugInfo/Symbolize/DIPrinter.cpp40
-rw-r--r--lib/ExecutionEngine/Orc/CMakeLists.txt4
-rw-r--r--lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp (renamed from lib/ExecutionEngine/Orc/OrcTargetSupport.cpp)4
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp2
-rw-r--r--lib/ExecutionEngine/Orc/OrcCBindingsStack.h3
-rw-r--r--lib/ExecutionEngine/Orc/OrcError.cpp57
-rw-r--r--lib/ExecutionEngine/Orc/OrcMCJITReplacement.h16
-rw-r--r--lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp83
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp46
-rw-r--r--lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h6
-rw-r--r--lib/ExecutionEngine/SectionMemoryManager.cpp3
-rw-r--r--lib/Fuzzer/FuzzerDriver.cpp4
-rw-r--r--lib/Fuzzer/FuzzerFlags.def4
-rw-r--r--lib/Fuzzer/FuzzerInterface.h17
-rw-r--r--lib/Fuzzer/FuzzerInternal.h9
-rw-r--r--lib/Fuzzer/FuzzerLoop.cpp23
-rw-r--r--lib/Fuzzer/FuzzerMutate.cpp90
-rw-r--r--lib/Fuzzer/FuzzerTraceState.cpp244
-rw-r--r--lib/Fuzzer/FuzzerUtil.cpp25
-rw-r--r--lib/Fuzzer/test/FuzzerUnittest.cpp35
-rw-r--r--lib/Fuzzer/test/MemcmpTest.cpp12
-rw-r--r--lib/Fuzzer/test/fuzzer-dfsan.test7
-rw-r--r--lib/Fuzzer/test/fuzzer-dict.test6
-rw-r--r--lib/Fuzzer/test/fuzzer-traces.test7
-rw-r--r--lib/Fuzzer/test/fuzzer.test6
-rw-r--r--lib/IR/AsmWriter.cpp2
-rw-r--r--lib/IR/Core.cpp2
-rw-r--r--lib/IR/Function.cpp44
-rw-r--r--lib/IR/IRPrintingPasses.cpp11
-rw-r--r--lib/IR/LLVMContext.cpp16
-rw-r--r--lib/IR/LLVMContextImpl.h7
-rw-r--r--lib/IR/LegacyPassManager.cpp13
-rw-r--r--lib/IR/Metadata.cpp2
-rw-r--r--lib/IR/Verifier.cpp317
-rw-r--r--lib/LTO/LTOCodeGenerator.cpp3
-rw-r--r--lib/Linker/IRMover.cpp24
-rw-r--r--lib/Linker/LinkModules.cpp157
-rw-r--r--lib/MC/MCExpr.cpp1
-rw-r--r--lib/MC/MCObjectFileInfo.cpp10
-rw-r--r--lib/MC/MCObjectStreamer.cpp2
-rw-r--r--lib/MC/WinCOFFObjectWriter.cpp8
-rw-r--r--lib/Object/COFFObjectFile.cpp24
-rw-r--r--lib/Object/ELF.cpp7
-rw-r--r--lib/ProfileData/CoverageMapping.cpp2
-rw-r--r--lib/ProfileData/CoverageMappingReader.cpp36
-rw-r--r--lib/ProfileData/InstrProf.cpp62
-rw-r--r--lib/ProfileData/InstrProfWriter.cpp16
-rw-r--r--lib/Support/Debug.cpp8
-rw-r--r--lib/Support/IntEqClasses.cpp4
-rw-r--r--lib/Support/Triple.cpp56
-rw-r--r--lib/Support/Windows/Path.inc1
-rw-r--r--lib/Support/Windows/Signals.inc5
-rw-r--r--lib/Support/Windows/WindowsSupport.h31
-rw-r--r--lib/Support/raw_ostream.cpp2
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.cpp57
-rw-r--r--lib/Target/AArch64/AArch64InstrInfo.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPU.h4
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp54
-rw-r--r--lib/Target/AMDGPU/AMDGPUAsmPrinter.h2
-rw-r--r--lib/Target/AMDGPU/AMDGPUCallingConv.td67
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.cpp300
-rw-r--r--lib/Target/AMDGPU/AMDGPUISelLowering.h9
-rw-r--r--lib/Target/AMDGPU/AMDGPUInstrInfo.td4
-rw-r--r--lib/Target/AMDGPU/AMDGPUTargetMachine.cpp8
-rw-r--r--lib/Target/AMDGPU/CMakeLists.txt1
-rw-r--r--lib/Target/AMDGPU/EvergreenInstructions.td2
-rw-r--r--lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp1
-rw-r--r--lib/Target/AMDGPU/SIDefines.h2
-rw-r--r--lib/Target/AMDGPU/SIFixSGPRCopies.cpp2
-rw-r--r--lib/Target/AMDGPU/SIFoldOperands.cpp14
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.cpp132
-rw-r--r--lib/Target/AMDGPU/SIISelLowering.h7
-rw-r--r--lib/Target/AMDGPU/SIInsertWaits.cpp16
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.cpp16
-rw-r--r--lib/Target/AMDGPU/SIInstrInfo.h3
-rw-r--r--lib/Target/AMDGPU/SIInstructions.td2
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.cpp6
-rw-r--r--lib/Target/AMDGPU/SIMachineFunctionInfo.h26
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.cpp1968
-rw-r--r--lib/Target/AMDGPU/SIMachineScheduler.h489
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.cpp96
-rw-r--r--lib/Target/AMDGPU/SIRegisterInfo.h6
-rw-r--r--lib/Target/AMDGPU/SITypeRewriter.cpp3
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp21
-rw-r--r--lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h2
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.cpp23
-rw-r--r--lib/Target/ARM/ARMBaseRegisterInfo.h9
-rw-r--r--lib/Target/ARM/ARMCallingConv.td15
-rw-r--r--lib/Target/ARM/ARMFastISel.cpp5
-rw-r--r--lib/Target/ARM/ARMISelDAGToDAG.cpp15
-rw-r--r--lib/Target/ARM/ARMISelLowering.cpp138
-rw-r--r--lib/Target/ARM/ARMISelLowering.h11
-rw-r--r--lib/Target/ARM/ARMInstrInfo.td21
-rw-r--r--lib/Target/ARM/ARMInstrNEON.td7
-rw-r--r--lib/Target/ARM/ARMInstrThumb.td8
-rw-r--r--lib/Target/ARM/ARMInstrThumb2.td7
-rw-r--r--lib/Target/ARM/ARMInstrVFP.td4
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.cpp3
-rw-r--r--lib/Target/ARM/ARMMachineFunctionInfo.h9
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp28
-rw-r--r--lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp3
-rw-r--r--lib/Target/ARM/Thumb1FrameLowering.h5
-rw-r--r--lib/Target/AVR/AVR.h54
-rw-r--r--lib/Target/AVR/AVRSelectionDAGInfo.h29
-rw-r--r--lib/Target/AVR/AVRTargetObjectFile.cpp40
-rw-r--r--lib/Target/AVR/AVRTargetObjectFile.h35
-rw-r--r--lib/Target/AVR/CMakeLists.txt1
-rw-r--r--lib/Target/Hexagon/CMakeLists.txt6
-rw-r--r--lib/Target/Hexagon/HexagonAsmPrinter.cpp6
-rw-r--r--lib/Target/Hexagon/HexagonBitSimplify.cpp26
-rw-r--r--lib/Target/Hexagon/HexagonInstrInfoV4.td4
-rw-r--r--lib/Target/Hexagon/HexagonRDF.cpp60
-rw-r--r--lib/Target/Hexagon/HexagonRDF.h28
-rw-r--r--lib/Target/Hexagon/HexagonRDFOpt.cpp272
-rw-r--r--lib/Target/Hexagon/HexagonRegisterInfo.cpp2
-rw-r--r--lib/Target/Hexagon/HexagonTargetMachine.cpp12
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp30
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp19
-rw-r--r--lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h20
-rw-r--r--lib/Target/Hexagon/RDFCopy.cpp180
-rw-r--r--lib/Target/Hexagon/RDFCopy.h48
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.cpp204
-rw-r--r--lib/Target/Hexagon/RDFDeadCode.h65
-rw-r--r--lib/Target/Hexagon/RDFGraph.cpp1716
-rw-r--r--lib/Target/Hexagon/RDFGraph.h841
-rw-r--r--lib/Target/Hexagon/RDFLiveness.cpp848
-rw-r--r--lib/Target/Hexagon/RDFLiveness.h106
-rw-r--r--lib/Target/Mips/MipsISelLowering.cpp19
-rw-r--r--lib/Target/Mips/MipsISelLowering.h1
-rw-r--r--lib/Target/Mips/MipsInstrInfo.td4
-rw-r--r--lib/Target/Mips/MipsSEInstrInfo.cpp5
-rw-r--r--lib/Target/NVPTX/NVPTXISelLowering.cpp1
-rw-r--r--lib/Target/NVPTX/NVPTXTargetObjectFile.h3
-rw-r--r--lib/Target/PowerPC/PPCAsmPrinter.cpp100
-rw-r--r--lib/Target/PowerPC/PPCInstr64Bit.td15
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.cpp43
-rw-r--r--lib/Target/PowerPC/PPCInstrInfo.td15
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.cpp21
-rw-r--r--lib/Target/PowerPC/PPCMachineFunctionInfo.h4
-rw-r--r--lib/Target/PowerPC/PPCTLSDynamicCall.cpp7
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.cpp182
-rw-r--r--lib/Target/Sparc/SparcInstrInfo.h3
-rw-r--r--lib/Target/SystemZ/SystemZInstrInfo.td3
-rw-r--r--lib/Target/WebAssembly/CMakeLists.txt3
-rw-r--r--lib/Target/WebAssembly/Disassembler/CMakeLists.txt3
-rw-r--r--lib/Target/WebAssembly/Disassembler/LLVMBuild.txt23
-rw-r--r--lib/Target/WebAssembly/Disassembler/Makefile16
-rw-r--r--lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp148
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp85
-rw-r--r--lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h3
-rw-r--r--lib/Target/WebAssembly/LLVMBuild.txt3
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt1
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp5
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp26
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp7
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp90
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp83
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h42
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp94
-rw-r--r--lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h68
-rw-r--r--lib/Target/WebAssembly/Makefile2
-rw-r--r--lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp63
-rw-r--r--lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp153
-rw-r--r--lib/Target/WebAssembly/WebAssemblyISelLowering.cpp14
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrControl.td25
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp9
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrInfo.td33
-rw-r--r--lib/Target/WebAssembly/WebAssemblyInstrMemory.td246
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp35
-rw-r--r--lib/Target/WebAssembly/WebAssemblyMCInstLower.h3
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegStackify.cpp8
-rw-r--r--lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp18
-rw-r--r--lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp5
-rw-r--r--lib/Target/WebAssembly/known_gcc_test_failures.txt19
-rw-r--r--lib/Target/X86/X86.h7
-rw-r--r--lib/Target/X86/X86CallingConv.td6
-rw-r--r--lib/Target/X86/X86FastISel.cpp3
-rw-r--r--lib/Target/X86/X86ISelLowering.cpp298
-rw-r--r--lib/Target/X86/X86ISelLowering.h19
-rw-r--r--lib/Target/X86/X86InstrAVX512.td114
-rw-r--r--lib/Target/X86/X86InstrExtension.td36
-rw-r--r--lib/Target/X86/X86InstrFragmentsSIMD.td3
-rw-r--r--lib/Target/X86/X86InstrInfo.td160
-rw-r--r--lib/Target/X86/X86InstrMPX.td28
-rw-r--r--lib/Target/X86/X86InstrSSE.td11
-rw-r--r--lib/Target/X86/X86IntrinsicsInfo.h148
-rw-r--r--lib/Target/X86/X86MachineFunctionInfo.h7
-rw-r--r--lib/Target/X86/X86OptimizeLEAs.cpp195
-rw-r--r--lib/Target/X86/X86RegisterInfo.cpp12
-rw-r--r--lib/Target/X86/X86RegisterInfo.h2
-rw-r--r--lib/Transforms/IPO/ForceFunctionAttrs.cpp2
-rw-r--r--lib/Transforms/IPO/FunctionAttrs.cpp166
-rw-r--r--lib/Transforms/IPO/FunctionImport.cpp24
-rw-r--r--lib/Transforms/IPO/IPO.cpp5
-rw-r--r--lib/Transforms/IPO/LoopExtractor.cpp4
-rw-r--r--lib/Transforms/IPO/PassManagerBuilder.cpp10
-rw-r--r--lib/Transforms/InstCombine/InstCombineMulDivRem.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineSelect.cpp6
-rw-r--r--lib/Transforms/InstCombine/InstCombineVectorOps.cpp17
-rw-r--r--lib/Transforms/Instrumentation/InstrProfiling.cpp34
-rw-r--r--lib/Transforms/Instrumentation/MemorySanitizer.cpp2
-rw-r--r--lib/Transforms/Scalar/JumpThreading.cpp218
-rw-r--r--lib/Transforms/Scalar/LICM.cpp46
-rw-r--r--lib/Transforms/Scalar/LoopDeletion.cpp2
-rw-r--r--lib/Transforms/Scalar/LoopUnrollPass.cpp488
-rw-r--r--lib/Transforms/Scalar/MemCpyOptimizer.cpp35
-rw-r--r--lib/Transforms/Scalar/PlaceSafepoints.cpp2
-rw-r--r--lib/Transforms/Scalar/Reassociate.cpp3
-rw-r--r--lib/Transforms/Scalar/RewriteStatepointsForGC.cpp102
-rw-r--r--lib/Transforms/Scalar/SCCP.cpp61
-rw-r--r--lib/Transforms/Scalar/TailRecursionElimination.cpp9
-rw-r--r--lib/Transforms/Utils/BasicBlockUtils.cpp16
-rw-r--r--lib/Transforms/Utils/CloneFunction.cpp72
-rw-r--r--lib/Transforms/Utils/Local.cpp31
-rw-r--r--lib/Transforms/Utils/LoopUnroll.cpp4
-rw-r--r--lib/Transforms/Utils/LoopUtils.cpp2
-rw-r--r--lib/Transforms/Utils/SimplifyCFG.cpp101
-rw-r--r--lib/Transforms/Utils/SimplifyLibCalls.cpp165
-rw-r--r--lib/Transforms/Utils/ValueMapper.cpp13
-rw-r--r--lib/Transforms/Vectorize/SLPVectorizer.cpp44
-rw-r--r--test/Analysis/GlobalsModRef/nocapture.ll57
-rw-r--r--test/Analysis/LoopAccessAnalysis/interleave-innermost.ll29
-rw-r--r--test/Bitcode/compatibility.ll37
-rw-r--r--test/CodeGen/AArch64/arm64-misched-memdep-bug.ll22
-rw-r--r--test/CodeGen/AArch64/branch-folder-merge-mmos.ll33
-rw-r--r--test/CodeGen/AArch64/machine-combiner.ll258
-rw-r--r--test/CodeGen/AMDGPU/ctlz.ll269
-rw-r--r--test/CodeGen/AMDGPU/ctlz_zero_undef.ll197
-rw-r--r--test/CodeGen/AMDGPU/flat-scratch-reg.ll8
-rw-r--r--test/CodeGen/AMDGPU/fmin_legacy.ll4
-rw-r--r--test/CodeGen/AMDGPU/fsub.ll15
-rw-r--r--test/CodeGen/AMDGPU/hsa-globals.ll16
-rw-r--r--test/CodeGen/AMDGPU/hsa-note-no-func.ll6
-rw-r--r--test/CodeGen/AMDGPU/hsa.ll4
-rw-r--r--test/CodeGen/AMDGPU/inline-asm.ll11
-rw-r--r--test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll3
-rw-r--r--test/CodeGen/AMDGPU/llvm.round.f64.ll2
-rw-r--r--test/CodeGen/AMDGPU/ret.ll245
-rw-r--r--test/CodeGen/AMDGPU/si-scheduler.ll55
-rw-r--r--test/CodeGen/AMDGPU/sint_to_fp.i64.ll62
-rw-r--r--test/CodeGen/AMDGPU/sint_to_fp.ll91
-rw-r--r--test/CodeGen/AMDGPU/udiv.ll89
-rw-r--r--test/CodeGen/AMDGPU/uint_to_fp.i64.ll57
-rw-r--r--test/CodeGen/AMDGPU/uint_to_fp.ll123
-rw-r--r--test/CodeGen/ARM/bit-reverse-to-rbit.ll34
-rw-r--r--test/CodeGen/ARM/cxx-tlscc.ll46
-rw-r--r--test/CodeGen/ARM/darwin-tls.ll165
-rw-r--r--test/CodeGen/ARM/fabs-to-bfc.ll14
-rw-r--r--test/CodeGen/ARM/fp16-args.ll7
-rw-r--r--test/CodeGen/ARM/fp16-v3.ll28
-rw-r--r--test/CodeGen/ARM/inlineasm-imm-thumb.ll20
-rw-r--r--test/CodeGen/ARM/inlineasm-imm-thumb2.ll31
-rw-r--r--test/CodeGen/ARM/zero-cycle-zero.ll58
-rw-r--r--test/CodeGen/Hexagon/bit-phi.ll58
-rw-r--r--test/CodeGen/Hexagon/postinc-offset.ll3
-rw-r--r--test/CodeGen/Hexagon/rdf-copy.ll54
-rw-r--r--test/CodeGen/Hexagon/rdf-dead-loop.ll31
-rw-r--r--test/CodeGen/Mips/llvm-ir/call.ll15
-rw-r--r--test/CodeGen/Mips/madd-msub.ll14
-rw-r--r--test/CodeGen/PowerPC/2016-01-07-BranchWeightCrash.ll35
-rw-r--r--test/CodeGen/PowerPC/ppc64le-localentry-large.ll27
-rw-r--r--test/CodeGen/PowerPC/ppc64le-localentry.ll20
-rw-r--r--test/CodeGen/PowerPC/pr25802.ll52
-rw-r--r--test/CodeGen/PowerPC/tls_get_addr_clobbers.ll54
-rw-r--r--test/CodeGen/PowerPC/tls_get_addr_stackframe.ll32
-rw-r--r--test/CodeGen/SPARC/2011-01-19-DelaySlot.ll2
-rw-r--r--test/CodeGen/SPARC/analyze-branch.ll58
-rw-r--r--test/CodeGen/WebAssembly/call.ll22
-rw-r--r--test/CodeGen/WebAssembly/cfg-stackify.ll817
-rw-r--r--test/CodeGen/WebAssembly/comparisons_f32.ll2
-rw-r--r--test/CodeGen/WebAssembly/comparisons_f64.ll2
-rw-r--r--test/CodeGen/WebAssembly/comparisons_i32.ll2
-rw-r--r--test/CodeGen/WebAssembly/comparisons_i64.ll2
-rw-r--r--test/CodeGen/WebAssembly/conv.ll2
-rw-r--r--test/CodeGen/WebAssembly/copysign-casts.ll2
-rw-r--r--test/CodeGen/WebAssembly/cpus.ll16
-rw-r--r--test/CodeGen/WebAssembly/dead-vreg.ll2
-rw-r--r--test/CodeGen/WebAssembly/f32.ll4
-rw-r--r--test/CodeGen/WebAssembly/f64.ll4
-rw-r--r--test/CodeGen/WebAssembly/fast-isel.ll2
-rw-r--r--test/CodeGen/WebAssembly/frem.ll6
-rw-r--r--test/CodeGen/WebAssembly/func.ll3
-rw-r--r--test/CodeGen/WebAssembly/global.ll20
-rw-r--r--test/CodeGen/WebAssembly/globl.ll2
-rw-r--r--test/CodeGen/WebAssembly/i32.ll2
-rw-r--r--test/CodeGen/WebAssembly/i64.ll2
-rw-r--r--test/CodeGen/WebAssembly/ident.ll2
-rw-r--r--test/CodeGen/WebAssembly/immediates.ll2
-rw-r--r--test/CodeGen/WebAssembly/inline-asm.ll7
-rw-r--r--test/CodeGen/WebAssembly/legalize.ll2
-rw-r--r--test/CodeGen/WebAssembly/load-ext.ll2
-rw-r--r--test/CodeGen/WebAssembly/load-store-i1.ll2
-rw-r--r--test/CodeGen/WebAssembly/load.ll2
-rw-r--r--test/CodeGen/WebAssembly/loop-idiom.ll2
-rw-r--r--test/CodeGen/WebAssembly/memory-addr32.ll2
-rw-r--r--test/CodeGen/WebAssembly/memory-addr64.ll2
-rw-r--r--test/CodeGen/WebAssembly/offset-folding.ll2
-rw-r--r--test/CodeGen/WebAssembly/offset.ll2
-rw-r--r--test/CodeGen/WebAssembly/phi.ll4
-rw-r--r--test/CodeGen/WebAssembly/reg-stackify.ll22
-rw-r--r--test/CodeGen/WebAssembly/return-int32.ll2
-rw-r--r--test/CodeGen/WebAssembly/return-void.ll2
-rw-r--r--test/CodeGen/WebAssembly/returned.ll10
-rw-r--r--test/CodeGen/WebAssembly/select.ll2
-rw-r--r--test/CodeGen/WebAssembly/signext-zeroext.ll6
-rw-r--r--test/CodeGen/WebAssembly/store-results.ll2
-rw-r--r--test/CodeGen/WebAssembly/store-trunc.ll2
-rw-r--r--test/CodeGen/WebAssembly/store.ll2
-rw-r--r--test/CodeGen/WebAssembly/switch.ll86
-rw-r--r--test/CodeGen/WebAssembly/unreachable.ll4
-rw-r--r--test/CodeGen/WebAssembly/unused-argument.ll4
-rw-r--r--test/CodeGen/WebAssembly/userstack.ll23
-rw-r--r--test/CodeGen/WebAssembly/varargs.ll4
-rw-r--r--test/CodeGen/WebAssembly/vtable.ll2
-rw-r--r--test/CodeGen/WinEH/wineh-cloning.ll42
-rw-r--r--test/CodeGen/WinEH/wineh-no-demotion.ll4
-rw-r--r--test/CodeGen/WinEH/wineh-statenumbering.ll2
-rw-r--r--test/CodeGen/X86/2008-11-03-F80VAARG.ll4
-rw-r--r--test/CodeGen/X86/2012-01-12-extract-sv.ll4
-rw-r--r--test/CodeGen/X86/avx-vbroadcast.ll431
-rw-r--r--test/CodeGen/X86/avx2-vbroadcast.ll763
-rw-r--r--test/CodeGen/X86/avx512-intrinsics.ll395
-rw-r--r--test/CodeGen/X86/avx512bw-intrinsics.ll215
-rw-r--r--test/CodeGen/X86/avx512bwvl-intrinsics.ll421
-rw-r--r--test/CodeGen/X86/avx512vl-intrinsics.ll1033
-rw-r--r--test/CodeGen/X86/catchpad-lifetime.ll91
-rw-r--r--test/CodeGen/X86/cxx_tlscc64.ll63
-rw-r--r--test/CodeGen/X86/dagcombine-cse.ll2
-rw-r--r--test/CodeGen/X86/f16c-intrinsics.ll12
-rw-r--r--test/CodeGen/X86/insertps-combine.ll33
-rw-r--r--test/CodeGen/X86/lea-opt.ll38
-rw-r--r--test/CodeGen/X86/pr13577.ll16
-rw-r--r--test/CodeGen/X86/scalar-int-to-fp.ll43
-rw-r--r--test/CodeGen/X86/shrinkwrap-hang.ll32
-rw-r--r--test/CodeGen/X86/stack-folding-fp-sse42.ll2
-rw-r--r--test/CodeGen/X86/statepoint-vector.ll162
-rw-r--r--test/CodeGen/X86/vec_uint_to_fp-fastmath.ll24
-rw-r--r--test/CodeGen/X86/version_directive.ll4
-rw-r--r--test/CodeGen/X86/x86-shrink-wrapping.ll105
-rw-r--r--test/DebugInfo/COFF/asm.ll73
-rw-r--r--test/DebugInfo/COFF/multifile.ll108
-rw-r--r--test/DebugInfo/COFF/multifunction.ll271
-rw-r--r--test/DebugInfo/COFF/simple.ll60
-rw-r--r--test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll2
-rw-r--r--test/DebugInfo/X86/debug-macro.ll67
-rw-r--r--test/DebugInfo/X86/debugger-tune.ll2
-rw-r--r--test/DebugInfo/X86/tls.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/remote/cross-module-a.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll14
-rw-r--r--test/ExecutionEngine/MCJIT/remote/multi-module-a.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll10
-rw-r--r--test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/stubs-remote.ll2
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll5
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll1
-rw-r--r--test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll5
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll14
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll10
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll2
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll5
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll1
-rw-r--r--test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll5
-rw-r--r--test/Feature/exception.ll4
-rw-r--r--test/Instrumentation/MemorySanitizer/origin-array.ll23
-rw-r--r--test/Linker/Inputs/pr26037.ll23
-rw-r--r--test/Linker/pr26037.ll38
-rw-r--r--test/MC/ARM/twice.ll9
-rw-r--r--test/MC/COFF/timestamp.s1
-rw-r--r--test/MC/Disassembler/Mips/mips2/valid-mips2.txt1
-rw-r--r--test/MC/Disassembler/Mips/mips3/valid-mips3.txt1
-rw-r--r--test/MC/Disassembler/Mips/mips4/valid-mips4.txt1
-rw-r--r--test/MC/Disassembler/X86/avx-512.txt2
-rw-r--r--test/MC/Mips/mips1/valid.s1
-rw-r--r--test/MC/Mips/mips2/valid.s1
-rw-r--r--test/MC/Mips/mips3/valid.s1
-rw-r--r--test/MC/Mips/mips32/valid.s1
-rw-r--r--test/MC/Mips/mips32r2/valid.s1
-rw-r--r--test/MC/Mips/mips32r3/valid.s1
-rw-r--r--test/MC/Mips/mips32r5/valid.s1
-rw-r--r--test/MC/Mips/mips32r6/valid.s1
-rw-r--r--test/MC/Mips/mips4/valid.s1
-rw-r--r--test/MC/Mips/mips5/valid.s1
-rw-r--r--test/MC/Mips/mips64/valid.s1
-rw-r--r--test/MC/Mips/mips64r2/valid.s1
-rw-r--r--test/MC/Mips/mips64r3/valid.s1
-rw-r--r--test/MC/Mips/mips64r5/valid.s1
-rw-r--r--test/MC/Mips/mips64r6/valid.s1
-rw-r--r--test/MC/X86/avx512-encodings.s56
-rw-r--r--test/MC/X86/intel-syntax-avx512.s124
-rw-r--r--test/MC/X86/intel-syntax-x86-64-avx512f_vl.s104
-rw-r--r--test/MC/X86/x86-64-avx512dq.s48
-rw-r--r--test/Other/2010-05-06-Printer.ll14
-rw-r--r--test/TableGen/TwoLevelName.td24
-rw-r--r--test/Transforms/FunctionAttrs/norecurse.ll2
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport.ll15
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport_alias.ll7
-rw-r--r--test/Transforms/FunctionImport/funcimport.ll2
-rw-r--r--test/Transforms/FunctionImport/funcimport_alias.ll25
-rw-r--r--test/Transforms/FunctionImport/funcimport_debug.ll14
-rw-r--r--test/Transforms/IPConstantProp/PR16052.ll26
-rw-r--r--test/Transforms/IPConstantProp/PR26044.ll31
-rw-r--r--test/Transforms/Inline/attributes.ll84
-rw-r--r--test/Transforms/InstCombine/fast-math.ll69
-rw-r--r--test/Transforms/InstCombine/inline-intrinsic-assert.ll12
-rw-r--r--test/Transforms/InstCombine/insert-extract-shuffle.ll50
-rw-r--r--test/Transforms/InstCombine/log-pow.ll64
-rw-r--r--test/Transforms/InstCombine/no_cgscc_assert.ll5
-rw-r--r--test/Transforms/InstCombine/pow-exp.ll53
-rw-r--r--test/Transforms/InstCombine/pow-exp2.ll19
-rw-r--r--test/Transforms/InstCombine/pow-sqrt.ll14
-rw-r--r--test/Transforms/InstCombine/printf-3.ll39
-rw-r--r--test/Transforms/InstCombine/tan.ll19
-rw-r--r--test/Transforms/InstSimplify/floating-point-compare.ll41
-rw-r--r--test/Transforms/JumpThreading/pr26096.ll68
-rw-r--r--test/Transforms/JumpThreading/select.ll37
-rw-r--r--test/Transforms/LoopUnroll/partial-unroll-optsize.ll3
-rw-r--r--test/Transforms/LoopUnroll/unloop.ll2
-rw-r--r--test/Transforms/MemCpyOpt/fca2memcpy.ll24
-rw-r--r--test/Transforms/Reassociate/add_across_block_crash.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/constants.ll11
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll112
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector.ll2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll33
-rw-r--r--test/Transforms/SimplifyCFG/bug-25299.ll40
-rw-r--r--test/Transforms/SimplifyCFG/invoke_unwind.ll42
-rw-r--r--test/Transforms/Util/split-bit-piece.ll45
-rw-r--r--test/Verifier/gc_relocate_return.ll5
-rw-r--r--test/Verifier/invalid-eh.ll274
-rw-r--r--test/lit.cfg6
-rw-r--r--test/lit.site.cfg.in1
-rw-r--r--test/tools/llvm-lto/error.ll2
-rw-r--r--test/tools/llvm-objdump/Inputs/malformed-macho.binbin0 -> 843 bytes
-rw-r--r--test/tools/llvm-objdump/X86/macho-private-header.test6
-rw-r--r--test/tools/llvm-objdump/malformed-archives.test2
-rw-r--r--test/tools/llvm-objdump/malformed-macho.test2
-rw-r--r--test/tools/llvm-profdata/value-prof.proftext10
-rw-r--r--test/tools/llvm-readobj/codeview-linetables.test309
-rw-r--r--test/tools/llvm-symbolizer/Inputs/addr.inp2
-rw-r--r--test/tools/llvm-symbolizer/print_context.c22
-rw-r--r--test/tools/llvm-symbolizer/sym.test4
-rw-r--r--tools/lli/CMakeLists.txt3
-rw-r--r--tools/lli/ChildTarget/CMakeLists.txt7
-rw-r--r--tools/lli/ChildTarget/ChildTarget.cpp285
-rw-r--r--tools/lli/ChildTarget/Makefile4
-rw-r--r--tools/lli/OrcLazyJIT.cpp2
-rw-r--r--tools/lli/OrcLazyJIT.h4
-rw-r--r--tools/lli/RPCChannel.h49
-rw-r--r--tools/lli/RemoteJITUtils.h131
-rw-r--r--tools/lli/RemoteMemoryManager.cpp174
-rw-r--r--tools/lli/RemoteMemoryManager.h101
-rw-r--r--tools/lli/RemoteTarget.cpp71
-rw-r--r--tools/lli/RemoteTarget.h122
-rw-r--r--tools/lli/RemoteTargetExternal.cpp327
-rw-r--r--tools/lli/RemoteTargetExternal.h143
-rw-r--r--tools/lli/RemoteTargetMessage.h85
-rw-r--r--tools/lli/Unix/RPCChannel.inc122
-rw-r--r--tools/lli/Windows/RPCChannel.inc29
-rw-r--r--tools/lli/lli.cpp179
-rw-r--r--tools/llvm-lto/llvm-lto.cpp1
-rw-r--r--tools/llvm-objdump/COFFDump.cpp19
-rw-r--r--tools/llvm-objdump/MachODump.cpp42
-rw-r--r--tools/llvm-objdump/llvm-objdump.cpp41
-rw-r--r--tools/llvm-objdump/llvm-objdump.h2
-rw-r--r--tools/llvm-readobj/COFFDumper.cpp42
-rw-r--r--tools/llvm-readobj/ELFDumper.cpp3
-rw-r--r--tools/llvm-symbolizer/llvm-symbolizer.cpp40
-rw-r--r--unittests/ADT/CMakeLists.txt2
-rw-r--r--unittests/ADT/PointerEmbeddedIntTest.cpp46
-rw-r--r--unittests/ADT/PointerIntPairTest.cpp27
-rw-r--r--unittests/ADT/PointerSumTypeTest.cpp113
-rw-r--r--unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp5
-rw-r--r--unittests/ExecutionEngine/Orc/CMakeLists.txt1
-rw-r--r--unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp82
-rw-r--r--unittests/ExecutionEngine/Orc/OrcTestCommon.cpp6
-rw-r--r--unittests/ExecutionEngine/Orc/OrcTestCommon.h2
-rw-r--r--unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp147
-rw-r--r--unittests/IR/AsmWriterTest.cpp37
-rw-r--r--unittests/IR/CMakeLists.txt1
-rw-r--r--unittests/IR/IRBuilderTest.cpp16
-rw-r--r--unittests/ProfileData/InstrProfTest.cpp140
-rw-r--r--unittests/Support/MathExtrasTest.cpp54
-rw-r--r--utils/KillTheDoctor/KillTheDoctor.cpp6
-rw-r--r--utils/TableGen/AsmWriterEmitter.cpp163
-rw-r--r--utils/TableGen/AsmWriterInst.cpp6
-rw-r--r--utils/lit/lit/util.py9
602 files changed, 26329 insertions, 6787 deletions
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index bed81b28426e..b06e434a2487 100755
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -308,6 +308,8 @@ endfunction(set_windows_version_resource_properties)
# SHARED;STATIC
# STATIC by default w/o BUILD_SHARED_LIBS.
# SHARED by default w/ BUILD_SHARED_LIBS.
+# OBJECT
+# Also create an OBJECT library target. Default if STATIC && SHARED.
# MODULE
# Target ${name} might not be created on unsupported platforms.
# Check with "if(TARGET ${name})".
@@ -329,7 +331,7 @@ endfunction(set_windows_version_resource_properties)
# )
function(llvm_add_library name)
cmake_parse_arguments(ARG
- "MODULE;SHARED;STATIC;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME"
+ "MODULE;SHARED;STATIC;OBJECT;DISABLE_LLVM_LINK_LLVM_DYLIB;SONAME"
"OUTPUT_NAME"
"ADDITIONAL_HEADERS;DEPENDS;LINK_COMPONENTS;LINK_LIBS;OBJLIBS"
${ARGN})
@@ -362,7 +364,7 @@ function(llvm_add_library name)
endif()
# Generate objlib
- if(ARG_SHARED AND ARG_STATIC)
+ if((ARG_SHARED AND ARG_STATIC) OR ARG_OBJECT)
# Generate an obj library for both targets.
set(obj_name "obj.${name}")
add_library(${obj_name} OBJECT EXCLUDE_FROM_ALL
diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake
index 4c5ffe2f7b28..6db258ff66a1 100644
--- a/cmake/modules/HandleLLVMOptions.cmake
+++ b/cmake/modules/HandleLLVMOptions.cmake
@@ -363,6 +363,36 @@ if( MSVC )
append("/Zc:inline" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+ if (NOT LLVM_ENABLE_TIMESTAMPS AND CMAKE_CXX_COMPILER_ID MATCHES "Clang")
+ # clang-cl and cl by default produce non-deterministic binaries because
+ # link.exe /incremental requires a timestamp in the .obj file. clang-cl
+ # has the flag /Brepro to force deterministic binaries, so pass that when
+ # LLVM_ENABLE_TIMESTAMPS is turned off.
+ # This checks CMAKE_CXX_COMPILER_ID in addition to check_cxx_compiler_flag()
+ # because cl.exe does not emit an error on flags it doesn't understand,
+ # letting check_cxx_compiler_flag() claim it understands all flags.
+ check_cxx_compiler_flag("/Brepro" SUPPORTS_BREPRO)
+ append_if(SUPPORTS_BREPRO "/Brepro" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
+
+ if (SUPPORTS_BREPRO)
+ # Check if /INCREMENTAL is passed to the linker and complain that it
+ # won't work with /Brepro.
+ string(TOUPPER "${CMAKE_EXE_LINKER_FLAGS}" upper_exe_flags)
+ string(TOUPPER "${CMAKE_MODULE_LINKER_FLAGS}" upper_module_flags)
+ string(TOUPPER "${CMAKE_SHARED_LINKER_FLAGS}" upper_shared_flags)
+
+ string(FIND "${upper_exe_flags}" "/INCREMENTAL" exe_index)
+ string(FIND "${upper_module_flags}" "/INCREMENTAL" module_index)
+ string(FIND "${upper_shared_flags}" "/INCREMENTAL" shared_index)
+
+ if (${exe_index} GREATER -1 OR
+ ${module_index} GREATER -1 OR
+ ${shared_index} GREATER -1)
+ message(FATAL_ERROR "LLVM_ENABLE_TIMESTAMPS not compatible with /INCREMENTAL linking")
+ endif()
+ endif()
+ endif()
+
# Disable sized deallocation if the flag is supported. MSVC fails to compile
# the operator new overload in User otherwise.
check_c_compiler_flag("/WX /Zc:sizedDealloc-" SUPPORTS_SIZED_DEALLOC)
diff --git a/docs/AliasAnalysis.rst b/docs/AliasAnalysis.rst
index fe7fcbd4bc50..e055b4e1afbc 100644
--- a/docs/AliasAnalysis.rst
+++ b/docs/AliasAnalysis.rst
@@ -190,7 +190,7 @@ this property are side-effect free, only depending on their input arguments and
the state of memory when they are called. This property allows calls to these
functions to be eliminated and moved around, as long as there is no store
instruction that changes the contents of memory. Note that all functions that
-satisfy the ``doesNotAccessMemory`` method also satisfies ``onlyReadsMemory``.
+satisfy the ``doesNotAccessMemory`` method also satisfy ``onlyReadsMemory``.
Writing a new ``AliasAnalysis`` Implementation
==============================================
@@ -634,7 +634,7 @@ transformations:
* It uses mod/ref information to hoist function calls out of loops that do not
write to memory and are loop-invariant.
-* If uses alias information to promote memory objects that are loaded and stored
+* It uses alias information to promote memory objects that are loaded and stored
to in loops to live in a register instead. It can do this if there are no may
aliases to the loaded/stored memory location.
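A minimal IR sketch of the first transformation above, with a hypothetical
``readnone`` callee ``@weight``: the call does not write memory and its
argument is loop-invariant, so LICM's mod/ref reasoning permits hoisting it
into the loop preheader.

.. code-block:: llvm

   define i32 @sum(i32* %p, i32 %n) {
   entry:
     br label %loop
   loop:
     %i = phi i32 [ 0, %entry ], [ %i.next, %loop ]
     %acc = phi i32 [ 0, %entry ], [ %acc.next, %loop ]
     ; Does not write memory and is loop-invariant: hoistable.
     %w = call i32 @weight(i32* %p)
     %acc.next = add i32 %acc, %w
     %i.next = add i32 %i, 1
     %more = icmp slt i32 %i.next, %n
     br i1 %more, label %loop, label %exit
   exit:
     ret i32 %acc.next
   }

   declare i32 @weight(i32*) readnone nounwind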
diff --git a/docs/CommandGuide/llvm-symbolizer.rst b/docs/CommandGuide/llvm-symbolizer.rst
index ec4178e4e7ab..7bcad1c12f11 100644
--- a/docs/CommandGuide/llvm-symbolizer.rst
+++ b/docs/CommandGuide/llvm-symbolizer.rst
@@ -11,9 +11,9 @@ DESCRIPTION
:program:`llvm-symbolizer` reads object file names and addresses from standard
input and prints corresponding source code locations to standard output.
-If object file is specified in command line, :program:`llvm-symbolizer` reads
-only addresses from standard input. This
-program uses debug info sections and symbol table in the object files.
+If an object file is specified on the command line, :program:`llvm-symbolizer`
+processes only addresses from standard input; the rest is output verbatim.
+This program uses the debug info sections and symbol table in the object files.
EXAMPLE
--------
diff --git a/docs/ExceptionHandling.rst b/docs/ExceptionHandling.rst
index 74827c02a272..41dd4b606b15 100644
--- a/docs/ExceptionHandling.rst
+++ b/docs/ExceptionHandling.rst
@@ -775,3 +775,67 @@ C++ code:
The "inner" ``catchswitch`` consumes ``%1`` which is produced by the outer
catchswitch.
+
+.. _wineh-constraints:
+
+Funclet transitions
+-----------------------
+
+The EH tables for personalities that use funclets make implicit use of the
+funclet nesting relationship to encode unwind destinations, and so are
+constrained in the set of funclet transitions they can represent. The related
+LLVM IR instructions accordingly have constraints that ensure encodability of
+the EH edges in the flow graph.
+
+A ``catchswitch``, ``catchpad``, or ``cleanuppad`` is said to be "entered"
+when it executes. It may subsequently be "exited" by any of the following
+means:
+
+* A ``catchswitch`` is immediately exited when none of its constituent
+ ``catchpad``\ s are appropriate for the in-flight exception and it unwinds
+ to its unwind destination or the caller.
+* A ``catchpad`` and its parent ``catchswitch`` are both exited when a
+ ``catchret`` from the ``catchpad`` is executed.
+* A ``cleanuppad`` is exited when a ``cleanupret`` from it is executed.
+* Any of these pads is exited when control unwinds to the function's caller,
+ either by a ``call`` which unwinds all the way to the function's caller,
+  a nested ``catchswitch`` marked "``unwind to caller``", or a nested
+  ``cleanuppad``\ 's ``cleanupret`` marked "``unwind to caller``".
+* Any of these pads is exited when an unwind edge (from an ``invoke``,
+ nested ``catchswitch``, or nested ``cleanuppad``\ 's ``cleanupret``)
+ unwinds to a destination pad that is not a descendant of the given pad.
+
+Note that the ``ret`` instruction is *not* a valid way to exit a funclet pad;
+it is undefined behavior to execute a ``ret`` when a pad has been entered but
+not exited.
+
+A single unwind edge may exit any number of pads (with the restrictions that
+the edge from a ``catchswitch`` must exit at least itself, and the edge from
+a ``cleanupret`` must exit at least its ``cleanuppad``), and then must enter
+exactly one pad, which must be distinct from all the exited pads. The parent
+of the pad that an unwind edge enters must be the most-recently-entered
+not-yet-exited pad (after exiting from any pads that the unwind edge exits),
+or "none" if there is no such pad. This ensures that the stack of executing
+funclets at run-time always corresponds to some path in the funclet pad tree
+that the parent tokens encode.
+
+All unwind edges which exit any given funclet pad (including ``cleanupret``
+edges exiting their ``cleanuppad`` and ``catchswitch`` edges exiting their
+``catchswitch``) must share the same unwind destination. Similarly, any
+funclet pad which may be exited by unwind to caller must not be exited by
+any exception edges which unwind anywhere other than the caller. This
+ensures that each funclet as a whole has only one unwind destination, which
+EH tables for funclet personalities may require. Note that any unwind edge
+which exits a ``catchpad`` also exits its parent ``catchswitch``, so this
+implies that for any given ``catchswitch``, its unwind destination must also
+be the unwind destination of any unwind edge that exits any of its constituent
+``catchpad``\s. Because ``catchswitch`` has no ``nounwind`` variant, and
+because IR producers are not *required* to annotate calls which will not
+unwind as ``nounwind``, it is legal to nest a ``call`` or an "``unwind to
+caller``\ " ``catchswitch`` within a funclet pad that has an unwind
+destination other than caller; it is undefined behavior for such a ``call``
+or ``catchswitch`` to unwind.
+
+Finally, the funclet pads' unwind destinations cannot form a cycle. This
+ensures that EH lowering can construct "try regions" with a tree-like
+structure, which funclet-based personalities may require.
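A hedged IR sketch of these rules follows; the callees and ``catchpad``
arguments are illustrative only. The ``cleanupret`` is marked ``unwind to
caller``, so executing it exits the ``cleanuppad``, the enclosing
``catchpad``, and that pad's parent ``catchswitch`` in one step, while the
``catchret`` on the normal path exits the ``catchpad`` and its
``catchswitch``.

.. code-block:: llvm

   define void @f() personality i32 (...)* @__CxxFrameHandler3 {
   entry:
     invoke void @may_throw()
             to label %exit unwind label %dispatch
   dispatch:
     %cs = catchswitch within none [label %handler] unwind to caller
   handler:
     %cp = catchpad within %cs [i8* null, i32 64, i8* null]  ; enter %cp
     invoke void @may_throw() [ "funclet"(token %cp) ]
             to label %done unwind label %cleanup
   cleanup:
     %clp = cleanuppad within %cp []         ; enter %clp, nested in %cp
     call void @do_cleanup() [ "funclet"(token %clp) ]
     cleanupret from %clp unwind to caller   ; exits %clp, %cp, and %cs
   done:
     catchret from %cp to label %exit        ; exits %cp and %cs
   exit:
     ret void
   }

   declare void @may_throw()
   declare void @do_cleanup() nounwind
   declare i32 @__CxxFrameHandler3(...)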
diff --git a/docs/GettingStartedVS.rst b/docs/GettingStartedVS.rst
index 63e81f5165df..0ca50904ce44 100644
--- a/docs/GettingStartedVS.rst
+++ b/docs/GettingStartedVS.rst
@@ -12,26 +12,20 @@ Welcome to LLVM on Windows! This document only covers LLVM on Windows using
Visual Studio, not mingw or cygwin. In order to get started, you first need to
know some basic information.
-There are many different projects that compose LLVM. The first is the LLVM
-suite. This contains all of the tools, libraries, and header files needed to
-use LLVM. It contains an assembler, disassembler,
-bitcode analyzer and bitcode optimizer. It also contains a test suite that can
-be used to test the LLVM tools.
-
-Another useful project on Windows is `Clang <http://clang.llvm.org/>`_.
-Clang is a C family ([Objective]C/C++) compiler. Clang mostly works on
-Windows, but does not currently understand all of the Microsoft extensions
-to C and C++. Because of this, clang cannot parse the C++ standard library
-included with Visual Studio, nor parts of the Windows Platform SDK. However,
-most standard C programs do compile. Clang can be used to emit bitcode,
-directly emit object files or even linked executables using Visual Studio's
-``link.exe``.
-
-The large LLVM test suite cannot be run on the Visual Studio port at this
-time.
-
-Most of the tools build and work. ``bugpoint`` does build, but does
-not work.
+There are many different projects that compose LLVM. The first piece is the
+LLVM suite. This contains all of the tools, libraries, and header files needed
+to use LLVM. It contains an assembler, disassembler, bitcode analyzer and
+bitcode optimizer. It also contains basic regression tests that can be used to
+test the LLVM tools and the Clang front end.
+
+The second piece is the `Clang <http://clang.llvm.org/>`_ front end. This
+component compiles C, C++, Objective C, and Objective C++ code into LLVM
+bitcode. Clang typically uses LLVM libraries to optimize the bitcode and emit
+machine code. LLVM fully supports the COFF object file format, which is
+compatible with all other existing Windows toolchains.
+
+The last major part of LLVM, the execution Test Suite, does not run on Windows,
+and this document does not discuss it.
Additional information about the LLVM directory structure and tool chain
can be found on the main :doc:`GettingStarted` page.
diff --git a/docs/LangRef.rst b/docs/LangRef.rst
index 103d876b3cef..5f8a3a5a4a98 100644
--- a/docs/LangRef.rst
+++ b/docs/LangRef.rst
@@ -1579,6 +1579,8 @@ caller's deoptimization state to the callee's deoptimization state is
semantically equivalent to composing the caller's deoptimization
continuation after the callee's deoptimization continuation.
+.. _ob_funclet:
+
Funclet Operand Bundles
^^^^^^^^^^^^^^^^^^^^^^^
@@ -1588,6 +1590,18 @@ is within a particular funclet. There can be at most one
``"funclet"`` operand bundle attached to a call site and it must have
exactly one bundle operand.
+If any funclet EH pads have been "entered" but not "exited" (per the
+`description in the EH doc\ <ExceptionHandling.html#wineh-constraints>`_),
+it is undefined behavior to execute a ``call`` or ``invoke`` which:
+
+* does not have a ``"funclet"`` bundle and is not a ``call`` to a nounwind
+ intrinsic, or
+* has a ``"funclet"`` bundle whose operand is not the most-recently-entered
+ not-yet-exited funclet EH pad.
+
+Similarly, if no funclet EH pads have been entered-but-not-yet-exited,
+executing a ``call`` or ``invoke`` with a ``"funclet"`` bundle is undefined behavior.
+
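For example, in this minimal fragment (the callee ``@g`` is hypothetical), a
call made while the ``catchpad`` is entered-but-not-exited must name that pad
in its bundle:

.. code-block:: llvm

   handler:
     %cp = catchpad within %cs [i8* null, i32 64, i8* null]
     ; Legal: the bundle names the most-recently-entered, not-yet-exited pad.
     call void @g() [ "funclet"(token %cp) ]
     catchret from %cp to label %exit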
.. _moduleasm:
Module-Level Inline Assembly
@@ -5404,10 +5418,12 @@ The ``parent`` argument is the token of the funclet that contains the
``catchswitch`` instruction. If the ``catchswitch`` is not inside a funclet,
this operand may be the token ``none``.
-The ``default`` argument is the label of another basic block beginning with a
-"pad" instruction, one of ``cleanuppad`` or ``catchswitch``.
+The ``default`` argument is the label of another basic block beginning with
+either a ``cleanuppad`` or ``catchswitch`` instruction. This unwind destination
+must be a legal target with respect to the ``parent`` links, as described in
+the `exception handling documentation\ <ExceptionHandling.html#wineh-constraints>`_.
-The ``handlers`` are a list of successor blocks that each begin with a
+The ``handlers`` are a nonempty list of successor blocks that each begin with a
:ref:`catchpad <i_catchpad>` instruction.
Semantics:
@@ -5431,82 +5447,6 @@ Example:
dispatch2:
%cs2 = catchswitch within %parenthandler [label %handler0] unwind label %cleanup
-.. _i_catchpad:
-
-'``catchpad``' Instruction
-^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-Syntax:
-"""""""
-
-::
-
- <resultval> = catchpad within <catchswitch> [<args>*]
-
-Overview:
-"""""""""
-
-The '``catchpad``' instruction is used by `LLVM's exception handling
-system <ExceptionHandling.html#overview>`_ to specify that a basic block
-begins a catch handler --- one where a personality routine attempts to transfer
-control to catch an exception.
-
-Arguments:
-""""""""""
-
-The ``catchswitch`` operand must always be a token produced by a
-:ref:`catchswitch <i_catchswitch>` instruction in a predecessor block. This
-ensures that each ``catchpad`` has exactly one predecessor block, and it always
-terminates in a ``catchswitch``.
-
-The ``args`` correspond to whatever information the personality routine
-requires to know if this is an appropriate handler for the exception. Control
-will transfer to the ``catchpad`` if this is the first appropriate handler for
-the exception.
-
-The ``resultval`` has the type :ref:`token <t_token>` and is used to match the
-``catchpad`` to corresponding :ref:`catchrets <i_catchret>` and other nested EH
-pads.
-
-Semantics:
-""""""""""
-
-When the call stack is being unwound due to an exception being thrown, the
-exception is compared against the ``args``. If it doesn't match, control will
-not reach the ``catchpad`` instruction. The representation of ``args`` is
-entirely target and personality function-specific.
-
-Like the :ref:`landingpad <i_landingpad>` instruction, the ``catchpad``
-instruction must be the first non-phi of its parent basic block.
-
-The meaning of the tokens produced and consumed by ``catchpad`` and other "pad"
-instructions is described in the
-`Windows exception handling documentation <ExceptionHandling.html#wineh>`.
-
-Executing a ``catchpad`` instruction constitutes "entering" that pad.
-The pad may then be "exited" in one of three ways:
-
-1) explicitly via a ``catchret`` that consumes it. Executing such a ``catchret``
- is undefined behavior if any descendant pads have been entered but not yet
- exited.
-2) implicitly via a call (which unwinds all the way to the current function's caller),
- or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller.
-3) implicitly via an unwind edge whose destination EH pad isn't a descendant of
- the ``catchpad``. When the ``catchpad`` is exited in this manner, it is
- undefined behavior if the destination EH pad has a parent which is not an
- ancestor of the ``catchpad`` being exited.
-
-Example:
-""""""""
-
-.. code-block:: llvm
-
- dispatch:
- %cs = catchswitch within none [label %handler0] unwind to caller
- ;; A catch block which can catch an integer.
- handler0:
- %tok = catchpad within %cs [i8** @_ZTIi]
-
.. _i_catchret:
'``catchret``' Instruction
@@ -5543,11 +5483,10 @@ unwinding was interrupted with a :ref:`catchpad <i_catchpad>` instruction. The
code to, for example, destroy the active exception. Control then transfers to
``normal``.
-The ``token`` argument must be a token produced by a dominating ``catchpad``
-instruction. The ``catchret`` destroys the physical frame established by
-``catchpad``, so executing multiple returns on the same token without
-re-executing the ``catchpad`` will result in undefined behavior.
-See :ref:`catchpad <i_catchpad>` for more details.
+The ``token`` argument must be a token produced by a ``catchpad`` instruction.
+If the specified ``catchpad`` is not the most-recently-entered not-yet-exited
+funclet pad (as described in the `EH documentation\ <ExceptionHandling.html#wineh-constraints>`_),
+the ``catchret``'s behavior is undefined.
Example:
""""""""
@@ -5581,7 +5520,15 @@ Arguments:
The '``cleanupret``' instruction requires one argument, which indicates
which ``cleanuppad`` it exits, and must be a :ref:`cleanuppad <i_cleanuppad>`.
-It also has an optional successor, ``continue``.
+If the specified ``cleanuppad`` is not the most-recently-entered not-yet-exited
+funclet pad (as described in the `EH documentation\ <ExceptionHandling.html#wineh-constraints>`_),
+the ``cleanupret``'s behavior is undefined.
+
+The '``cleanupret``' instruction also has an optional successor, ``continue``,
+which must be the label of another basic block beginning with either a
+``cleanuppad`` or ``catchswitch`` instruction. This unwind destination must
+be a legal target with respect to the ``parent`` links, as described in the
+`exception handling documentation\ <ExceptionHandling.html#wineh-constraints>`_.
Semantics:
""""""""""
@@ -5591,13 +5538,6 @@ The '``cleanupret``' instruction indicates to the
:ref:`cleanuppad <i_cleanuppad>` it transferred control to has ended.
It transfers control to ``continue`` or unwinds out of the function.
-The unwind destination ``continue``, if present, must be an EH pad
-whose parent is either ``none`` or an ancestor of the ``cleanuppad``
-being returned from. This constitutes an exceptional exit from all
-ancestors of the completed ``cleanuppad``, up to but not including
-the parent of ``continue``.
-See :ref:`cleanuppad <i_cleanuppad>` for more details.
-
Example:
""""""""
@@ -6996,7 +6936,7 @@ name ``<index>`` corresponding to a metadata node with one ``i32`` entry of
value 1. The existence of the ``!nontemporal`` metadata on the instruction
tells the optimizer and code generator that this load is not expected to
be reused in the cache. The code generator may select special
-instructions to save cache bandwidth, such as the MOVNT instruction on
+instructions to save cache bandwidth, such as the ``MOVNT`` instruction on
x86.
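A minimal example of the metadata shape just described (the names are
illustrative):

.. code-block:: llvm

   define i32 @stream_read(i32* %ptr) {
     %val = load i32, i32* %ptr, align 4, !nontemporal !0
     ret i32 %val
   }

   !0 = !{i32 1}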
The optional ``!invariant.group`` metadata must reference a
@@ -8590,6 +8530,74 @@ Example:
catch i8** @_ZTIi
filter [1 x i8**] [@_ZTId]
+.. _i_catchpad:
+
+'``catchpad``' Instruction
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+ <resultval> = catchpad within <catchswitch> [<args>*]
+
+Overview:
+"""""""""
+
+The '``catchpad``' instruction is used by `LLVM's exception handling
+system <ExceptionHandling.html#overview>`_ to specify that a basic block
+begins a catch handler --- one where a personality routine attempts to transfer
+control to catch an exception.
+
+Arguments:
+""""""""""
+
+The ``catchswitch`` operand must always be a token produced by a
+:ref:`catchswitch <i_catchswitch>` instruction in a predecessor block. This
+ensures that each ``catchpad`` has exactly one predecessor block, and it always
+terminates in a ``catchswitch``.
+
+The ``args`` correspond to whatever information the personality routine
+requires to know if this is an appropriate handler for the exception. Control
+will transfer to the ``catchpad`` if this is the first appropriate handler for
+the exception.
+
+The ``resultval`` has the type :ref:`token <t_token>` and is used to match the
+``catchpad`` to corresponding :ref:`catchrets <i_catchret>` and other nested EH
+pads.
+
+Semantics:
+""""""""""
+
+When the call stack is being unwound due to an exception being thrown, the
+exception is compared against the ``args``. If it doesn't match, control will
+not reach the ``catchpad`` instruction. The representation of ``args`` is
+entirely target and personality function-specific.
+
+Like the :ref:`landingpad <i_landingpad>` instruction, the ``catchpad``
+instruction must be the first non-phi of its parent basic block.
+
+The meaning of the tokens produced and consumed by ``catchpad`` and other "pad"
+instructions is described in the
+`Windows exception handling documentation\ <ExceptionHandling.html#wineh>`_.
+
+When a ``catchpad`` has been "entered" but not yet "exited" (as
+described in the `EH documentation\ <ExceptionHandling.html#wineh-constraints>`_),
+it is undefined behavior to execute a :ref:`call <i_call>` or :ref:`invoke <i_invoke>`
+that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
+
+Example:
+""""""""
+
+.. code-block:: llvm
+
+ dispatch:
+ %cs = catchswitch within none [label %handler0] unwind to caller
+ ;; A catch block which can catch an integer.
+ handler0:
+ %tok = catchpad within %cs [i8** @_ZTIi]
+
.. _i_cleanuppad:
'``cleanuppad``' Instruction
@@ -8644,22 +8652,10 @@ The ``cleanuppad`` instruction has several restrictions:
- A basic block that is not a cleanup block may not include a
'``cleanuppad``' instruction.
-Executing a ``cleanuppad`` instruction constitutes "entering" that pad.
-The pad may then be "exited" in one of three ways:
-
-1) explicitly via a ``cleanupret`` that consumes it. Executing such a ``cleanupret``
- is undefined behavior if any descendant pads have been entered but not yet
- exited.
-2) implicitly via a call (which unwinds all the way to the current function's caller),
- or via a ``catchswitch`` or a ``cleanupret`` that unwinds to caller.
-3) implicitly via an unwind edge whose destination EH pad isn't a descendant of
- the ``cleanuppad``. When the ``cleanuppad`` is exited in this manner, it is
- undefined behavior if the destination EH pad has a parent which is not an
- ancestor of the ``cleanuppad`` being exited.
-
-It is undefined behavior for the ``cleanuppad`` to exit via an unwind edge which
-does not transitively unwind to the same destination as a constituent
-``cleanupret``.
+When a ``cleanuppad`` has been "entered" but not yet "exited" (as
+described in the `EH documentation\ <ExceptionHandling.html#wineh-constraints>`_),
+it is undefined behavior to execute a :ref:`call <i_call>` or :ref:`invoke <i_invoke>`
+that does not carry an appropriate :ref:`"funclet" bundle <ob_funclet>`.
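A short sketch of this rule with a hypothetical ``@do_cleanup`` callee: once
the ``cleanuppad`` is entered, calls carry its token until the ``cleanupret``
exits the pad.

.. code-block:: llvm

   cleanup:
     %clp = cleanuppad within none []
     ; Omitting the "funclet" bundle here would be undefined behavior.
     call void @do_cleanup() [ "funclet"(token %clp) ]
     cleanupret from %clp unwind to caller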
Example:
""""""""
diff --git a/docs/Phabricator.rst b/docs/Phabricator.rst
index 73704d9b17d7..af1e4429fda9 100644
--- a/docs/Phabricator.rst
+++ b/docs/Phabricator.rst
@@ -136,7 +136,7 @@ reviewers, the ``Differential Revision``, etc from the review and commit it to t
arc commit --revision D<Revision>
-When committing an LLVM change that has been reviewed using
+When committing a change that has been reviewed using
Phabricator, the convention is for the commit message to end with the
line:
@@ -153,6 +153,12 @@ This allows people reading the version history to see the review for
context. This also allows Phabricator to detect the commit, close the
review, and add a link from the review to the commit.
+If you use ``git`` or ``svn`` to commit the change and forget to add the line
+to your commit message, you should close the review manually. In the web UI,
+under "Leap Into Action" put the SVN revision number in the Comment, set the
+Action to "Close Revision" and click Submit. Note the review must have been
+Accepted first.
+
Abandoning a change
-------------------
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index 44f76fef8f1f..665e30aeb676 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -408,6 +408,9 @@ Then you can run your pass like this:
'foo' debug type
$ opt < a.bc > /dev/null -mypass -debug-only=bar
'bar' debug type
+ $ opt < a.bc > /dev/null -mypass -debug-only=foo,bar
+ 'foo' debug type
+ 'bar' debug type
Of course, in practice, you should only set ``DEBUG_TYPE`` at the top of a file,
to specify the debug type for the entire module. Be careful that you only do
@@ -417,7 +420,8 @@ system in place to ensure that names do not conflict. If two different modules
use the same string, they will all be turned on when the name is specified.
This allows, for example, all debug information for instruction scheduling to be
enabled with ``-debug-only=InstrSched``, even if the source lives in multiple
-files.
+files. The name must not include a comma (,) as that is used to separate the
+arguments of the ``-debug-only`` option.
For performance reasons, -debug-only is not available in an optimized build
(``--enable-optimized``) of LLVM.
diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
index 78184f5d32cd..0371a3f76f69 100644
--- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
+++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp
@@ -4,7 +4,7 @@
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
diff --git a/include/llvm/ADT/IntEqClasses.h b/include/llvm/ADT/IntEqClasses.h
index 8e75c48e3764..0baee2f11a79 100644
--- a/include/llvm/ADT/IntEqClasses.h
+++ b/include/llvm/ADT/IntEqClasses.h
@@ -53,10 +53,10 @@ public:
NumClasses = 0;
}
- /// join - Join the equivalence classes of a and b. After joining classes,
- /// findLeader(a) == findLeader(b).
- /// This requires an uncompressed map.
- void join(unsigned a, unsigned b);
+ /// Join the equivalence classes of a and b. After joining classes,
+ /// findLeader(a) == findLeader(b). This requires an uncompressed map.
+ /// Returns the new leader.
+ unsigned join(unsigned a, unsigned b);
/// findLeader - Compute the leader of a's equivalence class. This is the
/// smallest member of the class.
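A small sketch of the updated contract, assuming a fresh (uncompressed) map:

    #include "llvm/ADT/IntEqClasses.h"
    #include <cassert>

    void joinDemo() {
      llvm::IntEqClasses EC(4);        // four singleton classes {0} {1} {2} {3}
      unsigned Leader = EC.join(1, 3); // join now reports the surviving leader
      assert(Leader == EC.findLeader(3) && "both members share the new leader");
      (void)Leader;
    }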
diff --git a/include/llvm/ADT/PointerEmbeddedInt.h b/include/llvm/ADT/PointerEmbeddedInt.h
new file mode 100644
index 000000000000..8781d1803ac7
--- /dev/null
+++ b/include/llvm/ADT/PointerEmbeddedInt.h
@@ -0,0 +1,104 @@
+//===- llvm/ADT/PointerEmbeddedInt.h ----------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_POINTEREMBEDDEDINT_H
+#define LLVM_ADT_POINTEREMBEDDEDINT_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <climits>
+
+namespace llvm {
+
+/// Utility to embed an integer into a pointer-like type. This is specifically
+/// intended to allow embedding integers where fewer bits are required than
+/// exist in a pointer, and the integer can participate in abstractions
+/// alongside other pointer-like types. For example, it can be placed into a \c
+/// PointerSumType or \c PointerUnion.
+///
+/// Note that much like pointers, an integer value of zero has special utility
+/// due to boolean conversions. For example, a non-null value can be tested for
+/// in the above abstractions without testing the particular active member.
+/// Also, the default constructed value zero initializes the integer.
+template <typename IntT, int Bits = sizeof(IntT) * CHAR_BIT>
+class PointerEmbeddedInt {
+ uintptr_t Value;
+
+ static_assert(Bits < sizeof(uintptr_t) * CHAR_BIT,
+ "Cannot embed more bits than we have in a pointer!");
+
+ enum : uintptr_t {
+ // We shift as many zeros into the value as we can while preserving the
+ // number of bits desired for the integer.
+ Shift = sizeof(uintptr_t) * CHAR_BIT - Bits,
+
+ // We also want to be able to mask out the preserved bits for asserts.
+ Mask = static_cast<uintptr_t>(-1) << Bits
+ };
+
+ friend class PointerLikeTypeTraits<PointerEmbeddedInt>;
+
+ explicit PointerEmbeddedInt(uintptr_t Value) : Value(Value) {}
+
+public:
+ PointerEmbeddedInt() : Value(0) {}
+
+ PointerEmbeddedInt(IntT I) : Value(static_cast<uintptr_t>(I) << Shift) {
+ assert((I & Mask) == 0 && "Integer has bits outside those preserved!");
+ }
+
+ PointerEmbeddedInt &operator=(IntT I) {
+ assert((I & Mask) == 0 && "Integer has bits outside those preserved!");
+    Value = static_cast<uintptr_t>(I) << Shift;
+    return *this;
+  }
+
+  // Note that this implicit conversion additionally allows all of the basic
+ // comparison operators to work transparently, etc.
+ operator IntT() const { return static_cast<IntT>(Value >> Shift); }
+};
+
+// Provide pointer like traits to support use with pointer unions and sum
+// types.
+template <typename IntT, int Bits>
+class PointerLikeTypeTraits<PointerEmbeddedInt<IntT, Bits>> {
+ typedef PointerEmbeddedInt<IntT, Bits> T;
+
+public:
+ static inline void *getAsVoidPointer(const T &P) {
+ return reinterpret_cast<void *>(P.Value);
+ }
+ static inline T getFromVoidPointer(void *P) {
+ return T(reinterpret_cast<uintptr_t>(P));
+ }
+ static inline T getFromVoidPointer(const void *P) {
+ return T(reinterpret_cast<uintptr_t>(P));
+ }
+
+ enum { NumLowBitsAvailable = T::Shift };
+};
+
+// Teach DenseMap how to use PointerEmbeddedInt objects as keys if the Int type
+// itself can be a key.
+template <typename IntT, int Bits>
+struct DenseMapInfo<PointerEmbeddedInt<IntT, Bits>> {
+ typedef PointerEmbeddedInt<IntT, Bits> T;
+
+ typedef DenseMapInfo<IntT> IntInfo;
+
+ static inline T getEmptyKey() { return IntInfo::getEmptyKey(); }
+ static inline T getTombstoneKey() { return IntInfo::getTombstoneKey(); }
+ static unsigned getHashValue(const T &Arg) {
+ return IntInfo::getHashValue(Arg);
+ }
+ static bool isEqual(const T &LHS, const T &RHS) { return LHS == RHS; }
+};
+}
+
+#endif
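A brief usage sketch (the alias and function are illustrative, not part of
this header):

    #include "llvm/ADT/PointerEmbeddedInt.h"
    #include "llvm/ADT/PointerUnion.h"

    // A 16-bit count that can share storage with a real pointer.
    typedef llvm::PointerEmbeddedInt<unsigned, 16> Count16;

    unsigned numElements(llvm::PointerUnion<int *, Count16> U) {
      if (U.is<Count16>())
        return U.get<Count16>(); // implicit conversion back to unsigned
      return 0;                  // an int* is active instead
    }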
diff --git a/include/llvm/ADT/PointerIntPair.h b/include/llvm/ADT/PointerIntPair.h
index 0058d85d1ae4..83fbf127e6da 100644
--- a/include/llvm/ADT/PointerIntPair.h
+++ b/include/llvm/ADT/PointerIntPair.h
@@ -55,20 +55,25 @@ public:
PointerTy getPointer() const { return Info::getPointer(Value); }
- IntType getInt() const { return (IntType)Info::getInt(Value); }
+ IntType getInt() const {
+ return (IntType)Info::getInt(Value);
+ }
void setPointer(PointerTy PtrVal) {
Value = Info::updatePointer(Value, PtrVal);
}
- void setInt(IntType IntVal) { Value = Info::updateInt(Value, IntVal); }
+ void setInt(IntType IntVal) {
+ Value = Info::updateInt(Value, static_cast<intptr_t>(IntVal));
+ }
void initWithPointer(PointerTy PtrVal) {
Value = Info::updatePointer(0, PtrVal);
}
void setPointerAndInt(PointerTy PtrVal, IntType IntVal) {
- Value = Info::updateInt(Info::updatePointer(0, PtrVal), IntVal);
+ Value = Info::updateInt(Info::updatePointer(0, PtrVal),
+ static_cast<intptr_t>(IntVal));
}
PointerTy const *getAddrOfPointer() const {
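The new casts appear to be what lets a scoped enum serve as the integer type;
a minimal sketch (the ``Flag`` enum is illustrative):

    #include "llvm/ADT/PointerIntPair.h"

    enum class Flag : intptr_t { Clear = 0, Set = 1 };

    void toggle(int *P) {
      // One low bit of the pointer's alignment padding holds the flag.
      llvm::PointerIntPair<int *, 1, Flag> PF(P, Flag::Set);
      if (PF.getInt() == Flag::Set)
        PF.setInt(Flag::Clear);
    }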
diff --git a/include/llvm/ADT/PointerSumType.h b/include/llvm/ADT/PointerSumType.h
new file mode 100644
index 000000000000..6b8618fc5a17
--- /dev/null
+++ b/include/llvm/ADT/PointerSumType.h
@@ -0,0 +1,205 @@
+//===- llvm/ADT/PointerSumType.h --------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_POINTERSUMTYPE_H
+#define LLVM_ADT_POINTERSUMTYPE_H
+
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+
+namespace llvm {
+
+/// A compile time pair of an integer tag and the pointer-like type which it
+/// indexes within a sum type. Also allows the user to specify a particular
+/// traits class for pointer types with custom behavior such as over-aligned
+/// allocation.
+template <uintptr_t N, typename PointerArgT,
+ typename TraitsArgT = PointerLikeTypeTraits<PointerArgT>>
+struct PointerSumTypeMember {
+ enum { Tag = N };
+ typedef PointerArgT PointerT;
+ typedef TraitsArgT TraitsT;
+};
+
+namespace detail {
+
+template <typename TagT, typename... MemberTs>
+struct PointerSumTypeHelper;
+
+}
+
+/// A sum type over pointer-like types.
+///
+/// This is a normal tagged union across pointer-like types that uses the low
+/// bits of the pointers to store the tag.
+///
+/// Each member of the sum type is specified by passing a \c
+/// PointerSumTypeMember specialization in the variadic member argument list.
+/// This allows the user to control the particular tag value associated with
+/// a particular type, use the same type for multiple different tags, and
+/// customize the pointer-like traits used for a particular member. Note that
+/// these *must* be specializations of \c PointerSumTypeMember; no other type
+/// will suffice, even if it provides a compatible interface.
+///
+/// This type implements all of the comparison operators and even hash table
+/// support by comparing the underlying storage of the pointer values. It
+/// doesn't support delegating to particular members for comparisons.
+///
+/// It also default constructs to a zero tag with a null pointer, whatever that
+/// would be. This means that the zero value for the tag type is significant
+/// and is best assigned to the member that is most desirable to default
+/// construct.
+///
+/// There is no support for constructing or accessing with a dynamic tag as
+/// that would fundamentally violate the type safety provided by the sum type.
+template <typename TagT, typename... MemberTs> class PointerSumType {
+ uintptr_t Value;
+
+ typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT;
+
+public:
+ PointerSumType() : Value(0) {}
+
+ /// A typed constructor for a specific tagged member of the sum type.
+ template <TagT N>
+ static PointerSumType
+ create(typename HelperT::template Lookup<N>::PointerT Pointer) {
+ PointerSumType Result;
+ void *V = HelperT::template Lookup<N>::TraitsT::getAsVoidPointer(Pointer);
+ assert((reinterpret_cast<uintptr_t>(V) & HelperT::TagMask) == 0 &&
+ "Pointer is insufficiently aligned to store the discriminant!");
+ Result.Value = reinterpret_cast<uintptr_t>(V) | N;
+ return Result;
+ }
+
+ TagT getTag() const { return static_cast<TagT>(Value & HelperT::TagMask); }
+
+ template <TagT N> bool is() const { return N == getTag(); }
+
+ template <TagT N> typename HelperT::template Lookup<N>::PointerT get() const {
+ void *P = is<N>() ? getImpl() : nullptr;
+ return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(P);
+ }
+
+ template <TagT N>
+ typename HelperT::template Lookup<N>::PointerT cast() const {
+ assert(is<N>() && "This instance has a different active member.");
+ return HelperT::template Lookup<N>::TraitsT::getFromVoidPointer(getImpl());
+ }
+
+ operator bool() const { return Value & HelperT::PointerMask; }
+ bool operator==(const PointerSumType &R) const { return Value == R.Value; }
+ bool operator!=(const PointerSumType &R) const { return Value != R.Value; }
+ bool operator<(const PointerSumType &R) const { return Value < R.Value; }
+ bool operator>(const PointerSumType &R) const { return Value > R.Value; }
+ bool operator<=(const PointerSumType &R) const { return Value <= R.Value; }
+ bool operator>=(const PointerSumType &R) const { return Value >= R.Value; }
+
+ uintptr_t getOpaqueValue() const { return Value; }
+
+protected:
+ void *getImpl() const {
+ return reinterpret_cast<void *>(Value & HelperT::PointerMask);
+ }
+};
+
+namespace detail {
+
+/// A helper template for implementing \c PointerSumType. It provides fast
+/// compile-time lookup of the member from a particular tag value, along with
+/// useful constants and compile-time checking infrastructure.
+template <typename TagT, typename... MemberTs>
+struct PointerSumTypeHelper : MemberTs... {
+ // First we use a trick to allow quickly looking up information about
+ // a particular member of the sum type. This works because we arranged to
+ // have this type derive from all of the member type templates. We can select
+ // the matching member for a tag using type deduction during overload
+ // resolution.
+ template <TagT N, typename PointerT, typename TraitsT>
+ static PointerSumTypeMember<N, PointerT, TraitsT>
+ LookupOverload(PointerSumTypeMember<N, PointerT, TraitsT> *);
+ template <TagT N> static void LookupOverload(...);
+ template <TagT N> struct Lookup {
+    // Compute a particular member type by resolving the lookup helper overload.
+ typedef decltype(LookupOverload<N>(
+ static_cast<PointerSumTypeHelper *>(nullptr))) MemberT;
+
+ /// The Nth member's pointer type.
+ typedef typename MemberT::PointerT PointerT;
+
+ /// The Nth member's traits type.
+ typedef typename MemberT::TraitsT TraitsT;
+ };
+
+ // Next we need to compute the number of bits available for the discriminant
+ // by taking the min of the bits available for each member. Much of this
+  // would be considerably simpler with good constexpr support.
+ template <uintptr_t V, uintptr_t... Vs>
+ struct Min : std::integral_constant<
+ uintptr_t, (V < Min<Vs...>::value ? V : Min<Vs...>::value)> {
+ };
+ template <uintptr_t V>
+ struct Min<V> : std::integral_constant<uintptr_t, V> {};
+ enum { NumTagBits = Min<MemberTs::TraitsT::NumLowBitsAvailable...>::value };
+
+ // Also compute the smallest discriminant and various masks for convenience.
+ enum : uint64_t {
+ MinTag = Min<MemberTs::Tag...>::value,
+ PointerMask = static_cast<uint64_t>(-1) << NumTagBits,
+ TagMask = ~PointerMask
+ };
+
+ // Finally we need a recursive template to do static checks of each
+ // member.
+ template <typename MemberT, typename... InnerMemberTs>
+ struct Checker : Checker<InnerMemberTs...> {
+ static_assert(MemberT::Tag < (1 << NumTagBits),
+ "This discriminant value requires too many bits!");
+ };
+ template <typename MemberT> struct Checker<MemberT> : std::true_type {
+ static_assert(MemberT::Tag < (1 << NumTagBits),
+ "This discriminant value requires too many bits!");
+ };
+ static_assert(Checker<MemberTs...>::value,
+ "Each member must pass the checker.");
+};
+
+}
+
+// Teach DenseMap how to use PointerSumTypes as keys.
+template <typename TagT, typename... MemberTs>
+struct DenseMapInfo<PointerSumType<TagT, MemberTs...>> {
+ typedef PointerSumType<TagT, MemberTs...> SumType;
+
+ typedef detail::PointerSumTypeHelper<TagT, MemberTs...> HelperT;
+ enum { SomeTag = HelperT::MinTag };
+ typedef typename HelperT::template Lookup<HelperT::MinTag>::PointerT
+ SomePointerT;
+ typedef DenseMapInfo<SomePointerT> SomePointerInfo;
+
+ static inline SumType getEmptyKey() {
+ return SumType::create<SomeTag>(SomePointerInfo::getEmptyKey());
+ }
+ static inline SumType getTombstoneKey() {
+ return SumType::create<SomeTag>(
+ SomePointerInfo::getTombstoneKey());
+ }
+ static unsigned getHashValue(const SumType &Arg) {
+ uintptr_t OpaqueValue = Arg.getOpaqueValue();
+ return DenseMapInfo<uintptr_t>::getHashValue(OpaqueValue);
+ }
+ static bool isEqual(const SumType &LHS, const SumType &RHS) {
+ return LHS == RHS;
+ }
+};
+
+}
+
+#endif
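A brief usage sketch (tags and member types are illustrative):

    #include "llvm/ADT/PointerSumType.h"

    using namespace llvm;

    enum NodeKind { IntNode, FloatNode };

    typedef PointerSumType<NodeKind,
                           PointerSumTypeMember<IntNode, int *>,
                           PointerSumTypeMember<FloatNode, float *>>
        NumericPtr;

    NumericPtr wrap(int *IP) { return NumericPtr::create<IntNode>(IP); }

    int read(NumericPtr V) {
      if (int *IP = V.get<IntNode>())   // null when another member is active
        return *IP;
      if (float *FP = V.get<FloatNode>())
        return static_cast<int>(*FP);
      return 0;                         // null/empty sum
    }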
diff --git a/include/llvm/ADT/Twine.h b/include/llvm/ADT/Twine.h
index db0bf4b68de8..81b1a6d946fc 100644
--- a/include/llvm/ADT/Twine.h
+++ b/include/llvm/ADT/Twine.h
@@ -101,15 +101,13 @@ namespace llvm {
/// A pointer to a SmallString instance.
SmallStringKind,
- /// A char value reinterpreted as a pointer, to render as a character.
+ /// A char value, to render as a character.
CharKind,
- /// An unsigned int value reinterpreted as a pointer, to render as an
- /// unsigned decimal integer.
+ /// An unsigned int value, to render as an unsigned decimal integer.
DecUIKind,
- /// An int value reinterpreted as a pointer, to render as a signed
- /// decimal integer.
+ /// An int value, to render as a signed decimal integer.
DecIKind,
/// A pointer to an unsigned long value, to render as an unsigned decimal
diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h
index ef3d5e8fe3df..e02f3ab2de1f 100644
--- a/include/llvm/Analysis/LazyCallGraph.h
+++ b/include/llvm/Analysis/LazyCallGraph.h
@@ -104,54 +104,10 @@ class LazyCallGraph {
public:
class Node;
class SCC;
+ class iterator;
typedef SmallVector<PointerUnion<Function *, Node *>, 4> NodeVectorT;
typedef SmallVectorImpl<PointerUnion<Function *, Node *>> NodeVectorImplT;
- /// A lazy iterator used for both the entry nodes and child nodes.
- ///
- /// When this iterator is dereferenced, if not yet available, a function will
- /// be scanned for "calls" or uses of functions and its child information
- /// will be constructed. All of these results are accumulated and cached in
- /// the graph.
- class iterator
- : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator,
- std::forward_iterator_tag, Node> {
- friend class LazyCallGraph;
- friend class LazyCallGraph::Node;
-
- LazyCallGraph *G;
- NodeVectorImplT::iterator E;
-
- // Build the iterator for a specific position in a node list.
- iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI,
- NodeVectorImplT::iterator E)
- : iterator_adaptor_base(NI), G(&G), E(E) {
- while (I != E && I->isNull())
- ++I;
- }
-
- public:
- iterator() {}
-
- using iterator_adaptor_base::operator++;
- iterator &operator++() {
- do {
- ++I;
- } while (I != E && I->isNull());
- return *this;
- }
-
- reference operator*() const {
- if (I->is<Node *>())
- return *I->get<Node *>();
-
- Function *F = I->get<Function *>();
- Node &ChildN = G->get(*F);
- *I = &ChildN;
- return ChildN;
- }
- };
-
/// A node in the call graph.
///
/// This represents a single node. Its primary roles are to cache the list of
@@ -200,6 +156,51 @@ public:
bool operator!=(const Node &N) const { return !operator==(N); }
};
+ /// A lazy iterator used for both the entry nodes and child nodes.
+ ///
+ /// When this iterator is dereferenced, if not yet available, a function will
+ /// be scanned for "calls" or uses of functions and its child information
+ /// will be constructed. All of these results are accumulated and cached in
+ /// the graph.
+ class iterator
+ : public iterator_adaptor_base<iterator, NodeVectorImplT::iterator,
+ std::forward_iterator_tag, Node> {
+ friend class LazyCallGraph;
+ friend class LazyCallGraph::Node;
+
+ LazyCallGraph *G;
+ NodeVectorImplT::iterator E;
+
+ // Build the iterator for a specific position in a node list.
+ iterator(LazyCallGraph &G, NodeVectorImplT::iterator NI,
+ NodeVectorImplT::iterator E)
+ : iterator_adaptor_base(NI), G(&G), E(E) {
+ while (I != E && I->isNull())
+ ++I;
+ }
+
+ public:
+ iterator() {}
+
+ using iterator_adaptor_base::operator++;
+ iterator &operator++() {
+ do {
+ ++I;
+ } while (I != E && I->isNull());
+ return *this;
+ }
+
+ reference operator*() const {
+ if (I->is<Node *>())
+ return *I->get<Node *>();
+
+ Function *F = I->get<Function *>();
+ Node &ChildN = G->get(*F);
+ *I = &ChildN;
+ return ChildN;
+ }
+ };
+
/// An SCC of the call graph.
///
/// This represents a Strongly Connected Component of the call graph as
diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h
index c219bd85a48a..70e636ce1f3d 100644
--- a/include/llvm/Analysis/LoopInfo.h
+++ b/include/llvm/Analysis/LoopInfo.h
@@ -59,38 +59,37 @@ template<class N, class M> class LoopInfoBase;
template<class N, class M> class LoopBase;
//===----------------------------------------------------------------------===//
-/// LoopBase class - Instances of this class are used to represent loops that
-/// are detected in the flow graph
+/// Instances of this class are used to represent loops that are detected in the
+/// flow graph.
///
template<class BlockT, class LoopT>
class LoopBase {
LoopT *ParentLoop;
- // SubLoops - Loops contained entirely within this one.
+ // Loops contained entirely within this one.
std::vector<LoopT *> SubLoops;
- // Blocks - The list of blocks in this loop. First entry is the header node.
+ // The list of blocks in this loop. First entry is the header node.
std::vector<BlockT*> Blocks;
SmallPtrSet<const BlockT*, 8> DenseBlockSet;
- /// Indicator that this loops has been "unlooped", so there's no loop here
- /// anymore.
- bool IsUnloop = false;
+ /// Indicator that this loop is no longer a valid loop.
+ bool IsInvalid = false;
LoopBase(const LoopBase<BlockT, LoopT> &) = delete;
const LoopBase<BlockT, LoopT>&
operator=(const LoopBase<BlockT, LoopT> &) = delete;
public:
- /// Loop ctor - This creates an empty loop.
+ /// This creates an empty loop.
LoopBase() : ParentLoop(nullptr) {}
~LoopBase() {
for (size_t i = 0, e = SubLoops.size(); i != e; ++i)
delete SubLoops[i];
}
- /// getLoopDepth - Return the nesting level of this loop. An outer-most
- /// loop has depth 1, for consistency with loop depth values used for basic
- /// blocks, where depth 0 is used for blocks not inside any loops.
+ /// Return the nesting level of this loop. An outer-most loop has depth 1,
+ /// for consistency with loop depth values used for basic blocks, where depth
+ /// 0 is used for blocks not inside any loops.
unsigned getLoopDepth() const {
unsigned D = 1;
for (const LoopT *CurLoop = ParentLoop; CurLoop;
@@ -101,33 +100,28 @@ public:
BlockT *getHeader() const { return Blocks.front(); }
LoopT *getParentLoop() const { return ParentLoop; }
- /// setParentLoop is a raw interface for bypassing addChildLoop.
+ /// This is a raw interface for bypassing addChildLoop.
void setParentLoop(LoopT *L) { ParentLoop = L; }
- /// contains - Return true if the specified loop is contained within in
- /// this loop.
- ///
+  /// Return true if the specified loop is contained within this loop.
bool contains(const LoopT *L) const {
if (L == this) return true;
if (!L) return false;
return contains(L->getParentLoop());
}
- /// contains - Return true if the specified basic block is in this loop.
- ///
+ /// Return true if the specified basic block is in this loop.
bool contains(const BlockT *BB) const {
return DenseBlockSet.count(BB);
}
- /// contains - Return true if the specified instruction is in this loop.
- ///
+ /// Return true if the specified instruction is in this loop.
template<class InstT>
bool contains(const InstT *Inst) const {
return contains(Inst->getParent());
}
- /// iterator/begin/end - Return the loops contained entirely within this loop.
- ///
+ /// Return the loops contained entirely within this loop.
const std::vector<LoopT *> &getSubLoops() const { return SubLoops; }
std::vector<LoopT *> &getSubLoopsVector() { return SubLoops; }
typedef typename std::vector<LoopT *>::const_iterator iterator;
@@ -139,8 +133,7 @@ public:
reverse_iterator rend() const { return SubLoops.rend(); }
bool empty() const { return SubLoops.empty(); }
- /// getBlocks - Get a list of the basic blocks which make up this loop.
- ///
+ /// Get a list of the basic blocks which make up this loop.
const std::vector<BlockT*> &getBlocks() const { return Blocks; }
typedef typename std::vector<BlockT*>::const_iterator block_iterator;
block_iterator block_begin() const { return Blocks.begin(); }
@@ -149,21 +142,19 @@ public:
return make_range(block_begin(), block_end());
}
- /// getNumBlocks - Get the number of blocks in this loop in constant time.
+ /// Get the number of blocks in this loop in constant time.
unsigned getNumBlocks() const {
return Blocks.size();
}
- /// Mark this loop as having been unlooped - the last backedge was removed and
- /// we no longer have a loop.
- void markUnlooped() { IsUnloop = true; }
+ /// Invalidate the loop, indicating that it is no longer a loop.
+ void invalidate() { IsInvalid = true; }
- /// Return true if this no longer represents a loop.
- bool isUnloop() const { return IsUnloop; }
+ /// Return true if this loop is no longer valid.
+  bool isInvalid() const { return IsInvalid; }
- /// isLoopExiting - True if terminator in the block can branch to another
- /// block that is outside of the current loop.
- ///
+  /// True if the terminator in the block can branch to another block that is
+ /// outside of the current loop.
bool isLoopExiting(const BlockT *BB) const {
typedef GraphTraits<const BlockT*> BlockTraits;
for (typename BlockTraits::ChildIteratorType SI =
@@ -175,8 +166,7 @@ public:
return false;
}
- /// getNumBackEdges - Calculate the number of back edges to the loop header
- ///
+ /// Calculate the number of back edges to the loop header.
unsigned getNumBackEdges() const {
unsigned NumBackEdges = 0;
BlockT *H = getHeader();
@@ -199,53 +189,49 @@ public:
// induction variable canonicalization pass should be used to normalize loops
// for easy analysis. These methods assume canonical loops.
- /// getExitingBlocks - Return all blocks inside the loop that have successors
- /// outside of the loop. These are the blocks _inside of the current loop_
- /// which branch out. The returned list is always unique.
- ///
+ /// Return all blocks inside the loop that have successors outside of the
+ /// loop. These are the blocks _inside of the current loop_ which branch out.
+ /// The returned list is always unique.
void getExitingBlocks(SmallVectorImpl<BlockT *> &ExitingBlocks) const;
- /// getExitingBlock - If getExitingBlocks would return exactly one block,
- /// return that block. Otherwise return null.
+ /// If getExitingBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
BlockT *getExitingBlock() const;
- /// getExitBlocks - Return all of the successor blocks of this loop. These
- /// are the blocks _outside of the current loop_ which are branched to.
- ///
+ /// Return all of the successor blocks of this loop. These are the blocks
+ /// _outside of the current loop_ which are branched to.
void getExitBlocks(SmallVectorImpl<BlockT*> &ExitBlocks) const;
- /// getExitBlock - If getExitBlocks would return exactly one block,
- /// return that block. Otherwise return null.
+ /// If getExitBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
BlockT *getExitBlock() const;
/// Edge type.
typedef std::pair<const BlockT*, const BlockT*> Edge;
- /// getExitEdges - Return all pairs of (_inside_block_,_outside_block_).
+ /// Return all pairs of (_inside_block_,_outside_block_).
void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
- /// getLoopPreheader - If there is a preheader for this loop, return it. A
- /// loop has a preheader if there is only one edge to the header of the loop
- /// from outside of the loop. If this is the case, the block branching to the
- /// header of the loop is the preheader node.
+ /// If there is a preheader for this loop, return it. A loop has a preheader
+ /// if there is only one edge to the header of the loop from outside of the
+ /// loop. If this is the case, the block branching to the header of the loop
+ /// is the preheader node.
///
/// This method returns null if there is no preheader for the loop.
- ///
BlockT *getLoopPreheader() const;
- /// getLoopPredecessor - If the given loop's header has exactly one unique
- /// predecessor outside the loop, return it. Otherwise return null.
- /// This is less strict that the loop "preheader" concept, which requires
+ /// If the given loop's header has exactly one unique predecessor outside the
+ /// loop, return it. Otherwise return null.
+  /// This is less strict than the loop "preheader" concept, which requires
/// the predecessor to have exactly one successor.
- ///
BlockT *getLoopPredecessor() const;
- /// getLoopLatch - If there is a single latch block for this loop, return it.
+ /// If there is a single latch block for this loop, return it.
/// A latch block is a block that contains a branch back to the header.
BlockT *getLoopLatch() const;
- /// getLoopLatches - Return all loop latch blocks of this loop. A latch block
- /// is a block that contains a branch back to the header.
+ /// Return all loop latch blocks of this loop. A latch block is a block that
+ /// contains a branch back to the header.
void getLoopLatches(SmallVectorImpl<BlockT *> &LoopLatches) const {
BlockT *H = getHeader();
typedef GraphTraits<Inverse<BlockT*> > InvBlockTraits;
@@ -260,32 +246,29 @@ public:
// APIs for updating loop information after changing the CFG
//
- /// addBasicBlockToLoop - This method is used by other analyses to update loop
- /// information. NewBB is set to be a new member of the current loop.
+ /// This method is used by other analyses to update loop information.
+ /// NewBB is set to be a new member of the current loop.
/// Because of this, it is added as a member of all parent loops, and is added
/// to the specified LoopInfo object as being in the current basic block. It
/// is not valid to replace the loop header with this method.
- ///
void addBasicBlockToLoop(BlockT *NewBB, LoopInfoBase<BlockT, LoopT> &LI);
- /// replaceChildLoopWith - This is used when splitting loops up. It replaces
- /// the OldChild entry in our children list with NewChild, and updates the
- /// parent pointer of OldChild to be null and the NewChild to be this loop.
+ /// This is used when splitting loops up. It replaces the OldChild entry in
+ /// our children list with NewChild, and updates the parent pointer of
+ /// OldChild to be null and the NewChild to be this loop.
/// This updates the loop depth of the new child.
void replaceChildLoopWith(LoopT *OldChild, LoopT *NewChild);
- /// addChildLoop - Add the specified loop to be a child of this loop. This
- /// updates the loop depth of the new child.
- ///
+ /// Add the specified loop to be a child of this loop.
+ /// This updates the loop depth of the new child.
void addChildLoop(LoopT *NewChild) {
assert(!NewChild->ParentLoop && "NewChild already has a parent!");
NewChild->ParentLoop = static_cast<LoopT *>(this);
SubLoops.push_back(NewChild);
}
- /// removeChildLoop - This removes the specified child from being a subloop of
- /// this loop. The loop is not deleted, as it will presumably be inserted
- /// into another loop.
+ /// This removes the specified child from being a subloop of this loop. The
+ /// loop is not deleted, as it will presumably be inserted into another loop.
LoopT *removeChildLoop(iterator I) {
assert(I != SubLoops.end() && "Cannot remove end iterator!");
LoopT *Child = *I;
@@ -295,7 +278,7 @@ public:
return Child;
}
- /// addBlockEntry - This adds a basic block directly to the basic block list.
+ /// This adds a basic block directly to the basic block list.
/// This should only be used by transformations that create new loops. Other
/// transformations should use addBasicBlockToLoop.
void addBlockEntry(BlockT *BB) {
@@ -303,19 +286,18 @@ public:
DenseBlockSet.insert(BB);
}
- /// reverseBlocks - interface to reverse Blocks[from, end of loop] in this loop
+  /// Interface to reverse Blocks[from, end of loop] in this loop.
void reverseBlock(unsigned from) {
std::reverse(Blocks.begin() + from, Blocks.end());
}
- /// reserveBlocks- interface to do reserve() for Blocks
+  /// Interface to do reserve() for Blocks.
void reserveBlocks(unsigned size) {
Blocks.reserve(size);
}
- /// moveToHeader - This method is used to move BB (which must be part of this
- /// loop) to be the loop header of the loop (the block that dominates all
- /// others).
+ /// This method is used to move BB (which must be part of this loop) to be the
+ /// loop header of the loop (the block that dominates all others).
void moveToHeader(BlockT *BB) {
if (Blocks[0] == BB) return;
for (unsigned i = 0; ; ++i) {
@@ -328,9 +310,9 @@ public:
}
}
- /// removeBlockFromLoop - This removes the specified basic block from the
- /// current loop, updating the Blocks as appropriate. This does not update
- /// the mapping in the LoopInfo class.
+ /// This removes the specified basic block from the current loop, updating the
+ /// Blocks as appropriate. This does not update the mapping in the LoopInfo
+ /// class.
void removeBlockFromLoop(BlockT *BB) {
auto I = std::find(Blocks.begin(), Blocks.end(), BB);
assert(I != Blocks.end() && "N is not in this list!");
@@ -339,10 +321,10 @@ public:
DenseBlockSet.erase(BB);
}
- /// verifyLoop - Verify loop structure
+  /// Verify loop structure.
void verifyLoop() const;
- /// verifyLoop - Verify loop structure of this loop and all nested loops.
+ /// Verify loop structure of this loop and all nested loops.
void verifyLoopNest(DenseSet<const LoopT*> *Loops) const;
void print(raw_ostream &OS, unsigned Depth = 0) const;
@@ -368,28 +350,26 @@ class Loop : public LoopBase<BasicBlock, Loop> {
public:
Loop() {}
- /// isLoopInvariant - Return true if the specified value is loop invariant
- ///
+ /// Return true if the specified value is loop invariant.
bool isLoopInvariant(const Value *V) const;
- /// hasLoopInvariantOperands - Return true if all the operands of the
- /// specified instruction are loop invariant.
+ /// Return true if all the operands of the specified instruction are loop
+ /// invariant.
bool hasLoopInvariantOperands(const Instruction *I) const;
- /// makeLoopInvariant - If the given value is an instruction inside of the
- /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+ /// If the given value is an instruction inside of the loop and it can be
+ /// hoisted, do so to make it trivially loop-invariant.
/// Return true if the value after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
///
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
- ///
bool makeLoopInvariant(Value *V, bool &Changed,
Instruction *InsertPt = nullptr) const;
- /// makeLoopInvariant - If the given instruction is inside of the
- /// loop and it can be hoisted, do so to make it trivially loop-invariant.
+ /// If the given instruction is inside of the loop and it can be hoisted, do
+ /// so to make it trivially loop-invariant.
/// Return true if the instruction after any hoisting is loop invariant. This
/// function can be used as a slightly more aggressive replacement for
/// isLoopInvariant.
@@ -400,28 +380,26 @@ public:
bool makeLoopInvariant(Instruction *I, bool &Changed,
Instruction *InsertPt = nullptr) const;
- /// getCanonicalInductionVariable - Check to see if the loop has a canonical
- /// induction variable: an integer recurrence that starts at 0 and increments
- /// by one each time through the loop. If so, return the phi node that
- /// corresponds to it.
+ /// Check to see if the loop has a canonical induction variable: an integer
+ /// recurrence that starts at 0 and increments by one each time through the
+ /// loop. If so, return the phi node that corresponds to it.
///
/// The IndVarSimplify pass transforms loops to have a canonical induction
/// variable.
///
PHINode *getCanonicalInductionVariable() const;
- /// isLCSSAForm - Return true if the Loop is in LCSSA form
+ /// Return true if the Loop is in LCSSA form.
bool isLCSSAForm(DominatorTree &DT) const;
- /// \brief Return true if this Loop and all inner subloops are in LCSSA form.
+ /// Return true if this Loop and all inner subloops are in LCSSA form.
bool isRecursivelyLCSSAForm(DominatorTree &DT) const;
- /// isLoopSimplifyForm - Return true if the Loop is in the form that
- /// the LoopSimplify form transforms loops to, which is sometimes called
- /// normal form.
+ /// Return true if the Loop is in the form that the LoopSimplify form
+ /// transforms loops to, which is sometimes called normal form.
bool isLoopSimplifyForm() const;
- /// isSafeToClone - Return true if the loop body is safe to clone in practice.
+ /// Return true if the loop body is safe to clone in practice.
bool isSafeToClone() const;
/// Returns true if the loop is annotated parallel.
@@ -454,23 +432,22 @@ public:
/// operand should be the node itself.
void setLoopID(MDNode *LoopID) const;
- /// hasDedicatedExits - Return true if no exit block for the loop
- /// has a predecessor that is outside the loop.
+ /// Return true if no exit block for the loop has a predecessor that is
+ /// outside the loop.
bool hasDedicatedExits() const;
- /// getUniqueExitBlocks - Return all unique successor blocks of this loop.
+ /// Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
/// This assumes that loop exits are in canonical form.
- ///
void getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const;
- /// getUniqueExitBlock - If getUniqueExitBlocks would return exactly one
- /// block, return that block. Otherwise return null.
+ /// If getUniqueExitBlocks would return exactly one block, return that block.
+ /// Otherwise return null.
BasicBlock *getUniqueExitBlock() const;
void dump() const;
- /// \brief Return the debug location of the start of this loop.
+ /// Return the debug location of the start of this loop.
/// This looks for a BB terminating instruction with a known debug
/// location by looking at the preheader and header blocks. If it
/// cannot find a terminating instruction with location information,
@@ -498,7 +475,7 @@ private:
};
//===----------------------------------------------------------------------===//
-/// LoopInfo - This class builds and contains all of the top level loop
+/// This class builds and contains all of the top-level loop
/// structures in the specified function.
///
@@ -507,6 +484,8 @@ class LoopInfoBase {
// BBMap - Mapping of basic blocks to the innermost loop they occur in
DenseMap<const BlockT *, LoopT *> BBMap;
std::vector<LoopT *> TopLevelLoops;
+ std::vector<LoopT *> RemovedLoops;
+
friend class LoopBase<BlockT, LoopT>;
friend class LoopInfo;
@@ -538,6 +517,9 @@ public:
for (auto *L : TopLevelLoops)
delete L;
TopLevelLoops.clear();
+ for (auto *L : RemovedLoops)
+ delete L;
+ RemovedLoops.clear();
}
/// iterator/begin/end - The interface to the top-level loops in the current
@@ -552,33 +534,30 @@ public:
reverse_iterator rend() const { return TopLevelLoops.rend(); }
bool empty() const { return TopLevelLoops.empty(); }
- /// getLoopFor - Return the inner most loop that BB lives in. If a basic
- /// block is in no loop (for example the entry node), null is returned.
- ///
+  /// Return the innermost loop that BB lives in. If a basic block is in no
+ /// loop (for example the entry node), null is returned.
LoopT *getLoopFor(const BlockT *BB) const { return BBMap.lookup(BB); }
- /// operator[] - same as getLoopFor...
- ///
+ /// Same as getLoopFor.
const LoopT *operator[](const BlockT *BB) const {
return getLoopFor(BB);
}
- /// getLoopDepth - Return the loop nesting level of the specified block. A
- /// depth of 0 means the block is not inside any loop.
- ///
+ /// Return the loop nesting level of the specified block. A depth of 0 means
+ /// the block is not inside any loop.
unsigned getLoopDepth(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L ? L->getLoopDepth() : 0;
}
- // isLoopHeader - True if the block is a loop header node
+  // True if the block is a loop header node.
bool isLoopHeader(const BlockT *BB) const {
const LoopT *L = getLoopFor(BB);
return L && L->getHeader() == BB;
}
- /// removeLoop - This removes the specified top-level loop from this loop info
- /// object. The loop is not deleted, as it will presumably be inserted into
+ /// This removes the specified top-level loop from this loop info object.
+ /// The loop is not deleted, as it will presumably be inserted into
/// another loop.
LoopT *removeLoop(iterator I) {
assert(I != end() && "Cannot remove end iterator!");
@@ -588,9 +567,9 @@ public:
return L;
}
- /// changeLoopFor - Change the top-level loop that contains BB to the
- /// specified loop. This should be used by transformations that restructure
- /// the loop hierarchy tree.
+ /// Change the top-level loop that contains BB to the specified loop.
+ /// This should be used by transformations that restructure the loop hierarchy
+ /// tree.
void changeLoopFor(BlockT *BB, LoopT *L) {
if (!L) {
BBMap.erase(BB);
@@ -599,8 +578,8 @@ public:
BBMap[BB] = L;
}
- /// changeTopLevelLoop - Replace the specified loop in the top-level loops
- /// list with the indicated loop.
+ /// Replace the specified loop in the top-level loops list with the indicated
+ /// loop.
void changeTopLevelLoop(LoopT *OldLoop,
LoopT *NewLoop) {
auto I = std::find(TopLevelLoops.begin(), TopLevelLoops.end(), OldLoop);
@@ -610,14 +589,13 @@ public:
"Loops already embedded into a subloop!");
}
- /// addTopLevelLoop - This adds the specified loop to the collection of
- /// top-level loops.
+ /// This adds the specified loop to the collection of top-level loops.
void addTopLevelLoop(LoopT *New) {
assert(!New->getParentLoop() && "Loop already in subloop!");
TopLevelLoops.push_back(New);
}
- /// removeBlock - This method completely removes BB from all data structures,
+ /// This method completely removes BB from all data structures,
/// including all of the Loop objects it is nested in and our mapping from
/// BasicBlocks to loops.
void removeBlock(BlockT *BB) {
@@ -670,15 +648,14 @@ public:
// Most of the public interface is provided via LoopInfoBase.
- /// updateUnloop - Update LoopInfo after removing the last backedge from a
- /// loop--now the "unloop". This updates the loop forest and parent loops for
- /// each block so that Unloop is no longer referenced, but does not actually
- /// delete the Unloop object. Generally, the loop pass manager should manage
- /// deleting the Unloop.
- void updateUnloop(Loop *Unloop);
+ /// Update LoopInfo after removing the last backedge from a loop. This updates
+ /// the loop forest and parent loops for each block so that \c L is no longer
+ /// referenced, but does not actually delete \c L immediately. The pointer
+ /// will remain valid until this LoopInfo's memory is released.
+ void markAsRemoved(Loop *L);
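The expected calling pattern, roughly (illustrative, not from this patch):

    // After removing L's last backedge and fixing up the CFG:
    LI.markAsRemoved(L); // forest updated; *L stays valid until LI is freed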
- /// replacementPreservesLCSSAForm - Returns true if replacing From with To
- /// everywhere is guaranteed to preserve LCSSA form.
+ /// Returns true if replacing From with To everywhere is guaranteed to
+ /// preserve LCSSA form.
bool replacementPreservesLCSSAForm(Instruction *From, Value *To) {
// Preserving LCSSA form is only problematic if the replacing value is an
// instruction.
@@ -698,8 +675,7 @@ public:
return ToLoop->contains(getLoopFor(From->getParent()));
}
- /// \brief Checks if moving a specific instruction can break LCSSA in any
- /// loop.
+ /// Checks if moving a specific instruction can break LCSSA in any loop.
///
/// Return true if moving \p Inst to before \p NewLoc will break LCSSA,
/// assuming that the function containing \p Inst and \p NewLoc is currently
diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h
index f5e778b2f262..cf29fc9ef889 100644
--- a/include/llvm/CodeGen/AsmPrinter.h
+++ b/include/llvm/CodeGen/AsmPrinter.h
@@ -259,7 +259,7 @@ public:
void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const;
/// Lower the specified LLVM Constant to an MCExpr.
- const MCExpr *lowerConstant(const Constant *CV);
+ virtual const MCExpr *lowerConstant(const Constant *CV);
/// \brief Print a general LLVM constant to the .s file.
void EmitGlobalConstant(const DataLayout &DL, const Constant *CV);
diff --git a/include/llvm/CodeGen/DIE.h b/include/llvm/CodeGen/DIE.h
index fa612d981dec..72b3adc7de99 100644
--- a/include/llvm/CodeGen/DIE.h
+++ b/include/llvm/CodeGen/DIE.h
@@ -29,6 +29,48 @@ class MCSymbol;
class raw_ostream;
class DwarfTypeUnit;
+// AsmStreamerBase - An abstract base interface class that defines methods
+// which can be implemented either to stream objects or to calculate the
+// size of the streamed objects.
+// The derived classes use an AsmPrinter to implement the methods.
+//
+// TODO: complete this interface and use it to merge EmitValue and SizeOf
+// methods in the DIE classes below.
+class AsmStreamerBase {
+protected:
+ const AsmPrinter *AP;
+ AsmStreamerBase(const AsmPrinter *AP) : AP(AP) {}
+
+public:
+ virtual ~AsmStreamerBase() {}
+ virtual unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr,
+ unsigned PadTo = 0) = 0;
+ virtual unsigned emitInt8(unsigned char Value) = 0;
+ virtual unsigned emitBytes(StringRef Data) = 0;
+};
+
+/// EmittingAsmStreamer - Implements AsmStreamerBase to stream objects.
+/// Notice that the return value is not the actual size of the streamed object.
+/// For size calculation use SizeReporterAsmStreamer.
+class EmittingAsmStreamer : public AsmStreamerBase {
+public:
+ EmittingAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {}
+ unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr,
+ unsigned PadTo = 0) override;
+ unsigned emitInt8(unsigned char Value) override;
+ unsigned emitBytes(StringRef Data) override;
+};
+
+/// SizeReporterAsmStreamer - Only reports the size of the streamed objects.
+class SizeReporterAsmStreamer : public AsmStreamerBase {
+public:
+ SizeReporterAsmStreamer(const AsmPrinter *AP) : AsmStreamerBase(AP) {}
+ unsigned emitULEB128(uint64_t Value, const char *Desc = nullptr,
+ unsigned PadTo = 0) override;
+ unsigned emitInt8(unsigned char Value) override;
+ unsigned emitBytes(StringRef Data) override;
+};
+
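A sketch of the pattern the TODO above is driving at (illustrative, not part
of this patch): the same record-writing code can either emit bytes or merely
measure them, depending on which streamer is passed in.

    unsigned streamRecord(AsmStreamerBase &S, uint64_t Code, StringRef Bytes) {
      unsigned Size = 0;
      Size += S.emitULEB128(Code, "Abbrev code");
      Size += S.emitInt8(0);
      Size += S.emitBytes(Bytes);
      // Only meaningful with a SizeReporterAsmStreamer; EmittingAsmStreamer's
      // return values are not sizes (see above).
      return Size;
    }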
//===--------------------------------------------------------------------===//
/// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
/// Dwarf abbreviation.
diff --git a/include/llvm/CodeGen/LiveInterval.h b/include/llvm/CodeGen/LiveInterval.h
index 0157bf9117e5..edade3164a3c 100644
--- a/include/llvm/CodeGen/LiveInterval.h
+++ b/include/llvm/CodeGen/LiveInterval.h
@@ -848,9 +848,9 @@ namespace llvm {
public:
explicit ConnectedVNInfoEqClasses(LiveIntervals &lis) : LIS(lis) {}
- /// Classify - Classify the values in LI into connected components.
- /// Return the number of connected components.
- unsigned Classify(const LiveInterval *LI);
+ /// Classify the values in \p LR into connected components.
+ /// Returns the number of connected components.
+ unsigned Classify(const LiveRange &LR);
/// getEqClass - Classify creates equivalence classes numbered 0..N. Return
/// the equivalence class assigned the VNI.
diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h
index 987634fb36c3..9bbdf3e071bd 100644
--- a/include/llvm/CodeGen/RegisterPressure.h
+++ b/include/llvm/CodeGen/RegisterPressure.h
@@ -141,6 +141,28 @@ public:
LLVM_DUMP_METHOD void dump(const TargetRegisterInfo &TRI) const;
};
+/// List of registers defined and used by a machine instruction.
+class RegisterOperands {
+public:
+  /// List of virtual registers and register units read by the instruction.
+ SmallVector<unsigned, 8> Uses;
+ /// \brief List of virtual registers and register units defined by the
+ /// instruction which are not dead.
+ SmallVector<unsigned, 8> Defs;
+ /// \brief List of virtual registers and register units defined by the
+ /// instruction but dead.
+ SmallVector<unsigned, 8> DeadDefs;
+
+ /// Analyze the given instruction \p MI and fill in the Uses, Defs and
+  /// DeadDefs lists based on the MachineOperand flags.
+ void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI,
+ const MachineRegisterInfo &MRI, bool IgnoreDead = false);
+
+ /// Use liveness information to find dead defs not marked with a dead flag
+ /// and move them to the DeadDefs vector.
+ void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS);
+};
+
/// Array of PressureDiffs.
class PressureDiffs {
PressureDiff *PDiffArray;
@@ -161,6 +183,10 @@ public:
const PressureDiff &operator[](unsigned Idx) const {
return const_cast<PressureDiffs*>(this)->operator[](Idx);
}
+ /// \brief Record pressure difference induced by the given operand list to
+ /// node with index \p Idx.
+ void addInstruction(unsigned Idx, const RegisterOperands &RegOpers,
+ const MachineRegisterInfo &MRI);
};
/// Store the effects of a change in pressure on things that MI scheduler cares
@@ -329,8 +355,17 @@ public:
void setPos(MachineBasicBlock::const_iterator Pos) { CurrPos = Pos; }
/// Recede across the previous instruction.
- void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr,
- PressureDiff *PDiff = nullptr);
+ void recede(SmallVectorImpl<unsigned> *LiveUses = nullptr);
+
+ /// Recede across the previous instruction.
+ /// This "low-level" variant assumes that recedeSkipDebugValues() was
+ /// called previously and takes precomputed RegisterOperands for the
+ /// instruction.
+ void recede(const RegisterOperands &RegOpers,
+ SmallVectorImpl<unsigned> *LiveUses = nullptr);
+
+ /// Recede until we find an instruction which is not a DebugValue.
+ void recedeSkipDebugValues();
/// Advance across the current instruction.
void advance();
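Putting the new pieces together, the intended bottom-up pattern looks roughly
like this (the surrounding scheduler objects are assumed; this helper is
illustrative):

    #include "llvm/CodeGen/RegisterPressure.h"

    using namespace llvm;

    // Compute the operand lists once, then feed both the PressureDiffs array
    // and the low-level recede().
    static void recordAndRecede(const MachineInstr &MI, unsigned Idx,
                                const TargetRegisterInfo &TRI,
                                const MachineRegisterInfo &MRI,
                                const LiveIntervals &LIS,
                                PressureDiffs &PDiffs,
                                RegPressureTracker &RPTracker) {
      RegisterOperands RegOpers;
      RegOpers.collect(MI, TRI, MRI);
      RegOpers.detectDeadDefs(MI, LIS);
      PDiffs.addInstruction(Idx, RegOpers, MRI);
      RPTracker.recedeSkipDebugValues();
      RPTracker.recede(RegOpers);
    }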
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index f6ad7a8572ab..46c1029f62cf 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -93,8 +93,6 @@ struct WinEHFuncInfo {
DenseMap<const Instruction *, int> EHPadStateMap;
DenseMap<const FuncletPadInst *, int> FuncletBaseStateMap;
DenseMap<const InvokeInst *, int> InvokeStateMap;
- DenseMap<const CatchReturnInst *, const BasicBlock *>
- CatchRetSuccessorColorMap;
DenseMap<MCSymbol *, std::pair<int, MCSymbol *>> LabelToStateMap;
SmallVector<CxxUnwindMapEntry, 4> CxxUnwindMap;
SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
@@ -125,8 +123,5 @@ void calculateSEHStateNumbers(const Function *ParentFn,
WinEHFuncInfo &FuncInfo);
void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo);
-
-void calculateCatchReturnSuccessorColors(const Function *Fn,
- WinEHFuncInfo &FuncInfo);
}
#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/include/llvm/DebugInfo/Symbolize/DIPrinter.h b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
index 0703fb14da61..3098199bb4da 100644
--- a/include/llvm/DebugInfo/Symbolize/DIPrinter.h
+++ b/include/llvm/DebugInfo/Symbolize/DIPrinter.h
@@ -28,13 +28,16 @@ class DIPrinter {
raw_ostream &OS;
bool PrintFunctionNames;
bool PrintPretty;
- void printName(const DILineInfo &Info, bool Inlined);
+ int PrintSourceContext;
+
+ void print(const DILineInfo &Info, bool Inlined);
+ void printContext(std::string FileName, int64_t Line);
public:
DIPrinter(raw_ostream &OS, bool PrintFunctionNames = true,
- bool PrintPretty = false)
+ bool PrintPretty = false, int PrintSourceContext = 0)
: OS(OS), PrintFunctionNames(PrintFunctionNames),
- PrintPretty(PrintPretty) {}
+ PrintPretty(PrintPretty), PrintSourceContext(PrintSourceContext) {}
DIPrinter &operator<<(const DILineInfo &Info);
DIPrinter &operator<<(const DIInliningInfo &Info);
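A minimal sketch of the new knob (function and values are illustrative):

    #include "llvm/DebugInfo/DIContext.h"
    #include "llvm/DebugInfo/Symbolize/DIPrinter.h"
    #include "llvm/Support/raw_ostream.h"

    // Print a symbolized location with five lines of surrounding source.
    void printWithContext(const llvm::DILineInfo &Info) {
      llvm::symbolize::DIPrinter Printer(llvm::outs(),
                                         /*PrintFunctionNames=*/true,
                                         /*PrintPretty=*/false,
                                         /*PrintSourceContext=*/5);
      Printer << Info;
    }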
diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
index 7dab5d1bc67f..84af4728b350 100644
--- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h
@@ -19,7 +19,6 @@
#include "LambdaResolver.h"
#include "LogicalDylib.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include <list>
#include <memory>
@@ -61,31 +60,36 @@ private:
typedef typename BaseLayerT::ModuleSetHandleT BaseLayerModuleSetHandleT;
- class ModuleOwner {
+ // Provide type-erasure for the Modules and MemoryManagers.
+ template <typename ResourceT>
+ class ResourceOwner {
public:
- ModuleOwner() = default;
- ModuleOwner(const ModuleOwner&) = delete;
- ModuleOwner& operator=(const ModuleOwner&) = delete;
- virtual ~ModuleOwner() { }
- virtual Module& getModule() const = 0;
+ ResourceOwner() = default;
+ ResourceOwner(const ResourceOwner&) = delete;
+ ResourceOwner& operator=(const ResourceOwner&) = delete;
+ virtual ~ResourceOwner() { }
+ virtual ResourceT& getResource() const = 0;
};
- template <typename ModulePtrT>
- class ModuleOwnerImpl : public ModuleOwner {
+ template <typename ResourceT, typename ResourcePtrT>
+ class ResourceOwnerImpl : public ResourceOwner<ResourceT> {
public:
- ModuleOwnerImpl(ModulePtrT ModulePtr) : ModulePtr(std::move(ModulePtr)) {}
- Module& getModule() const override { return *ModulePtr; }
+ ResourceOwnerImpl(ResourcePtrT ResourcePtr)
+ : ResourcePtr(std::move(ResourcePtr)) {}
+ ResourceT& getResource() const override { return *ResourcePtr; }
private:
- ModulePtrT ModulePtr;
+ ResourcePtrT ResourcePtr;
};
- template <typename ModulePtrT>
- std::unique_ptr<ModuleOwner> wrapOwnership(ModulePtrT ModulePtr) {
- return llvm::make_unique<ModuleOwnerImpl<ModulePtrT>>(std::move(ModulePtr));
+ template <typename ResourceT, typename ResourcePtrT>
+ std::unique_ptr<ResourceOwner<ResourceT>>
+ wrapOwnership(ResourcePtrT ResourcePtr) {
+ typedef ResourceOwnerImpl<ResourceT, ResourcePtrT> RO;
+ return llvm::make_unique<RO>(std::move(ResourcePtr));
}
struct LogicalModuleResources {
- std::unique_ptr<ModuleOwner> SourceModuleOwner;
+ std::unique_ptr<ResourceOwner<Module>> SourceModule;
std::set<const Function*> StubsToClone;
std::unique_ptr<IndirectStubsMgrT> StubsMgr;
@@ -93,15 +97,16 @@ private:
// Explicit move constructor to make MSVC happy.
LogicalModuleResources(LogicalModuleResources &&Other)
- : SourceModuleOwner(std::move(Other.SourceModuleOwner)),
+ : SourceModule(std::move(Other.SourceModule)),
StubsToClone(std::move(Other.StubsToClone)),
StubsMgr(std::move(Other.StubsMgr)) {}
// Explicit move assignment to make MSVC happy.
LogicalModuleResources& operator=(LogicalModuleResources &&Other) {
- SourceModuleOwner = std::move(Other.SourceModuleOwner);
+ SourceModule = std::move(Other.SourceModule);
StubsToClone = std::move(Other.StubsToClone);
StubsMgr = std::move(Other.StubsMgr);
+ return *this;
}
JITSymbol findSymbol(StringRef Name, bool ExportedSymbolsOnly) {
@@ -114,12 +119,35 @@ private:
};
-
-
struct LogicalDylibResources {
typedef std::function<RuntimeDyld::SymbolInfo(const std::string&)>
SymbolResolverFtor;
+
+ typedef std::function<typename BaseLayerT::ModuleSetHandleT(
+ BaseLayerT&,
+ std::unique_ptr<Module>,
+ std::unique_ptr<RuntimeDyld::SymbolResolver>)>
+ ModuleAdderFtor;
+
+ LogicalDylibResources() = default;
+
+ // Explicit move constructor to make MSVC happy.
+ LogicalDylibResources(LogicalDylibResources &&Other)
+ : ExternalSymbolResolver(std::move(Other.ExternalSymbolResolver)),
+ MemMgr(std::move(Other.MemMgr)),
+ ModuleAdder(std::move(Other.ModuleAdder)) {}
+
+ // Explicit move assignment operator to make MSVC happy.
+ LogicalDylibResources& operator=(LogicalDylibResources &&Other) {
+ ExternalSymbolResolver = std::move(Other.ExternalSymbolResolver);
+ MemMgr = std::move(Other.MemMgr);
+ ModuleAdder = std::move(Other.ModuleAdder);
+ return *this;
+ }
+
SymbolResolverFtor ExternalSymbolResolver;
+ std::unique_ptr<ResourceOwner<RuntimeDyld::MemoryManager>> MemMgr;
+ ModuleAdderFtor ModuleAdder;
};
typedef LogicalDylib<BaseLayerT, LogicalModuleResources,
@@ -157,9 +185,6 @@ public:
MemoryManagerPtrT MemMgr,
SymbolResolverPtrT Resolver) {
- assert(MemMgr == nullptr &&
- "User supplied memory managers not supported with COD yet.");
-
LogicalDylibs.push_back(CODLogicalDylib(BaseLayer));
auto &LDResources = LogicalDylibs.back().getDylibResources();
@@ -168,6 +193,18 @@ public:
return Resolver->findSymbol(Name);
};
+ auto &MemMgrRef = *MemMgr;
+ LDResources.MemMgr =
+ wrapOwnership<RuntimeDyld::MemoryManager>(std::move(MemMgr));
+
+ LDResources.ModuleAdder =
+ [&MemMgrRef](BaseLayerT &B, std::unique_ptr<Module> M,
+ std::unique_ptr<RuntimeDyld::SymbolResolver> R) {
+ std::vector<std::unique_ptr<Module>> Ms;
+ Ms.push_back(std::move(M));
+ return B.addModuleSet(std::move(Ms), &MemMgrRef, std::move(R));
+ };
+
// Process each of the modules in this module set.
for (auto &M : Ms)
addLogicalModule(LogicalDylibs.back(), std::move(M));
@@ -215,9 +252,9 @@ private:
auto LMH = LD.createLogicalModule();
auto &LMResources = LD.getLogicalModuleResources(LMH);
- LMResources.SourceModuleOwner = wrapOwnership(std::move(SrcMPtr));
+ LMResources.SourceModule = wrapOwnership<Module>(std::move(SrcMPtr));
- Module &SrcM = LMResources.SourceModuleOwner->getModule();
+ Module &SrcM = LMResources.SourceModule->getResource();
// Create the GlobalValues module.
const DataLayout &DL = SrcM.getDataLayout();
@@ -326,12 +363,9 @@ private:
return RuntimeDyld::SymbolInfo(nullptr);
});
- std::vector<std::unique_ptr<Module>> GVsMSet;
- GVsMSet.push_back(std::move(GVsM));
auto GVsH =
- BaseLayer.addModuleSet(std::move(GVsMSet),
- llvm::make_unique<SectionMemoryManager>(),
- std::move(GVsResolver));
+ LD.getDylibResources().ModuleAdder(BaseLayer, std::move(GVsM),
+ std::move(GVsResolver));
LD.addToLogicalModule(LMH, GVsH);
}
@@ -348,7 +382,7 @@ private:
LogicalModuleHandle LMH,
Function &F) {
auto &LMResources = LD.getLogicalModuleResources(LMH);
- Module &SrcM = LMResources.SourceModuleOwner->getModule();
+ Module &SrcM = LMResources.SourceModule->getResource();
// If F is a declaration we must already have compiled it.
if (F.isDeclaration())
@@ -386,7 +420,7 @@ private:
LogicalModuleHandle LMH,
const PartitionT &Part) {
auto &LMResources = LD.getLogicalModuleResources(LMH);
- Module &SrcM = LMResources.SourceModuleOwner->getModule();
+ Module &SrcM = LMResources.SourceModule->getResource();
// Create the module.
std::string NewName = SrcM.getName();
@@ -445,7 +479,6 @@ private:
moveFunctionBody(*F, VMap, &Materializer);
// Create memory manager and symbol resolver.
- auto MemMgr = llvm::make_unique<SectionMemoryManager>();
auto Resolver = createLambdaResolver(
[this, &LD, LMH](const std::string &Name) {
if (auto Symbol = LD.findSymbolInternally(LMH, Name))
@@ -459,10 +492,9 @@ private:
Symbol.getFlags());
return RuntimeDyld::SymbolInfo(nullptr);
});
- std::vector<std::unique_ptr<Module>> PartMSet;
- PartMSet.push_back(std::move(M));
- return BaseLayer.addModuleSet(std::move(PartMSet), std::move(MemMgr),
- std::move(Resolver));
+
+ return LD.getDylibResources().ModuleAdder(BaseLayer, std::move(M),
+ std::move(Resolver));
}
BaseLayerT &BaseLayer;
diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
index d6ee3a846b04..e17630fa05ff 100644
--- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h
@@ -22,6 +22,7 @@
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Support/Process.h"
#include <sstream>
namespace llvm {
@@ -179,14 +180,15 @@ private:
std::error_code EC;
auto TrampolineBlock =
sys::OwningMemoryBlock(
- sys::Memory::allocateMappedMemory(TargetT::PageSize, nullptr,
+ sys::Memory::allocateMappedMemory(sys::Process::getPageSize(), nullptr,
sys::Memory::MF_READ |
sys::Memory::MF_WRITE, EC));
assert(!EC && "Failed to allocate trampoline block");
unsigned NumTrampolines =
- (TargetT::PageSize - TargetT::PointerSize) / TargetT::TrampolineSize;
+ (sys::Process::getPageSize() - TargetT::PointerSize) /
+ TargetT::TrampolineSize;
uint8_t *TrampolineMem = static_cast<uint8_t*>(TrampolineBlock.base());
TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(),
@@ -240,8 +242,8 @@ private:
virtual void anchor();
};
-/// @brief IndirectStubsManager implementation for a concrete target, e.g.
-/// OrcX86_64. (See OrcTargetSupport.h).
+/// @brief IndirectStubsManager implementation for the host architecture, e.g.
+/// OrcX86_64. (See OrcArchitectureSupport.h).
template <typename TargetT>
class LocalIndirectStubsManager : public IndirectStubsManager {
public:
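
A quick sanity check on the new arithmetic: assuming 4096-byte pages and the
OrcX86_64 constants below (PointerSize = 8, TrampolineSize = 8), each trampoline
block now yields (4096 - 8) / 8 = 511 trampolines, with the subtracted
pointer-sized slot left free in the page for target-specific use (e.g. the
resolver address the trampolines dispatch through).
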
diff --git a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
index 2acfecfb94dc..4dc48f114883 100644
--- a/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
+++ b/include/llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h
@@ -108,9 +108,7 @@ private:
void Finalize() override {
State = Finalizing;
- RTDyld->resolveRelocations();
- RTDyld->registerEHFrames();
- MemMgr->finalizeMemory();
+ RTDyld->finalizeWithMemoryManagerLocking();
State = Finalized;
}
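
For reference, RuntimeDyld::finalizeWithMemoryManagerLocking() rolls the three
retired calls (resolveRelocations, registerEHFrames, finalizeMemory) into a
single operation that also flags the memory manager as finalization-locked for
its duration, so stacked JIT layers sharing one memory manager cannot
double-finalize it.
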
diff --git a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h b/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
index 246d3e0a9fc6..1b0488bcf00d 100644
--- a/include/llvm/ExecutionEngine/Orc/OrcTargetSupport.h
+++ b/include/llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h
@@ -1,4 +1,4 @@
-//===-- OrcTargetSupport.h - Code to support specific targets --*- C++ -*-===//
+//===-- OrcArchitectureSupport.h - Architecture support code ---*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,32 +7,76 @@
//
//===----------------------------------------------------------------------===//
//
-// Target specific code for Orc, e.g. callback assembly.
+// Architecture specific code for Orc, e.g. callback assembly.
//
-// Target classes should be part of the JIT *target* process, not the host
+// Architecture classes should be part of the JIT *target* process, not the host
// process (except where you're doing hosted JITing and the two are one and the
// same).
//
//===----------------------------------------------------------------------===//
-#ifndef LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H
-#define LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
#include "IndirectionUtils.h"
#include "llvm/Support/Memory.h"
+#include "llvm/Support/Process.h"
namespace llvm {
namespace orc {
+/// Generic ORC Architecture support.
+///
+/// This class can be substituted as the target architecture support class
+/// for ORC templates that require one (e.g. IndirectStubsManagers). It does
+/// not support lazy JITing, however, and any attempt to use that
+/// functionality will result in a call to llvm_unreachable.
+class OrcGenericArchitecture {
+public:
+ static const unsigned PointerSize = sizeof(uintptr_t);
+ static const unsigned TrampolineSize = 1;
+ static const unsigned ResolverCodeSize = 1;
+
+ typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
+
+ static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
+ void *CallbackMgr) {
+ llvm_unreachable("writeResolverCode is not supported by the generic host "
+ "support class");
+ }
+
+ static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
+ llvm_unreachable("writeTrampolines is not supported by the generic host "
+ "support class");
+ }
+
+ class IndirectStubsInfo {
+ public:
+ const static unsigned StubSize = 1;
+ unsigned getNumStubs() const { llvm_unreachable("Not supported"); }
+ void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); }
+ void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); }
+ };
+
+ static std::error_code emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ llvm_unreachable("emitIndirectStubsBlock is not supported by the generic "
+ "host support class");
+ }
+};
+
+/// @brief X86_64 support.
+///
+/// X86_64 supports lazy JITing.
class OrcX86_64 {
public:
- static const unsigned PageSize = 4096;
static const unsigned PointerSize = 8;
static const unsigned TrampolineSize = 8;
static const unsigned ResolverCodeSize = 0x78;
- typedef TargetAddress (*JITReentryFn)(void *CallbackMgr,
- void *TrampolineId);
+ typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
@@ -49,16 +93,16 @@ public:
/// makeIndirectStubsBlock function.
class IndirectStubsInfo {
friend class OrcX86_64;
+
public:
const static unsigned StubSize = 8;
- const static unsigned PtrSize = 8;
IndirectStubsInfo() : NumStubs(0) {}
IndirectStubsInfo(IndirectStubsInfo &&Other)
: NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) {
Other.NumStubs = 0;
}
- IndirectStubsInfo& operator=(IndirectStubsInfo &&Other) {
+ IndirectStubsInfo &operator=(IndirectStubsInfo &&Other) {
NumStubs = Other.NumStubs;
Other.NumStubs = 0;
StubsMem = std::move(Other.StubsMem);
@@ -70,17 +114,18 @@ public:
/// @brief Get a pointer to the stub at the given index, which must be in
/// the range 0 .. getNumStubs() - 1.
- void* getStub(unsigned Idx) const {
- return static_cast<uint64_t*>(StubsMem.base()) + Idx;
+ void *getStub(unsigned Idx) const {
+ return static_cast<uint64_t *>(StubsMem.base()) + Idx;
}
/// @brief Get a pointer to the implementation-pointer at the given index,
/// which must be in the range 0 .. getNumStubs() - 1.
- void** getPtr(unsigned Idx) const {
+ void **getPtr(unsigned Idx) const {
char *PtrsBase =
- static_cast<char*>(StubsMem.base()) + NumStubs * StubSize;
- return reinterpret_cast<void**>(PtrsBase) + Idx;
+ static_cast<char *>(StubsMem.base()) + NumStubs * StubSize;
+ return reinterpret_cast<void **>(PtrsBase) + Idx;
}
+
private:
unsigned NumStubs;
sys::OwningMemoryBlock StubsMem;
@@ -100,4 +145,4 @@ public:
} // End namespace orc.
} // End namespace llvm.
-#endif // LLVM_EXECUTIONENGINE_ORC_ORCTARGETSUPPORT_H
+#endif // LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
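
Selecting between the two support classes is a host-architecture decision. A
sketch (the helper name is illustrative, not part of the patch):

    #include "llvm/ADT/STLExtras.h"
    #include "llvm/ExecutionEngine/Orc/IndirectionUtils.h"
    #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
    #include <memory>

    using namespace llvm;
    using namespace llvm::orc;

    // The generic class constructs and links fine anywhere, but will hit
    // llvm_unreachable the first time a stub or trampoline is emitted.
    std::unique_ptr<IndirectStubsManager>
    makeLocalStubsManager(bool HostIsX86_64) {
      if (HostIsX86_64)
        return llvm::make_unique<LocalIndirectStubsManager<OrcX86_64>>();
      return llvm::make_unique<
          LocalIndirectStubsManager<OrcGenericArchitecture>>();
    }
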
diff --git a/include/llvm/ExecutionEngine/Orc/OrcError.h b/include/llvm/ExecutionEngine/Orc/OrcError.h
new file mode 100644
index 000000000000..48f35d6b39be
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/OrcError.h
@@ -0,0 +1,37 @@
+//===--------- OrcError.h - Orc error codes and helpers -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Define an error category, error codes, and helper utilities for Orc.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCERROR_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCERROR_H
+
+#include <system_error>
+
+namespace llvm {
+namespace orc {
+
+enum class OrcErrorCode : int {
+ // RPC Errors
+ RemoteAllocatorDoesNotExist = 1,
+ RemoteAllocatorIdAlreadyInUse,
+ RemoteMProtectAddrUnrecognized,
+ RemoteIndirectStubsOwnerDoesNotExist,
+ RemoteIndirectStubsOwnerIdAlreadyInUse,
+ UnexpectedRPCCall
+};
+
+std::error_code orcError(OrcErrorCode ErrCode);
+
+} // End namespace orc.
+} // End namespace llvm.
+
+#endif // LLVM_EXECUTIONENGINE_ORC_ORCERROR_H
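
The header only declares orcError(); a minimal sketch of the matching
implementation file (illustrative; the in-tree OrcError.cpp may differ in
detail):

    #include "llvm/ExecutionEngine/Orc/OrcError.h"
    #include <string>

    using namespace llvm::orc;

    namespace {
    // Singleton category mapping OrcErrorCode values to messages.
    class OrcErrorCategory : public std::error_category {
    public:
      const char *name() const noexcept override { return "orc"; }
      std::string message(int Condition) const override {
        switch (static_cast<OrcErrorCode>(Condition)) {
        case OrcErrorCode::RemoteAllocatorDoesNotExist:
          return "Remote allocator does not exist";
        case OrcErrorCode::UnexpectedRPCCall:
          return "Unexpected RPC call";
        // ... one case per remaining enumerator ...
        default:
          return "Unrecognized error code";
        }
      }
    };
    OrcErrorCategory TheOrcErrorCategory;
    }

    std::error_code llvm::orc::orcError(OrcErrorCode ErrCode) {
      return std::error_code(static_cast<int>(ErrCode), TheOrcErrorCategory);
    }
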
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
new file mode 100644
index 000000000000..d7640b8e8b5f
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h
@@ -0,0 +1,784 @@
+//===---- OrcRemoteTargetClient.h - Orc Remote-target Client ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the OrcRemoteTargetClient class and helpers. This class
+// can be used to communicate over an RPCChannel with an OrcRemoteTargetServer
+// instance to support remote-JITing.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETCLIENT_H
+
+#include "IndirectionUtils.h"
+#include "OrcRemoteTargetRPCAPI.h"
+#include <system_error>
+
+#define DEBUG_TYPE "orc-remote"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+/// This class provides utilities (including memory manager, indirect stubs
+/// manager, and compile callback manager types) that support remote JITing
+/// in ORC.
+///
+/// Each of the utility classes talks to a JIT server (an instance of the
+/// OrcRemoteTargetServer class) via an RPC system (see RPCUtils.h) to carry out
+/// its actions.
+template <typename ChannelT>
+class OrcRemoteTargetClient : public OrcRemoteTargetRPCAPI {
+public:
+ /// Remote memory manager.
+ class RCMemoryManager : public RuntimeDyld::MemoryManager {
+ public:
+ RCMemoryManager(OrcRemoteTargetClient &Client, ResourceIdMgr::ResourceId Id)
+ : Client(Client), Id(Id) {
+ DEBUG(dbgs() << "Created remote allocator " << Id << "\n");
+ }
+
+ RCMemoryManager(RCMemoryManager &&Other)
+ : Client(std::move(Other.Client)), Id(std::move(Other.Id)),
+ Unmapped(std::move(Other.Unmapped)),
+ Unfinalized(std::move(Other.Unfinalized)) {}
+
+ RCMemoryManager &operator=(RCMemoryManager &&Other) {
+ Client = std::move(Other.Client);
+ Id = std::move(Other.Id);
+ Unmapped = std::move(Other.Unmapped);
+ Unfinalized = std::move(Other.Unfinalized);
+ return *this;
+ }
+
+ ~RCMemoryManager() {
+ Client.destroyRemoteAllocator(Id);
+ DEBUG(dbgs() << "Destroyed remote allocator " << Id << "\n");
+ }
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override {
+ Unmapped.back().CodeAllocs.emplace_back(Size, Alignment);
+ uint8_t *Alloc = reinterpret_cast<uint8_t *>(
+ Unmapped.back().CodeAllocs.back().getLocalAddress());
+ DEBUG(dbgs() << "Allocator " << Id << " allocated code for "
+ << SectionName << ": " << Alloc << " (" << Size
+ << " bytes, alignment " << Alignment << ")\n");
+ return Alloc;
+ }
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override {
+ if (IsReadOnly) {
+ Unmapped.back().RODataAllocs.emplace_back(Size, Alignment);
+ uint8_t *Alloc = reinterpret_cast<uint8_t *>(
+ Unmapped.back().RODataAllocs.back().getLocalAddress());
+ DEBUG(dbgs() << "Allocator " << Id << " allocated ro-data for "
+ << SectionName << ": " << Alloc << " (" << Size
+ << " bytes, alignment " << Alignment << ")\n");
+ return Alloc;
+ } // else...
+
+ Unmapped.back().RWDataAllocs.emplace_back(Size, Alignment);
+ uint8_t *Alloc = reinterpret_cast<uint8_t *>(
+ Unmapped.back().RWDataAllocs.back().getLocalAddress());
+ DEBUG(dbgs() << "Allocator " << Id << " allocated rw-data for "
+ << SectionName << ": " << Alloc << " (" << Size
+ << " bytes, alignment " << Alignment << ")\n");
+ return Alloc;
+ }
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override {
+ Unmapped.push_back(ObjectAllocs());
+
+ DEBUG(dbgs() << "Allocator " << Id << " reserved:\n");
+
+ if (CodeSize != 0) {
+ std::error_code EC = Client.reserveMem(Unmapped.back().RemoteCodeAddr,
+ Id, CodeSize, CodeAlign);
+ // FIXME: Add error to poll.
+ assert(!EC && "Failed reserving remote memory.");
+ (void)EC;
+ DEBUG(dbgs() << " code: "
+ << format("0x%016x", Unmapped.back().RemoteCodeAddr)
+ << " (" << CodeSize << " bytes, alignment " << CodeAlign
+ << ")\n");
+ }
+
+ if (RODataSize != 0) {
+ std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRODataAddr,
+ Id, RODataSize, RODataAlign);
+ // FIXME: Add error to poll.
+ assert(!EC && "Failed reserving remote memory.");
+ (void)EC;
+ DEBUG(dbgs() << " ro-data: "
+ << format("0x%016x", Unmapped.back().RemoteRODataAddr)
+ << " (" << RODataSize << " bytes, alignment "
+ << RODataAlign << ")\n");
+ }
+
+ if (RWDataSize != 0) {
+ std::error_code EC = Client.reserveMem(Unmapped.back().RemoteRWDataAddr,
+ Id, RWDataSize, RWDataAlign);
+ // FIXME: Add error to poll.
+ assert(!EC && "Failed reserving remote memory.");
+ (void)EC;
+ DEBUG(dbgs() << " rw-data: "
+ << format("0x%016x", Unmapped.back().RemoteRWDataAddr)
+ << " (" << RWDataSize << " bytes, alignment "
+ << RWDataAlign << ")\n");
+ }
+ }
+
+ bool needsToReserveAllocationSpace() override { return true; }
+
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {}
+
+ void deregisterEHFrames(uint8_t *addr, uint64_t LoadAddr,
+ size_t Size) override {}
+
+ void notifyObjectLoaded(RuntimeDyld &Dyld,
+ const object::ObjectFile &Obj) override {
+ DEBUG(dbgs() << "Allocator " << Id << " applied mappings:\n");
+ for (auto &ObjAllocs : Unmapped) {
+ {
+ TargetAddress NextCodeAddr = ObjAllocs.RemoteCodeAddr;
+ for (auto &Alloc : ObjAllocs.CodeAllocs) {
+ NextCodeAddr = RoundUpToAlignment(NextCodeAddr, Alloc.getAlign());
+ Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextCodeAddr);
+ DEBUG(dbgs() << " code: "
+ << static_cast<void *>(Alloc.getLocalAddress())
+ << " -> " << format("0x%016x", NextCodeAddr) << "\n");
+ Alloc.setRemoteAddress(NextCodeAddr);
+ NextCodeAddr += Alloc.getSize();
+ }
+ }
+ {
+ TargetAddress NextRODataAddr = ObjAllocs.RemoteRODataAddr;
+ for (auto &Alloc : ObjAllocs.RODataAllocs) {
+ NextRODataAddr =
+ RoundUpToAlignment(NextRODataAddr, Alloc.getAlign());
+ Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRODataAddr);
+ DEBUG(dbgs() << " ro-data: "
+ << static_cast<void *>(Alloc.getLocalAddress())
+ << " -> " << format("0x%016x", NextRODataAddr)
+ << "\n");
+ Alloc.setRemoteAddress(NextRODataAddr);
+ NextRODataAddr += Alloc.getSize();
+ }
+ }
+ {
+ TargetAddress NextRWDataAddr = ObjAllocs.RemoteRWDataAddr;
+ for (auto &Alloc : ObjAllocs.RWDataAllocs) {
+ NextRWDataAddr =
+ RoundUpToAlignment(NextRWDataAddr, Alloc.getAlign());
+ Dyld.mapSectionAddress(Alloc.getLocalAddress(), NextRWDataAddr);
+ DEBUG(dbgs() << " rw-data: "
+ << static_cast<void *>(Alloc.getLocalAddress())
+ << " -> " << format("0x%016x", NextRWDataAddr)
+ << "\n");
+ Alloc.setRemoteAddress(NextRWDataAddr);
+ NextRWDataAddr += Alloc.getSize();
+ }
+ }
+ Unfinalized.push_back(std::move(ObjAllocs));
+ }
+ Unmapped.clear();
+ }
+
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+ DEBUG(dbgs() << "Allocator " << Id << " finalizing:\n");
+
+ for (auto &ObjAllocs : Unfinalized) {
+
+ for (auto &Alloc : ObjAllocs.CodeAllocs) {
+ DEBUG(dbgs() << " copying code: "
+ << static_cast<void *>(Alloc.getLocalAddress()) << " -> "
+ << format("0x%016x", Alloc.getRemoteAddress()) << " ("
+ << Alloc.getSize() << " bytes)\n");
+ Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
+ Alloc.getSize());
+ }
+
+ if (ObjAllocs.RemoteCodeAddr) {
+ DEBUG(dbgs() << " setting R-X permissions on code block: "
+ << format("0x%016x", ObjAllocs.RemoteCodeAddr) << "\n");
+ Client.setProtections(Id, ObjAllocs.RemoteCodeAddr,
+ sys::Memory::MF_READ | sys::Memory::MF_EXEC);
+ }
+
+ for (auto &Alloc : ObjAllocs.RODataAllocs) {
+ DEBUG(dbgs() << " copying ro-data: "
+ << static_cast<void *>(Alloc.getLocalAddress()) << " -> "
+ << format("0x%016x", Alloc.getRemoteAddress()) << " ("
+ << Alloc.getSize() << " bytes)\n");
+ Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
+ Alloc.getSize());
+ }
+
+ if (ObjAllocs.RemoteRODataAddr) {
+ DEBUG(dbgs() << " setting R-- permissions on ro-data block: "
+ << format("0x%016x", ObjAllocs.RemoteRODataAddr)
+ << "\n");
+ Client.setProtections(Id, ObjAllocs.RemoteRODataAddr,
+ sys::Memory::MF_READ);
+ }
+
+ for (auto &Alloc : ObjAllocs.RWDataAllocs) {
+ DEBUG(dbgs() << " copying rw-data: "
+ << static_cast<void *>(Alloc.getLocalAddress()) << " -> "
+ << format("0x%016x", Alloc.getRemoteAddress()) << " ("
+ << Alloc.getSize() << " bytes)\n");
+ Client.writeMem(Alloc.getRemoteAddress(), Alloc.getLocalAddress(),
+ Alloc.getSize());
+ }
+
+ if (ObjAllocs.RemoteRWDataAddr) {
+ DEBUG(dbgs() << " setting RW- permissions on rw-data block: "
+ << format("0x%016x", ObjAllocs.RemoteRWDataAddr)
+ << "\n");
+ Client.setProtections(Id, ObjAllocs.RemoteRWDataAddr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE);
+ }
+ }
+ Unfinalized.clear();
+
+ return false;
+ }
+
+ private:
+ class Alloc {
+ public:
+ Alloc(uint64_t Size, unsigned Align)
+ : Size(Size), Align(Align), Contents(new char[Size + Align - 1]),
+ RemoteAddr(0) {}
+
+ Alloc(Alloc &&Other)
+ : Size(std::move(Other.Size)), Align(std::move(Other.Align)),
+ Contents(std::move(Other.Contents)),
+ RemoteAddr(std::move(Other.RemoteAddr)) {}
+
+ Alloc &operator=(Alloc &&Other) {
+ Size = std::move(Other.Size);
+ Align = std::move(Other.Align);
+ Contents = std::move(Other.Contents);
+ RemoteAddr = std::move(Other.RemoteAddr);
+ return *this;
+ }
+
+ uint64_t getSize() const { return Size; }
+
+ unsigned getAlign() const { return Align; }
+
+ char *getLocalAddress() const {
+ uintptr_t LocalAddr = reinterpret_cast<uintptr_t>(Contents.get());
+ LocalAddr = RoundUpToAlignment(LocalAddr, Align);
+ return reinterpret_cast<char *>(LocalAddr);
+ }
+
+ void setRemoteAddress(TargetAddress RemoteAddr) {
+ this->RemoteAddr = RemoteAddr;
+ }
+
+ TargetAddress getRemoteAddress() const { return RemoteAddr; }
+
+ private:
+ uint64_t Size;
+ unsigned Align;
+ std::unique_ptr<char[]> Contents;
+ TargetAddress RemoteAddr;
+ };
+
+ struct ObjectAllocs {
+ ObjectAllocs()
+ : RemoteCodeAddr(0), RemoteRODataAddr(0), RemoteRWDataAddr(0) {}
+
+ ObjectAllocs(ObjectAllocs &&Other)
+ : RemoteCodeAddr(std::move(Other.RemoteCodeAddr)),
+ RemoteRODataAddr(std::move(Other.RemoteRODataAddr)),
+ RemoteRWDataAddr(std::move(Other.RemoteRWDataAddr)),
+ CodeAllocs(std::move(Other.CodeAllocs)),
+ RODataAllocs(std::move(Other.RODataAllocs)),
+ RWDataAllocs(std::move(Other.RWDataAllocs)) {}
+
+ ObjectAllocs &operator=(ObjectAllocs &&Other) {
+ RemoteCodeAddr = std::move(Other.RemoteCodeAddr);
+ RemoteRODataAddr = std::move(Other.RemoteRODataAddr);
+ RemoteRWDataAddr = std::move(Other.RemoteRWDataAddr);
+ CodeAllocs = std::move(Other.CodeAllocs);
+ RODataAllocs = std::move(Other.RODataAllocs);
+ RWDataAllocs = std::move(Other.RWDataAllocs);
+ return *this;
+ }
+
+ TargetAddress RemoteCodeAddr;
+ TargetAddress RemoteRODataAddr;
+ TargetAddress RemoteRWDataAddr;
+ std::vector<Alloc> CodeAllocs, RODataAllocs, RWDataAllocs;
+ };
+
+ OrcRemoteTargetClient &Client;
+ ResourceIdMgr::ResourceId Id;
+ std::vector<ObjectAllocs> Unmapped;
+ std::vector<ObjectAllocs> Unfinalized;
+ };
+
+ /// Remote indirect stubs manager.
+ class RCIndirectStubsManager : public IndirectStubsManager {
+ public:
+ RCIndirectStubsManager(OrcRemoteTargetClient &Remote,
+ ResourceIdMgr::ResourceId Id)
+ : Remote(Remote), Id(Id) {}
+
+ ~RCIndirectStubsManager() { Remote.destroyIndirectStubsManager(Id); }
+
+ std::error_code createStub(StringRef StubName, TargetAddress StubAddr,
+ JITSymbolFlags StubFlags) override {
+ if (auto EC = reserveStubs(1))
+ return EC;
+
+ return createStubInternal(StubName, StubAddr, StubFlags);
+ }
+
+ std::error_code createStubs(const StubInitsMap &StubInits) override {
+ if (auto EC = reserveStubs(StubInits.size()))
+ return EC;
+
+ for (auto &Entry : StubInits)
+ if (auto EC = createStubInternal(Entry.first(), Entry.second.first,
+ Entry.second.second))
+ return EC;
+
+ return std::error_code();
+ }
+
+ JITSymbol findStub(StringRef Name, bool ExportedStubsOnly) override {
+ auto I = StubIndexes.find(Name);
+ if (I == StubIndexes.end())
+ return nullptr;
+ auto Key = I->second.first;
+ auto Flags = I->second.second;
+ auto StubSymbol = JITSymbol(getStubAddr(Key), Flags);
+ if (ExportedStubsOnly && !StubSymbol.isExported())
+ return nullptr;
+ return StubSymbol;
+ }
+
+ JITSymbol findPointer(StringRef Name) override {
+ auto I = StubIndexes.find(Name);
+ if (I == StubIndexes.end())
+ return nullptr;
+ auto Key = I->second.first;
+ auto Flags = I->second.second;
+ return JITSymbol(getPtrAddr(Key), Flags);
+ }
+
+ std::error_code updatePointer(StringRef Name,
+ TargetAddress NewAddr) override {
+ auto I = StubIndexes.find(Name);
+ assert(I != StubIndexes.end() && "No stub pointer for symbol");
+ auto Key = I->second.first;
+ return Remote.writePointer(getPtrAddr(Key), NewAddr);
+ }
+
+ private:
+ struct RemoteIndirectStubsInfo {
+ RemoteIndirectStubsInfo(TargetAddress StubBase, TargetAddress PtrBase,
+ unsigned NumStubs)
+ : StubBase(StubBase), PtrBase(PtrBase), NumStubs(NumStubs) {}
+ TargetAddress StubBase;
+ TargetAddress PtrBase;
+ unsigned NumStubs;
+ };
+
+ OrcRemoteTargetClient &Remote;
+ ResourceIdMgr::ResourceId Id;
+ std::vector<RemoteIndirectStubsInfo> RemoteIndirectStubsInfos;
+ typedef std::pair<uint16_t, uint16_t> StubKey;
+ std::vector<StubKey> FreeStubs;
+ StringMap<std::pair<StubKey, JITSymbolFlags>> StubIndexes;
+
+ std::error_code reserveStubs(unsigned NumStubs) {
+ if (NumStubs <= FreeStubs.size())
+ return std::error_code();
+
+ unsigned NewStubsRequired = NumStubs - FreeStubs.size();
+ TargetAddress StubBase;
+ TargetAddress PtrBase;
+ unsigned NumStubsEmitted;
+
+ if (auto EC = Remote.emitIndirectStubs(StubBase, PtrBase, NumStubsEmitted,
+ Id, NewStubsRequired))
+ return EC;
+
+ unsigned NewBlockId = RemoteIndirectStubsInfos.size();
+ RemoteIndirectStubsInfos.push_back(
+ RemoteIndirectStubsInfo(StubBase, PtrBase, NumStubsEmitted));
+
+ for (unsigned I = 0; I < NumStubsEmitted; ++I)
+ FreeStubs.push_back(std::make_pair(NewBlockId, I));
+
+ return std::error_code();
+ }
+
+ std::error_code createStubInternal(StringRef StubName,
+ TargetAddress InitAddr,
+ JITSymbolFlags StubFlags) {
+ auto Key = FreeStubs.back();
+ FreeStubs.pop_back();
+ StubIndexes[StubName] = std::make_pair(Key, StubFlags);
+ return Remote.writePointer(getPtrAddr(Key), InitAddr);
+ }
+
+ TargetAddress getStubAddr(StubKey K) {
+ assert(RemoteIndirectStubsInfos[K.first].StubBase != 0 &&
+ "Missing stub address");
+ return RemoteIndirectStubsInfos[K.first].StubBase +
+ K.second * Remote.getIndirectStubSize();
+ }
+
+ TargetAddress getPtrAddr(StubKey K) {
+ assert(RemoteIndirectStubsInfos[K.first].PtrBase != 0 &&
+ "Missing pointer address");
+ return RemoteIndirectStubsInfos[K.first].PtrBase +
+ K.second * Remote.getPointerSize();
+ }
+ };
+
+ /// Remote compile callback manager.
+ class RCCompileCallbackManager : public JITCompileCallbackManager {
+ public:
+ RCCompileCallbackManager(TargetAddress ErrorHandlerAddress,
+ OrcRemoteTargetClient &Remote)
+ : JITCompileCallbackManager(ErrorHandlerAddress), Remote(Remote) {
+ assert(!Remote.CompileCallback && "Compile callback already set");
+ Remote.CompileCallback = [this](TargetAddress TrampolineAddr) {
+ return executeCompileCallback(TrampolineAddr);
+ };
+ Remote.emitResolverBlock();
+ }
+
+ private:
+ void grow() {
+ TargetAddress BlockAddr = 0;
+ uint32_t NumTrampolines = 0;
+ auto EC = Remote.emitTrampolineBlock(BlockAddr, NumTrampolines);
+ assert(!EC && "Failed to create trampolines");
+ (void)EC;
+
+ uint32_t TrampolineSize = Remote.getTrampolineSize();
+ for (unsigned I = 0; I < NumTrampolines; ++I)
+ this->AvailableTrampolines.push_back(BlockAddr + (I * TrampolineSize));
+ }
+
+ OrcRemoteTargetClient &Remote;
+ };
+
+ /// Create an OrcRemoteTargetClient.
+ /// Channel is the ChannelT instance to communicate on. It is assumed that
+ /// the channel is ready to be read from and written to.
+ static ErrorOr<OrcRemoteTargetClient> Create(ChannelT &Channel) {
+ std::error_code EC;
+ OrcRemoteTargetClient H(Channel, EC);
+ if (EC)
+ return EC;
+ return H;
+ }
+
+ /// Call the int(void) function at the given address in the target and return
+ /// its result.
+ std::error_code callIntVoid(int &Result, TargetAddress Addr) {
+ DEBUG(dbgs() << "Calling int(*)(void) " << format("0x%016x", Addr) << "\n");
+
+ if (auto EC = call<CallIntVoid>(Channel, Addr))
+ return EC;
+
+ unsigned NextProcId;
+ if (auto EC = listenForCompileRequests(NextProcId))
+ return EC;
+
+ if (NextProcId != CallIntVoidResponseId)
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
+
+ return handle<CallIntVoidResponse>(Channel, [&](int R) {
+ Result = R;
+ DEBUG(dbgs() << "Result: " << R << "\n");
+ return std::error_code();
+ });
+ }
+
+ /// Call the int(int, char*[]) function at the given address in the target and
+ /// return its result.
+ std::error_code callMain(int &Result, TargetAddress Addr,
+ const std::vector<std::string> &Args) {
+ DEBUG(dbgs() << "Calling int(*)(int, char*[]) " << format("0x%016x", Addr)
+ << "\n");
+
+ if (auto EC = call<CallMain>(Channel, Addr, Args))
+ return EC;
+
+ unsigned NextProcId;
+ if (auto EC = listenForCompileRequests(NextProcId))
+ return EC;
+
+ if (NextProcId != CallMainResponseId)
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
+
+ return handle<CallMainResponse>(Channel, [&](int R) {
+ Result = R;
+ DEBUG(dbgs() << "Result: " << R << "\n");
+ return std::error_code();
+ });
+ }
+
+ /// Call the void() function at the given address in the target and wait for
+ /// it to finish.
+ std::error_code callVoidVoid(TargetAddress Addr) {
+ DEBUG(dbgs() << "Calling void(*)(void) " << format("0x%016x", Addr)
+ << "\n");
+
+ if (auto EC = call<CallVoidVoid>(Channel, Addr))
+ return EC;
+
+ unsigned NextProcId;
+ if (auto EC = listenForCompileRequests(NextProcId))
+ return EC;
+
+ if (NextProcId != CallVoidVoidResponseId)
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
+
+ return handle<CallVoidVoidResponse>(Channel, doNothing);
+ }
+
+ /// Create an RCMemoryManager which will allocate its memory on the remote
+ /// target.
+ std::error_code
+ createRemoteMemoryManager(std::unique_ptr<RCMemoryManager> &MM) {
+ assert(!MM && "MemoryManager should be null before creation.");
+
+ auto Id = AllocatorIds.getNext();
+ if (auto EC = call<CreateRemoteAllocator>(Channel, Id))
+ return EC;
+ MM = llvm::make_unique<RCMemoryManager>(*this, Id);
+ return std::error_code();
+ }
+
+ /// Create an RCIndirectStubsManager that will allocate stubs on the remote
+ /// target.
+ std::error_code
+ createIndirectStubsManager(std::unique_ptr<RCIndirectStubsManager> &I) {
+ assert(!I && "Indirect stubs manager should be null before creation.");
+ auto Id = IndirectStubOwnerIds.getNext();
+ if (auto EC = call<CreateIndirectStubsOwner>(Channel, Id))
+ return EC;
+ I = llvm::make_unique<RCIndirectStubsManager>(*this, Id);
+ return std::error_code();
+ }
+
+ /// Search for symbols in the remote process. Note: This should be used by
+ /// symbol resolvers *after* they've searched the local symbol table in the
+ /// JIT stack.
+ std::error_code getSymbolAddress(TargetAddress &Addr, StringRef Name) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ // Request remote symbol address.
+ if (auto EC = call<GetSymbolAddress>(Channel, Name))
+ return EC;
+
+ return expect<GetSymbolAddressResponse>(Channel, [&](TargetAddress &A) {
+ Addr = A;
+ DEBUG(dbgs() << "Remote address lookup " << Name << " = "
+ << format("0x%016x", Addr) << "\n");
+ return std::error_code();
+ });
+ }
+
+ /// Get the triple for the remote target.
+ const std::string &getTargetTriple() const { return RemoteTargetTriple; }
+
+ std::error_code terminateSession() { return call<TerminateSession>(Channel); }
+
+private:
+ OrcRemoteTargetClient(ChannelT &Channel, std::error_code &EC)
+ : Channel(Channel), RemotePointerSize(0), RemotePageSize(0),
+ RemoteTrampolineSize(0), RemoteIndirectStubSize(0) {
+ if ((EC = call<GetRemoteInfo>(Channel)))
+ return;
+
+ EC = expect<GetRemoteInfoResponse>(
+ Channel, readArgs(RemoteTargetTriple, RemotePointerSize, RemotePageSize,
+ RemoteTrampolineSize, RemoteIndirectStubSize));
+ }
+
+ void destroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
+ if (auto EC = call<DestroyRemoteAllocator>(Channel, Id)) {
+ // FIXME: This will be triggered by a removeModuleSet call: Propagate
+ // error return up through that.
+ llvm_unreachable("Failed to destroy remote allocator.");
+ }
+ AllocatorIds.release(Id);
+ }
+
+ std::error_code destroyIndirectStubsManager(ResourceIdMgr::ResourceId Id) {
+ IndirectStubOwnerIds.release(Id);
+ return call<DestroyIndirectStubsOwner>(Channel, Id);
+ }
+
+ std::error_code emitIndirectStubs(TargetAddress &StubBase,
+ TargetAddress &PtrBase,
+ uint32_t &NumStubsEmitted,
+ ResourceIdMgr::ResourceId Id,
+ uint32_t NumStubsRequired) {
+ if (auto EC = call<EmitIndirectStubs>(Channel, Id, NumStubsRequired))
+ return EC;
+
+ return expect<EmitIndirectStubsResponse>(
+ Channel, readArgs(StubBase, PtrBase, NumStubsEmitted));
+ }
+
+ std::error_code emitResolverBlock() {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ return call<EmitResolverBlock>(Channel);
+ }
+
+ std::error_code emitTrampolineBlock(TargetAddress &BlockAddr,
+ uint32_t &NumTrampolines) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ if (auto EC = call<EmitTrampolineBlock>(Channel))
+ return EC;
+
+ return expect<EmitTrampolineBlockResponse>(
+ Channel, [&](TargetAddress BAddr, uint32_t NTrampolines) {
+ BlockAddr = BAddr;
+ NumTrampolines = NTrampolines;
+ return std::error_code();
+ });
+ }
+
+ uint32_t getIndirectStubSize() const { return RemoteIndirectStubSize; }
+ uint32_t getPageSize() const { return RemotePageSize; }
+ uint32_t getPointerSize() const { return RemotePointerSize; }
+
+ uint32_t getTrampolineSize() const { return RemoteTrampolineSize; }
+
+ std::error_code listenForCompileRequests(uint32_t &NextId) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ if (auto EC = getNextProcId(Channel, NextId))
+ return EC;
+
+ while (NextId == RequestCompileId) {
+ TargetAddress TrampolineAddr = 0;
+ if (auto EC = handle<RequestCompile>(Channel, readArgs(TrampolineAddr)))
+ return EC;
+
+ TargetAddress ImplAddr = CompileCallback(TrampolineAddr);
+ if (auto EC = call<RequestCompileResponse>(Channel, ImplAddr))
+ return EC;
+
+ if (auto EC = getNextProcId(Channel, NextId))
+ return EC;
+ }
+
+ return std::error_code();
+ }
+
+ std::error_code readMem(char *Dst, TargetAddress Src, uint64_t Size) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ if (auto EC = call<ReadMem>(Channel, Src, Size))
+ return EC;
+
+ if (auto EC = expect<ReadMemResponse>(
+ Channel, [&]() { return Channel.readBytes(Dst, Size); }))
+ return EC;
+
+ return std::error_code();
+ }
+
+ std::error_code reserveMem(TargetAddress &RemoteAddr,
+ ResourceIdMgr::ResourceId Id, uint64_t Size,
+ uint32_t Align) {
+
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ if (std::error_code EC = call<ReserveMem>(Channel, Id, Size, Align))
+ return EC;
+
+ return expect<ReserveMemResponse>(Channel, readArgs(RemoteAddr));
+ }
+
+ std::error_code setProtections(ResourceIdMgr::ResourceId Id,
+ TargetAddress RemoteSegAddr,
+ unsigned ProtFlags) {
+ return call<SetProtections>(Channel, Id, RemoteSegAddr, ProtFlags);
+ }
+
+ std::error_code writeMem(TargetAddress Addr, const char *Src, uint64_t Size) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ // Make the send call.
+ if (auto EC = call<WriteMem>(Channel, Addr, Size))
+ return EC;
+
+ // Follow this up with the section contents.
+ if (auto EC = Channel.appendBytes(Src, Size))
+ return EC;
+
+ return Channel.send();
+ }
+
+ std::error_code writePointer(TargetAddress Addr, TargetAddress PtrVal) {
+ // Check for an 'out-of-band' error, e.g. from an MM destructor.
+ if (ExistingError)
+ return ExistingError;
+
+ return call<WritePtr>(Channel, Addr, PtrVal);
+ }
+
+ static std::error_code doNothing() { return std::error_code(); }
+
+ ChannelT &Channel;
+ std::error_code ExistingError;
+ std::string RemoteTargetTriple;
+ uint32_t RemotePointerSize;
+ uint32_t RemotePageSize;
+ uint32_t RemoteTrampolineSize;
+ uint32_t RemoteIndirectStubSize;
+ ResourceIdMgr AllocatorIds, IndirectStubOwnerIds;
+ std::function<TargetAddress(TargetAddress)> CompileCallback;
+};
+
+} // end namespace remote
+} // end namespace orc
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
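
End-to-end, the client side of a session looks roughly like this (a sketch:
error handling is abbreviated and the channel is assumed to be an
already-connected RPCChannel implementation):

    #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"

    using namespace llvm::orc;
    using namespace llvm::orc::remote;

    std::error_code runRemoteMain(RPCChannel &Channel) {
      typedef OrcRemoteTargetClient<RPCChannel> ClientT;

      // Handshakes with the server and caches the remote triple/sizes.
      auto Client = ClientT::Create(Channel);
      if (!Client)
        return Client.getError();

      TargetAddress MainAddr = 0;
      if (auto EC = Client->getSymbolAddress(MainAddr, "main"))
        return EC;

      int Result = 0;
      if (auto EC = Client->callMain(Result, MainAddr, {"hello"}))
        return EC;

      return Client->terminateSession();
    }
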
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
new file mode 100644
index 000000000000..96dc24251026
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h
@@ -0,0 +1,185 @@
+//===--- OrcRemoteTargetRPCAPI.h - Orc Remote-target RPC API ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Orc remote-target RPC API. It should not be used
+// directly, but is used by the OrcRemoteTargetClient and
+// OrcRemoteTargetServer classes.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETRPCAPI_H
+
+#include "JITSymbol.h"
+#include "RPCChannel.h"
+#include "RPCUtils.h"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+class OrcRemoteTargetRPCAPI : public RPC<RPCChannel> {
+protected:
+ class ResourceIdMgr {
+ public:
+ typedef uint64_t ResourceId;
+ ResourceIdMgr() : NextId(0) {}
+ ResourceId getNext() {
+ if (!FreeIds.empty()) {
+ ResourceId I = FreeIds.back();
+ FreeIds.pop_back();
+ return I;
+ }
+ return NextId++;
+ }
+ void release(ResourceId I) { FreeIds.push_back(I); }
+
+ private:
+ ResourceId NextId;
+ std::vector<ResourceId> FreeIds;
+ };
+
+public:
+ enum JITProcId : uint32_t {
+ InvalidId = 0,
+ CallIntVoidId,
+ CallIntVoidResponseId,
+ CallMainId,
+ CallMainResponseId,
+ CallVoidVoidId,
+ CallVoidVoidResponseId,
+ CreateRemoteAllocatorId,
+ CreateIndirectStubsOwnerId,
+ DestroyRemoteAllocatorId,
+ DestroyIndirectStubsOwnerId,
+ EmitIndirectStubsId,
+ EmitIndirectStubsResponseId,
+ EmitResolverBlockId,
+ EmitTrampolineBlockId,
+ EmitTrampolineBlockResponseId,
+ GetSymbolAddressId,
+ GetSymbolAddressResponseId,
+ GetRemoteInfoId,
+ GetRemoteInfoResponseId,
+ ReadMemId,
+ ReadMemResponseId,
+ ReserveMemId,
+ ReserveMemResponseId,
+ RequestCompileId,
+ RequestCompileResponseId,
+ SetProtectionsId,
+ TerminateSessionId,
+ WriteMemId,
+ WritePtrId
+ };
+
+ static const char *getJITProcIdName(JITProcId Id);
+
+ typedef Procedure<CallIntVoidId, TargetAddress /* FnAddr */> CallIntVoid;
+
+ typedef Procedure<CallIntVoidResponseId, int /* Result */>
+ CallIntVoidResponse;
+
+ typedef Procedure<CallMainId, TargetAddress /* FnAddr */,
+ std::vector<std::string> /* Args */>
+ CallMain;
+
+ typedef Procedure<CallMainResponseId, int /* Result */> CallMainResponse;
+
+ typedef Procedure<CallVoidVoidId, TargetAddress /* FnAddr */> CallVoidVoid;
+
+ typedef Procedure<CallVoidVoidResponseId> CallVoidVoidResponse;
+
+ typedef Procedure<CreateRemoteAllocatorId,
+ ResourceIdMgr::ResourceId /* Allocator ID */>
+ CreateRemoteAllocator;
+
+ typedef Procedure<CreateIndirectStubsOwnerId,
+ ResourceIdMgr::ResourceId /* StubsOwner ID */>
+ CreateIndirectStubsOwner;
+
+ typedef Procedure<DestroyRemoteAllocatorId,
+ ResourceIdMgr::ResourceId /* Allocator ID */>
+ DestroyRemoteAllocator;
+
+ typedef Procedure<DestroyIndirectStubsOwnerId,
+ ResourceIdMgr::ResourceId /* StubsOwner ID */>
+ DestroyIndirectStubsOwner;
+
+ typedef Procedure<EmitIndirectStubsId,
+ ResourceIdMgr::ResourceId /* StubsOwner ID */,
+ uint32_t /* NumStubsRequired */>
+ EmitIndirectStubs;
+
+ typedef Procedure<
+ EmitIndirectStubsResponseId, TargetAddress /* StubsBaseAddr */,
+ TargetAddress /* PtrsBaseAddr */, uint32_t /* NumStubsEmitted */>
+ EmitIndirectStubsResponse;
+
+ typedef Procedure<EmitResolverBlockId> EmitResolverBlock;
+
+ typedef Procedure<EmitTrampolineBlockId> EmitTrampolineBlock;
+
+ typedef Procedure<EmitTrampolineBlockResponseId,
+ TargetAddress /* BlockAddr */,
+ uint32_t /* NumTrampolines */>
+ EmitTrampolineBlockResponse;
+
+ typedef Procedure<GetSymbolAddressId, std::string /*SymbolName*/>
+ GetSymbolAddress;
+
+ typedef Procedure<GetSymbolAddressResponseId, uint64_t /* SymbolAddr */>
+ GetSymbolAddressResponse;
+
+ typedef Procedure<GetRemoteInfoId> GetRemoteInfo;
+
+ typedef Procedure<GetRemoteInfoResponseId, std::string /* Triple */,
+ uint32_t /* PointerSize */, uint32_t /* PageSize */,
+ uint32_t /* TrampolineSize */,
+ uint32_t /* IndirectStubSize */>
+ GetRemoteInfoResponse;
+
+ typedef Procedure<ReadMemId, TargetAddress /* Src */, uint64_t /* Size */>
+ ReadMem;
+
+ typedef Procedure<ReadMemResponseId> ReadMemResponse;
+
+ typedef Procedure<ReserveMemId, ResourceIdMgr::ResourceId /* Id */,
+ uint64_t /* Size */, uint32_t /* Align */>
+ ReserveMem;
+
+ typedef Procedure<ReserveMemResponseId, TargetAddress /* Addr */>
+ ReserveMemResponse;
+
+ typedef Procedure<RequestCompileId, TargetAddress /* TrampolineAddr */>
+ RequestCompile;
+
+ typedef Procedure<RequestCompileResponseId, TargetAddress /* ImplAddr */>
+ RequestCompileResponse;
+
+ typedef Procedure<SetProtectionsId, ResourceIdMgr::ResourceId /* Id */,
+ TargetAddress /* Dst */, uint32_t /* ProtFlags */>
+ SetProtections;
+
+ typedef Procedure<TerminateSessionId> TerminateSession;
+
+ typedef Procedure<WriteMemId, TargetAddress /* Dst */, uint64_t /* Size */
+ /* Data should follow */>
+ WriteMem;
+
+ typedef Procedure<WritePtrId, TargetAddress /* Dst */,
+ TargetAddress /* Val */>
+ WritePtr;
+};
+
+} // end namespace remote
+} // end namespace orc
+} // end namespace llvm
+
+#endif
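
Each typedef above binds a wire id to a typed argument list; call<> and
handle<>/expect<> (from RPCUtils.h) do the serialization. A small sketch of use
from a subclass, mirroring how the client and server headers use the same
machinery (PointerWriter is a hypothetical example, not part of the patch):

    #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"

    class PointerWriter : public llvm::orc::remote::OrcRemoteTargetRPCAPI {
    public:
      // Serializes WritePtrId followed by Dst and Val onto the channel.
      static std::error_code write(llvm::orc::remote::RPCChannel &C,
                                   llvm::orc::TargetAddress Dst,
                                   llvm::orc::TargetAddress Val) {
        return call<WritePtr>(C, Dst, Val);
      }
    };
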
diff --git a/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
new file mode 100644
index 000000000000..5247661e49ce
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h
@@ -0,0 +1,432 @@
+//===---- OrcRemoteTargetServer.h - Orc Remote-target Server ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the OrcRemoteTargetServer class. It can be used to build a
+// JIT server that can execute code sent from an OrcRemoteTargetClient.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
+#define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H
+
+#include "OrcRemoteTargetRPCAPI.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Process.h"
+#include "llvm/Support/raw_ostream.h"
+#include <map>
+
+#define DEBUG_TYPE "orc-remote"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+template <typename ChannelT, typename TargetT>
+class OrcRemoteTargetServer : public OrcRemoteTargetRPCAPI {
+public:
+ typedef std::function<TargetAddress(const std::string &Name)>
+ SymbolLookupFtor;
+
+ OrcRemoteTargetServer(ChannelT &Channel, SymbolLookupFtor SymbolLookup)
+ : Channel(Channel), SymbolLookup(std::move(SymbolLookup)) {}
+
+ std::error_code getNextProcId(JITProcId &Id) {
+ return deserialize(Channel, Id);
+ }
+
+ std::error_code handleKnownProcedure(JITProcId Id) {
+ typedef OrcRemoteTargetServer ThisT;
+
+ DEBUG(dbgs() << "Handling known proc: " << getJITProcIdName(Id) << "\n");
+
+ switch (Id) {
+ case CallIntVoidId:
+ return handle<CallIntVoid>(Channel, *this, &ThisT::handleCallIntVoid);
+ case CallMainId:
+ return handle<CallMain>(Channel, *this, &ThisT::handleCallMain);
+ case CallVoidVoidId:
+ return handle<CallVoidVoid>(Channel, *this, &ThisT::handleCallVoidVoid);
+ case CreateRemoteAllocatorId:
+ return handle<CreateRemoteAllocator>(Channel, *this,
+ &ThisT::handleCreateRemoteAllocator);
+ case CreateIndirectStubsOwnerId:
+ return handle<CreateIndirectStubsOwner>(
+ Channel, *this, &ThisT::handleCreateIndirectStubsOwner);
+ case DestroyRemoteAllocatorId:
+ return handle<DestroyRemoteAllocator>(
+ Channel, *this, &ThisT::handleDestroyRemoteAllocator);
+ case DestroyIndirectStubsOwnerId:
+ return handle<DestroyIndirectStubsOwner>(
+ Channel, *this, &ThisT::handleDestroyIndirectStubsOwner);
+ case EmitIndirectStubsId:
+ return handle<EmitIndirectStubs>(Channel, *this,
+ &ThisT::handleEmitIndirectStubs);
+ case EmitResolverBlockId:
+ return handle<EmitResolverBlock>(Channel, *this,
+ &ThisT::handleEmitResolverBlock);
+ case EmitTrampolineBlockId:
+ return handle<EmitTrampolineBlock>(Channel, *this,
+ &ThisT::handleEmitTrampolineBlock);
+ case GetSymbolAddressId:
+ return handle<GetSymbolAddress>(Channel, *this,
+ &ThisT::handleGetSymbolAddress);
+ case GetRemoteInfoId:
+ return handle<GetRemoteInfo>(Channel, *this, &ThisT::handleGetRemoteInfo);
+ case ReadMemId:
+ return handle<ReadMem>(Channel, *this, &ThisT::handleReadMem);
+ case ReserveMemId:
+ return handle<ReserveMem>(Channel, *this, &ThisT::handleReserveMem);
+ case SetProtectionsId:
+ return handle<SetProtections>(Channel, *this,
+ &ThisT::handleSetProtections);
+ case WriteMemId:
+ return handle<WriteMem>(Channel, *this, &ThisT::handleWriteMem);
+ case WritePtrId:
+ return handle<WritePtr>(Channel, *this, &ThisT::handleWritePtr);
+ default:
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
+ }
+
+ llvm_unreachable("Unhandled JIT RPC procedure Id.");
+ }
+
+ std::error_code requestCompile(TargetAddress &CompiledFnAddr,
+ TargetAddress TrampolineAddr) {
+ if (auto EC = call<RequestCompile>(Channel, TrampolineAddr))
+ return EC;
+
+ while (true) {
+ JITProcId Id = InvalidId;
+ if (auto EC = getNextProcId(Id))
+ return EC;
+
+ switch (Id) {
+ case RequestCompileResponseId:
+ return handle<RequestCompileResponse>(Channel,
+ readArgs(CompiledFnAddr));
+ default:
+ if (auto EC = handleKnownProcedure(Id))
+ return EC;
+ }
+ }
+
+ llvm_unreachable("Fell through request-compile command loop.");
+ }
+
+private:
+ struct Allocator {
+ Allocator() = default;
+ Allocator(Allocator &&Other) : Allocs(std::move(Other.Allocs)) {}
+ Allocator &operator=(Allocator &&Other) {
+ Allocs = std::move(Other.Allocs);
+ return *this;
+ }
+
+ ~Allocator() {
+ for (auto &Alloc : Allocs)
+ sys::Memory::releaseMappedMemory(Alloc.second);
+ }
+
+ std::error_code allocate(void *&Addr, size_t Size, uint32_t Align) {
+ std::error_code EC;
+ sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(
+ Size, nullptr, sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC);
+ if (EC)
+ return EC;
+
+ Addr = MB.base();
+ assert(Allocs.find(MB.base()) == Allocs.end() && "Duplicate alloc");
+ Allocs[MB.base()] = std::move(MB);
+ return std::error_code();
+ }
+
+ std::error_code setProtections(void *block, unsigned Flags) {
+ auto I = Allocs.find(block);
+ if (I == Allocs.end())
+ return orcError(OrcErrorCode::RemoteMProtectAddrUnrecognized);
+ return sys::Memory::protectMappedMemory(I->second, Flags);
+ }
+
+ private:
+ std::map<void *, sys::MemoryBlock> Allocs;
+ };
+
+ static std::error_code doNothing() { return std::error_code(); }
+
+ static TargetAddress reenter(void *JITTargetAddr, void *TrampolineAddr) {
+ TargetAddress CompiledFnAddr = 0;
+
+ auto T = static_cast<OrcRemoteTargetServer *>(JITTargetAddr);
+ auto EC = T->requestCompile(
+ CompiledFnAddr, static_cast<TargetAddress>(
+ reinterpret_cast<uintptr_t>(TrampolineAddr)));
+ assert(!EC && "Compile request failed");
+ (void)EC;
+ return CompiledFnAddr;
+ }
+
+ std::error_code handleCallIntVoid(TargetAddress Addr) {
+ typedef int (*IntVoidFnTy)();
+ IntVoidFnTy Fn =
+ reinterpret_cast<IntVoidFnTy>(static_cast<uintptr_t>(Addr));
+
+ DEBUG(dbgs() << " Calling "
+ << reinterpret_cast<void *>(reinterpret_cast<intptr_t>(Fn))
+ << "\n");
+ int Result = Fn();
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+
+ return call<CallIntVoidResponse>(Channel, Result);
+ }
+
+ std::error_code handleCallMain(TargetAddress Addr,
+ std::vector<std::string> Args) {
+ typedef int (*MainFnTy)(int, const char *[]);
+
+ MainFnTy Fn = reinterpret_cast<MainFnTy>(static_cast<uintptr_t>(Addr));
+ int ArgC = Args.size() + 1;
+ int Idx = 1;
+ std::unique_ptr<const char *[]> ArgV(new const char *[ArgC + 1]);
+ ArgV[0] = "<jit process>";
+ for (auto &Arg : Args)
+ ArgV[Idx++] = Arg.c_str();
+
+ DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n");
+ int Result = Fn(ArgC, ArgV.get());
+ DEBUG(dbgs() << " Result = " << Result << "\n");
+
+ return call<CallMainResponse>(Channel, Result);
+ }
+
+ std::error_code handleCallVoidVoid(TargetAddress Addr) {
+ typedef void (*VoidVoidFnTy)();
+ VoidVoidFnTy Fn =
+ reinterpret_cast<VoidVoidFnTy>(static_cast<uintptr_t>(Addr));
+
+ DEBUG(dbgs() << " Calling " << reinterpret_cast<void *>(Fn) << "\n");
+ Fn();
+ DEBUG(dbgs() << " Complete.\n");
+
+ return call<CallVoidVoidResponse>(Channel);
+ }
+
+ std::error_code handleCreateRemoteAllocator(ResourceIdMgr::ResourceId Id) {
+ auto I = Allocators.find(Id);
+ if (I != Allocators.end())
+ return orcError(OrcErrorCode::RemoteAllocatorIdAlreadyInUse);
+ DEBUG(dbgs() << " Created allocator " << Id << "\n");
+ Allocators[Id] = Allocator();
+ return std::error_code();
+ }
+
+ std::error_code handleCreateIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
+ auto I = IndirectStubsOwners.find(Id);
+ if (I != IndirectStubsOwners.end())
+ return orcError(OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse);
+ DEBUG(dbgs() << " Create indirect stubs owner " << Id << "\n");
+ IndirectStubsOwners[Id] = ISBlockOwnerList();
+ return std::error_code();
+ }
+
+ std::error_code handleDestroyRemoteAllocator(ResourceIdMgr::ResourceId Id) {
+ auto I = Allocators.find(Id);
+ if (I == Allocators.end())
+ return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist);
+ Allocators.erase(I);
+ DEBUG(dbgs() << " Destroyed allocator " << Id << "\n");
+ return std::error_code();
+ }
+
+ std::error_code
+ handleDestroyIndirectStubsOwner(ResourceIdMgr::ResourceId Id) {
+ auto I = IndirectStubsOwners.find(Id);
+ if (I == IndirectStubsOwners.end())
+ return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist);
+ IndirectStubsOwners.erase(I);
+ return std::error_code();
+ }
+
+ std::error_code handleEmitIndirectStubs(ResourceIdMgr::ResourceId Id,
+ uint32_t NumStubsRequired) {
+ DEBUG(dbgs() << " ISMgr " << Id << " request " << NumStubsRequired
+ << " stubs.\n");
+
+ auto StubOwnerItr = IndirectStubsOwners.find(Id);
+ if (StubOwnerItr == IndirectStubsOwners.end())
+ return orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist);
+
+ typename TargetT::IndirectStubsInfo IS;
+ if (auto EC =
+ TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr))
+ return EC;
+
+ TargetAddress StubsBase =
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getStub(0)));
+ TargetAddress PtrsBase =
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(IS.getPtr(0)));
+ uint32_t NumStubsEmitted = IS.getNumStubs();
+
+ auto &BlockList = StubOwnerItr->second;
+ BlockList.push_back(std::move(IS));
+
+ return call<EmitIndirectStubsResponse>(Channel, StubsBase, PtrsBase,
+ NumStubsEmitted);
+ }
+
+ std::error_code handleEmitResolverBlock() {
+ std::error_code EC;
+ ResolverBlock = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ TargetT::ResolverCodeSize, nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+ if (EC)
+ return EC;
+
+ TargetT::writeResolverCode(static_cast<uint8_t *>(ResolverBlock.base()),
+ &reenter, this);
+
+ return sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(),
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+ }
+
+ std::error_code handleEmitTrampolineBlock() {
+ std::error_code EC;
+ auto TrampolineBlock =
+ sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
+ sys::Process::getPageSize(), nullptr,
+ sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
+ if (EC)
+ return EC;
+
+ unsigned NumTrampolines =
+ (sys::Process::getPageSize() - TargetT::PointerSize) /
+ TargetT::TrampolineSize;
+
+ uint8_t *TrampolineMem = static_cast<uint8_t *>(TrampolineBlock.base());
+ TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(),
+ NumTrampolines);
+
+ EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(),
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC);
+
+ TrampolineBlocks.push_back(std::move(TrampolineBlock));
+
+ return call<EmitTrampolineBlockResponse>(
+ Channel,
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(TrampolineMem)),
+ NumTrampolines);
+ }
+
+ std::error_code handleGetSymbolAddress(const std::string &Name) {
+ TargetAddress Addr = SymbolLookup(Name);
+ DEBUG(dbgs() << " Symbol '" << Name << "' = " << format("0x%016x", Addr)
+ << "\n");
+ return call<GetSymbolAddressResponse>(Channel, Addr);
+ }
+
+ std::error_code handleGetRemoteInfo() {
+ std::string ProcessTriple = sys::getProcessTriple();
+ uint32_t PointerSize = TargetT::PointerSize;
+ uint32_t PageSize = sys::Process::getPageSize();
+ uint32_t TrampolineSize = TargetT::TrampolineSize;
+ uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize;
+ DEBUG(dbgs() << " Remote info:\n"
+ << " triple = '" << ProcessTriple << "'\n"
+ << " pointer size = " << PointerSize << "\n"
+ << " page size = " << PageSize << "\n"
+ << " trampoline size = " << TrampolineSize << "\n"
+ << " indirect stub size = " << IndirectStubSize << "\n");
+ return call<GetRemoteInfoResponse>(Channel, ProcessTriple, PointerSize,
+ PageSize, TrampolineSize,
+ IndirectStubSize);
+ }
+
+ std::error_code handleReadMem(TargetAddress RSrc, uint64_t Size) {
+ char *Src = reinterpret_cast<char *>(static_cast<uintptr_t>(RSrc));
+
+ DEBUG(dbgs() << " Reading " << Size << " bytes from "
+ << static_cast<void *>(Src) << "\n");
+
+ if (auto EC = call<ReadMemResponse>(Channel))
+ return EC;
+
+ if (auto EC = Channel.appendBytes(Src, Size))
+ return EC;
+
+ return Channel.send();
+ }
+
+ std::error_code handleReserveMem(ResourceIdMgr::ResourceId Id, uint64_t Size,
+ uint32_t Align) {
+ auto I = Allocators.find(Id);
+ if (I == Allocators.end())
+ return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist);
+ auto &Allocator = I->second;
+ void *LocalAllocAddr = nullptr;
+ if (auto EC = Allocator.allocate(LocalAllocAddr, Size, Align))
+ return EC;
+
+ DEBUG(dbgs() << " Allocator " << Id << " reserved " << LocalAllocAddr
+ << " (" << Size << " bytes, alignment " << Align << ")\n");
+
+ TargetAddress AllocAddr =
+ static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(LocalAllocAddr));
+
+ return call<ReserveMemResponse>(Channel, AllocAddr);
+ }
+
+ std::error_code handleSetProtections(ResourceIdMgr::ResourceId Id,
+ TargetAddress Addr, uint32_t Flags) {
+ auto I = Allocators.find(Id);
+ if (I == Allocators.end())
+ return orcError(OrcErrorCode::RemoteAllocatorDoesNotExist);
+ auto &Allocator = I->second;
+ void *LocalAddr = reinterpret_cast<void *>(static_cast<uintptr_t>(Addr));
+ DEBUG(dbgs() << " Allocator " << Id << " set permissions on " << LocalAddr
+ << " to " << (Flags & sys::Memory::MF_READ ? 'R' : '-')
+ << (Flags & sys::Memory::MF_WRITE ? 'W' : '-')
+ << (Flags & sys::Memory::MF_EXEC ? 'X' : '-') << "\n");
+ return Allocator.setProtections(LocalAddr, Flags);
+ }
+
+ std::error_code handleWriteMem(TargetAddress RDst, uint64_t Size) {
+ char *Dst = reinterpret_cast<char *>(static_cast<uintptr_t>(RDst));
+ DEBUG(dbgs() << " Writing " << Size << " bytes to "
+ << format("0x%016x", RDst) << "\n");
+ return Channel.readBytes(Dst, Size);
+ }
+
+ std::error_code handleWritePtr(TargetAddress Addr, TargetAddress PtrVal) {
+ DEBUG(dbgs() << " Writing pointer *" << format("0x%016x", Addr) << " = "
+ << format("0x%016x", PtrVal) << "\n");
+ uintptr_t *Ptr =
+ reinterpret_cast<uintptr_t *>(static_cast<uintptr_t>(Addr));
+ *Ptr = static_cast<uintptr_t>(PtrVal);
+ return std::error_code();
+ }
+
+ ChannelT &Channel;
+ SymbolLookupFtor SymbolLookup;
+ std::map<ResourceIdMgr::ResourceId, Allocator> Allocators;
+ typedef std::vector<typename TargetT::IndirectStubsInfo> ISBlockOwnerList;
+ std::map<ResourceIdMgr::ResourceId, ISBlockOwnerList> IndirectStubsOwners;
+ sys::OwningMemoryBlock ResolverBlock;
+ std::vector<sys::OwningMemoryBlock> TrampolineBlocks;
+};
+
+} // end namespace remote
+} // end namespace orc
+} // end namespace llvm
+
+#undef DEBUG_TYPE
+
+#endif
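
A minimal server main loop, in the spirit of lli's child-target tool (a sketch:
the channel instance and error handling are placeholders):

    #include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
    #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
    #include "llvm/Support/DynamicLibrary.h"

    using namespace llvm;
    using namespace llvm::orc;
    using namespace llvm::orc::remote;

    int serveJITRequests(RPCChannel &Channel) {
      typedef OrcRemoteTargetServer<RPCChannel, OrcX86_64> ServerT;

      // Expose the server process's own symbols to remote lookups.
      sys::DynamicLibrary::LoadLibraryPermanently(nullptr);
      auto SymbolLookup = [](const std::string &Name) {
        return static_cast<TargetAddress>(reinterpret_cast<uintptr_t>(
            sys::DynamicLibrary::SearchForAddressOfSymbol(Name)));
      };

      ServerT Server(Channel, SymbolLookup);

      // Dispatch procedures until the client ends the session.
      while (true) {
        ServerT::JITProcId Id = ServerT::InvalidId;
        if (Server.getNextProcId(Id))
          return 1;
        if (Id == ServerT::TerminateSessionId)
          return 0;
        if (Server.handleKnownProcedure(Id))
          return 1;
      }
    }
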
diff --git a/include/llvm/ExecutionEngine/Orc/RPCChannel.h b/include/llvm/ExecutionEngine/Orc/RPCChannel.h
new file mode 100644
index 000000000000..b97b6daf5864
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/RPCChannel.h
@@ -0,0 +1,179 @@
+//===------ RPCChannel.h - Orc RPC channel interface ------------*- C++ -*-===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H
+#define LLVM_EXECUTIONENGINE_ORC_RPCCHANNEL_H
+
+#include "OrcError.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/Support/Endian.h"
+
+#include <system_error>
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+/// Interface for byte-streams to be used with RPC.
+class RPCChannel {
+public:
+ virtual ~RPCChannel() {}
+
+ /// Read Size bytes from the stream into *Dst.
+ virtual std::error_code readBytes(char *Dst, unsigned Size) = 0;
+
+ /// Read size bytes from *Src and append them to the stream.
+ virtual std::error_code appendBytes(const char *Src, unsigned Size) = 0;
+
+ /// Flush the stream if possible.
+ virtual std::error_code send() = 0;
+};
+
+/// RPC channel serialization for a variadic list of arguments.
+template <typename T, typename... Ts>
+std::error_code serialize_seq(RPCChannel &C, const T &Arg, const Ts &... Args) {
+ if (auto EC = serialize(C, Arg))
+ return EC;
+ return serialize_seq(C, Args...);
+}
+
+/// RPC channel serialization for an (empty) variadic list of arguments.
+inline std::error_code serialize_seq(RPCChannel &C) {
+ return std::error_code();
+}
+
+/// RPC channel deserialization for a variadic list of arguments.
+template <typename T, typename... Ts>
+std::error_code deserialize_seq(RPCChannel &C, T &Arg, Ts &... Args) {
+ if (auto EC = deserialize(C, Arg))
+ return EC;
+ return deserialize_seq(C, Args...);
+}
+
+/// RPC channel deserialization for an (empty) variadic list of arguments.
+inline std::error_code deserialize_seq(RPCChannel &C) {
+ return std::error_code();
+}
+
+/// RPC channel serialization for integer primitives.
+template <typename T>
+typename std::enable_if<
+ std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
+ std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
+ std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
+ std::error_code>::type
+serialize(RPCChannel &C, T V) {
+ V = support::endian::byte_swap<T, support::big>(V);
+ return C.appendBytes(reinterpret_cast<const char *>(&V), sizeof(T));
+}
+
+/// RPC channel deserialization for integer primitives.
+template <typename T>
+typename std::enable_if<
+ std::is_same<T, uint64_t>::value || std::is_same<T, int64_t>::value ||
+ std::is_same<T, uint32_t>::value || std::is_same<T, int32_t>::value ||
+ std::is_same<T, uint16_t>::value || std::is_same<T, int16_t>::value ||
+ std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value,
+ std::error_code>::type
+deserialize(RPCChannel &C, T &V) {
+ if (auto EC = C.readBytes(reinterpret_cast<char *>(&V), sizeof(T)))
+ return EC;
+ V = support::endian::byte_swap<T, support::big>(V);
+ return std::error_code();
+}
+
+/// RPC channel serialization for enums.
+template <typename T>
+typename std::enable_if<std::is_enum<T>::value, std::error_code>::type
+serialize(RPCChannel &C, T V) {
+ return serialize(C, static_cast<typename std::underlying_type<T>::type>(V));
+}
+
+/// RPC channel deserialization for enums.
+template <typename T>
+typename std::enable_if<std::is_enum<T>::value, std::error_code>::type
+deserialize(RPCChannel &C, T &V) {
+ typename std::underlying_type<T>::type Tmp;
+ std::error_code EC = deserialize(C, Tmp);
+ V = static_cast<T>(Tmp);
+ return EC;
+}
+
+/// RPC channel serialization for bools.
+inline std::error_code serialize(RPCChannel &C, bool V) {
+ uint8_t VN = V ? 1 : 0;
+ return C.appendBytes(reinterpret_cast<const char *>(&VN), 1);
+}
+
+/// RPC channel deserialization for bools.
+inline std::error_code deserialize(RPCChannel &C, bool &V) {
+ uint8_t VN = 0;
+ if (auto EC = C.readBytes(reinterpret_cast<char *>(&VN), 1))
+ return EC;
+
+  V = (VN != 0);
+ return std::error_code();
+}
+
+/// RPC channel serialization for StringRefs.
+/// Note: There is no corresponding deserialization for this, as StringRef
+/// doesn't own its memory and so can't hold the deserialized data.
+inline std::error_code serialize(RPCChannel &C, StringRef S) {
+ if (auto EC = serialize(C, static_cast<uint64_t>(S.size())))
+ return EC;
+  return C.appendBytes(S.data(), S.size());
+}
+
+/// RPC channel serialization for std::strings.
+inline std::error_code serialize(RPCChannel &C, const std::string &S) {
+ return serialize(C, StringRef(S));
+}
+
+/// RPC channel deserialization for std::strings.
+inline std::error_code deserialize(RPCChannel &C, std::string &S) {
+ uint64_t Count;
+ if (auto EC = deserialize(C, Count))
+ return EC;
+ S.resize(Count);
+ return C.readBytes(&S[0], Count);
+}
+
+/// RPC channel serialization for ArrayRef<T>.
+template <typename T>
+std::error_code serialize(RPCChannel &C, const ArrayRef<T> &A) {
+ if (auto EC = serialize(C, static_cast<uint64_t>(A.size())))
+ return EC;
+
+ for (const auto &E : A)
+ if (auto EC = serialize(C, E))
+ return EC;
+
+ return std::error_code();
+}
+
+/// RPC channel serialization for std::vector<T>.
+template <typename T>
+std::error_code serialize(RPCChannel &C, const std::vector<T> &V) {
+ return serialize(C, ArrayRef<T>(V));
+}
+
+/// RPC channel deserialization for std::vector<T>.
+template <typename T>
+std::error_code deserialize(RPCChannel &C, std::vector<T> &V) {
+ uint64_t Count = 0;
+ if (auto EC = deserialize(C, Count))
+ return EC;
+
+ V.resize(Count);
+ for (auto &E : V)
+ if (auto EC = deserialize(C, E))
+ return EC;
+
+ return std::error_code();
+}
+
+} // end namespace remote
+} // end namespace orc
+} // end namespace llvm
+
+#endif
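
Putting the primitives above together, a round trip through the hypothetical
QueueChannel sketched earlier looks like this (names are illustrative only):

    using namespace llvm::orc::remote;
    QueueChannel C;
    std::vector<std::string> Out = {"hello", "world"}, In;
    if (auto EC = serialize(C, Out))   // writes a uint64_t count, then each string
      ; /* handle EC */
    if (auto EC = deserialize(C, In))  // reads the count, then each string back
      ; /* handle EC */
    // On success, In == Out.
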
diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
new file mode 100644
index 000000000000..0bd5cbc0cdde
--- /dev/null
+++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -0,0 +1,266 @@
+//===----- RPCUtils.h - Basic utilities for building RPC APIs ---*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Basic utilities for building RPC APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H
+#define LLVM_EXECUTIONENGINE_ORC_RPCUTILS_H
+
+#include "llvm/ADT/STLExtras.h"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+// Base class containing utilities that require partial specialization.
+// These cannot be included in RPC, as template class members cannot be
+// partially specialized.
+class RPCBase {
+protected:
+ template <typename ProcedureIdT, ProcedureIdT ProcId, typename... Ts>
+ class ProcedureHelper {
+ public:
+ static const ProcedureIdT Id = ProcId;
+ };
+
+ template <typename ChannelT, typename Proc> class CallHelper;
+
+ template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId,
+ typename... ArgTs>
+ class CallHelper<ChannelT, ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> {
+ public:
+ static std::error_code call(ChannelT &C, const ArgTs &... Args) {
+ if (auto EC = serialize(C, ProcId))
+ return EC;
+ // If you see a compile-error on this line you're probably calling a
+ // function with the wrong signature.
+ return serialize_seq(C, Args...);
+ }
+ };
+
+ template <typename ChannelT, typename Proc> class HandlerHelper;
+
+ template <typename ChannelT, typename ProcedureIdT, ProcedureIdT ProcId,
+ typename... ArgTs>
+ class HandlerHelper<ChannelT,
+ ProcedureHelper<ProcedureIdT, ProcId, ArgTs...>> {
+ public:
+ template <typename HandlerT>
+ static std::error_code handle(ChannelT &C, HandlerT Handler) {
+ return readAndHandle(C, Handler, llvm::index_sequence_for<ArgTs...>());
+ }
+
+ private:
+ template <typename HandlerT, size_t... Is>
+ static std::error_code readAndHandle(ChannelT &C, HandlerT Handler,
+ llvm::index_sequence<Is...> _) {
+ std::tuple<ArgTs...> RPCArgs;
+ if (auto EC = deserialize_seq(C, std::get<Is>(RPCArgs)...))
+ return EC;
+ return Handler(std::get<Is>(RPCArgs)...);
+ }
+ };
+
+ template <typename ClassT, typename... ArgTs> class MemberFnWrapper {
+ public:
+ typedef std::error_code (ClassT::*MethodT)(ArgTs...);
+ MemberFnWrapper(ClassT &Instance, MethodT Method)
+ : Instance(Instance), Method(Method) {}
+ std::error_code operator()(ArgTs &... Args) {
+ return (Instance.*Method)(Args...);
+ }
+
+ private:
+ ClassT &Instance;
+ MethodT Method;
+ };
+
+ template <typename... ArgTs> class ReadArgs {
+ public:
+ std::error_code operator()() { return std::error_code(); }
+ };
+
+ template <typename ArgT, typename... ArgTs>
+ class ReadArgs<ArgT, ArgTs...> : public ReadArgs<ArgTs...> {
+ public:
+ ReadArgs(ArgT &Arg, ArgTs &... Args)
+ : ReadArgs<ArgTs...>(Args...), Arg(Arg) {}
+
+ std::error_code operator()(ArgT &ArgVal, ArgTs &... ArgVals) {
+ this->Arg = std::move(ArgVal);
+ return ReadArgs<ArgTs...>::operator()(ArgVals...);
+ }
+
+ private:
+ ArgT &Arg;
+ };
+};
+
+/// Contains primitive utilities for defining, calling and handling calls to
+/// remote procedures. ChannelT is a bidirectional stream conforming to the
+/// RPCChannel interface (see RPCChannel.h), and ProcedureIdT is a procedure
+/// identifier type that must be serializable on ChannelT.
+///
+/// These utilities support the construction of very primitive RPC APIs.
+/// Their intent is to ensure correct serialization and deserialization of
+/// procedure arguments, and to keep the client and server's view of the API in
+/// sync.
+///
+/// These utilities do not support return values (these can be handled by
+/// declaring a corresponding '.*Response' procedure and expecting it after a
+/// call). They also do not support versioning: the client and server *must* be
+/// compiled with the same procedure definitions.
+///
+/// Overview (see comments on individual types/methods for details):
+///
+/// Procedure<Id, Args...> :
+///
+///   Associates a unique serializable id with an argument list.
+///
+/// call<Proc>(Channel, Args...) :
+///
+///   Calls the remote procedure 'Proc' by serializing Proc's id followed by its
+///   arguments and sending the resulting bytes to 'Channel'.
+///
+/// handle<Proc>(Channel, <functor matching std::error_code(Args...)>) :
+///
+///   Handles a call to 'Proc' by deserializing its arguments and calling the
+///   given functor. This assumes that the id for 'Proc' has already been
+///   deserialized.
+///
+/// expect<Proc>(Channel, <functor matching std::error_code(Args...)>) :
+///
+///   The same as 'handle', except that the procedure id should not have been
+///   read yet. Expect will deserialize the id and check that it matches Proc's
+///   id. If it does not, an unexpected RPC call error is returned.
+
+template <typename ChannelT, typename ProcedureIdT = uint32_t>
+class RPC : public RPCBase {
+public:
+ /// Utility class for defining/referring to RPC procedures.
+ ///
+ /// Typedefs of this utility are used when calling/handling remote procedures.
+ ///
+  /// ProcId should be a unique value of ProcedureIdT (i.e. not used with any
+  /// other Procedure typedef in the RPC API being defined).
+  ///
+  /// The template arguments Ts... give the argument list for the remote
+  /// procedure.
+ ///
+ /// E.g.
+ ///
+ /// typedef Procedure<0, bool> Proc1;
+ /// typedef Procedure<1, std::string, std::vector<int>> Proc2;
+ ///
+ /// if (auto EC = call<Proc1>(Channel, true))
+ /// /* handle EC */;
+ ///
+ /// if (auto EC = expect<Proc2>(Channel,
+ /// [](std::string &S, std::vector<int> &V) {
+ /// // Stuff.
+ /// return std::error_code();
+  ///                   }))
+ /// /* handle EC */;
+ ///
+ template <ProcedureIdT ProcId, typename... Ts>
+ using Procedure = ProcedureHelper<ProcedureIdT, ProcId, Ts...>;
+
+ /// Serialize Args... to channel C, but do not call C.send().
+ ///
+ /// For buffered channels, this can be used to queue up several calls before
+ /// flushing the channel.
+ template <typename Proc, typename... ArgTs>
+ static std::error_code appendCall(ChannelT &C, const ArgTs &... Args) {
+ return CallHelper<ChannelT, Proc>::call(C, Args...);
+ }
+
+ /// Serialize Args... to channel C and call C.send().
+ template <typename Proc, typename... ArgTs>
+ static std::error_code call(ChannelT &C, const ArgTs &... Args) {
+ if (auto EC = appendCall<Proc>(C, Args...))
+ return EC;
+ return C.send();
+ }
+
+  /// Deserialize the id of the next procedure on channel C into Id.
+ static std::error_code getNextProcId(ChannelT &C, ProcedureIdT &Id) {
+ return deserialize(C, Id);
+ }
+
+ /// Deserialize args for Proc from C and call Handler. The signature of
+  /// Handler must conform to 'std::error_code(Args...)' where Args... matches
+ /// the arguments used in the Proc typedef.
+ template <typename Proc, typename HandlerT>
+ static std::error_code handle(ChannelT &C, HandlerT Handler) {
+ return HandlerHelper<ChannelT, Proc>::handle(C, Handler);
+ }
+
+ /// Helper version of 'handle' for calling member functions.
+ template <typename Proc, typename ClassT, typename... ArgTs>
+ static std::error_code
+ handle(ChannelT &C, ClassT &Instance,
+ std::error_code (ClassT::*HandlerMethod)(ArgTs...)) {
+ return handle<Proc>(
+ C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod));
+ }
+
+ /// Deserialize a ProcedureIdT from C and verify it matches the id for Proc.
+ /// If the id does match, deserialize the arguments and call the handler
+ /// (similarly to handle).
+  /// If the id does not match, return an unexpected RPC call error and do not
+ /// deserialize any further bytes.
+ template <typename Proc, typename HandlerT>
+ static std::error_code expect(ChannelT &C, HandlerT Handler) {
+ ProcedureIdT ProcId;
+ if (auto EC = getNextProcId(C, ProcId))
+ return EC;
+ if (ProcId != Proc::Id)
+ return orcError(OrcErrorCode::UnexpectedRPCCall);
+ return handle<Proc>(C, Handler);
+ }
+
+ /// Helper version of expect for calling member functions.
+ template <typename Proc, typename ClassT, typename... ArgTs>
+ static std::error_code
+ expect(ChannelT &C, ClassT &Instance,
+ std::error_code (ClassT::*HandlerMethod)(ArgTs...)) {
+ return expect<Proc>(
+ C, MemberFnWrapper<ClassT, ArgTs...>(Instance, HandlerMethod));
+ }
+
+ /// Helper for handling setter procedures - this method returns a functor that
+ /// sets the variables referred to by Args... to values deserialized from the
+ /// channel.
+ /// E.g.
+ ///
+ /// typedef Procedure<0, bool, int> Proc1;
+ ///
+ /// ...
+ /// bool B;
+ /// int I;
+ /// if (auto EC = expect<Proc1>(Channel, readArgs(B, I)))
+ /// /* Handle Args */ ;
+ ///
+ template <typename... ArgTs>
+ static ReadArgs<ArgTs...> readArgs(ArgTs &... Args) {
+ return ReadArgs<ArgTs...>(Args...);
+ }
+};
+
+} // end namespace remote
+} // end namespace orc
+} // end namespace llvm
+
+#endif
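
A compact end-to-end sketch of the API described above, reusing the
hypothetical QueueChannel from RPCChannel.h (the Ping procedure and its id
are invented for illustration):

    using namespace llvm::orc::remote;
    typedef RPC<QueueChannel> MyRPC;
    typedef MyRPC::Procedure<0, std::string> Ping;

    QueueChannel C;
    // Caller side: serialize Ping's id and argument, then flush the channel.
    if (auto EC = MyRPC::call<Ping>(C, std::string("hi")))
      ; /* handle EC */
    // Handler side: check the incoming id against Ping's, then deserialize
    // the argument and invoke the functor.
    if (auto EC = MyRPC::expect<Ping>(C, [](std::string S) {
          // ... act on S ...
          return std::error_code();
        }))
      ; /* handle EC */
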
diff --git a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
index 207bad06c239..c5006962550e 100644
--- a/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
+++ b/include/llvm/ExecutionEngine/RTDyldMemoryManager.h
@@ -30,6 +30,10 @@ class ExecutionEngine;
class MCJITMemoryManager : public RuntimeDyld::MemoryManager {
public:
+
+ // Don't hide the notifyObjectLoaded method from RuntimeDyld::MemoryManager.
+ using RuntimeDyld::MemoryManager::notifyObjectLoaded;
+
/// This method is called after an object has been loaded into memory but
/// before relocations are applied to the loaded sections. The object load
/// may have been initiated by MCJIT to resolve an external symbol for another
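
The using-declaration matters because a derived class overriding the
MCJIT-specific hook would otherwise hide the inherited
RuntimeDyld::MemoryManager overload. A sketch (MyManager is hypothetical;
the remaining pure virtual members are elided):

    class MyManager : public llvm::MCJITMemoryManager {
      void notifyObjectLoaded(llvm::ExecutionEngine *EE,
                              const llvm::object::ObjectFile &Obj) override {
        // MCJIT-specific processing.
      }
      // Without the inherited using-declaration, this override would hide
      // notifyObjectLoaded(RuntimeDyld&, const object::ObjectFile&); with
      // it, both overloads remain visible to callers.
    };
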
diff --git a/include/llvm/ExecutionEngine/RuntimeDyld.h b/include/llvm/ExecutionEngine/RuntimeDyld.h
index 385b8d0a30b1..100e97b8b3d9 100644
--- a/include/llvm/ExecutionEngine/RuntimeDyld.h
+++ b/include/llvm/ExecutionEngine/RuntimeDyld.h
@@ -95,7 +95,9 @@ public:
/// \brief Memory Management.
class MemoryManager {
+ friend class RuntimeDyld;
public:
+ MemoryManager() : FinalizationLocked(false) {}
virtual ~MemoryManager() {}
/// Allocate a memory block of (at least) the given size suitable for
@@ -122,9 +124,11 @@ public:
///
/// Note that by default the callback is disabled. To enable it
/// redefine the method needsToReserveAllocationSpace to return true.
- virtual void reserveAllocationSpace(uintptr_t CodeSize,
- uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) {}
+ virtual void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize,
+ uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) {}
/// Override to return true to enable the reserveAllocationSpace callback.
virtual bool needsToReserveAllocationSpace() { return false; }
@@ -151,8 +155,23 @@ public:
/// Returns true if an error occurred, false otherwise.
virtual bool finalizeMemory(std::string *ErrMsg = nullptr) = 0;
+ /// This method is called after an object has been loaded into memory but
+ /// before relocations are applied to the loaded sections.
+ ///
+ /// Memory managers which are preparing code for execution in an external
+ /// address space can use this call to remap the section addresses for the
+ /// newly loaded object.
+ ///
+ /// For clients that do not need access to an ExecutionEngine instance this
+ /// method should be preferred to its cousin
+ /// MCJITMemoryManager::notifyObjectLoaded as this method is compatible with
+ /// ORC JIT stacks.
+ virtual void notifyObjectLoaded(RuntimeDyld &RTDyld,
+ const object::ObjectFile &Obj) {}
+
private:
virtual void anchor();
+ bool FinalizationLocked;
};
/// \brief Symbol resolution.
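
A sketch of a manager using the two new hooks together (RemoteMM and its
bookkeeping are illustrative; the pure virtual allocation and EH-frame
members are elided for brevity):

    class RemoteMM : public llvm::RuntimeDyld::MemoryManager {
    public:
      bool needsToReserveAllocationSpace() override { return true; }
      void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
                                  uintptr_t RODataSize, uint32_t RODataAlign,
                                  uintptr_t RWDataSize,
                                  uint32_t RWDataAlign) override {
        // Reserve one contiguous target-side region up front, honoring the
        // per-segment alignments that now accompany the sizes.
      }
      void notifyObjectLoaded(llvm::RuntimeDyld &RTDyld,
                              const llvm::object::ObjectFile &Obj) override {
        // Remap sections to their target-side addresses before relocations
        // are applied, e.g. via RTDyld.mapSectionAddress(Local, Target).
      }
      // allocate{Code,Data}Section, registerEHFrames, deregisterEHFrames and
      // finalizeMemory omitted.
    };
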
@@ -241,6 +260,25 @@ public:
this->ProcessAllSections = ProcessAllSections;
}
+ /// Perform all actions needed to make the code owned by this RuntimeDyld
+ /// instance executable:
+ ///
+ /// 1) Apply relocations.
+ /// 2) Register EH frames.
+ /// 3) Update memory permissions*.
+ ///
+ /// * Finalization is potentially recursive**, and the 3rd step will only be
+ /// applied by the outermost call to finalize. This allows different
+ /// RuntimeDyld instances to share a memory manager without the innermost
+ /// finalization locking the memory and causing relocation fixup errors in
+ /// outer instances.
+ ///
+  /// ** Recursive finalization occurs when one RuntimeDyld instance needs the
+ /// address of a symbol owned by some other instance in order to apply
+ /// relocations.
+ ///
+ void finalizeWithMemoryManagerLocking();
+
private:
// RuntimeDyldImpl is the actual class. RuntimeDyld is just the public
// interface.
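
For example (a sketch; MM, R, ObjA and ObjB stand for an already-configured
memory manager, symbol resolver and two loaded object files):

    llvm::RuntimeDyld DyldA(MM, R), DyldB(MM, R);
    DyldA.loadObject(ObjA);
    DyldB.loadObject(ObjB); // may need symbol addresses owned by DyldA
    // Only the outermost finalization updates memory permissions, so the
    // shared manager MM is not locked out from under either instance:
    DyldA.finalizeWithMemoryManagerLocking();
    DyldB.finalizeWithMemoryManagerLocking();
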
diff --git a/include/llvm/IR/Attributes.td b/include/llvm/IR/Attributes.td
index 797cd55427b3..30249bbd8fab 100644
--- a/include/llvm/IR/Attributes.td
+++ b/include/llvm/IR/Attributes.td
@@ -189,4 +189,9 @@ class MergeRule<string F> {
string MergeFunc = F;
}
+def : MergeRule<"setAND<LessPreciseFPMADAttr>">;
+def : MergeRule<"setAND<NoInfsFPMathAttr>">;
+def : MergeRule<"setAND<NoNansFPMathAttr>">;
+def : MergeRule<"setAND<UnsafeFPMathAttr>">;
+def : MergeRule<"setOR<NoImplicitFloatAttr>">;
def : MergeRule<"adjustCallerSSPLevel">;
diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h
index 2a983930bf4d..4f64caeade20 100644
--- a/include/llvm/IR/Function.h
+++ b/include/llvm/IR/Function.h
@@ -66,7 +66,8 @@ private:
* bit 2 : HasPrologueData
* bit 3 : HasPersonalityFn
* bits 4-13 : CallingConvention
- * bits 14-15 : [reserved]
+ * bit 14 : HasGC
+ * bit 15 : [reserved]
*/
/// Bits from GlobalObject::GlobalObjectSubclassData.
@@ -220,9 +221,11 @@ public:
/// hasGC/getGC/setGC/clearGC - The name of the garbage collection algorithm
/// to use during code generation.
- bool hasGC() const;
- const char *getGC() const;
- void setGC(const char *Str);
+ bool hasGC() const {
+ return getSubclassDataFromValue() & (1<<14);
+ }
+ const std::string &getGC() const;
+  void setGC(std::string Str);
void clearGC();
/// @brief adds the attribute to the list of attributes.
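
Client code is unchanged apart from the string-based signatures, e.g.
(F is an assumed llvm::Function*):

    F->setGC("statepoint-example");   // name is now stored in the LLVMContext
    if (F->hasGC())                   // just tests subclass-data bit 14
      llvm::errs() << F->getGC() << "\n";
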
diff --git a/include/llvm/IR/IRBuilder.h b/include/llvm/IR/IRBuilder.h
index a30505471aac..1b75c60631b0 100644
--- a/include/llvm/IR/IRBuilder.h
+++ b/include/llvm/IR/IRBuilder.h
@@ -178,10 +178,10 @@ public:
void clearFastMathFlags() { FMF.clear(); }
/// \brief Set the floating point math metadata to be used.
- void SetDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
+ void setDefaultFPMathTag(MDNode *FPMathTag) { DefaultFPMathTag = FPMathTag; }
/// \brief Set the fast-math flags to be used with generated fp-math operators
- void SetFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
+ void setFastMathFlags(FastMathFlags NewFMF) { FMF = NewFMF; }
//===--------------------------------------------------------------------===//
// RAII helpers.
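
With the renamed setters (lower-case 's', matching LLVM convention), usage
looks like the sketch below (Ctx is an assumed LLVMContext&):

    llvm::IRBuilder<> B(Ctx);
    llvm::FastMathFlags FMF;
    FMF.setUnsafeAlgebra();         // enable fast-math transforms (3.8-era flag)
    B.setFastMathFlags(FMF);        // formerly SetFastMathFlags
    B.setDefaultFPMathTag(nullptr); // formerly SetDefaultFPMathTag
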
diff --git a/include/llvm/IR/Intrinsics.td b/include/llvm/IR/Intrinsics.td
index 5a95ddced538..f67029ab56e3 100644
--- a/include/llvm/IR/Intrinsics.td
+++ b/include/llvm/IR/Intrinsics.td
@@ -575,7 +575,7 @@ def int_experimental_gc_statepoint : Intrinsic<[llvm_token_ty],
def int_experimental_gc_result : Intrinsic<[llvm_any_ty], [llvm_token_ty],
[IntrReadMem]>;
-def int_experimental_gc_relocate : Intrinsic<[llvm_anyptr_ty],
+def int_experimental_gc_relocate : Intrinsic<[llvm_any_ty],
[llvm_token_ty, llvm_i32_ty, llvm_i32_ty],
[IntrReadMem]>;
diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td
index 54bcbd8da509..8023a9f6e8e9 100644
--- a/include/llvm/IR/IntrinsicsX86.td
+++ b/include/llvm/IR/IntrinsicsX86.td
@@ -1507,6 +1507,60 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
[llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
[IntrNoMem]>;
+ def int_x86_avx512_mask_pshuf_d_128 :
+ GCCBuiltin<"__builtin_ia32_pshufd128_mask">,
+ Intrinsic<[llvm_v4i32_ty],
+ [llvm_v4i32_ty, llvm_i16_ty, llvm_v4i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshuf_d_256 :
+ GCCBuiltin<"__builtin_ia32_pshufd256_mask">,
+ Intrinsic<[llvm_v8i32_ty],
+ [llvm_v8i32_ty, llvm_i16_ty, llvm_v8i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshuf_d_512 :
+ GCCBuiltin<"__builtin_ia32_pshufd512_mask">,
+ Intrinsic<[llvm_v16i32_ty],
+ [llvm_v16i32_ty, llvm_i16_ty, llvm_v16i32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufh_w_128 :
+ GCCBuiltin<"__builtin_ia32_pshufhw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufh_w_256 :
+ GCCBuiltin<"__builtin_ia32_pshufhw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufh_w_512 :
+ GCCBuiltin<"__builtin_ia32_pshufhw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufl_w_128 :
+ GCCBuiltin<"__builtin_ia32_pshuflw128_mask">,
+ Intrinsic<[llvm_v8i16_ty],
+ [llvm_v8i16_ty, llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufl_w_256 :
+ GCCBuiltin<"__builtin_ia32_pshuflw256_mask">,
+ Intrinsic<[llvm_v16i16_ty],
+ [llvm_v16i16_ty, llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty],
+ [IntrNoMem]>;
+
+ def int_x86_avx512_mask_pshufl_w_512 :
+ GCCBuiltin<"__builtin_ia32_pshuflw512_mask">,
+ Intrinsic<[llvm_v32i16_ty],
+ [llvm_v32i16_ty, llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty],
+ [IntrNoMem]>;
+
def int_x86_avx512_mask_shuf_f32x4_256 :
GCCBuiltin<"__builtin_ia32_shuf_f32x4_256_mask">,
Intrinsic<[llvm_v8f32_ty],
@@ -1836,25 +1890,69 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx_maskload_ps_256 : GCCBuiltin<"__builtin_ia32_maskloadps256">,
Intrinsic<[llvm_v8f32_ty], [llvm_ptr_ty, llvm_v8i32_ty],
[IntrReadArgMem]>;
- def int_x86_avx512_mask_loadu_ps_512 : GCCBuiltin<"__builtin_ia32_loadups512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
- [IntrReadArgMem]>;
- def int_x86_avx512_mask_loadu_pd_512 : GCCBuiltin<"__builtin_ia32_loadupd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrReadArgMem]>;
- def int_x86_avx512_mask_load_ps_512 : GCCBuiltin<"__builtin_ia32_loadaps512_mask">,
- Intrinsic<[llvm_v16f32_ty], [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty],
- [IntrReadArgMem]>;
- def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
- Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty],
- [IntrReadArgMem]>;
- def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">,
- Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
- [IntrNoMem]>;
- def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">,
- Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
- [IntrNoMem]>;
+ def int_x86_avx512_mask_loadu_ps_128 :
+ GCCBuiltin<"__builtin_ia32_loadups128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_loadu_ps_256 :
+ GCCBuiltin<"__builtin_ia32_loadups256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_loadu_ps_512 :
+ GCCBuiltin<"__builtin_ia32_loadups512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>;
+
+ def int_x86_avx512_mask_loadu_pd_128 :
+ GCCBuiltin<"__builtin_ia32_loadupd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_loadu_pd_256 :
+ GCCBuiltin<"__builtin_ia32_loadupd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_loadu_pd_512 :
+ GCCBuiltin<"__builtin_ia32_loadupd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+
+ def int_x86_avx512_mask_load_ps_128 :
+ GCCBuiltin<"__builtin_ia32_loadaps128_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_ptr_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_load_ps_256 :
+ GCCBuiltin<"__builtin_ia32_loadaps256_mask">,
+ Intrinsic<[llvm_v8f32_ty],
+ [llvm_ptr_ty, llvm_v8f32_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_load_ps_512 :
+ GCCBuiltin<"__builtin_ia32_loadaps512_mask">,
+ Intrinsic<[llvm_v16f32_ty],
+ [llvm_ptr_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrReadArgMem]>;
+
+ def int_x86_avx512_mask_load_pd_128 :
+ GCCBuiltin<"__builtin_ia32_loadapd128_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_ptr_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_load_pd_256 :
+ GCCBuiltin<"__builtin_ia32_loadapd256_mask">,
+ Intrinsic<[llvm_v4f64_ty],
+ [llvm_ptr_ty, llvm_v4f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+ def int_x86_avx512_mask_load_pd_512 :
+ GCCBuiltin<"__builtin_ia32_loadapd512_mask">,
+ Intrinsic<[llvm_v8f64_ty],
+ [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>;
+
+ def int_x86_avx512_mask_move_ss :
+ GCCBuiltin<"__builtin_ia32_movss_mask">,
+ Intrinsic<[llvm_v4f32_ty],
+ [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty],
+ [IntrNoMem]>;
+ def int_x86_avx512_mask_move_sd :
+ GCCBuiltin<"__builtin_ia32_movsd_mask">,
+ Intrinsic<[llvm_v2f64_ty],
+ [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty],
+ [IntrNoMem]>;
}
// Conditional store ops
@@ -2262,6 +2360,46 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_w_128 : GCCBuiltin<"__builtin_ia32_psllw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_w_256 : GCCBuiltin<"__builtin_ia32_psllw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_w_512 : GCCBuiltin<"__builtin_ia32_psllw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_wi_128 : GCCBuiltin<"__builtin_ia32_psllwi128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_i8_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_wi_256 : GCCBuiltin<"__builtin_ia32_psllwi256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_i8_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psll_wi_512 : GCCBuiltin<"__builtin_ia32_psllwi512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_i8_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv16_hi : GCCBuiltin<"__builtin_ia32_psllv16hi_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv2_di : GCCBuiltin<"__builtin_ia32_psllv2di_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv32hi : GCCBuiltin<"__builtin_ia32_psllv32hi_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv4_di : GCCBuiltin<"__builtin_ia32_psllv4di_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv4_si : GCCBuiltin<"__builtin_ia32_psllv4si_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv8_hi : GCCBuiltin<"__builtin_ia32_psllv8hi_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psllv8_si : GCCBuiltin<"__builtin_ia32_psllv8si_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_psra_d_128 : GCCBuiltin<"__builtin_ia32_psrad128_mask">,
Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
@@ -2823,6 +2961,28 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav16_hi : GCCBuiltin<"__builtin_ia32_psrav16hi_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
+ llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav32_hi : GCCBuiltin<"__builtin_ia32_psrav32hi_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i16_ty,
+ llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav4_si : GCCBuiltin<"__builtin_ia32_psrav4si_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav8_hi : GCCBuiltin<"__builtin_ia32_psrav8hi_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty,
+ llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav8_si : GCCBuiltin<"__builtin_ia32_psrav8si_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav_q_128 : GCCBuiltin<"__builtin_ia32_psravq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_psrav_q_256 : GCCBuiltin<"__builtin_ia32_psravq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
def int_x86_avx512_mask_psrlv16_hi : GCCBuiltin<"__builtin_ia32_psrlv16hi_mask">,
Intrinsic<[llvm_v16i16_ty], [llvm_v16i16_ty,
llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
@@ -2844,6 +3004,83 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_mask_psrlv8_si : GCCBuiltin<"__builtin_ia32_psrlv8si_mask">,
Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_prorv_d_128 : GCCBuiltin<"__builtin_ia32_prorvd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prorv_d_256 : GCCBuiltin<"__builtin_ia32_prorvd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prorv_d_512 : GCCBuiltin<"__builtin_ia32_prorvd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prorv_q_128 : GCCBuiltin<"__builtin_ia32_prorvq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prorv_q_256 : GCCBuiltin<"__builtin_ia32_prorvq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prorv_q_512 : GCCBuiltin<"__builtin_ia32_prorvq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_prol_d_128 : GCCBuiltin<"__builtin_ia32_prold128_mask">,
+          Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+                     llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_prol_d_256 : GCCBuiltin<"__builtin_ia32_prold256_mask">,
+          Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+                     llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_prol_d_512 : GCCBuiltin<"__builtin_ia32_prold512_mask">,
+          Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+                     llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_prol_q_128 : GCCBuiltin<"__builtin_ia32_prolq128_mask">,
+          Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+                     llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_prol_q_256 : GCCBuiltin<"__builtin_ia32_prolq256_mask">,
+          Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+                     llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+  def int_x86_avx512_mask_prol_q_512 : GCCBuiltin<"__builtin_ia32_prolq512_mask">,
+          Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+                     llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
+ def int_x86_avx512_mask_prolv_d_128 : GCCBuiltin<"__builtin_ia32_prolvd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prolv_d_256 : GCCBuiltin<"__builtin_ia32_prolvd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prolv_d_512 : GCCBuiltin<"__builtin_ia32_prolvd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prolv_q_128 : GCCBuiltin<"__builtin_ia32_prolvq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prolv_q_256 : GCCBuiltin<"__builtin_ia32_prolvq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_prolv_q_512 : GCCBuiltin<"__builtin_ia32_prolvq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_d_128 : GCCBuiltin<"__builtin_ia32_prord128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty,
+ llvm_i8_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_d_256 : GCCBuiltin<"__builtin_ia32_prord256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i32_ty,
+ llvm_i8_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_d_512 : GCCBuiltin<"__builtin_ia32_prord512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty,
+ llvm_i8_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_q_128 : GCCBuiltin<"__builtin_ia32_prorq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty,
+ llvm_i8_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_q_256 : GCCBuiltin<"__builtin_ia32_prorq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty,
+ llvm_i8_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pror_q_512 : GCCBuiltin<"__builtin_ia32_prorq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty,
+ llvm_i8_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
}
// Gather ops
@@ -4208,6 +4445,61 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_kortestc_w : GCCBuiltin<"__builtin_ia32_kortestchi">,
Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty],
[IntrNoMem]>;
+
+ def int_x86_avx512_mask_pmovsxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxbd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxbd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxbd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxbq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxbq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxbq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovsxbw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovsxbw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovsxbw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxdq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxdq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxdq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovsxwd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovsxwd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovsxwd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovsxwq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovsxwq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovsxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovsxwq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
}
// Conversion ops
@@ -5319,6 +5611,62 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
def int_x86_avx512_pmovzxdq : GCCBuiltin<"__builtin_ia32_pmovzxdq512">,
Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty],
[IntrNoMem]>;
+
+ def int_x86_avx512_mask_pmovzxb_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxbd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v16i8_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxbd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v16i8_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxbd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i8_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxbq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v16i8_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxbq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v16i8_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxbq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v16i8_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_w_128 : GCCBuiltin<"__builtin_ia32_pmovzxbw128_mask">,
+ Intrinsic<[llvm_v8i16_ty], [llvm_v16i8_ty,
+ llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_w_256 : GCCBuiltin<"__builtin_ia32_pmovzxbw256_mask">,
+ Intrinsic<[llvm_v16i16_ty], [llvm_v16i8_ty,
+ llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxb_w_512 : GCCBuiltin<"__builtin_ia32_pmovzxbw512_mask">,
+ Intrinsic<[llvm_v32i16_ty], [llvm_v32i8_ty,
+ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxd_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxdq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v4i32_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxd_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxdq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v4i32_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxd_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxdq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i32_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_d_128 : GCCBuiltin<"__builtin_ia32_pmovzxwd128_mask">,
+ Intrinsic<[llvm_v4i32_ty], [llvm_v8i16_ty,
+ llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_d_256 : GCCBuiltin<"__builtin_ia32_pmovzxwd256_mask">,
+ Intrinsic<[llvm_v8i32_ty], [llvm_v8i16_ty,
+ llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_d_512 : GCCBuiltin<"__builtin_ia32_pmovzxwd512_mask">,
+ Intrinsic<[llvm_v16i32_ty], [llvm_v16i16_ty,
+ llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_q_128 : GCCBuiltin<"__builtin_ia32_pmovzxwq128_mask">,
+ Intrinsic<[llvm_v2i64_ty], [llvm_v8i16_ty,
+ llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_q_256 : GCCBuiltin<"__builtin_ia32_pmovzxwq256_mask">,
+ Intrinsic<[llvm_v4i64_ty], [llvm_v8i16_ty,
+ llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>;
+ def int_x86_avx512_mask_pmovzxw_q_512 : GCCBuiltin<"__builtin_ia32_pmovzxwq512_mask">,
+ Intrinsic<[llvm_v8i64_ty], [llvm_v8i16_ty,
+ llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>;
+
}
//Bitwise Ops
let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
diff --git a/include/llvm/IR/LLVMContext.h b/include/llvm/IR/LLVMContext.h
index c546fc3d1ee0..56aa3010d925 100644
--- a/include/llvm/IR/LLVMContext.h
+++ b/include/llvm/IR/LLVMContext.h
@@ -93,6 +93,17 @@ public:
 /// tag registered with an LLVMContext has a unique ID.
uint32_t getOperandBundleTagID(StringRef Tag) const;
+
+ /// Define the GC for a function
+ void setGC(const Function &Fn, std::string GCName);
+
+ /// Return the GC for a function
+ const std::string &getGC(const Function &Fn);
+
+ /// Remove the GC for a function
+ void deleteGC(const Function &Fn);
+
+
typedef void (*InlineAsmDiagHandlerTy)(const SMDiagnostic&, void *Context,
unsigned LocCookie);
diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h
index 4a8557d074f0..df8ce354bb7f 100644
--- a/include/llvm/IR/Metadata.h
+++ b/include/llvm/IR/Metadata.h
@@ -915,11 +915,21 @@ public:
/// \brief Resolve cycles.
///
/// Once all forward declarations have been resolved, force cycles to be
- /// resolved. If \p AllowTemps is true, then any temporary metadata
- /// is ignored, otherwise it asserts when encountering temporary metadata.
+ /// resolved. This interface is used when there are no more temporaries,
+ /// and thus unresolved nodes are part of cycles and no longer need RAUW
+ /// support.
///
/// \pre No operands (or operands' operands, etc.) have \a isTemporary().
- void resolveCycles(bool AllowTemps = false);
+ void resolveCycles() { resolveRecursivelyImpl(/* AllowTemps */ false); }
+
+ /// \brief Resolve cycles while ignoring temporaries.
+ ///
+ /// This drops RAUW support for any temporaries, which can no longer
+ /// be uniqued.
+ ///
+ void resolveNonTemporaries() {
+ resolveRecursivelyImpl(/* AllowTemps */ true);
+ }
/// \brief Replace a temporary node with a permanent one.
///
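
A sketch of the intended call sites (N is an assumed MDNode* heading a
forward-referenced cycle):

    // All forward references resolved, no temporaries left in the graph:
    N->resolveCycles();
    // Lazy (e.g. ThinLTO-style) linking, where temporary metadata may still
    // be present and must keep RAUW support:
    N->resolveNonTemporaries();
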
@@ -977,6 +987,11 @@ private:
void decrementUnresolvedOperandCount();
unsigned countUnresolvedOperands();
+ /// Resolve cycles recursively. If \p AllowTemps is true, then any temporary
+ /// metadata is ignored, otherwise it asserts when encountering temporary
+ /// metadata.
+ void resolveRecursivelyImpl(bool AllowTemps);
+
/// \brief Mutate this to be "uniqued".
///
/// Mutate this so that \a isUniqued().
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index cb2b1394e92b..90fbc1d891b7 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -132,7 +132,6 @@ void initializeEarlyCSELegacyPassPass(PassRegistry &);
void initializeEliminateAvailableExternallyPass(PassRegistry&);
void initializeExpandISelPseudosPass(PassRegistry&);
void initializeForceFunctionAttrsLegacyPassPass(PassRegistry&);
-void initializeFunctionAttrsPass(PassRegistry&);
void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGVNPass(PassRegistry&);
@@ -227,6 +226,7 @@ void initializePostDomOnlyViewerPass(PassRegistry&);
void initializePostDomPrinterPass(PassRegistry&);
void initializePostDomViewerPass(PassRegistry&);
void initializePostDominatorTreePass(PassRegistry&);
+void initializePostOrderFunctionAttrsPass(PassRegistry&);
void initializePostRASchedulerPass(PassRegistry&);
void initializePostMachineSchedulerPass(PassRegistry&);
void initializePrintFunctionPassWrapperPass(PassRegistry&);
@@ -242,6 +242,7 @@ void initializeRegionOnlyPrinterPass(PassRegistry&);
void initializeRegionOnlyViewerPass(PassRegistry&);
void initializeRegionPrinterPass(PassRegistry&);
void initializeRegionViewerPass(PassRegistry&);
+void initializeReversePostOrderFunctionAttrsPass(PassRegistry&);
void initializeRewriteStatepointsForGCPass(PassRegistry&);
void initializeSafeStackPass(PassRegistry&);
void initializeSCCPPass(PassRegistry&);
diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h
index 29fcd93a2a1c..d695d11a6369 100644
--- a/include/llvm/LinkAllPasses.h
+++ b/include/llvm/LinkAllPasses.h
@@ -157,7 +157,8 @@ namespace {
(void) llvm::createPostDomTree();
(void) llvm::createInstructionNamerPass();
(void) llvm::createMetaRenamerPass();
- (void) llvm::createFunctionAttrsPass();
+ (void) llvm::createPostOrderFunctionAttrsPass();
+ (void) llvm::createReversePostOrderFunctionAttrsPass();
(void) llvm::createMergeFunctionsPass();
(void) llvm::createPrintModulePass(*(llvm::raw_ostream*)nullptr);
(void) llvm::createPrintFunctionPass(*(llvm::raw_ostream*)nullptr);
diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h
index dde3f73883ca..2b051e6d15c9 100644
--- a/include/llvm/Linker/Linker.h
+++ b/include/llvm/Linker/Linker.h
@@ -67,10 +67,9 @@ public:
DenseMap<unsigned, MDNode *> *ValIDToTempMDMap);
};
-/// Create a new module with exported local functions renamed and promoted
-/// for ThinLTO.
-std::unique_ptr<Module> renameModuleForThinLTO(std::unique_ptr<Module> M,
- const FunctionInfoIndex *Index);
+/// Perform in-place global value handling on the given Module: exported
+/// local functions are renamed and promoted for ThinLTO.
+bool renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index);
} // End llvm namespace
diff --git a/include/llvm/MC/MCExpr.h b/include/llvm/MC/MCExpr.h
index 1d6bdef0af27..f6ccdc095551 100644
--- a/include/llvm/MC/MCExpr.h
+++ b/include/llvm/MC/MCExpr.h
@@ -290,6 +290,9 @@ public:
VK_Hexagon_LD_PLT,
VK_Hexagon_IE,
VK_Hexagon_IE_GOT,
+
+ VK_WebAssembly_FUNCTION, // Function table index, rather than virtual addr
+
VK_TPREL,
VK_DTPREL
};
diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h
index cf2c3f12bb6b..8a3a6af3bf79 100644
--- a/include/llvm/MC/MCObjectFileInfo.h
+++ b/include/llvm/MC/MCObjectFileInfo.h
@@ -92,6 +92,7 @@ protected:
MCSection *DwarfLocSection;
MCSection *DwarfARangesSection;
MCSection *DwarfRangesSection;
+ MCSection *DwarfMacinfoSection;
// The pubnames section is no longer generated by default. The generation
// can be enabled by a compiler flag.
MCSection *DwarfPubNamesSection;
@@ -245,6 +246,7 @@ public:
MCSection *getDwarfLocSection() const { return DwarfLocSection; }
MCSection *getDwarfARangesSection() const { return DwarfARangesSection; }
MCSection *getDwarfRangesSection() const { return DwarfRangesSection; }
+ MCSection *getDwarfMacinfoSection() const { return DwarfMacinfoSection; }
// DWARF5 Experimental Debug Info Sections
MCSection *getDwarfAccelNamesSection() const {
diff --git a/include/llvm/MC/MCStreamer.h b/include/llvm/MC/MCStreamer.h
index 494f02dfad3e..04d143ffef66 100644
--- a/include/llvm/MC/MCStreamer.h
+++ b/include/llvm/MC/MCStreamer.h
@@ -131,6 +131,10 @@ public:
void finish() override;
+ /// Reset any state between object emissions, i.e. the equivalent of
+ /// MCStreamer's reset method.
+ virtual void reset();
+
/// Callback used to implement the ldr= pseudo.
/// Add a new entry to the constant pool for the current section and return an
/// MCExpr that can be used to refer to the constant pool location.
diff --git a/include/llvm/Object/COFF.h b/include/llvm/Object/COFF.h
index 1b0e2e36bd5e..3e69c3e6e5d4 100644
--- a/include/llvm/Object/COFF.h
+++ b/include/llvm/Object/COFF.h
@@ -858,6 +858,9 @@ public:
std::error_code getExportRVA(uint32_t &Result) const;
std::error_code getSymbolName(StringRef &Result) const;
+ std::error_code isForwarder(bool &Result) const;
+ std::error_code getForwardTo(StringRef &Result) const;
+
private:
const export_directory_table_entry *ExportTable;
uint32_t Index;
diff --git a/include/llvm/Object/ELFObjectFile.h b/include/llvm/Object/ELFObjectFile.h
index 5823848aaacb..5d826da4c2fc 100644
--- a/include/llvm/Object/ELFObjectFile.h
+++ b/include/llvm/Object/ELFObjectFile.h
@@ -842,6 +842,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
case ELF::EM_SPARC:
case ELF::EM_SPARC32PLUS:
return "ELF32-sparc";
+ case ELF::EM_WEBASSEMBLY:
+ return "ELF32-wasm";
default:
return "ELF32-unknown";
}
@@ -861,6 +863,8 @@ StringRef ELFObjectFile<ELFT>::getFileFormatName() const {
return "ELF64-sparc";
case ELF::EM_MIPS:
return "ELF64-mips";
+ case ELF::EM_WEBASSEMBLY:
+ return "ELF64-wasm";
default:
return "ELF64-unknown";
}
@@ -908,6 +912,12 @@ unsigned ELFObjectFile<ELFT>::getArch() const {
return IsLittleEndian ? Triple::sparcel : Triple::sparc;
case ELF::EM_SPARCV9:
return Triple::sparcv9;
+ case ELF::EM_WEBASSEMBLY:
+ switch (EF.getHeader()->e_ident[ELF::EI_CLASS]) {
+ case ELF::ELFCLASS32: return Triple::wasm32;
+ case ELF::ELFCLASS64: return Triple::wasm64;
+ default: return Triple::UnknownArch;
+ }
default:
return Triple::UnknownArch;
diff --git a/include/llvm/Pass.h b/include/llvm/Pass.h
index 3c4d838a4652..99604cdbc9ca 100644
--- a/include/llvm/Pass.h
+++ b/include/llvm/Pass.h
@@ -369,6 +369,10 @@ protected:
/// @brief This is the storage for the -time-passes option.
extern bool TimePassesIsEnabled;
+/// isFunctionInPrintList - returns true if a function should be printed via
+/// debugging options like -print-after-all/-print-before-all.
+/// @brief Tells if the function IR should be printed by PrinterPass.
+extern bool isFunctionInPrintList(StringRef FunctionName);
} // End llvm namespace
// Include support files that contain important APIs commonly used by Passes,
diff --git a/include/llvm/ProfileData/CoverageMapping.h b/include/llvm/ProfileData/CoverageMapping.h
index 3790e1358449..92a991eb39ba 100644
--- a/include/llvm/ProfileData/CoverageMapping.h
+++ b/include/llvm/ProfileData/CoverageMapping.h
@@ -20,13 +20,34 @@
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/iterator.h"
+#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/Endian.h"
#include "llvm/Support/ErrorOr.h"
#include "llvm/Support/raw_ostream.h"
#include <system_error>
#include <tuple>
namespace llvm {
+namespace coverage {
+enum class coveragemap_error {
+ success = 0,
+ eof,
+ no_data_found,
+ unsupported_version,
+ truncated,
+ malformed
+};
+} // end namespace coverage
+}
+
+namespace std {
+template <>
+struct is_error_code_enum<llvm::coverage::coveragemap_error> : std::true_type {
+};
+}
+
+namespace llvm {
class IndexedInstrProfReader;
namespace coverage {
@@ -35,8 +56,6 @@ class CoverageMappingReader;
class CoverageMapping;
struct CounterExpressions;
-enum CoverageMappingVersion { CoverageMappingVersion1 };
-
/// \brief A Counter is an abstract value that describes how to compute the
/// execution count for a region of code using the collected profile count data.
struct Counter {
@@ -454,6 +473,76 @@ public:
CoverageData getCoverageForExpansion(const ExpansionRecord &Expansion);
};
+const std::error_category &coveragemap_category();
+
+inline std::error_code make_error_code(coveragemap_error E) {
+ return std::error_code(static_cast<int>(E), coveragemap_category());
+}
+
+// Profile coverage map has the following layout:
+// [CoverageMapFileHeader]
+// [ArrayStart]
+// [CovMapFunctionRecord]
+// [CovMapFunctionRecord]
+// ...
+// [ArrayEnd]
+// [Encoded Region Mapping Data]
+LLVM_PACKED_START
+template <class IntPtrT> struct CovMapFunctionRecord {
+#define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+
+ // Return the structural hash associated with the function.
+ template <support::endianness Endian> uint64_t getFuncHash() const {
+ return support::endian::byte_swap<uint64_t, Endian>(FuncHash);
+ }
+  // Return the coverage map data size for the function.
+ template <support::endianness Endian> uint32_t getDataSize() const {
+ return support::endian::byte_swap<uint32_t, Endian>(DataSize);
+ }
+  // Return the function lookup key. The value is considered opaque.
+ template <support::endianness Endian> IntPtrT getFuncNameRef() const {
+ return support::endian::byte_swap<IntPtrT, Endian>(NamePtr);
+ }
+  // Return the PGO name of the function.
+ template <support::endianness Endian>
+ std::error_code getFuncName(InstrProfSymtab &ProfileNames,
+ StringRef &FuncName) const {
+ IntPtrT NameRef = getFuncNameRef<Endian>();
+ uint32_t NameS = support::endian::byte_swap<uint32_t, Endian>(NameSize);
+ FuncName = ProfileNames.getFuncName(NameRef, NameS);
+ if (NameS && FuncName.empty())
+ return coveragemap_error::malformed;
+ return std::error_code();
+ }
+};
+// Per module coverage mapping data header, i.e. CoverageMapFileHeader
+// documented above.
+struct CovMapHeader {
+#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
+#include "llvm/ProfileData/InstrProfData.inc"
+ template <support::endianness Endian> uint32_t getNRecords() const {
+ return support::endian::byte_swap<uint32_t, Endian>(NRecords);
+ }
+ template <support::endianness Endian> uint32_t getFilenamesSize() const {
+ return support::endian::byte_swap<uint32_t, Endian>(FilenamesSize);
+ }
+ template <support::endianness Endian> uint32_t getCoverageSize() const {
+ return support::endian::byte_swap<uint32_t, Endian>(CoverageSize);
+ }
+ template <support::endianness Endian> uint32_t getVersion() const {
+ return support::endian::byte_swap<uint32_t, Endian>(Version);
+ }
+};
+
+LLVM_PACKED_END
+
+enum CoverageMappingVersion {
+ CoverageMappingVersion1 = 0,
+  // The current version is Version1.
+ CoverageMappingCurrentVersion = INSTR_PROF_COVMAP_VERSION
+};
+
} // end namespace coverage
/// \brief Provide DenseMapInfo for CounterExpression
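
A reader-side sketch of the endian-aware accessors (a fragment; Record points
into a covmap section and ProfileNames is an assumed InstrProfSymtab, both set
up elsewhere; little-endian is chosen purely for illustration):

    using namespace llvm;
    using namespace llvm::coverage;
    const CovMapFunctionRecord<uint64_t> *Record =
        /* points into the encoded covmap section */;
    uint64_t Hash = Record->getFuncHash<support::little>();
    uint32_t Size = Record->getDataSize<support::little>();
    StringRef FuncName;
    if (std::error_code EC =
            Record->getFuncName<support::little>(ProfileNames, FuncName))
      return EC; // e.g. coveragemap_error::malformed via make_error_code()
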
@@ -484,26 +573,6 @@ template<> struct DenseMapInfo<coverage::CounterExpression> {
}
};
-const std::error_category &coveragemap_category();
-
-enum class coveragemap_error {
- success = 0,
- eof,
- no_data_found,
- unsupported_version,
- truncated,
- malformed
-};
-
-inline std::error_code make_error_code(coveragemap_error E) {
- return std::error_code(static_cast<int>(E), coveragemap_category());
-}
-
} // end namespace llvm
-namespace std {
-template <>
-struct is_error_code_enum<llvm::coveragemap_error> : std::true_type {};
-}
-
#endif // LLVM_PROFILEDATA_COVERAGEMAPPING_H_
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index 49569d89507b..c84d8d24f206 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -30,7 +30,6 @@
#include <system_error>
#include <vector>
-#define INSTR_PROF_INDEX_VERSION 3
namespace llvm {
class Function;
@@ -66,7 +65,8 @@ inline StringRef getInstrProfValueProfFuncName() {
/// Return the name of the section containing function coverage mapping
/// data.
inline StringRef getInstrProfCoverageSectionName(bool AddSegment) {
- return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap";
+ return AddSegment ? "__DATA," INSTR_PROF_COVMAP_SECT_NAME_STR
+ : INSTR_PROF_COVMAP_SECT_NAME_STR;
}
/// Return the name prefix of variables containing instrumented function names.
@@ -89,6 +89,12 @@ inline StringRef getCoverageMappingVarName() {
return "__llvm_coverage_mapping";
}
+/// Return the name of the internal variable recording the array
+/// of PGO name vars referenced by the coverage mapping. The owning
+/// functions of those names are not emitted by the FE (e.g., unused
+/// inline functions).
+inline StringRef getCoverageNamesVarName() { return "__llvm_coverage_names"; }
+
/// Return the name of function that registers all the per-function control
/// data at program startup time by calling __llvm_register_function. This
/// function has internal linkage and is called by __llvm_profile_init
@@ -349,11 +355,14 @@ struct InstrProfValueSiteRecord {
return left.Value < right.Value;
});
}
+  /// Sort ValueData in descending order by Count.
+ inline void sortByCount();
/// Merge data from another InstrProfValueSiteRecord
/// Optionally scale merged counts by \p Weight.
- instrprof_error mergeValueData(InstrProfValueSiteRecord &Input,
- uint64_t Weight = 1);
+ instrprof_error merge(InstrProfValueSiteRecord &Input, uint64_t Weight = 1);
+ /// Scale up value profile data counts.
+ instrprof_error scale(uint64_t Weight);
};
/// Profiling information for a single function.
@@ -396,6 +405,19 @@ struct InstrProfRecord {
/// Optionally scale merged counts by \p Weight.
instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1);
+ /// Scale up profile counts (including value profile data) by
+ /// \p Weight.
+ instrprof_error scale(uint64_t Weight);
+
+ /// Sort value profile data (per site) by count.
+ void sortValueData() {
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
+ std::vector<InstrProfValueSiteRecord> &SiteRecords =
+ getValueSitesForKind(Kind);
+ for (auto &SR : SiteRecords)
+ SR.sortByCount();
+ }
+ }
/// Clear value data entries
void clearValueData() {
for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
@@ -430,6 +452,8 @@ private:
// Scale merged value counts by \p Weight.
instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src,
uint64_t Weight);
+ // Scale up value profile data count.
+ instrprof_error scaleValueProfData(uint32_t ValueKind, uint64_t Weight);
};
uint32_t InstrProfRecord::getNumValueKinds() const {
@@ -497,11 +521,22 @@ inline support::endianness getHostEndianness() {
#define INSTR_PROF_VALUE_PROF_DATA
#include "llvm/ProfileData/InstrProfData.inc"
- /*
- * Initialize the record for runtime value profile data.
- * Return 0 if the initialization is successful, otherwise
- * return 1.
- */
+void InstrProfValueSiteRecord::sortByCount() {
+ ValueData.sort(
+ [](const InstrProfValueData &left, const InstrProfValueData &right) {
+ return left.Count > right.Count;
+ });
+ // Truncate to at most INSTR_PROF_MAX_NUM_VAL_PER_SITE entries.
+ size_t max_s = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
+ if (ValueData.size() > max_s)
+ ValueData.resize(max_s);
+}
+
+/*
+ * Initialize the record for runtime value profile data.
+ * Return 0 if the initialization is successful, otherwise
+ * return 1.
+ */
int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord,
const uint16_t *NumValueSites,
ValueProfNode **Nodes);
@@ -597,31 +632,6 @@ struct Header {
} // end namespace RawInstrProf
-namespace coverage {
-
-// Profile coverage map has the following layout:
-// [CoverageMapFileHeader]
-// [ArrayStart]
-// [CovMapFunctionRecord]
-// [CovMapFunctionRecord]
-// ...
-// [ArrayEnd]
-// [Encoded Region Mapping Data]
-LLVM_PACKED_START
-template <class IntPtrT> struct CovMapFunctionRecord {
- #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
- #include "llvm/ProfileData/InstrProfData.inc"
-};
-// Per module coverage mapping data header, i.e. CoverageMapFileHeader
-// documented above.
-struct CovMapHeader {
-#define COVMAP_HEADER(Type, LLVMType, Name, Init) Type Name;
-#include "llvm/ProfileData/InstrProfData.inc"
-};
-
-LLVM_PACKED_END
-}
-
} // end namespace llvm
namespace std {
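
The merge/scale split declared above, together with sortValueData, gives
profile readers one path for weighted merging. A hedged sketch of a caller;
combineRecords is a hypothetical helper, while merge and sortValueData are
the members declared in the hunk:

    #include "llvm/ProfileData/InstrProf.h"
    #include <cstdint>

    // Fold Src into Dst with Src's counts (including value profile data)
    // scaled by Weight; overflow is reported via instrprof_error.
    llvm::instrprof_error combineRecords(llvm::InstrProfRecord &Dst,
                                         llvm::InstrProfRecord &Src,
                                         uint64_t Weight) {
      llvm::instrprof_error E = Dst.merge(Src, Weight);
      if (E != llvm::instrprof_error::success)
        return E;
      // Per site, sortByCount() orders values descending by count and
      // truncates to INSTR_PROF_MAX_NUM_VAL_PER_SITE entries.
      Dst.sortValueData();
      return llvm::instrprof_error::success;
    }
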
diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc
index 3a7c0c5f2773..33c7d94aea2a 100644
--- a/include/llvm/ProfileData/InstrProfData.inc
+++ b/include/llvm/ProfileData/InstrProfData.inc
@@ -28,7 +28,7 @@
*
* Examples of how the template is used to instantiate structure definition:
* 1. To declare a structure:
- *
+ *
* struct ProfData {
* #define INSTR_PROF_DATA(Type, LLVMType, Name, Initializer) \
* Type Name;
@@ -155,7 +155,7 @@ VALUE_PROF_KIND(IPVK_Last, IPVK_IndirectCallTarget)
#endif
COVMAP_FUNC_RECORD(const IntPtrT, llvm::Type::getInt8PtrTy(Ctx), \
NamePtr, llvm::ConstantExpr::getBitCast(NamePtr, \
- llvm::Type::getInt8PtrTy(Ctx)))
+ llvm::Type::getInt8PtrTy(Ctx)))
COVMAP_FUNC_RECORD(const uint32_t, llvm::Type::getInt32Ty(Ctx), NameSize, \
llvm::ConstantInt::get(llvm::Type::getInt32Ty(Ctx),\
NameValue.size()))
@@ -182,7 +182,7 @@ COVMAP_HEADER(uint32_t, Int32Ty, FilenamesSize, \
COVMAP_HEADER(uint32_t, Int32Ty, CoverageSize, \
llvm::ConstantInt::get(Int32Ty, CoverageMappingSize))
COVMAP_HEADER(uint32_t, Int32Ty, Version, \
- llvm::ConstantInt::get(Int32Ty, CoverageMappingVersion1))
+ llvm::ConstantInt::get(Int32Ty, CoverageMappingCurrentVersion))
#undef COVMAP_HEADER
/* COVMAP_HEADER end. */
@@ -190,7 +190,8 @@ COVMAP_HEADER(uint32_t, Int32Ty, Version, \
#ifdef INSTR_PROF_VALUE_PROF_DATA
#define INSTR_PROF_DATA_DEFINED
-/*!
+#define INSTR_PROF_MAX_NUM_VAL_PER_SITE 255
+/*!
* This is the header of the data structure that defines the on-disk
* layout of the value profile data of a particular kind for one function.
*/
@@ -202,7 +203,7 @@ typedef struct ValueProfRecord {
* otherwise the record for this kind won't be emitted.
*/
uint32_t NumValueSites;
- /*
+ /*
* The first element of the array that stores the number of profiled
* values for each value site. The size of the array is NumValueSites.
* Since NumValueSites is greater than zero, there is at least one
@@ -226,7 +227,7 @@ typedef struct ValueProfRecord {
* \brief Return the number of value sites.
*/
uint32_t getNumValueSites() const { return NumValueSites; }
- /*!
+ /*!
* \brief Read data from this record and save it to Record.
*/
void deserializeTo(InstrProfRecord &Record,
@@ -247,10 +248,10 @@ typedef struct ValueProfRecord {
typedef struct ValueProfData {
/*
* Total size in bytes including this field. It must be a multiple
- * of sizeof(uint64_t).
+ * of sizeof(uint64_t).
*/
uint32_t TotalSize;
- /*
+ /*
 * The number of value profile kinds that have value profile data.
* In this implementation, a value profile kind is considered to
* have profile data if the number of value profile sites for the
@@ -260,7 +261,7 @@ typedef struct ValueProfData {
*/
uint32_t NumValueKinds;
- /*
+ /*
* Following are a sequence of variable length records. The prefix/header
* of each record is defined by ValueProfRecord type. The number of
* records is NumValueKinds.
@@ -314,7 +315,7 @@ typedef struct ValueProfData {
#endif
} ValueProfData;
-/*
+/*
 * The closure is designed to abstract away two types of value profile data:
* - InstrProfRecord which is the primary data structure used to
* represent profile data in host tools (reader, writer, and profile-use)
@@ -335,7 +336,7 @@ typedef struct ValueProfRecordClosure {
uint32_t (*GetNumValueData)(const void *Record, uint32_t VKind);
uint32_t (*GetNumValueDataForSite)(const void *R, uint32_t VK, uint32_t S);
- /*
+ /*
* After extracting the value profile data from the value profile record,
* this method is used to map the in-memory value to on-disk value. If
* the method is null, value will be written out untranslated.
@@ -346,7 +347,7 @@ typedef struct ValueProfRecordClosure {
ValueProfData *(*AllocValueProfData)(size_t TotalSizeInBytes);
} ValueProfRecordClosure;
-/*
+/*
* A wrapper struct that represents value profile runtime data.
* Like InstrProfRecord class which is used by profiling host tools,
 * ValueProfRuntimeRecord also implements the abstract interfaces defined in
@@ -384,7 +385,7 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
uint32_t getNumValueKindsRT(const void *R);
#undef INSTR_PROF_VALUE_PROF_DATA
-#endif /* INSTR_PROF_VALUE_PROF_DATA */
+#endif /* INSTR_PROF_VALUE_PROF_DATA */
#ifdef INSTR_PROF_COMMON_API_IMPL
@@ -412,7 +413,7 @@ uint32_t getValueProfRecordHeaderSize(uint32_t NumValueSites) {
return Size;
}
-/*!
+/*!
* \brief Return the total size of the value profile record including the
* header and the value data.
*/
@@ -432,7 +433,7 @@ InstrProfValueData *getValueProfRecordValueData(ValueProfRecord *This) {
This->NumValueSites));
}
-/*!
+/*!
* \brief Return the total number of value data for \c This record.
*/
INSTR_PROF_INLINE
@@ -444,7 +445,7 @@ uint32_t getValueProfRecordNumValueData(ValueProfRecord *This) {
return NumValueData;
}
-/*!
+/*!
* \brief Use this method to advance to the next \c This \c ValueProfRecord.
*/
INSTR_PROF_INLINE
@@ -465,7 +466,7 @@ ValueProfRecord *getFirstValueProfRecord(ValueProfData *This) {
/* Closure based interfaces. */
-/*!
+/*!
* Return the total size in bytes of the on-disk value profile data
* given the data stored in Record.
*/
@@ -535,7 +536,7 @@ ValueProfData *serializeValueProfDataFrom(ValueProfRecordClosure *Closure,
return VPD;
}
-/*
+/*
* The value profiler runtime library stores the value profile data
* for a given function in \c NumValueSites and \c Nodes structures.
* \c ValueProfRuntimeRecord class is used to encapsulate the runtime
@@ -639,7 +640,7 @@ static ValueProfRecordClosure RTRecordClosure = {0,
getValueForSiteRT,
allocValueProfDataRT};
-/*
+/*
* Return the size of ValueProfData structure to store data
* recorded in the runtime record.
*/
@@ -648,7 +649,7 @@ uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record) {
return getValueProfDataSize(&RTRecordClosure);
}
-/*
+/*
* Return a ValueProfData instance that stores the data collected
* from runtime. If \c DstData is provided by the caller, the value
 * profile data will be stored in *DstData and DstData is returned,
@@ -696,18 +697,31 @@ serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record,
/* Raw profile format version. */
#define INSTR_PROF_RAW_VERSION 2
+#define INSTR_PROF_INDEX_VERSION 3
+#define INSTR_PROF_COVMAP_VERSION 0
+
+/* Profile version is always of type uint64_t. Reserve the upper 8 bits in the
+ * version for other variants of the profile. We set the lowest bit of the
+ * upper 8 bits (i.e. bit 56) to 1 to indicate an IR-level instrumentation
+ * generated profile, and 0 for a Clang FE generated profile.
+ */
+#define VARIANT_MASKS_ALL 0xff00000000000000ULL
+#define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
/* Runtime section names and name strings. */
#define INSTR_PROF_DATA_SECT_NAME __llvm_prf_data
#define INSTR_PROF_NAME_SECT_NAME __llvm_prf_names
#define INSTR_PROF_CNTS_SECT_NAME __llvm_prf_cnts
+#define INSTR_PROF_COVMAP_SECT_NAME __llvm_covmap
-#define INSTR_PROF_DATA_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
-#define INSTR_PROF_NAME_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
-#define INSTR_PROF_CNTS_SECT_NAME_STR \
- INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
+#define INSTR_PROF_DATA_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_DATA_SECT_NAME)
+#define INSTR_PROF_NAME_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_NAME_SECT_NAME)
+#define INSTR_PROF_CNTS_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_CNTS_SECT_NAME)
+#define INSTR_PROF_COVMAP_SECT_NAME_STR \
+ INSTR_PROF_QUOTE(INSTR_PROF_COVMAP_SECT_NAME)
/* Macros to define start/stop section symbol for a given
* section on Linux. For instance
@@ -751,4 +765,3 @@ typedef struct ValueProfNode {
#else
#undef INSTR_PROF_DATA_DEFINED
#endif
-
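
The version comment above can be made concrete with a short sketch. Only
VARIANT_MASKS_ALL and GET_VERSION are defined in this file; the
VARIANT_MASK_IR_PROF name below is assumed, for illustration of the bit-56
flag the comment describes (macros restated so the sketch stands alone):

    #include <stdint.h>

    #define VARIANT_MASKS_ALL 0xff00000000000000ULL
    #define GET_VERSION(V) ((V) & ~VARIANT_MASKS_ALL)
    /* Assumed name, for illustration: bit 56 marks IR-level profiles. */
    #define VARIANT_MASK_IR_PROF (0x1ULL << 56)

    static int isIRInstrumentedProfile(uint64_t RawHeaderVersion) {
      return (RawHeaderVersion & VARIANT_MASK_IR_PROF) != 0;
    }

    static uint64_t indexFormatVersion(uint64_t RawHeaderVersion) {
      /* Strips the upper 8 variant bits, e.g. yielding
         INSTR_PROF_INDEX_VERSION for an index profile. */
      return GET_VERSION(RawHeaderVersion);
    }
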
diff --git a/include/llvm/ProfileData/SampleProf.h b/include/llvm/ProfileData/SampleProf.h
index 8df3fe803209..6c39cf9458dc 100644
--- a/include/llvm/ProfileData/SampleProf.h
+++ b/include/llvm/ProfileData/SampleProf.h
@@ -140,16 +140,9 @@ public:
/// around unsigned integers.
sampleprof_error addSamples(uint64_t S, uint64_t Weight = 1) {
bool Overflowed;
- if (Weight > 1) {
- S = SaturatingMultiply(S, Weight, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
- }
- NumSamples = SaturatingAdd(NumSamples, S, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
-
- return sampleprof_error::success;
+ NumSamples = SaturatingMultiplyAdd(S, Weight, NumSamples, &Overflowed);
+ return Overflowed ? sampleprof_error::counter_overflow
+ : sampleprof_error::success;
}
/// Add called function \p F with samples \p S.
@@ -161,16 +154,10 @@ public:
uint64_t Weight = 1) {
uint64_t &TargetSamples = CallTargets[F];
bool Overflowed;
- if (Weight > 1) {
- S = SaturatingMultiply(S, Weight, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
- }
- TargetSamples = SaturatingAdd(TargetSamples, S, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
-
- return sampleprof_error::success;
+ TargetSamples =
+ SaturatingMultiplyAdd(S, Weight, TargetSamples, &Overflowed);
+ return Overflowed ? sampleprof_error::counter_overflow
+ : sampleprof_error::success;
}
/// Return true if this sample record contains function calls.
@@ -215,29 +202,17 @@ public:
void dump() const;
sampleprof_error addTotalSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
- if (Weight > 1) {
- Num = SaturatingMultiply(Num, Weight, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
- }
- TotalSamples = SaturatingAdd(TotalSamples, Num, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
-
- return sampleprof_error::success;
+ TotalSamples =
+ SaturatingMultiplyAdd(Num, Weight, TotalSamples, &Overflowed);
+ return Overflowed ? sampleprof_error::counter_overflow
+ : sampleprof_error::success;
}
sampleprof_error addHeadSamples(uint64_t Num, uint64_t Weight = 1) {
bool Overflowed;
- if (Weight > 1) {
- Num = SaturatingMultiply(Num, Weight, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
- }
- TotalHeadSamples = SaturatingAdd(TotalHeadSamples, Num, &Overflowed);
- if (Overflowed)
- return sampleprof_error::counter_overflow;
-
- return sampleprof_error::success;
+ TotalHeadSamples =
+ SaturatingMultiplyAdd(Num, Weight, TotalHeadSamples, &Overflowed);
+ return Overflowed ? sampleprof_error::counter_overflow
+ : sampleprof_error::success;
}
sampleprof_error addBodySamples(uint32_t LineOffset, uint32_t Discriminator,
uint64_t Num, uint64_t Weight = 1) {
diff --git a/include/llvm/Support/ARMTargetParser.def b/include/llvm/Support/ARMTargetParser.def
index c895b095bc5b..d94a51b8bcf9 100644
--- a/include/llvm/Support/ARMTargetParser.def
+++ b/include/llvm/Support/ARMTargetParser.def
@@ -96,7 +96,7 @@ ARM_ARCH("iwmmxt", AK_IWMMXT, "iwmmxt", "", ARMBuildAttrs::CPUArch::v5TE,
FK_NONE, AEK_NONE)
ARM_ARCH("iwmmxt2", AK_IWMMXT2, "iwmmxt2", "", ARMBuildAttrs::CPUArch::v5TE,
FK_NONE, AEK_NONE)
-ARM_ARCH("xscale", AK_XSCALE, "xscale", "", ARMBuildAttrs::CPUArch::v5TE,
+ARM_ARCH("xscale", AK_XSCALE, "xscale", "v5e", ARMBuildAttrs::CPUArch::v5TE,
FK_NONE, AEK_NONE)
ARM_ARCH("armv7s", AK_ARMV7S, "7-S", "v7s", ARMBuildAttrs::CPUArch::v7,
FK_NEON_VFPV4, AEK_DSP)
diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h
index c608736fa956..043d82314609 100644
--- a/include/llvm/Support/Allocator.h
+++ b/include/llvm/Support/Allocator.h
@@ -187,6 +187,7 @@ public:
/// \brief Deallocate all but the current slab and reset the current pointer
/// to the beginning of it, freeing all memory allocated so far.
void Reset() {
+ // Deallocate all but the first slab, and deallocate all custom-sized slabs.
DeallocateCustomSizedSlabs();
CustomSizedSlabs.clear();
@@ -198,7 +199,7 @@ public:
CurPtr = (char *)Slabs.front();
End = CurPtr + SlabSize;
- // Deallocate all but the first slab, and deallocate all custom-sized slabs.
+ __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0));
DeallocateSlabs(std::next(Slabs.begin()), Slabs.end());
Slabs.erase(std::next(Slabs.begin()), Slabs.end());
}
diff --git a/include/llvm/Support/COFF.h b/include/llvm/Support/COFF.h
index 0162175efe3e..0245632c96a0 100644
--- a/include/llvm/Support/COFF.h
+++ b/include/llvm/Support/COFF.h
@@ -656,6 +656,15 @@ namespace COFF {
}
};
+ enum CodeViewLine : unsigned {
+ CVL_LineNumberStartBits = 24,
+ CVL_LineNumberEndDeltaBits = 7,
+ CVL_LineNumberEndDeltaMask = (1U << CVL_LineNumberEndDeltaBits) - 1,
+ CVL_MaxLineNumber = (1U << CVL_LineNumberStartBits) - 1,
+ CVL_IsStatement = 1U << 31,
+ CVL_MaxColumnNumber = UINT16_MAX,
+ };
+
enum CodeViewIdentifiers {
DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS = 0x1,
DEBUG_SECTION_MAGIC = 0x4,
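
The CodeViewLine constants describe one 32-bit packed line record: bits 0-23
hold the starting line, bits 24-30 a delta to the ending line, and bit 31 the
is-statement flag. A small packing sketch against those constants;
packCodeViewLine is illustrative, not a CodeView API:

    #include <cstdint>

    // Mirrors the CodeViewLine layout above; out-of-range inputs are
    // masked to their field widths for illustration.
    static uint32_t packCodeViewLine(uint32_t StartLine, uint32_t EndLineDelta,
                                     bool IsStatement) {
      const uint32_t StartBits = 24;            // CVL_LineNumberStartBits
      const uint32_t DeltaMask = (1U << 7) - 1; // CVL_LineNumberEndDeltaMask
      uint32_t V = StartLine & ((1U << StartBits) - 1); // <= CVL_MaxLineNumber
      V |= (EndLineDelta & DeltaMask) << StartBits;
      if (IsStatement)
        V |= 1U << 31;                          // CVL_IsStatement
      return V;
    }
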
diff --git a/include/llvm/Support/ELF.h b/include/llvm/Support/ELF.h
index 97708a7cdd63..e24420fc1fe4 100644
--- a/include/llvm/Support/ELF.h
+++ b/include/llvm/Support/ELF.h
@@ -309,7 +309,12 @@ enum {
EM_COOL = 217, // iCelero CoolEngine
EM_NORC = 218, // Nanoradio Optimized RISC
EM_CSR_KALIMBA = 219, // CSR Kalimba architecture family
- EM_AMDGPU = 224 // AMD GPU architecture
+ EM_AMDGPU = 224, // AMD GPU architecture
+
+ // A request for an official value for WebAssembly has been made to the
+ // maintainer of the official registry of such numbers. As soon as one is
+ // allocated, this enum will be updated to use it.
+ EM_WEBASSEMBLY = 0x4157, // WebAssembly architecture
};
// Object file classes.
@@ -594,6 +599,11 @@ enum {
#include "ELFRelocs/Sparc.def"
};
+// ELF Relocation types for WebAssembly
+enum {
+#include "ELFRelocs/WebAssembly.def"
+};
+
#undef ELF_RELOC
// Section header.
@@ -1024,7 +1034,10 @@ enum {
PT_AMDGPU_HSA_LOAD_GLOBAL_PROGRAM = 0x60000000,
PT_AMDGPU_HSA_LOAD_GLOBAL_AGENT = 0x60000001,
PT_AMDGPU_HSA_LOAD_READONLY_AGENT = 0x60000002,
- PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003
+ PT_AMDGPU_HSA_LOAD_CODE_AGENT = 0x60000003,
+
+ // WebAssembly program header types.
+ PT_WEBASSEMBLY_FUNCTIONS = PT_LOPROC + 0, // Function definitions.
};
// Segment flag bits.
diff --git a/include/llvm/Support/ELFRelocs/WebAssembly.def b/include/llvm/Support/ELFRelocs/WebAssembly.def
new file mode 100644
index 000000000000..9a34349efb96
--- /dev/null
+++ b/include/llvm/Support/ELFRelocs/WebAssembly.def
@@ -0,0 +1,8 @@
+
+#ifndef ELF_RELOC
+#error "ELF_RELOC must be defined"
+#endif
+
+ELF_RELOC(R_WEBASSEMBLY_NONE, 0)
+ELF_RELOC(R_WEBASSEMBLY_DATA, 1)
+ELF_RELOC(R_WEBASSEMBLY_FUNCTION, 2)
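
The new .def file follows the usual ELFRelocs pattern: a client defines
ELF_RELOC, includes the file, and gets one expansion per relocation, which is
exactly what the enum added to ELF.h above does. Written out by hand:

    // What "enum { #include "ELFRelocs/WebAssembly.def" };" expands to,
    // with ELF.h's ELF_RELOC(name, value) -> "name = value," macro:
    #define ELF_RELOC(name, value) name = value,
    enum : unsigned {
      ELF_RELOC(R_WEBASSEMBLY_NONE, 0)
      ELF_RELOC(R_WEBASSEMBLY_DATA, 1)
      ELF_RELOC(R_WEBASSEMBLY_FUNCTION, 2)
    };
    #undef ELF_RELOC
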
diff --git a/include/llvm/Support/GenericDomTree.h b/include/llvm/Support/GenericDomTree.h
index 8751f272cd29..8bae582d18c7 100644
--- a/include/llvm/Support/GenericDomTree.h
+++ b/include/llvm/Support/GenericDomTree.h
@@ -724,25 +724,17 @@ public:
if (!this->IsPostDominators) {
// Initialize root
NodeT *entry = TraitsTy::getEntryNode(&F);
- this->Roots.push_back(entry);
- this->IDoms[entry] = nullptr;
- this->DomTreeNodes[entry] = nullptr;
+ addRoot(entry);
Calculate<FT, NodeT *>(*this, F);
} else {
// Initialize the roots list
for (typename TraitsTy::nodes_iterator I = TraitsTy::nodes_begin(&F),
E = TraitsTy::nodes_end(&F);
- I != E; ++I) {
+ I != E; ++I)
if (TraitsTy::child_begin(&*I) == TraitsTy::child_end(&*I))
addRoot(&*I);
- // Prepopulate maps so that we don't get iterator invalidation issues
- // later.
- this->IDoms[&*I] = nullptr;
- this->DomTreeNodes[&*I] = nullptr;
- }
-
Calculate<FT, Inverse<NodeT *>>(*this, F);
}
}
diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h
index 8111aeebe6ee..408ae3c339a2 100644
--- a/include/llvm/Support/MathExtras.h
+++ b/include/llvm/Support/MathExtras.h
@@ -717,6 +717,25 @@ SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) {
return Z;
}
+/// \brief Multiply two unsigned integers, X and Y, and add the unsigned
+/// integer, A to the product. Clamp the result to the maximum representable
+/// value of T on overflow. ResultOverflowed indicates if the result is larger
+/// than the maximum representable value of type T.
+/// Note that this is purely a convenience function as there is no distinction
+/// as to where overflow occurred in a 'fused' multiply-add for unsigned numbers.
+template <typename T>
+typename std::enable_if<std::is_unsigned<T>::value, T>::type
+SaturatingMultiplyAdd(T X, T Y, T A, bool *ResultOverflowed = nullptr) {
+ bool Dummy;
+ bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy;
+
+ T Product = SaturatingMultiply(X, Y, &Overflowed);
+ if (Overflowed)
+ return Product;
+
+ return SaturatingAdd(A, Product, &Overflowed);
+}
+
extern const float huge_valf;
} // End llvm namespace
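
SaturatingMultiplyAdd is the helper the SampleProf.h accessors above now
funnel through. A minimal usage sketch; addWeighted is a hypothetical wrapper:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    // Computes Total + S * Weight, clamped to UINT64_MAX; Saturated reports
    // whether the multiply or the add overflowed.
    static uint64_t addWeighted(uint64_t Total, uint64_t S, uint64_t Weight,
                                bool &Saturated) {
      return llvm::SaturatingMultiplyAdd(S, Weight, Total, &Saturated);
    }
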
diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h
index 0c374a070ce8..78d2fadc5190 100644
--- a/include/llvm/Transforms/IPO.h
+++ b/include/llvm/Transforms/IPO.h
@@ -183,12 +183,20 @@ ModulePass *createBlockExtractorPass();
ModulePass *createStripDeadPrototypesPass();
//===----------------------------------------------------------------------===//
-/// createFunctionAttrsPass - This pass discovers functions that do not access
-/// memory, or only read memory, and gives them the readnone/readonly attribute.
-/// It also discovers function arguments that are not captured by the function
-/// and marks them with the nocapture attribute.
+/// createPostOrderFunctionAttrsPass - This pass walks SCCs of the call graph
+/// in post-order to deduce and propagate function attributes. It can discover
+/// functions that do not access memory, or only read memory, and give them the
+/// readnone/readonly attribute. It also discovers function arguments that are
+/// not captured by the function and marks them with the nocapture attribute.
///
-Pass *createFunctionAttrsPass();
+Pass *createPostOrderFunctionAttrsPass();
+
+//===----------------------------------------------------------------------===//
+/// createReversePostOrderFunctionAttrsPass - This pass walks SCCs of the call
+/// graph in RPO to deduce and propagate function attributes. Currently it
+/// only handles synthesizing norecurse attributes.
+///
+Pass *createReversePostOrderFunctionAttrsPass();
//===----------------------------------------------------------------------===//
/// createMergeFunctionsPass - This pass discovers identical functions and
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 92a1d52f1011..4f006f2adeef 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -147,42 +147,12 @@ void CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
ValueMapTypeRemapper *TypeMapper = nullptr,
ValueMaterializer *Materializer = nullptr);
-/// A helper class used with CloneAndPruneIntoFromInst to change the default
-/// behavior while instructions are being cloned.
-class CloningDirector {
-public:
- /// This enumeration describes the way CloneAndPruneIntoFromInst should
- /// proceed after the CloningDirector has examined an instruction.
- enum CloningAction {
- ///< Continue cloning the instruction (default behavior).
- CloneInstruction,
- ///< Skip this instruction but continue cloning the current basic block.
- SkipInstruction,
- ///< Skip this instruction and stop cloning the current basic block.
- StopCloningBB,
- ///< Don't clone the terminator but clone the current block's successors.
- CloneSuccessors
- };
-
- virtual ~CloningDirector() {}
-
- /// Subclasses must override this function to customize cloning behavior.
- virtual CloningAction handleInstruction(ValueToValueMapTy &VMap,
- const Instruction *Inst,
- BasicBlock *NewBB) = 0;
-
- virtual ValueMapTypeRemapper *getTypeRemapper() { return nullptr; }
- virtual ValueMaterializer *getValueMaterializer() { return nullptr; }
-};
-
void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
const Instruction *StartingInst,
ValueToValueMapTy &VMap, bool ModuleLevelChanges,
- SmallVectorImpl<ReturnInst*> &Returns,
- const char *NameSuffix = "",
- ClonedCodeInfo *CodeInfo = nullptr,
- CloningDirector *Director = nullptr);
-
+ SmallVectorImpl<ReturnInst *> &Returns,
+ const char *NameSuffix = "",
+ ClonedCodeInfo *CodeInfo = nullptr);
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
/// except that it does some simple constant prop and DCE on the fly. The
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 81b376f0c212..911c6f14da0b 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -42,6 +42,7 @@ class TargetLibraryInfo;
class TargetTransformInfo;
class DIBuilder;
class DominatorTree;
+class LazyValueInfo;
template<typename T> class SmallVectorImpl;
@@ -303,7 +304,7 @@ void removeUnwindEdge(BasicBlock *BB);
/// \brief Remove all blocks that can not be reached from the function's entry.
///
/// Returns true if any basic block was removed.
-bool removeUnreachableBlocks(Function &F);
+bool removeUnreachableBlocks(Function &F, LazyValueInfo *LVI = nullptr);
/// \brief Combine the metadata of two instructions so that K can replace J
///
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index 0adce0c9602d..d74ada6faa61 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -207,6 +207,7 @@ module LLVM_Utils {
textual header "Support/ELFRelocs/Sparc.def"
textual header "Support/ELFRelocs/SystemZ.def"
textual header "Support/ELFRelocs/x86_64.def"
+ textual header "Support/ELFRelocs/WebAssembly.def"
}
// This part of the module is usable from both C and C++ code.
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 85404d87a611..c3d280350b90 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -586,8 +586,13 @@ FunctionModRefBehavior BasicAAResult::getModRefBehavior(const Function *F) {
return FunctionModRefBehavior(AAResultBase::getModRefBehavior(F) & Min);
}
-ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
- unsigned ArgIdx) {
+/// Returns true if this is a writeonly (i.e. Mod-only) parameter. Currently,
+/// we don't have a writeonly attribute, so this only knows about builtin
+/// intrinsics and target library functions. We could consider adding a
+/// writeonly attribute in the future and moving all of these facts to either
+/// Intrinsics.td or InferFunctionAttr.cpp.
+static bool isWriteOnlyParam(ImmutableCallSite CS, unsigned ArgIdx,
+ const TargetLibraryInfo &TLI) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction()))
switch (II->getIntrinsicID()) {
default:
@@ -597,9 +602,9 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
case Intrinsic::memmove:
// We don't currently have a writeonly attribute. All other properties
// of these intrinsics are nicely described via attributes in
- // Intrinsics.td and handled generically below.
+ // Intrinsics.td and handled generically.
if (ArgIdx == 0)
- return MRI_Mod;
+ return true;
}
// We can bound the aliasing properties of memset_pattern16 just as we can
@@ -609,7 +614,22 @@ ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
// handled via InferFunctionAttr.
if (CS.getCalledFunction() && isMemsetPattern16(CS.getCalledFunction(), TLI))
if (ArgIdx == 0)
- return MRI_Mod;
+ return true;
+
+ // TODO: memset_pattern4, memset_pattern8
+ // TODO: _chk variants
+ // TODO: strcmp, strcpy
+
+ return false;
+}
+
+ModRefInfo BasicAAResult::getArgModRefInfo(ImmutableCallSite CS,
+ unsigned ArgIdx) {
+
+ // Emulate the missing writeonly attribute by checking for known builtin
+ // intrinsics and target library functions.
+ if (isWriteOnlyParam(CS, ArgIdx, TLI))
+ return MRI_Mod;
if (CS.paramHasAttr(ArgIdx + 1, Attribute::ReadOnly))
return MRI_Ref;
diff --git a/lib/Analysis/CallGraphSCCPass.cpp b/lib/Analysis/CallGraphSCCPass.cpp
index 07b389a2a139..6dd1d0a066b6 100644
--- a/lib/Analysis/CallGraphSCCPass.cpp
+++ b/lib/Analysis/CallGraphSCCPass.cpp
@@ -612,9 +612,10 @@ namespace {
bool runOnSCC(CallGraphSCC &SCC) override {
Out << Banner;
for (CallGraphNode *CGN : SCC) {
- if (CGN->getFunction())
- CGN->getFunction()->print(Out);
- else
+ if (CGN->getFunction()) {
+ if (isFunctionInPrintList(CGN->getFunction()->getName()))
+ CGN->getFunction()->print(Out);
+ } else
Out << "\nPrinting <null> Function\n";
}
return false;
diff --git a/lib/Analysis/GlobalsModRef.cpp b/lib/Analysis/GlobalsModRef.cpp
index 249f3954d554..1babb822074b 100644
--- a/lib/Analysis/GlobalsModRef.cpp
+++ b/lib/Analysis/GlobalsModRef.cpp
@@ -358,21 +358,6 @@ bool GlobalsAAResult::AnalyzeUsesOfPointer(Value *V,
if (CS.isArgOperand(&U) && isFreeCall(I, &TLI)) {
if (Writers)
Writers->insert(CS->getParent()->getParent());
- } else if (CS.doesNotCapture(CS.getDataOperandNo(&U))) {
- Function *ParentF = CS->getParent()->getParent();
- // A nocapture argument may be read from or written to, but does not
- // escape unless the call can somehow recurse.
- //
- // nocapture "indicates that the callee does not make any copies of
- // the pointer that outlive itself". Therefore if we directly or
- // indirectly recurse, we must treat the pointer as escaping.
- if (FunctionToSCCMap[ParentF] ==
- FunctionToSCCMap[CS.getCalledFunction()])
- return true;
- if (Readers)
- Readers->insert(ParentF);
- if (Writers)
- Writers->insert(ParentF);
} else {
return true; // Argument of an unknown call.
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b89ff268d11e..6dfe62596275 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -70,7 +70,7 @@ static Value *SimplifyOrInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyXorInst(Value *, Value *, const Query &, unsigned);
static Value *SimplifyTruncInst(Value *, Type *, const Query &, unsigned);
-/// getFalse - For a boolean type, or a vector of boolean type, return false, or
+/// For a boolean type, or a vector of boolean type, return false, or
/// a vector with every element false, as appropriate for the type.
static Constant *getFalse(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
@@ -78,7 +78,7 @@ static Constant *getFalse(Type *Ty) {
return Constant::getNullValue(Ty);
}
-/// getTrue - For a boolean type, or a vector of boolean type, return true, or
+/// For a boolean type, or a vector of boolean type, return true, or
/// a vector with every element true, as appropriate for the type.
static Constant *getTrue(Type *Ty) {
assert(Ty->getScalarType()->isIntegerTy(1) &&
@@ -100,7 +100,7 @@ static bool isSameCompare(Value *V, CmpInst::Predicate Pred, Value *LHS,
CRHS == LHS;
}
-/// ValueDominatesPHI - Does the given value dominate the specified phi node?
+/// Does the given value dominate the specified phi node?
static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
Instruction *I = dyn_cast<Instruction>(V);
if (!I)
@@ -131,8 +131,8 @@ static bool ValueDominatesPHI(Value *V, PHINode *P, const DominatorTree *DT) {
return false;
}
-/// ExpandBinOp - Simplify "A op (B op' C)" by distributing op over op', turning
-/// it into "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
+/// Simplify "A op (B op' C)" by distributing op over op', turning it into
+/// "(A op B) op' (A op C)". Here "op" is given by Opcode and "op'" is
/// given by OpcodeToExpand, while "A" corresponds to LHS and "B op' C" to RHS.
/// Also performs the transform "(A op' B) op C" -> "(A op C) op' (B op C)".
/// Returns the simplified value, or null if no simplification was performed.
@@ -193,8 +193,8 @@ static Value *ExpandBinOp(unsigned Opcode, Value *LHS, Value *RHS,
return nullptr;
}
-/// SimplifyAssociativeBinOp - Generic simplifications for associative binary
-/// operations. Returns the simpler value, or null if none was found.
+/// Generic simplifications for associative binary operations.
+/// Returns the simpler value, or null if none was found.
static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
Instruction::BinaryOps Opcode = (Instruction::BinaryOps)Opc;
@@ -290,10 +290,10 @@ static Value *SimplifyAssociativeBinOp(unsigned Opc, Value *LHS, Value *RHS,
return nullptr;
}
-/// ThreadBinOpOverSelect - In the case of a binary operation with a select
-/// instruction as an operand, try to simplify the binop by seeing whether
-/// evaluating it on both branches of the select results in the same value.
-/// Returns the common value if so, otherwise returns null.
+/// In the case of a binary operation with a select instruction as an operand,
+/// try to simplify the binop by seeing whether evaluating it on both branches
+/// of the select results in the same value. Returns the common value if so,
+/// otherwise returns null.
static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -362,10 +362,9 @@ static Value *ThreadBinOpOverSelect(unsigned Opcode, Value *LHS, Value *RHS,
return nullptr;
}
-/// ThreadCmpOverSelect - In the case of a comparison with a select instruction,
-/// try to simplify the comparison by seeing whether both branches of the select
-/// result in the same value. Returns the common value if so, otherwise returns
-/// null.
+/// In the case of a comparison with a select instruction, try to simplify the
+/// comparison by seeing whether both branches of the select result in the same
+/// value. Returns the common value if so, otherwise returns null.
static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
Value *RHS, const Query &Q,
unsigned MaxRecurse) {
@@ -444,10 +443,10 @@ static Value *ThreadCmpOverSelect(CmpInst::Predicate Pred, Value *LHS,
return nullptr;
}
-/// ThreadBinOpOverPHI - In the case of a binary operation with an operand that
-/// is a PHI instruction, try to simplify the binop by seeing whether evaluating
-/// it on the incoming phi values yields the same result for every value. If so
-/// returns the common value, otherwise returns null.
+/// In the case of a binary operation with an operand that is a PHI instruction,
+/// try to simplify the binop by seeing whether evaluating it on the incoming
+/// phi values yields the same result for every value. If so returns the common
+/// value, otherwise returns null.
static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -486,10 +485,10 @@ static Value *ThreadBinOpOverPHI(unsigned Opcode, Value *LHS, Value *RHS,
return CommonValue;
}
-/// ThreadCmpOverPHI - In the case of a comparison with a PHI instruction, try
-/// try to simplify the comparison by seeing whether comparing with all of the
-/// incoming phi values yields the same result every time. If so returns the
-/// common result, otherwise returns null.
+/// In the case of a comparison with a PHI instruction, try to simplify the
+/// comparison by seeing whether comparing with all of the incoming phi values
+/// yields the same result every time. If so returns the common result,
+/// otherwise returns null.
static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
// Recursion is always used, so bail out at once if we already hit the limit.
@@ -524,8 +523,8 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS,
return CommonValue;
}
-/// SimplifyAddInst - Given operands for an Add, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Add, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -656,8 +655,8 @@ static Constant *computePointerDifference(const DataLayout &DL, Value *LHS,
return ConstantExpr::getSub(LHSOffset, RHSOffset);
}
-/// SimplifySubInst - Given operands for a Sub, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Sub, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0))
@@ -889,8 +888,8 @@ static Value *SimplifyFMulInst(Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifyMulInst - Given operands for a Mul, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Mul, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -989,8 +988,8 @@ Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyDiv - Given operands for an SDiv or UDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SDiv or UDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1075,8 +1074,8 @@ static Value *SimplifyDiv(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifySDivInst - Given operands for an SDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySDivInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::SDiv, Op0, Op1, Q, MaxRecurse))
@@ -1093,8 +1092,8 @@ Value *llvm::SimplifySDivInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyUDivInst - Given operands for a UDiv, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a UDiv, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyUDivInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyDiv(Instruction::UDiv, Op0, Op1, Q, MaxRecurse))
@@ -1154,8 +1153,8 @@ Value *llvm::SimplifyFDivInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-/// SimplifyRem - Given operands for an SRem or URem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SRem or URem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1215,8 +1214,8 @@ static Value *SimplifyRem(Instruction::BinaryOps Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifySRemInst - Given operands for an SRem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an SRem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySRemInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::SRem, Op0, Op1, Q, MaxRecurse))
@@ -1233,8 +1232,8 @@ Value *llvm::SimplifySRemInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyURemInst - Given operands for a URem, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a URem, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyURemInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Value *V = SimplifyRem(Instruction::URem, Op0, Op1, Q, MaxRecurse))
@@ -1279,7 +1278,7 @@ Value *llvm::SimplifyFRemInst(Value *Op0, Value *Op1, FastMathFlags FMF,
RecursionLimit);
}
-/// isUndefShift - Returns true if a shift by \c Amount always yields undef.
+/// Returns true if a shift by \c Amount always yields undef.
static bool isUndefShift(Value *Amount) {
Constant *C = dyn_cast<Constant>(Amount);
if (!C)
@@ -1306,8 +1305,8 @@ static bool isUndefShift(Value *Amount) {
return false;
}
-/// SimplifyShift - Given operands for an Shl, LShr or AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Shl, LShr or AShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyShift(unsigned Opcode, Value *Op0, Value *Op1,
const Query &Q, unsigned MaxRecurse) {
if (Constant *C0 = dyn_cast<Constant>(Op0)) {
@@ -1375,8 +1374,8 @@ static Value *SimplifyRightShift(unsigned Opcode, Value *Op0, Value *Op1,
return nullptr;
}
-/// SimplifyShlInst - Given operands for an Shl, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Shl, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyShift(Instruction::Shl, Op0, Op1, Q, MaxRecurse))
@@ -1402,8 +1401,8 @@ Value *llvm::SimplifyShlInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW,
RecursionLimit);
}
-/// SimplifyLShrInst - Given operands for an LShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an LShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::LShr, Op0, Op1, isExact, Q,
@@ -1427,8 +1426,8 @@ Value *llvm::SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact,
RecursionLimit);
}
-/// SimplifyAShrInst - Given operands for an AShr, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an AShr, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact,
const Query &Q, unsigned MaxRecurse) {
if (Value *V = SimplifyRightShift(Instruction::AShr, Op0, Op1, isExact, Q,
@@ -1502,8 +1501,8 @@ static Value *simplifyUnsignedRangeCheck(ICmpInst *ZeroICmp,
return nullptr;
}
-// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range
-// of possible values cannot be satisfied.
+/// Simplify (and (icmp ...) (icmp ...)) to true when we can tell that the range
+/// of possible values cannot be satisfied.
static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
@@ -1554,8 +1553,8 @@ static Value *SimplifyAndOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-/// SimplifyAndInst - Given operands for an And, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an And, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1661,8 +1660,8 @@ Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
-// contains all possible values.
+/// Simplify (or (icmp ...) (icmp ...)) to true when we can tell that the union
+/// contains all possible values.
static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
ICmpInst::Predicate Pred0, Pred1;
ConstantInt *CI1, *CI2;
@@ -1713,8 +1712,8 @@ static Value *SimplifyOrOfICmps(ICmpInst *Op0, ICmpInst *Op1) {
return nullptr;
}
-/// SimplifyOrInst - Given operands for an Or, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an Or, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyOrInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1849,8 +1848,8 @@ Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1, const DataLayout &DL,
RecursionLimit);
}
-/// SimplifyXorInst - Given operands for a Xor, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a Xor, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyXorInst(Value *Op0, Value *Op1, const Query &Q,
unsigned MaxRecurse) {
if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
@@ -1910,9 +1909,9 @@ static Type *GetCompareTy(Value *Op) {
return CmpInst::makeCmpResultType(Op->getType());
}
-/// ExtractEquivalentCondition - Rummage around inside V looking for something
-/// equivalent to the comparison "LHS Pred RHS". Return such a value if found,
-/// otherwise return null. Helper function for analyzing max/min idioms.
+/// Rummage around inside V looking for something equivalent to the comparison
+/// "LHS Pred RHS". Return such a value if found, otherwise return null.
+/// Helper function for analyzing max/min idioms.
static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred,
Value *LHS, Value *RHS) {
SelectInst *SI = dyn_cast<SelectInst>(V);
@@ -2100,21 +2099,17 @@ static Constant *computePointerICmp(const DataLayout &DL,
 // that might be resolved lazily to symbols in another dynamically-loaded
// library (and, thus, could be malloc'ed by the implementation).
auto IsAllocDisjoint = [](SmallVectorImpl<Value *> &Objects) {
- return std::all_of(Objects.begin(), Objects.end(),
- [](Value *V){
- if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
- return AI->getParent() && AI->getParent()->getParent() &&
- AI->isStaticAlloca();
- if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
- return (GV->hasLocalLinkage() ||
- GV->hasHiddenVisibility() ||
- GV->hasProtectedVisibility() ||
- GV->hasUnnamedAddr()) &&
- !GV->isThreadLocal();
- if (const Argument *A = dyn_cast<Argument>(V))
- return A->hasByValAttr();
- return false;
- });
+ return std::all_of(Objects.begin(), Objects.end(), [](Value *V) {
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V))
+ return AI->getParent() && AI->getFunction() && AI->isStaticAlloca();
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(V))
+ return (GV->hasLocalLinkage() || GV->hasHiddenVisibility() ||
+ GV->hasProtectedVisibility() || GV->hasUnnamedAddr()) &&
+ !GV->isThreadLocal();
+ if (const Argument *A = dyn_cast<Argument>(V))
+ return A->hasByValAttr();
+ return false;
+ });
};
if ((IsNAC(LHSUObjs) && IsAllocDisjoint(RHSUObjs)) ||
@@ -2127,8 +2122,8 @@ static Constant *computePointerICmp(const DataLayout &DL,
return nullptr;
}
-/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an ICmpInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
@@ -3102,8 +3097,8 @@ Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
RecursionLimit);
}
-/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an FCmpInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
FastMathFlags FMF, const Query &Q,
unsigned MaxRecurse) {
@@ -3227,8 +3222,7 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyWithOpReplaced - See if V simplifies when its operand Op is
-/// replaced with RepOp.
+/// See if V simplifies when its operand Op is replaced with RepOp.
static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
const Query &Q,
unsigned MaxRecurse) {
@@ -3311,8 +3305,8 @@ static const Value *SimplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
return nullptr;
}
-/// SimplifySelectInst - Given operands for a SelectInst, see if we can fold
-/// the result. If not, this returns null.
+/// Given operands for a SelectInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifySelectInst(Value *CondVal, Value *TrueVal,
Value *FalseVal, const Query &Q,
unsigned MaxRecurse) {
@@ -3449,8 +3443,8 @@ Value *llvm::SimplifySelectInst(Value *Cond, Value *TrueVal, Value *FalseVal,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyGEPInst - Given operands for an GetElementPtrInst, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for an GetElementPtrInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyGEPInst(Type *SrcTy, ArrayRef<Value *> Ops,
const Query &Q, unsigned) {
// The type of the GEP pointer operand.
@@ -3542,8 +3536,8 @@ Value *llvm::SimplifyGEPInst(ArrayRef<Value *> Ops, const DataLayout &DL,
Ops, Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyInsertValueInst - Given operands for an InsertValueInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an InsertValueInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyInsertValueInst(Value *Agg, Value *Val,
ArrayRef<unsigned> Idxs, const Query &Q,
unsigned) {
@@ -3579,8 +3573,8 @@ Value *llvm::SimplifyInsertValueInst(
RecursionLimit);
}
-/// SimplifyExtractValueInst - Given operands for an ExtractValueInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an ExtractValueInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
const Query &, unsigned) {
if (auto *CAgg = dyn_cast<Constant>(Agg))
@@ -3614,8 +3608,8 @@ Value *llvm::SimplifyExtractValueInst(Value *Agg, ArrayRef<unsigned> Idxs,
RecursionLimit);
}
-/// SimplifyExtractElementInst - Given operands for an ExtractElementInst, see if we
-/// can fold the result. If not, this returns null.
+/// Given operands for an ExtractElementInst, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyExtractElementInst(Value *Vec, Value *Idx, const Query &,
unsigned) {
if (auto *CVec = dyn_cast<Constant>(Vec)) {
@@ -3646,7 +3640,7 @@ Value *llvm::SimplifyExtractElementInst(
RecursionLimit);
}
-/// SimplifyPHINode - See if we can fold the given phi. If not, returns null.
+/// See if we can fold the given phi. If not, returns null.
static Value *SimplifyPHINode(PHINode *PN, const Query &Q) {
// If all of the PHI's incoming values are the same then replace the PHI node
// with the common value.
@@ -3696,8 +3690,8 @@ Value *llvm::SimplifyTruncInst(Value *Op, Type *Ty, const DataLayout &DL,
//=== Helper functions for higher up the class hierarchy.
-/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a BinaryOperator, see if we can fold the result.
+/// If not, this returns null.
static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
switch (Opcode) {
@@ -3763,8 +3757,8 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
}
}
-/// SimplifyFPBinOp - Given operands for a BinaryOperator, see if we can
-/// fold the result. If not, this returns null.
+/// Given operands for a BinaryOperator, see if we can fold the result.
+/// If not, this returns null.
/// In contrast to SimplifyBinOp, try to use FastMathFlag when folding the
/// result. In case we don't need FastMathFlags, simply fall to SimplifyBinOp.
static Value *SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
@@ -3799,8 +3793,7 @@ Value *llvm::SimplifyFPBinOp(unsigned Opcode, Value *LHS, Value *RHS,
RecursionLimit);
}
-/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
-/// fold the result.
+/// Given operands for a CmpInst, see if we can fold the result.
static Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const Query &Q, unsigned MaxRecurse) {
if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
@@ -3938,8 +3931,8 @@ Value *llvm::SimplifyCall(Value *V, ArrayRef<Value *> Args,
Query(DL, TLI, DT, AC, CxtI), RecursionLimit);
}
-/// SimplifyInstruction - See if we can compute a simplified version of this
-/// instruction. If not, this returns null.
+/// See if we can compute a simplified version of this instruction.
+/// If not, this returns null.
Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout &DL,
const TargetLibraryInfo *TLI,
const DominatorTree *DT, AssumptionCache *AC) {
diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp
index d7896ade3543..8bcdcb862014 100644
--- a/lib/Analysis/LoopAccessAnalysis.cpp
+++ b/lib/Analysis/LoopAccessAnalysis.cpp
@@ -845,6 +845,7 @@ int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr,
if (Lp != AR->getLoop()) {
DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " <<
*Ptr << " SCEV: " << *PtrScev << "\n");
+ return 0;
}
// The address calculation must not wrap. Otherwise, a dependence could be
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 9ab9eead584f..0c725fcadff7 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -637,8 +637,10 @@ LoopInfo::LoopInfo(const DominatorTreeBase<BasicBlock> &DomTree) {
analyze(DomTree);
}
-void LoopInfo::updateUnloop(Loop *Unloop) {
- Unloop->markUnlooped();
+void LoopInfo::markAsRemoved(Loop *Unloop) {
+ assert(!Unloop->isInvalid() && "Loop has already been removed");
+ Unloop->invalidate();
+ RemovedLoops.push_back(Unloop);
// First handle the special case of no parent loop to simplify the algorithm.
if (!Unloop->getParentLoop()) {
diff --git a/lib/Analysis/LoopPass.cpp b/lib/Analysis/LoopPass.cpp
index dc424734dd56..8163231c3323 100644
--- a/lib/Analysis/LoopPass.cpp
+++ b/lib/Analysis/LoopPass.cpp
@@ -42,7 +42,11 @@ public:
}
bool runOnLoop(Loop *L, LPPassManager &) override {
- P.run(*L);
+ auto BBI = find_if(L->blocks().begin(), L->blocks().end(),
+ [](BasicBlock *BB) { return BB; });
+ if (BBI != L->blocks().end() &&
+ isFunctionInPrintList((*BBI)->getParent()->getName()))
+ P.run(*L);
return false;
}
};
@@ -174,8 +178,9 @@ bool LPPassManager::runOnFunction(Function &F) {
// Walk Loops
while (!LQ.empty()) {
-
+ bool LoopWasDeleted = false;
CurrentLoop = LQ.back();
+
// Run all passes on the current Loop.
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
LoopPass *P = getContainedPass(Index);
@@ -192,15 +197,15 @@ bool LPPassManager::runOnFunction(Function &F) {
Changed |= P->runOnLoop(CurrentLoop, *this);
}
+ LoopWasDeleted = CurrentLoop->isInvalid();
if (Changed)
dumpPassInfo(P, MODIFICATION_MSG, ON_LOOP_MSG,
- CurrentLoop->isUnloop()
- ? "<deleted>"
- : CurrentLoop->getHeader()->getName());
+ LoopWasDeleted ? "<deleted>"
+ : CurrentLoop->getHeader()->getName());
dumpPreservedSet(P);
- if (CurrentLoop->isUnloop()) {
+ if (LoopWasDeleted) {
// Notify passes that the loop is being deleted.
deleteSimpleAnalysisLoop(CurrentLoop);
} else {
@@ -222,12 +227,11 @@ bool LPPassManager::runOnFunction(Function &F) {
removeNotPreservedAnalysis(P);
recordAvailableAnalysis(P);
- removeDeadPasses(P, CurrentLoop->isUnloop()
- ? "<deleted>"
- : CurrentLoop->getHeader()->getName(),
+ removeDeadPasses(P, LoopWasDeleted ? "<deleted>"
+ : CurrentLoop->getHeader()->getName(),
ON_LOOP_MSG);
- if (CurrentLoop->isUnloop())
+ if (LoopWasDeleted)
// Do not run other passes on this loop.
break;
}
@@ -235,12 +239,11 @@ bool LPPassManager::runOnFunction(Function &F) {
// If the loop was deleted, release all the loop passes. This frees up
// some memory, and avoids trouble with the pass manager trying to call
// verifyAnalysis on them.
- if (CurrentLoop->isUnloop()) {
+ if (LoopWasDeleted) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
Pass *P = getContainedPass(Index);
freePass(P, "<deleted>", ON_LOOP_MSG);
}
- delete CurrentLoop;
}
// Pop the loop from queue after running all passes.
diff --git a/lib/Analysis/ScopedNoAliasAA.cpp b/lib/Analysis/ScopedNoAliasAA.cpp
index 029997adab9e..486f3a583284 100644
--- a/lib/Analysis/ScopedNoAliasAA.cpp
+++ b/lib/Analysis/ScopedNoAliasAA.cpp
@@ -26,7 +26,7 @@
// ... = load %ptr2, !alias.scope !{ !scope1, !scope2 }, !noalias !{ !scope1 }
//
// When evaluating an aliasing query, if one of the instructions is associated
-// has a set of noalias scopes in some domain that is superset of the alias
+// with a set of noalias scopes in some domain that is a superset of the alias
// scopes in that domain of some other instruction, then the two memory
// accesses are assumed not to alias.
//
diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp
index 805f3efb0814..9f923913ca27 100644
--- a/lib/Analysis/TypeBasedAliasAnalysis.cpp
+++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp
@@ -70,7 +70,7 @@
// A a;
// } B;
//
-// For an acess to B.a.s, we attach !5 (a path tag node) to the load/store
+// For an access to B.a.s, we attach !5 (a path tag node) to the load/store
// instruction. The base type is !4 (struct B), the access type is !2 (scalar
// type short) and the offset is 4.
//
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index abc57ed8bca0..a83e207bd265 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -2556,6 +2556,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
switch (I->getOpcode()) {
default: break;
+ // Unsigned integers are always nonnegative.
+ case Instruction::UIToFP:
+ return true;
case Instruction::FMul:
// x*x is always non-negative or a NaN.
if (I->getOperand(0) == I->getOperand(1))
@@ -2566,6 +2569,9 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
case Instruction::FRem:
return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Instruction::Select:
+ return CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(2), Depth+1);
case Instruction::FPExt:
case Instruction::FPTrunc:
// Widening/narrowing never change sign.
@@ -2574,6 +2580,12 @@ bool llvm::CannotBeOrderedLessThanZero(const Value *V, unsigned Depth) {
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::maxnum:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) ||
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
+ case Intrinsic::minnum:
+ return CannotBeOrderedLessThanZero(I->getOperand(0), Depth+1) &&
+ CannotBeOrderedLessThanZero(I->getOperand(1), Depth+1);
case Intrinsic::exp:
case Intrinsic::exp2:
case Intrinsic::fabs:
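The new cases encode simple sign-propagation facts: a uitofp result is always non-negative, a select is non-negative only when both arms are, maxnum needs only one non-negative operand (it returns the larger value), while minnum needs both (it returns the smaller). A standalone illustration of the min/max rules via the libm equivalents (not part of the patch):

#include <cassert>
#include <cmath>

int main() {
  // maxnum: non-negative if EITHER operand is known non-negative,
  // because the larger of the two is returned.
  assert(std::fmax(-5.0, 2.0) >= 0.0);
  // minnum: needs BOTH operands non-negative; a single negative
  // operand can become the result.
  assert(std::fmin(3.0, 2.0) >= 0.0);
  assert(std::fmin(-3.0, 2.0) < 0.0);
  return 0;
}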
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index c7606fd488a0..2ad4b32e3157 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -2654,8 +2654,6 @@ std::error_code BitcodeReader::parseConstants() {
return error("Invalid record");
Type *EltTy = cast<SequentialType>(CurTy)->getElementType();
- unsigned Size = Record.size();
-
if (EltTy->isIntegerTy(8)) {
SmallVector<uint8_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
@@ -2680,21 +2678,24 @@ std::error_code BitcodeReader::parseConstants() {
V = ConstantDataVector::get(Context, Elts);
else
V = ConstantDataArray::get(Context, Elts);
+ } else if (EltTy->isHalfTy()) {
+ SmallVector<uint16_t, 16> Elts(Record.begin(), Record.end());
+ if (isa<VectorType>(CurTy))
+ V = ConstantDataVector::getFP(Context, Elts);
+ else
+ V = ConstantDataArray::getFP(Context, Elts);
} else if (EltTy->isFloatTy()) {
- SmallVector<float, 16> Elts(Size);
- std::transform(Record.begin(), Record.end(), Elts.begin(), BitsToFloat);
+ SmallVector<uint32_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
- V = ConstantDataVector::get(Context, Elts);
+ V = ConstantDataVector::getFP(Context, Elts);
else
- V = ConstantDataArray::get(Context, Elts);
+ V = ConstantDataArray::getFP(Context, Elts);
} else if (EltTy->isDoubleTy()) {
- SmallVector<double, 16> Elts(Size);
- std::transform(Record.begin(), Record.end(), Elts.begin(),
- BitsToDouble);
+ SmallVector<uint64_t, 16> Elts(Record.begin(), Record.end());
if (isa<VectorType>(CurTy))
- V = ConstantDataVector::get(Context, Elts);
+ V = ConstantDataVector::getFP(Context, Elts);
else
- V = ConstantDataArray::get(Context, Elts);
+ V = ConstantDataArray::getFP(Context, Elts);
} else {
return error("Invalid type for value");
}
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index a1f87863757b..a899a0cc3ee4 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -1630,19 +1630,10 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal,
if (isa<IntegerType>(EltTy)) {
for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
Record.push_back(CDS->getElementAsInteger(i));
- } else if (EltTy->isFloatTy()) {
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union { float F; uint32_t I; };
- F = CDS->getElementAsFloat(i);
- Record.push_back(I);
- }
} else {
- assert(EltTy->isDoubleTy() && "Unknown ConstantData element type");
- for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
- union { double F; uint64_t I; };
- F = CDS->getElementAsDouble(i);
- Record.push_back(I);
- }
+ for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i)
+ Record.push_back(
+ CDS->getElementAsAPFloat(i).bitcastToAPInt().getLimitedValue());
}
} else if (isa<ConstantArray>(C) || isa<ConstantStruct>(C) ||
isa<ConstantVector>(C)) {
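Both sides of the change stop converting through host float/double values and move raw bit patterns instead: the reader feeds the record's integer payloads straight to getFP, and the writer extracts bits with bitcastToAPInt. A small standalone sketch of why that is the safer contract (hypothetical helper code, not part of the patch): a bit-level round trip preserves every payload, including NaN bits that a value-level conversion could disturb.

#include <cassert>
#include <cmath>
#include <cstdint>
#include <cstring>

int main() {
  float F = std::nanf("0x1234");       // NaN carrying a payload
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof F);    // writer side: value -> raw bits
  float G;
  std::memcpy(&G, &Bits, sizeof G);    // reader side: raw bits -> value
  uint32_t RoundTripped;
  std::memcpy(&RoundTripped, &G, sizeof G);
  assert(Bits == RoundTripped);        // bit pattern preserved exactly
  return 0;
}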
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index be7eafbeb83d..5f67d3daa97f 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -192,22 +192,26 @@ bool AsmPrinter::doInitialization(Module &M) {
// use the directive, where it would need the same conditionalization
// anyway.
Triple TT(getTargetTriple());
- if (TT.isOSDarwin()) {
+ // If there is a version specified, Major will be non-zero.
+ if (TT.isOSDarwin() && TT.getOSMajorVersion() != 0) {
unsigned Major, Minor, Update;
- TT.getOSVersion(Major, Minor, Update);
- // If there is a version specified, Major will be non-zero.
- if (Major) {
- MCVersionMinType VersionType;
- if (TT.isWatchOS())
- VersionType = MCVM_WatchOSVersionMin;
- else if (TT.isTvOS())
- VersionType = MCVM_TvOSVersionMin;
- else if (TT.isMacOSX())
- VersionType = MCVM_OSXVersionMin;
- else
- VersionType = MCVM_IOSVersionMin;
- OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
+ MCVersionMinType VersionType;
+ if (TT.isWatchOS()) {
+ VersionType = MCVM_WatchOSVersionMin;
+ TT.getWatchOSVersion(Major, Minor, Update);
+ } else if (TT.isTvOS()) {
+ VersionType = MCVM_TvOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
+ } else if (TT.isMacOSX()) {
+ VersionType = MCVM_OSXVersionMin;
+ if (!TT.getMacOSXVersion(Major, Minor, Update))
+ Major = 0;
+ } else {
+ VersionType = MCVM_IOSVersionMin;
+ TT.getiOSVersion(Major, Minor, Update);
}
+ if (Major != 0)
+ OutStreamer->EmitVersionMin(VersionType, Major, Minor, Update);
}
// Allow the target to emit any magic that it wants at the start of the file.
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index bf794f7f70f6..7b0cdbde3791 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -32,6 +32,39 @@
using namespace llvm;
//===----------------------------------------------------------------------===//
+// EmittingAsmStreamer Implementation
+//===----------------------------------------------------------------------===//
+unsigned EmittingAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) {
+ AP->EmitULEB128(Value, Desc, PadTo);
+ return 0;
+}
+
+unsigned EmittingAsmStreamer::emitInt8(unsigned char Value) {
+ AP->EmitInt8(Value);
+ return 0;
+}
+
+unsigned EmittingAsmStreamer::emitBytes(StringRef Data) {
+ AP->OutStreamer->EmitBytes(Data);
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// SizeReporterAsmStreamer Implementation
+//===----------------------------------------------------------------------===//
+unsigned SizeReporterAsmStreamer::emitULEB128(uint64_t Value, const char *Desc,
+ unsigned PadTo) {
+ return getULEB128Size(Value);
+}
+
+unsigned SizeReporterAsmStreamer::emitInt8(unsigned char Value) { return 1; }
+
+unsigned SizeReporterAsmStreamer::emitBytes(StringRef Data) {
+ return Data.size();
+}
+
+//===----------------------------------------------------------------------===//
// DIEAbbrevData Implementation
//===----------------------------------------------------------------------===//
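The two streamers share one interface for two jobs: EmittingAsmStreamer forwards to the AsmPrinter, while SizeReporterAsmStreamer only reports how many bytes each call would produce, so the same walk can be run once to compute offsets and again to emit. A stripped-down sketch of the pattern (illustrative names, not the LLVM classes):

#include <cstdio>
#include <string>

struct StreamerBase {
  virtual ~StreamerBase() = default;
  virtual unsigned emitInt8(unsigned char Value) = 0;
  virtual unsigned emitBytes(const std::string &Data) = 0;
};

// Writes the bytes out; the return value is unused on this path.
struct Emitting final : StreamerBase {
  unsigned emitInt8(unsigned char Value) override { std::putchar(Value); return 0; }
  unsigned emitBytes(const std::string &Data) override {
    std::fwrite(Data.data(), 1, Data.size(), stdout);
    return 0;
  }
};

// Emits nothing; only measures.
struct SizeOnly final : StreamerBase {
  unsigned emitInt8(unsigned char) override { return 1; }
  unsigned emitBytes(const std::string &Data) override { return Data.size(); }
};

// The same walk runs against either implementation: first SizeOnly to
// pre-compute section offsets, then Emitting to produce the bytes.
unsigned walk(StreamerBase &S) { return S.emitBytes("name") + S.emitInt8('\0'); }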
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 3466f3469f1c..a4fb07eacb3b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -221,7 +221,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
- else if (IsDarwin || TT.isOSFreeBSD())
+ else if (IsDarwin)
DebuggerTuning = DebuggerKind::LLDB;
else if (TT.isPS4CPU())
DebuggerTuning = DebuggerKind::SCE;
@@ -561,6 +561,8 @@ void DwarfDebug::finalizeModuleInfo() {
// Collect info for variables that were optimized out.
collectDeadVariables();
+ unsigned MacroOffset = 0;
+ std::unique_ptr<AsmStreamerBase> AS(new SizeReporterAsmStreamer(Asm));
// Handle anything that needs to be done on a per-unit basis after
// all other generation.
for (const auto &P : CUMap) {
@@ -613,6 +615,15 @@ void DwarfDebug::finalizeModuleInfo() {
U.setBaseAddress(TheCU.getRanges().front().getStart());
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
+
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ if (CUNode->getMacros()) {
+ // The compile unit has macros; emit a "DW_AT_macro_info" attribute.
+ U.addUInt(U.getUnitDie(), dwarf::DW_AT_macro_info,
+ dwarf::DW_FORM_sec_offset, MacroOffset);
+ // Update the macro section offset.
+ MacroOffset += handleMacroNodes(AS.get(), CUNode->getMacros(), U);
+ }
}
// Compute DIE offsets and sizes.
@@ -656,6 +667,9 @@ void DwarfDebug::endModule() {
// Emit info into a debug ranges section.
emitDebugRanges();
+ // Emit info into a debug macinfo section.
+ emitDebugMacinfo();
+
if (useSplitDwarf()) {
emitDebugStrDWO();
emitDebugInfoDWO();
@@ -1833,6 +1847,70 @@ void DwarfDebug::emitDebugRanges() {
}
}
+unsigned DwarfDebug::handleMacroNodes(AsmStreamerBase *AS,
+ DIMacroNodeArray Nodes,
+ DwarfCompileUnit &U) {
+ unsigned Size = 0;
+ for (auto *MN : Nodes) {
+ if (auto *M = dyn_cast<DIMacro>(MN))
+ Size += emitMacro(AS, *M);
+ else if (auto *F = dyn_cast<DIMacroFile>(MN))
+ Size += emitMacroFile(AS, *F, U);
+ else
+ llvm_unreachable("Unexpected DI type!");
+ }
+ return Size;
+}
+
+unsigned DwarfDebug::emitMacro(AsmStreamerBase *AS, DIMacro &M) {
+ int Size = 0;
+ Size += AS->emitULEB128(M.getMacinfoType());
+ Size += AS->emitULEB128(M.getLine());
+ StringRef Name = M.getName();
+ StringRef Value = M.getValue();
+ Size += AS->emitBytes(Name);
+ if (!Value.empty()) {
+ // There should be one space between macro name and macro value.
+ Size += AS->emitInt8(' ');
+ Size += AS->emitBytes(Value);
+ }
+ Size += AS->emitInt8('\0');
+ return Size;
+}
+
+unsigned DwarfDebug::emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
+ DwarfCompileUnit &U) {
+ int Size = 0;
+ assert(F.getMacinfoType() == dwarf::DW_MACINFO_start_file);
+ Size += AS->emitULEB128(dwarf::DW_MACINFO_start_file);
+ Size += AS->emitULEB128(F.getLine());
+ DIFile *File = F.getFile();
+ unsigned FID =
+ U.getOrCreateSourceID(File->getFilename(), File->getDirectory());
+ Size += AS->emitULEB128(FID);
+ Size += handleMacroNodes(AS, F.getElements(), U);
+ Size += AS->emitULEB128(dwarf::DW_MACINFO_end_file);
+ return Size;
+}
+
+// Emit macros into a debug macinfo section.
+void DwarfDebug::emitDebugMacinfo() {
+ if (MCSection *Macinfo = Asm->getObjFileLowering().getDwarfMacinfoSection()) {
+ // Start the dwarf macinfo section.
+ Asm->OutStreamer->SwitchSection(Macinfo);
+ }
+ std::unique_ptr<AsmStreamerBase> AS(new EmittingAsmStreamer(Asm));
+ for (const auto &P : CUMap) {
+ auto &TheCU = *P.second;
+ auto *SkCU = TheCU.getSkeleton();
+ DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
+ auto *CUNode = cast<DICompileUnit>(P.first);
+ handleMacroNodes(AS.get(), CUNode->getMacros(), U);
+ }
+ Asm->OutStreamer->AddComment("End Of Macro List Mark");
+ Asm->EmitInt8(0);
+}
+
// DWARF5 Experimental Separate Dwarf emitters.
void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
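The macinfo entries above consist almost entirely of ULEB128-encoded values, and the offset pre-computation depends on knowing exactly how many bytes each value occupies. For reference, a self-contained encoder (a sketch, not the LLVM implementation): seven payload bits per byte, high bit set on every byte except the last.

#include <cstdint>
#include <vector>

unsigned emitULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  unsigned Size = 0;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // more bytes follow
    Out.push_back(Byte);
    ++Size;
  } while (Value != 0);
  return Size; // what a size-reporting streamer would return
}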
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 4c613a905450..460c186683fc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -400,18 +400,26 @@ class DwarfDebug : public AsmPrinterHandler {
/// Emit visible names into a debug str section.
void emitDebugStr();
- /// Emit visible names into a debug loc section.
+ /// Emit variable locations into a debug loc section.
void emitDebugLoc();
- /// Emit visible names into a debug loc dwo section.
+ /// Emit variable locations into a debug loc dwo section.
void emitDebugLocDWO();
- /// Emit visible names into a debug aranges section.
+ /// Emit address ranges into a debug aranges section.
void emitDebugARanges();
- /// Emit visible names into a debug ranges section.
+ /// Emit address ranges into a debug ranges section.
void emitDebugRanges();
+ /// Emit macros into a debug macinfo section.
+ void emitDebugMacinfo();
+ unsigned emitMacro(AsmStreamerBase *AS, DIMacro &M);
+ unsigned emitMacroFile(AsmStreamerBase *AS, DIMacroFile &F,
+ DwarfCompileUnit &U);
+ unsigned handleMacroNodes(AsmStreamerBase *AS, DIMacroNodeArray Nodes,
+ DwarfCompileUnit &U);
+
/// DWARF 5 Experimental Split Dwarf Emitters
/// Initialize common features of skeleton units.
diff --git a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
index c2c0f84e5c92..1e2f55b71151 100644
--- a/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp
@@ -82,13 +82,24 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
const MDNode *Scope = DL.getScope();
if (!Scope)
return;
+ unsigned LineNumber = DL.getLine();
+ // Skip this location if its line number is larger than the maximum we can record.
+ if (LineNumber > COFF::CVL_MaxLineNumber)
+ return;
+
+ unsigned ColumnNumber = DL.getCol();
+ // Zero out the column number if it is larger than the maximum we can record.
+ if (ColumnNumber > COFF::CVL_MaxColumnNumber)
+ ColumnNumber = 0;
+
StringRef Filename = getFullFilepath(Scope);
// Skip this instruction if it has the same file:line as the previous one.
assert(CurFn);
if (!CurFn->Instrs.empty()) {
const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
- if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine())
+ if (LastInstr.Filename == Filename && LastInstr.LineNumber == LineNumber &&
+ LastInstr.ColumnNumber == ColumnNumber)
return;
}
FileNameRegistry.add(Filename);
@@ -96,7 +107,7 @@ void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
Asm->OutStreamer->EmitLabel(MCL);
CurFn->Instrs.push_back(MCL);
- InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol());
+ InstrInfo[MCL] = InstrInfoTy(Filename, LineNumber, ColumnNumber);
}
WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
@@ -282,8 +293,9 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
ColSegI != ColSegEnd; ++ColSegI) {
unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
+ assert(ColumnNumber <= COFF::CVL_MaxColumnNumber);
Asm->EmitInt16(ColumnNumber); // Start column
- Asm->EmitInt16(ColumnNumber); // End column
+ Asm->EmitInt16(0); // End column
}
Asm->OutStreamer->EmitLabel(FileSegmentEnd);
};
@@ -320,7 +332,10 @@ void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
// The first PC with the given linenumber and the linenumber itself.
EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
- Asm->EmitInt32(InstrInfo[Instr].LineNumber);
+ uint32_t LineNumber = InstrInfo[Instr].LineNumber;
+ assert(LineNumber <= COFF::CVL_MaxLineNumber);
+ uint32_t LineData = LineNumber | COFF::CVL_IsStatement;
+ Asm->EmitInt32(LineData);
}
FinishPreviousChunk();
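CodeView keeps the line number in the low bits of a 32-bit word and uses a high flag bit to mark a statement, which is why out-of-range lines are skipped, oversized columns zeroed, and the emitted line data OR'ed with CVL_IsStatement. A sketch of the packing; the constants below are assumed values mirroring the patch's COFF::CVL_* names (consult llvm/Support/COFF.h for the authoritative definitions):

#include <cstdint>

constexpr uint32_t CVL_MaxLineNumber = 0x00FFFFFF; // low 24 bits (assumed)
constexpr uint32_t CVL_IsStatement   = 0x80000000; // bit 31 (assumed)

uint32_t packLineData(uint32_t LineNumber) {
  // The caller has already rejected LineNumber > CVL_MaxLineNumber.
  return LineNumber | CVL_IsStatement;
}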
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 604feeddd355..df5cac5a9f7a 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -744,18 +744,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
-static bool hasIdenticalMMOs(const MachineInstr *MI1, const MachineInstr *MI2) {
- auto I1 = MI1->memoperands_begin(), E1 = MI1->memoperands_end();
- auto I2 = MI2->memoperands_begin(), E2 = MI2->memoperands_end();
- if ((E1 - I1) != (E2 - I2))
- return false;
- for (; I1 != E1; ++I1, ++I2) {
- if (**I1 != **I2)
- return false;
- }
- return true;
-}
-
static void
removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@@ -792,8 +780,7 @@ removeMMOsFromMemoryOperations(MachineBasicBlock::iterator MBBIStartPos,
assert(MBBICommon->isIdenticalTo(&*MBBI) && "Expected matching MIIs!");
if (MBBICommon->mayLoad() || MBBICommon->mayStore())
- if (!hasIdenticalMMOs(&*MBBI, &*MBBICommon))
- MBBICommon->dropMemRefs();
+ MBBICommon->setMemRefs(MBBICommon->mergeMemRefsWith(*MBBI));
++MBBI;
++MBBICommon;
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 6fbdea84c10f..03e57787307a 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1108,7 +1108,7 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
// <16 x i1> %mask, <16 x i32> %passthru)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
-//
+//
// %1 = bitcast i8* %addr to i32*
// %2 = extractelement <16 x i1> %mask, i32 0
// %3 = icmp eq i1 %2, true
@@ -1272,12 +1272,12 @@ static void ScalarizeMaskedLoad(CallInst *CI) {
// %5 = getelementptr i32* %1, i32 0
// store i32 %4, i32* %5
// br label %else
-//
+//
// else: ; preds = %0, %cond.store
// %6 = extractelement <16 x i1> %mask, i32 1
// %7 = icmp eq i1 %6, true
// br i1 %7, label %cond.store1, label %else2
-//
+//
// cond.store1: ; preds = %else
// %8 = extractelement <16 x i32> %val, i32 1
// %9 = getelementptr i32* %1, i32 1
@@ -1377,24 +1377,24 @@ static void ScalarizeMaskedStore(CallInst *CI) {
// <16 x i1> %Mask, <16 x i32> %Src)
// to a chain of basic blocks, with loading element one-by-one if
// the appropriate mask bit is set
-//
+//
// % Ptrs = getelementptr i32, i32* %base, <16 x i64> %ind
// % Mask0 = extractelement <16 x i1> %Mask, i32 0
// % ToLoad0 = icmp eq i1 % Mask0, true
// br i1 % ToLoad0, label %cond.load, label %else
-//
+//
// cond.load:
// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// % Load0 = load i32, i32* % Ptr0, align 4
// % Res0 = insertelement <16 x i32> undef, i32 % Load0, i32 0
// br label %else
-//
+//
// else:
// %res.phi.else = phi <16 x i32>[% Res0, %cond.load], [undef, % 0]
// % Mask1 = extractelement <16 x i1> %Mask, i32 1
// % ToLoad1 = icmp eq i1 % Mask1, true
// br i1 % ToLoad1, label %cond.load1, label %else2
-//
+//
// cond.load1:
// % Ptr1 = extractelement <16 x i32*> %Ptrs, i32 1
// % Load1 = load i32, i32* % Ptr1, align 4
@@ -1526,7 +1526,7 @@ static void ScalarizeMaskedGather(CallInst *CI) {
// % Ptr0 = extractelement <16 x i32*> %Ptrs, i32 0
// store i32 %Elt0, i32* % Ptr0, align 4
// br label %else
-//
+//
// else:
// % Mask1 = extractelement <16 x i1> % Mask, i32 1
// % ToStore1 = icmp eq i1 % Mask1, true
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index 98d30b95dd2d..b9937e5570d4 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -19,6 +19,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -30,7 +32,7 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
-#include <deque>
+#include <queue>
#include <list>
using namespace llvm;
@@ -76,16 +78,13 @@ private:
typedef std::list<VarLoc> VarLocList;
typedef SmallDenseMap<const MachineBasicBlock *, VarLocList> VarLocInMBB;
- bool OLChanged; // OutgoingLocs got changed for this bb.
- bool MBBJoined; // The MBB was joined.
-
void transferDebugValue(MachineInstr &MI, VarLocList &OpenRanges);
void transferRegisterDef(MachineInstr &MI, VarLocList &OpenRanges);
- void transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
+ bool transferTerminatorInst(MachineInstr &MI, VarLocList &OpenRanges,
VarLocInMBB &OutLocs);
- void transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
+ bool transfer(MachineInstr &MI, VarLocList &OpenRanges, VarLocInMBB &OutLocs);
- void join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
+ bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs);
bool ExtendRanges(MachineFunction &MF);
@@ -225,24 +224,18 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
+bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
VarLocList &OpenRanges,
VarLocInMBB &OutLocs) {
+ bool Changed = false;
const MachineBasicBlock *CurMBB = MI.getParent();
if (!(MI.isTerminator() || (&MI == &CurMBB->instr_back())))
- return;
+ return false;
if (OpenRanges.empty())
- return;
+ return false;
- if (OutLocs.find(CurMBB) == OutLocs.end()) {
- // Create space for new Outgoing locs entries.
- VarLocList VLL;
- OutLocs.insert(std::make_pair(CurMBB, std::move(VLL)));
- }
- auto OL = OutLocs.find(CurMBB);
- assert(OL != OutLocs.end());
- VarLocList &VLL = OL->second;
+ VarLocList &VLL = OutLocs[CurMBB];
for (auto OR : OpenRanges) {
// Copy OpenRanges to OutLocs, if not already present.
@@ -251,28 +244,30 @@ void LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
if (std::find_if(VLL.begin(), VLL.end(),
[&](const VarLoc &V) { return (OR == V); }) == VLL.end()) {
VLL.push_back(std::move(OR));
- OLChanged = true;
+ Changed = true;
}
}
OpenRanges.clear();
+ return Changed;
}
/// This routine creates OpenRanges and OutLocs.
-void LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
+bool LiveDebugValues::transfer(MachineInstr &MI, VarLocList &OpenRanges,
VarLocInMBB &OutLocs) {
+ bool Changed = false;
transferDebugValue(MI, OpenRanges);
transferRegisterDef(MI, OpenRanges);
- transferTerminatorInst(MI, OpenRanges, OutLocs);
+ Changed = transferTerminatorInst(MI, OpenRanges, OutLocs);
+ return Changed;
}
/// This routine joins the analysis results of all incoming edges in @MBB by
/// inserting a new DBG_VALUE instruction at the start of the @MBB - if the same
/// source variable in all the predecessors of @MBB resides in the same location.
-void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
+bool LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
VarLocInMBB &InLocs) {
DEBUG(dbgs() << "join MBB: " << MBB.getName() << "\n");
-
- MBBJoined = false;
+ bool Changed = false;
VarLocList InLocsT; // Temporary incoming locations.
@@ -282,7 +277,7 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
auto OL = OutLocs.find(p);
// Join is null in case of empty OutLocs from any of the pred.
if (OL == OutLocs.end())
- return;
+ return false;
// Just copy over the Out locs to incoming locs for the first predecessor.
if (p == *MBB.pred_begin()) {
@@ -292,27 +287,18 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
// Join with this predecessor.
VarLocList &VLL = OL->second;
- InLocsT.erase(std::remove_if(InLocsT.begin(), InLocsT.end(),
- [&](VarLoc &ILT) {
- return (std::find_if(VLL.begin(), VLL.end(),
- [&](const VarLoc &V) {
- return (ILT == V);
- }) == VLL.end());
- }),
- InLocsT.end());
+ InLocsT.erase(
+ std::remove_if(InLocsT.begin(), InLocsT.end(), [&](VarLoc &ILT) {
+ return (std::find_if(VLL.begin(), VLL.end(), [&](const VarLoc &V) {
+ return (ILT == V);
+ }) == VLL.end());
+ }), InLocsT.end());
}
if (InLocsT.empty())
- return;
+ return false;
- if (InLocs.find(&MBB) == InLocs.end()) {
- // Create space for new Incoming locs entries.
- VarLocList VLL;
- InLocs.insert(std::make_pair(&MBB, std::move(VLL)));
- }
- auto IL = InLocs.find(&MBB);
- assert(IL != InLocs.end());
- VarLocList &ILL = IL->second;
+ VarLocList &ILL = InLocs[&MBB];
// Insert DBG_VALUE instructions, if not already inserted.
for (auto ILT : InLocsT) {
@@ -331,12 +317,13 @@ void LiveDebugValues::join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs,
MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
DEBUG(dbgs() << "Inserted: "; MI->dump(););
++NumInserted;
- MBBJoined = true; // rerun transfer().
+ Changed = true;
VarLoc V(ILT.Var, MI);
ILL.push_back(std::move(V));
}
}
+ return Changed;
}
/// Calculate the liveness information for the given machine function and
@@ -346,48 +333,72 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
DEBUG(dbgs() << "\nDebug Range Extension\n");
bool Changed = false;
- OLChanged = MBBJoined = false;
+ bool OLChanged = false;
+ bool MBBJoined = false;
VarLocList OpenRanges; // Ranges that are open until end of bb.
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
- std::deque<MachineBasicBlock *> BBWorklist;
-
+ DenseMap<unsigned int, MachineBasicBlock *> OrderToBB;
+ DenseMap<MachineBasicBlock *, unsigned int> BBToOrder;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>> Worklist;
+ std::priority_queue<unsigned int, std::vector<unsigned int>,
+ std::greater<unsigned int>> Pending;
// Initialize every mbb with OutLocs.
for (auto &MBB : MF)
for (auto &MI : MBB)
transfer(MI, OpenRanges, OutLocs);
DEBUG(printVarLocInMBB(OutLocs, "OutLocs after initialization", dbgs()));
- // Construct a worklist of MBBs.
- for (auto &MBB : MF)
- BBWorklist.push_back(&MBB);
-
- // Perform join() and transfer() using the worklist until the ranges converge
- // Ranges have converged when the worklist is empty.
- while (!BBWorklist.empty()) {
- MachineBasicBlock *MBB = BBWorklist.front();
- BBWorklist.pop_front();
-
- join(*MBB, OutLocs, InLocs);
+ ReversePostOrderTraversal<MachineFunction *> RPOT(&MF);
+ unsigned int RPONumber = 0;
+ for (auto RI = RPOT.begin(), RE = RPOT.end(); RI != RE; ++RI) {
+ OrderToBB[RPONumber] = *RI;
+ BBToOrder[*RI] = RPONumber;
+ Worklist.push(RPONumber);
+ ++RPONumber;
+ }
- if (MBBJoined) {
- Changed = true;
- for (auto &MI : *MBB)
- transfer(MI, OpenRanges, OutLocs);
- DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
- DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
-
- if (OLChanged) {
- OLChanged = false;
- for (auto s : MBB->successors())
- if (std::find(BBWorklist.begin(), BBWorklist.end(), s) ==
- BBWorklist.end()) // add if not already present.
- BBWorklist.push_back(s);
+ // This is a standard "union of predecessor outs" dataflow problem.
+ // To solve it, we perform join() and transfer() using the two-worklist method
+ // until the ranges converge.
+ // Ranges have converged when both worklists are empty.
+ while (!Worklist.empty() || !Pending.empty()) {
+ // We track what is on the pending worklist to avoid inserting the same
+ // thing twice. We could avoid this with a custom priority queue, but this
+ // is probably not worth it.
+ SmallPtrSet<MachineBasicBlock *, 16> OnPending;
+ while (!Worklist.empty()) {
+ MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
+ Worklist.pop();
+ MBBJoined = join(*MBB, OutLocs, InLocs);
+
+ if (MBBJoined) {
+ MBBJoined = false;
+ Changed = true;
+ for (auto &MI : *MBB)
+ OLChanged |= transfer(MI, OpenRanges, OutLocs);
+ DEBUG(printVarLocInMBB(OutLocs, "OutLocs after propagating", dbgs()));
+ DEBUG(printVarLocInMBB(InLocs, "InLocs after propagating", dbgs()));
+
+ if (OLChanged) {
+ OLChanged = false;
+ for (auto s : MBB->successors())
+ if (!OnPending.count(s)) {
+ OnPending.insert(s);
+ Pending.push(BBToOrder[s]);
+ }
+ }
}
}
+ Worklist.swap(Pending);
+ // At this point, Pending must be empty, since it received the drained
+ // (empty) worklist in the swap.
+ assert(Pending.empty() && "Pending should be empty");
}
+
DEBUG(printVarLocInMBB(OutLocs, "Final OutLocs", dbgs()));
DEBUG(printVarLocInMBB(InLocs, "Final InLocs", dbgs()));
return Changed;
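The rewritten driver replaces the ad-hoc deque with the classic two-worklist scheme over a reverse post-order numbering: drain the current worklist, push changed successors onto a pending list, then swap. The control structure in isolation (callbacks stand in for join()/transfer(); names are illustrative):

#include <functional>
#include <queue>
#include <set>
#include <vector>

void solveDataflow(unsigned NumBlocks,
                   const std::function<bool(unsigned)> &JoinAndTransfer,
                   const std::function<std::vector<unsigned>(unsigned)> &Succs) {
  using MinQueue = std::priority_queue<unsigned, std::vector<unsigned>,
                                       std::greater<unsigned>>;
  MinQueue Worklist, Pending;
  for (unsigned RPO = 0; RPO != NumBlocks; ++RPO)
    Worklist.push(RPO); // blocks numbered in reverse post-order
  while (!Worklist.empty() || !Pending.empty()) {
    std::set<unsigned> OnPending; // suppress duplicate pending entries
    while (!Worklist.empty()) {
      unsigned BB = Worklist.top();
      Worklist.pop();
      if (JoinAndTransfer(BB)) // out-locs changed: revisit successors
        for (unsigned S : Succs(BB))
          if (OnPending.insert(S).second)
            Pending.push(S);
    }
    std::swap(Worklist, Pending); // pending work becomes the next round
  }
}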
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index efad36ffa3f1..bb3488348f24 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1328,15 +1328,15 @@ void LiveRangeUpdater::flush() {
LR->verify();
}
-unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveRange &LR) {
// Create initial equivalence classes.
EqClass.clear();
- EqClass.grow(LI->getNumValNums());
+ EqClass.grow(LR.getNumValNums());
const VNInfo *used = nullptr, *unused = nullptr;
// Determine connections.
- for (const VNInfo *VNI : LI->valnos) {
+ for (const VNInfo *VNI : LR.valnos) {
// Group all unused values into one class.
if (VNI->isUnused()) {
if (unused)
@@ -1351,14 +1351,14 @@ unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
// Connect to values live out of predecessors.
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
PE = MBB->pred_end(); PI != PE; ++PI)
- if (const VNInfo *PVNI = LI->getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
+ if (const VNInfo *PVNI = LR.getVNInfoBefore(LIS.getMBBEndIdx(*PI)))
EqClass.join(VNI->id, PVNI->id);
} else {
// Normal value defined by an instruction. Check for two-addr redef.
// FIXME: This could be coincidental. Should we really check for a tied
// operand constraint?
// Note that VNI->def may be a use slot for an early clobber def.
- if (const VNInfo *UVNI = LI->getVNInfoBefore(VNI->def))
+ if (const VNInfo *UVNI = LR.getVNInfoBefore(VNI->def))
EqClass.join(VNI->id, UVNI->id);
}
}
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 9451d92bd7ae..a506e0571c09 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -1446,7 +1446,7 @@ void LiveIntervals::removeVRegDefAt(LiveInterval &LI, SlotIndex Pos) {
void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
SmallVectorImpl<LiveInterval*> &SplitLIs) {
ConnectedVNInfoEqClasses ConEQ(*this);
- unsigned NumComp = ConEQ.Classify(&LI);
+ unsigned NumComp = ConEQ.Classify(LI);
if (NumComp <= 1)
return;
DEBUG(dbgs() << " Split " << NumComp << " components: " << LI << '\n');
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 76099f28499b..85d544d94984 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -1182,7 +1182,7 @@ MachineBasicBlock::getProbabilityIterator(MachineBasicBlock::succ_iterator I) {
/// Return whether (physical) register "Reg" has been <def>ined and not <kill>ed
/// as of just before "MI".
-///
+///
/// Search is localised to a neighborhood of
/// Neighborhood instructions before (searching for defs or kills) and N
/// instructions after (searching just for defs) MI.
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 790f5accdb26..4f424ff292cc 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -31,7 +31,7 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
const std::string Banner;
MachineFunctionPrinterPass() : MachineFunctionPass(ID), OS(dbgs()) { }
- MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
+ MachineFunctionPrinterPass(raw_ostream &os, const std::string &banner)
: MachineFunctionPass(ID), OS(os), Banner(banner) {}
const char *getPassName() const override { return "MachineFunction Printer"; }
@@ -42,6 +42,8 @@ struct MachineFunctionPrinterPass : public MachineFunctionPass {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ if (!llvm::isFunctionInPrintList(MF.getName()))
+ return false;
OS << "# " << Banner << ":\n";
MF.print(OS, getAnalysisIfAvailable<SlotIndexes>());
return false;
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 6b8eeccd173d..6dca74d60026 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -866,14 +866,44 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
setMemRefs(NewMemRefs, NewMemRefs + NewNum);
}
+/// Check to see if the MMOs pointed to by the two MemRefs arrays are
+/// identical.
+static bool hasIdenticalMMOs(const MachineInstr &MI1, const MachineInstr &MI2) {
+ auto I1 = MI1.memoperands_begin(), E1 = MI1.memoperands_end();
+ auto I2 = MI2.memoperands_begin(), E2 = MI2.memoperands_end();
+ if ((E1 - I1) != (E2 - I2))
+ return false;
+ for (; I1 != E1; ++I1, ++I2) {
+ if (**I1 != **I2)
+ return false;
+ }
+ return true;
+}
+
std::pair<MachineInstr::mmo_iterator, unsigned>
MachineInstr::mergeMemRefsWith(const MachineInstr& Other) {
- // TODO: If we end up with too many memory operands, return the empty
- // conservative set rather than failing asserts.
+
+ // If either of the incoming memref lists is empty, we must be conservative and
+ // treat this as if we've exhausted our space for memrefs and dropped them.
+ if (memoperands_empty() || Other.memoperands_empty())
+ return std::make_pair(nullptr, 0);
+
+ // If both instructions have identical memrefs, we don't need to merge them.
+ // Since many instructions have a single memref, and we tend to merge things
+ // like pairs of loads from the same location, this catches a large number of
+ // cases in practice.
+ if (hasIdenticalMMOs(*this, Other))
+ return std::make_pair(MemRefs, NumMemRefs);
+
// TODO: consider uniquing elements within the operand lists to reduce
// space usage and fall back to conservative information less often.
- size_t CombinedNumMemRefs = (memoperands_end() - memoperands_begin())
- + (Other.memoperands_end() - Other.memoperands_begin());
+ size_t CombinedNumMemRefs = NumMemRefs + Other.NumMemRefs;
+
+ // If we don't have enough room to store this many memrefs, be conservative
+ // and drop them. Otherwise, we'd fail asserts when trying to add them to
+ // the new instruction.
+ if (CombinedNumMemRefs != uint8_t(CombinedNumMemRefs))
+ return std::make_pair(nullptr, 0);
MachineFunction *MF = getParent()->getParent();
mmo_iterator MemBegin = MF->allocateMemRefsArray(CombinedNumMemRefs);
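NumMemRefs is stored in a uint8_t, so the merged count must survive the narrowing; otherwise the merge falls back to the conservative empty list instead of tripping asserts later. The check in isolation:

#include <cassert>
#include <cstddef>
#include <cstdint>

bool fitsInMemRefCount(size_t Combined) {
  // True only if the count round-trips through 8-bit storage.
  return Combined == static_cast<uint8_t>(Combined);
}

int main() {
  assert(fitsInMemRefCount(200));
  assert(!fitsInMemRefCount(300)); // would wrap to 44; drop memrefs instead
  return 0;
}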
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index a8368e9c80d6..99a97d2dbd74 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -334,12 +334,11 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
// writes to all slots.
if (MI->memoperands_empty())
return true;
- for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
- oe = MI->memoperands_end(); o != oe; ++o) {
- if (!(*o)->isStore() || !(*o)->getPseudoValue())
+ for (const MachineMemOperand *MemOp : MI->memoperands()) {
+ if (!MemOp->isStore() || !MemOp->getPseudoValue())
continue;
if (const FixedStackPseudoSourceValue *Value =
- dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
+ dyn_cast<FixedStackPseudoSourceValue>(MemOp->getPseudoValue())) {
if (Value->getFrameIndex() == FI)
return true;
}
@@ -357,8 +356,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI,
bool RuledOut = false;
bool HasNonInvariantUse = false;
unsigned Def = 0;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
+ for (const MachineOperand &MO : MI->operands()) {
if (MO.isFI()) {
// Remember if the instruction stores to the frame index.
int FI = MO.getIndex();
@@ -452,9 +450,7 @@ void MachineLICM::HoistRegionPostRA() {
// Walk the entire region, count number of defs for each register, and
// collect potential LICM candidates.
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *BB = Blocks[i];
-
+ for (MachineBasicBlock *BB : Blocks) {
// If the header of the loop containing this basic block is a landing pad,
// then don't try to hoist instructions out of this loop.
const MachineLoop *ML = MLI->getLoopFor(BB);
@@ -469,19 +465,15 @@ void MachineLICM::HoistRegionPostRA() {
}
SpeculationState = SpeculateUnknown;
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
- }
+ for (MachineInstr &MI : *BB)
+ ProcessMI(&MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
}
// Gather the registers read / clobbered by the terminator.
BitVector TermRegs(NumRegs);
MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
if (TI != Preheader->end()) {
- for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = TI->getOperand(i);
+ for (const MachineOperand &MO : TI->operands()) {
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
@@ -500,17 +492,16 @@ void MachineLICM::HoistRegionPostRA() {
// 3. Make sure candidate def should not clobber
// registers read by the terminator. Similarly its def should not be
// clobbered by the terminator.
- for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
- if (Candidates[i].FI != INT_MIN &&
- StoredFIs.count(Candidates[i].FI))
+ for (CandidateInfo &Candidate : Candidates) {
+ if (Candidate.FI != INT_MIN &&
+ StoredFIs.count(Candidate.FI))
continue;
- unsigned Def = Candidates[i].Def;
+ unsigned Def = Candidate.Def;
if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
bool Safe = true;
- MachineInstr *MI = Candidates[i].MI;
- for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
+ MachineInstr *MI = Candidate.MI;
+ for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
unsigned Reg = MO.getReg();
@@ -523,7 +514,7 @@ void MachineLICM::HoistRegionPostRA() {
}
}
if (Safe)
- HoistPostRA(MI, Candidates[i].Def);
+ HoistPostRA(MI, Candidate.Def);
}
}
}
@@ -532,15 +523,11 @@ void MachineLICM::HoistRegionPostRA() {
/// sure it is not killed by any instructions in the loop.
void MachineLICM::AddToLiveIns(unsigned Reg) {
const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
- for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
- MachineBasicBlock *BB = Blocks[i];
+ for (MachineBasicBlock *BB : Blocks) {
if (!BB->isLiveIn(Reg))
BB->addLiveIn(Reg);
- for (MachineBasicBlock::iterator
- MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
- MachineInstr *MI = &*MII;
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineInstr &MI : *BB) {
+ for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
MO.setIsKill(false);
@@ -582,8 +569,8 @@ bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
// Check loop exiting blocks.
SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
- for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
- if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
+ for (MachineBasicBlock *CurrentLoopExitingBlock : CurrentLoopExitingBlocks)
+ if (!DT->dominates(BB, CurrentLoopExitingBlock)) {
SpeculationState = SpeculateTrue;
return false;
}
@@ -689,8 +676,7 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
InitRegPressure(Preheader);
// Now perform LICM.
- for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
- MachineDomTreeNode *Node = Scopes[i];
+ for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
@@ -858,13 +844,11 @@ static bool mayLoadFromGOTOrConstantPool(MachineInstr &MI) {
if (MI.memoperands_empty())
return true;
- for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
- E = MI.memoperands_end(); I != E; ++I) {
- if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
+ for (MachineMemOperand *MemOp : MI.memoperands())
+ if (const PseudoSourceValue *PSV = MemOp->getPseudoValue())
if (PSV->isGOT() || PSV->isConstantPool())
return true;
- }
- }
+
return false;
}
@@ -899,9 +883,7 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
return false;
// The instruction is loop invariant if all of its operands are.
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = I.getOperand(i);
-
+ for (const MachineOperand &MO : I.operands()) {
if (!MO.isReg())
continue;
@@ -1230,11 +1212,8 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
- for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
- const MachineInstr *MI = &*I;
- unsigned Opcode = MI->getOpcode();
- CSEMap[Opcode].push_back(MI);
- }
+ for (MachineInstr &MI : *BB)
+ CSEMap[MI.getOpcode()].push_back(&MI);
}
/// Find an instruction among PrevMIs that is a duplicate of MI.
@@ -1242,11 +1221,10 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
- for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
- const MachineInstr *PrevMI = PrevMIs[i];
+ for (const MachineInstr *PrevMI : PrevMIs)
if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
return PrevMI;
- }
+
return nullptr;
}
@@ -1296,8 +1274,7 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
}
}
- for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
- unsigned Idx = Defs[i];
+ for (unsigned Idx : Defs) {
unsigned Reg = MI->getOperand(Idx).getReg();
unsigned DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
@@ -1370,11 +1347,9 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
+ for (MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isDef() && !MO.isDead())
MRI->clearKillFlags(MO.getReg());
- }
// Add to the CSE map.
if (CI != CSEMap.end())
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index cdcd8eb4fbdf..428295ec2cc6 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1736,7 +1736,7 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
// Check the LI only has one connected component.
ConnectedVNInfoEqClasses ConEQ(*LiveInts);
- unsigned NumComp = ConEQ.Classify(&LI);
+ unsigned NumComp = ConEQ.Classify(LI);
if (NumComp > 1) {
report("Multiple connected components in live interval", MF);
report_context(LI);
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index e7b32179bde5..c1ff13ec7ca0 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -2874,7 +2874,7 @@ void RegisterCoalescer::joinAllIntervals() {
std::vector<MBBPriorityInfo> MBBs;
MBBs.reserve(MF->size());
- for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){
+ for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
JoinSplitEdges && isSplitEdge(MBB)));
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 3749b1dd217a..f33dc3e10492 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -313,21 +313,6 @@ static bool containsReg(ArrayRef<unsigned> RegUnits, unsigned RegUnit) {
namespace {
-/// List of register defined and used by a machine instruction.
-class RegisterOperands {
-public:
- SmallVector<unsigned, 8> Uses;
- SmallVector<unsigned, 8> Defs;
- SmallVector<unsigned, 8> DeadDefs;
-
- void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI,
- const MachineRegisterInfo &MRI, bool IgnoreDead = false);
-
- /// Use liveness information to find dead defs not marked with a dead flag
- /// and move them to the DeadDefs vector.
- void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS);
-};
-
/// Collect this instruction's unique uses and defs into SmallVectors for
/// processing defs and uses in order.
///
@@ -385,9 +370,11 @@ class RegisterOperandsCollector {
}
}
- friend class RegisterOperands;
+ friend class llvm::RegisterOperands;
};
+} // namespace
+
void RegisterOperands::collect(const MachineInstr &MI,
const TargetRegisterInfo &TRI,
const MachineRegisterInfo &MRI,
@@ -417,8 +404,6 @@ void RegisterOperands::detectDeadDefs(const MachineInstr &MI,
}
}
-} // namespace
-
/// Initialize an array of N PressureDiffs.
void PressureDiffs::init(unsigned N) {
Size = N;
@@ -431,6 +416,18 @@ void PressureDiffs::init(unsigned N) {
PDiffArray = reinterpret_cast<PressureDiff*>(calloc(N, sizeof(PressureDiff)));
}
+void PressureDiffs::addInstruction(unsigned Idx,
+ const RegisterOperands &RegOpers,
+ const MachineRegisterInfo &MRI) {
+ PressureDiff &PDiff = (*this)[Idx];
+ assert(!PDiff.begin()->isValid() && "stale PDiff");
+ for (unsigned Reg : RegOpers.Defs)
+ PDiff.addPressureChange(Reg, true, &MRI);
+
+ for (unsigned Reg : RegOpers.Uses)
+ PDiff.addPressureChange(Reg, false, &MRI);
+}
+
/// Add a change in pressure to the pressure diff of a given instruction.
void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
const MachineRegisterInfo *MRI) {
@@ -467,18 +464,6 @@ void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec,
}
}
-/// Record the pressure difference induced by the given operand list.
-static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers,
- const MachineRegisterInfo *MRI) {
- assert(!PDiff.begin()->isValid() && "stale PDiff");
-
- for (unsigned Reg : RegOpers.Defs)
- PDiff.addPressureChange(Reg, true, MRI);
-
- for (unsigned Reg : RegOpers.Uses)
- PDiff.addPressureChange(Reg, false, MRI);
-}
-
/// Force liveness of registers.
void RegPressureTracker::addLiveRegs(ArrayRef<unsigned> Regs) {
for (unsigned Reg : Regs) {
@@ -514,39 +499,10 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) {
/// registers that are both defined and used by the instruction. If a pressure
/// difference pointer is provided, record the changes in pressure caused by this
/// instruction independent of liveness.
-void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
- PressureDiff *PDiff) {
- assert(CurrPos != MBB->begin());
- if (!isBottomClosed())
- closeBottom();
-
- // Open the top of the region using block iterators.
- if (!RequireIntervals && isTopClosed())
- static_cast<RegionPressure&>(P).openTop(CurrPos);
-
- // Find the previous instruction.
- do
- --CurrPos;
- while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+void RegPressureTracker::recede(const RegisterOperands &RegOpers,
+ SmallVectorImpl<unsigned> *LiveUses) {
assert(!CurrPos->isDebugValue());
- SlotIndex SlotIdx;
- if (RequireIntervals)
- SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
-
- // Open the top of the region using slot indexes.
- if (RequireIntervals && isTopClosed())
- static_cast<IntervalPressure&>(P).openTop(SlotIdx);
-
- const MachineInstr &MI = *CurrPos;
- RegisterOperands RegOpers;
- RegOpers.collect(MI, *TRI, *MRI);
- if (RequireIntervals)
- RegOpers.detectDeadDefs(MI, *LIS);
-
- if (PDiff)
- collectPDiff(*PDiff, RegOpers, MRI);
-
// Boost pressure for all dead defs together.
increaseRegPressure(RegOpers.DeadDefs);
decreaseRegPressure(RegOpers.DeadDefs);
@@ -560,6 +516,10 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
discoverLiveOut(Reg);
}
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
// Generate liveness for uses.
for (unsigned Reg : RegOpers.Uses) {
if (!LiveRegs.contains(Reg)) {
@@ -586,6 +546,41 @@ void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses,
}
}
+void RegPressureTracker::recedeSkipDebugValues() {
+ assert(CurrPos != MBB->begin());
+ if (!isBottomClosed())
+ closeBottom();
+
+ // Open the top of the region using block iterators.
+ if (!RequireIntervals && isTopClosed())
+ static_cast<RegionPressure&>(P).openTop(CurrPos);
+
+ // Find the previous instruction.
+ do
+ --CurrPos;
+ while (CurrPos != MBB->begin() && CurrPos->isDebugValue());
+
+ SlotIndex SlotIdx;
+ if (RequireIntervals)
+ SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot();
+
+ // Open the top of the region using slot indexes.
+ if (RequireIntervals && isTopClosed())
+ static_cast<IntervalPressure&>(P).openTop(SlotIdx);
+}
+
+void RegPressureTracker::recede(SmallVectorImpl<unsigned> *LiveUses) {
+ recedeSkipDebugValues();
+
+ const MachineInstr &MI = *CurrPos;
+ RegisterOperands RegOpers;
+ RegOpers.collect(MI, *TRI, *MRI);
+ if (RequireIntervals)
+ RegOpers.detectDeadDefs(MI, *LIS);
+
+ recede(RegOpers, LiveUses);
+}
+
/// Advance across the current instruction.
void RegPressureTracker::advance() {
assert(!TrackUntiedDefs && "unsupported mode");
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index fb82ab7a5555..11b246a8de25 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -896,11 +896,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
assert(SU && "No SUnit mapped to this MI");
if (RPTracker) {
- PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : nullptr;
- RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
- assert(RPTracker->getPos() == std::prev(MII) &&
- "RPTracker can't find MI");
collectVRegUses(SU);
+
+ RegisterOperands RegOpers;
+ RegOpers.collect(*MI, *TRI, MRI);
+ if (PDiffs != nullptr)
+ PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
+
+ RPTracker->recedeSkipDebugValues();
+ assert(&*RPTracker->getPos() == MI && "RPTracker in sync");
+ RPTracker->recede(RegOpers);
}
assert(
@@ -1005,6 +1010,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU,
I->second[i], RejectMemNodes, TrueMemOrderLatency);
}
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
PendingLoads.clear();
@@ -1086,6 +1094,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
}
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency() possibly
+ // adds to.
adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU, RejectMemNodes,
TrueMemOrderLatency);
} else if (MI->mayLoad()) {
@@ -1133,13 +1144,16 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
else
NonAliasMemUses[V].push_back(SU);
}
- if (MayAlias)
- adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
- RejectMemNodes, /*Latency=*/0);
// Add dependencies on alias and barrier chains, if needed.
if (MayAlias && AliasChain)
addChainDependency(AAForDep, MFI, MF.getDataLayout(), SU, AliasChain,
RejectMemNodes);
+ if (MayAlias)
+ // This call must come after calls to addChainDependency() since it
+ // consumes the 'RejectMemNodes' list that addChainDependency()
+ // possibly adds to.
+ adjustChainDeps(AA, MFI, MF.getDataLayout(), SU, &ExitSU,
+ RejectMemNodes, /*Latency=*/0);
if (BarrierChain)
BarrierChain->addPred(SDep(SU, SDep::Barrier));
}
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index bc2405b952a6..c741982bc08d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7325,6 +7325,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// fold (bitcast (fneg x)) ->
// flipbit = signbit
// (xor (bitcast x) (build_pair flipbit, flipbit))
+ //
// fold (bitcast (fabs x)) ->
// flipbit = (and (extract_element (bitcast x), 0), signbit)
// (xor (bitcast x) (build_pair flipbit, flipbit))
@@ -8794,20 +8795,21 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) {
ZeroCmp, Zero, RV);
}
+/// copysign(x, fp_extend(y)) -> copysign(x, y)
+/// copysign(x, fp_round(y)) -> copysign(x, y)
static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
- // copysign(x, fp_extend(y)) -> copysign(x, y)
- // copysign(x, fp_round(y)) -> copysign(x, y)
- // Do not optimize out type conversion of f128 type yet.
- // For some target like x86_64, configuration is changed
- // to keep one f128 value in one SSE register, but
- // instruction selection cannot handle FCOPYSIGN on
- // SSE registers yet.
SDValue N1 = N->getOperand(1);
- EVT N1VT = N1->getValueType(0);
- EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
- return (N1.getOpcode() == ISD::FP_EXTEND ||
- N1.getOpcode() == ISD::FP_ROUND) &&
- (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ if ((N1.getOpcode() == ISD::FP_EXTEND ||
+ N1.getOpcode() == ISD::FP_ROUND)) {
+ // Do not optimize out type conversion of f128 type yet.
+ // For some targets like x86_64, configuration is changed to keep one f128
+ // value in one SSE register, but instruction selection cannot handle
+ // FCOPYSIGN on SSE registers yet.
+ EVT N1VT = N1->getValueType(0);
+ EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
+ return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
+ }
+ return false;
}
SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
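The restructured predicate keeps the identity intact: extending or rounding y cannot change its sign bit, so copysign(x, fp_extend(y)) and copysign(x, fp_round(y)) fold to copysign(x, y), with f128 excluded for the reason given in the comment. Checking the identity at the value level (standalone, illustrative):

#include <cassert>
#include <cmath>

int main() {
  float Y = -0.0f; // sign bit set, magnitude zero
  // Taking the sign before or after the widening conversion gives the
  // same result, because fp_extend preserves the sign bit.
  assert(std::copysign(1.0, static_cast<double>(Y)) ==
         static_cast<double>(std::copysignf(1.0f, Y)));
  return 0;
}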
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b62bd2bd63ee..08815ed787dc 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -297,8 +297,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
else if (Personality == EHPersonality::CoreCLR)
calculateClrEHStateNumbers(&fn, EHInfo);
- calculateCatchReturnSuccessorColors(&fn, EHInfo);
-
// Map all BB references in the WinEH data to MBBs.
for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
for (WinEHHandlerType &H : TBME.HandlerArray) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f46767f6c4a1..5d572c4c2b02 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2941,6 +2941,18 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// This trivially expands to CTLZ.
return DAG.getNode(ISD::CTLZ, dl, Op.getValueType(), Op);
case ISD::CTLZ: {
+ EVT VT = Op.getValueType();
+ unsigned len = VT.getSizeInBits();
+
+ if (TLI.isOperationLegalOrCustom(ISD::CTLZ_ZERO_UNDEF, VT)) {
+ EVT SetCCVT = getSetCCResultType(VT);
+ SDValue CTLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, VT, Op);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue SrcIsZero = DAG.getSetCC(dl, SetCCVT, Op, Zero, ISD::SETEQ);
+ return DAG.getNode(ISD::SELECT, dl, VT, SrcIsZero,
+ DAG.getConstant(len, dl, VT), CTLZ);
+ }
+
// for now, we do this:
// x = x | (x >> 1);
// x = x | (x >> 2);
@@ -2950,9 +2962,7 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
// return popcount(~x);
//
// Ref: "Hacker's Delight" by Henry Warren
- EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());
- unsigned len = VT.getSizeInBits();
for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
SDValue Tmp3 = DAG.getConstant(1ULL << i, dl, ShVT);
Op = DAG.getNode(ISD::OR, dl, VT, Op,
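The new fast path lowers CTLZ as CTLZ_ZERO_UNDEF plus a select that substitutes the bit width when the source is zero, rather than always expanding the fill-and-popcount sequence above. The scalar shape of that lowering (a sketch; __builtin_clz is the GCC/Clang intrinsic whose result is undefined for a zero input):

#include <cstdint>

unsigned ctlz32(uint32_t X) {
  // select(X == 0, 32, ctlz_zero_undef(X))
  return X == 0 ? 32u : static_cast<unsigned>(__builtin_clz(X));
}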
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index cd114d668e20..74f80db6d01b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -262,12 +262,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT, GetSoftenedFloat(InOp));
case TargetLowering::TypePromoteFloat: {
// Convert the promoted float by hand.
- if (NOutVT.bitsEq(NInVT)) {
- SDValue PromotedOp = GetPromotedFloat(InOp);
- SDValue Trunc = DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
- return DAG.getNode(ISD::AssertZext, dl, NOutVT, Trunc,
- DAG.getValueType(OutVT));
- }
+ SDValue PromotedOp = GetPromotedFloat(InOp);
+ return DAG.getNode(ISD::FP_TO_FP16, dl, NOutVT, PromotedOp);
break;
}
case TargetLowering::TypeExpandInteger:
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e446a934554e..45ae39af7600 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1205,8 +1205,13 @@ void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
// Figure out the funclet membership for the catchret's successor.
// This will be used by the FuncletLayout pass to determine how to order the
// BB's.
- WinEHFuncInfo *EHInfo = DAG.getMachineFunction().getWinEHFuncInfo();
- const BasicBlock *SuccessorColor = EHInfo->CatchRetSuccessorColorMap[&I];
+ // A 'catchret' returns to the outer scope's color.
+ Value *ParentPad = I.getParentPad();
+ const BasicBlock *SuccessorColor;
+ if (isa<ConstantTokenNone>(ParentPad))
+ SuccessorColor = &FuncInfo.Fn->getEntryBlock();
+ else
+ SuccessorColor = cast<Instruction>(ParentPad)->getParent();
assert(SuccessorColor && "No parent funclet for catchret!");
MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
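
Note: the replacement above computes the catchret successor's funclet color
on the fly instead of reading the precomputed CatchRetSuccessorColorMap
(whose builder, calculateCatchReturnSuccessorColors, is deleted from
WinEHPrepare.cpp later in this diff). The rule itself is small; a sketch with
stand-in types (hypothetical, for illustration only):

    #include <cassert>

    struct Block {};                   // stand-in for llvm::BasicBlock
    struct Function { Block *Entry; };

    // A parent pad is either "none" (the enclosing funclet is the function
    // itself) or a pad instruction whose parent block names the funclet.
    struct ParentPad {
      bool IsNone;      // models isa<ConstantTokenNone>(ParentPad)
      Block *PadBlock;  // models cast<Instruction>(ParentPad)->getParent()
    };

    // A 'catchret' returns to the outer scope's color.
    Block *successorColor(const Function &F, const ParentPad &P) {
      return P.IsNone ? F.Entry : P.PadBlock;
    }

    int main() {
      Block Entry, PadBB;
      Function F{&Entry};
      assert(successorColor(F, {true, nullptr}) == &Entry);
      assert(successorColor(F, {false, &PadBB}) == &PadBB);
    }
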
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 6547a62d0778..02545a730656 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -461,7 +461,9 @@ static void lowerIncomingStatepointValue(SDValue Incoming,
// If the original value was a constant, make sure it gets recorded as
// such in the stackmap. This is required so that the consumer can
// parse any internal format to the deopt state. It also handles null
- // pointers and other constant pointers in GC states
+ // pointers and other constant pointers in GC states. Note that constant
+ // vectors do not appear to actually hit this path, and anything larger
+ // than an i64 value (not type!) will fail asserts here.
pushStackMapConstant(Ops, Builder, C->getSExtValue());
} else if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Incoming)) {
// This handles allocas as arguments to the statepoint (this is only
@@ -505,27 +507,27 @@ static void lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
#ifndef NDEBUG
// Check that each of the gc pointer and bases we've gotten out of the
- // safepoint is something the strategy thinks might be a pointer into the GC
- // heap. This is basically just here to help catch errors during statepoint
- // insertion. TODO: This should actually be in the Verifier, but we can't get
- // to the GCStrategy from there (yet).
+ // safepoint is something the strategy thinks might be a pointer (or vector
+ // of pointers) into the GC heap. This is basically just here to help catch
+ // errors during statepoint insertion. TODO: This should actually be in the
+ // Verifier, but we can't get to the GCStrategy from there (yet).
GCStrategy &S = Builder.GFI->getStrategy();
for (const Value *V : Bases) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed base pointer found in statepoint");
}
}
for (const Value *V : Ptrs) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() &&
"non gc managed derived pointer found in statepoint");
}
}
for (const Value *V : Relocations) {
- auto Opt = S.isGCManagedPointer(V->getType());
+ auto Opt = S.isGCManagedPointer(V->getType()->getScalarType());
if (Opt.hasValue()) {
assert(Opt.getValue() && "non gc managed pointer relocated");
}
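
Note: the three getScalarType() calls above make the statepoint sanity check
accept vectors of GC pointers, since asking the strategy about the vector
type itself would fail even when the element type is GC-managed. A toy model
of the distinction (stand-in for the LLVM type system, illustration only):

    #include <cassert>

    // A type is a scalar or a vector of an element type; getScalarType()
    // strips one level of vector-ness, as in llvm::Type.
    struct Type {
      bool IsGCPointer;     // in LLVM this answer comes from the GCStrategy
      const Type *Element;  // non-null for vector types
      const Type *getScalarType() const { return Element ? Element : this; }
    };

    int main() {
      Type GCPtr{true, nullptr};
      Type VecOfGCPtr{false, &GCPtr};
      // The vector type itself is not a pointer type...
      assert(!VecOfGCPtr.IsGCPointer);
      // ...but its scalar element type is, which is what the check now asks.
      assert(VecOfGCPtr.getScalarType()->IsGCPointer);
    }
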
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index f8aa1e2b0b9a..d361a6c4ad06 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -47,6 +47,7 @@
// MachineFrameInfo is updated with this information.
//===----------------------------------------------------------------------===//
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
// To check for profitability.
@@ -263,6 +264,8 @@ MachineBasicBlock *FindIDom(MachineBasicBlock &Block, ListOfBBs BBs,
if (!IDom)
break;
}
+ if (IDom == &Block)
+ return nullptr;
return IDom;
}
@@ -352,13 +355,9 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
if (MLI->getLoopDepth(Save) > MLI->getLoopDepth(Restore)) {
// Push Save outside of this loop if the immediate dominator is different
// from the save block. If the immediate dominator is not different, bail out.
- MachineBasicBlock *IDom = FindIDom<>(*Save, Save->predecessors(), *MDT);
- if (IDom != Save)
- Save = IDom;
- else {
- Save = nullptr;
+ Save = FindIDom<>(*Save, Save->predecessors(), *MDT);
+ if (!Save)
break;
- }
} else {
// If the loop does not exit, there is no point in looking
// for a post-dominator outside the loop.
@@ -386,6 +385,41 @@ void ShrinkWrap::updateSaveRestorePoints(MachineBasicBlock &MBB,
}
}
+/// Check whether the edge (\p SrcBB, \p DestBB) is a backedge according to
+/// MLI, i.e., whether there exists a loop that contains SrcBB and whose
+/// header is DestBB.
+static bool isProperBackedge(const MachineLoopInfo &MLI,
+ const MachineBasicBlock *SrcBB,
+ const MachineBasicBlock *DestBB) {
+ for (const MachineLoop *Loop = MLI.getLoopFor(SrcBB); Loop;
+ Loop = Loop->getParentLoop()) {
+ if (Loop->getHeader() == DestBB)
+ return true;
+ }
+ return false;
+}
+
+/// Check if the CFG of \p MF is irreducible.
+static bool isIrreducibleCFG(const MachineFunction &MF,
+ const MachineLoopInfo &MLI) {
+ const MachineBasicBlock *Entry = &*MF.begin();
+ ReversePostOrderTraversal<const MachineBasicBlock *> RPOT(Entry);
+ BitVector VisitedBB(MF.getNumBlockIDs());
+ for (const MachineBasicBlock *MBB : RPOT) {
+ VisitedBB.set(MBB->getNumber());
+ for (const MachineBasicBlock *SuccBB : MBB->successors()) {
+ if (!VisitedBB.test(SuccBB->getNumber()))
+ continue;
+ // We already visited SuccBB, thus MBB->SuccBB must be a backedge.
+ // Check that the header matches what we have in the loop information.
+ // Otherwise, we have an irreducible graph.
+ if (!isProperBackedge(MLI, MBB, SuccBB))
+ return true;
+ }
+ }
+ return false;
+}
+
bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
if (MF.empty() || !isShrinkWrapEnabled(MF))
return false;
@@ -394,6 +428,17 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
init(MF);
+ if (isIrreducibleCFG(MF, *MLI)) {
+ // If MF is irreducible, a block may belong to a loop without
+ // MachineLoopInfo reporting it. I.e., we may rely on the
+ // post-dominance property in loops, which leads to incorrect
+ // results. Moreover, we may miss that the prologue and
+ // epilogue are not in the same loop, leading to unbalanced
+ // construction/deconstruction of the stack frame.
+ DEBUG(dbgs() << "Irreducible CFGs are not supported yet\n");
+ return false;
+ }
+
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
std::unique_ptr<RegScavenger> RS(
TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr);
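
Note: isIrreducibleCFG() above is the classic reducibility test: walk the
blocks in reverse post-order and require every edge into an already-visited
block to be a backedge targeting a natural-loop header; any other retreating
edge means the CFG is irreducible and shrink-wrapping bails out. A
self-contained sketch over a tiny adjacency-list CFG, with loop information
reduced to a set of known headers (illustrative, not the MachineLoopInfo
API):

    #include <algorithm>
    #include <cassert>
    #include <set>
    #include <vector>

    using CFG = std::vector<std::vector<int>>;  // Succs[bb] = successor ids

    static void dfs(const CFG &Succs, int BB, std::vector<bool> &Seen,
                    std::vector<int> &PostOrder) {
      Seen[BB] = true;
      for (int S : Succs[BB])
        if (!Seen[S])
          dfs(Succs, S, Seen, PostOrder);
      PostOrder.push_back(BB);
    }

    // Mirrors isIrreducibleCFG: an edge to an already-visited block must
    // target a known loop header; LoopHeaders stands in for MLI.
    static bool isIrreducible(const CFG &Succs,
                              const std::set<int> &LoopHeaders) {
      std::vector<bool> Seen(Succs.size(), false);
      std::vector<int> PostOrder;
      dfs(Succs, 0, Seen, PostOrder);
      std::reverse(PostOrder.begin(), PostOrder.end());  // now an RPO

      std::vector<bool> Visited(Succs.size(), false);
      for (int BB : PostOrder) {
        Visited[BB] = true;
        for (int S : Succs[BB])
          if (Visited[S] && !LoopHeaders.count(S))
            return true;  // retreating edge into a non-header
      }
      return false;
    }

    int main() {
      // Reducible: 0 -> 1 -> 2 -> 1 is a single loop with header 1.
      assert(!isIrreducible({{1}, {2}, {1}}, {1}));
      // Irreducible: the cycle 1 <-> 2 is entered from 0 at both nodes, so
      // loop analysis reports no natural-loop header for it.
      assert(isIrreducible({{1, 2}, {2}, {1}}, {}));
    }
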
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index 3541b33a8441..7b5203815172 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -43,6 +43,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/WinEHFuncInfo.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
@@ -570,6 +571,14 @@ void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
}
}
+ // Update the location of C++ catch objects for the MSVC personality routine.
+ if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
+ for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
+ for (WinEHHandlerType &H : TBME.HandlerArray)
+ if (H.CatchObj.FrameIndex != INT_MAX &&
+ SlotRemap.count(H.CatchObj.FrameIndex))
+ H.CatchObj.FrameIndex = SlotRemap[H.CatchObj.FrameIndex];
+
DEBUG(dbgs()<<"Fixed "<<FixedMemOp<<" machine memory operands.\n");
DEBUG(dbgs()<<"Fixed "<<FixedDbg<<" debug locations.\n");
DEBUG(dbgs()<<"Fixed "<<FixedInstr<<" machine instructions.\n");
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 2426c27d43dc..886c5f6070c1 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -144,10 +144,11 @@ static void addTryBlockMapEntry(WinEHFuncInfo &FuncInfo, int TryLow,
HT.TypeDescriptor = cast<GlobalVariable>(TypeInfo->stripPointerCasts());
HT.Adjectives = cast<ConstantInt>(CPI->getArgOperand(1))->getZExtValue();
HT.Handler = CPI->getParent();
- if (isa<ConstantPointerNull>(CPI->getArgOperand(2)))
- HT.CatchObj.Alloca = nullptr;
+ if (auto *AI =
+ dyn_cast<AllocaInst>(CPI->getArgOperand(2)->stripPointerCasts()))
+ HT.CatchObj.Alloca = AI;
else
- HT.CatchObj.Alloca = cast<AllocaInst>(CPI->getArgOperand(2));
+ HT.CatchObj.Alloca = nullptr;
TBME.HandlerArray.push_back(HT);
}
FuncInfo.TryBlockMap.push_back(TBME);
@@ -664,24 +665,6 @@ void WinEHPrepare::colorFunclets(Function &F) {
}
}
-void llvm::calculateCatchReturnSuccessorColors(const Function *Fn,
- WinEHFuncInfo &FuncInfo) {
- for (const BasicBlock &BB : *Fn) {
- const auto *CatchRet = dyn_cast<CatchReturnInst>(BB.getTerminator());
- if (!CatchRet)
- continue;
- // A 'catchret' returns to the outer scope's color.
- Value *ParentPad = CatchRet->getParentPad();
- const BasicBlock *Color;
- if (isa<ConstantTokenNone>(ParentPad))
- Color = &Fn->getEntryBlock();
- else
- Color = cast<Instruction>(ParentPad)->getParent();
- // Record the catchret successor's funclet membership.
- FuncInfo.CatchRetSuccessorColorMap[CatchRet] = Color;
- }
-}
-
void WinEHPrepare::demotePHIsOnFunclets(Function &F) {
// Strip PHI nodes off of EH pads.
SmallVector<PHINode *, 16> PHINodes;
diff --git a/lib/DebugInfo/Symbolize/DIPrinter.cpp b/lib/DebugInfo/Symbolize/DIPrinter.cpp
index c6bfbc07dcf3..a9dee7abeed1 100644
--- a/lib/DebugInfo/Symbolize/DIPrinter.cpp
+++ b/lib/DebugInfo/Symbolize/DIPrinter.cpp
@@ -15,6 +15,7 @@
#include "llvm/DebugInfo/Symbolize/DIPrinter.h"
#include "llvm/DebugInfo/DIContext.h"
+#include "llvm/Support/LineIterator.h"
namespace llvm {
namespace symbolize {
@@ -24,7 +25,37 @@ namespace symbolize {
static const char kDILineInfoBadString[] = "<invalid>";
static const char kBadString[] = "??";
-void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
+// Prints the source code from FileName around the given Line.
+void DIPrinter::printContext(std::string FileName, int64_t Line) {
+ if (PrintSourceContext <= 0)
+ return;
+
+ ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
+ MemoryBuffer::getFile(FileName);
+ if (!BufOrErr)
+ return;
+
+ std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
+ int64_t FirstLine =
+ std::max(static_cast<int64_t>(1), Line - PrintSourceContext / 2);
+ int64_t LastLine = FirstLine + PrintSourceContext;
+ size_t MaxLineNumberWidth = std::floor(std::log10(LastLine)) + 1;
+
+ for (line_iterator I = line_iterator(*Buf, false);
+ !I.is_at_eof() && I.line_number() <= LastLine; ++I) {
+ int64_t L = I.line_number();
+ if (L >= FirstLine && L <= LastLine) {
+ OS << format_decimal(L, MaxLineNumberWidth);
+ if (L == Line)
+ OS << " >: ";
+ else
+ OS << " : ";
+ OS << *I << "\n";
+ }
+ }
+}
+
+void DIPrinter::print(const DILineInfo &Info, bool Inlined) {
if (PrintFunctionNames) {
std::string FunctionName = Info.FunctionName;
if (FunctionName == kDILineInfoBadString)
@@ -38,21 +69,22 @@ void DIPrinter::printName(const DILineInfo &Info, bool Inlined) {
if (Filename == kDILineInfoBadString)
Filename = kBadString;
OS << Filename << ":" << Info.Line << ":" << Info.Column << "\n";
+ printContext(Filename, Info.Line);
}
DIPrinter &DIPrinter::operator<<(const DILineInfo &Info) {
- printName(Info, false);
+ print(Info, false);
return *this;
}
DIPrinter &DIPrinter::operator<<(const DIInliningInfo &Info) {
uint32_t FramesNum = Info.getNumberOfFrames();
if (FramesNum == 0) {
- printName(DILineInfo(), false);
+ print(DILineInfo(), false);
return *this;
}
for (uint32_t i = 0; i < FramesNum; i++)
- printName(Info.getFrame(i), i > 0);
+ print(Info.getFrame(i), i > 0);
return *this;
}
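
Note: printContext() above opens the file named in the line info and prints
a window of PrintSourceContext lines, marking the faulting line with '>'.
A standalone approximation using only the standard library in place of
MemoryBuffer and line_iterator (file name and window size are illustrative):

    #include <algorithm>
    #include <cstdint>
    #include <fstream>
    #include <iomanip>
    #include <iostream>
    #include <string>

    void printContext(const std::string &FileName, int64_t Line,
                      int Context) {
      if (Context <= 0)
        return;
      std::ifstream In(FileName);
      if (!In)  // mirror the silent return when the file cannot be read
        return;
      int64_t First = std::max<int64_t>(1, Line - Context / 2);
      int64_t Last = First + Context;
      int Width = (int)std::to_string(Last).size();
      std::string Text;
      for (int64_t L = 1; L <= Last && std::getline(In, Text); ++L)
        if (L >= First)
          std::cout << std::setw(Width) << L
                    << (L == Line ? " >: " : "  : ") << Text << "\n";
    }

    int main() { printContext("example.c", 42, 10); }
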
diff --git a/lib/ExecutionEngine/Orc/CMakeLists.txt b/lib/ExecutionEngine/Orc/CMakeLists.txt
index a17f52e322e8..d26f212e00c9 100644
--- a/lib/ExecutionEngine/Orc/CMakeLists.txt
+++ b/lib/ExecutionEngine/Orc/CMakeLists.txt
@@ -2,10 +2,12 @@ add_llvm_library(LLVMOrcJIT
ExecutionUtils.cpp
IndirectionUtils.cpp
NullResolver.cpp
+ OrcArchitectureSupport.cpp
OrcCBindings.cpp
OrcCBindingsStack.cpp
+ OrcError.cpp
OrcMCJITReplacement.cpp
- OrcTargetSupport.cpp
+ OrcRemoteTargetRPCAPI.cpp
ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/ExecutionEngine/Orc
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
index b931f10b9d78..01e829f7909e 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcArchitectureSupport.cpp
@@ -1,4 +1,4 @@
-//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//===-- OrcArchitectureSupport.cpp - Architecture-specific support code ---===//
//
// The LLVM Compiler Infrastructure
//
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/Triple.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Process.h"
#include <array>
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
index e519c7f30920..956daae372da 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp
@@ -9,7 +9,7 @@
#include "OrcCBindingsStack.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cstdio>
diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
index 2e17624ff474..aae6a99432bc 100644
--- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
+++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h
@@ -221,7 +221,8 @@ public:
ModuleHandleT addIRModuleLazy(Module* M,
LLVMOrcSymbolResolverFn ExternalResolver,
void *ExternalResolverCtx) {
- return addIRModule(CODLayer, std::move(M), nullptr,
+ return addIRModule(CODLayer, std::move(M),
+ llvm::make_unique<SectionMemoryManager>(),
std::move(ExternalResolver), ExternalResolverCtx);
}
diff --git a/lib/ExecutionEngine/Orc/OrcError.cpp b/lib/ExecutionEngine/Orc/OrcError.cpp
new file mode 100644
index 000000000000..e95115ec6fed
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/OrcError.cpp
@@ -0,0 +1,57 @@
+//===---------------- OrcError.cpp - Error codes for ORC ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Error codes for ORC.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/OrcError.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/ManagedStatic.h"
+
+using namespace llvm;
+using namespace llvm::orc;
+
+namespace {
+
+class OrcErrorCategory : public std::error_category {
+public:
+ const char *name() const LLVM_NOEXCEPT override { return "orc"; }
+
+ std::string message(int condition) const override {
+ switch (static_cast<OrcErrorCode>(condition)) {
+ case OrcErrorCode::RemoteAllocatorDoesNotExist:
+ return "Remote allocator does not exist";
+ case OrcErrorCode::RemoteAllocatorIdAlreadyInUse:
+ return "Remote allocator Id already in use";
+ case OrcErrorCode::RemoteMProtectAddrUnrecognized:
+ return "Remote mprotect call references unallocated memory";
+ case OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist:
+ return "Remote indirect stubs owner does not exist";
+ case OrcErrorCode::RemoteIndirectStubsOwnerIdAlreadyInUse:
+ return "Remote indirect stubs owner Id already in use";
+ case OrcErrorCode::UnexpectedRPCCall:
+ return "Unexpected RPC call";
+ }
+ llvm_unreachable("Unhandled error code");
+ }
+};
+
+static ManagedStatic<OrcErrorCategory> OrcErrCat;
+}
+
+namespace llvm {
+namespace orc {
+
+std::error_code orcError(OrcErrorCode ErrCode) {
+ typedef std::underlying_type<OrcErrorCode>::type UT;
+ return std::error_code(static_cast<UT>(ErrCode), *OrcErrCat);
+}
+}
+}
diff --git a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
index 38a27cff5b2f..2ab70a9fee86 100644
--- a/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
+++ b/lib/ExecutionEngine/Orc/OrcMCJITReplacement.h
@@ -54,10 +54,13 @@ class OrcMCJITReplacement : public ExecutionEngine {
return Addr;
}
- void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) override {
- return ClientMM->reserveAllocationSpace(CodeSize, DataSizeRO,
- DataSizeRW);
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override {
+ return ClientMM->reserveAllocationSpace(CodeSize, CodeAlign,
+ RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
}
bool needsToReserveAllocationSpace() override {
@@ -74,6 +77,11 @@ class OrcMCJITReplacement : public ExecutionEngine {
return ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
}
+ void notifyObjectLoaded(RuntimeDyld &RTDyld,
+ const object::ObjectFile &O) override {
+ return ClientMM->notifyObjectLoaded(RTDyld, O);
+ }
+
void notifyObjectLoaded(ExecutionEngine *EE,
const object::ObjectFile &O) override {
return ClientMM->notifyObjectLoaded(EE, O);
diff --git a/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp b/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
new file mode 100644
index 000000000000..064633b4e490
--- /dev/null
+++ b/lib/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.cpp
@@ -0,0 +1,83 @@
+//===------- OrcRemoteTargetRPCAPI.cpp - ORC Remote API utilities ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h"
+
+namespace llvm {
+namespace orc {
+namespace remote {
+
+const char *OrcRemoteTargetRPCAPI::getJITProcIdName(JITProcId Id) {
+ switch (Id) {
+ case InvalidId:
+ return "*** Invalid JITProcId ***";
+ case CallIntVoidId:
+ return "CallIntVoid";
+ case CallIntVoidResponseId:
+ return "CallIntVoidResponse";
+ case CallMainId:
+ return "CallMain";
+ case CallMainResponseId:
+ return "CallMainResponse";
+ case CallVoidVoidId:
+ return "CallVoidVoid";
+ case CallVoidVoidResponseId:
+ return "CallVoidVoidResponse";
+ case CreateRemoteAllocatorId:
+ return "CreateRemoteAllocator";
+ case CreateIndirectStubsOwnerId:
+ return "CreateIndirectStubsOwner";
+ case DestroyRemoteAllocatorId:
+ return "DestroyRemoteAllocator";
+ case DestroyIndirectStubsOwnerId:
+ return "DestroyIndirectStubsOwner";
+ case EmitIndirectStubsId:
+ return "EmitIndirectStubs";
+ case EmitIndirectStubsResponseId:
+ return "EmitIndirectStubsResponse";
+ case EmitResolverBlockId:
+ return "EmitResolverBlock";
+ case EmitTrampolineBlockId:
+ return "EmitTrampolineBlock";
+ case EmitTrampolineBlockResponseId:
+ return "EmitTrampolineBlockResponse";
+ case GetSymbolAddressId:
+ return "GetSymbolAddress";
+ case GetSymbolAddressResponseId:
+ return "GetSymbolAddressResponse";
+ case GetRemoteInfoId:
+ return "GetRemoteInfo";
+ case GetRemoteInfoResponseId:
+ return "GetRemoteInfoResponse";
+ case ReadMemId:
+ return "ReadMem";
+ case ReadMemResponseId:
+ return "ReadMemResponse";
+ case ReserveMemId:
+ return "ReserveMem";
+ case ReserveMemResponseId:
+ return "ReserveMemResponse";
+ case RequestCompileId:
+ return "RequestCompile";
+ case RequestCompileResponseId:
+ return "RequestCompileResponse";
+ case SetProtectionsId:
+ return "SetProtections";
+ case TerminateSessionId:
+ return "TerminateSession";
+ case WriteMemId:
+ return "WriteMem";
+ case WritePtrId:
+ return "WritePtr";
+ }
+ return nullptr;
+}
+}
+}
+}
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
index a95f3bbe4179..d16b2db24e1a 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp
@@ -146,9 +146,12 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) {
// Compute the memory size required to load all sections to be loaded
// and pass this information to the memory manager
if (MemMgr.needsToReserveAllocationSpace()) {
- uint64_t CodeSize = 0, DataSizeRO = 0, DataSizeRW = 0;
- computeTotalAllocSize(Obj, CodeSize, DataSizeRO, DataSizeRW);
- MemMgr.reserveAllocationSpace(CodeSize, DataSizeRO, DataSizeRW);
+ uint64_t CodeSize = 0, RODataSize = 0, RWDataSize = 0;
+ uint32_t CodeAlign = 1, RODataAlign = 1, RWDataAlign = 1;
+ computeTotalAllocSize(Obj, CodeSize, CodeAlign, RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
+ MemMgr.reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
}
// Used sections from the object file
@@ -335,13 +338,15 @@ static bool isZeroInit(const SectionRef Section) {
// sections
void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
uint64_t &CodeSize,
- uint64_t &DataSizeRO,
- uint64_t &DataSizeRW) {
+ uint32_t &CodeAlign,
+ uint64_t &RODataSize,
+ uint32_t &RODataAlign,
+ uint64_t &RWDataSize,
+ uint32_t &RWDataAlign) {
// Compute the size of all sections required for execution
std::vector<uint64_t> CodeSectionSizes;
std::vector<uint64_t> ROSectionSizes;
std::vector<uint64_t> RWSectionSizes;
- uint64_t MaxAlignment = sizeof(void *);
// Collect sizes of all sections to be loaded;
// also determine the max alignment of all sections
@@ -376,17 +381,15 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
SectionSize = 1;
if (IsCode) {
+ CodeAlign = std::max(CodeAlign, Alignment);
CodeSectionSizes.push_back(SectionSize);
} else if (IsReadOnly) {
+ RODataAlign = std::max(RODataAlign, Alignment);
ROSectionSizes.push_back(SectionSize);
} else {
+ RWDataAlign = std::max(RWDataAlign, Alignment);
RWSectionSizes.push_back(SectionSize);
}
-
- // update the max alignment
- if (Alignment > MaxAlignment) {
- MaxAlignment = Alignment;
- }
}
}
@@ -410,9 +413,9 @@ void RuntimeDyldImpl::computeTotalAllocSize(const ObjectFile &Obj,
// allocated with the max alignment. Note that we cannot compute with the
// individual alignments of the sections, because then the required size
// depends on the order in which the sections are allocated.
- CodeSize = computeAllocationSizeForSections(CodeSectionSizes, MaxAlignment);
- DataSizeRO = computeAllocationSizeForSections(ROSectionSizes, MaxAlignment);
- DataSizeRW = computeAllocationSizeForSections(RWSectionSizes, MaxAlignment);
+ CodeSize = computeAllocationSizeForSections(CodeSectionSizes, CodeAlign);
+ RODataSize = computeAllocationSizeForSections(ROSectionSizes, RODataAlign);
+ RWDataSize = computeAllocationSizeForSections(RWSectionSizes, RWDataAlign);
}
// compute stub buffer size for the given section
@@ -937,7 +940,9 @@ RuntimeDyld::loadObject(const ObjectFile &Obj) {
if (!Dyld->isCompatibleFile(Obj))
report_fatal_error("Incompatible object format!");
- return Dyld->loadObject(Obj);
+ auto LoadedObjInfo = Dyld->loadObject(Obj);
+ MemMgr.notifyObjectLoaded(*this, Obj);
+ return LoadedObjInfo;
}
void *RuntimeDyld::getSymbolLocalAddress(StringRef Name) const {
@@ -967,6 +972,17 @@ bool RuntimeDyld::hasError() { return Dyld->hasError(); }
StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); }
+void RuntimeDyld::finalizeWithMemoryManagerLocking() {
+ bool MemoryFinalizationLocked = MemMgr.FinalizationLocked;
+ MemMgr.FinalizationLocked = true;
+ resolveRelocations();
+ registerEHFrames();
+ if (!MemoryFinalizationLocked) {
+ MemMgr.finalizeMemory();
+ MemMgr.FinalizationLocked = false;
+ }
+}
+
void RuntimeDyld::registerEHFrames() {
if (Dyld)
Dyld->registerEHFrames();
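
Note: the RuntimeDyld changes above stop sizing all three reservations with
one shared MaxAlignment and instead report a per-group alignment to the
memory manager. Assuming computeAllocationSizeForSections() pads every
section up to the group alignment (the function itself is outside this
diff), the effect on the computed upper bound looks like this sketch:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    // Order-independent upper bound: each section may need realignment, so
    // round every size up to Alignment before summing.
    uint64_t allocationSizeForSections(const std::vector<uint64_t> &Sections,
                                       uint64_t Alignment) {
      uint64_t Total = 0;
      for (uint64_t Size : Sections)
        Total += (Size + Alignment - 1) / Alignment * Alignment;
      return Total;
    }

    int main() {
      std::vector<uint64_t> Code = {100, 40};
      // Before: every group padded with the global max alignment, e.g. 64.
      assert(allocationSizeForSections(Code, 64) == 128 + 64);
      // After: the code group uses only its own max alignment, e.g. 16.
      assert(allocationSizeForSections(Code, 16) == 112 + 48);
    }
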
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
index dafd3c8793c3..ab732c69ee2f 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h
@@ -411,8 +411,10 @@ protected:
// \brief Compute an upper bound of the memory that is required to load all
// sections
- void computeTotalAllocSize(const ObjectFile &Obj, uint64_t &CodeSize,
- uint64_t &DataSizeRO, uint64_t &DataSizeRW);
+ void computeTotalAllocSize(const ObjectFile &Obj,
+ uint64_t &CodeSize, uint32_t &CodeAlign,
+ uint64_t &RODataSize, uint32_t &RODataAlign,
+ uint64_t &RWDataSize, uint32_t &RWDataAlign);
// \brief Compute the stub buffer size required for a section
unsigned computeSectionStubBufSize(const ObjectFile &Obj,
diff --git a/lib/ExecutionEngine/SectionMemoryManager.cpp b/lib/ExecutionEngine/SectionMemoryManager.cpp
index e2f220862cf7..1ad5f1740115 100644
--- a/lib/ExecutionEngine/SectionMemoryManager.cpp
+++ b/lib/ExecutionEngine/SectionMemoryManager.cpp
@@ -137,9 +137,6 @@ bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg)
return true;
}
- // Don't allow free memory blocks to be used after setting protection flags.
- RODataMem.FreeMem.clear();
-
// Make read-only data memory read-only.
ec = applyMemoryGroupPermissions(RODataMem,
sys::Memory::MF_READ | sys::Memory::MF_EXEC);
diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp
index e8c117ef6087..66e46dbf3aad 100644
--- a/lib/Fuzzer/FuzzerDriver.cpp
+++ b/lib/Fuzzer/FuzzerDriver.cpp
@@ -259,8 +259,6 @@ int FuzzerDriver(const std::vector<std::string> &Args,
Flags.prefer_small_during_initial_shuffle;
Options.Reload = Flags.reload;
Options.OnlyASCII = Flags.only_ascii;
- Options.TBMDepth = Flags.tbm_depth;
- Options.TBMWidth = Flags.tbm_width;
Options.OutputCSV = Flags.output_csv;
if (Flags.runs >= 0)
Options.MaxNumberOfRuns = Flags.runs;
@@ -286,7 +284,7 @@ int FuzzerDriver(const std::vector<std::string> &Args,
Fuzzer F(USF, Options);
for (auto &U: Dictionary)
- USF.GetMD().AddWordToDictionary(U.data(), U.size());
+ USF.GetMD().AddWordToManualDictionary(U);
// Timer
if (Flags.timeout > 0)
diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def
index 6d98f66ef9c1..977efb76922b 100644
--- a/lib/Fuzzer/FuzzerFlags.def
+++ b/lib/Fuzzer/FuzzerFlags.def
@@ -56,10 +56,6 @@ FUZZER_FLAG_INT(report_slow_units, 10,
FUZZER_FLAG_INT(only_ascii, 0,
"If 1, generate only ASCII (isprint+isspace) inputs.")
FUZZER_FLAG_STRING(dict, "Experimental. Use the dictionary file.")
-FUZZER_FLAG_INT(tbm_depth, 5, "Apply at most this number of consecutive"
- "trace-based-mutations (tbm).")
-FUZZER_FLAG_INT(tbm_width, 5, "Apply at most this number of independent"
- "trace-based-mutations (tbm)")
FUZZER_FLAG_STRING(test_single_input, "Use specified file as test input.")
FUZZER_FLAG_STRING(artifact_prefix, "Write fuzzing artifacts (crash, "
"timeout, or slow inputs) as "
diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h
index 65f1707ba922..e22b27a3dd2b 100644
--- a/lib/Fuzzer/FuzzerInterface.h
+++ b/lib/Fuzzer/FuzzerInterface.h
@@ -16,6 +16,7 @@
#ifndef LLVM_FUZZER_INTERFACE_H
#define LLVM_FUZZER_INTERFACE_H
+#include <limits>
#include <cstddef>
#include <cstdint>
#include <vector>
@@ -86,9 +87,13 @@ class MutationDispatcher {
/// Mutates data by changing one bit.
size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize);
- /// Mutates data by adding a word from the dictionary.
- size_t Mutate_AddWordFromDictionary(uint8_t *Data, size_t Size,
- size_t MaxSize);
+ /// Mutates data by adding a word from the manual dictionary.
+ size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
+ size_t MaxSize);
+
+ /// Mutates data by adding a word from the automatic dictionary.
+ size_t Mutate_AddWordFromAutoDictionary(uint8_t *Data, size_t Size,
+ size_t MaxSize);
/// Tries to find an ASCII integer in Data, changes it to another ASCII int.
size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize);
@@ -104,7 +109,11 @@ class MutationDispatcher {
size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2,
size_t Size2, uint8_t *Out, size_t MaxOutSize);
- void AddWordToDictionary(const uint8_t *Word, size_t Size);
+ void AddWordToManualDictionary(const Unit &Word);
+
+ void AddWordToAutoDictionary(const Unit &Word, size_t PositionHint);
+ void ClearAutoDictionary();
+
void SetCorpus(const std::vector<Unit> *Corpus);
private:
diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h
index 17a2cae94a58..c1e9daac9808 100644
--- a/lib/Fuzzer/FuzzerInternal.h
+++ b/lib/Fuzzer/FuzzerInternal.h
@@ -38,6 +38,7 @@ std::string DirPlusFile(const std::string &DirPath,
void Printf(const char *Fmt, ...);
void Print(const Unit &U, const char *PrintAfter = "");
+void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = "");
void PrintASCII(const Unit &U, const char *PrintAfter = "");
std::string Hash(const Unit &U);
void SetTimer(int Seconds);
@@ -88,8 +89,6 @@ class Fuzzer {
int SyncTimeout = 600;
int ReportSlowUnits = 10;
bool OnlyASCII = false;
- int TBMDepth = 10;
- int TBMWidth = 10;
std::string OutputCorpus;
std::string SyncCommand;
std::string ArtifactPrefix = "./";
@@ -156,10 +155,8 @@ class Fuzzer {
// Start tracing; forget all previously proposed mutations.
void StartTraceRecording();
- // Stop tracing and return the number of proposed mutations.
- size_t StopTraceRecording();
- // Apply Idx-th trace-based mutation to U.
- void ApplyTraceBasedMutation(size_t Idx, Unit *U);
+ // Stop tracing.
+ void StopTraceRecording();
void SetDeathCallback();
static void StaticDeathCallback();
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp
index 0b1d9d9686a2..5237682ff24d 100644
--- a/lib/Fuzzer/FuzzerLoop.cpp
+++ b/lib/Fuzzer/FuzzerLoop.cpp
@@ -390,7 +390,6 @@ void Fuzzer::MutateAndTestOne() {
U = ChooseUnitToMutate();
for (int i = 0; i < Options.MutateDepth; i++) {
- StartTraceRecording();
size_t Size = U.size();
U.resize(Options.MaxLen);
size_t NewSize = USF.Mutate(U.data(), Size, U.size());
@@ -398,21 +397,10 @@ void Fuzzer::MutateAndTestOne() {
assert(NewSize <= (size_t)Options.MaxLen &&
"Mutator return overisized unit");
U.resize(NewSize);
+ if (i == 0)
+ StartTraceRecording();
RunOneAndUpdateCorpus(U);
- size_t NumTraceBasedMutations = StopTraceRecording();
- size_t TBMWidth =
- std::min((size_t)Options.TBMWidth, NumTraceBasedMutations);
- size_t TBMDepth =
- std::min((size_t)Options.TBMDepth, NumTraceBasedMutations);
- Unit BackUp = U;
- for (size_t w = 0; w < TBMWidth; w++) {
- U = BackUp;
- for (size_t d = 0; d < TBMDepth; d++) {
- TotalNumberOfExecutedTraceBasedMutations++;
- ApplyTraceBasedMutation(USF.GetRand()(NumTraceBasedMutations), &U);
- RunOneAndUpdateCorpus(U);
- }
- }
+ StopTraceRecording();
}
}
@@ -467,12 +455,15 @@ void Fuzzer::Drill() {
PrintStats("REINIT");
SavedOutputCorpusPath.swap(Options.OutputCorpus);
- for (auto &U : SavedCorpus)
+ for (auto &U : SavedCorpus) {
+ CurrentUnit = U;
RunOne(U);
+ }
PrintStats("MERGE ");
Options.PrintNEW = true;
size_t NumMerged = 0;
for (auto &U : Corpus) {
+ CurrentUnit = U;
if (RunOne(U)) {
PrintStatusForNewUnit(U);
NumMerged++;
diff --git a/lib/Fuzzer/FuzzerMutate.cpp b/lib/Fuzzer/FuzzerMutate.cpp
index 84ee18e69fb0..30e5b43c0839 100644
--- a/lib/Fuzzer/FuzzerMutate.cpp
+++ b/lib/Fuzzer/FuzzerMutate.cpp
@@ -22,14 +22,22 @@ struct Mutator {
const char *Name;
};
+struct DictionaryEntry {
+ Unit Word;
+ size_t PositionHint;
+};
+
struct MutationDispatcher::Impl {
- std::vector<Unit> Dictionary;
+ std::vector<DictionaryEntry> ManualDictionary;
+ std::vector<DictionaryEntry> AutoDictionary;
std::vector<Mutator> Mutators;
std::vector<Mutator> CurrentMutatorSequence;
+ std::vector<DictionaryEntry> CurrentDictionaryEntrySequence;
const std::vector<Unit> *Corpus = nullptr;
+ FuzzerRandomBase &Rand;
void Add(Mutator M) { Mutators.push_back(M); }
- Impl() {
+ Impl(FuzzerRandomBase &Rand) : Rand(Rand) {
Add({&MutationDispatcher::Mutate_EraseByte, "EraseByte"});
Add({&MutationDispatcher::Mutate_InsertByte, "InsertByte"});
Add({&MutationDispatcher::Mutate_ChangeByte, "ChangeByte"});
@@ -37,14 +45,14 @@ struct MutationDispatcher::Impl {
Add({&MutationDispatcher::Mutate_ShuffleBytes, "ShuffleBytes"});
Add({&MutationDispatcher::Mutate_ChangeASCIIInteger, "ChangeASCIIInt"});
Add({&MutationDispatcher::Mutate_CrossOver, "CrossOver"});
- }
- void AddWordToDictionary(const uint8_t *Word, size_t Size) {
- if (Dictionary.empty()) {
- Add({&MutationDispatcher::Mutate_AddWordFromDictionary, "AddFromDict"});
- }
- Dictionary.push_back(Unit(Word, Word + Size));
+ Add({&MutationDispatcher::Mutate_AddWordFromManualDictionary,
+ "AddFromManualDict"});
+ Add({&MutationDispatcher::Mutate_AddWordFromAutoDictionary,
+ "AddFromAutoDict"});
}
void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; }
+ size_t AddWordFromDictionary(const std::vector<DictionaryEntry> &D,
+ uint8_t *Data, size_t Size, size_t MaxSize);
};
static char FlipRandomBit(char X, FuzzerRandomBase &Rand) {
@@ -68,7 +76,8 @@ static char RandCh(FuzzerRandomBase &Rand) {
size_t MutationDispatcher::Mutate_ShuffleBytes(uint8_t *Data, size_t Size,
size_t MaxSize) {
assert(Size);
- size_t ShuffleAmount = Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size.
+ size_t ShuffleAmount =
+ Rand(std::min(Size, (size_t)8)) + 1; // [1,8] and <= Size.
size_t ShuffleStart = Rand(Size - ShuffleAmount);
assert(ShuffleStart + ShuffleAmount <= Size);
std::random_shuffle(Data + ShuffleStart, Data + ShuffleStart + ShuffleAmount,
@@ -110,25 +119,42 @@ size_t MutationDispatcher::Mutate_ChangeBit(uint8_t *Data, size_t Size,
return Size;
}
-size_t MutationDispatcher::Mutate_AddWordFromDictionary(uint8_t *Data,
- size_t Size,
- size_t MaxSize) {
- auto &D = MDImpl->Dictionary;
- assert(!D.empty());
+size_t MutationDispatcher::Mutate_AddWordFromManualDictionary(uint8_t *Data,
+ size_t Size,
+ size_t MaxSize) {
+ return MDImpl->AddWordFromDictionary(MDImpl->ManualDictionary, Data, Size,
+ MaxSize);
+}
+
+size_t MutationDispatcher::Mutate_AddWordFromAutoDictionary(uint8_t *Data,
+ size_t Size,
+ size_t MaxSize) {
+ return MDImpl->AddWordFromDictionary(MDImpl->AutoDictionary, Data, Size,
+ MaxSize);
+}
+
+size_t MutationDispatcher::Impl::AddWordFromDictionary(
+ const std::vector<DictionaryEntry> &D, uint8_t *Data, size_t Size,
+ size_t MaxSize) {
if (D.empty()) return 0;
- const Unit &Word = D[Rand(D.size())];
+ const DictionaryEntry &DE = D[Rand(D.size())];
+ const Unit &Word = DE.Word;
+ size_t PositionHint = DE.PositionHint;
+ bool UsePositionHint = PositionHint != std::numeric_limits<size_t>::max() &&
+ PositionHint + Word.size() < Size && Rand.RandBool();
if (Rand.RandBool()) { // Insert Word.
if (Size + Word.size() > MaxSize) return 0;
- size_t Idx = Rand(Size + 1);
+ size_t Idx = UsePositionHint ? PositionHint : Rand(Size + 1);
memmove(Data + Idx + Word.size(), Data + Idx, Size - Idx);
memcpy(Data + Idx, Word.data(), Word.size());
- return Size + Word.size();
+ Size += Word.size();
} else { // Overwrite some bytes with Word.
if (Word.size() > Size) return 0;
- size_t Idx = Rand(Size - Word.size());
+ size_t Idx = UsePositionHint ? PositionHint : Rand(Size - Word.size());
memcpy(Data + Idx, Word.data(), Word.size());
- return Size;
}
+ CurrentDictionaryEntrySequence.push_back(DE);
+ return Size;
}
size_t MutationDispatcher::Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size,
@@ -182,12 +208,20 @@ size_t MutationDispatcher::Mutate_CrossOver(uint8_t *Data, size_t Size,
void MutationDispatcher::StartMutationSequence() {
MDImpl->CurrentMutatorSequence.clear();
+ MDImpl->CurrentDictionaryEntrySequence.clear();
}
void MutationDispatcher::PrintMutationSequence() {
Printf("MS: %zd ", MDImpl->CurrentMutatorSequence.size());
for (auto M : MDImpl->CurrentMutatorSequence)
Printf("%s-", M.Name);
+ if (!MDImpl->CurrentDictionaryEntrySequence.empty()) {
+ Printf(" DE: ");
+ for (auto DE : MDImpl->CurrentDictionaryEntrySequence) {
+ Printf("\"");
+ PrintASCII(DE.Word, "\"-");
+ }
+ }
}
// Mutates Data in place, returns new size.
@@ -219,12 +253,24 @@ void MutationDispatcher::SetCorpus(const std::vector<Unit> *Corpus) {
MDImpl->SetCorpus(Corpus);
}
-void MutationDispatcher::AddWordToDictionary(const uint8_t *Word, size_t Size) {
- MDImpl->AddWordToDictionary(Word, Size);
+void MutationDispatcher::AddWordToManualDictionary(const Unit &Word) {
+ MDImpl->ManualDictionary.push_back(
+ {Word, std::numeric_limits<size_t>::max()});
+}
+
+void MutationDispatcher::AddWordToAutoDictionary(const Unit &Word,
+ size_t PositionHint) {
+ static const size_t kMaxAutoDictSize = 1 << 14;
+ if (MDImpl->AutoDictionary.size() >= kMaxAutoDictSize) return;
+ MDImpl->AutoDictionary.push_back({Word, PositionHint});
+}
+
+void MutationDispatcher::ClearAutoDictionary() {
+ MDImpl->AutoDictionary.clear();
}
MutationDispatcher::MutationDispatcher(FuzzerRandomBase &Rand) : Rand(Rand) {
- MDImpl = new Impl;
+ MDImpl = new Impl(Rand);
}
MutationDispatcher::~MutationDispatcher() { delete MDImpl; }
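
Note: the PositionHint added above lets the auto dictionary replay a word at
the offset where tracing saw the comparison, rather than at a random offset;
SIZE_MAX marks "no hint" for manual-dictionary entries. A compact sketch of
the insert-or-overwrite choice (simplified from AddWordFromDictionary; the
RNG plumbing is illustrative):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <random>
    #include <vector>

    struct DictEntry {
      std::vector<uint8_t> Word;
      size_t PositionHint;  // SIZE_MAX == no hint
    };

    std::mt19937 Rng(0);
    bool randBool() { return Rng() & 1; }
    size_t randBelow(size_t N) { return Rng() % N; }

    // Returns the new size, or 0 if the mutation does not apply.
    size_t addWord(const DictEntry &DE, uint8_t *Data, size_t Size,
                   size_t MaxSize) {
      const auto &W = DE.Word;
      bool UseHint = DE.PositionHint != SIZE_MAX &&
                     DE.PositionHint + W.size() < Size && randBool();
      if (randBool()) {  // insert the word
        if (Size + W.size() > MaxSize) return 0;
        size_t Idx = UseHint ? DE.PositionHint : randBelow(Size + 1);
        std::memmove(Data + Idx + W.size(), Data + Idx, Size - Idx);
        std::memcpy(Data + Idx, W.data(), W.size());
        return Size + W.size();
      }
      if (W.size() > Size) return 0;  // overwrite bytes with the word
      size_t Idx = UseHint ? DE.PositionHint : randBelow(Size - W.size());
      std::memcpy(Data + Idx, W.data(), W.size());
      return Size;
    }

    int main() {
      uint8_t Buf[64] = {0};
      DictEntry DE{{0xAA, 0xBB}, /*PositionHint=*/10};
      size_t NewSize = addWord(DE, Buf, 32, sizeof(Buf));
      assert(NewSize == 32 || NewSize == 34);  // overwrite or insert
    }
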
diff --git a/lib/Fuzzer/FuzzerTraceState.cpp b/lib/Fuzzer/FuzzerTraceState.cpp
index 241c2f0ce590..b2006fa3aa4d 100644
--- a/lib/Fuzzer/FuzzerTraceState.cpp
+++ b/lib/Fuzzer/FuzzerTraceState.cpp
@@ -46,8 +46,6 @@
// * The __dfsw_* functions (implemented in this file) record the
// parameters (i.e. the application data and the corresponding taint labels)
// in a global state.
-// * Fuzzer::ApplyTraceBasedMutation() tries to use the data recorded
-// by __dfsw_* hooks to guide the fuzzing towards new application states.
//
// Parts of this code will not function when DFSan is not linked in.
// Instead of using ifdefs and thus requiring a separate build of lib/Fuzzer
@@ -78,7 +76,7 @@
#include <algorithm>
#include <cstring>
#include <thread>
-#include <unordered_map>
+#include <map>
#if !LLVM_FUZZER_SUPPORTS_DFSAN
// Stubs for dfsan for platforms where dfsan does not exist and weak
@@ -166,15 +164,19 @@ struct LabelRange {
// For now, very simple: put Size bytes of Data at position Pos.
struct TraceBasedMutation {
- size_t Pos;
- size_t Size;
- uint64_t Data;
+ static const size_t kMaxSize = 28;
+ uint32_t Pos : 24;
+ uint32_t Size : 8;
+ uint8_t Data[kMaxSize];
};
+const size_t TraceBasedMutation::kMaxSize;
+
class TraceState {
public:
- TraceState(const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
- : Options(Options), CurrentUnit(CurrentUnit) {
+ TraceState(UserSuppliedFuzzer &USF,
+ const Fuzzer::FuzzingOptions &Options, const Unit &CurrentUnit)
+ : USF(USF), Options(Options), CurrentUnit(CurrentUnit) {
// Current trace collection is not thread-friendly and it probably
// does not need to be, but at least we should not crash in the presence
// of threads. So, just ignore all traces coming from all threads but one.
@@ -185,28 +187,71 @@ class TraceState {
void DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
dfsan_label L2);
+ void DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2, dfsan_label L1,
+ dfsan_label L2);
void DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases, dfsan_label L);
void TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2);
+ void TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2);
void TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits, uint64_t Val,
size_t NumCases, uint64_t *Cases);
int TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
size_t DataSize);
+ int TryToAddDesiredData(const uint8_t *PresentData,
+ const uint8_t *DesiredData, size_t DataSize);
void StartTraceRecording() {
if (!Options.UseTraces) return;
RecordingTraces = true;
- Mutations.clear();
+ NumMutations = 0;
+ USF.GetMD().ClearAutoDictionary();
}
- size_t StopTraceRecording(FuzzerRandomBase &Rand) {
+ void StopTraceRecording() {
+ if (!RecordingTraces) return;
RecordingTraces = false;
- return Mutations.size();
+ for (size_t i = 0; i < NumMutations; i++) {
+ auto &M = Mutations[i];
+ Unit U(M.Data, M.Data + M.Size);
+ if (Options.Verbosity >= 2) {
+ AutoDictUnitCounts[U]++;
+ AutoDictAdds++;
+ if ((AutoDictAdds & (AutoDictAdds - 1)) == 0) {
+ typedef std::pair<size_t, Unit> CU;
+ std::vector<CU> CountedUnits;
+ for (auto &I : AutoDictUnitCounts)
+ CountedUnits.push_back(std::make_pair(I.second, I.first));
+ std::sort(CountedUnits.begin(), CountedUnits.end(),
+ [](const CU &a, const CU &b) { return a.first > b.first; });
+ Printf("AutoDict:\n");
+ for (auto &I : CountedUnits) {
+ Printf(" %zd ", I.first);
+ PrintASCII(I.second);
+ Printf("\n");
+ }
+ }
+ }
+ USF.GetMD().AddWordToAutoDictionary(U, M.Pos);
+ }
}
- void ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U);
+ void AddMutation(uint32_t Pos, uint32_t Size, const uint8_t *Data) {
+ if (NumMutations >= kMaxMutations) return;
+ assert(Size <= TraceBasedMutation::kMaxSize);
+ auto &M = Mutations[NumMutations++];
+ M.Pos = Pos;
+ M.Size = Size;
+ memcpy(M.Data, Data, Size);
+ }
+
+ void AddMutation(uint32_t Pos, uint32_t Size, uint64_t Data) {
+ assert(Size <= sizeof(Data));
+ AddMutation(Pos, Size, reinterpret_cast<uint8_t*>(&Data));
+ }
private:
bool IsTwoByteData(uint64_t Data) {
@@ -215,10 +260,15 @@ class TraceState {
return Signed == 0 || Signed == -1L;
}
bool RecordingTraces = false;
- std::vector<TraceBasedMutation> Mutations;
+ static const size_t kMaxMutations = 1 << 16;
+ size_t NumMutations;
+ TraceBasedMutation Mutations[kMaxMutations];
LabelRange LabelRanges[1 << (sizeof(dfsan_label) * 8)];
+ UserSuppliedFuzzer &USF;
const Fuzzer::FuzzingOptions &Options;
const Unit &CurrentUnit;
+ std::map<Unit, size_t> AutoDictUnitCounts;
+ size_t AutoDictAdds = 0;
static thread_local bool IsMyThread;
};
@@ -234,15 +284,6 @@ LabelRange TraceState::GetLabelRange(dfsan_label L) {
return LR = LabelRange::Singleton(LI);
}
-void TraceState::ApplyTraceBasedMutation(size_t Idx, fuzzer::Unit *U) {
- assert(Idx < Mutations.size());
- auto &M = Mutations[Idx];
- if (Options.Verbosity >= 3)
- Printf("TBM %zd %zd %zd\n", M.Pos, M.Size, M.Data);
- if (M.Pos + M.Size > U->size()) return;
- memcpy(U->data() + M.Pos, &M.Data, M.Size);
-}
-
void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2, dfsan_label L1,
dfsan_label L2) {
@@ -257,19 +298,39 @@ void TraceState::DFSanCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
- Mutations.push_back({Pos, CmpSize, Data});
- Mutations.push_back({Pos, CmpSize, Data + 1});
- Mutations.push_back({Pos, CmpSize, Data - 1});
+ AddMutation(Pos, CmpSize, Data);
+ AddMutation(Pos, CmpSize, Data + 1);
+ AddMutation(Pos, CmpSize, Data - 1);
}
if (CmpSize > LR.End - LR.Beg)
- Mutations.push_back({LR.Beg, (unsigned)(LR.End - LR.Beg), Data});
+ AddMutation(LR.Beg, (unsigned)(LR.End - LR.Beg), Data);
if (Options.Verbosity >= 3)
Printf("DFSanCmpCallback: PC %lx S %zd T %zd A1 %llx A2 %llx R %d L1 %d L2 "
"%d MU %zd\n",
- PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, Mutations.size());
+ PC, CmpSize, CmpType, Arg1, Arg2, Res, L1, L2, NumMutations);
+}
+
+void TraceState::DFSanMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2, dfsan_label L1,
+ dfsan_label L2) {
+
+ assert(ReallyHaveDFSan());
+ if (!RecordingTraces || !IsMyThread) return;
+ if (L1 == 0 && L2 == 0)
+ return; // Not actionable.
+ if (L1 != 0 && L2 != 0)
+ return; // Both sides tainted; may still be actionable, but skip for now.
+
+ const uint8_t *Data = L1 ? Data2 : Data1;
+ LabelRange LR = L1 ? GetLabelRange(L1) : GetLabelRange(L2);
+ for (size_t Pos = LR.Beg; Pos + CmpSize <= LR.End; Pos++) {
+ AddMutation(Pos, CmpSize, Data);
+ if (Options.Verbosity >= 3)
+ Printf("DFSanMemcmpCallback: Pos %d Size %d\n", Pos, CmpSize);
+ }
}
void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
@@ -286,12 +347,12 @@ void TraceState::DFSanSwitchCallback(uint64_t PC, size_t ValSizeInBits,
for (size_t Pos = LR.Beg; Pos + ValSize <= LR.End; Pos++)
for (size_t i = 0; i < NumCases; i++)
- Mutations.push_back({Pos, ValSize, Cases[i]});
+ AddMutation(Pos, ValSize, Cases[i]);
if (TryShort)
for (size_t Pos = LR.Beg; Pos + 2 <= LR.End; Pos++)
for (size_t i = 0; i < NumCases; i++)
- Mutations.push_back({Pos, 2, Cases[i]});
+ AddMutation(Pos, 2, Cases[i]);
if (Options.Verbosity >= 3)
Printf("DFSanSwitchCallback: PC %lx Val %zd SZ %zd # %zd L %d: {%d, %d} "
@@ -310,10 +371,27 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
break;
size_t Pos = Cur - Beg;
assert(Pos < CurrentUnit.size());
- if (Mutations.size() > 100000U) return Res; // Just in case.
- Mutations.push_back({Pos, DataSize, DesiredData});
- Mutations.push_back({Pos, DataSize, DesiredData + 1});
- Mutations.push_back({Pos, DataSize, DesiredData - 1});
+ AddMutation(Pos, DataSize, DesiredData);
+ AddMutation(Pos, DataSize, DesiredData + 1);
+ AddMutation(Pos, DataSize, DesiredData - 1);
+ Res++;
+ }
+ return Res;
+}
+
+int TraceState::TryToAddDesiredData(const uint8_t *PresentData,
+ const uint8_t *DesiredData,
+ size_t DataSize) {
+ int Res = 0;
+ const uint8_t *Beg = CurrentUnit.data();
+ const uint8_t *End = Beg + CurrentUnit.size();
+ for (const uint8_t *Cur = Beg; Cur < End; Cur++) {
+ Cur = (uint8_t *)memmem(Cur, End - Cur, PresentData, DataSize);
+ if (!Cur)
+ break;
+ size_t Pos = Cur - Beg;
+ assert(Pos < CurrentUnit.size());
+ AddMutation(Pos, DataSize, DesiredData);
Res++;
}
return Res;
@@ -322,15 +400,31 @@ int TraceState::TryToAddDesiredData(uint64_t PresentData, uint64_t DesiredData,
void TraceState::TraceCmpCallback(uintptr_t PC, size_t CmpSize, size_t CmpType,
uint64_t Arg1, uint64_t Arg2) {
if (!RecordingTraces || !IsMyThread) return;
+ if ((CmpType == ICMP_EQ || CmpType == ICMP_NE) && Arg1 == Arg2)
+ return; // No reason to mutate.
int Added = 0;
- if (Options.Verbosity >= 3)
- Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2);
Added += TryToAddDesiredData(Arg1, Arg2, CmpSize);
Added += TryToAddDesiredData(Arg2, Arg1, CmpSize);
if (!Added && CmpSize == 4 && IsTwoByteData(Arg1) && IsTwoByteData(Arg2)) {
Added += TryToAddDesiredData(Arg1, Arg2, 2);
Added += TryToAddDesiredData(Arg2, Arg1, 2);
}
+ if (Options.Verbosity >= 3 && Added)
+ Printf("TraceCmp %zd/%zd: %p %zd %zd\n", CmpSize, CmpType, PC, Arg1, Arg2);
+}
+
+void TraceState::TraceMemcmpCallback(size_t CmpSize, const uint8_t *Data1,
+ const uint8_t *Data2) {
+ if (!RecordingTraces || !IsMyThread) return;
+ CmpSize = std::min(CmpSize, TraceBasedMutation::kMaxSize);
+ int Added2 = TryToAddDesiredData(Data1, Data2, CmpSize);
+ int Added1 = TryToAddDesiredData(Data2, Data1, CmpSize);
+ if ((Added1 || Added2) && Options.Verbosity >= 3) {
+ Printf("MemCmp Added %d%d: ", Added1, Added2);
+ if (Added1) PrintASCII(Data1, CmpSize);
+ if (Added2) PrintASCII(Data2, CmpSize);
+ Printf("\n");
+ }
}
void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
@@ -351,7 +445,6 @@ void TraceState::TraceSwitchCallback(uintptr_t PC, size_t ValSizeInBits,
if (TryShort)
TryToAddDesiredData(Val, Cases[i], 2);
}
-
}
static TraceState *TS;
@@ -364,19 +457,14 @@ void Fuzzer::StartTraceRecording() {
TS->StartTraceRecording();
}
-size_t Fuzzer::StopTraceRecording() {
- if (!TS) return 0;
- return TS->StopTraceRecording(USF.GetRand());
-}
-
-void Fuzzer::ApplyTraceBasedMutation(size_t Idx, Unit *U) {
- assert(TS);
- TS->ApplyTraceBasedMutation(Idx, U);
+void Fuzzer::StopTraceRecording() {
+ if (!TS) return;
+ TS->StopTraceRecording();
}
void Fuzzer::InitializeTraceState() {
if (!Options.UseTraces) return;
- TS = new TraceState(Options, CurrentUnit);
+ TS = new TraceState(USF, Options, CurrentUnit);
CurrentUnit.resize(Options.MaxLen);
// The rest really requires DFSan.
if (!ReallyHaveDFSan()) return;
@@ -423,91 +511,79 @@ void dfsan_weak_hook_memcmp(void *caller_pc, const void *s1, const void *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, n);
dfsan_label L2 = dfsan_read_label(s2, n);
- TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
void dfsan_weak_hook_strncmp(void *caller_pc, const char *s1, const char *s2,
size_t n, dfsan_label s1_label,
dfsan_label s2_label, dfsan_label n_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
n = std::min(n, fuzzer::InternalStrnlen(s1, n));
n = std::min(n, fuzzer::InternalStrnlen(s2, n));
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, n);
dfsan_label L2 = dfsan_read_label(s2, n);
- TS->DFSanCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
void dfsan_weak_hook_strcmp(void *caller_pc, const char *s1, const char *s2,
dfsan_label s1_label, dfsan_label s2_label) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
size_t N = std::min(Len1, Len2);
if (N <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(N, sizeof(S1)));
- memcpy(&S2, s2, std::min(N, sizeof(S2)));
dfsan_label L1 = dfsan_read_label(s1, Len1);
dfsan_label L2 = dfsan_read_label(s2, Len2);
- TS->DFSanCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2, L1, L2);
+ TS->DFSanMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2), L1, L2);
}
+// We may need to avoid defining weak hooks to stay compatible with older clang.
+#ifndef LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
+# define LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS 1
+#endif
+
+#if LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
void __sanitizer_weak_hook_memcmp(void *caller_pc, const void *s1,
- const void *s2, size_t n) {
+ const void *s2, size_t n, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
- TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2);
+ if (result == 0) return; // No reason to mutate.
+ if (n <= 1) return; // Not interesting.
+ TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
void __sanitizer_weak_hook_strncmp(void *caller_pc, const char *s1,
- const char *s2, size_t n) {
+ const char *s2, size_t n, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
+ if (result == 0) return; // No reason to mutate.
size_t Len1 = fuzzer::InternalStrnlen(s1, n);
size_t Len2 = fuzzer::InternalStrnlen(s2, n);
n = std::min(n, Len1);
n = std::min(n, Len2);
if (n <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(n, sizeof(S1)));
- memcpy(&S2, s2, std::min(n, sizeof(S2)));
- TS->TraceCmpCallback(PC, n, fuzzer::ICMP_EQ, S1, S2);
+ TS->TraceMemcmpCallback(n, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
void __sanitizer_weak_hook_strcmp(void *caller_pc, const char *s1,
- const char *s2) {
+ const char *s2, int result) {
if (!TS) return;
- uintptr_t PC = reinterpret_cast<uintptr_t>(caller_pc);
- uint64_t S1 = 0, S2 = 0;
+ if (result == 0) return; // No reason to mutate.
size_t Len1 = strlen(s1);
size_t Len2 = strlen(s2);
size_t N = std::min(Len1, Len2);
if (N <= 1) return; // Not interesting.
- // Simplification: handle only first 8 bytes.
- memcpy(&S1, s1, std::min(N, sizeof(S1)));
- memcpy(&S2, s2, std::min(N, sizeof(S2)));
- TS->TraceCmpCallback(PC, N, fuzzer::ICMP_EQ, S1, S2);
+ TS->TraceMemcmpCallback(N, reinterpret_cast<const uint8_t *>(s1),
+ reinterpret_cast<const uint8_t *>(s2));
}
+#endif // LLVM_FUZZER_DEFINES_SANITIZER_WEAK_HOOOKS
+
__attribute__((visibility("default")))
void __sanitizer_cov_trace_cmp(uint64_t SizeAndType, uint64_t Arg1,
uint64_t Arg2) {
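
Note: the FuzzerTraceState rewrite above stops applying trace-based
mutations inline and instead turns each traced comparison into an
auto-dictionary entry: the "desired" bytes plus the position where the
"present" bytes occur in the current unit (that position later serves as
the PositionHint). The scan at the heart of the new memcmp path, sketched
with std::memcmp instead of memmem (which is a glibc extension):

    #include <cassert>
    #include <cstdint>
    #include <cstring>
    #include <vector>

    struct Mutation { size_t Pos; std::vector<uint8_t> Data; };

    // For every occurrence of Present in Unit, propose writing Desired at
    // that position (the core of the memcmp TryToAddDesiredData overload).
    int addDesiredData(const std::vector<uint8_t> &Unit,
                       const uint8_t *Present, const uint8_t *Desired,
                       size_t N, std::vector<Mutation> &Out) {
      int Res = 0;
      for (size_t Pos = 0; Pos + N <= Unit.size(); ++Pos) {
        if (std::memcmp(Unit.data() + Pos, Present, N) != 0)
          continue;
        Out.push_back({Pos, {Desired, Desired + N}});
        ++Res;
      }
      return Res;
    }

    int main() {
      std::vector<uint8_t> Unit = {'x', 'F', 'O', 'O', 'y'};
      std::vector<Mutation> Muts;
      // The target compared "FOO" (present in the unit) against "BAR".
      int Added = addDesiredData(Unit, (const uint8_t *)"FOO",
                                 (const uint8_t *)"BAR", 3, Muts);
      assert(Added == 1 && Muts[0].Pos == 1);
    }
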
diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp
index 6c1133fffd37..d7226cfce966 100644
--- a/lib/Fuzzer/FuzzerUtil.cpp
+++ b/lib/Fuzzer/FuzzerUtil.cpp
@@ -27,13 +27,26 @@ void Print(const Unit &v, const char *PrintAfter) {
Printf("%s", PrintAfter);
}
+void PrintASCIIByte(uint8_t Byte) {
+ if (Byte == '\\')
+ Printf("\\\\");
+ else if (Byte == '"')
+ Printf("\\\"");
+ else if (Byte >= 32 && Byte < 127)
+ Printf("%c", Byte);
+ else
+ Printf("\\x%02x", Byte);
+}
+
+void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter) {
+ for (size_t i = 0; i < Size; i++)
+ PrintASCIIByte(Data[i]);
+ Printf("%s", PrintAfter);
+}
+
void PrintASCII(const Unit &U, const char *PrintAfter) {
- for (auto X : U) {
- if (isprint(X))
- Printf("%c", X);
- else
- Printf("\\x%x", (unsigned)X);
- }
+ for (auto X : U)
+ PrintASCIIByte(X);
Printf("%s", PrintAfter);
}
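
Note: PrintASCIIByte above escapes backslash and double quote (so dictionary
words print unambiguously inside quotes), passes printable bytes through,
and hex-escapes the rest. The same logic as a standalone snippet with its
expected output:

    #include <cstdint>
    #include <cstdio>

    void printASCIIByte(uint8_t Byte) {
      if (Byte == '\\')
        std::printf("\\\\");
      else if (Byte == '"')
        std::printf("\\\"");
      else if (Byte >= 32 && Byte < 127)
        std::printf("%c", Byte);
      else
        std::printf("\\x%02x", Byte);
    }

    int main() {
      const uint8_t Data[] = {'A', '"', '\\', 0x00, 0xff};
      for (uint8_t B : Data)
        printASCIIByte(B);
      std::printf("\n");  // prints: A\"\\\x00\xff
    }
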
diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp
index 8c0012708afc..b33e0c961455 100644
--- a/lib/Fuzzer/test/FuzzerUnittest.cpp
+++ b/lib/Fuzzer/test/FuzzerUnittest.cpp
@@ -247,8 +247,8 @@ void TestAddWordFromDictionary(Mutator M, int NumIter) {
MutationDispatcher MD(Rand);
uint8_t Word1[4] = {0xAA, 0xBB, 0xCC, 0xDD};
uint8_t Word2[3] = {0xFF, 0xEE, 0xEF};
- MD.AddWordToDictionary(Word1, sizeof(Word1));
- MD.AddWordToDictionary(Word2, sizeof(Word2));
+ MD.AddWordToManualDictionary(Unit(Word1, Word1 + sizeof(Word1)));
+ MD.AddWordToManualDictionary(Unit(Word2, Word2 + sizeof(Word2)));
int FoundMask = 0;
uint8_t CH0[7] = {0x00, 0x11, 0x22, 0xAA, 0xBB, 0xCC, 0xDD};
uint8_t CH1[7] = {0x00, 0x11, 0xAA, 0xBB, 0xCC, 0xDD, 0x22};
@@ -274,14 +274,41 @@ void TestAddWordFromDictionary(Mutator M, int NumIter) {
}
TEST(FuzzerMutate, AddWordFromDictionary1) {
- TestAddWordFromDictionary(&MutationDispatcher::Mutate_AddWordFromDictionary,
- 1 << 15);
+ TestAddWordFromDictionary(
+ &MutationDispatcher::Mutate_AddWordFromManualDictionary, 1 << 15);
}
TEST(FuzzerMutate, AddWordFromDictionary2) {
TestAddWordFromDictionary(&MutationDispatcher::Mutate, 1 << 15);
}
+void TestAddWordFromDictionaryWithHint(Mutator M, int NumIter) {
+ FuzzerRandomLibc Rand(0);
+ MutationDispatcher MD(Rand);
+ uint8_t Word[] = {0xAA, 0xBB, 0xCC, 0xDD, 0xFF, 0xEE, 0xEF};
+ size_t PosHint = 7777;
+ MD.AddWordToAutoDictionary(Unit(Word, Word + sizeof(Word)), PosHint);
+ int FoundMask = 0;
+ for (int i = 0; i < NumIter; i++) {
+ uint8_t T[10000];
+ memset(T, 0, sizeof(T));
+ size_t NewSize = (MD.*M)(T, 9000, 10000);
+ if (NewSize >= PosHint + sizeof(Word) &&
+ !memcmp(Word, T + PosHint, sizeof(Word)))
+ FoundMask = 1;
+ }
+ EXPECT_EQ(FoundMask, 1);
+}
+
+TEST(FuzzerMutate, AddWordFromDictionaryWithHint1) {
+ TestAddWordFromDictionaryWithHint(
+ &MutationDispatcher::Mutate_AddWordFromAutoDictionary, 1 << 5);
+}
+
+TEST(FuzzerMutate, AddWordFromDictionaryWithHint2) {
+ TestAddWordFromDictionaryWithHint(&MutationDispatcher::Mutate, 1 << 10);
+}
+
void TestChangeASCIIInteger(Mutator M, int NumIter) {
FuzzerRandomLibc Rand(0);
MutationDispatcher MD(Rand);
diff --git a/lib/Fuzzer/test/MemcmpTest.cpp b/lib/Fuzzer/test/MemcmpTest.cpp
index 47ce59e0d8f5..c19c95717bbb 100644
--- a/lib/Fuzzer/test/MemcmpTest.cpp
+++ b/lib/Fuzzer/test/MemcmpTest.cpp
@@ -10,8 +10,16 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
if (Size >= 12 && memcmp(Data + 8, "ABCD", 4) == 0) {
if (Size >= 14 && memcmp(Data + 12, "XY", 2) == 0) {
if (Size >= 16 && memcmp(Data + 14, "KLM", 3) == 0) {
- fprintf(stderr, "BINGO\n");
- exit(1);
+ if (Size >= 27 && memcmp(Data + 17, "ABCDE-GHIJ", 10) == 0) {
+ fprintf(stderr, "BINGO %zd\n", Size);
+ for (size_t i = 0; i < Size; i++) {
+ uint8_t C = Data[i];
+ if (C >= 32 && C < 127)
+ fprintf(stderr, "%c", C);
+ }
+ fprintf(stderr, "\n");
+ exit(1);
+ }
}
}
}
diff --git a/lib/Fuzzer/test/fuzzer-dfsan.test b/lib/Fuzzer/test/fuzzer-dfsan.test
index 982f143669d0..567086ed65af 100644
--- a/lib/Fuzzer/test/fuzzer-dfsan.test
+++ b/lib/Fuzzer/test/fuzzer-dfsan.test
@@ -5,18 +5,19 @@ CHECK4: BINGO
CHECK_DFSanCmpCallback: DFSanCmpCallback: PC
CHECK_DFSanSwitchCallback: DFSanSwitchCallback: PC
+CHECK_DFSanMemcmpCallback: DFSanMemcmpCallback: Pos
RUN: not LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=1000000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK1
RUN: LLVMFuzzer-SimpleCmpTest-DFSan -use_traces=1 -seed=1 -runs=100 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback
RUN: not LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK2
-RUN: LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback
+RUN: LLVMFuzzer-MemcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanMemcmpCallback
RUN: not LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3
-RUN: LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback
+RUN: LLVMFuzzer-StrncmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanMemcmpCallback
RUN: not LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=10000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK3
-RUN: LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanCmpCallback
+RUN: LLVMFuzzer-StrcmpTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanMemcmpCallback
RUN: not LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=100000 -timeout=5 2>&1 | FileCheck %s --check-prefix=CHECK4
RUN: LLVMFuzzer-SwitchTest-DFSan -use_traces=1 -seed=1 -runs=2 -timeout=5 -verbosity=3 2>&1 | FileCheck %s -check-prefix=CHECK_DFSanSwitchCallback
diff --git a/lib/Fuzzer/test/fuzzer-dict.test b/lib/Fuzzer/test/fuzzer-dict.test
new file mode 100644
index 000000000000..dec002f6a377
--- /dev/null
+++ b/lib/Fuzzer/test/fuzzer-dict.test
@@ -0,0 +1,6 @@
+CHECK: BINGO
+Done1000000: Done 1000000 runs in
+
+RUN: not LLVMFuzzer-SimpleDictionaryTest -dict=%S/dict1.txt -seed=1 -runs=1000003 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleDictionaryTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+
diff --git a/lib/Fuzzer/test/fuzzer-traces.test b/lib/Fuzzer/test/fuzzer-traces.test
index 084cf30d6982..3b8639b8e941 100644
--- a/lib/Fuzzer/test/fuzzer-traces.test
+++ b/lib/Fuzzer/test/fuzzer-traces.test
@@ -1,7 +1,8 @@
CHECK: BINGO
Done1000000: Done 1000000 runs in
+Done10000000: Done 10000000 runs in
-RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=1000001 2>&1 | FileCheck %s
+RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=10000001 2>&1 | FileCheck %s
RUN: not LLVMFuzzer-MemcmpTest -use_traces=1 -seed=4294967295 -runs=100000 2>&1 | FileCheck %s
RUN: LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
@@ -15,5 +16,5 @@ RUN: LLVMFuzzer-StrcmpTest -seed=1 -runs=1000000 2>&1 | FileC
RUN: not LLVMFuzzer-SwitchTest -use_traces=1 -seed=1 -runs=1000002 2>&1 | FileCheck %s
RUN: LLVMFuzzer-SwitchTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
-RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+RUN: not LLVMFuzzer-SimpleHashTest -use_traces=1 -seed=1 -runs=10000000 2>&1 | FileCheck %s
+RUN: LLVMFuzzer-SimpleHashTest -seed=1 -runs=10000000 2>&1 | FileCheck %s --check-prefix=Done10000000
diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test
index 150fc7202b00..c63014f59d62 100644
--- a/lib/Fuzzer/test/fuzzer.test
+++ b/lib/Fuzzer/test/fuzzer.test
@@ -20,14 +20,12 @@ NullDerefTestExactPath: Test unit written to FOOBAR
RUN: not LLVMFuzzer-CounterTest -use_counters=1 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
RUN: not LLVMFuzzer-CallerCalleeTest -cross_over=0 -max_len=6 -seed=1 -timeout=15 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-CallerCalleeTest -use_indir_calls=0 -cross_over=0 -max_len=6 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
+# This one is flaky; it may actually find the goal even w/o use_indir_calls.
+# LLVMFuzzer-CallerCalleeTest -use_indir_calls=0 -cross_over=0 -max_len=6 -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
RUN: not LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s
-RUN: not LLVMFuzzer-SimpleDictionaryTest -dict=%S/dict1.txt -seed=1 -runs=1000003 2>&1 | FileCheck %s
-RUN: LLVMFuzzer-SimpleDictionaryTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000
-
RUN: not LLVMFuzzer-UninstrumentedTest-Uninstrumented 2>&1 | FileCheck %s --check-prefix=UNINSTRUMENTED
UNINSTRUMENTED: ERROR: __sanitizer_set_death_callback is not defined. Exiting.
diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp
index 1ebe9b7ee5bc..0ce44e105cc3 100644
--- a/lib/IR/AsmWriter.cpp
+++ b/lib/IR/AsmWriter.cpp
@@ -3121,7 +3121,7 @@ void AssemblyWriter::printMetadataAttachments(
return;
if (MDNames.empty())
- TheModule->getMDKindNames(MDNames);
+ MDs[0].second->getContext().getMDKindNames(MDNames);
for (const auto &I : MDs) {
unsigned Kind = I.first;
diff --git a/lib/IR/Core.cpp b/lib/IR/Core.cpp
index 7f39c8085a69..591dafa22a4b 100644
--- a/lib/IR/Core.cpp
+++ b/lib/IR/Core.cpp
@@ -1722,7 +1722,7 @@ void LLVMSetFunctionCallConv(LLVMValueRef Fn, unsigned CC) {
const char *LLVMGetGC(LLVMValueRef Fn) {
Function *F = unwrap<Function>(Fn);
- return F->hasGC()? F->getGC() : nullptr;
+ return F->hasGC()? F->getGC().c_str() : nullptr;
}
void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
diff --git a/lib/IR/Function.cpp b/lib/IR/Function.cpp
index cfb40b19c733..cfdfc40cd8aa 100644
--- a/lib/IR/Function.cpp
+++ b/lib/IR/Function.cpp
@@ -366,47 +366,21 @@ void Function::addDereferenceableOrNullAttr(unsigned i, uint64_t Bytes) {
setAttributes(PAL);
}
-// Maintain the GC name for each function in an on-the-side table. This saves
-// allocating an additional word in Function for programs which do not use GC
-// (i.e., most programs) at the cost of increased overhead for clients which do
-// use GC.
-static DenseMap<const Function*,PooledStringPtr> *GCNames;
-static StringPool *GCNamePool;
-static ManagedStatic<sys::SmartRWMutex<true> > GCLock;
-
-bool Function::hasGC() const {
- sys::SmartScopedReader<true> Reader(*GCLock);
- return GCNames && GCNames->count(this);
-}
-
-const char *Function::getGC() const {
+const std::string &Function::getGC() const {
assert(hasGC() && "Function has no collector");
- sys::SmartScopedReader<true> Reader(*GCLock);
- return *(*GCNames)[this];
+ return getContext().getGC(*this);
}
-void Function::setGC(const char *Str) {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (!GCNamePool)
- GCNamePool = new StringPool();
- if (!GCNames)
- GCNames = new DenseMap<const Function*,PooledStringPtr>();
- (*GCNames)[this] = GCNamePool->intern(Str);
+void Function::setGC(const std::string Str) {
+ setValueSubclassDataBit(14, !Str.empty());
+ getContext().setGC(*this, std::move(Str));
}
void Function::clearGC() {
- sys::SmartScopedWriter<true> Writer(*GCLock);
- if (GCNames) {
- GCNames->erase(this);
- if (GCNames->empty()) {
- delete GCNames;
- GCNames = nullptr;
- if (GCNamePool->empty()) {
- delete GCNamePool;
- GCNamePool = nullptr;
- }
- }
- }
+ if (!hasGC())
+ return;
+ getContext().deleteGC(*this);
+ setValueSubclassDataBit(14, false);
}
/// Copy all additional attributes (those not needed to create a Function) from
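A minimal client-side sketch of the reworked API, assuming a Function already in scope ("statepoint-example" is just an illustrative strategy name; the storage itself moves into LLVMContext below):

    #include "llvm/IR/Function.h"
    #include <cassert>
    using namespace llvm;

    void tagForGC(Function &F) {
      F.setGC("statepoint-example");       // stored in the context's side table
      assert(F.hasGC());                   // now just reads subclass-data bit 14
      const std::string &Name = F.getGC(); // forwarded to LLVMContext::getGC
      (void)Name;
      F.clearGC();                         // erases the entry, clears the bit
    }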
diff --git a/lib/IR/IRPrintingPasses.cpp b/lib/IR/IRPrintingPasses.cpp
index c1ac336c1fbf..822dbeb08b33 100644
--- a/lib/IR/IRPrintingPasses.cpp
+++ b/lib/IR/IRPrintingPasses.cpp
@@ -28,7 +28,13 @@ PrintModulePass::PrintModulePass(raw_ostream &OS, const std::string &Banner,
PreservedAnalyses PrintModulePass::run(Module &M) {
OS << Banner;
- M.print(OS, nullptr, ShouldPreserveUseListOrder);
+ if (llvm::isFunctionInPrintList("*"))
+ M.print(OS, nullptr, ShouldPreserveUseListOrder);
+ else {
+ for (const auto &F : M.functions())
+ if (llvm::isFunctionInPrintList(F.getName()))
+ F.print(OS);
+ }
return PreservedAnalyses::all();
}
@@ -37,7 +43,8 @@ PrintFunctionPass::PrintFunctionPass(raw_ostream &OS, const std::string &Banner)
: OS(OS), Banner(Banner) {}
PreservedAnalyses PrintFunctionPass::run(Function &F) {
- OS << Banner << static_cast<Value &>(F);
+ if (isFunctionInPrintList(F.getName()))
+ OS << Banner << static_cast<Value &>(F);
return PreservedAnalyses::all();
}
diff --git a/lib/IR/LLVMContext.cpp b/lib/IR/LLVMContext.cpp
index 8848bcb71477..48b53b0f532a 100644
--- a/lib/IR/LLVMContext.cpp
+++ b/lib/IR/LLVMContext.cpp
@@ -304,3 +304,19 @@ void LLVMContext::getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const {
uint32_t LLVMContext::getOperandBundleTagID(StringRef Tag) const {
return pImpl->getOperandBundleTagID(Tag);
}
+
+void LLVMContext::setGC(const Function &Fn, std::string GCName) {
+ auto It = pImpl->GCNames.find(&Fn);
+
+ if (It == pImpl->GCNames.end()) {
+ pImpl->GCNames.insert(std::make_pair(&Fn, std::move(GCName)));
+ return;
+ }
+ It->second = std::move(GCName);
+}
+const std::string &LLVMContext::getGC(const Function &Fn) {
+ return pImpl->GCNames[&Fn];
+}
+void LLVMContext::deleteGC(const Function &Fn) {
+ pImpl->GCNames.erase(&Fn);
+}
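As a design note, the find-then-insert sequence in setGC could likely be collapsed, since DenseMap::operator[] default-constructs missing entries; an untested equivalent:

    void LLVMContext::setGC(const Function &Fn, std::string GCName) {
      pImpl->GCNames[&Fn] = std::move(GCName);
    }

Note also that getGC uses operator[], so querying a function with no entry would quietly insert an empty string; callers only reach it behind a hasGC() check, which keeps that from biting in practice.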
diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h
index a24114d0a0ae..d42047d4e775 100644
--- a/lib/IR/LLVMContextImpl.h
+++ b/lib/IR/LLVMContextImpl.h
@@ -1027,6 +1027,13 @@ public:
void getOperandBundleTags(SmallVectorImpl<StringRef> &Tags) const;
uint32_t getOperandBundleTagID(StringRef Tag) const;
+ /// Maintain the GC name for each function.
+ ///
+ /// This saves allocating an additional word in Function for programs which
+ /// do not use GC (i.e., most programs) at the cost of increased overhead for
+ /// clients which do use GC.
+ DenseMap<const Function*, std::string> GCNames;
+
LLVMContextImpl(LLVMContext &C);
~LLVMContextImpl();
diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp
index f2e0c7d32c02..63d89f21b350 100644
--- a/lib/IR/LegacyPassManager.cpp
+++ b/lib/IR/LegacyPassManager.cpp
@@ -28,6 +28,7 @@
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <map>
+#include <unordered_set>
using namespace llvm;
using namespace llvm::legacy;
@@ -83,6 +84,13 @@ PrintAfterAll("print-after-all",
llvm::cl::desc("Print IR after each pass"),
cl::init(false));
+static cl::list<std::string>
+ PrintFuncsList("filter-print-funcs", cl::value_desc("function names"),
+ cl::desc("Only print IR for functions whose name "
+ "match this for all print-[before|after][-all] "
+ "options"),
+ cl::CommaSeparated);
+
/// This is a helper to determine whether to print IR before or
/// after a pass.
@@ -109,6 +117,11 @@ static bool ShouldPrintAfterPass(const PassInfo *PI) {
return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter);
}
+bool llvm::isFunctionInPrintList(StringRef FunctionName) {
+ static std::unordered_set<std::string> PrintFuncNames(PrintFuncsList.begin(),
+ PrintFuncsList.end());
+ return PrintFuncNames.empty() || PrintFuncNames.count(FunctionName);
+}
/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions
/// or higher is specified.
bool PMDataManager::isPassDebuggingExecutionsOrMore() const {
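The new list composes with the existing print flags; an invocation along these lines (pipeline and file names illustrative) prints IR after every pass, but only for foo and bar:

    opt -print-after-all -filter-print-funcs=foo,bar -O2 input.ll -o /dev/null

Since PrintFuncNames is a function-local static, the option list is snapshotted on the first query; later mutations of the cl::list are not observed.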
diff --git a/lib/IR/Metadata.cpp b/lib/IR/Metadata.cpp
index d8eaceb9ea2b..9a9a5017841c 100644
--- a/lib/IR/Metadata.cpp
+++ b/lib/IR/Metadata.cpp
@@ -557,7 +557,7 @@ void MDNode::decrementUnresolvedOperandCount() {
resolve();
}
-void MDNode::resolveCycles(bool AllowTemps) {
+void MDNode::resolveRecursivelyImpl(bool AllowTemps) {
if (isResolved())
return;
diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp
index 6dfb05d94491..9198b0e1fb58 100644
--- a/lib/IR/Verifier.cpp
+++ b/lib/IR/Verifier.cpp
@@ -45,6 +45,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/IR/Verifier.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -145,6 +146,11 @@ private:
OS << *C;
}
+ template <typename T> void Write(ArrayRef<T> Vs) {
+ for (const T &V : Vs)
+ Write(V);
+ }
+
template <typename T1, typename... Ts>
void WriteTs(const T1 &V1, const Ts &... Vs) {
Write(V1);
@@ -204,6 +210,10 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
/// given function and the largest index passed to llvm.localrecover.
DenseMap<Function *, std::pair<unsigned, unsigned>> FrameEscapeInfo;
+ // Maps each catchswitch or cleanuppad that unwinds to a sibling to the
+ // terminator that indicates the unwind; used to detect cycles therein.
+ MapVector<Instruction *, TerminatorInst *> SiblingFuncletInfo;
+
/// Cache of constants visited in search of ConstantExprs.
SmallPtrSet<const Constant *, 32> ConstantExprVisited;
@@ -245,9 +255,11 @@ public:
Broken = false;
// FIXME: We strip const here because the inst visitor strips const.
visit(const_cast<Function &>(F));
+ verifySiblingFuncletUnwinds();
InstsInThisBlock.clear();
LandingPadResultTy = nullptr;
SawFrameEscape = false;
+ SiblingFuncletInfo.clear();
return !Broken;
}
@@ -403,6 +415,7 @@ private:
void visitCatchPadInst(CatchPadInst &CPI);
void visitCatchReturnInst(CatchReturnInst &CatchReturn);
void visitCleanupPadInst(CleanupPadInst &CPI);
+ void visitFuncletPadInst(FuncletPadInst &FPI);
void visitCatchSwitchInst(CatchSwitchInst &CatchSwitch);
void visitCleanupReturnInst(CleanupReturnInst &CRI);
@@ -428,6 +441,7 @@ private:
void visitConstantExpr(const ConstantExpr *CE);
void VerifyStatepoint(ImmutableCallSite CS);
void verifyFrameRecoverIndices();
+ void verifySiblingFuncletUnwinds();
// Module-level debug info verification...
void verifyTypeRefs();
@@ -984,6 +998,9 @@ void Verifier::visitDIMacro(const DIMacro &N) {
N.getMacinfoType() == dwarf::DW_MACINFO_undef,
"invalid macinfo type", &N);
Assert(!N.getName().empty(), "anonymous macro", &N);
+ if (!N.getValue().empty()) {
+ assert(N.getValue().data()[0] != ' ' && "Macro value has a space prefix");
+ }
}
void Verifier::visitDIMacroFile(const DIMacroFile &N) {
@@ -1693,6 +1710,59 @@ void Verifier::verifyFrameRecoverIndices() {
}
}
+static Instruction *getSuccPad(TerminatorInst *Terminator) {
+ BasicBlock *UnwindDest;
+ if (auto *II = dyn_cast<InvokeInst>(Terminator))
+ UnwindDest = II->getUnwindDest();
+ else if (auto *CSI = dyn_cast<CatchSwitchInst>(Terminator))
+ UnwindDest = CSI->getUnwindDest();
+ else
+ UnwindDest = cast<CleanupReturnInst>(Terminator)->getUnwindDest();
+ return UnwindDest->getFirstNonPHI();
+}
+
+void Verifier::verifySiblingFuncletUnwinds() {
+ SmallPtrSet<Instruction *, 8> Visited;
+ SmallPtrSet<Instruction *, 8> Active;
+ for (const auto &Pair : SiblingFuncletInfo) {
+ Instruction *PredPad = Pair.first;
+ if (Visited.count(PredPad))
+ continue;
+ Active.insert(PredPad);
+ TerminatorInst *Terminator = Pair.second;
+ do {
+ Instruction *SuccPad = getSuccPad(Terminator);
+ if (Active.count(SuccPad)) {
+ // Found a cycle; report error
+ Instruction *CyclePad = SuccPad;
+ SmallVector<Instruction *, 8> CycleNodes;
+ do {
+ CycleNodes.push_back(CyclePad);
+ TerminatorInst *CycleTerminator = SiblingFuncletInfo[CyclePad];
+ if (CycleTerminator != CyclePad)
+ CycleNodes.push_back(CycleTerminator);
+ CyclePad = getSuccPad(CycleTerminator);
+ } while (CyclePad != SuccPad);
+ Assert(false, "EH pads can't handle each other's exceptions",
+ ArrayRef<Instruction *>(CycleNodes));
+ }
+ // Don't re-walk a node we've already checked
+ if (!Visited.insert(SuccPad).second)
+ break;
+ // Walk to this successor if it has a map entry.
+ PredPad = SuccPad;
+ auto TermI = SiblingFuncletInfo.find(PredPad);
+ if (TermI == SiblingFuncletInfo.end())
+ break;
+ Terminator = TermI->second;
+ Active.insert(PredPad);
+ } while (true);
+ // Each node only has one successor, so we've walked all the active
+ // nodes' successors.
+ Active.clear();
+ }
+}
+
// visitFunction - Verify that a function is ok.
//
void Verifier::visitFunction(const Function &F) {
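Stripped of the EH specifics, the walk above is visited/active coloring on a functional graph: every pad has at most one recorded unwind successor, so the check is linear. A generic sketch, not tied to the Verifier's types:

    #include <map>
    #include <set>

    // Detect a cycle in a graph where each node has at most one successor.
    template <typename Node>
    bool hasCycle(const std::map<Node, Node> &Succ) {
      std::set<Node> Visited;
      for (const auto &P : Succ) {
        if (Visited.count(P.first))
          continue;
        std::set<Node> Active; // nodes on the current walk
        Node N = P.first;
        while (true) {
          if (!Active.insert(N).second)
            return true;               // returned to this walk: cycle
          if (!Visited.insert(N).second)
            break;                     // merged into an already-checked walk
          auto It = Succ.find(N);
          if (It == Succ.end())
            break;                     // no recorded successor
          N = It->second;
        }
      }
      return false;
    }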
@@ -2892,6 +2962,13 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) {
visitInstruction(IVI);
}
+static Value *getParentPad(Value *EHPad) {
+ if (auto *FPI = dyn_cast<FuncletPadInst>(EHPad))
+ return FPI->getParentPad();
+
+ return cast<CatchSwitchInst>(EHPad)->getParentPad();
+}
+
void Verifier::visitEHPadPredecessors(Instruction &I) {
assert(I.isEHPad());
@@ -2919,16 +2996,45 @@ void Verifier::visitEHPadPredecessors(Instruction &I) {
"Block containg CatchPadInst must be jumped to "
"only by its catchswitch.",
CPI);
+ Assert(BB != CPI->getCatchSwitch()->getUnwindDest(),
+ "Catchswitch cannot unwind to one of its catchpads",
+ CPI->getCatchSwitch(), CPI);
return;
}
+ // Verify that each pred has a legal terminator with a legal to/from EH
+ // pad relationship.
+ Instruction *ToPad = &I;
+ Value *ToPadParent = getParentPad(ToPad);
for (BasicBlock *PredBB : predecessors(BB)) {
TerminatorInst *TI = PredBB->getTerminator();
+ Value *FromPad;
if (auto *II = dyn_cast<InvokeInst>(TI)) {
Assert(II->getUnwindDest() == BB && II->getNormalDest() != BB,
- "EH pad must be jumped to via an unwind edge", &I, II);
- } else if (!isa<CleanupReturnInst>(TI) && !isa<CatchSwitchInst>(TI)) {
- Assert(false, "EH pad must be jumped to via an unwind edge", &I, TI);
+ "EH pad must be jumped to via an unwind edge", ToPad, II);
+ if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet))
+ FromPad = Bundle->Inputs[0];
+ else
+ FromPad = ConstantTokenNone::get(II->getContext());
+ } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
+ FromPad = CRI->getCleanupPad();
+ Assert(FromPad != ToPadParent, "A cleanupret must exit its cleanup", CRI);
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
+ FromPad = CSI;
+ } else {
+ Assert(false, "EH pad must be jumped to via an unwind edge", ToPad, TI);
+ }
+
+ // The edge may exit from zero or more nested pads.
+ for (;; FromPad = getParentPad(FromPad)) {
+ Assert(FromPad != ToPad,
+ "EH pad cannot handle exceptions raised within it", FromPad, TI);
+ if (FromPad == ToPadParent) {
+ // This is a legal unwind edge.
+ break;
+ }
+ Assert(!isa<ConstantTokenNone>(FromPad),
+ "A single unwind edge may only enter one EH pad", TI);
}
}
}
@@ -2992,7 +3098,7 @@ void Verifier::visitCatchPadInst(CatchPadInst &CPI) {
Assert(BB->getFirstNonPHI() == &CPI,
"CatchPadInst not the first non-PHI instruction in the block.", &CPI);
- visitInstruction(CPI);
+ visitFuncletPadInst(CPI);
}
void Verifier::visitCatchReturnInst(CatchReturnInst &CatchReturn) {
@@ -3022,33 +3128,160 @@ void Verifier::visitCleanupPadInst(CleanupPadInst &CPI) {
Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
"CleanupPadInst has an invalid parent.", &CPI);
+ visitFuncletPadInst(CPI);
+}
+
+void Verifier::visitFuncletPadInst(FuncletPadInst &FPI) {
User *FirstUser = nullptr;
- BasicBlock *FirstUnwindDest = nullptr;
- for (User *U : CPI.users()) {
- BasicBlock *UnwindDest;
- if (CleanupReturnInst *CRI = dyn_cast<CleanupReturnInst>(U)) {
- UnwindDest = CRI->getUnwindDest();
- } else if (isa<CleanupPadInst>(U) || isa<CatchSwitchInst>(U)) {
- continue;
- } else if (CallSite(U)) {
- continue;
- } else {
- Assert(false, "bogus cleanuppad use", &CPI);
+ Value *FirstUnwindPad = nullptr;
+ SmallVector<FuncletPadInst *, 8> Worklist({&FPI});
+ while (!Worklist.empty()) {
+ FuncletPadInst *CurrentPad = Worklist.pop_back_val();
+ Value *UnresolvedAncestorPad = nullptr;
+ for (User *U : CurrentPad->users()) {
+ BasicBlock *UnwindDest;
+ if (auto *CRI = dyn_cast<CleanupReturnInst>(U)) {
+ UnwindDest = CRI->getUnwindDest();
+ } else if (auto *CSI = dyn_cast<CatchSwitchInst>(U)) {
+ // We allow catchswitch unwind to caller to nest
+ // within an outer pad that unwinds somewhere else,
+ // because catchswitch doesn't have a nounwind variant.
+ // See e.g. SimplifyCFGOpt::SimplifyUnreachable.
+ if (CSI->unwindsToCaller())
+ continue;
+ UnwindDest = CSI->getUnwindDest();
+ } else if (auto *II = dyn_cast<InvokeInst>(U)) {
+ UnwindDest = II->getUnwindDest();
+ } else if (isa<CallInst>(U)) {
+ // Calls which don't unwind may be found inside funclet
+ // pads that unwind somewhere else. We don't *require*
+ // such calls to be annotated nounwind.
+ continue;
+ } else if (auto *CPI = dyn_cast<CleanupPadInst>(U)) {
+ // The unwind dest for a cleanup can only be found by
+ // recursive search. Add it to the worklist, and we'll
+ // search for its first use that determines where it unwinds.
+ Worklist.push_back(CPI);
+ continue;
+ } else {
+ Assert(isa<CatchReturnInst>(U), "Bogus funclet pad use", U);
+ continue;
+ }
+
+ Value *UnwindPad;
+ bool ExitsFPI;
+ if (UnwindDest) {
+ UnwindPad = UnwindDest->getFirstNonPHI();
+ Value *UnwindParent = getParentPad(UnwindPad);
+ // Ignore unwind edges that don't exit CurrentPad.
+ if (UnwindParent == CurrentPad)
+ continue;
+ // Determine whether the original funclet pad is exited,
+ // and if we are scanning nested pads determine how many
+ // of them are exited so we can stop searching their
+ // children.
+ Value *ExitedPad = CurrentPad;
+ ExitsFPI = false;
+ do {
+ if (ExitedPad == &FPI) {
+ ExitsFPI = true;
+ // Now we can resolve any ancestors of CurrentPad up to
+ // FPI, but not including FPI since we need to make sure
+ // to check all direct users of FPI for consistency.
+ UnresolvedAncestorPad = &FPI;
+ break;
+ }
+ Value *ExitedParent = getParentPad(ExitedPad);
+ if (ExitedParent == UnwindParent) {
+ // ExitedPad is the ancestor-most pad which this unwind
+ // edge exits, so we can resolve up to it, meaning that
+ // ExitedParent is the first ancestor still unresolved.
+ UnresolvedAncestorPad = ExitedParent;
+ break;
+ }
+ ExitedPad = ExitedParent;
+ } while (!isa<ConstantTokenNone>(ExitedPad));
+ } else {
+ // Unwinding to caller exits all pads.
+ UnwindPad = ConstantTokenNone::get(FPI.getContext());
+ ExitsFPI = true;
+ UnresolvedAncestorPad = &FPI;
+ }
+
+ if (ExitsFPI) {
+ // This unwind edge exits FPI. Make sure it agrees with other
+ // such edges.
+ if (FirstUser) {
+ Assert(UnwindPad == FirstUnwindPad, "Unwind edges out of a funclet "
+ "pad must have the same unwind "
+ "dest",
+ &FPI, U, FirstUser);
+ } else {
+ FirstUser = U;
+ FirstUnwindPad = UnwindPad;
+ // Record cleanup sibling unwinds for verifySiblingFuncletUnwinds
+ if (isa<CleanupPadInst>(&FPI) && !isa<ConstantTokenNone>(UnwindPad) &&
+ getParentPad(UnwindPad) == getParentPad(&FPI))
+ SiblingFuncletInfo[&FPI] = cast<TerminatorInst>(U);
+ }
+ }
+ // Make sure we visit all uses of FPI, but for nested pads stop as
+ // soon as we know where they unwind to.
+ if (CurrentPad != &FPI)
+ break;
}
+ if (UnresolvedAncestorPad) {
+ if (CurrentPad == UnresolvedAncestorPad) {
+ // When CurrentPad is FPI itself, we don't mark it as resolved even if
+ // we've found an unwind edge that exits it, because we need to verify
+ // all direct uses of FPI.
+ assert(CurrentPad == &FPI);
+ continue;
+ }
+ // Pop off the worklist any nested pads that we've found an unwind
+ // destination for. The pads on the worklist are the uncles,
+ // great-uncles, etc. of CurrentPad. We've found an unwind destination
+ // for all ancestors of CurrentPad up to but not including
+ // UnresolvedAncestorPad.
+ Value *ResolvedPad = CurrentPad;
+ while (!Worklist.empty()) {
+ Value *UnclePad = Worklist.back();
+ Value *AncestorPad = getParentPad(UnclePad);
+ // Walk ResolvedPad up the ancestor list until we either find the
+ // uncle's parent or the last resolved ancestor.
+ while (ResolvedPad != AncestorPad) {
+ Value *ResolvedParent = getParentPad(ResolvedPad);
+ if (ResolvedParent == UnresolvedAncestorPad) {
+ break;
+ }
+ ResolvedPad = ResolvedParent;
+ }
+ // If the resolved ancestor search didn't find the uncle's parent,
+ // then the uncle is not yet resolved.
+ if (ResolvedPad != AncestorPad)
+ break;
+ // This uncle is resolved, so pop it from the worklist.
+ Worklist.pop_back();
+ }
+ }
+ }
- if (!FirstUser) {
- FirstUser = U;
- FirstUnwindDest = UnwindDest;
- } else {
- Assert(
- UnwindDest == FirstUnwindDest,
- "cleanupret instructions from the same cleanuppad must have the same "
- "unwind destination",
- FirstUser, U);
+ if (FirstUnwindPad) {
+ if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(FPI.getParentPad())) {
+ BasicBlock *SwitchUnwindDest = CatchSwitch->getUnwindDest();
+ Value *SwitchUnwindPad;
+ if (SwitchUnwindDest)
+ SwitchUnwindPad = SwitchUnwindDest->getFirstNonPHI();
+ else
+ SwitchUnwindPad = ConstantTokenNone::get(FPI.getContext());
+ Assert(SwitchUnwindPad == FirstUnwindPad,
+ "Unwind edges out of a catch must have the same unwind dest as "
+ "the parent catchswitch",
+ &FPI, FirstUser, CatchSwitch);
}
}
- visitInstruction(CPI);
+ visitInstruction(FPI);
}
void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
@@ -3067,17 +3300,21 @@ void Verifier::visitCatchSwitchInst(CatchSwitchInst &CatchSwitch) {
"CatchSwitchInst not the first non-PHI instruction in the block.",
&CatchSwitch);
+ auto *ParentPad = CatchSwitch.getParentPad();
+ Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
+ "CatchSwitchInst has an invalid parent.", ParentPad);
+
if (BasicBlock *UnwindDest = CatchSwitch.getUnwindDest()) {
Instruction *I = UnwindDest->getFirstNonPHI();
Assert(I->isEHPad() && !isa<LandingPadInst>(I),
"CatchSwitchInst must unwind to an EH block which is not a "
"landingpad.",
&CatchSwitch);
- }
- auto *ParentPad = CatchSwitch.getParentPad();
- Assert(isa<ConstantTokenNone>(ParentPad) || isa<FuncletPadInst>(ParentPad),
- "CatchSwitchInst has an invalid parent.", ParentPad);
+ // Record catchswitch sibling unwinds for verifySiblingFuncletUnwinds
+ if (getParentPad(I) == ParentPad)
+ SiblingFuncletInfo[&CatchSwitch] = &CatchSwitch;
+ }
Assert(CatchSwitch.getNumHandlers() != 0,
"CatchSwitchInst cannot have empty handler list", &CatchSwitch);
@@ -3652,6 +3889,9 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
case Intrinsic::experimental_gc_relocate: {
Assert(CS.getNumArgOperands() == 3, "wrong number of arguments", CS);
+ Assert(isa<PointerType>(CS.getType()->getScalarType()),
+ "gc.relocate must return a pointer or a vector of pointers", CS);
+
// Check that this relocate is correctly tied to the statepoint
// This is case for relocate on the unwinding path of an invoke statepoint
@@ -3734,17 +3974,20 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
"'gc parameters' section of the statepoint call",
CS);
- // Relocated value must be a pointer type, but gc_relocate does not need to return the
- // same pointer type as the relocated pointer. It can be casted to the correct type later
- // if it's desired. However, they must have the same address space.
+ // Relocated value must be either a pointer type or vector-of-pointer type,
+ // but gc_relocate does not need to return the same pointer type as the
+ // relocated pointer. It can be casted to the correct type later if it's
+ // desired. However, they must have the same address space and 'vectorness'.
GCRelocateInst &Relocate = cast<GCRelocateInst>(*CS.getInstruction());
- Assert(Relocate.getDerivedPtr()->getType()->isPointerTy(),
+ Assert(Relocate.getDerivedPtr()->getType()->getScalarType()->isPointerTy(),
"gc.relocate: relocated value must be a gc pointer", CS);
- // gc_relocate return type must be a pointer type, and is verified earlier in
- // VerifyIntrinsicType().
- Assert(cast<PointerType>(CS.getType())->getAddressSpace() ==
- cast<PointerType>(Relocate.getDerivedPtr()->getType())->getAddressSpace(),
+ auto ResultType = CS.getType();
+ auto DerivedType = Relocate.getDerivedPtr()->getType();
+ Assert(ResultType->isVectorTy() == DerivedType->isVectorTy(),
+ "gc.relocate: vector relocates to vector and pointer to pointer", CS);
+ Assert(ResultType->getPointerAddressSpace() ==
+ DerivedType->getPointerAddressSpace(),
"gc.relocate: relocating a pointer shouldn't change its address space", CS);
break;
}
diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp
index 6baaaa4b1395..66df23bab1b5 100644
--- a/lib/LTO/LTOCodeGenerator.cpp
+++ b/lib/LTO/LTOCodeGenerator.cpp
@@ -92,7 +92,8 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeSROALegacyPassPass(R);
initializeSROA_DTPass(R);
initializeSROA_SSAUpPass(R);
- initializeFunctionAttrsPass(R);
+ initializePostOrderFunctionAttrsPass(R);
+ initializeReversePostOrderFunctionAttrsPass(R);
initializeGlobalsAAWrapperPassPass(R);
initializeLICMPass(R);
initializeMergedLoadStoreMotionPass(R);
diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp
index 309690f61d74..8dd59f9e0e3e 100644
--- a/lib/Linker/IRMover.cpp
+++ b/lib/Linker/IRMover.cpp
@@ -773,6 +773,16 @@ GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV,
NewGV->setLinkage(GlobalValue::ExternalWeakLinkage);
NewGV->copyAttributesFrom(SGV);
+
+ // Remove these copied constants in case this stays a declaration, since
+ // they point to the source module. If the def is linked, the values will
+ // be mapped in during linkFunctionBody.
+ if (auto *NewF = dyn_cast<Function>(NewGV)) {
+ NewF->setPersonalityFn(nullptr);
+ NewF->setPrefixData(nullptr);
+ NewF->setPrologueData(nullptr);
+ }
+
return NewGV;
}
@@ -1211,6 +1221,18 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
for (unsigned I = 0, E = CompileUnits->getNumOperands(); I != E; ++I) {
auto *CU = cast<DICompileUnit>(CompileUnits->getOperand(I));
assert(CU && "Expected valid compile unit");
+ // Ensure that we don't remove subprograms referenced by DIImportedEntity.
+ // It is not legal to have a DIImportedEntity with a null entity or scope.
+ // FIXME: The DISubprogram for functions not linked in but kept due to
+ // being referenced by a DIImportedEntity should also have their
+ // IsDefinition flag unset.
+ SmallPtrSet<DISubprogram *, 8> ImportedEntitySPs;
+ for (auto *IE : CU->getImportedEntities()) {
+ if (auto *SP = dyn_cast<DISubprogram>(IE->getEntity()))
+ ImportedEntitySPs.insert(SP);
+ if (auto *SP = dyn_cast<DISubprogram>(IE->getScope()))
+ ImportedEntitySPs.insert(SP);
+ }
for (auto *Op : CU->getSubprograms()) {
// Unless we were doing function importing and deferred metadata linking,
// any needed SPs should have been mapped as they would be reached
@@ -1218,7 +1240,7 @@ void IRLinker::findNeededSubprograms(ValueToValueMapTy &ValueMap) {
// function bodies, or from DILocation on inlined instructions).
assert(!(ValueMap.MD()[Op] && IsMetadataLinkingPostpass) &&
"DISubprogram shouldn't be mapped yet");
- if (!ValueMap.MD()[Op])
+ if (!ValueMap.MD()[Op] && !ImportedEntitySPs.count(Op))
UnneededSubprograms.insert(Op);
}
}
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 9de3be412d75..6ffa71e14779 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -65,9 +65,6 @@ class ModuleLinker {
return Flags & Linker::InternalizeLinkedSymbols;
}
- /// Check if we should promote the given local value to global scope.
- bool doPromoteLocalToGlobal(const GlobalValue *SGV);
-
bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
const GlobalValue &Src);
@@ -97,11 +94,11 @@ class ModuleLinker {
Module &DstM = Mover.getModule();
// If the source has no name it can't link. If it has local linkage,
// there is no name match-up going on.
- if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV)))
+ if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(SrcGV->getLinkage()))
return nullptr;
// Otherwise see if we have a match in the destination module's symtab.
- GlobalValue *DGV = DstM.getNamedValue(getName(SrcGV));
+ GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName());
if (!DGV)
return nullptr;
@@ -116,6 +113,64 @@ class ModuleLinker {
bool linkIfNeeded(GlobalValue &GV);
+ /// Helper method to check if we are importing from the current source
+ /// module.
+ bool isPerformingImport() const { return FunctionsToImport != nullptr; }
+
+ /// If we are importing from the source module, checks if we should
+ /// import SGV as a definition, otherwise import as a declaration.
+ bool doImportAsDefinition(const GlobalValue *SGV);
+
+public:
+ ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
+ const FunctionInfoIndex *Index = nullptr,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
+ DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
+ : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
+ FunctionsToImport(FunctionsToImport),
+ ValIDToTempMDMap(ValIDToTempMDMap) {
+ assert((ImportIndex || !FunctionsToImport) &&
+ "Expect a FunctionInfoIndex when importing");
+ // If we have a FunctionInfoIndex but no function to import,
+ // then this is the primary module being compiled in a ThinLTO
+ // backend compilation, and we need to see if it has functions that
+ // may be exported to another backend compilation.
+ if (ImportIndex && !FunctionsToImport)
+ HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
+ assert((ValIDToTempMDMap || !FunctionsToImport) &&
+ "Function importing must provide a ValIDToTempMDMap");
+ }
+
+ bool run();
+};
+
+/// Class to handle necessary GlobalValue changes required by ThinLTO including
+/// linkage changes and any necessary renaming.
+class ThinLTOGlobalProcessing {
+ /// The Module which we are exporting or importing functions from.
+ Module &M;
+
+ /// Function index passed in for function importing/exporting handling.
+ const FunctionInfoIndex *ImportIndex;
+
+ /// Functions to import from this module, all other functions will be
+ /// imported as declarations instead of definitions.
+ DenseSet<const GlobalValue *> *FunctionsToImport;
+
+ /// Set to true if the given FunctionInfoIndex contains any functions
+ /// from this source module, in which case we must conservatively assume
+ /// that any of its functions may be imported into another module
+ /// as part of a different backend compilation process.
+ bool HasExportedFunctions = false;
+
+ /// Populated during ThinLTO global processing with locals promoted
+ /// to global scope in an exporting module, which now need to be linked
+ /// in when invoked from the ModuleLinker.
+ SetVector<GlobalValue *> NewExportedValues;
+
+ /// Check if we should promote the given local value to global scope.
+ bool doPromoteLocalToGlobal(const GlobalValue *SGV);
+
/// Helper methods to check if we are importing from or potentially
/// exporting from the current source module.
bool isPerformingImport() const { return FunctionsToImport != nullptr; }
@@ -143,32 +198,30 @@ class ModuleLinker {
GlobalValue::LinkageTypes getLinkage(const GlobalValue *SGV);
public:
- ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags,
- const FunctionInfoIndex *Index = nullptr,
- DenseSet<const GlobalValue *> *FunctionsToImport = nullptr,
- DenseMap<unsigned, MDNode *> *ValIDToTempMDMap = nullptr)
- : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index),
- FunctionsToImport(FunctionsToImport),
- ValIDToTempMDMap(ValIDToTempMDMap) {
- assert((ImportIndex || !FunctionsToImport) &&
- "Expect a FunctionInfoIndex when importing");
+ ThinLTOGlobalProcessing(
+ Module &M, const FunctionInfoIndex *Index,
+ DenseSet<const GlobalValue *> *FunctionsToImport = nullptr)
+ : M(M), ImportIndex(Index), FunctionsToImport(FunctionsToImport) {
// If we have a FunctionInfoIndex but no function to import,
// then this is the primary module being compiled in a ThinLTO
// backend compilation, and we need to see if it has functions that
// may be exported to another backend compilation.
- if (ImportIndex && !FunctionsToImport)
- HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
- assert((ValIDToTempMDMap || !FunctionsToImport) &&
- "Function importing must provide a ValIDToTempMDMap");
+ if (!FunctionsToImport)
+ HasExportedFunctions = ImportIndex->hasExportedFunctions(M);
}
bool run();
+
+ /// Access the promoted globals that are now exported and need to be linked.
+ SetVector<GlobalValue *> &getNewExportedValues() { return NewExportedValues; }
};
}
-bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
- if (!isPerformingImport())
- return false;
+/// Checks if we should import SGV as a definition, otherwise import as a
+/// declaration.
+static bool
+doImportAsDefinitionImpl(const GlobalValue *SGV,
+ DenseSet<const GlobalValue *> *FunctionsToImport) {
auto *GA = dyn_cast<GlobalAlias>(SGV);
if (GA) {
if (GA->hasWeakAnyLinkage())
@@ -176,7 +229,7 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
const GlobalObject *GO = GA->getBaseObject();
if (!GO->hasLinkOnceODRLinkage())
return false;
- return doImportAsDefinition(GO);
+ return doImportAsDefinitionImpl(GO, FunctionsToImport);
}
// Always import GlobalVariable definitions, except for the special
// case of WeakAny which are imported as ExternalWeak declarations
@@ -196,7 +249,19 @@ bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
return false;
}
-bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) {
+bool ThinLTOGlobalProcessing::doImportAsDefinition(const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return doImportAsDefinitionImpl(SGV, FunctionsToImport);
+}
+
+bool ModuleLinker::doImportAsDefinition(const GlobalValue *SGV) {
+ if (!isPerformingImport())
+ return false;
+ return doImportAsDefinitionImpl(SGV, FunctionsToImport);
+}
+
+bool ThinLTOGlobalProcessing::doPromoteLocalToGlobal(const GlobalValue *SGV) {
assert(SGV->hasLocalLinkage());
// Both the imported references and the original local variable must
// be promoted.
@@ -220,7 +285,7 @@ bool ModuleLinker::doPromoteLocalToGlobal(const GlobalValue *SGV) {
return true;
}
-std::string ModuleLinker::getName(const GlobalValue *SGV) {
+std::string ThinLTOGlobalProcessing::getName(const GlobalValue *SGV) {
// For locals that must be promoted to global scope, ensure that
// the promoted name uniquely identifies the copy in the original module,
// using the ID assigned during combined index creation. When importing,
@@ -234,7 +299,8 @@ std::string ModuleLinker::getName(const GlobalValue *SGV) {
return SGV->getName();
}
-GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
+GlobalValue::LinkageTypes
+ThinLTOGlobalProcessing::getLinkage(const GlobalValue *SGV) {
// Any local variable that is referenced by an exported function needs
// to be promoted to global scope. Since we don't currently know which
// functions reference which local variables/functions, we must treat
@@ -298,8 +364,7 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) {
// since it would cause global constructors/destructors to be
// executed multiple times. This should have already been handled
// by linkIfNeeded, and we will assert in shouldLinkFromSource
- // if we try to import, so we simply return AppendingLinkage here
- // as this helper is called more widely in getLinkedToGlobal.
+ // if we try to import, so we simply return AppendingLinkage.
return GlobalValue::AppendingLinkage;
case GlobalValue::InternalLinkage:
@@ -652,7 +717,7 @@ void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) {
}
}
-void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) {
+void ThinLTOGlobalProcessing::processGlobalForThinLTO(GlobalValue &GV) {
if (GV.hasLocalLinkage() &&
(doPromoteLocalToGlobal(&GV) || isPerformingImport())) {
GV.setName(getName(&GV));
@@ -660,21 +725,26 @@ void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) {
if (!GV.hasLocalLinkage())
GV.setVisibility(GlobalValue::HiddenVisibility);
if (isModuleExporting())
- ValuesToLink.insert(&GV);
+ NewExportedValues.insert(&GV);
return;
}
GV.setLinkage(getLinkage(&GV));
}
-void ModuleLinker::processGlobalsForThinLTO() {
- for (GlobalVariable &GV : SrcM.globals())
+void ThinLTOGlobalProcessing::processGlobalsForThinLTO() {
+ for (GlobalVariable &GV : M.globals())
processGlobalForThinLTO(GV);
- for (Function &SF : SrcM)
+ for (Function &SF : M)
processGlobalForThinLTO(SF);
- for (GlobalAlias &GA : SrcM.aliases())
+ for (GlobalAlias &GA : M.aliases())
processGlobalForThinLTO(GA);
}
+bool ThinLTOGlobalProcessing::run() {
+ processGlobalsForThinLTO();
+ return false;
+}
+
bool ModuleLinker::run() {
for (const auto &SMEC : SrcM.getComdatSymbolTable()) {
const Comdat &C = SMEC.getValue();
@@ -713,7 +783,14 @@ bool ModuleLinker::run() {
if (linkIfNeeded(GA))
return true;
- processGlobalsForThinLTO();
+ if (ImportIndex) {
+ ThinLTOGlobalProcessing ThinLTOProcessing(SrcM, ImportIndex,
+ FunctionsToImport);
+ if (ThinLTOProcessing.run())
+ return true;
+ for (auto *GV : ThinLTOProcessing.getNewExportedValues())
+ ValuesToLink.insert(GV);
+ }
for (unsigned I = 0; I < ValuesToLink.size(); ++I) {
GlobalValue *GV = ValuesToLink[I];
@@ -786,15 +863,9 @@ bool Linker::linkModules(Module &Dest, std::unique_ptr<Module> Src,
return L.linkInModule(std::move(Src), Flags);
}
-std::unique_ptr<Module>
-llvm::renameModuleForThinLTO(std::unique_ptr<Module> M,
- const FunctionInfoIndex *Index) {
- std::unique_ptr<llvm::Module> RenamedModule(
- new llvm::Module(M->getModuleIdentifier(), M->getContext()));
- Linker L(*RenamedModule.get());
- if (L.linkInModule(std::move(M), llvm::Linker::Flags::None, Index))
- return nullptr;
- return RenamedModule;
+bool llvm::renameModuleForThinLTO(Module &M, const FunctionInfoIndex *Index) {
+ ThinLTOGlobalProcessing ThinLTOProcessing(M, Index);
+ return ThinLTOProcessing.run();
}
//===----------------------------------------------------------------------===//
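For callers, the new signature means renaming now happens in place; a minimal sketch of the updated call shape (header locations per this tree assumed):

    #include "llvm/IR/Module.h"
    #include "llvm/Linker/Linker.h"
    #include "llvm/Support/ErrorHandling.h"

    // Before: returned a freshly created module, or nullptr on error.
    // After: mutates M directly and returns true on error.
    void renameForThinLTO(llvm::Module &M, const llvm::FunctionInfoIndex *Index) {
      if (llvm::renameModuleForThinLTO(M, Index))
        llvm::report_fatal_error("renaming for ThinLTO failed");
    }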
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index 0f26b38c29d7..748644bd9c8f 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -300,6 +300,7 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_Hexagon_LD_PLT: return "LDPLT";
case VK_Hexagon_IE: return "IE";
case VK_Hexagon_IE_GOT: return "IEGOT";
+ case VK_WebAssembly_FUNCTION: return "FUNCTION";
case VK_TPREL: return "tprel";
case VK_DTPREL: return "dtprel";
}
diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp
index 34f49cac1628..f86f7e40acb4 100644
--- a/lib/MC/MCObjectFileInfo.cpp
+++ b/lib/MC/MCObjectFileInfo.cpp
@@ -256,6 +256,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) {
DwarfRangesSection =
Ctx->getMachOSection("__DWARF", "__debug_ranges", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata(), "debug_range");
+ DwarfMacinfoSection =
+ Ctx->getMachOSection("__DWARF", "__debug_macinfo", MachO::S_ATTR_DEBUG,
+ SectionKind::getMetadata());
DwarfDebugInlineSection =
Ctx->getMachOSection("__DWARF", "__debug_inlined", MachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
@@ -505,6 +508,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) {
Ctx->getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0);
DwarfRangesSection =
Ctx->getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0, "debug_range");
+ DwarfMacinfoSection =
+ Ctx->getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0);
// DWARF5 Experimental Debug Info
@@ -684,6 +689,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) {
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getMetadata(), "debug_range");
+ DwarfMacinfoSection = Ctx->getCOFFSection(
+ ".debug_macinfo",
+ COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ,
+ SectionKind::getMetadata());
DwarfInfoDWOSection = Ctx->getCOFFSection(
".debug_info.dwo",
COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
diff --git a/lib/MC/MCObjectStreamer.cpp b/lib/MC/MCObjectStreamer.cpp
index d0a7dafa15b8..972610ac8d6e 100644
--- a/lib/MC/MCObjectStreamer.cpp
+++ b/lib/MC/MCObjectStreamer.cpp
@@ -64,8 +64,6 @@ void MCObjectStreamer::emitAbsoluteSymbolDiff(const MCSymbol *Hi,
return;
}
- assert(Hi->getOffset() >= Lo->getOffset() &&
- "Expected Hi to be greater than Lo");
EmitIntValue(Hi->getOffset() - Lo->getOffset(), Size);
}
diff --git a/lib/MC/WinCOFFObjectWriter.cpp b/lib/MC/WinCOFFObjectWriter.cpp
index a3820906b76b..a76cbdbd5447 100644
--- a/lib/MC/WinCOFFObjectWriter.cpp
+++ b/lib/MC/WinCOFFObjectWriter.cpp
@@ -969,9 +969,6 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Header.PointerToSymbolTable = offset;
- // FIXME: Remove the #else branch and make the #if branch unconditional once
- // LLVM's self host configuration is aware of /Brepro.
-#if (ENABLE_TIMESTAMPS == 1)
// MS LINK expects to be able to use this timestamp to implement their
// /INCREMENTAL feature.
if (Asm.isIncrementalLinkerCompatible()) {
@@ -980,12 +977,9 @@ void WinCOFFObjectWriter::writeObject(MCAssembler &Asm,
Now = UINT32_MAX;
Header.TimeDateStamp = Now;
} else {
+ // Emit deterministic output when /INCREMENTAL isn't needed; GNU as also writes 0 here.
Header.TimeDateStamp = 0;
}
-#else
- // We want a deterministic output. It looks like GNU as also writes 0 in here.
- Header.TimeDateStamp = 0;
-#endif
// Write it all to disk...
WriteFileHeader(Header);
diff --git a/lib/Object/COFFObjectFile.cpp b/lib/Object/COFFObjectFile.cpp
index 1f2111759a0e..4cd6aff5f17c 100644
--- a/lib/Object/COFFObjectFile.cpp
+++ b/lib/Object/COFFObjectFile.cpp
@@ -1336,6 +1336,30 @@ ExportDirectoryEntryRef::getSymbolName(StringRef &Result) const {
return std::error_code();
}
+std::error_code ExportDirectoryEntryRef::isForwarder(bool &Result) const {
+ const data_directory *DataEntry;
+ if (auto EC = OwningObject->getDataDirectory(COFF::EXPORT_TABLE, DataEntry))
+ return EC;
+ uint32_t RVA;
+ if (auto EC = getExportRVA(RVA))
+ return EC;
+ uint32_t Begin = DataEntry->RelativeVirtualAddress;
+ uint32_t End = DataEntry->RelativeVirtualAddress + DataEntry->Size;
+ Result = (Begin <= RVA && RVA < End);
+ return std::error_code();
+}
+
+std::error_code ExportDirectoryEntryRef::getForwardTo(StringRef &Result) const {
+ uint32_t RVA;
+ if (auto EC = getExportRVA(RVA))
+ return EC;
+ uintptr_t IntPtr = 0;
+ if (auto EC = OwningObject->getRvaPtr(RVA, IntPtr))
+ return EC;
+ Result = StringRef(reinterpret_cast<const char *>(IntPtr));
+ return std::error_code();
+}
+
bool ImportedSymbolRef::
operator==(const ImportedSymbolRef &Other) const {
return Entry32 == Other.Entry32 && Entry64 == Other.Entry64
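A hedged sketch of client usage for the two new accessors, assuming the existing export_directories() range on COFFObjectFile:

    #include "llvm/Object/COFF.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    void printForwarders(const COFFObjectFile &Obj) {
      for (const ExportDirectoryEntryRef &E : Obj.export_directories()) {
        bool IsForwarder = false;
        if (E.isForwarder(IsForwarder) || !IsForwarder)
          continue; // error, or a regular (non-forwarded) export
        StringRef Target;
        if (!E.getForwardTo(Target))
          outs() << "forwarder -> " << Target << "\n";
      }
    }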
diff --git a/lib/Object/ELF.cpp b/lib/Object/ELF.cpp
index 62c27cc427a6..12b772d930ba 100644
--- a/lib/Object/ELF.cpp
+++ b/lib/Object/ELF.cpp
@@ -91,6 +91,13 @@ StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) {
break;
}
break;
+ case ELF::EM_WEBASSEMBLY:
+ switch (Type) {
+#include "llvm/Support/ELFRelocs/WebAssembly.def"
+ default:
+ break;
+ }
+ break;
default:
break;
}
diff --git a/lib/ProfileData/CoverageMapping.cpp b/lib/ProfileData/CoverageMapping.cpp
index 55c0fb4792ef..f5d477bd139a 100644
--- a/lib/ProfileData/CoverageMapping.cpp
+++ b/lib/ProfileData/CoverageMapping.cpp
@@ -517,6 +517,6 @@ class CoverageMappingErrorCategoryType : public std::error_category {
static ManagedStatic<CoverageMappingErrorCategoryType> ErrorCategory;
-const std::error_category &llvm::coveragemap_category() {
+const std::error_category &llvm::coverage::coveragemap_category() {
return *ErrorCategory;
}
diff --git a/lib/ProfileData/CoverageMappingReader.cpp b/lib/ProfileData/CoverageMappingReader.cpp
index 32c692d8073a..89e1cf42c577 100644
--- a/lib/ProfileData/CoverageMappingReader.cpp
+++ b/lib/ProfileData/CoverageMappingReader.cpp
@@ -319,21 +319,14 @@ static std::error_code readCoverageMappingData(
if (Buf + sizeof(CovMapHeader) > End)
return coveragemap_error::malformed;
auto CovHeader = reinterpret_cast<const coverage::CovMapHeader *>(Buf);
- uint32_t NRecords =
- endian::byte_swap<uint32_t, Endian>(CovHeader->NRecords);
- uint32_t FilenamesSize =
- endian::byte_swap<uint32_t, Endian>(CovHeader->FilenamesSize);
- uint32_t CoverageSize =
- endian::byte_swap<uint32_t, Endian>(CovHeader->CoverageSize);
- uint32_t Version = endian::byte_swap<uint32_t, Endian>(CovHeader->Version);
+ uint32_t NRecords = CovHeader->getNRecords<Endian>();
+ uint32_t FilenamesSize = CovHeader->getFilenamesSize<Endian>();
+ uint32_t CoverageSize = CovHeader->getCoverageSize<Endian>();
+ uint32_t Version = CovHeader->getVersion<Endian>();
Buf = reinterpret_cast<const char *>(++CovHeader);
- switch (Version) {
- case CoverageMappingVersion1:
- break;
- default:
+ if (Version > coverage::CoverageMappingCurrentVersion)
return coveragemap_error::unsupported_version;
- }
// Skip past the function records, saving the start and end for later.
const char *FunBuf = Buf;
@@ -364,11 +357,8 @@ static std::error_code readCoverageMappingData(
reinterpret_cast<const coverage::CovMapFunctionRecord<T> *>(FunBuf);
while ((const char *)CFR < FunEnd) {
// Read the function information
- T NamePtr = endian::byte_swap<T, Endian>(CFR->NamePtr);
- uint32_t NameSize = endian::byte_swap<uint32_t, Endian>(CFR->NameSize);
- uint32_t DataSize = endian::byte_swap<uint32_t, Endian>(CFR->DataSize);
- uint64_t FuncHash = endian::byte_swap<uint64_t, Endian>(CFR->FuncHash);
- CFR++;
+ uint32_t DataSize = CFR->template getDataSize<Endian>();
+ uint64_t FuncHash = CFR->template getFuncHash<Endian>();
// Now use that to read the coverage data.
if (CovBuf + DataSize > CovEnd)
@@ -379,16 +369,18 @@ static std::error_code readCoverageMappingData(
// Ignore this record if we already have a record that points to the same
// function name. This is useful to ignore the redundant records for the
// functions with ODR linkage.
- if (!UniqueFunctionMappingData.insert(NamePtr).second)
+ T NameRef = CFR->template getFuncNameRef<Endian>();
+ if (!UniqueFunctionMappingData.insert(NameRef).second)
continue;
- // Finally, grab the name and create a record.
- StringRef FuncName = ProfileNames.getFuncName(NamePtr, NameSize);
- if (NameSize && FuncName.empty())
- return coveragemap_error::malformed;
+ StringRef FuncName;
+ if (std::error_code EC =
+ CFR->template getFuncName<Endian>(ProfileNames, FuncName))
+ return EC;
Records.push_back(BinaryCoverageReader::ProfileMappingRecord(
CoverageMappingVersion(Version), FuncName, FuncHash, Mapping,
FilenamesBegin, Filenames.size() - FilenamesBegin));
+ CFR++;
}
}
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index 027f0f78c546..d6777639abe7 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -257,9 +257,8 @@ int readPGOFuncNameStrings(StringRef NameStrings, InstrProfSymtab &Symtab) {
return 0;
}
-instrprof_error
-InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
- uint64_t Weight) {
+instrprof_error InstrProfValueSiteRecord::merge(InstrProfValueSiteRecord &Input,
+ uint64_t Weight) {
this->sortByTargetValues();
Input.sortByTargetValues();
auto I = ValueData.begin();
@@ -270,14 +269,8 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
while (I != IE && I->Value < J->Value)
++I;
if (I != IE && I->Value == J->Value) {
- uint64_t JCount = J->Count;
bool Overflowed;
- if (Weight > 1) {
- JCount = SaturatingMultiply(JCount, Weight, &Overflowed);
- if (Overflowed)
- Result = instrprof_error::counter_overflow;
- }
- I->Count = SaturatingAdd(I->Count, JCount, &Overflowed);
+ I->Count = SaturatingMultiplyAdd(J->Count, Weight, I->Count, &Overflowed);
if (Overflowed)
Result = instrprof_error::counter_overflow;
++I;
@@ -288,6 +281,17 @@ InstrProfValueSiteRecord::mergeValueData(InstrProfValueSiteRecord &Input,
return Result;
}
+instrprof_error InstrProfValueSiteRecord::scale(uint64_t Weight) {
+ instrprof_error Result = instrprof_error::success;
+ for (auto I = ValueData.begin(), IE = ValueData.end(); I != IE; ++I) {
+ bool Overflowed;
+ I->Count = SaturatingMultiply(I->Count, Weight, &Overflowed);
+ if (Overflowed)
+ Result = instrprof_error::counter_overflow;
+ }
+ return Result;
+}
+
// Merge Value Profile data from Src record to this record for ValueKind.
// Scale merged value counts by \p Weight.
instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
@@ -303,8 +307,7 @@ instrprof_error InstrProfRecord::mergeValueProfData(uint32_t ValueKind,
Src.getValueSitesForKind(ValueKind);
instrprof_error Result = instrprof_error::success;
for (uint32_t I = 0; I < ThisNumValueSites; I++)
- MergeResult(Result,
- ThisSiteRecords[I].mergeValueData(OtherSiteRecords[I], Weight));
+ MergeResult(Result, ThisSiteRecords[I].merge(OtherSiteRecords[I], Weight));
return Result;
}
@@ -319,13 +322,8 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other,
for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) {
bool Overflowed;
- uint64_t OtherCount = Other.Counts[I];
- if (Weight > 1) {
- OtherCount = SaturatingMultiply(OtherCount, Weight, &Overflowed);
- if (Overflowed)
- Result = instrprof_error::counter_overflow;
- }
- Counts[I] = SaturatingAdd(Counts[I], OtherCount, &Overflowed);
+ Counts[I] =
+ SaturatingMultiplyAdd(Other.Counts[I], Weight, Counts[I], &Overflowed);
if (Overflowed)
Result = instrprof_error::counter_overflow;
}
@@ -336,6 +334,32 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other,
return Result;
}
+instrprof_error InstrProfRecord::scaleValueProfData(uint32_t ValueKind,
+ uint64_t Weight) {
+ uint32_t ThisNumValueSites = getNumValueSites(ValueKind);
+ std::vector<InstrProfValueSiteRecord> &ThisSiteRecords =
+ getValueSitesForKind(ValueKind);
+ instrprof_error Result = instrprof_error::success;
+ for (uint32_t I = 0; I < ThisNumValueSites; I++)
+ MergeResult(Result, ThisSiteRecords[I].scale(Weight));
+ return Result;
+}
+
+instrprof_error InstrProfRecord::scale(uint64_t Weight) {
+ instrprof_error Result = instrprof_error::success;
+ for (auto &Count : this->Counts) {
+ bool Overflowed;
+ Count = SaturatingMultiply(Count, Weight, &Overflowed);
+ if (Overflowed && Result == instrprof_error::success) {
+ Result = instrprof_error::counter_overflow;
+ }
+ }
+ for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
+ MergeResult(Result, scaleValueProfData(Kind, Weight));
+
+ return Result;
+}
+
// Map indirect call target name hash to name string.
uint64_t InstrProfRecord::remapValue(uint64_t Value, uint32_t ValueKind,
ValueMapType *ValueMap) {
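The repeated multiply-then-add pattern is now a single MathExtras helper; a small sketch of its semantics, assuming the signature SaturatingMultiplyAdd(X, Y, A, &Overflowed) computing X * Y + A clamped at the type maximum:

    #include "llvm/Support/MathExtras.h"
    #include <cstdint>

    uint64_t demo() {
      bool Overflowed = false;
      uint64_t Sum =
          llvm::SaturatingMultiplyAdd<uint64_t>(UINT64_MAX, 2, 7, &Overflowed);
      return Sum; // UINT64_MAX, with Overflowed set to true
    }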
diff --git a/lib/ProfileData/InstrProfWriter.cpp b/lib/ProfileData/InstrProfWriter.cpp
index 9bb03e1e77a3..f5227248af20 100644
--- a/lib/ProfileData/InstrProfWriter.cpp
+++ b/lib/ProfileData/InstrProfWriter.cpp
@@ -104,27 +104,21 @@ std::error_code InstrProfWriter::addRecord(InstrProfRecord &&I,
ProfileDataMap.insert(std::make_pair(I.Hash, InstrProfRecord()));
InstrProfRecord &Dest = Where->second;
- instrprof_error Result;
+ instrprof_error Result = instrprof_error::success;
if (NewFunc) {
// We've never seen a function with this name and hash, add it.
Dest = std::move(I);
// Fix up the name to avoid dangling reference.
Dest.Name = FunctionData.find(Dest.Name)->getKey();
- Result = instrprof_error::success;
- if (Weight > 1) {
- for (auto &Count : Dest.Counts) {
- bool Overflowed;
- Count = SaturatingMultiply(Count, Weight, &Overflowed);
- if (Overflowed && Result == instrprof_error::success) {
- Result = instrprof_error::counter_overflow;
- }
- }
- }
+ if (Weight > 1)
+ Result = Dest.scale(Weight);
} else {
// We're updating a function we've seen before.
Result = Dest.merge(I, Weight);
}
+ Dest.sortValueData();
+
// We keep track of the max function count as we go for simplicity.
// Update this statistic no matter the result of the merge.
if (Dest.Counts[0] > MaxFunctionCount)
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index 47751fce3fcd..323d53279b45 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -95,7 +95,10 @@ struct DebugOnlyOpt {
if (Val.empty())
return;
DebugFlag = true;
- CurrentDebugType->push_back(Val);
+ SmallVector<StringRef, 8> dbgTypes;
+ StringRef(Val).split(dbgTypes, ',', -1, false);
+ for (auto dbgType : dbgTypes)
+ CurrentDebugType->push_back(dbgType);
}
};
@@ -104,10 +107,9 @@ struct DebugOnlyOpt {
static DebugOnlyOpt DebugOnlyOptLoc;
static cl::opt<DebugOnlyOpt, true, cl::parser<std::string> >
-DebugOnly("debug-only", cl::desc("Enable a specific type of debug output"),
+DebugOnly("debug-only", cl::desc("Enable a specific type of debug output (comma separated list of types)"),
cl::Hidden, cl::ZeroOrMore, cl::value_desc("debug string"),
cl::location(DebugOnlyOptLoc), cl::ValueRequired);
-
// Signal handlers - dump debug output on termination.
static void debug_user_sig_handler(void *Cookie) {
// This is a bit sneaky. Since this is under #ifndef NDEBUG, we
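With this change a single flag can enable several debug types at once, e.g. -debug-only=isel,regalloc (type names illustrative). The split call used above behaves like this:

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"

    void splitDemo() {
      llvm::SmallVector<llvm::StringRef, 8> Parts;
      llvm::StringRef("isel,,regalloc").split(Parts, ',', /*MaxSplit=*/-1,
                                              /*KeepEmpty=*/false);
      // Parts == {"isel", "regalloc"}: -1 splits on every comma, and
      // KeepEmpty=false drops the empty piece.
    }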
diff --git a/lib/Support/IntEqClasses.cpp b/lib/Support/IntEqClasses.cpp
index 11344956e4c9..ff213570807c 100644
--- a/lib/Support/IntEqClasses.cpp
+++ b/lib/Support/IntEqClasses.cpp
@@ -29,7 +29,7 @@ void IntEqClasses::grow(unsigned N) {
EC.push_back(EC.size());
}
-void IntEqClasses::join(unsigned a, unsigned b) {
+unsigned IntEqClasses::join(unsigned a, unsigned b) {
assert(NumClasses == 0 && "join() called after compress().");
unsigned eca = EC[a];
unsigned ecb = EC[b];
@@ -41,6 +41,8 @@ void IntEqClasses::join(unsigned a, unsigned b) {
EC[b] = eca, b = ecb, ecb = EC[b];
else
EC[a] = ecb, a = eca, eca = EC[a];
+
+ return eca;
}
unsigned IntEqClasses::findLeader(unsigned a) const {
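Returning the surviving leader lets callers chain unions without a second lookup; a minimal sketch:

    #include "llvm/ADT/IntEqClasses.h"
    #include <cassert>

    void joinDemo() {
      llvm::IntEqClasses EC(4);        // four singleton classes, 0..3
      unsigned Leader = EC.join(1, 3); // merges the two classes
      assert(Leader == EC.findLeader(1));
      assert(Leader == EC.findLeader(3));
    }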
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 3bb1116007ed..0e5d3ac50379 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -1154,8 +1154,6 @@ Triple Triple::get32BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
- case Triple::aarch64:
- case Triple::aarch64_be:
case Triple::amdgcn:
case Triple::avr:
case Triple::bpfel:
@@ -1191,17 +1189,19 @@ Triple Triple::get32BitArchVariant() const {
// Already 32-bit.
break;
- case Triple::le64: T.setArch(Triple::le32); break;
- case Triple::mips64: T.setArch(Triple::mips); break;
- case Triple::mips64el: T.setArch(Triple::mipsel); break;
- case Triple::nvptx64: T.setArch(Triple::nvptx); break;
- case Triple::ppc64: T.setArch(Triple::ppc); break;
- case Triple::sparcv9: T.setArch(Triple::sparc); break;
- case Triple::x86_64: T.setArch(Triple::x86); break;
- case Triple::amdil64: T.setArch(Triple::amdil); break;
- case Triple::hsail64: T.setArch(Triple::hsail); break;
- case Triple::spir64: T.setArch(Triple::spir); break;
- case Triple::wasm64: T.setArch(Triple::wasm32); break;
+ case Triple::aarch64: T.setArch(Triple::arm); break;
+ case Triple::aarch64_be: T.setArch(Triple::armeb); break;
+ case Triple::le64: T.setArch(Triple::le32); break;
+ case Triple::mips64: T.setArch(Triple::mips); break;
+ case Triple::mips64el: T.setArch(Triple::mipsel); break;
+ case Triple::nvptx64: T.setArch(Triple::nvptx); break;
+ case Triple::ppc64: T.setArch(Triple::ppc); break;
+ case Triple::sparcv9: T.setArch(Triple::sparc); break;
+ case Triple::x86_64: T.setArch(Triple::x86); break;
+ case Triple::amdil64: T.setArch(Triple::amdil); break;
+ case Triple::hsail64: T.setArch(Triple::hsail); break;
+ case Triple::spir64: T.setArch(Triple::spir); break;
+ case Triple::wasm64: T.setArch(Triple::wasm32); break;
}
return T;
}
@@ -1210,16 +1210,12 @@ Triple Triple::get64BitArchVariant() const {
Triple T(*this);
switch (getArch()) {
case Triple::UnknownArch:
- case Triple::arm:
- case Triple::armeb:
case Triple::avr:
case Triple::hexagon:
case Triple::kalimba:
case Triple::msp430:
case Triple::r600:
case Triple::tce:
- case Triple::thumb:
- case Triple::thumbeb:
case Triple::xcore:
case Triple::sparcel:
case Triple::shave:
@@ -1247,17 +1243,21 @@ Triple Triple::get64BitArchVariant() const {
// Already 64-bit.
break;
- case Triple::le32: T.setArch(Triple::le64); break;
- case Triple::mips: T.setArch(Triple::mips64); break;
- case Triple::mipsel: T.setArch(Triple::mips64el); break;
- case Triple::nvptx: T.setArch(Triple::nvptx64); break;
- case Triple::ppc: T.setArch(Triple::ppc64); break;
- case Triple::sparc: T.setArch(Triple::sparcv9); break;
- case Triple::x86: T.setArch(Triple::x86_64); break;
- case Triple::amdil: T.setArch(Triple::amdil64); break;
- case Triple::hsail: T.setArch(Triple::hsail64); break;
- case Triple::spir: T.setArch(Triple::spir64); break;
- case Triple::wasm32: T.setArch(Triple::wasm64); break;
+ case Triple::arm: T.setArch(Triple::aarch64); break;
+ case Triple::armeb: T.setArch(Triple::aarch64_be); break;
+ case Triple::le32: T.setArch(Triple::le64); break;
+ case Triple::mips: T.setArch(Triple::mips64); break;
+ case Triple::mipsel: T.setArch(Triple::mips64el); break;
+ case Triple::nvptx: T.setArch(Triple::nvptx64); break;
+ case Triple::ppc: T.setArch(Triple::ppc64); break;
+ case Triple::sparc: T.setArch(Triple::sparcv9); break;
+ case Triple::x86: T.setArch(Triple::x86_64); break;
+ case Triple::amdil: T.setArch(Triple::amdil64); break;
+ case Triple::hsail: T.setArch(Triple::hsail64); break;
+ case Triple::spir: T.setArch(Triple::spir64); break;
+ case Triple::thumb: T.setArch(Triple::aarch64); break;
+ case Triple::thumbeb: T.setArch(Triple::aarch64_be); break;
+ case Triple::wasm32: T.setArch(Triple::wasm64); break;
}
return T;
}
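
With these entries, AArch64 now round-trips with ARM instead of mapping to
UnknownArch. A usage sketch:

    #include "llvm/ADT/Triple.h"

    void demoArchVariants() {
      llvm::Triple T("aarch64-unknown-linux-gnu");
      llvm::Triple T32 = T.get32BitArchVariant();  // arch becomes Triple::arm
      llvm::Triple T64 = T32.get64BitArchVariant(); // back to Triple::aarch64
    }
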
diff --git a/lib/Support/Windows/Path.inc b/lib/Support/Windows/Path.inc
index 4e4841231eff..5ef77b150ef0 100644
--- a/lib/Support/Windows/Path.inc
+++ b/lib/Support/Windows/Path.inc
@@ -38,6 +38,7 @@ typedef int errno_t;
#ifdef _MSC_VER
# pragma comment(lib, "advapi32.lib") // This provides CryptAcquireContextW.
+# pragma comment(lib, "ole32.lib") // This provides CoTaskMemFree
#endif
using namespace llvm;
diff --git a/lib/Support/Windows/Signals.inc b/lib/Support/Windows/Signals.inc
index d109a66d7035..f40ca72996a1 100644
--- a/lib/Support/Windows/Signals.inc
+++ b/lib/Support/Windows/Signals.inc
@@ -405,7 +405,10 @@ static void RegisterHandler() {
// If we cannot load up the APIs (which would be unexpected as they should
// exist on every version of Windows we support), we will bail out since
// there would be nothing to report.
- assert(load64BitDebugHelp() && "These APIs should always be available");
+ if (!load64BitDebugHelp()) {
+ assert(false && "These APIs should always be available");
+ return;
+ }
if (RegisteredUnhandledExceptionFilter) {
EnterCriticalSection(&CriticalSection);
diff --git a/lib/Support/Windows/WindowsSupport.h b/lib/Support/Windows/WindowsSupport.h
index c65e3148921e..60490f266434 100644
--- a/lib/Support/Windows/WindowsSupport.h
+++ b/lib/Support/Windows/WindowsSupport.h
@@ -47,20 +47,27 @@
#include <string>
#include <vector>
-#if !defined(__CYGWIN__) && !defined(__MINGW32__)
-#include <VersionHelpers.h>
-#else
-// Cygwin does not have the IsWindows8OrGreater() API.
-// Some version of mingw does not have the API either.
-inline bool IsWindows8OrGreater() {
- OSVERSIONINFO osvi = {};
+/// Determines if the program is running on Windows 8 or newer. This
+/// reimplements one of the helpers in the Windows 8.1 SDK, which are intended
+/// to supersede raw calls to GetVersionEx. Old SDKs, Cygwin, and MinGW don't
+/// yet have VersionHelpers.h, so we have our own helper.
+inline bool RunningWindows8OrGreater() {
+ // Windows 8 is version 6.2, service pack 0.
+ OSVERSIONINFOEXW osvi = {};
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
- if (!::GetVersionEx(&osvi))
- return false;
- return (osvi.dwMajorVersion > 6 ||
- (osvi.dwMajorVersion == 6 && osvi.dwMinorVersion >= 2));
+ osvi.dwMajorVersion = 6;
+ osvi.dwMinorVersion = 2;
+ osvi.wServicePackMajor = 0;
+
+ DWORDLONG Mask = 0;
+ Mask = VerSetConditionMask(Mask, VER_MAJORVERSION, VER_GREATER_EQUAL);
+ Mask = VerSetConditionMask(Mask, VER_MINORVERSION, VER_GREATER_EQUAL);
+ Mask = VerSetConditionMask(Mask, VER_SERVICEPACKMAJOR, VER_GREATER_EQUAL);
+
+ return VerifyVersionInfoW(&osvi, VER_MAJORVERSION | VER_MINORVERSION |
+ VER_SERVICEPACKMAJOR,
+ Mask) != FALSE;
}
-#endif // __CYGWIN__
inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) {
if (!ErrMsg)
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 57162dc6e95a..15813fd3e669 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -577,7 +577,7 @@ void raw_fd_ostream::write_impl(const char *Ptr, size_t Size) {
// Writing a large amount of output to the Windows console returns ENOMEM. It
// seems that, prior to Windows 8, WriteFile() redirects to WriteConsole(), and
// the latter has a size limit (66000 bytes or less, depending on heap usage).
- bool ShouldWriteInChunks = !!::_isatty(FD) && !IsWindows8OrGreater();
+ bool ShouldWriteInChunks = !!::_isatty(FD) && !RunningWindows8OrGreater();
#endif
do {
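
A minimal sketch of the chunking strategy that comment describes, assuming a
hypothetical 32767-byte cap; the real threshold is chosen empirically and may
differ:

    #include <algorithm>
    #include <cstddef>

    // Split one large write into bounded chunks so the console API never
    // sees a request above the cap. RawWrite stands in for the platform
    // write call and is assumed to write the full chunk on success.
    template <typename WriteFn>
    void writeInChunks(WriteFn RawWrite, const char *Ptr, std::size_t Size) {
      const std::size_t MaxChunk = 32767; // hypothetical per-call cap
      while (Size > 0) {
        std::size_t Chunk = std::min(Size, MaxChunk);
        RawWrite(Ptr, Chunk);
        Ptr += Chunk;
        Size -= Chunk;
      }
    }
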
diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
index 3ef3c8b840cb..f398117de953 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.cpp
+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -2487,15 +2487,36 @@ static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
return true;
}
-/// Return true when there is potentially a faster code sequence
-/// for an instruction chain ending in \p Root. All potential patterns are
-/// listed
-/// in the \p Pattern vector. Pattern should be sorted in priority order since
-/// the pattern evaluator stops checking as soon as it finds a faster sequence.
+// TODO: There are many more machine instruction opcodes to match:
+// 1. Other data types (integer, vectors)
+// 2. Other math / logic operations (xor, or)
+// 3. Other forms of the same operation (intrinsics and other variants)
+bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst) const {
+ switch (Inst.getOpcode()) {
+ case AArch64::FADDDrr:
+ case AArch64::FADDSrr:
+ case AArch64::FADDv2f32:
+ case AArch64::FADDv2f64:
+ case AArch64::FADDv4f32:
+ case AArch64::FMULDrr:
+ case AArch64::FMULSrr:
+ case AArch64::FMULX32:
+ case AArch64::FMULX64:
+ case AArch64::FMULXv2f32:
+ case AArch64::FMULXv2f64:
+ case AArch64::FMULXv4f32:
+ case AArch64::FMULv2f32:
+ case AArch64::FMULv2f64:
+ case AArch64::FMULv4f32:
+ return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
+ default:
+ return false;
+ }
+}
-bool AArch64InstrInfo::getMachineCombinerPatterns(
- MachineInstr &Root,
- SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+/// Find instructions that can be turned into madd.
+static bool getMaddPatterns(MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) {
unsigned Opc = Root.getOpcode();
MachineBasicBlock &MBB = *Root.getParent();
bool Found = false;
@@ -2600,6 +2621,20 @@ bool AArch64InstrInfo::getMachineCombinerPatterns(
return Found;
}
+/// Return true when there is potentially a faster code sequence for an
+/// instruction chain ending in \p Root. All potential patterns are listed in
+/// the \p Pattern vector. Pattern should be sorted in priority order since the
+/// pattern evaluator stops checking as soon as it finds a faster sequence.
+
+bool AArch64InstrInfo::getMachineCombinerPatterns(
+ MachineInstr &Root,
+ SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
+ if (getMaddPatterns(Root, Patterns))
+ return true;
+
+ return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
+}
+
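
The point of exposing associativity and commutativity to the machine combiner
is that a serial chain of dependent FP operations can be rebalanced into a
shallower tree, which is only legal once unsafe-fp-math relaxes FP
associativity. A scalar illustration:

    // Depth-3 serial chain: each add must wait for the previous one.
    float serialSum(float A, float B, float C, float D) {
      return ((A + B) + C) + D;
    }

    // Depth-2 reassociated form: (A + B) and (C + D) can execute in
    // parallel. Equivalent only under relaxed FP associativity.
    float reassociatedSum(float A, float B, float C, float D) {
      return (A + B) + (C + D);
    }
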
/// genMadd - Generate madd instruction and combine mul and add.
/// Example:
/// MUL I=A,B,0
@@ -2713,8 +2748,10 @@ void AArch64InstrInfo::genAlternativeCodeSequence(
unsigned Opc;
switch (Pattern) {
default:
- // signal error.
- break;
+ // Reassociate instructions.
+ TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
+ DelInstrs, InstrIdxForVirtReg);
+ return;
case MachineCombinerPattern::MULADDW_OP1:
case MachineCombinerPattern::MULADDX_OP1:
// MUL I=A,B,0
diff --git a/lib/Target/AArch64/AArch64InstrInfo.h b/lib/Target/AArch64/AArch64InstrInfo.h
index ae02822a32e6..b5bb446f8c16 100644
--- a/lib/Target/AArch64/AArch64InstrInfo.h
+++ b/lib/Target/AArch64/AArch64InstrInfo.h
@@ -169,7 +169,9 @@ public:
bool getMachineCombinerPatterns(MachineInstr &Root,
SmallVectorImpl<MachineCombinerPattern> &Patterns)
const override;
-
+ /// Return true when Inst is associative and commutative so that it can be
+ /// reassociated.
+ bool isAssociativeAndCommutative(const MachineInstr &Inst) const override;
/// When getMachineCombinerPatterns() finds patterns, this function generates
/// the instructions that could replace the original code sequence
void genAlternativeCodeSequence(
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 8c3cb567fc7e..5d00e1cb3be5 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -20,8 +20,10 @@ class AMDGPUInstrPrinter;
class AMDGPUSubtarget;
class AMDGPUTargetMachine;
class FunctionPass;
+class MachineSchedContext;
class MCAsmInfo;
class raw_ostream;
+class ScheduleDAGInstrs;
class Target;
class TargetMachine;
@@ -49,6 +51,8 @@ FunctionPass *createSIFixSGPRLiveRangesPass();
FunctionPass *createSICodeEmitterPass(formatted_raw_ostream &OS);
FunctionPass *createSIInsertWaits(TargetMachine &tm);
+ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C);
+
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
index 9c3790264377..1239dfb235ef 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp
@@ -91,6 +91,25 @@ AMDGPUAsmPrinter::AMDGPUAsmPrinter(TargetMachine &TM,
std::unique_ptr<MCStreamer> Streamer)
: AsmPrinter(TM, std::move(Streamer)) {}
+void AMDGPUAsmPrinter::EmitStartOfAsmFile(Module &M) {
+ if (TM.getTargetTriple().getOS() != Triple::AMDHSA)
+ return;
+
+ // Need to construct an MCSubtargetInfo here in case we have no functions
+ // in the module.
+ std::unique_ptr<MCSubtargetInfo> STI(TM.getTarget().createMCSubtargetInfo(
+ TM.getTargetTriple().str(), TM.getTargetCPU(),
+ TM.getTargetFeatureString()));
+
+ AMDGPUTargetStreamer *TS =
+ static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
+
+ TS->EmitDirectiveHSACodeObjectVersion(1, 0);
+ AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(STI->getFeatureBits());
+ TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
+ "AMD", "AMDGPU");
+}
+
void AMDGPUAsmPrinter::EmitFunctionBodyStart() {
const AMDGPUSubtarget &STM = MF->getSubtarget<AMDGPUSubtarget>();
SIProgramInfo KernelInfo;
@@ -148,11 +167,15 @@ void AMDGPUAsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
TS->EmitAMDGPUHsaProgramScopeGlobal(GV->getName());
}
+ MCSymbolELF *GVSym = cast<MCSymbolELF>(getSymbol(GV));
const DataLayout &DL = getDataLayout();
+
+ // Emit the size
+ uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ OutStreamer->emitELFSize(GVSym, MCConstantExpr::create(Size, OutContext));
OutStreamer->PushSection();
OutStreamer->SwitchSection(
getObjFileLowering().SectionForGlobal(GV, *Mang, TM));
- MCSymbol *GVSym = getSymbol(GV);
const Constant *C = GV->getInitializer();
OutStreamer->EmitLabel(GVSym);
EmitGlobalConstant(DL, C);
@@ -178,13 +201,6 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
if (!STM.isAmdHsaOS()) {
EmitProgramInfoSI(MF, KernelInfo);
}
- // Emit directives
- AMDGPUTargetStreamer *TS =
- static_cast<AMDGPUTargetStreamer *>(OutStreamer->getTargetStreamer());
- TS->EmitDirectiveHSACodeObjectVersion(1, 0);
- AMDGPU::IsaVersion ISA = STM.getIsaVersion();
- TS->EmitDirectiveHSACodeObjectISA(ISA.Major, ISA.Minor, ISA.Stepping,
- "AMD", "AMDGPU");
} else {
EmitProgramInfoR600(MF);
}
@@ -417,16 +433,24 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo,
}
}
- if (VCCUsed || FlatUsed || STM.isXNACKEnabled()) {
- MaxSGPR += 2;
+ unsigned ExtraSGPRs = 0;
- if (FlatUsed)
- MaxSGPR += 2;
+ if (VCCUsed)
+ ExtraSGPRs = 2;
+ if (STM.getGeneration() < AMDGPUSubtarget::VOLCANIC_ISLANDS) {
+ if (FlatUsed)
+ ExtraSGPRs = 4;
+ } else {
if (STM.isXNACKEnabled())
- MaxSGPR += 2;
+ ExtraSGPRs = 4;
+
+ if (FlatUsed)
+ ExtraSGPRs = 6;
}
+ MaxSGPR += ExtraSGPRs;
+
// We found the maximum register index. They start at 0, so add one to get the
// number of registers.
ProgInfo.NumVGPR = MaxVGPR + 1;
@@ -563,7 +587,9 @@ void AMDGPUAsmPrinter::EmitProgramInfoSI(const MachineFunction &MF,
OutStreamer->EmitIntValue(R_00B02C_SPI_SHADER_PGM_RSRC2_PS, 4);
OutStreamer->EmitIntValue(S_00B02C_EXTRA_LDS_SIZE(KernelInfo.LDSBlocks), 4);
OutStreamer->EmitIntValue(R_0286CC_SPI_PS_INPUT_ENA, 4);
- OutStreamer->EmitIntValue(MFI->PSInputAddr, 4);
+ OutStreamer->EmitIntValue(MFI->PSInputEna, 4);
+ OutStreamer->EmitIntValue(R_0286D0_SPI_PS_INPUT_ADDR, 4);
+ OutStreamer->EmitIntValue(MFI->getPSInputAddr(), 4);
}
}
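
The reworked bookkeeping above makes the reserved-SGPR categories overwrite
one another rather than accumulate. A condensed restatement as a standalone
function:

    // Extra SGPRs reserved past the last used index: VCC costs 2, while
    // FLAT_SCRATCH and XNACK reserve more on VOLCANIC_ISLANDS and later
    // because they sit higher in the reserved range. Later conditions
    // overwrite the count, they never add to it.
    unsigned extraSGPRs(bool VCCUsed, bool FlatUsed, bool XNACKEnabled,
                        bool IsVolcanicIslandsOrLater) {
      unsigned Extra = 0;
      if (VCCUsed)
        Extra = 2;
      if (!IsVolcanicIslandsOrLater) {
        if (FlatUsed)
          Extra = 4;
      } else {
        if (XNACKEnabled)
          Extra = 4;
        if (FlatUsed)
          Extra = 6;
      }
      return Extra;
    }
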
diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
index 817cbfc0c0eb..99d4091670fe 100644
--- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
+++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h
@@ -103,6 +103,8 @@ public:
void EmitGlobalVariable(const GlobalVariable *GV) override;
+ void EmitStartOfAsmFile(Module &M) override;
+
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &O) override;
diff --git a/lib/Target/AMDGPU/AMDGPUCallingConv.td b/lib/Target/AMDGPU/AMDGPUCallingConv.td
index 6ffa7a083583..b0db26124a0c 100644
--- a/lib/Target/AMDGPU/AMDGPUCallingConv.td
+++ b/lib/Target/AMDGPU/AMDGPUCallingConv.td
@@ -20,28 +20,83 @@ def CC_SI : CallingConv<[
CCIfInReg<CCIfType<[f32, i32] , CCAssignToReg<[
SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
- SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
]>>>,
CCIfInReg<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
+ SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
+ SGPR32, SGPR34, SGPR36, SGPR38 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
+ SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
+ SGPR33, SGPR35, SGPR37, SGPR39 ]
>>>,
+ // 32*4 + 4 is the minimum for a fetch shader consumer with 32 inputs.
CCIfNotInReg<CCIfType<[f32, i32] , CCAssignToReg<[
VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
- VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
+ VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
+ VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
+ VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
+ VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
+ VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
+ VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
+ VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
+ VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
+ VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
+ VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
+ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
+ VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
+ VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
]>>>,
CCIfByVal<CCIfType<[i64] , CCAssignToRegWithShadow<
- [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14 ],
- [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15 ]
+ [ SGPR0, SGPR2, SGPR4, SGPR6, SGPR8, SGPR10, SGPR12, SGPR14,
+ SGPR16, SGPR18, SGPR20, SGPR22, SGPR24, SGPR26, SGPR28, SGPR30,
+ SGPR32, SGPR34, SGPR36, SGPR38 ],
+ [ SGPR1, SGPR3, SGPR5, SGPR7, SGPR9, SGPR11, SGPR13, SGPR15,
+ SGPR17, SGPR19, SGPR21, SGPR23, SGPR25, SGPR27, SGPR29, SGPR31,
+ SGPR33, SGPR35, SGPR37, SGPR39 ]
>>>
]>;
+def RetCC_SI : CallingConv<[
+ CCIfType<[i32] , CCAssignToReg<[
+ SGPR0, SGPR1, SGPR2, SGPR3, SGPR4, SGPR5, SGPR6, SGPR7,
+ SGPR8, SGPR9, SGPR10, SGPR11, SGPR12, SGPR13, SGPR14, SGPR15,
+ SGPR16, SGPR17, SGPR18, SGPR19, SGPR20, SGPR21, SGPR22, SGPR23,
+ SGPR24, SGPR25, SGPR26, SGPR27, SGPR28, SGPR29, SGPR30, SGPR31,
+ SGPR32, SGPR33, SGPR34, SGPR35, SGPR36, SGPR37, SGPR38, SGPR39
+ ]>>,
+
+ // 32*4 + 4 is the minimum for a fetch shader with 32 outputs.
+ CCIfType<[f32] , CCAssignToReg<[
+ VGPR0, VGPR1, VGPR2, VGPR3, VGPR4, VGPR5, VGPR6, VGPR7,
+ VGPR8, VGPR9, VGPR10, VGPR11, VGPR12, VGPR13, VGPR14, VGPR15,
+ VGPR16, VGPR17, VGPR18, VGPR19, VGPR20, VGPR21, VGPR22, VGPR23,
+ VGPR24, VGPR25, VGPR26, VGPR27, VGPR28, VGPR29, VGPR30, VGPR31,
+ VGPR32, VGPR33, VGPR34, VGPR35, VGPR36, VGPR37, VGPR38, VGPR39,
+ VGPR40, VGPR41, VGPR42, VGPR43, VGPR44, VGPR45, VGPR46, VGPR47,
+ VGPR48, VGPR49, VGPR50, VGPR51, VGPR52, VGPR53, VGPR54, VGPR55,
+ VGPR56, VGPR57, VGPR58, VGPR59, VGPR60, VGPR61, VGPR62, VGPR63,
+ VGPR64, VGPR65, VGPR66, VGPR67, VGPR68, VGPR69, VGPR70, VGPR71,
+ VGPR72, VGPR73, VGPR74, VGPR75, VGPR76, VGPR77, VGPR78, VGPR79,
+ VGPR80, VGPR81, VGPR82, VGPR83, VGPR84, VGPR85, VGPR86, VGPR87,
+ VGPR88, VGPR89, VGPR90, VGPR91, VGPR92, VGPR93, VGPR94, VGPR95,
+ VGPR96, VGPR97, VGPR98, VGPR99, VGPR100, VGPR101, VGPR102, VGPR103,
+ VGPR104, VGPR105, VGPR106, VGPR107, VGPR108, VGPR109, VGPR110, VGPR111,
+ VGPR112, VGPR113, VGPR114, VGPR115, VGPR116, VGPR117, VGPR118, VGPR119,
+ VGPR120, VGPR121, VGPR122, VGPR123, VGPR124, VGPR125, VGPR126, VGPR127,
+ VGPR128, VGPR129, VGPR130, VGPR131, VGPR132, VGPR133, VGPR134, VGPR135
+ ]>>
+]>;
+
// Calling convention for R600
def CC_R600 : CallingConv<[
CCIfInReg<CCIfType<[v4f32, v4i32] , CCAssignToReg<[
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 222f63161be5..1a59a460ee7d 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -282,12 +282,19 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM,
setOperationAction(ISD::SMAX, MVT::i32, Legal);
setOperationAction(ISD::UMAX, MVT::i32, Legal);
- if (!Subtarget->hasFFBH())
+ if (Subtarget->hasFFBH())
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
+ else
setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Expand);
if (!Subtarget->hasFFBL())
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Expand);
+
+ setOperationAction(ISD::CTLZ, MVT::i64, Custom);
+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i64, Custom);
+
static const MVT::SimpleValueType VectorIntTypes[] = {
MVT::v2i32, MVT::v4i32
};
@@ -565,6 +572,12 @@ void AMDGPUTargetLowering::AnalyzeFormalArguments(CCState &State,
State.AnalyzeFormalArguments(Ins, CC_AMDGPU);
}
+void AMDGPUTargetLowering::AnalyzeReturn(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) const {
+
+ State.AnalyzeReturn(Outs, RetCC_SI);
+}
+
SDValue AMDGPUTargetLowering::LowerReturn(
SDValue Chain,
CallingConv::ID CallConv,
@@ -633,6 +646,9 @@ SDValue AMDGPUTargetLowering::LowerOperation(SDValue Op,
case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT: return LowerFP_TO_SINT(Op, DAG);
case ISD::FP_TO_UINT: return LowerFP_TO_UINT(Op, DAG);
+ case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
+ return LowerCTLZ(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
}
return Op;
@@ -2159,6 +2175,145 @@ SDValue AMDGPUTargetLowering::LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const {
return DAG.getNode(ISD::FADD, SL, MVT::f64, Trunc, Add);
}
+SDValue AMDGPUTargetLowering::LowerCTLZ(SDValue Op, SelectionDAG &DAG) const {
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+ bool ZeroUndef = Op.getOpcode() == ISD::CTLZ_ZERO_UNDEF;
+
+ if (ZeroUndef && Src.getValueType() == MVT::i32)
+ return DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Src);
+
+ SDValue Vec = DAG.getNode(ISD::BITCAST, SL, MVT::v2i32, Src);
+
+ const SDValue Zero = DAG.getConstant(0, SL, MVT::i32);
+ const SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, Zero);
+ SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, MVT::i32, Vec, One);
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), MVT::i32);
+
+ SDValue Hi0 = DAG.getSetCC(SL, SetCCVT, Hi, Zero, ISD::SETEQ);
+
+ SDValue CtlzLo = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Lo);
+ SDValue CtlzHi = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i32, Hi);
+
+ const SDValue Bits32 = DAG.getConstant(32, SL, MVT::i32);
+ SDValue Add = DAG.getNode(ISD::ADD, SL, MVT::i32, CtlzLo, Bits32);
+
+ // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x))
+ SDValue NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32, Hi0, Add, CtlzHi);
+
+ if (!ZeroUndef) {
+ // Test if the full 64-bit input is zero.
+
+ // FIXME: DAG combines turn what should be an s_and_b64 into a v_or_b32,
+ // which we probably don't want.
+ SDValue Lo0 = DAG.getSetCC(SL, SetCCVT, Lo, Zero, ISD::SETEQ);
+ SDValue SrcIsZero = DAG.getNode(ISD::AND, SL, SetCCVT, Lo0, Hi0);
+
+ // TODO: If i64 setcc is half rate, it can result in 1 fewer instruction
+ // with the same cycles, otherwise it is slower.
+ // SDValue SrcIsZero = DAG.getSetCC(SL, SetCCVT, Src,
+ // DAG.getConstant(0, SL, MVT::i64), ISD::SETEQ);
+
+ const SDValue Bits32 = DAG.getConstant(64, SL, MVT::i32);
+
+ // The instruction returns -1 for 0 input, but the defined intrinsic
+ // behavior is to return the number of bits.
+ NewCtlz = DAG.getNode(ISD::SELECT, SL, MVT::i32,
+ SrcIsZero, Bits32, NewCtlz);
+ }
+
+ return DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i64, NewCtlz);
+}
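
A host-side reference for this lowering, assuming GCC/Clang builtins for the
32-bit pieces; it mirrors the selected formula and the zero-input handling:

    #include <cstdint>

    // ctlz(x) = hi_32(x) == 0 ? ctlz(lo_32(x)) + 32 : ctlz(hi_32(x)).
    // When a zero input must be defined, return the bit width (64) instead
    // of relying on the instruction's -1 result. With ZeroUndef, a zero
    // input is undefined behaviour, as for the ISD node.
    uint32_t ctlz64(uint64_t X, bool ZeroUndef) {
      if (!ZeroUndef && X == 0)
        return 64;
      uint32_t Lo = uint32_t(X), Hi = uint32_t(X >> 32);
      return Hi == 0 ? __builtin_clz(Lo) + 32 : __builtin_clz(Hi);
    }
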
+
+SDValue AMDGPUTargetLowering::LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG,
+ bool Signed) const {
+ // Unsigned
+ // cul2f(ulong u)
+ //{
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ //}
+ // Signed
+ // cl2f(long l)
+ //{
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ //}
+
+ SDLoc SL(Op);
+ SDValue Src = Op.getOperand(0);
+ SDValue L = Src;
+
+ SDValue S;
+ if (Signed) {
+ const SDValue SignBit = DAG.getConstant(63, SL, MVT::i64);
+ S = DAG.getNode(ISD::SRA, SL, MVT::i64, L, SignBit);
+
+ SDValue LPlusS = DAG.getNode(ISD::ADD, SL, MVT::i64, L, S);
+ L = DAG.getNode(ISD::XOR, SL, MVT::i64, LPlusS, S);
+ }
+
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(),
+ *DAG.getContext(), MVT::f32);
+
+
+ SDValue ZeroI32 = DAG.getConstant(0, SL, MVT::i32);
+ SDValue ZeroI64 = DAG.getConstant(0, SL, MVT::i64);
+ SDValue LZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SL, MVT::i64, L);
+ LZ = DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, LZ);
+
+ SDValue K = DAG.getConstant(127U + 63U, SL, MVT::i32);
+ SDValue E = DAG.getSelect(SL, MVT::i32,
+ DAG.getSetCC(SL, SetCCVT, L, ZeroI64, ISD::SETNE),
+ DAG.getNode(ISD::SUB, SL, MVT::i32, K, LZ),
+ ZeroI32);
+
+ SDValue U = DAG.getNode(ISD::AND, SL, MVT::i64,
+ DAG.getNode(ISD::SHL, SL, MVT::i64, L, LZ),
+ DAG.getConstant((-1ULL) >> 1, SL, MVT::i64));
+
+ SDValue T = DAG.getNode(ISD::AND, SL, MVT::i64, U,
+ DAG.getConstant(0xffffffffffULL, SL, MVT::i64));
+
+ SDValue UShl = DAG.getNode(ISD::SRL, SL, MVT::i64,
+ U, DAG.getConstant(40, SL, MVT::i64));
+
+ SDValue V = DAG.getNode(ISD::OR, SL, MVT::i32,
+ DAG.getNode(ISD::SHL, SL, MVT::i32, E, DAG.getConstant(23, SL, MVT::i32)),
+ DAG.getNode(ISD::TRUNCATE, SL, MVT::i32, UShl));
+
+ SDValue C = DAG.getConstant(0x8000000000ULL, SL, MVT::i64);
+ SDValue RCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETUGT);
+ SDValue TCmp = DAG.getSetCC(SL, SetCCVT, T, C, ISD::SETEQ);
+
+ SDValue One = DAG.getConstant(1, SL, MVT::i32);
+
+ SDValue VTrunc1 = DAG.getNode(ISD::AND, SL, MVT::i32, V, One);
+
+ SDValue R = DAG.getSelect(SL, MVT::i32,
+ RCmp,
+ One,
+ DAG.getSelect(SL, MVT::i32, TCmp, VTrunc1, ZeroI32));
+ R = DAG.getNode(ISD::ADD, SL, MVT::i32, V, R);
+ R = DAG.getNode(ISD::BITCAST, SL, MVT::f32, R);
+
+ if (!Signed)
+ return R;
+
+ SDValue RNeg = DAG.getNode(ISD::FNEG, SL, MVT::f32, R);
+ return DAG.getSelect(SL, MVT::f32, DAG.getSExtOrTrunc(S, SL, SetCCVT), RNeg, R);
+}
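
For testing the lowering against a scalar reference, a compilable
transcription of the cul2f() pseudocode above, assuming IEEE-754 binary32 and
GCC/Clang builtins:

    #include <cstdint>
    #include <cstring>

    float cul2f(uint64_t U) {
      if (U == 0)
        return 0.0f; // exponent, mantissa, and rounding all collapse to 0
      uint32_t LZ = __builtin_clzll(U);
      uint32_t E = 127u + 63u - LZ;               // biased exponent
      U = (U << LZ) & 0x7fffffffffffffffULL;      // normalize, drop leading 1
      uint64_t T = U & 0xffffffffffULL;           // low 40 bits drive rounding
      uint32_t V = (E << 23) | uint32_t(U >> 40); // exponent | top 23 bits
      uint32_t R = T > 0x8000000000ULL
                       ? 1u
                       : (T == 0x8000000000ULL ? (V & 1u) : 0u); // ties-to-even
      V += R; // a carry may ripple into the exponent, which is still correct
      float F;
      std::memcpy(&F, &V, sizeof(F)); // as_float(v + r)
      return F;
    }
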
+
SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
bool Signed) const {
SDLoc SL(Op);
@@ -2184,35 +2339,29 @@ SDValue AMDGPUTargetLowering::LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG,
SDValue AMDGPUTargetLowering::LowerUINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue S0 = Op.getOperand(0);
- if (S0.getValueType() != MVT::i64)
- return SDValue();
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "operation should be legal");
EVT DestVT = Op.getValueType();
if (DestVT == MVT::f64)
return LowerINT_TO_FP64(Op, DAG, false);
- assert(DestVT == MVT::f32);
-
- SDLoc DL(Op);
+ if (DestVT == MVT::f32)
+ return LowerINT_TO_FP32(Op, DAG, false);
- // f32 uint_to_fp i64
- SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(0, DL, MVT::i32));
- SDValue FloatLo = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Lo);
- SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, S0,
- DAG.getConstant(1, DL, MVT::i32));
- SDValue FloatHi = DAG.getNode(ISD::UINT_TO_FP, DL, MVT::f32, Hi);
- // TODO: Should this propagate fast-math-flags?
- FloatHi = DAG.getNode(ISD::FMUL, DL, MVT::f32, FloatHi,
- DAG.getConstantFP(4294967296.0f, DL, MVT::f32)); // 2^32
- return DAG.getNode(ISD::FADD, DL, MVT::f32, FloatLo, FloatHi);
+ return SDValue();
}
SDValue AMDGPUTargetLowering::LowerSINT_TO_FP(SDValue Op,
SelectionDAG &DAG) const {
- SDValue Src = Op.getOperand(0);
- if (Src.getValueType() == MVT::i64 && Op.getValueType() == MVT::f64)
+ assert(Op.getOperand(0).getValueType() == MVT::i64 &&
+ "operation should be legal");
+
+ EVT DestVT = Op.getValueType();
+ if (DestVT == MVT::f32)
+ return LowerINT_TO_FP32(Op, DAG, true);
+
+ if (DestVT == MVT::f64)
return LowerINT_TO_FP64(Op, DAG, true);
return SDValue();
@@ -2447,6 +2596,97 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
return DAG.getSExtOrTrunc(Mul, DL, VT);
}
+static bool isNegativeOne(SDValue Val) {
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val))
+ return C->isAllOnesValue();
+ return false;
+}
+
+static bool isCtlzOpc(unsigned Opc) {
+ return Opc == ISD::CTLZ || Opc == ISD::CTLZ_ZERO_UNDEF;
+}
+
+// Get FFBH node if the incoming op may have been type legalized from a smaller
+// type VT.
+// Need to match pre-legalized type because the generic legalization inserts the
+// add/sub between the select and compare.
+static SDValue getFFBH_U32(const TargetLowering &TLI,
+ SelectionDAG &DAG, SDLoc SL, SDValue Op) {
+ EVT VT = Op.getValueType();
+ EVT LegalVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (LegalVT != MVT::i32)
+ return SDValue();
+
+ if (VT != MVT::i32)
+ Op = DAG.getNode(ISD::ZERO_EXTEND, SL, MVT::i32, Op);
+
+ SDValue FFBH = DAG.getNode(AMDGPUISD::FFBH_U32, SL, MVT::i32, Op);
+ if (VT != MVT::i32)
+ FFBH = DAG.getNode(ISD::TRUNCATE, SL, VT, FFBH);
+
+ return FFBH;
+}
+
+// The native instructions return -1 on 0 input. Optimize out a select that
+// produces -1 on 0.
+//
+// TODO: If zero is not undef, we could also do this if the output is compared
+// against the bitwidth.
+//
+// TODO: Should probably combine against FFBH_U32 instead of ctlz directly.
+SDValue AMDGPUTargetLowering::performCtlzCombine(SDLoc SL,
+ SDValue Cond,
+ SDValue LHS,
+ SDValue RHS,
+ DAGCombinerInfo &DCI) const {
+ ConstantSDNode *CmpRhs = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (!CmpRhs || !CmpRhs->isNullValue())
+ return SDValue();
+
+ SelectionDAG &DAG = DCI.DAG;
+ ISD::CondCode CCOpcode = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ SDValue CmpLHS = Cond.getOperand(0);
+
+ // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) -> ffbh_u32 x
+ if (CCOpcode == ISD::SETEQ &&
+ isCtlzOpc(RHS.getOpcode()) &&
+ RHS.getOperand(0) == CmpLHS &&
+ isNegativeOne(LHS)) {
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ }
+
+ // select (setcc x, 0, ne), (ctlz_zero_undef x), -1 -> ffbh_u32 x
+ if (CCOpcode == ISD::SETNE &&
+ isCtlzOpc(LHS.getOpcode()) &&
+ LHS.getOperand(0) == CmpLHS &&
+ isNegativeOne(RHS)) {
+ return getFFBH_U32(*this, DAG, SL, CmpLHS);
+ }
+
+ return SDValue();
+}
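
In scalar terms, the pattern being folded is the usual zero-guard around a
count-leading-zeros call; the guard is redundant because ffbh_u32 already
returns -1 for a zero input:

    #include <cstdint>

    // select (setcc x, 0, eq), -1, (ctlz_zero_undef x) in C form; after the
    // combine this becomes a single FFBH_U32 / s_flbit_i32_b32.
    int32_t guardedClz(uint32_t X) {
      return X == 0 ? -1 : __builtin_clz(X);
    }
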
+
+SDValue AMDGPUTargetLowering::performSelectCombine(SDNode *N,
+ DAGCombinerInfo &DCI) const {
+ SDValue Cond = N->getOperand(0);
+ if (Cond.getOpcode() != ISD::SETCC)
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ SDValue LHS = Cond.getOperand(0);
+ SDValue RHS = Cond.getOperand(1);
+ SDValue CC = Cond.getOperand(2);
+
+ SDValue True = N->getOperand(1);
+ SDValue False = N->getOperand(2);
+
+ if (VT == MVT::f32 && Cond.hasOneUse())
+ return CombineFMinMaxLegacy(SDLoc(N), VT, LHS, RHS, True, False, CC, DCI);
+
+ // There's no reason to not do this if the condition has other uses.
+ return performCtlzCombine(SDLoc(N), Cond, True, False, DCI);
+}
+
SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -2471,23 +2711,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
simplifyI24(N1, DCI);
return SDValue();
}
- case ISD::SELECT: {
- SDValue Cond = N->getOperand(0);
- if (Cond.getOpcode() == ISD::SETCC && Cond.hasOneUse()) {
- EVT VT = N->getValueType(0);
- SDValue LHS = Cond.getOperand(0);
- SDValue RHS = Cond.getOperand(1);
- SDValue CC = Cond.getOperand(2);
-
- SDValue True = N->getOperand(1);
- SDValue False = N->getOperand(2);
-
- if (VT == MVT::f32)
- return CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
- }
-
- break;
- }
+ case ISD::SELECT:
+ return performSelectCombine(N, DCI);
case AMDGPUISD::BFE_I32:
case AMDGPUISD::BFE_U32: {
assert(!N->getValueType(0).isVector() &&
@@ -2699,6 +2924,7 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(BFE_I32)
NODE_NAME_CASE(BFI)
NODE_NAME_CASE(BFM)
+ NODE_NAME_CASE(FFBH_U32)
NODE_NAME_CASE(MUL_U24)
NODE_NAME_CASE(MUL_I24)
NODE_NAME_CASE(MAD_U24)
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.h b/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 7314cc050ba5..37925416a9c4 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -54,6 +54,9 @@ private:
SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFFLOOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG) const;
+
+ SDValue LowerINT_TO_FP32(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerINT_TO_FP64(SDValue Op, SelectionDAG &DAG, bool Signed) const;
SDValue LowerUINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
@@ -67,6 +70,9 @@ private:
SDValue performStoreCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performCtlzCombine(SDLoc SL, SDValue Cond, SDValue LHS, SDValue RHS,
+ DAGCombinerInfo &DCI) const;
+ SDValue performSelectCombine(SDNode *N, DAGCombinerInfo &DCI) const;
protected:
static EVT getEquivalentMemType(LLVMContext &Context, EVT VT);
@@ -109,6 +115,8 @@ protected:
SmallVectorImpl<ISD::InputArg> &OrigIns) const;
void AnalyzeFormalArguments(CCState &State,
const SmallVectorImpl<ISD::InputArg> &Ins) const;
+ void AnalyzeReturn(CCState &State,
+ const SmallVectorImpl<ISD::OutputArg> &Outs) const;
public:
AMDGPUTargetLowering(TargetMachine &TM, const AMDGPUSubtarget &STI);
@@ -263,6 +271,7 @@ enum NodeType : unsigned {
BFE_I32, // Extract range of bits with sign extension to 32-bits.
BFI, // (src0 & src1) | (~src0 & src2)
BFM, // Insert a range of bits into a 32-bit word.
+ FFBH_U32, // ctlz with -1 if input is zero.
MUL_U24,
MUL_I24,
MAD_U24,
diff --git a/lib/Target/AMDGPU/AMDGPUInstrInfo.td b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
index 70e589c28429..575dfe413658 100644
--- a/lib/Target/AMDGPU/AMDGPUInstrInfo.td
+++ b/lib/Target/AMDGPU/AMDGPUInstrInfo.td
@@ -191,6 +191,8 @@ def AMDGPUbfe_i32 : SDNode<"AMDGPUISD::BFE_I32", AMDGPUDTIntTernaryOp>;
def AMDGPUbfi : SDNode<"AMDGPUISD::BFI", AMDGPUDTIntTernaryOp>;
def AMDGPUbfm : SDNode<"AMDGPUISD::BFM", SDTIntBinOp>;
+def AMDGPUffbh_u32 : SDNode<"AMDGPUISD::FFBH_U32", SDTIntUnaryOp>;
+
// Signed and unsigned 24-bit multiply. The highest 8 bits are ignored when
// performing the multiply. The result is a 32-bit value.
def AMDGPUmul_u24 : SDNode<"AMDGPUISD::MUL_U24", SDTIntBinOp,
@@ -240,4 +242,4 @@ def IL_brcond : SDNode<"AMDGPUISD::BRANCH_COND", SDTIL_BRCond, [SDNPHasChai
// Call/Return DAG Nodes
//===----------------------------------------------------------------------===//
def IL_retflag : SDNode<"AMDGPUISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInGlue]>;
+ [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 22f85b3e663c..b1be6197a6c6 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -66,8 +66,12 @@ static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
}
static MachineSchedRegistry
-SchedCustomRegistry("r600", "Run R600's custom scheduler",
- createR600MachineScheduler);
+R600SchedRegistry("r600", "Run R600's custom scheduler",
+ createR600MachineScheduler);
+
+static MachineSchedRegistry
+SISchedRegistry("si", "Run SI's custom scheduler",
+ createSIMachineScheduler);
static std::string computeDataLayout(const Triple &TT) {
std::string Ret = "e-p:32:32";
diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt
index 30bb0e0adde8..b9ef0e821763 100644
--- a/lib/Target/AMDGPU/CMakeLists.txt
+++ b/lib/Target/AMDGPU/CMakeLists.txt
@@ -58,6 +58,7 @@ add_llvm_target(AMDGPUCodeGen
SILowerControlFlow.cpp
SILowerI1Copies.cpp
SIMachineFunctionInfo.cpp
+ SIMachineScheduler.cpp
SIRegisterInfo.cpp
SIShrinkInstructions.cpp
SITypeRewriter.cpp
diff --git a/lib/Target/AMDGPU/EvergreenInstructions.td b/lib/Target/AMDGPU/EvergreenInstructions.td
index 779a14e95d22..2245f1417e53 100644
--- a/lib/Target/AMDGPU/EvergreenInstructions.td
+++ b/lib/Target/AMDGPU/EvergreenInstructions.td
@@ -349,7 +349,7 @@ def BCNT_INT : R600_1OP_Helper <0xAA, "BCNT_INT", ctpop, VecALU>;
def ADDC_UINT : R600_2OP_Helper <0x52, "ADDC_UINT", AMDGPUcarry>;
def SUBB_UINT : R600_2OP_Helper <0x53, "SUBB_UINT", AMDGPUborrow>;
-def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", ctlz_zero_undef, VecALU>;
+def FFBH_UINT : R600_1OP_Helper <0xAB, "FFBH_UINT", AMDGPUffbh_u32, VecALU>;
def FFBL_INT : R600_1OP_Helper <0xAC, "FFBL_INT", cttz_zero_undef, VecALU>;
let hasSideEffects = 1 in {
diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
index 68b1d1ae83cc..4bc80a028936 100644
--- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
+++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCAsmInfo.cpp
@@ -28,7 +28,6 @@ AMDGPUMCAsmInfo::AMDGPUMCAsmInfo(const Triple &TT) : MCAsmInfoELF() {
//===--- Global Variable Emission Directives --------------------------===//
HasAggressiveSymbolFolding = true;
COMMDirectiveAlignmentIsInBytes = false;
- HasDotTypeDotSizeDirective = false;
HasNoDeadStrip = true;
WeakRefDirective = ".weakref\t";
//===--- Dwarf Emission Directives -----------------------------------===//
diff --git a/lib/Target/AMDGPU/SIDefines.h b/lib/Target/AMDGPU/SIDefines.h
index 7f79dd34f3ba..aa1e352ed748 100644
--- a/lib/Target/AMDGPU/SIDefines.h
+++ b/lib/Target/AMDGPU/SIDefines.h
@@ -137,7 +137,7 @@ namespace SIOutMods {
#define C_00B84C_EXCP_EN
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
-
+#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
#define S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
diff --git a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
index 96e37c566240..f59d9948f98e 100644
--- a/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
+++ b/lib/Target/AMDGPU/SIFixSGPRCopies.cpp
@@ -215,7 +215,7 @@ static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
unsigned SrcReg = MI.getOperand(I).getReg();
- unsigned SrcSubReg = MI.getOperand(I).getReg();
+ unsigned SrcSubReg = MI.getOperand(I).getSubReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
assert(TRI->isSGPRClass(SrcRC) &&
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index 02a39307e74e..6230d1e28b74 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -334,12 +334,20 @@ bool SIFoldOperands::runOnMachineFunction(MachineFunction &MF) {
!MRI.hasOneUse(MI.getOperand(0).getReg()))
continue;
- // FIXME: Fold operands with subregs.
if (OpToFold.isReg() &&
- (!TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()) ||
- OpToFold.getSubReg()))
+ !TargetRegisterInfo::isVirtualRegister(OpToFold.getReg()))
continue;
+ // Prevent folding operands backwards in the function. For example,
+ // the COPY opcode must not be replaced by 1 in this example:
+ //
+ // %vreg3<def> = COPY %VGPR0; VGPR_32:%vreg3
+ // ...
+ // %VGPR0<def> = V_MOV_B32_e32 1, %EXEC<imp-use>
+ MachineOperand &Dst = MI.getOperand(0);
+ if (Dst.isReg() &&
+ !TargetRegisterInfo::isVirtualRegister(Dst.getReg()))
+ continue;
// We need to mutate the operands of new mov instructions to add implicit
// uses of EXEC, but adding them invalidates the use_iterator, so defer
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 0e043cb47da7..544867513d9c 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -259,7 +259,6 @@ SITargetLowering::SITargetLowering(TargetMachine &TM,
setTargetDAGCombine(ISD::SMAX);
setTargetDAGCombine(ISD::UMIN);
setTargetDAGCombine(ISD::UMAX);
- setTargetDAGCombine(ISD::SELECT_CC);
setTargetDAGCombine(ISD::SETCC);
setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::OR);
@@ -598,18 +597,20 @@ SDValue SITargetLowering::LowerFormalArguments(
// First check if it's a PS input addr
if (Info->getShaderType() == ShaderType::PIXEL && !Arg.Flags.isInReg() &&
- !Arg.Flags.isByVal()) {
+ !Arg.Flags.isByVal() && PSInputNum <= 15) {
- assert((PSInputNum <= 15) && "Too many PS inputs!");
-
- if (!Arg.Used) {
+ if (!Arg.Used && !Info->isPSInputAllocated(PSInputNum)) {
// We can safely skip PS inputs
Skipped.set(i);
++PSInputNum;
continue;
}
- Info->PSInputAddr |= 1 << PSInputNum++;
+ Info->markPSInputAllocated(PSInputNum);
+ if (Arg.Used)
+ Info->PSInputEna |= 1 << PSInputNum;
+
+ ++PSInputNum;
}
// Second split vertices into their elements
@@ -639,11 +640,25 @@ SDValue SITargetLowering::LowerFormalArguments(
*DAG.getContext());
// At least one interpolation mode must be enabled or else the GPU will hang.
+ //
+ // Check PSInputAddr instead of PSInputEna. The idea is that if the user set
+ // PSInputAddr, the user wants to enable some bits after the compilation
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // based on run-time states. Since we can't know what the final PSInputEna
+ // will look like, we shouldn't do anything here, and the user should take
+ //
+ // Otherwise, the following restrictions apply:
+ // - At least one of PERSP_* (0xF) or LINEAR_* (0x70) must be enabled.
+ // - If POS_W_FLOAT (11) is enabled, at least one of PERSP_* must be
+ // enabled too.
if (Info->getShaderType() == ShaderType::PIXEL &&
- (Info->PSInputAddr & 0x7F) == 0) {
- Info->PSInputAddr |= 1;
+ ((Info->getPSInputAddr() & 0x7F) == 0 ||
+ ((Info->getPSInputAddr() & 0xF) == 0 &&
+ Info->isPSInputAllocated(11)))) {
CCInfo.AllocateReg(AMDGPU::VGPR0);
CCInfo.AllocateReg(AMDGPU::VGPR1);
+ Info->markPSInputAllocated(0);
+ Info->PSInputEna |= 1;
}
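
The fixup condition above, restated as a standalone predicate over the
PSInputAddr bits (bits 0-3 are PERSP_*, bits 4-6 are LINEAR_*, bit 11 is
POS_W_FLOAT):

    // True when the hardware restrictions force enabling a default
    // interpolation mode (bit 0, backed by VGPR0/VGPR1).
    bool needsDefaultInterpMode(unsigned PSInputAddr) {
      bool AnyPersp = PSInputAddr & 0xF;         // some PERSP_* enabled
      bool AnyInterp = PSInputAddr & 0x7F;       // some PERSP_* or LINEAR_*
      bool PosWFloat = PSInputAddr & (1u << 11); // POS_W_FLOAT requested
      return !AnyInterp || (!AnyPersp && PosWFloat);
    }
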
if (Info->getShaderType() == ShaderType::COMPUTE) {
@@ -872,6 +887,97 @@ SDValue SITargetLowering::LowerFormalArguments(
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
}
+SDValue SITargetLowering::LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const {
+ MachineFunction &MF = DAG.getMachineFunction();
+ SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
+
+ if (Info->getShaderType() == ShaderType::COMPUTE)
+ return AMDGPUTargetLowering::LowerReturn(Chain, CallConv, isVarArg, Outs,
+ OutVals, DL, DAG);
+
+ Info->setIfReturnsVoid(Outs.size() == 0);
+
+ SmallVector<ISD::OutputArg, 48> Splits;
+ SmallVector<SDValue, 48> SplitVals;
+
+ // Split vectors into their elements.
+ for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
+ const ISD::OutputArg &Out = Outs[i];
+
+ if (Out.VT.isVector()) {
+ MVT VT = Out.VT.getVectorElementType();
+ ISD::OutputArg NewOut = Out;
+ NewOut.Flags.setSplit();
+ NewOut.VT = VT;
+
+ // We want the original number of vector elements here, e.g.
+ // three or five, not four or eight.
+ unsigned NumElements = Out.ArgVT.getVectorNumElements();
+
+ for (unsigned j = 0; j != NumElements; ++j) {
+ SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, OutVals[i],
+ DAG.getConstant(j, DL, MVT::i32));
+ SplitVals.push_back(Elem);
+ Splits.push_back(NewOut);
+ NewOut.PartOffset += NewOut.VT.getStoreSize();
+ }
+ } else {
+ SplitVals.push_back(OutVals[i]);
+ Splits.push_back(Out);
+ }
+ }
+
+ // CCValAssign - represent the assignment of the return value to a location.
+ SmallVector<CCValAssign, 48> RVLocs;
+
+ // CCState - Info about the registers and stack slots.
+ CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Analyze outgoing return values.
+ AnalyzeReturn(CCInfo, Splits);
+
+ SDValue Flag;
+ SmallVector<SDValue, 48> RetOps;
+ RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0, realRVLocIdx = 0;
+ i != RVLocs.size();
+ ++i, ++realRVLocIdx) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ SDValue Arg = SplitVals[realRVLocIdx];
+
+ // Copied from other backends.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BITCAST, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Arg, Flag);
+ Flag = Chain.getValue(1);
+ RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
+ }
+
+ // Update chain and glue.
+ RetOps[0] = Chain;
+ if (Flag.getNode())
+ RetOps.push_back(Flag);
+
+ return DAG.getNode(AMDGPUISD::RET_FLAG, DL, MVT::Other, RetOps);
+}
+
MachineBasicBlock * SITargetLowering::EmitInstrWithCustomInserter(
MachineInstr * MI, MachineBasicBlock * BB) const {
@@ -1158,6 +1264,13 @@ SDValue SITargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
switch (IntrinsicID) {
case Intrinsic::amdgcn_dispatch_ptr:
+ if (!Subtarget->isAmdHsaOS()) {
+ DiagnosticInfoUnsupported BadIntrin(*MF.getFunction(),
+ "hsa intrinsic without hsa target");
+ DAG.getContext()->diagnose(BadIntrin);
+ return DAG.getUNDEF(VT);
+ }
+
return CreateLiveInRegister(DAG, &AMDGPU::SReg_64RegClass,
TRI->getPreloadedValue(MF, SIRegisterInfo::DISPATCH_PTR), VT);
@@ -2027,7 +2140,7 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
case ISD::UINT_TO_FP: {
return performUCharToFloatCombine(N, DCI);
-
+ }
case ISD::FADD: {
if (DCI.getDAGCombineLevel() < AfterLegalizeDAG)
break;
@@ -2109,7 +2222,6 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
break;
}
- }
case ISD::LOAD:
case ISD::STORE:
case ISD::ATOMIC_LOAD:
diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h
index e2f8cb19d6be..f01b2c0d09f3 100644
--- a/lib/Target/AMDGPU/SIISelLowering.h
+++ b/lib/Target/AMDGPU/SIISelLowering.h
@@ -95,6 +95,13 @@ public:
SDLoc DL, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv,
+ bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<SDValue> &OutVals,
+ SDLoc DL, SelectionDAG &DAG) const override;
+
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr * MI,
MachineBasicBlock * BB) const override;
bool enableAggressiveFMAFusion(EVT VT) const override;
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 821aada526c7..94e614750d2f 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -84,6 +84,9 @@ private:
bool LastInstWritesM0;
+ /// \brief Whether the machine function returns void
+ bool ReturnsVoid;
+
/// \brief Get increment/decrement amount for this instruction.
Counters getHwCounts(MachineInstr &MI);
@@ -322,7 +325,9 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
const Counters &Required) {
// End of program? No need to wait on anything
- if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ // A function not returning void needs to wait, because other bytecode will
+ // be appended after it and we don't know what it will be.
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM && ReturnsVoid)
return false;
// Figure out if the async instructions execute in order
@@ -465,6 +470,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
LastIssued = ZeroCounts;
LastOpcodeType = OTHER;
LastInstWritesM0 = false;
+ ReturnsVoid = MF.getInfo<SIMachineFunctionInfo>()->returnsVoid();
memset(&UsedRegs, 0, sizeof(UsedRegs));
memset(&DefinedRegs, 0, sizeof(DefinedRegs));
@@ -488,6 +494,14 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// Wait for everything at the end of the MBB
Changes |= insertWait(MBB, MBB.getFirstTerminator(), LastIssued);
+
+ // Functions returning something shouldn't contain S_ENDPGM, because other
+ // bytecode will be appended after it.
+ if (!ReturnsVoid) {
+ MachineBasicBlock::iterator I = MBB.getFirstTerminator();
+ if (I != MBB.end() && I->getOpcode() == AMDGPU::S_ENDPGM)
+ I->eraseFromParent();
+ }
}
return Changes;
diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp
index a08a5a8fed36..1e10d25e8fb7 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1777,6 +1777,10 @@ bool SIInstrInfo::isLegalRegOperand(const MachineRegisterInfo &MRI,
MRI.getRegClass(Reg) :
RI.getPhysRegClass(Reg);
+ const SIRegisterInfo *TRI =
+ static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
+ RC = TRI->getSubRegClass(RC, MO.getSubReg());
+
// In order to be legal, the common sub-class must be equal to the
// class of the current operand. For example:
//
@@ -3075,3 +3079,15 @@ uint64_t SIInstrInfo::getScratchRsrcWords23() const {
return Rsrc23;
}
+
+bool SIInstrInfo::isLowLatencyInstruction(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ return isSMRD(Opc);
+}
+
+bool SIInstrInfo::isHighLatencyInstruction(const MachineInstr *MI) const {
+ unsigned Opc = MI->getOpcode();
+
+ return isMUBUF(Opc) || isMTBUF(Opc) || isMIMG(Opc);
+}
diff --git a/lib/Target/AMDGPU/SIInstrInfo.h b/lib/Target/AMDGPU/SIInstrInfo.h
index 307ef67ed263..cce1ae725611 100644
--- a/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/lib/Target/AMDGPU/SIInstrInfo.h
@@ -462,6 +462,9 @@ public:
uint64_t getDefaultRsrcDataFormat() const;
uint64_t getScratchRsrcWords23() const;
+
+ bool isLowLatencyInstruction(const MachineInstr *MI) const;
+ bool isHighLatencyInstruction(const MachineInstr *MI) const;
};
namespace AMDGPU {
diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
index b7df058b7c0c..89692ab71f4d 100644
--- a/lib/Target/AMDGPU/SIInstructions.td
+++ b/lib/Target/AMDGPU/SIInstructions.td
@@ -144,7 +144,7 @@ defm S_FF1_I32_B32 : SOP1_32 <sop1<0x13, 0x10>, "s_ff1_i32_b32",
defm S_FF1_I32_B64 : SOP1_32_64 <sop1<0x14, 0x11>, "s_ff1_i32_b64", []>;
defm S_FLBIT_I32_B32 : SOP1_32 <sop1<0x15, 0x12>, "s_flbit_i32_b32",
- [(set i32:$dst, (ctlz_zero_undef i32:$src0))]
+ [(set i32:$dst, (AMDGPUffbh_u32 i32:$src0))]
>;
defm S_FLBIT_I32_B64 : SOP1_32_64 <sop1<0x16, 0x13>, "s_flbit_i32_b64", []>;
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
index bf15516bea7b..49677fc2b0a3 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
@@ -46,8 +46,10 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
WorkGroupIDZSystemSGPR(AMDGPU::NoRegister),
WorkGroupInfoSystemSGPR(AMDGPU::NoRegister),
PrivateSegmentWaveByteOffsetSystemSGPR(AMDGPU::NoRegister),
- LDSWaveSpillSize(0),
PSInputAddr(0),
+ ReturnsVoid(true),
+ LDSWaveSpillSize(0),
+ PSInputEna(0),
NumUserSGPRs(0),
NumSystemSGPRs(0),
HasSpilledSGPRs(false),
@@ -72,6 +74,8 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
const Function *F = MF.getFunction();
+ PSInputAddr = AMDGPU::getInitialPSInputAddr(*F);
+
const MachineFrameInfo *FrameInfo = MF.getFrameInfo();
if (getShaderType() == ShaderType::COMPUTE)
diff --git a/lib/Target/AMDGPU/SIMachineFunctionInfo.h b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
index 9c528d63bd0e..846ee5de057d 100644
--- a/lib/Target/AMDGPU/SIMachineFunctionInfo.h
+++ b/lib/Target/AMDGPU/SIMachineFunctionInfo.h
@@ -57,10 +57,14 @@ class SIMachineFunctionInfo : public AMDGPUMachineFunction {
unsigned WorkGroupInfoSystemSGPR;
unsigned PrivateSegmentWaveByteOffsetSystemSGPR;
+ // Graphics info.
+ unsigned PSInputAddr;
+ bool ReturnsVoid;
+
public:
// FIXME: Make private
unsigned LDSWaveSpillSize;
- unsigned PSInputAddr;
+ unsigned PSInputEna;
std::map<unsigned, unsigned> LaneVGPRs;
unsigned ScratchOffsetReg;
unsigned NumUserSGPRs;
@@ -273,6 +277,26 @@ public:
HasSpilledVGPRs = Spill;
}
+ unsigned getPSInputAddr() const {
+ return PSInputAddr;
+ }
+
+ bool isPSInputAllocated(unsigned Index) const {
+ return PSInputAddr & (1 << Index);
+ }
+
+ void markPSInputAllocated(unsigned Index) {
+ PSInputAddr |= 1 << Index;
+ }
+
+ bool returnsVoid() const {
+ return ReturnsVoid;
+ }
+
+ void setIfReturnsVoid(bool Value) {
+ ReturnsVoid = Value;
+ }
+
unsigned getMaximumWorkGroupSize(const MachineFunction &MF) const;
};
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.cpp b/lib/Target/AMDGPU/SIMachineScheduler.cpp
new file mode 100644
index 000000000000..1cfa98430020
--- /dev/null
+++ b/lib/Target/AMDGPU/SIMachineScheduler.cpp
@@ -0,0 +1,1968 @@
+//===-- SIMachineScheduler.cpp - SI Scheduler Interface -*- C++ -*-----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#include "SIMachineScheduler.h"
+#include "AMDGPUSubtarget.h"
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "misched"
+
+// This scheduler implements a different scheduling algorithm than
+// GenericScheduler.
+//
+// There are several specific architecture behaviours that can't be modelled
+// by GenericScheduler:
+// . When accessing the result of an SGPR load instruction, you have to wait
+// for all SGPR load instructions issued before the current instruction to
+// have finished.
+// . When accessing the result of a VGPR load instruction, you have to wait
+// for all VGPR load instructions issued before the one you are interested
+// in to finish.
+// . The lower the register pressure, the better the load latencies are hidden.
+//
+// Moreover, some specificities (like the fact that a lot of instructions in
+// the shader have few dependencies) make the generic scheduler behave
+// unpredictably. For example, when register pressure becomes high, it can
+// either manage to prevent register pressure from going too high, or it can
+// increase register pressure even more than if it hadn't taken register
+// pressure into account.
+//
+// Other bad behaviours also show up, like loading a constant into a VGPR at
+// the beginning of the shader when it won't be needed until the end.
+//
+// The scheduling problem for SI can be split into three main parts:
+// . Hiding high latencies (texture sampling, etc)
+// . Hiding low latencies (SGPR constant loading, etc)
+// . Keeping register usage low for better latency hiding and general
+// performance
+//
+// Some other things can also affect performance, but are hard to predict
+// (cache usage, the fact that the HW can issue several instructions from
+// different wavefronts if they are of different types, etc.)
+//
+// This scheduler tries to solve the scheduling problem by dividing it into
+// simpler sub-problems. It divides the instructions into blocks, schedules
+// locally inside the blocks where it takes care of low latencies, and then
+// chooses the order of the blocks by taking care of high latencies.
+// Dividing the instructions into blocks makes it easier to keep register
+// usage low.
+//
+// First the instructions are put into blocks.
+// We want the blocks to help control register usage and hide high latencies
+// later. To help control register usage, we typically want all local
+// computations, where for example a result is created that can be consumed
+// right away, to be contained in a block. Block inputs and outputs would
+// typically be important results that are needed in several locations of
+// the shader. Since we do want blocks to help hide high latencies, we want
+// the instructions inside the block to have a minimal set of dependencies
+// on high latencies. It will make it easy to pick blocks to hide specific
+// high latencies.
+// The block creation algorithm is divided into several steps, and several
+// variants can be tried during the scheduling process.
+//
+// Second, the order of the instructions inside the blocks is chosen.
+// At that step we take into account only register usage and the hiding
+// of low latency instructions.
+//
+// Third, the block order is chosen; there we try to hide high latencies
+// and keep register usage low.
+//
+// After the third step, a pass is done to improve the hiding of low
+// latencies.
+//
+// Note: when talking about 'low latency' or 'high latency' we include
+// both the latency to get the cache (or global mem) data into the register,
+// and the bandwidth limitations.
+// Increasing the number of active wavefronts helps hide the former, but it
+// doesn't solve the latter, which is why even if the wavefront count is
+// high, we have to try to have as many instructions hiding high latencies
+// as possible.
+// The OpenCL doc gives for example a latency of 400 cycles for a global mem
+// access, which is hidden by 10 instructions if the wavefront count is 10.
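+// As a rough, hedged sanity check of that figure (assuming, as in the notes
+// below, that a wavefront issues one instruction every 4 cycles):
+//   slots_to_hide = latency / (4 * wavefront_count)
+//                 = 400 / (4 * 10)
+//                 = 10 independent instructions per wavefront.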
+
+// Some figures taken from AMD docs:
+// Both texture and constant L1 caches are 4-way associative with 64 bytes
+// lines.
+// Constant cache is shared with 4 CUs.
+// For texture sampling, the address generation unit receives 4 texture
+// addresses per cycle, thus we could expect texture sampling latency to be
+// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
+// instructions in a wavefront group are executed every 4 cycles),
+// or 16 instructions if the other wavefronts associated to the 3 other VALUs
+// of the CU do texture sampling too. (Don't take these figures too seriously,
+// as I'm not 100% sure of the computation)
+// Data exports should get similar latency.
+// For constant loading, the cache is shared with 4 CUs.
+// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
+// I guess if the other CUs don't read the cache, it can go up to 64B/cycle.
+// It means a simple s_buffer_load should take one instruction to hide, as
+// well as an s_buffer_loadx2 and potentially an s_buffer_loadx8 if on the
+// same cache line.
+//
+// As of today the driver doesn't preload the constants in cache, thus the
+// first loads get extra latency. The doc says global memory access can be
+// 300-600 cycles. We do not specifically take that into account when
+// scheduling, as we expect the driver to be able to preload the constants
+// soon.
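+// Rough arithmetic behind the one-instruction estimate above (hedged, under
+// the same 4-cycles-per-instruction assumption): at 16B/cycle, a 64-byte
+// cache line streams in 4 cycles, i.e. about one instruction slot.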
+
+
+// common code //
+
+#ifndef NDEBUG
+
+static const char *getReasonStr(SIScheduleCandReason Reason) {
+ switch (Reason) {
+ case NoCand: return "NOCAND";
+ case RegUsage: return "REGUSAGE";
+ case Latency: return "LATENCY";
+ case Successor: return "SUCCESSOR";
+ case Depth: return "DEPTH";
+ case NodeOrder: return "ORDER";
+ }
+ llvm_unreachable("Unknown reason!");
+}
+
+#endif
+
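+// Helpers for the candidate comparison cascades below. They return true when
+// the comparison is decisive: if TryVal wins, TryCand.Reason is set to
+// Reason; if CandVal wins, Cand.Reason is lowered to the higher-priority
+// reason. On a tie, the repeated criterion is recorded and false is
+// returned, so that the caller falls through to the next criterion.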
+static bool tryLess(int TryVal, int CandVal,
+ SISchedulerCandidate &TryCand,
+ SISchedulerCandidate &Cand,
+ SIScheduleCandReason Reason) {
+ if (TryVal < CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal > CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ Cand.setRepeat(Reason);
+ return false;
+}
+
+static bool tryGreater(int TryVal, int CandVal,
+ SISchedulerCandidate &TryCand,
+ SISchedulerCandidate &Cand,
+ SIScheduleCandReason Reason) {
+ if (TryVal > CandVal) {
+ TryCand.Reason = Reason;
+ return true;
+ }
+ if (TryVal < CandVal) {
+ if (Cand.Reason > Reason)
+ Cand.Reason = Reason;
+ return true;
+ }
+ Cand.setRepeat(Reason);
+ return false;
+}
+
+// SIScheduleBlock //
+
+void SIScheduleBlock::addUnit(SUnit *SU) {
+ NodeNum2Index[SU->NodeNum] = SUnits.size();
+ SUnits.push_back(SU);
+}
+
+#ifndef NDEBUG
+
+void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
+
+ dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
+ dbgs() << '\n';
+}
+#endif
+
+void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
+ SISchedCandidate &TryCand) {
+ // Initialize the candidate if needed.
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return;
+ }
+
+ if (Cand.SGPRUsage > 60 &&
+ tryLess(TryCand.SGPRUsage, Cand.SGPRUsage, TryCand, Cand, RegUsage))
+ return;
+
+  // Schedule low latency instructions as close to the top as possible.
+  // Order of priority is:
+  // . Low latency instructions which do not depend on other low latency
+  // instructions we haven't waited for
+  // . Other instructions which do not depend on low latency instructions
+  // we haven't waited for
+  // . Low latencies
+  // . All other instructions
+  // The goal is to get: low latency instructions - independent instructions
+  // - (possibly some more low latency instructions)
+  // - instructions that depend on the first low latency instructions.
+  // If the block contains a lot of constant loads, the SGPR usage could go
+  // quite high; going above the arbitrary limit of 60 encourages using the
+  // already loaded constants (in order to release some SGPRs) before
+  // loading more.
+ if (tryLess(TryCand.HasLowLatencyNonWaitedParent,
+ Cand.HasLowLatencyNonWaitedParent,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (TryCand.IsLowLatency &&
+ tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
+ TryCand, Cand, SIScheduleCandReason::Depth))
+ return;
+
+ if (tryLess(TryCand.VGPRUsage, Cand.VGPRUsage, TryCand, Cand, RegUsage))
+ return;
+
+ // Fall through to original instruction order.
+ if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
+ TryCand.Reason = NodeOrder;
+ }
+}
+
+SUnit* SIScheduleBlock::pickNode() {
+ SISchedCandidate TopCand;
+
+ for (SUnit* SU : TopReadySUs) {
+ SISchedCandidate TryCand;
+ std::vector<unsigned> pressure;
+ std::vector<unsigned> MaxPressure;
+ // Predict register usage after this instruction.
+ TryCand.SU = SU;
+ TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
+ TryCand.SGPRUsage = pressure[DAG->getSGPRSetID()];
+ TryCand.VGPRUsage = pressure[DAG->getVGPRSetID()];
+ TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
+ TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
+ TryCand.HasLowLatencyNonWaitedParent =
+ HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
+ tryCandidateTopDown(TopCand, TryCand);
+ if (TryCand.Reason != NoCand)
+ TopCand.setBest(TryCand);
+ }
+
+ return TopCand.SU;
+}
+
+
+// Schedule something valid.
+void SIScheduleBlock::fastSchedule() {
+ TopReadySUs.clear();
+ if (Scheduled)
+ undoSchedule();
+
+ for (SUnit* SU : SUnits) {
+ if (!SU->NumPredsLeft)
+ TopReadySUs.push_back(SU);
+ }
+
+ while (!TopReadySUs.empty()) {
+ SUnit *SU = TopReadySUs[0];
+ ScheduledSUnits.push_back(SU);
+ nodeScheduled(SU);
+ }
+
+ Scheduled = true;
+}
+
+// Returns true if the register was defined between First and Last.
+static bool isDefBetween(unsigned Reg,
+ SlotIndex First, SlotIndex Last,
+ const MachineRegisterInfo *MRI,
+ const LiveIntervals *LIS) {
+ for (MachineRegisterInfo::def_instr_iterator
+ UI = MRI->def_instr_begin(Reg),
+ UE = MRI->def_instr_end(); UI != UE; ++UI) {
+ const MachineInstr* MI = &*UI;
+ if (MI->isDebugValue())
+ continue;
+ SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot();
+ if (InstSlot >= First && InstSlot <= Last)
+ return true;
+ }
+ return false;
+}
+
+void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock) {
+ IntervalPressure Pressure, BotPressure;
+ RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
+ LiveIntervals *LIS = DAG->getLIS();
+ MachineRegisterInfo *MRI = DAG->getMRI();
+ DAG->initRPTracker(TopRPTracker);
+ DAG->initRPTracker(BotRPTracker);
+ DAG->initRPTracker(RPTracker);
+
+  // Goes through all SUs. RPTracker captures what had to be alive for the
+  // SUs to execute, and what is still alive at the end.
+ for (SUnit* SU : ScheduledSUnits) {
+ RPTracker.setPos(SU->getInstr());
+ RPTracker.advance();
+ }
+
+ // Close the RPTracker to finalize live ins/outs.
+ RPTracker.closeRegion();
+
+ // Initialize the live ins and live outs.
+ TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
+ BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
+
+  // Do not track physical registers, because it messes up the tracking.
+ for (unsigned Reg : RPTracker.getPressure().LiveInRegs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ LiveInRegs.insert(Reg);
+ }
+ LiveOutRegs.clear();
+  // There are several possibilities to distinguish:
+  // 1) Reg is not input to any instruction in the block, but is output of one
+  // 2) 1) + read in the block and not needed after it
+  // 3) 1) + read in the block but needed in another block
+  // 4) Reg is input of an instruction but another block will read it too
+  // 5) Reg is input of an instruction and then rewritten in the block.
+  // result is not read in the block (implies used in another block)
+  // 6) Reg is input of an instruction and then rewritten in the block.
+  // result is read in the block and not needed in another block
+  // 7) Reg is input of an instruction and then rewritten in the block.
+  // result is read in the block but also needed in another block
+  // LiveInRegs will contain all the regs in situations 4, 5, 6 and 7.
+  // We want LiveOutRegs to contain only regs whose content will be read after
+  // in another block, and whose content was written in the current block,
+  // that is we want it to get 1, 3, 5 and 7.
+  // Since we made the MIs of a block be packed all together before
+  // scheduling, the LiveIntervals were correct, and the RPTracker was
+  // able to correctly handle 5 vs 6, and 2 vs 3.
+  // (Note: this is not sufficient for the RPTracker to avoid mistakes
+  // for case 4.)
+  // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect) 4, 5
+  // and 7.
+  // Comparing to LiveInRegs is not sufficient to differentiate 4 vs 5 and 7.
+  // The use of isDefBetween removes case 4.
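+  // A hypothetical illustration of the two cases isDefBetween separates:
+  // if %a is defined before the block and only read here and in a later
+  // block, that is case 4 and must not enter LiveOutRegs; if instead the
+  // block rewrites %a (e.g. after coalescing) and a later block reads the
+  // new value, that is case 7 and belongs in LiveOutRegs. Both appear in
+  // the tracker's LiveOutRegs, but only case 7 has a def inside
+  // [BeginBlock, EndBlock].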
+ for (unsigned Reg : RPTracker.getPressure().LiveOutRegs) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ isDefBetween(Reg, LIS->getInstructionIndex(BeginBlock).getRegSlot(),
+ LIS->getInstructionIndex(EndBlock).getRegSlot(),
+ MRI, LIS)) {
+ LiveOutRegs.insert(Reg);
+ }
+ }
+
+  // Pressure = sum over all alive registers of their register size.
+  // Internally llvm will represent some registers as one big 128-bit
+  // register for example, but it actually corresponds to 4 actual 32-bit
+  // registers.
+  // Thus Pressure is not equal to num_alive_registers * constant.
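+  // For example (hypothetical numbers): one live 128-bit vreg contributes a
+  // weight of 4 to the 32-bit pressure set, the same as four independent
+  // live 32-bit vregs.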
+ LiveInPressure = TopPressure.MaxSetPressure;
+ LiveOutPressure = BotPressure.MaxSetPressure;
+
+ // Prepares TopRPTracker for top down scheduling.
+ TopRPTracker.closeTop();
+}
+
+void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock) {
+ if (!Scheduled)
+ fastSchedule();
+
+ // PreScheduling phase to set LiveIn and LiveOut.
+ initRegPressure(BeginBlock, EndBlock);
+ undoSchedule();
+
+ // Schedule for real now.
+
+ TopReadySUs.clear();
+
+ for (SUnit* SU : SUnits) {
+ if (!SU->NumPredsLeft)
+ TopReadySUs.push_back(SU);
+ }
+
+ while (!TopReadySUs.empty()) {
+ SUnit *SU = pickNode();
+ ScheduledSUnits.push_back(SU);
+ TopRPTracker.setPos(SU->getInstr());
+ TopRPTracker.advance();
+ nodeScheduled(SU);
+ }
+
+ // TODO: compute InternalAdditionnalPressure.
+ InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
+
+ // Check everything is right.
+#ifndef NDEBUG
+ assert(SUnits.size() == ScheduledSUnits.size() &&
+ TopReadySUs.empty());
+ for (SUnit* SU : SUnits) {
+ assert(SU->isScheduled &&
+ SU->NumPredsLeft == 0);
+ }
+#endif
+
+ Scheduled = true;
+}
+
+void SIScheduleBlock::undoSchedule() {
+ for (SUnit* SU : SUnits) {
+ SU->isScheduled = false;
+ for (SDep& Succ : SU->Succs) {
+ if (BC->isSUInBlock(Succ.getSUnit(), ID))
+ undoReleaseSucc(SU, &Succ);
+ }
+ }
+ HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+ ScheduledSUnits.clear();
+ Scheduled = false;
+}
+
+void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ ++SuccSU->WeakPredsLeft;
+ return;
+ }
+ ++SuccSU->NumPredsLeft;
+}
+
+void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
+ SUnit *SuccSU = SuccEdge->getSUnit();
+
+ if (SuccEdge->isWeak()) {
+ --SuccSU->WeakPredsLeft;
+ return;
+ }
+#ifndef NDEBUG
+ if (SuccSU->NumPredsLeft == 0) {
+ dbgs() << "*** Scheduling failed! ***\n";
+ SuccSU->dump(DAG);
+ dbgs() << " has been released too many times!\n";
+ llvm_unreachable(nullptr);
+ }
+#endif
+
+ --SuccSU->NumPredsLeft;
+}
+
+/// Release the successors of SU that are in the block (if InOrOutBlock is
+/// true) or outside the block (if InOrOutBlock is false).
+void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
+ for (SDep& Succ : SU->Succs) {
+ SUnit *SuccSU = Succ.getSUnit();
+
+ if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
+ continue;
+
+ releaseSucc(SU, &Succ);
+ if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
+ TopReadySUs.push_back(SuccSU);
+ }
+}
+
+void SIScheduleBlock::nodeScheduled(SUnit *SU) {
+ // Is in TopReadySUs
+ assert (!SU->NumPredsLeft);
+ std::vector<SUnit*>::iterator I =
+ std::find(TopReadySUs.begin(), TopReadySUs.end(), SU);
+ if (I == TopReadySUs.end()) {
+ dbgs() << "Data Structure Bug in SI Scheduler\n";
+ llvm_unreachable(nullptr);
+ }
+ TopReadySUs.erase(I);
+
+ releaseSuccessors(SU, true);
+  // Scheduling this node will trigger a wait for its low latency parents,
+  // thus propagate to the other instructions that they do not need to wait
+  // either.
+ if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
+ HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
+
+ if (DAG->IsLowLatencySU[SU->NodeNum]) {
+ for (SDep& Succ : SU->Succs) {
+ std::map<unsigned, unsigned>::iterator I =
+ NodeNum2Index.find(Succ.getSUnit()->NodeNum);
+ if (I != NodeNum2Index.end())
+ HasLowLatencyNonWaitedParent[I->second] = 1;
+ }
+ }
+ SU->isScheduled = true;
+}
+
+void SIScheduleBlock::finalizeUnits() {
+ // We remove links from outside blocks to enable scheduling inside the block.
+ for (SUnit* SU : SUnits) {
+ releaseSuccessors(SU, false);
+ if (DAG->IsHighLatencySU[SU->NodeNum])
+ HighLatencyBlock = true;
+ }
+ HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
+}
+
+// We maintain ascending order of IDs.
+void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
+ unsigned PredID = Pred->getID();
+
+  // Check if not already a predecessor.
+ for (SIScheduleBlock* P : Preds) {
+ if (PredID == P->getID())
+ return;
+ }
+ Preds.push_back(Pred);
+
+#ifndef NDEBUG
+ for (SIScheduleBlock* S : Succs) {
+ if (PredID == S->getID())
+ assert(!"Loop in the Block Graph!\n");
+ }
+#endif
+}
+
+void SIScheduleBlock::addSucc(SIScheduleBlock *Succ) {
+ unsigned SuccID = Succ->getID();
+
+  // Check if not already a successor.
+ for (SIScheduleBlock* S : Succs) {
+ if (SuccID == S->getID())
+ return;
+ }
+ if (Succ->isHighLatencyBlock())
+ ++NumHighLatencySuccessors;
+ Succs.push_back(Succ);
+#ifndef NDEBUG
+ for (SIScheduleBlock* P : Preds) {
+ if (SuccID == P->getID())
+ assert("Loop in the Block Graph!\n");
+ }
+#endif
+}
+
+#ifndef NDEBUG
+void SIScheduleBlock::printDebug(bool full) {
+ dbgs() << "Block (" << ID << ")\n";
+ if (!full)
+ return;
+
+ dbgs() << "\nContains High Latency Instruction: "
+ << HighLatencyBlock << '\n';
+ dbgs() << "\nDepends On:\n";
+ for (SIScheduleBlock* P : Preds) {
+ P->printDebug(false);
+ }
+
+ dbgs() << "\nSuccessors:\n";
+ for (SIScheduleBlock* S : Succs) {
+ S->printDebug(false);
+ }
+
+ if (Scheduled) {
+ dbgs() << "LiveInPressure " << LiveInPressure[DAG->getSGPRSetID()] << ' '
+ << LiveInPressure[DAG->getVGPRSetID()] << '\n';
+ dbgs() << "LiveOutPressure " << LiveOutPressure[DAG->getSGPRSetID()] << ' '
+ << LiveOutPressure[DAG->getVGPRSetID()] << "\n\n";
+ dbgs() << "LiveIns:\n";
+ for (unsigned Reg : LiveInRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+
+ dbgs() << "\nLiveOuts:\n";
+ for (unsigned Reg : LiveOutRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+ }
+
+ dbgs() << "\nInstructions:\n";
+  for (SUnit* SU : SUnits) {
+    SU->dump(DAG);
+  }
+
+ dbgs() << "///////////////////////\n";
+}
+
+#endif
+
+// SIScheduleBlockCreator //
+
+SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG) :
+DAG(DAG) {
+}
+
+SIScheduleBlockCreator::~SIScheduleBlockCreator() {
+}
+
+SIScheduleBlocks
+SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
+ std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
+ Blocks.find(BlockVariant);
+ if (B == Blocks.end()) {
+ SIScheduleBlocks Res;
+ createBlocksForVariant(BlockVariant);
+ topologicalSort();
+ scheduleInsideBlocks();
+ fillStats();
+ Res.Blocks = CurrentBlocks;
+ Res.TopDownIndex2Block = TopDownIndex2Block;
+ Res.TopDownBlock2Index = TopDownBlock2Index;
+ Blocks[BlockVariant] = Res;
+ return Res;
+ } else {
+ return B->second;
+ }
+}
+
+bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
+ if (SU->NodeNum >= DAG->SUnits.size())
+ return false;
+ return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesAlone() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum]) {
+ CurrentColoring[SU->NodeNum] = NextReservedID++;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorHighLatenciesGroups() {
+ unsigned DAGSize = DAG->SUnits.size();
+ unsigned NumHighLatencies = 0;
+ unsigned GroupSize;
+ unsigned Color = NextReservedID;
+ unsigned Count = 0;
+ std::set<unsigned> FormingGroup;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum])
+ ++NumHighLatencies;
+ }
+
+ if (NumHighLatencies == 0)
+ return;
+
+ if (NumHighLatencies <= 6)
+ GroupSize = 2;
+ else if (NumHighLatencies <= 12)
+ GroupSize = 3;
+ else
+ GroupSize = 4;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ if (DAG->IsHighLatencySU[SU->NodeNum]) {
+      bool CompatibleGroup = true;
+ unsigned ProposedColor = Color;
+ for (unsigned j : FormingGroup) {
+ // TODO: Currently CompatibleGroup will always be false,
+ // because the graph enforces the load order. This
+ // can be fixed, but as keeping the load order is often
+ // good for performance that causes a performance hit (both
+ // the default scheduler and this scheduler).
+ // When this scheduler determines a good load order,
+ // this can be fixed.
+ if (!DAG->canAddEdge(SU, &DAG->SUnits[j]) ||
+ !DAG->canAddEdge(&DAG->SUnits[j], SU))
+ CompatibleGroup = false;
+ }
+ if (!CompatibleGroup || ++Count == GroupSize) {
+ FormingGroup.clear();
+ Color = ++NextReservedID;
+ if (!CompatibleGroup) {
+ ProposedColor = Color;
+ FormingGroup.insert(SU->NodeNum);
+ }
+ Count = 0;
+ } else {
+ FormingGroup.insert(SU->NodeNum);
+ }
+ CurrentColoring[SU->NodeNum] = ProposedColor;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorComputeReservedDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<std::set<unsigned>, unsigned> ColorCombinations;
+
+ CurrentTopDownReservedDependencyColoring.clear();
+ CurrentBottomUpReservedDependencyColoring.clear();
+
+ CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
+ CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
+
+ // Traverse TopDown, and give different colors to SUs depending
+ // on which combination of High Latencies they depend on.
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->TopDownIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ // Already given.
+ if (CurrentColoring[SU->NodeNum]) {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ CurrentColoring[SU->NodeNum];
+ continue;
+ }
+
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+ continue;
+ if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
+ SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
+ }
+ // Color 0 by default.
+ if (SUColors.empty())
+ continue;
+    // Same color as parents.
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ *SUColors.begin();
+ else {
+ std::map<std::set<unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
+ NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+ }
+
+ ColorCombinations.clear();
+
+ // Same as before, but BottomUp.
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ // Already given.
+ if (CurrentColoring[SU->NodeNum]) {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ CurrentColoring[SU->NodeNum];
+ continue;
+ }
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
+ SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
+ }
+ // Keep color 0.
+ if (SUColors.empty())
+ continue;
+    // Same color as parents.
+ if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ *SUColors.begin();
+ else {
+ std::map<std::set<unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
+ NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
+
+  // Give a unique color to every different combination of colors given by
+  // the top down and bottom up reserved node dependency colorings.
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ std::pair<unsigned, unsigned> SUColors;
+
+ // High latency instructions: already given.
+ if (CurrentColoring[SU->NodeNum])
+ continue;
+
+ SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
+ SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
+
+ std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
+ ColorCombinations.find(SUColors);
+ if (Pos != ColorCombinations.end()) {
+ CurrentColoring[SU->NodeNum] = Pos->second;
+ } else {
+ CurrentColoring[SU->NodeNum] = NextNonReservedID;
+ ColorCombinations[SUColors] = NextNonReservedID++;
+ }
+ }
+}
+
+void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::vector<int> PendingColoring = CurrentColoring;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+ std::set<unsigned> SUColorsPending;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
+ CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
+ CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && SUColorsPending.size() == 1)
+ PendingColoring[SU->NodeNum] = *SUColors.begin();
+ else // TODO: Attribute new colors depending on color
+ // combination of children.
+ PendingColoring[SU->NodeNum] = NextNonReservedID++;
+ }
+ CurrentColoring = PendingColoring;
+}
+
+
+void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+ unsigned PreviousColor;
+ std::set<unsigned> SeenColors;
+
+ if (DAGSize <= 1)
+ return;
+
+ PreviousColor = CurrentColoring[0];
+
+ for (unsigned i = 1, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ unsigned CurrentColor = CurrentColoring[i];
+ unsigned PreviousColorSave = PreviousColor;
+ assert(i == SU->NodeNum);
+
+ if (CurrentColor != PreviousColor)
+ SeenColors.insert(PreviousColor);
+ PreviousColor = CurrentColor;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (SeenColors.find(CurrentColor) == SeenColors.end())
+ continue;
+
+ if (PreviousColorSave != CurrentColor)
+ CurrentColoring[i] = NextNonReservedID++;
+ else
+ CurrentColoring[i] = CurrentColoring[i-1];
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+    // No predecessor: VGPR constant loading.
+    // Low latency instructions usually have a predecessor (the address).
+ if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
+ unsigned DAGSize = DAG->SUnits.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ }
+}
+
+void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<unsigned, unsigned> ColorCount;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ unsigned color = CurrentColoring[SU->NodeNum];
+ std::map<unsigned, unsigned>::iterator Pos = ColorCount.find(color);
+ if (Pos != ColorCount.end()) {
+ ++ColorCount[color];
+ } else {
+ ColorCount[color] = 1;
+ }
+ }
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ unsigned color = CurrentColoring[SU->NodeNum];
+ std::set<unsigned> SUColors;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ if (ColorCount[color] > 1)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ SUColors.insert(CurrentColoring[Succ->NodeNum]);
+ }
+ if (SUColors.size() == 1 && *SUColors.begin() != color) {
+ --ColorCount[color];
+ CurrentColoring[SU->NodeNum] = *SUColors.begin();
+ ++ColorCount[*SUColors.begin()];
+ }
+ }
+}
+
+void SIScheduleBlockCreator::cutHugeBlocks() {
+ // TODO
+}
+
+void SIScheduleBlockCreator::regroupNoUserInstructions() {
+ unsigned DAGSize = DAG->SUnits.size();
+ int GroupID = NextNonReservedID++;
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[DAG->BottomUpIndex2SU[i]];
+ bool hasSuccessor = false;
+
+ if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
+ continue;
+
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ hasSuccessor = true;
+ }
+ if (!hasSuccessor)
+ CurrentColoring[SU->NodeNum] = GroupID;
+ }
+}
+
+void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
+ unsigned DAGSize = DAG->SUnits.size();
+ std::map<unsigned,unsigned> RealID;
+
+ CurrentBlocks.clear();
+ CurrentColoring.clear();
+ CurrentColoring.resize(DAGSize, 0);
+ Node2CurrentBlock.clear();
+
+ // Restore links previous scheduling variant has overridden.
+ DAG->restoreSULinksLeft();
+
+ NextReservedID = 1;
+ NextNonReservedID = DAGSize + 1;
+
+ DEBUG(dbgs() << "Coloring the graph\n");
+
+ if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
+ colorHighLatenciesGroups();
+ else
+ colorHighLatenciesAlone();
+ colorComputeReservedDependencies();
+ colorAccordingToReservedDependencies();
+ colorEndsAccordingToDependencies();
+ if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
+ colorForceConsecutiveOrderInGroup();
+ regroupNoUserInstructions();
+ colorMergeConstantLoadsNextGroup();
+ colorMergeIfPossibleNextGroupOnlyForReserved();
+
+  // Put the SUs of the same color into the same block.
+ Node2CurrentBlock.resize(DAGSize, -1);
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ unsigned Color = CurrentColoring[SU->NodeNum];
+ if (RealID.find(Color) == RealID.end()) {
+ int ID = CurrentBlocks.size();
+ BlockPtrs.push_back(
+ make_unique<SIScheduleBlock>(DAG, this, ID));
+ CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
+ RealID[Color] = ID;
+ }
+ CurrentBlocks[RealID[Color]]->addUnit(SU);
+ Node2CurrentBlock[SU->NodeNum] = RealID[Color];
+ }
+
+ // Build dependencies between blocks.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &DAG->SUnits[i];
+ int SUID = Node2CurrentBlock[i];
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
+ continue;
+ if (Node2CurrentBlock[Succ->NodeNum] != SUID)
+ CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]]);
+ }
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
+ continue;
+ if (Node2CurrentBlock[Pred->NodeNum] != SUID)
+ CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
+ }
+ }
+
+  // Free the roots and leaves of all blocks to enable scheduling inside them.
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->finalizeUnits();
+ }
+ DEBUG(
+ dbgs() << "Blocks created:\n\n";
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ }
+ );
+}
+
+// Two functions taken from CodeGen/MachineScheduler.cpp.
+
+/// If this iterator is a debug value, increment until reaching the End or a
+/// non-debug instruction.
+static MachineBasicBlock::const_iterator
+nextIfDebug(MachineBasicBlock::const_iterator I,
+ MachineBasicBlock::const_iterator End) {
+ for(; I != End; ++I) {
+ if (!I->isDebugValue())
+ break;
+ }
+ return I;
+}
+
+/// Non-const version.
+static MachineBasicBlock::iterator
+nextIfDebug(MachineBasicBlock::iterator I,
+ MachineBasicBlock::const_iterator End) {
+ // Cast the return value to nonconst MachineInstr, then cast to an
+ // instr_iterator, which does not check for null, finally return a
+ // bundle_iterator.
+ return MachineBasicBlock::instr_iterator(
+ const_cast<MachineInstr*>(
+ &*nextIfDebug(MachineBasicBlock::const_iterator(I), End)));
+}
+
+void SIScheduleBlockCreator::topologicalSort() {
+ unsigned DAGSize = CurrentBlocks.size();
+ std::vector<int> WorkList;
+
+ DEBUG(dbgs() << "Topological Sort\n");
+
+ WorkList.reserve(DAGSize);
+ TopDownIndex2Block.resize(DAGSize);
+ TopDownBlock2Index.resize(DAGSize);
+ BottomUpIndex2Block.resize(DAGSize);
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ unsigned Degree = Block->getSuccs().size();
+ TopDownBlock2Index[i] = Degree;
+ if (Degree == 0) {
+ WorkList.push_back(i);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ int i = WorkList.back();
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ WorkList.pop_back();
+ TopDownBlock2Index[i] = --Id;
+ TopDownIndex2Block[Id] = i;
+ for (SIScheduleBlock* Pred : Block->getPreds()) {
+ if (!--TopDownBlock2Index[Pred->getID()])
+ WorkList.push_back(Pred->getID());
+ }
+ }
+
+#ifndef NDEBUG
+ // Check correctness of the ordering.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ for (SIScheduleBlock* Pred : Block->getPreds()) {
+ assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&
+ "Wrong Top Down topological sorting");
+ }
+ }
+#endif
+
+ BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
+ TopDownIndex2Block.rend());
+}
+
+void SIScheduleBlockCreator::scheduleInsideBlocks() {
+ unsigned DAGSize = CurrentBlocks.size();
+
+ DEBUG(dbgs() << "\nScheduling Blocks\n\n");
+
+  // We first schedule a valid ordering, such that each Block corresponds
+  // to a contiguous range of instructions.
+ DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n");
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->fastSchedule();
+ }
+
+  // Note: the following code, and the part restoring the previous position,
+  // are by far the most expensive operations of the Scheduler.
+
+ // Do not update CurrentTop.
+ MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
+ std::vector<MachineBasicBlock::iterator> PosOld;
+ std::vector<MachineBasicBlock::iterator> PosNew;
+ PosOld.reserve(DAG->SUnits.size());
+ PosNew.reserve(DAG->SUnits.size());
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = TopDownIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+ for (SUnit* SU : SUs) {
+ MachineInstr *MI = SU->getInstr();
+ MachineBasicBlock::iterator Pos = MI;
+ PosOld.push_back(Pos);
+ if (&*CurrentTopFastSched == MI) {
+ PosNew.push_back(Pos);
+ CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
+ DAG->getCurrentBottom());
+ } else {
+ // Update the instruction stream.
+ DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
+
+ // Update LiveIntervals.
+        // Note: Moving all instructions and calling handleMove every time
+        // is the most CPU intensive operation of the scheduler.
+        // It would gain a lot if there were a way to recompute the
+        // LiveIntervals for the entire scheduling region.
+ DAG->getLIS()->handleMove(MI, /*UpdateFlags=*/true);
+ PosNew.push_back(CurrentTopFastSched);
+ }
+ }
+ }
+
+  // Now each Block of SUs corresponds to a contiguous block of MIs.
+  // We do the final schedule for the instructions inside the block.
+  // The property that all the SUs of the Block are grouped together as MIs
+  // is used for correct reg usage tracking.
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+ Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
+ }
+
+ DEBUG(dbgs() << "Restoring MI Pos\n");
+ // Restore old ordering (which prevents a LIS->handleMove bug).
+ for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
+ MachineBasicBlock::iterator POld = PosOld[i-1];
+ MachineBasicBlock::iterator PNew = PosNew[i-1];
+ if (PNew != POld) {
+ // Update the instruction stream.
+ DAG->getBB()->splice(POld, DAG->getBB(), PNew);
+
+ // Update LiveIntervals.
+ DAG->getLIS()->handleMove(POld, /*UpdateFlags=*/true);
+ }
+ }
+
+ DEBUG(
+ for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = CurrentBlocks[i];
+ Block->printDebug(true);
+ }
+ );
+}
+
+void SIScheduleBlockCreator::fillStats() {
+ unsigned DAGSize = CurrentBlocks.size();
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = TopDownIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ if (Block->getPreds().size() == 0)
+ Block->Depth = 0;
+ else {
+ unsigned Depth = 0;
+ for (SIScheduleBlock *Pred : Block->getPreds()) {
+ if (Depth < Pred->Depth + 1)
+ Depth = Pred->Depth + 1;
+ }
+ Block->Depth = Depth;
+ }
+ }
+
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ int BlockIndice = BottomUpIndex2Block[i];
+ SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
+ if (Block->getSuccs().size() == 0)
+ Block->Height = 0;
+ else {
+ unsigned Height = 0;
+ for (SIScheduleBlock *Succ : Block->getSuccs()) {
+ if (Height < Succ->Height + 1)
+ Height = Succ->Height + 1;
+ }
+ Block->Height = Height;
+ }
+ }
+}
+
+// SIScheduleBlockScheduler //
+
+SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+ SISchedulerBlockSchedulerVariant Variant,
+ SIScheduleBlocks BlocksStruct) :
+ DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
+ LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
+ SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
+
+  // Fill the usage of every output.
+  // Warning: while by construction we always have a link between two blocks
+  // when one needs a result from the other, the number of users of an output
+  // is not simply the number of child blocks having the same virtual
+  // register as input.
+  // Here is an example: A produces x and y. B consumes x and produces x'.
+  // C consumes x' and y. The register coalescer may have assigned the same
+  // virtual register to x and x'.
+  // To count accurately, we do a topological sort. In case the register is
+  // found for several parents, we increment the usage of the one with the
+  // highest topological index.
+ LiveOutRegsNumUsages.resize(Blocks.size());
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ for (unsigned Reg : Block->getInRegs()) {
+ bool Found = false;
+ int topoInd = -1;
+ for (SIScheduleBlock* Pred: Block->getPreds()) {
+ std::set<unsigned> PredOutRegs = Pred->getOutRegs();
+ std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
+
+ if (RegPos != PredOutRegs.end()) {
+ Found = true;
+ if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
+ topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
+ }
+ }
+ }
+
+ if (!Found)
+ continue;
+
+ int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
+ std::map<unsigned, unsigned>::iterator RegPos =
+ LiveOutRegsNumUsages[PredID].find(Reg);
+ if (RegPos != LiveOutRegsNumUsages[PredID].end()) {
+ ++LiveOutRegsNumUsages[PredID][Reg];
+ } else {
+ LiveOutRegsNumUsages[PredID][Reg] = 1;
+ }
+ }
+ }
+
+ LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
+ BlockNumPredsLeft.resize(Blocks.size());
+ BlockNumSuccsLeft.resize(Blocks.size());
+
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ BlockNumPredsLeft[i] = Block->getPreds().size();
+ BlockNumSuccsLeft[i] = Block->getSuccs().size();
+ }
+
+#ifndef NDEBUG
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ assert(Block->getID() == i);
+ }
+#endif
+
+ std::set<unsigned> InRegs = DAG->getInRegs();
+ addLiveRegs(InRegs);
+
+ // Fill LiveRegsConsumers for regs that were already
+ // defined before scheduling.
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ for (unsigned Reg : Block->getInRegs()) {
+ bool Found = false;
+ for (SIScheduleBlock* Pred: Block->getPreds()) {
+ std::set<unsigned> PredOutRegs = Pred->getOutRegs();
+ std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
+
+ if (RegPos != PredOutRegs.end()) {
+ Found = true;
+ break;
+ }
+ }
+
+ if (!Found) {
+ if (LiveRegsConsumers.find(Reg) == LiveRegsConsumers.end())
+ LiveRegsConsumers[Reg] = 1;
+ else
+ ++LiveRegsConsumers[Reg];
+ }
+ }
+ }
+
+ for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
+ SIScheduleBlock *Block = Blocks[i];
+ if (BlockNumPredsLeft[i] == 0) {
+ ReadyBlocks.push_back(Block);
+ }
+ }
+
+ while (SIScheduleBlock *Block = pickBlock()) {
+ BlocksScheduled.push_back(Block);
+ blockScheduled(Block);
+ }
+
+ DEBUG(
+ dbgs() << "Block Order:";
+ for (SIScheduleBlock* Block : BlocksScheduled) {
+ dbgs() << ' ' << Block->getID();
+ }
+ );
+}
+
+bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand) {
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ // Try to hide high latencies.
+ if (tryLess(TryCand.LastPosHighLatParentScheduled,
+ Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
+ return true;
+ // Schedule high latencies early so you can hide them better.
+ if (tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
+ TryCand, Cand, Latency))
+ return true;
+ if (TryCand.IsHighLatency && tryGreater(TryCand.Height, Cand.Height,
+ TryCand, Cand, Depth))
+ return true;
+ if (tryGreater(TryCand.NumHighLatencySuccessors,
+ Cand.NumHighLatencySuccessors,
+ TryCand, Cand, Successor))
+ return true;
+ return false;
+}
+
+bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand) {
+ if (!Cand.isValid()) {
+ TryCand.Reason = NodeOrder;
+ return true;
+ }
+
+ if (tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
+ TryCand, Cand, RegUsage))
+ return true;
+ if (tryGreater(TryCand.NumSuccessors > 0,
+ Cand.NumSuccessors > 0,
+ TryCand, Cand, Successor))
+ return true;
+ if (tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
+ return true;
+ if (tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
+ TryCand, Cand, RegUsage))
+ return true;
+ return false;
+}
+
+SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
+ SIBlockSchedCandidate Cand;
+ std::vector<SIScheduleBlock*>::iterator Best;
+ SIScheduleBlock *Block;
+ if (ReadyBlocks.empty())
+ return nullptr;
+
+ DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
+ VregCurrentUsage, SregCurrentUsage);
+ if (VregCurrentUsage > maxVregUsage)
+ maxVregUsage = VregCurrentUsage;
+  if (SregCurrentUsage > maxSregUsage)
+    maxSregUsage = SregCurrentUsage;
+ DEBUG(
+ dbgs() << "Picking New Blocks\n";
+ dbgs() << "Available: ";
+ for (SIScheduleBlock* Block : ReadyBlocks)
+ dbgs() << Block->getID() << ' ';
+ dbgs() << "\nCurrent Live:\n";
+ for (unsigned Reg : LiveRegs)
+ dbgs() << PrintVRegOrUnit(Reg, DAG->getTRI()) << ' ';
+ dbgs() << '\n';
+ dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';
+ dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';
+ );
+
+ Cand.Block = nullptr;
+ for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
+ E = ReadyBlocks.end(); I != E; ++I) {
+ SIBlockSchedCandidate TryCand;
+ TryCand.Block = *I;
+ TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
+ TryCand.VGPRUsageDiff =
+ checkRegUsageImpact(TryCand.Block->getInRegs(),
+ TryCand.Block->getOutRegs())[DAG->getVGPRSetID()];
+ TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
+ TryCand.NumHighLatencySuccessors =
+ TryCand.Block->getNumHighLatencySuccessors();
+ TryCand.LastPosHighLatParentScheduled =
+ (unsigned int) std::max<int> (0,
+ LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
+ LastPosWaitedHighLatency);
+ TryCand.Height = TryCand.Block->Height;
+ // Try not to increase VGPR usage too much, else we may spill.
+ if (VregCurrentUsage > 120 ||
+ Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
+ if (!tryCandidateRegUsage(Cand, TryCand) &&
+ Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
+ tryCandidateLatency(Cand, TryCand);
+ } else {
+ if (!tryCandidateLatency(Cand, TryCand))
+ tryCandidateRegUsage(Cand, TryCand);
+ }
+ if (TryCand.Reason != NoCand) {
+ Cand.setBest(TryCand);
+ Best = I;
+ DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' '
+ << getReasonStr(Cand.Reason) << '\n');
+ }
+ }
+
+ DEBUG(
+ dbgs() << "Picking: " << Cand.Block->getID() << '\n';
+ dbgs() << "Is a block with high latency instruction: "
+ << (Cand.IsHighLatency ? "yes\n" : "no\n");
+ dbgs() << "Position of last high latency dependency: "
+ << Cand.LastPosHighLatParentScheduled << '\n';
+ dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';
+ dbgs() << '\n';
+ );
+
+ Block = Cand.Block;
+ ReadyBlocks.erase(Best);
+ return Block;
+}
+
+// Tracking of currently alive registers to determine VGPR Usage.
+
+void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
+ for (unsigned Reg : Regs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ // If not already in the live set, then add it.
+ (void) LiveRegs.insert(Reg);
+ }
+}
+
+void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
+ std::set<unsigned> &Regs) {
+ for (unsigned Reg : Regs) {
+ // For now only track virtual registers.
+ std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
+ assert (Pos != LiveRegs.end() && // Reg must be live.
+ LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&
+ LiveRegsConsumers[Reg] >= 1);
+ --LiveRegsConsumers[Reg];
+ if (LiveRegsConsumers[Reg] == 0)
+ LiveRegs.erase(Pos);
+ }
+}
+
+void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
+ for (SIScheduleBlock* Block : Parent->getSuccs()) {
+ --BlockNumPredsLeft[Block->getID()];
+ if (BlockNumPredsLeft[Block->getID()] == 0) {
+ ReadyBlocks.push_back(Block);
+ }
+    // TODO: Improve check. When the dependencies between the high latency
+    // instructions and the instructions of the other blocks are WAR or WAW,
+    // there will be no wait triggered. We would like these cases not to
+    // update LastPosHighLatencyParentScheduled.
+ if (Parent->isHighLatencyBlock())
+ LastPosHighLatencyParentScheduled[Block->getID()] = NumBlockScheduled;
+ }
+}
+
+void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
+ decreaseLiveRegs(Block, Block->getInRegs());
+ addLiveRegs(Block->getOutRegs());
+ releaseBlockSuccs(Block);
+ for (std::map<unsigned, unsigned>::iterator RegI =
+ LiveOutRegsNumUsages[Block->getID()].begin(),
+ E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
+ std::pair<unsigned, unsigned> RegP = *RegI;
+ if (LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end())
+ LiveRegsConsumers[RegP.first] = RegP.second;
+ else {
+ assert(LiveRegsConsumers[RegP.first] == 0);
+ LiveRegsConsumers[RegP.first] += RegP.second;
+ }
+ }
+ if (LastPosHighLatencyParentScheduled[Block->getID()] >
+ (unsigned)LastPosWaitedHighLatency)
+ LastPosWaitedHighLatency =
+ LastPosHighLatencyParentScheduled[Block->getID()];
+ ++NumBlockScheduled;
+}
+
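+// Estimate the per-pressure-set register pressure difference induced by
+// scheduling a block: inputs for which this block is the last consumer are
+// freed (their weight is subtracted), and the block's outputs become live
+// (their weight is added).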
+std::vector<int>
+SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
+ std::set<unsigned> &OutRegs) {
+ std::vector<int> DiffSetPressure;
+ DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
+
+ for (unsigned Reg : InRegs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (LiveRegsConsumers[Reg] > 1)
+ continue;
+ PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ DiffSetPressure[*PSetI] -= PSetI.getWeight();
+ }
+ }
+
+ for (unsigned Reg : OutRegs) {
+ // For now only track virtual registers.
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ DiffSetPressure[*PSetI] += PSetI.getWeight();
+ }
+ }
+
+ return DiffSetPressure;
+}
+
+// SIScheduler //
+
+struct SIScheduleBlockResult
+SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
+ SISchedulerBlockSchedulerVariant ScheduleVariant) {
+ SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
+ SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
+ std::vector<SIScheduleBlock*> ScheduledBlocks;
+ struct SIScheduleBlockResult Res;
+
+ ScheduledBlocks = Scheduler.getBlocks();
+
+ for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
+ SIScheduleBlock *Block = ScheduledBlocks[b];
+ std::vector<SUnit*> SUs = Block->getScheduledUnits();
+
+ for (SUnit* SU : SUs)
+ Res.SUs.push_back(SU->NodeNum);
+ }
+
+ Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
+ Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
+ return Res;
+}
+
+// SIScheduleDAGMI //
+
+SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
+ ScheduleDAGMILive(C, make_unique<GenericScheduler>(C)) {
+ SITII = static_cast<const SIInstrInfo*>(TII);
+ SITRI = static_cast<const SIRegisterInfo*>(TRI);
+
+ VGPRSetID = SITRI->getVGPR32PressureSet();
+ SGPRSetID = SITRI->getSGPR32PressureSet();
+}
+
+SIScheduleDAGMI::~SIScheduleDAGMI() {
+}
+
+ScheduleDAGInstrs *llvm::createSIMachineScheduler(MachineSchedContext *C) {
+ return new SIScheduleDAGMI(C);
+}
+
+// Code adapted from CodeGen/ScheduleDAG.cpp.
+// Does a topological sort over the SUs.
+// Both TopDown and BottomUp.
+void SIScheduleDAGMI::topologicalSort() {
+ std::vector<int> TopDownSU2Index;
+ unsigned DAGSize = SUnits.size();
+ std::vector<SUnit*> WorkList;
+
+ DEBUG(dbgs() << "Topological Sort\n");
+ WorkList.reserve(DAGSize);
+
+ TopDownIndex2SU.resize(DAGSize);
+ TopDownSU2Index.resize(DAGSize);
+ BottomUpIndex2SU.resize(DAGSize);
+
+ WorkList.push_back(&getExitSU());
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ int NodeNum = SU->NodeNum;
+ unsigned Degree = SU->Succs.size();
+ TopDownSU2Index[NodeNum] = Degree;
+ if (Degree == 0) {
+ assert(SU->Succs.empty() && "SUnit should have no successors");
+ WorkList.push_back(SU);
+ }
+ }
+
+ int Id = DAGSize;
+ while (!WorkList.empty()) {
+ SUnit *SU = WorkList.back();
+ WorkList.pop_back();
+ if (SU->NodeNum < DAGSize) {
+ TopDownSU2Index[SU->NodeNum] = --Id;
+ TopDownIndex2SU[Id] = SU->NodeNum;
+ }
+ for (SDep& Pred : SU->Preds) {
+      SUnit *PredSU = Pred.getSUnit();
+      if (PredSU->NodeNum < DAGSize && !--TopDownSU2Index[PredSU->NodeNum])
+        WorkList.push_back(PredSU);
+ }
+ }
+
+ BottomUpIndex2SU = std::vector<int>(TopDownIndex2SU.rbegin(),
+ TopDownIndex2SU.rend());
+
+#ifndef NDEBUG
+ // Check correctness of the ordering
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SDep& Pred : SU->Preds) {
+ if (Pred.getSUnit()->NodeNum >= DAGSize)
+ continue;
+ assert(TopDownSU2Index[SU->NodeNum] >
+ TopDownSU2Index[Pred.getSUnit()->NodeNum] &&
+ "Wrong Top Down topological sorting");
+ }
+ }
+ for (unsigned i = 0, e = DAGSize; i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ for (SDep& Succ : SU->Succs) {
+ if (Succ.getSUnit()->NodeNum >= DAGSize)
+ continue;
+ assert(TopDownSU2Index[SU->NodeNum] <
+ TopDownSU2Index[Succ.getSUnit()->NodeNum] &&
+ "Wrong Bottom Up topological sorting");
+ }
+ }
+#endif
+}
+
+// Move low latencies further from their user without
+// increasing SGPR usage (in general).
+// This is to be replaced by a better pass that would
+// take into account SGPR usage (based on VGPR usage
+// and the corresponding wavefront count), and that would
+// try to merge groups of loads if it makes sense, etc.
+void SIScheduleDAGMI::moveLowLatencies() {
+ unsigned DAGSize = SUnits.size();
+ int LastLowLatencyUser = -1;
+ int LastLowLatencyPos = -1;
+
+ for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[ScheduledSUnits[i]];
+ bool IsLowLatencyUser = false;
+ unsigned MinPos = 0;
+
+ for (SDep& PredDep : SU->Preds) {
+ SUnit *Pred = PredDep.getSUnit();
+ if (SITII->isLowLatencyInstruction(Pred->getInstr())) {
+ IsLowLatencyUser = true;
+ }
+ if (Pred->NodeNum >= DAGSize)
+ continue;
+ unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
+ if (PredPos >= MinPos)
+ MinPos = PredPos + 1;
+ }
+
+ if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ unsigned BestPos = LastLowLatencyUser + 1;
+ if ((int)BestPos <= LastLowLatencyPos)
+ BestPos = LastLowLatencyPos + 1;
+ if (BestPos < MinPos)
+ BestPos = MinPos;
+ if (BestPos < i) {
+ for (unsigned u = i; u > BestPos; --u) {
+ ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+ ScheduledSUnits[u] = ScheduledSUnits[u-1];
+ }
+ ScheduledSUnits[BestPos] = SU->NodeNum;
+ ScheduledSUnitsInv[SU->NodeNum] = BestPos;
+ }
+ LastLowLatencyPos = BestPos;
+ if (IsLowLatencyUser)
+ LastLowLatencyUser = BestPos;
+ } else if (IsLowLatencyUser) {
+ LastLowLatencyUser = i;
+    // Also move the COPY instructions on which
+    // the low latency instructions depend.
+ } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
+ bool CopyForLowLat = false;
+ for (SDep& SuccDep : SU->Succs) {
+ SUnit *Succ = SuccDep.getSUnit();
+ if (SITII->isLowLatencyInstruction(Succ->getInstr())) {
+ CopyForLowLat = true;
+ }
+ }
+ if (!CopyForLowLat)
+ continue;
+ if (MinPos < i) {
+ for (unsigned u = i; u > MinPos; --u) {
+ ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
+ ScheduledSUnits[u] = ScheduledSUnits[u-1];
+ }
+ ScheduledSUnits[MinPos] = SU->NodeNum;
+ ScheduledSUnitsInv[SU->NodeNum] = MinPos;
+ }
+ }
+ }
+}
+
+void SIScheduleDAGMI::restoreSULinksLeft() {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnits[i].isScheduled = false;
+ SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
+ SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
+ SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
+ SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
+ }
+}
+
+// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
+template<typename Iterator> void
+SIScheduleDAGMI::fillVgprSgprCost(Iterator First, Iterator End,
+                                  unsigned &VgprUsage, unsigned &SgprUsage) {
+  VgprUsage = 0;
+  SgprUsage = 0;
+  for (Iterator RegI = First; RegI != End; ++RegI) {
+ unsigned Reg = *RegI;
+ // For now only track virtual registers
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ PSetIterator PSetI = MRI.getPressureSets(Reg);
+ for (; PSetI.isValid(); ++PSetI) {
+ if (*PSetI == VGPRSetID)
+ VgprUsage += PSetI.getWeight();
+ else if (*PSetI == SGPRSetID)
+ SgprUsage += PSetI.getWeight();
+ }
+ }
+}
+
+void SIScheduleDAGMI::schedule() {
+ SmallVector<SUnit*, 8> TopRoots, BotRoots;
+ SIScheduleBlockResult Best, Temp;
+ DEBUG(dbgs() << "Preparing Scheduling\n");
+
+ buildDAGWithRegPressure();
+ DEBUG(
+ for(SUnit& SU : SUnits)
+ SU.dumpAll(this)
+ );
+
+ Topo.InitDAGTopologicalSorting();
+ topologicalSort();
+ findRootsAndBiasEdges(TopRoots, BotRoots);
+  // We reuse several ScheduleDAGMI and ScheduleDAGMILive
+  // functions, but to make them happy we must initialize
+  // the default Scheduler implementation (even if we do not
+  // run it).
+ SchedImpl->initialize(this);
+ initQueues(TopRoots, BotRoots);
+
+ // Fill some stats to help scheduling.
+
+ SUnitsLinksBackup = SUnits;
+ IsLowLatencySU.clear();
+ LowLatencyOffset.clear();
+ IsHighLatencySU.clear();
+
+ IsLowLatencySU.resize(SUnits.size(), 0);
+ LowLatencyOffset.resize(SUnits.size(), 0);
+ IsHighLatencySU.resize(SUnits.size(), 0);
+
+ for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ unsigned BaseLatReg, OffLatReg;
+ if (SITII->isLowLatencyInstruction(SU->getInstr())) {
+ IsLowLatencySU[i] = 1;
+ if (SITII->getMemOpBaseRegImmOfs(SU->getInstr(), BaseLatReg,
+ OffLatReg, TRI))
+ LowLatencyOffset[i] = OffLatReg;
+ } else if (SITII->isHighLatencyInstruction(SU->getInstr()))
+ IsHighLatencySU[i] = 1;
+ }
+
+ SIScheduler Scheduler(this);
+ Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
+ SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
+#if 0 // To enable when handleMove fix lands
+  // If VGPR usage is extremely high, try other well performing variants
+  // which could lead to lower VGPR usage.
+ if (Best.MaxVGPRUsage > 180) {
+ std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+ { LatenciesAlone, BlockRegUsageLatency },
+// { LatenciesAlone, BlockRegUsage },
+ { LatenciesGrouped, BlockLatencyRegUsage },
+// { LatenciesGrouped, BlockRegUsageLatency },
+// { LatenciesGrouped, BlockRegUsage },
+ { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+// { LatenciesAlonePlusConsecutive, BlockRegUsage }
+ };
+ for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+ Temp = Scheduler.scheduleVariant(v.first, v.second);
+ if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+ Best = Temp;
+ }
+ }
+ // If VGPR usage is still extremely high, we may spill. Try other variants
+ // that perform worse, but that could lead to lower VGPR usage.
+ if (Best.MaxVGPRUsage > 200) {
+ std::vector<std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant>> Variants = {
+// { LatenciesAlone, BlockRegUsageLatency },
+ { LatenciesAlone, BlockRegUsage },
+// { LatenciesGrouped, BlockLatencyRegUsage },
+ { LatenciesGrouped, BlockRegUsageLatency },
+ { LatenciesGrouped, BlockRegUsage },
+// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
+ { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
+ { LatenciesAlonePlusConsecutive, BlockRegUsage }
+ };
+ for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
+ Temp = Scheduler.scheduleVariant(v.first, v.second);
+ if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
+ Best = Temp;
+ }
+ }
+#endif
+ ScheduledSUnits = Best.SUs;
+ ScheduledSUnitsInv.resize(SUnits.size());
+
+ for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
+ ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
+ }
+
+ moveLowLatencies();
+
+ // Tell the outside world about the result of the scheduling.
+
+ assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker");
+ TopRPTracker.setPos(CurrentTop);
+
+ for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
+ E = ScheduledSUnits.end(); I != E; ++I) {
+ SUnit *SU = &SUnits[*I];
+
+ scheduleMI(SU, true);
+
+ DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "
+ << *SU->getInstr());
+ }
+
+ assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
+
+ placeDebugValues();
+
+ DEBUG({
+ unsigned BBNum = begin()->getParent()->getNumber();
+ dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n";
+ dumpSchedule();
+ dbgs() << '\n';
+ });
+}
diff --git a/lib/Target/AMDGPU/SIMachineScheduler.h b/lib/Target/AMDGPU/SIMachineScheduler.h
new file mode 100644
index 000000000000..b270136811c6
--- /dev/null
+++ b/lib/Target/AMDGPU/SIMachineScheduler.h
@@ -0,0 +1,489 @@
+//===-- SIMachineScheduler.h - SI Scheduler Interface -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// \brief SI Machine Scheduler interface
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
+#define LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
+
+#include "SIInstrInfo.h"
+#include "llvm/CodeGen/MachineScheduler.h"
+#include "llvm/CodeGen/RegisterPressure.h"
+
+namespace llvm {
+
+enum SIScheduleCandReason {
+ NoCand,
+ RegUsage,
+ Latency,
+ Successor,
+ Depth,
+ NodeOrder
+};
+
+struct SISchedulerCandidate {
+ // The reason for this candidate.
+ SIScheduleCandReason Reason;
+
+ // Set of reasons that apply to multiple candidates.
+ uint32_t RepeatReasonSet;
+
+ SISchedulerCandidate()
+ : Reason(NoCand), RepeatReasonSet(0) {}
+
+ bool isRepeat(SIScheduleCandReason R) { return RepeatReasonSet & (1 << R); }
+ void setRepeat(SIScheduleCandReason R) { RepeatReasonSet |= (1 << R); }
+};
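+
+// For illustration, the repeat-reason bitmask behaves as a small set
+// (a sketch; the values used are hypothetical):
+// SISchedulerCandidate C;
+// C.setRepeat(RegUsage); // RepeatReasonSet |= 1 << RegUsage
+// C.isRepeat(RegUsage); // now true
+// C.isRepeat(Latency); // still false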
+
+class SIScheduleDAGMI;
+class SIScheduleBlockCreator;
+
+class SIScheduleBlock {
+ SIScheduleDAGMI *DAG;
+ SIScheduleBlockCreator *BC;
+
+ std::vector<SUnit*> SUnits;
+ std::map<unsigned, unsigned> NodeNum2Index;
+ std::vector<SUnit*> TopReadySUs;
+ std::vector<SUnit*> ScheduledSUnits;
+
+ /// The top of the unscheduled zone.
+ IntervalPressure TopPressure;
+ RegPressureTracker TopRPTracker;
+
+ // Pressure: number of registers of a given class needed to
+ // store the live virtual and physical registers.
+ // We only care about SGPR32 and VGPR32, and only track virtual registers.
+ // Pressure of additional registers required inside the block.
+ std::vector<unsigned> InternalAdditionnalPressure;
+ // Pressure of input and output registers
+ std::vector<unsigned> LiveInPressure;
+ std::vector<unsigned> LiveOutPressure;
+ // Registers required by the block, and its outputs.
+ // We only track virtual registers.
+ // Note that some registers are not 32 bits,
+ // so the pressure is not equal to the number of live registers.
+ std::set<unsigned> LiveInRegs;
+ std::set<unsigned> LiveOutRegs;
+
+ bool Scheduled;
+ bool HighLatencyBlock;
+
+ std::vector<unsigned> HasLowLatencyNonWaitedParent;
+
+ // Unique ID, the index of the Block in the SIScheduleDAGMI Blocks table.
+ unsigned ID;
+
+ std::vector<SIScheduleBlock*> Preds; // All blocks predecessors.
+ std::vector<SIScheduleBlock*> Succs; // All blocks successors.
+ unsigned NumHighLatencySuccessors;
+
+public:
+ SIScheduleBlock(SIScheduleDAGMI *DAG, SIScheduleBlockCreator *BC,
+ unsigned ID):
+ DAG(DAG), BC(BC), SUnits(), TopReadySUs(), ScheduledSUnits(),
+ TopRPTracker(TopPressure), Scheduled(false),
+ HighLatencyBlock(false), ID(ID),
+ Preds(), Succs(), NumHighLatencySuccessors(0) {}
+
+ ~SIScheduleBlock() {}
+
+ unsigned getID() const { return ID; }
+
+ /// Functions for Block construction.
+ void addUnit(SUnit *SU);
+
+ // When all SUs have been added.
+ void finalizeUnits();
+
+ // Add a block predecessor, i.e. a block containing an instruction
+ // predecessor of one of this block's SUs.
+ void addPred(SIScheduleBlock *Pred);
+ void addSucc(SIScheduleBlock *Succ);
+
+ const std::vector<SIScheduleBlock*>& getPreds() const { return Preds; }
+ const std::vector<SIScheduleBlock*>& getSuccs() const { return Succs; }
+
+ unsigned Height; // Maximum top-down path length to a block without outputs
+ unsigned Depth; // Maximum bottom-up path length to a block without inputs
+
+ unsigned getNumHighLatencySuccessors() const {
+ return NumHighLatencySuccessors;
+ }
+
+ bool isHighLatencyBlock() { return HighLatencyBlock; }
+
+ // This is approximate.
+ // Ideally it should take into account that some instructions
+ // (rcp, etc.) are 4 times slower.
+ int getCost() { return SUnits.size(); }
+
+ // The block predecessors and successors must all be registered
+ // before fastSchedule().
+ // Fast schedule with no particular requirement.
+ void fastSchedule();
+
+ std::vector<SUnit*> getScheduledUnits() { return ScheduledSUnits; }
+
+ // Complete schedule that will try to minimize register pressure and
+ // account for low latencies, and will fill liveins and liveouts.
+ // Needs all MIs to be grouped between BeginBlock and EndBlock.
+ // The MIs can be moved after the scheduling;
+ // the grouping is just used to allow correct tracking of live registers.
+ void schedule(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock);
+
+ bool isScheduled() { return Scheduled; }
+
+
+ // Needs the block to be scheduled inside
+ // TODO: find a way to compute it.
+ std::vector<unsigned> &getInternalAdditionnalRegUsage() {
+ return InternalAdditionnalPressure;
+ }
+
+ std::set<unsigned> &getInRegs() { return LiveInRegs; }
+ std::set<unsigned> &getOutRegs() { return LiveOutRegs; }
+
+ void printDebug(bool Full);
+
+private:
+ struct SISchedCandidate : SISchedulerCandidate {
+ // The best SUnit candidate.
+ SUnit *SU;
+
+ unsigned SGPRUsage;
+ unsigned VGPRUsage;
+ bool IsLowLatency;
+ unsigned LowLatencyOffset;
+ bool HasLowLatencyNonWaitedParent;
+
+ SISchedCandidate()
+ : SU(nullptr) {}
+
+ bool isValid() const { return SU; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SISchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ SU = Best.SU;
+ Reason = Best.Reason;
+ SGPRUsage = Best.SGPRUsage;
+ VGPRUsage = Best.VGPRUsage;
+ IsLowLatency = Best.IsLowLatency;
+ LowLatencyOffset = Best.LowLatencyOffset;
+ HasLowLatencyNonWaitedParent = Best.HasLowLatencyNonWaitedParent;
+ }
+ };
+
+ void undoSchedule();
+
+ void undoReleaseSucc(SUnit *SU, SDep *SuccEdge);
+ void releaseSucc(SUnit *SU, SDep *SuccEdge);
+ // InOrOutBlock: restrict to links pointing inside the block (true),
+ // or restrict to links pointing outside the block (false).
+ void releaseSuccessors(SUnit *SU, bool InOrOutBlock);
+
+ void nodeScheduled(SUnit *SU);
+ void tryCandidateTopDown(SISchedCandidate &Cand, SISchedCandidate &TryCand);
+ void tryCandidateBottomUp(SISchedCandidate &Cand, SISchedCandidate &TryCand);
+ SUnit* pickNode();
+ void traceCandidate(const SISchedCandidate &Cand);
+ void initRegPressure(MachineBasicBlock::iterator BeginBlock,
+ MachineBasicBlock::iterator EndBlock);
+};
+
+struct SIScheduleBlocks {
+ std::vector<SIScheduleBlock*> Blocks;
+ std::vector<int> TopDownIndex2Block;
+ std::vector<int> TopDownBlock2Index;
+};
+
+enum SISchedulerBlockCreatorVariant {
+ LatenciesAlone,
+ LatenciesGrouped,
+ LatenciesAlonePlusConsecutive
+};
+
+class SIScheduleBlockCreator {
+ SIScheduleDAGMI *DAG;
+ // unique_ptr handles freeing memory for us.
+ std::vector<std::unique_ptr<SIScheduleBlock>> BlockPtrs;
+ std::map<SISchedulerBlockCreatorVariant,
+ SIScheduleBlocks> Blocks;
+ std::vector<SIScheduleBlock*> CurrentBlocks;
+ std::vector<int> Node2CurrentBlock;
+
+ // Topological sort.
+ // Maps topological index to the block number.
+ std::vector<int> TopDownIndex2Block;
+ std::vector<int> TopDownBlock2Index;
+ std::vector<int> BottomUpIndex2Block;
+
+ // 0 -> Color not given.
+ // 1 to SUnits.size() -> Reserved group (you should only add elements to them).
+ // Above -> Other groups.
+ int NextReservedID;
+ int NextNonReservedID;
+ std::vector<int> CurrentColoring;
+ std::vector<int> CurrentTopDownReservedDependencyColoring;
+ std::vector<int> CurrentBottomUpReservedDependencyColoring;
+
+public:
+ SIScheduleBlockCreator(SIScheduleDAGMI *DAG);
+ ~SIScheduleBlockCreator();
+
+ SIScheduleBlocks
+ getBlocks(SISchedulerBlockCreatorVariant BlockVariant);
+
+ bool isSUInBlock(SUnit *SU, unsigned ID);
+
+private:
+ // Give a Reserved color to every high latency instruction.
+ void colorHighLatenciesAlone();
+
+ // Create groups of high latencies with a Reserved color.
+ void colorHighLatenciesGroups();
+
+ // Compute coloring for top-down and bottom-up traversals with
+ // different colors depending on dependencies on Reserved colors.
+ void colorComputeReservedDependencies();
+
+ // Give a color to all non-colored SUs according to their dependencies
+ // on Reserved groups.
+ void colorAccordingToReservedDependencies();
+
+ // Divide blocks that have no bottom-up or top-down dependencies
+ // on Reserved groups. The new colors are computed according to the
+ // dependencies on the other blocks formed with
+ // colorAccordingToReservedDependencies.
+ void colorEndsAccordingToDependencies();
+
+ // Cut groups into subgroups whose SUs are in consecutive order
+ // (except for Reserved groups).
+ void colorForceConsecutiveOrderInGroup();
+
+ // Merge constant loads whose users are all in another group into that
+ // group. (TODO: else if all their users depend on the same group,
+ // put them there.)
+ void colorMergeConstantLoadsNextGroup();
+
+ // Merge SUs whose users are all in another group into that group.
+ void colorMergeIfPossibleNextGroup();
+
+ // Merge SUs whose users are all in another group into that group,
+ // but only for Reserved groups.
+ void colorMergeIfPossibleNextGroupOnlyForReserved();
+
+ // Merge SUs whose users are all in another group into that group,
+ // but only if the group is no more than a few SUs.
+ void colorMergeIfPossibleSmallGroupsToNextGroup();
+
+ // Divide large blocks.
+ // Implementation idea: assign new colors depending on the top-down and
+ // bottom-up links to other blocks.
+ void cutHugeBlocks();
+
+ // Put all instructions with no users in this scheduling region into one
+ // group (we'd want these groups to be at the end).
+ void regroupNoUserInstructions();
+
+ void createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant);
+
+ void topologicalSort();
+
+ void scheduleInsideBlocks();
+
+ void fillStats();
+};
+
+enum SISchedulerBlockSchedulerVariant {
+ BlockLatencyRegUsage,
+ BlockRegUsageLatency,
+ BlockRegUsage
+};
+
+class SIScheduleBlockScheduler {
+ SIScheduleDAGMI *DAG;
+ SISchedulerBlockSchedulerVariant Variant;
+ std::vector<SIScheduleBlock*> Blocks;
+
+ std::vector<std::map<unsigned, unsigned>> LiveOutRegsNumUsages;
+ std::set<unsigned> LiveRegs;
+ // Num of schedulable unscheduled blocks reading the register.
+ std::map<unsigned, unsigned> LiveRegsConsumers;
+
+ std::vector<unsigned> LastPosHighLatencyParentScheduled;
+ int LastPosWaitedHighLatency;
+
+ std::vector<SIScheduleBlock*> BlocksScheduled;
+ unsigned NumBlockScheduled;
+ std::vector<SIScheduleBlock*> ReadyBlocks;
+
+ unsigned VregCurrentUsage;
+ unsigned SregCurrentUsage;
+
+ // Currently only an approximation.
+ unsigned maxVregUsage;
+ unsigned maxSregUsage;
+
+ std::vector<unsigned> BlockNumPredsLeft;
+ std::vector<unsigned> BlockNumSuccsLeft;
+
+public:
+ SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
+ SISchedulerBlockSchedulerVariant Variant,
+ SIScheduleBlocks BlocksStruct);
+ ~SIScheduleBlockScheduler() {}
+
+ std::vector<SIScheduleBlock*> getBlocks() { return BlocksScheduled; }
+
+ unsigned getVGPRUsage() { return maxVregUsage; }
+ unsigned getSGPRUsage() { return maxSregUsage; }
+
+private:
+ struct SIBlockSchedCandidate : SISchedulerCandidate {
+ // The best Block candidate.
+ SIScheduleBlock *Block;
+
+ bool IsHighLatency;
+ int VGPRUsageDiff;
+ unsigned NumSuccessors;
+ unsigned NumHighLatencySuccessors;
+ unsigned LastPosHighLatParentScheduled;
+ unsigned Height;
+
+ SIBlockSchedCandidate()
+ : Block(nullptr) {}
+
+ bool isValid() const { return Block; }
+
+ // Copy the status of another candidate without changing policy.
+ void setBest(SIBlockSchedCandidate &Best) {
+ assert(Best.Reason != NoCand && "uninitialized Sched candidate");
+ Block = Best.Block;
+ Reason = Best.Reason;
+ IsHighLatency = Best.IsHighLatency;
+ VGPRUsageDiff = Best.VGPRUsageDiff;
+ NumSuccessors = Best.NumSuccessors;
+ NumHighLatencySuccessors = Best.NumHighLatencySuccessors;
+ LastPosHighLatParentScheduled = Best.LastPosHighLatParentScheduled;
+ Height = Best.Height;
+ }
+ };
+
+ bool tryCandidateLatency(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand);
+ bool tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
+ SIBlockSchedCandidate &TryCand);
+ SIScheduleBlock *pickBlock();
+
+ void addLiveRegs(std::set<unsigned> &Regs);
+ void decreaseLiveRegs(SIScheduleBlock *Block, std::set<unsigned> &Regs);
+ void releaseBlockSuccs(SIScheduleBlock *Parent);
+ void blockScheduled(SIScheduleBlock *Block);
+
+ // Check register pressure change
+ // by scheduling a block with these LiveIn and LiveOut.
+ std::vector<int> checkRegUsageImpact(std::set<unsigned> &InRegs,
+ std::set<unsigned> &OutRegs);
+
+ void schedule();
+};
+
+struct SIScheduleBlockResult {
+ std::vector<unsigned> SUs;
+ unsigned MaxSGPRUsage;
+ unsigned MaxVGPRUsage;
+};
+
+class SIScheduler {
+ SIScheduleDAGMI *DAG;
+ SIScheduleBlockCreator BlockCreator;
+
+public:
+ SIScheduler(SIScheduleDAGMI *DAG) : DAG(DAG), BlockCreator(DAG) {}
+
+ ~SIScheduler() {}
+
+ SIScheduleBlockResult
+ scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
+ SISchedulerBlockSchedulerVariant ScheduleVariant);
+};
+
+class SIScheduleDAGMI : public ScheduleDAGMILive {
+ const SIInstrInfo *SITII;
+ const SIRegisterInfo *SITRI;
+
+ std::vector<SUnit> SUnitsLinksBackup;
+
+ // For moveLowLatencies. After all Scheduling variants are tested.
+ std::vector<unsigned> ScheduledSUnits;
+ std::vector<unsigned> ScheduledSUnitsInv;
+
+ unsigned VGPRSetID;
+ unsigned SGPRSetID;
+
+public:
+ SIScheduleDAGMI(MachineSchedContext *C);
+
+ ~SIScheduleDAGMI() override;
+
+ // Entry point for the schedule.
+ void schedule() override;
+
+ // To init Block's RPTracker.
+ void initRPTracker(RegPressureTracker &RPTracker) {
+ RPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
+ }
+
+ MachineBasicBlock *getBB() { return BB; }
+ MachineBasicBlock::iterator getCurrentTop() { return CurrentTop; }
+ MachineBasicBlock::iterator getCurrentBottom() { return CurrentBottom; }
+ LiveIntervals *getLIS() { return LIS; }
+ MachineRegisterInfo *getMRI() { return &MRI; }
+ const TargetRegisterInfo *getTRI() { return TRI; }
+ SUnit& getEntrySU() { return EntrySU; }
+ SUnit& getExitSU() { return ExitSU; }
+
+ void restoreSULinksLeft();
+
+ template<typename Iterator> void fillVgprSgprCost(Iterator First,
+ Iterator End,
+ unsigned &VgprUsage,
+ unsigned &SgprUsage);
+ std::set<unsigned> getInRegs() {
+ std::set<unsigned> InRegs(RPTracker.getPressure().LiveInRegs.begin(),
+ RPTracker.getPressure().LiveInRegs.end());
+ return InRegs;
+ }
+
+ unsigned getVGPRSetID() const { return VGPRSetID; }
+ unsigned getSGPRSetID() const { return SGPRSetID; }
+
+private:
+ void topologicalSort();
+ // After scheduling is done, improve low latency placements.
+ void moveLowLatencies();
+
+public:
+ // Some stats for scheduling inside blocks.
+ std::vector<unsigned> IsLowLatencySU;
+ std::vector<unsigned> LowLatencyOffset;
+ std::vector<unsigned> IsHighLatencySU;
+ // Topological sort
+ // Maps topological index to the node number.
+ std::vector<int> TopDownIndex2SU;
+ std::vector<int> BottomUpIndex2SU;
+};
+
+} // namespace llvm
+
+#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINESCHEDULER_H
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 2afa00996609..609f5e7df549 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -23,7 +23,20 @@
using namespace llvm;
-SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {}
+SIRegisterInfo::SIRegisterInfo() : AMDGPURegisterInfo() {
+ unsigned NumRegPressureSets = getNumRegPressureSets();
+
+ SGPR32SetID = NumRegPressureSets;
+ VGPR32SetID = NumRegPressureSets;
+ for (unsigned i = 0; i < NumRegPressureSets; ++i) {
+ if (strncmp("SGPR_32", getRegPressureSetName(i), 7) == 0)
+ SGPR32SetID = i;
+ else if (strncmp("VGPR_32", getRegPressureSetName(i), 7) == 0)
+ VGPR32SetID = i;
+ }
+ assert(SGPR32SetID < NumRegPressureSets &&
+ VGPR32SetID < NumRegPressureSets);
+}
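+
+// Illustrative use of these pressure-set IDs (a sketch mirroring what the
+// SI scheduler does when accumulating a virtual register's pressure):
+// for (PSetIterator I = MRI.getPressureSets(Reg); I.isValid(); ++I)
+// if (*I == TRI->getSGPR32PressureSet())
+// SgprUsage += I.getWeight();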
void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved, unsigned Reg) const {
MCRegAliasIterator R(Reg, this, true);
@@ -36,18 +49,15 @@ unsigned SIRegisterInfo::reservedPrivateSegmentBufferReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 4;
- if (ST.isXNACKEnabled())
- BaseIdx -= 4;
-
+ // Leave space for flat_scr, xnack_mask, vcc, and alignment
+ unsigned BaseIdx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 8 - 4;
unsigned BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SReg_128RegClass);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- // 98/99 need to be reserved for flat_scr or 96/97 for flat_scr and
- // 98/99 for xnack_mask, and 100/101 for vcc. This is the next sgpr128 down
- // either way.
+ // 96/97 need to be reserved for flat_scr, 98/99 for xnack_mask, and
+ // 100/101 for vcc. This is the next sgpr128 down.
return AMDGPU::SGPR92_SGPR93_SGPR94_SGPR95;
}
@@ -58,25 +68,14 @@ unsigned SIRegisterInfo::reservedPrivateSegmentWaveByteOffsetReg(
const MachineFunction &MF) const {
const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
if (ST.hasSGPRInitBug()) {
- unsigned Idx;
-
- if (!ST.isXNACKEnabled())
- Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4 - 5;
- else
- Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
-
+ unsigned Idx = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6 - 1;
return AMDGPU::SGPR_32RegClass.getRegister(Idx);
}
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
- if (!ST.isXNACKEnabled()) {
- // Next register before reservations for flat_scr and vcc.
- return AMDGPU::SGPR97;
- } else {
- // Next register before reservations for flat_scr, xnack_mask, vcc,
- // and scratch resource.
- return AMDGPU::SGPR91;
- }
+ // Next register before reservations for flat_scr, xnack_mask, vcc,
+ // and scratch resource.
+ return AMDGPU::SGPR91;
}
return AMDGPU::SGPR95;
@@ -99,23 +98,22 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
if (ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
// SI/CI have 104 SGPRs. VI has 102. We need to shift down the reservation
- // for VCC/FLAT_SCR.
+ // for VCC/XNACK_MASK/FLAT_SCR.
+ //
+ // TODO: The SGPRs that alias to XNACK_MASK could be used as general purpose
+ // SGPRs when the XNACK feature is not used. This is currently not done
+ // because the code that counts SGPRs cannot account for such holes.
+ reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
reserveRegisterTuples(Reserved, AMDGPU::SGPR98_SGPR99);
reserveRegisterTuples(Reserved, AMDGPU::SGPR100_SGPR101);
-
- if (ST.isXNACKEnabled())
- reserveRegisterTuples(Reserved, AMDGPU::SGPR96_SGPR97);
}
// Tonga and Iceland can only allocate a fixed number of SGPRs due
// to a hw bug.
if (ST.hasSGPRInitBug()) {
unsigned NumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
- // Reserve some SGPRs for FLAT_SCRATCH and VCC (4 SGPRs).
- unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 4;
-
- if (ST.isXNACKEnabled())
- Limit -= 2;
+ // Reserve some SGPRs for FLAT_SCRATCH, XNACK_MASK, and VCC (6 SGPRs).
+ unsigned Limit = AMDGPUSubtarget::FIXED_SGPR_COUNT_FOR_INIT_BUG - 6;
for (unsigned i = Limit; i < NumSGPRs; ++i) {
unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
@@ -479,12 +477,38 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass(
if (SubIdx == AMDGPU::NoSubRegister)
return RC;
- // If this register has a sub-register, we can safely assume it is a 32-bit
- // register, because all of SI's sub-registers are 32-bit.
+ // We can assume that each lane corresponds to one 32-bit register.
+ unsigned Count = countPopulation(getSubRegIndexLaneMask(SubIdx));
if (isSGPRClass(RC)) {
- return &AMDGPU::SGPR_32RegClass;
+ switch (Count) {
+ case 1:
+ return &AMDGPU::SGPR_32RegClass;
+ case 2:
+ return &AMDGPU::SReg_64RegClass;
+ case 4:
+ return &AMDGPU::SReg_128RegClass;
+ case 8:
+ return &AMDGPU::SReg_256RegClass;
+ case 16: /* fall-through */
+ default:
+ llvm_unreachable("Invalid sub-register class size");
+ }
} else {
- return &AMDGPU::VGPR_32RegClass;
+ switch (Count) {
+ case 1:
+ return &AMDGPU::VGPR_32RegClass;
+ case 2:
+ return &AMDGPU::VReg_64RegClass;
+ case 3:
+ return &AMDGPU::VReg_96RegClass;
+ case 4:
+ return &AMDGPU::VReg_128RegClass;
+ case 8:
+ return &AMDGPU::VReg_256RegClass;
+ case 16: /* fall-through */
+ default:
+ llvm_unreachable("Invalid sub-register class size");
+ }
}
}
diff --git a/lib/Target/AMDGPU/SIRegisterInfo.h b/lib/Target/AMDGPU/SIRegisterInfo.h
index 1795237c2140..9410e2049cba 100644
--- a/lib/Target/AMDGPU/SIRegisterInfo.h
+++ b/lib/Target/AMDGPU/SIRegisterInfo.h
@@ -25,6 +25,9 @@ namespace llvm {
struct SIRegisterInfo : public AMDGPURegisterInfo {
private:
+ unsigned SGPR32SetID;
+ unsigned VGPR32SetID;
+
void reserveRegisterTuples(BitVector &, unsigned Reg) const;
public:
@@ -146,6 +149,9 @@ public:
unsigned findUnusedRegister(const MachineRegisterInfo &MRI,
const TargetRegisterClass *RC) const;
+ unsigned getSGPR32PressureSet() const { return SGPR32SetID; }
+ unsigned getVGPR32PressureSet() const { return VGPR32SetID; }
+
private:
void buildScratchLoadStore(MachineBasicBlock::iterator MI,
unsigned LoadStoreOp, unsigned Value,
diff --git a/lib/Target/AMDGPU/SITypeRewriter.cpp b/lib/Target/AMDGPU/SITypeRewriter.cpp
index dbdc76b917f3..d36c5d29b127 100644
--- a/lib/Target/AMDGPU/SITypeRewriter.cpp
+++ b/lib/Target/AMDGPU/SITypeRewriter.cpp
@@ -98,6 +98,9 @@ void SITypeRewriter::visitCallInst(CallInst &I) {
SmallVector <Type*, 8> Types;
bool NeedToReplace = false;
Function *F = I.getCalledFunction();
+ if (!F)
+ return;
+
std::string Name = F->getName();
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
Value *Arg = I.getArgOperand(i);
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
index add415e215cf..3b4c235c0dc9 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp
@@ -106,20 +106,27 @@ bool isReadOnlySegment(const GlobalValue *GV) {
return GV->getType()->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS;
}
-static const char ShaderTypeAttribute[] = "ShaderType";
-
-unsigned getShaderType(const Function &F) {
- Attribute A = F.getFnAttribute(ShaderTypeAttribute);
- unsigned ShaderType = ShaderType::COMPUTE;
+static unsigned getIntegerAttribute(const Function &F, const char *Name,
+ unsigned Default) {
+ Attribute A = F.getFnAttribute(Name);
+ unsigned Result = Default;
if (A.isStringAttribute()) {
StringRef Str = A.getValueAsString();
- if (Str.getAsInteger(0, ShaderType)) {
+ if (Str.getAsInteger(0, Result)) {
LLVMContext &Ctx = F.getContext();
Ctx.emitError("can't parse shader type");
}
}
- return ShaderType;
+ return Result;
+}
+
+unsigned getShaderType(const Function &F) {
+ return getIntegerAttribute(F, "ShaderType", ShaderType::COMPUTE);
+}
+
+unsigned getInitialPSInputAddr(const Function &F) {
+ return getIntegerAttribute(F, "InitialPSInputAddr", 0);
}
bool isSI(const MCSubtargetInfo &STI) {
diff --git a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
index 19419a29f5e0..57cbe1b58f98 100644
--- a/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
+++ b/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.h
@@ -45,6 +45,8 @@ bool isGlobalSegment(const GlobalValue *GV);
bool isReadOnlySegment(const GlobalValue *GV);
unsigned getShaderType(const Function &F);
+unsigned getInitialPSInputAddr(const Function &F);
+
bool isSI(const MCSubtargetInfo &STI);
bool isCI(const MCSubtargetInfo &STI);
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 419717c85a79..a5207705fc69 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -87,9 +87,22 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
}
}
+ if (STI.isTargetDarwin() && F->getCallingConv() == CallingConv::CXX_FAST_TLS)
+ return MF->getInfo<ARMFunctionInfo>()->isSplitCSR()
+ ? CSR_iOS_CXX_TLS_PE_SaveList
+ : CSR_iOS_CXX_TLS_SaveList;
return RegList;
}
+const MCPhysReg *ARMBaseRegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<ARMFunctionInfo>()->isSplitCSR())
+ return CSR_iOS_CXX_TLS_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
@@ -97,6 +110,8 @@ ARMBaseRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
if (CC == CallingConv::GHC)
// This is academic becase all GHC calls are (supposed to be) tail calls
return CSR_NoRegs_RegMask;
+ if (STI.isTargetDarwin() && CC == CallingConv::CXX_FAST_TLS)
+ return CSR_iOS_CXX_TLS_RegMask;
return STI.isTargetDarwin() ? CSR_iOS_RegMask : CSR_AAPCS_RegMask;
}
@@ -106,6 +121,14 @@ ARMBaseRegisterInfo::getNoPreservedMask() const {
}
const uint32_t *
+ARMBaseRegisterInfo::getTLSCallPreservedMask(const MachineFunction &MF) const {
+ assert(MF.getSubtarget<ARMSubtarget>().isTargetDarwin() &&
+ "only know about special TLS call on Darwin");
+ return CSR_iOS_TLSCall_RegMask;
+}
+
+const uint32_t *
ARMBaseRegisterInfo::getThisReturnPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
const ARMSubtarget &STI = MF.getSubtarget<ARMSubtarget>();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index cea8b80c7821..6a9a45a65687 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -62,6 +62,12 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) {
switch (Reg) {
case D15: case D14: case D13: case D12:
case D11: case D10: case D9: case D8:
+ case D7: case D6: case D5: case D4:
+ case D3: case D2: case D1: case D0:
+ case D31: case D30: case D29: case D28:
+ case D27: case D26: case D25: case D24:
+ case D23: case D22: case D21: case D20:
+ case D19: case D18: case D17: case D16:
return true;
default:
return false;
@@ -92,9 +98,12 @@ protected:
public:
/// Code Generation virtual methods...
const MCPhysReg *getCalleeSavedRegs(const MachineFunction *MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
+ const uint32_t *getTLSCallPreservedMask(const MachineFunction &MF) const;
/// getThisReturnPreservedMask - Returns a call preserved mask specific to the
/// case that 'returned' is on an i32 first argument if the calling convention
diff --git a/lib/Target/ARM/ARMCallingConv.td b/lib/Target/ARM/ARMCallingConv.td
index 233516415149..847ef87c1b26 100644
--- a/lib/Target/ARM/ARMCallingConv.td
+++ b/lib/Target/ARM/ARMCallingConv.td
@@ -225,6 +225,21 @@ def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>;
def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4,
(sub CSR_AAPCS_ThisReturn, R9))>;
+def CSR_iOS_TLSCall : CalleeSavedRegs<(add LR, SP,
+ (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// C++ TLS access function saves all registers except SP. Try to match
+// the order of CSRs in CSR_iOS.
+def CSR_iOS_CXX_TLS : CalleeSavedRegs<(add CSR_iOS, (sequence "R%u", 12, 1),
+ (sequence "D%u", 31, 0))>;
+
+// CSRs that are handled by prologue, epilogue.
+def CSR_iOS_CXX_TLS_PE : CalleeSavedRegs<(add LR)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, LR)>;
+
// The "interrupt" attribute is used to generate code that is acceptable in
// exception-handlers of various kinds. It makes us use a different return
// instruction (handled elsewhere) and affects which registers we must return to
diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp
index 9bdf823c85bd..ff2fcfa349dc 100644
--- a/lib/Target/ARM/ARMFastISel.cpp
+++ b/lib/Target/ARM/ARMFastISel.cpp
@@ -578,7 +578,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) {
unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) {
// For now 32-bit only.
- if (VT != MVT::i32) return 0;
+ if (VT != MVT::i32 || GV->isThreadLocal()) return 0;
Reloc::Model RelocM = TM.getRelocationModel();
bool IsIndirect = Subtarget->GVIsIndirectSymbol(GV, RelocM);
@@ -2083,6 +2083,9 @@ bool ARMFastISel::SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
// Build a list of return value registers.
SmallVector<unsigned, 4> RetRegs;
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 024244092a34..dfbb96959470 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -622,7 +622,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else
Base = N;
@@ -801,7 +802,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getRegister(0, MVT::i32);
@@ -1067,7 +1069,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
Base = CurDAG->getTargetFrameIndex(
FI, TLI->getPointerTy(CurDAG->getDataLayout()));
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
}
Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
@@ -1186,7 +1189,8 @@ ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
if (N.getOpcode() == ISD::ADD) {
return false; // We want to select register offset instead
} else if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
} else {
Base = N;
@@ -1292,7 +1296,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
}
if (N.getOpcode() == ARMISD::Wrapper &&
- N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress) {
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
+ N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
Base = N.getOperand(0);
if (Base.getOpcode() == ISD::TargetConstantPool)
return false; // We want to select t2LDRpci instead.
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 9cfb06b00c4b..37c0795af283 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -744,7 +744,7 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SUBE, MVT::i32, Custom);
}
- if (!Subtarget->isThumb1Only())
+ if (!Subtarget->isThumb1Only() && Subtarget->hasV6T2Ops())
setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
// ARM does not have ROTL.
@@ -1385,6 +1385,7 @@ ARMTargetLowering::getEffectiveCallingConv(CallingConv::ID CC,
else
return CallingConv::ARM_AAPCS;
case CallingConv::Fast:
+ case CallingConv::CXX_FAST_TLS:
if (!Subtarget->isAAPCS_ABI()) {
if (Subtarget->hasVFP2() && !Subtarget->isThumb1Only() && !isVarArg)
return CallingConv::Fast;
@@ -2347,6 +2348,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
}
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (ARM::GPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i32));
+ else if (ARM::DPRRegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::getFloatingPointVT(64)));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
// Update chain and glue.
RetOps[0] = Chain;
@@ -2530,6 +2544,72 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op,
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
+/// \brief Convert a TLS address reference into the correct sequence of loads
+/// and calls to compute the variable's address for Darwin, and return an
+/// SDValue containing the final node.
+///
+/// Darwin only has one TLS scheme which must be capable of dealing with the
+/// fully general situation, in the worst case. This means:
+/// + "extern __thread" declaration.
+/// + Defined in a possibly unknown dynamic library.
+///
+/// The general system is that each __thread variable has a [3 x i32] descriptor
+/// which contains information used by the runtime to calculate the address. The
+/// only part of this the compiler needs to know about is the first word, which
+/// contains a function pointer that must be called with the address of the
+/// entire descriptor in "r0".
+///
+/// Since this descriptor may be in a different unit, in general access must
+/// proceed along the usual ARM rules. A common sequence to produce is:
+///
+/// movw rT1, :lower16:_var$non_lazy_ptr
+/// movt rT1, :upper16:_var$non_lazy_ptr
+/// ldr r0, [rT1]
+/// ldr rT2, [r0]
+/// blx rT2
+/// [...address now in r0...]
+SDValue
+ARMTargetLowering::LowerGlobalTLSAddressDarwin(SDValue Op,
+ SelectionDAG &DAG) const {
+ assert(Subtarget->isTargetDarwin() && "TLS only supported on Darwin");
+ SDLoc DL(Op);
+
+ // First step is to get the address of the actual global symbol. This is where
+ // the TLS descriptor lives.
+ SDValue DescAddr = LowerGlobalAddressDarwin(Op, DAG);
+
+ // The first entry in the descriptor is a function pointer that we must call
+ // to obtain the address of the variable.
+ SDValue Chain = DAG.getEntryNode();
+ SDValue FuncTLVGet =
+ DAG.getLoad(MVT::i32, DL, Chain, DescAddr,
+ MachinePointerInfo::getGOT(DAG.getMachineFunction()),
+ false, true, true, 4);
+ Chain = FuncTLVGet.getValue(1);
+
+ MachineFunction &F = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = F.getFrameInfo();
+ MFI->setAdjustsStack(true);
+
+ // TLS calls preserve all registers except those that absolutely must be
+ // trashed: R0 (it takes an argument), LR (it's a call) and CPSR (let's not be
+ // silly).
+ auto TRI =
+ getTargetMachine().getSubtargetImpl(*F.getFunction())->getRegisterInfo();
+ auto ARI = static_cast<const ARMRegisterInfo *>(TRI);
+ const uint32_t *Mask = ARI->getTLSCallPreservedMask(DAG.getMachineFunction());
+
+ // Finally, we can make the call. This is just a degenerate version of a
+ // normal ARM call node: r0 takes the address of the descriptor, and
+ // returns the address of the variable in this thread.
+ Chain = DAG.getCopyToReg(Chain, DL, ARM::R0, DescAddr, SDValue());
+ Chain =
+ DAG.getNode(ARMISD::CALL, DL, DAG.getVTList(MVT::Other, MVT::Glue),
+ Chain, FuncTLVGet, DAG.getRegister(ARM::R0, MVT::i32),
+ DAG.getRegisterMask(Mask), Chain.getValue(1));
+ return DAG.getCopyFromReg(Chain, DL, ARM::R0, MVT::i32, Chain.getValue(1));
+}
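+
+// For illustration, a source-level access that exercises this lowering
+// (hypothetical names):
+// extern __thread int TLSVar; // possibly defined in another library
+// int get() { return TLSVar; } // may emit the movw/movt, ldr, ldr, blx
+// // sequence documented above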
+
// Lower ISD::GlobalTLSAddress using the "general dynamic" model
SDValue
ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
@@ -2631,9 +2711,11 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue
ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
+ if (Subtarget->isTargetDarwin())
+ return LowerGlobalTLSAddressDarwin(Op, DAG);
+
// TODO: implement the "local dynamic" model
- assert(Subtarget->isTargetELF() &&
- "TLS not implemented for non-ELF targets");
+ assert(Subtarget->isTargetELF() && "Only ELF implemented here");
GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
if (DAG.getTarget().Options.EmulatedTLS)
return LowerToTLSEmulatedModel(GA, DAG);
@@ -11407,7 +11489,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'J':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a constant between -255 and -1, for negated ADD
// immediates. This can be used in GCC with an "n" modifier that
// prints the negated value, for use with SUB instructions. It is
@@ -11476,7 +11558,7 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
return;
case 'M':
- if (Subtarget->isThumb()) { // FIXME thumb2
+ if (Subtarget->isThumb1Only()) {
// This must be a multiple of 4 between 0 and 1020, for
// ADD sp + immediate.
if ((CVal >= 0 && CVal <= 1020) && ((CVal & 3) == 0))
@@ -12324,3 +12406,49 @@ unsigned ARMTargetLowering::getExceptionSelectorRegister(
// via the personality function.
return Subtarget->useSjLjEH() ? ARM::NoRegister : ARM::R1;
}
+
+void ARMTargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ // Update IsSplitCSR in ARMFunctionInfo.
+ ARMFunctionInfo *AFI = Entry->getParent()->getInfo<ARMFunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void ARMTargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const ARMBaseRegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (ARM::GPRRegClass.contains(*I))
+ RC = &ARM::GPRRegClass;
+ else if (ARM::DPRRegClass.contains(*I))
+ RC = &ARM::DPRRegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+ // FIXME: this currently does not emit CFI pseudo-instructions, it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
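+
+// A sketch of IR that takes the split-CSR path above (hypothetical name):
+// define cxx_fast_tlscc i32* @_ZTW6TLSVar() nounwind { ... }
+// The entry block copies each CSR into a virtual register and each exit
+// block copies it back, instead of spilling around the whole function.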
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index b764624f1492..96b56c3ec330 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -526,6 +526,8 @@ namespace llvm {
SDValue LowerToTLSExecModels(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
TLSModel::Model model) const;
+ SDValue LowerGlobalTLSAddressDarwin(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
@@ -578,6 +580,15 @@ namespace llvm {
SmallVectorImpl<SDValue> &InVals,
bool isThisReturn, SDValue ThisVal) const;
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index b9de83bfe6dc..c446ba3109e4 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -5398,6 +5398,27 @@ def MOV_ga_pcrel_ldr : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr),
Requires<[IsARM, UseMovt]>;
} // isReMaterializable
+// The many different faces of TLS access.
+def : ARMPat<(ARMWrapper tglobaltlsaddr :$dst),
+ (MOVi32imm tglobaltlsaddr :$dst)>,
+ Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapper tglobaltlsaddr:$src),
+ (LDRLIT_ga_abs tglobaltlsaddr:$src)>,
+ Requires<[IsARM, DontUseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (MOV_ga_pcrel tglobaltlsaddr:$addr)>, Requires<[IsARM, UseMovt]>;
+
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (LDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, DontUseMovt]>;
+let AddedComplexity = 10 in
+def : Pat<(load (ARMWrapperPIC tglobaltlsaddr:$addr)),
+ (MOV_ga_pcrel_ldr tglobaltlsaddr:$addr)>,
+ Requires<[IsARM, UseMovt]>;
+
// ConstantPool, GlobalAddress, and JumpTable
def : ARMPat<(ARMWrapper tconstpool :$dst), (LEApcrel tconstpool :$dst)>;
def : ARMPat<(ARMWrapper tglobaladdr :$dst), (MOVi32imm tglobaladdr :$dst)>,
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 7020ffb41b64..defef4ea9073 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -5689,7 +5689,10 @@ def : NEONInstAlias<"vmov${p} $Vd, $Vm",
// VMOV : Vector Move (Immediate)
-let isReMaterializable = 1 in {
+// Although VMOVs are not strictly speaking cheap, they are as expensive
+// as their copy counterpart (VORR), so we should prefer rematerialization
+// over splitting when it applies.
+let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$Vd),
(ins nImmSplatI8:$SIMM), IIC_VMOVImm,
"vmov", "i8", "$Vd, $SIMM", "",
@@ -5744,7 +5747,7 @@ def VMOVv4f32 : N1ModImm<1, 0b000, 0b1111, 0, 1, 0, 1, (outs QPR:$Vd),
(ins nImmVMOVF32:$SIMM), IIC_VMOVImm,
"vmov", "f32", "$Vd, $SIMM", "",
[(set QPR:$Vd, (v4f32 (NEONvmovFPImm timm:$SIMM)))]>;
-} // isReMaterializable
+} // isReMaterializable, isAsCheapAsAMove
// Add support for bytes replication feature, so it could be GAS compatible.
// E.g. instructions below:
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index df6f24306354..5b1f9a06442e 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -1366,6 +1366,14 @@ def tLDRLIT_ga_abs : PseudoInst<(outs tGPR:$dst), (ins i32imm:$src),
(ARMWrapper tglobaladdr:$src))]>,
Requires<[IsThumb, DontUseMovt]>;
+// TLS globals
+def : Pat<(ARMWrapperPIC tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_pcrel tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+def : Pat<(ARMWrapper tglobaltlsaddr:$addr),
+ (tLDRLIT_ga_abs tglobaltlsaddr:$addr)>,
+ Requires<[IsThumb, DontUseMovt]>;
+
// JumpTable
def : T1Pat<(ARMWrapperJT tjumptable:$dst),
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index d460d33fa0a3..f42f4569b2f8 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -3875,6 +3875,13 @@ def t2MOV_ga_pcrel : PseudoInst<(outs rGPR:$dst), (ins i32imm:$addr),
}
+def : T2Pat<(ARMWrapperPIC tglobaltlsaddr :$dst),
+ (t2MOV_ga_pcrel tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+def : T2Pat<(ARMWrapper tglobaltlsaddr:$dst),
+ (t2MOVi32imm tglobaltlsaddr:$dst)>,
+ Requires<[IsThumb2, UseMovt]>;
+
// ConstantPool, GlobalAddress, and JumpTable
def : T2Pat<(ARMWrapper tconstpool :$dst), (t2LEApcrel tconstpool :$dst)>;
def : T2Pat<(ARMWrapper tglobaladdr :$dst), (t2MOVi32imm tglobaladdr :$dst)>,
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 050cd1a445ad..63e7940bb14e 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -930,10 +930,10 @@ def VMOVDRR : AVConv5I<0b11000100, 0b1011,
// and could enable the conversion to float to be removed completely.
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsARM]>;
+ Requires<[IsARM, HasV6T2]>;
def : Pat<(fabs (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (t2BFC GPR:$Rh, (i32 0x7FFFFFFF)))>,
- Requires<[IsThumb2]>;
+ Requires<[IsThumb2, HasV6T2]>;
def : Pat<(fneg (arm_fmdrr GPR:$Rl, GPR:$Rh)),
(VMOVDRR GPR:$Rl, (EORri GPR:$Rh, (i32 0x80000000)))>,
Requires<[IsARM]>;
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.cpp b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
index ac0330fbcb34..71ad7a4a732a 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.cpp
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.cpp
@@ -20,4 +20,5 @@ ARMFunctionInfo::ARMFunctionInfo(MachineFunction &MF)
RestoreSPFromFP(false), LRSpilledForFarJump(false),
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSSize(0),
- PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false) {}
+ PICLabelUId(0), VarArgsFrameIndex(0), HasITBlocks(false),
+ IsSplitCSR(false) {}
diff --git a/lib/Target/ARM/ARMMachineFunctionInfo.h b/lib/Target/ARM/ARMMachineFunctionInfo.h
index d6447978ef2c..68f9aec8cae5 100644
--- a/lib/Target/ARM/ARMMachineFunctionInfo.h
+++ b/lib/Target/ARM/ARMMachineFunctionInfo.h
@@ -118,6 +118,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// coalesced weights.
DenseMap<const MachineBasicBlock*, unsigned> CoalescedWeights;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR;
+
public:
ARMFunctionInfo() :
isThumb(false),
@@ -128,7 +132,7 @@ public:
FramePtrSpillOffset(0), GPRCS1Offset(0), GPRCS2Offset(0), DPRCSOffset(0),
GPRCS1Size(0), GPRCS2Size(0), DPRCSAlignGapSize(0), DPRCSSize(0),
NumAlignedDPRCS2Regs(0), PICLabelUId(0),
- VarArgsFrameIndex(0), HasITBlocks(false) {}
+ VarArgsFrameIndex(0), HasITBlocks(false), IsSplitCSR(false) {}
explicit ARMFunctionInfo(MachineFunction &MF);
@@ -199,6 +203,9 @@ public:
bool hasITBlocks() const { return HasITBlocks; }
void setHasITBlocks(bool h) { HasITBlocks = h; }
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
+
void recordCPEClone(unsigned CPIdx, unsigned CPCloneIdx) {
if (!CPEClones.insert(std::make_pair(CPCloneIdx, CPIdx)).second)
llvm_unreachable("Duplicate entries!");
diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
index 6084f22c8470..57577dc834b7 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp
@@ -388,6 +388,9 @@ private:
size_t calculateContentSize() const;
+ // Reset state between object emissions
+ void reset() override;
+
public:
ARMTargetELFStreamer(MCStreamer &S)
: ARMTargetStreamer(S), CurrentVendor("aeabi"), FPU(ARM::FK_INVALID),
@@ -415,7 +418,7 @@ public:
MCCodeEmitter *Emitter, bool IsThumb)
: MCELFStreamer(Context, TAB, OS, Emitter), IsThumb(IsThumb),
MappingSymbolCounter(0), LastEMS(EMS_None) {
- Reset();
+ EHReset();
}
~ARMELFStreamer() {}
@@ -579,7 +582,10 @@ private:
}
// Helper functions for ARM exception handling directives
- void Reset();
+ void EHReset();
+
+ // Reset state between object emissions
+ void reset() override;
void EmitPersonalityFixup(StringRef Name);
void FlushPendingOffset();
@@ -1040,6 +1046,8 @@ void ARMTargetELFStreamer::emitInst(uint32_t Inst, char Suffix) {
getStreamer().emitInst(Inst, Suffix);
}
+void ARMTargetELFStreamer::reset() { AttributeSection = nullptr; }
+
void ARMELFStreamer::FinishImpl() {
MCTargetStreamer &TS = *getTargetStreamer();
ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
@@ -1048,6 +1056,18 @@ void ARMELFStreamer::FinishImpl() {
MCELFStreamer::FinishImpl();
}
+void ARMELFStreamer::reset() {
+ MCTargetStreamer &TS = *getTargetStreamer();
+ ARMTargetStreamer &ATS = static_cast<ARMTargetStreamer &>(TS);
+ ATS.reset();
+ MappingSymbolCounter = 0;
+ MCELFStreamer::reset();
+ // MCELFStreamer clears the assembler's e_flags. However, for
+ // ARM we manually set the ABI version on streamer creation, so
+ // do the same here.
+ getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5);
+}
+
inline void ARMELFStreamer::SwitchToEHSection(const char *Prefix,
unsigned Type,
unsigned Flags,
@@ -1094,7 +1114,7 @@ void ARMELFStreamer::EmitFixup(const MCExpr *Expr, MCFixupKind Kind) {
Kind));
}
-void ARMELFStreamer::Reset() {
+void ARMELFStreamer::EHReset() {
ExTab = nullptr;
FnStart = nullptr;
Personality = nullptr;
@@ -1164,7 +1184,7 @@ void ARMELFStreamer::emitFnEnd() {
SwitchSection(&FnStart->getSection());
// Clean exception handling frame information
- Reset();
+ EHReset();
}
void ARMELFStreamer::emitCantUnwind() { CantUnwind = true; }
diff --git a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
index dad50f2834ee..c0d10c896354 100644
--- a/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
+++ b/lib/Target/ARM/MCTargetDesc/ARMTargetStreamer.cpp
@@ -38,6 +38,9 @@ void ARMTargetStreamer::emitCurrentConstantPool() {
// finish() - write out any non-empty assembler constant pools.
void ARMTargetStreamer::finish() { ConstantPools->emitAll(Streamer); }
+// reset() - Reset any state
+void ARMTargetStreamer::reset() {}
+
// The remaining callbacks should be handled separately by each
// streamer.
void ARMTargetStreamer::emitFnStart() {}
diff --git a/lib/Target/ARM/Thumb1FrameLowering.h b/lib/Target/ARM/Thumb1FrameLowering.h
index 812f9830824d..27faac63683a 100644
--- a/lib/Target/ARM/Thumb1FrameLowering.h
+++ b/lib/Target/ARM/Thumb1FrameLowering.h
@@ -53,6 +53,11 @@ public:
/// \p MBB will be correctly handled by the target.
bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override;
+ /// Disable shrink wrap as tBfar/BL will be used to adjust for long jumps.
+ bool enableShrinkWrapping(const MachineFunction &MF) const override {
+ return false;
+ }
+
private:
/// Check if the frame lowering of \p MF needs a special fixup
/// code sequence for the epilogue.
diff --git a/lib/Target/AVR/AVR.h b/lib/Target/AVR/AVR.h
new file mode 100644
index 000000000000..4c1667ed341c
--- /dev/null
+++ b/lib/Target/AVR/AVR.h
@@ -0,0 +1,54 @@
+//===-- AVR.h - Top-level interface for AVR representation ------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in the LLVM
+// AVR back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_H
+#define LLVM_AVR_H
+
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
+
+namespace llvm {
+
+class AVRTargetMachine;
+class FunctionPass;
+
+FunctionPass *createAVRISelDag(AVRTargetMachine &TM,
+ CodeGenOpt::Level OptLevel);
+FunctionPass *createAVRExpandPseudoPass();
+FunctionPass *createAVRFrameAnalyzerPass();
+FunctionPass *createAVRDynAllocaSRPass();
+FunctionPass *createAVRBranchSelectionPass();
+
+/**
+ * Contains the AVR backend.
+ */
+namespace AVR {
+
+enum AddressSpace { DataMemory, ProgramMemory };
+
+template <typename T> bool isProgramMemoryAddress(T *V) {
+ return cast<PointerType>(V->getType())->getAddressSpace() == ProgramMemory;
+}
+
+inline bool isProgramMemoryAccess(MemSDNode const *N) {
+ auto V = N->getMemOperand()->getValue();
+
+ return (V != nullptr) ? isProgramMemoryAddress(V) : false;
+}
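+
+// For illustration (hypothetical IR): a load such as
+// %v = load i8, i8 addrspace(1)* %p
+// accesses ProgramMemory (address space 1 in the enum above), so
+// isProgramMemoryAccess() returns true for its MemSDNode.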
+
+} // end of namespace AVR
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_H
diff --git a/lib/Target/AVR/AVRSelectionDAGInfo.h b/lib/Target/AVR/AVRSelectionDAGInfo.h
new file mode 100644
index 000000000000..ee832ad1bc75
--- /dev/null
+++ b/lib/Target/AVR/AVRSelectionDAGInfo.h
@@ -0,0 +1,29 @@
+//===-- AVRSelectionDAGInfo.h - AVR SelectionDAG Info -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the AVR subclass for TargetSelectionDAGInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_SELECTION_DAG_INFO_H
+#define LLVM_AVR_SELECTION_DAG_INFO_H
+
+#include "llvm/Target/TargetSelectionDAGInfo.h"
+
+namespace llvm {
+/**
+ * Holds information about the AVR instruction selection DAG.
+ */
+class AVRSelectionDAGInfo : public TargetSelectionDAGInfo {
+public:
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_SELECTION_DAG_INFO_H
diff --git a/lib/Target/AVR/AVRTargetObjectFile.cpp b/lib/Target/AVR/AVRTargetObjectFile.cpp
new file mode 100644
index 000000000000..85f03e818e83
--- /dev/null
+++ b/lib/Target/AVR/AVRTargetObjectFile.cpp
@@ -0,0 +1,40 @@
+//===-- AVRTargetObjectFile.cpp - AVR Object Files ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AVRTargetObjectFile.h"
+
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Mangler.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Support/ELF.h"
+
+#include "AVR.h"
+
+namespace llvm {
+void AVRTargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ Base::Initialize(Ctx, TM);
+ ProgmemDataSection =
+ Ctx.getELFSection(".progmem.data", ELF::SHT_PROGBITS, ELF::SHF_ALLOC);
+}
+
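+// For example (illustrative IR, not part of this patch): a global such as
+//   @msg = addrspace(1) constant [6 x i8] c"hello\00"
+// has a program-memory address and no explicit section, so the hook below
+// places it in .progmem.data.
+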
+MCSection *
+AVRTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind, Mangler &Mang,
+ const TargetMachine &TM) const {
+ // Global values in flash memory are placed in the .progmem.data section
+ // unless they already have a user-assigned section.
+ if (AVR::isProgramMemoryAddress(GV) && !GV->hasSection())
+ return ProgmemDataSection;
+
+ // Otherwise, we work the same way as ELF.
+ return Base::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
+} // end of namespace llvm
diff --git a/lib/Target/AVR/AVRTargetObjectFile.h b/lib/Target/AVR/AVRTargetObjectFile.h
new file mode 100644
index 000000000000..bdda35b34993
--- /dev/null
+++ b/lib/Target/AVR/AVRTargetObjectFile.h
@@ -0,0 +1,35 @@
+//===-- AVRTargetObjectFile.h - AVR Object Info -----------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_AVR_TARGET_OBJECT_FILE_H
+#define LLVM_AVR_TARGET_OBJECT_FILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+/**
+ * Lowering for an AVR ELF32 object file.
+ */
+class AVRTargetObjectFile : public TargetLoweringObjectFileELF {
+ typedef TargetLoweringObjectFileELF Base;
+
+public:
+ void Initialize(MCContext &ctx, const TargetMachine &TM) override;
+
+ MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler &Mang,
+ const TargetMachine &TM) const override;
+
+private:
+ MCSection *ProgmemDataSection;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_AVR_TARGET_OBJECT_FILE_H
diff --git a/lib/Target/AVR/CMakeLists.txt b/lib/Target/AVR/CMakeLists.txt
index 22b30ef35855..b4fb0d9ceac2 100644
--- a/lib/Target/AVR/CMakeLists.txt
+++ b/lib/Target/AVR/CMakeLists.txt
@@ -6,6 +6,7 @@ add_public_tablegen_target(AVRCommonTableGen)
add_llvm_target(AVRCodeGen
AVRTargetMachine.cpp
+ AVRTargetObjectFile.cpp
)
add_dependencies(LLVMAVRCodeGen intrinsics_gen)
diff --git a/lib/Target/Hexagon/CMakeLists.txt b/lib/Target/Hexagon/CMakeLists.txt
index 181e4e3aa85d..333ca6a757aa 100644
--- a/lib/Target/Hexagon/CMakeLists.txt
+++ b/lib/Target/Hexagon/CMakeLists.txt
@@ -39,6 +39,8 @@ add_llvm_target(HexagonCodeGen
HexagonNewValueJump.cpp
HexagonOptimizeSZextends.cpp
HexagonPeephole.cpp
+ HexagonRDF.cpp
+ HexagonRDFOpt.cpp
HexagonRegisterInfo.cpp
HexagonSelectionDAGInfo.cpp
HexagonSplitConst32AndConst64.cpp
@@ -49,6 +51,10 @@ add_llvm_target(HexagonCodeGen
HexagonTargetObjectFile.cpp
HexagonTargetTransformInfo.cpp
HexagonVLIWPacketizer.cpp
+ RDFCopy.cpp
+ RDFDeadCode.cpp
+ RDFGraph.cpp
+ RDFLiveness.cpp
)
add_subdirectory(AsmParser)
diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
index e213089687e8..4c7c0392a132 100644
--- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp
+++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp
@@ -190,9 +190,9 @@ bool HexagonAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
-MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
- MCStreamer &OutStreamer,
- const MCOperand &Imm, int AlignSize) {
+static MCSymbol *smallData(AsmPrinter &AP, const MachineInstr &MI,
+ MCStreamer &OutStreamer, const MCOperand &Imm,
+ int AlignSize) {
MCSymbol *Sym;
int64_t Value;
if (Imm.getExpr()->evaluateAsAbsolute(Value)) {
diff --git a/lib/Target/Hexagon/HexagonBitSimplify.cpp b/lib/Target/Hexagon/HexagonBitSimplify.cpp
index 77907b054d54..4d2b54521e83 100644
--- a/lib/Target/Hexagon/HexagonBitSimplify.cpp
+++ b/lib/Target/Hexagon/HexagonBitSimplify.cpp
@@ -1275,6 +1275,8 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
if (!BT.has(RD.Reg))
continue;
const BitTracker::RegisterCell &DC = BT.lookup(RD.Reg);
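+ // The replacement instruction built below must not be inserted among
+ // PHIs: if MI is a PHI, insert at the first non-PHI instead.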
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
// Find a source operand that is equal to the result.
for (auto &Op : MI->uses()) {
@@ -1298,7 +1300,7 @@ bool RedundantInstrElimination::processBlock(MachineBasicBlock &B,
DebugLoc DL = MI->getDebugLoc();
const TargetRegisterClass *FRC = HBS::getFinalVRegClass(RD, MRI);
unsigned NewR = MRI.createVirtualRegister(FRC);
- BuildMI(B, I, DL, HII.get(TargetOpcode::COPY), NewR)
+ BuildMI(B, At, DL, HII.get(TargetOpcode::COPY), NewR)
.addReg(RS.Reg, 0, RS.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
BT.put(BitTracker::RegisterRef(NewR), SC);
@@ -1925,7 +1927,9 @@ bool BitSimplification::genPackhl(MachineInstr *MI,
MachineBasicBlock &B = *MI->getParent();
unsigned NewR = MRI.createVirtualRegister(&Hexagon::DoubleRegsRegClass);
DebugLoc DL = MI->getDebugLoc();
- BuildMI(B, MI, DL, HII.get(Hexagon::S2_packhl), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ BuildMI(B, At, DL, HII.get(Hexagon::S2_packhl), NewR)
.addReg(Rs.Reg, 0, Rs.Sub)
.addReg(Rt.Reg, 0, Rt.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
@@ -1950,9 +1954,11 @@ bool BitSimplification::genExtractHalf(MachineInstr *MI,
// Prefer zxth, since zxth can go in any slot, while extractu only in
// slots 2 and 3.
unsigned NewR = 0;
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
if (L.Low && Opc != Hexagon::A2_zxth) {
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, MI, DL, HII.get(Hexagon::A2_zxth), NewR)
+ BuildMI(B, At, DL, HII.get(Hexagon::A2_zxth), NewR)
.addReg(L.Reg, 0, L.Sub);
} else if (!L.Low && Opc != Hexagon::S2_extractu) {
NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
@@ -1989,7 +1995,9 @@ bool BitSimplification::genCombineHalf(MachineInstr *MI,
MachineBasicBlock &B = *MI->getParent();
DebugLoc DL = MI->getDebugLoc();
unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- BuildMI(B, MI, DL, HII.get(COpc), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ BuildMI(B, At, DL, HII.get(COpc), NewR)
.addReg(H.Reg, 0, H.Sub)
.addReg(L.Reg, 0, L.Sub);
HBS::replaceSubWithSub(RD.Reg, RD.Sub, NewR, 0, MRI);
@@ -2043,7 +2051,9 @@ bool BitSimplification::genExtractLow(MachineInstr *MI,
continue;
unsigned NewR = MRI.createVirtualRegister(&Hexagon::IntRegsRegClass);
- auto MIB = BuildMI(B, MI, DL, HII.get(NewOpc), NewR)
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
+ auto MIB = BuildMI(B, At, DL, HII.get(NewOpc), NewR)
.addReg(RS.Reg, 0, RS.Sub);
if (NewOpc == Hexagon::A2_andir)
MIB.addImm((1 << W) - 1);
@@ -2076,6 +2086,8 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
if (!BT.has(RS.Reg) || !HBS::getSubregMask(RS, F, W, MRI))
return false;
MachineBasicBlock &B = *MI->getParent();
+ auto At = MI->isPHI() ? B.getFirstNonPHI()
+ : MachineBasicBlock::iterator(MI);
const BitTracker::RegisterCell &SC = BT.lookup(RS.Reg);
const BitTracker::BitValue &V = SC[F+BN];
@@ -2098,7 +2110,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
}
if (P != UINT_MAX) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
- BuildMI(B, MI, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
+ BuildMI(B, At, DL, HII.get(Hexagon::S2_tstbit_i), NewR)
.addReg(RR.Reg, 0, RR.Sub)
.addImm(P);
HBS::replaceReg(RD.Reg, NewR, MRI);
@@ -2108,7 +2120,7 @@ bool BitSimplification::simplifyTstbit(MachineInstr *MI,
} else if (V.is(0) || V.is(1)) {
unsigned NewR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
unsigned NewOpc = V.is(0) ? Hexagon::TFR_PdFalse : Hexagon::TFR_PdTrue;
- BuildMI(B, MI, DL, HII.get(NewOpc), NewR);
+ BuildMI(B, At, DL, HII.get(NewOpc), NewR);
HBS::replaceReg(RD.Reg, NewR, MRI);
return true;
}
diff --git a/lib/Target/Hexagon/HexagonInstrInfoV4.td b/lib/Target/Hexagon/HexagonInstrInfoV4.td
index 87d6b359f5fb..37c2042a2ccd 100644
--- a/lib/Target/Hexagon/HexagonInstrInfoV4.td
+++ b/lib/Target/Hexagon/HexagonInstrInfoV4.td
@@ -3320,6 +3320,7 @@ class T_StoreAbsGP <string mnemonic, RegisterClass RC, Operand ImmOp,
/* u16_0Imm */ addr{15-0})));
// Store upper-half and store doubleword cannot be NV.
let isNVStorable = !if (!eq(mnemonic, "memd"), 0, !if(isHalf,0,1));
+ let Uses = !if (isAbs, [], [GP]);
let IClass = 0b0100;
let Inst{27} = 1;
@@ -3425,6 +3426,7 @@ class T_StoreAbsGP_NV <string mnemonic, Operand ImmOp, bits<2>MajOp, bit isAbs>
!if (!eq(ImmOpStr, "u16_2Imm"), addr{17-2},
!if (!eq(ImmOpStr, "u16_1Imm"), addr{16-1},
/* u16_0Imm */ addr{15-0})));
+ let Uses = !if (isAbs, [], [GP]);
let IClass = 0b0100;
let Inst{27} = 1;
@@ -3736,7 +3738,7 @@ defm loadrd : LD_Abs<"memd", "LDrid", DoubleRegs, u16_3Imm, 0b110>;
// if ([!]Pv[.new]) Rx=mem[bhwd](##global)
//===----------------------------------------------------------------------===//
-let isAsmParserOnly = 1 in
+let isAsmParserOnly = 1, Uses = [GP] in
class T_LoadGP <string mnemonic, string BaseOp, RegisterClass RC, Operand ImmOp,
bits<3> MajOp>
: T_LoadAbsGP <mnemonic, RC, ImmOp, MajOp>, PredNewRel {
diff --git a/lib/Target/Hexagon/HexagonRDF.cpp b/lib/Target/Hexagon/HexagonRDF.cpp
new file mode 100644
index 000000000000..06719cddf4b6
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRDF.cpp
@@ -0,0 +1,60 @@
+//===--- HexagonRDF.cpp ---------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonRDF.h"
+#include "HexagonInstrInfo.h"
+#include "HexagonRegisterInfo.h"
+
+#include "llvm/CodeGen/MachineInstr.h"
+
+using namespace llvm;
+using namespace rdf;
+
+bool HexagonRegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
+ if (RA == RB)
+ return true;
+
+ if (TargetRegisterInfo::isVirtualRegister(RA.Reg) &&
+ TargetRegisterInfo::isVirtualRegister(RB.Reg)) {
+ // Hexagon-specific case: for the same virtual register, a whole-register
+ // reference covers any subregister reference, but not the other way
+ // around.
+ if (RA.Reg == RB.Reg) {
+ if (RA.Sub == 0)
+ return true;
+ if (RB.Sub == 0)
+ return false;
+ }
+ }
+
+ return RegisterAliasInfo::covers(RA, RB);
+}
+
+bool HexagonRegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR)
+ const {
+ if (RRs.count(RR))
+ return true;
+
+ if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RR.Reg));
+ // Check if both covering subregisters are present.
+ bool HasLo = RRs.count({RR.Reg, Hexagon::subreg_loreg});
+ bool HasHi = RRs.count({RR.Reg, Hexagon::subreg_hireg});
+ if (HasLo && HasHi)
+ return true;
+ }
+
+ if (RR.Sub == 0) {
+ // For a whole-register reference, check if both of its covering
+ // subregisters are present.
+ unsigned Lo = TRI.getSubReg(RR.Reg, Hexagon::subreg_loreg);
+ unsigned Hi = TRI.getSubReg(RR.Reg, Hexagon::subreg_hireg);
+ if (RRs.count({Lo, 0}) && RRs.count({Hi, 0}))
+ return true;
+ }
+
+ return RegisterAliasInfo::covers(RRs, RR);
+}
diff --git a/lib/Target/Hexagon/HexagonRDF.h b/lib/Target/Hexagon/HexagonRDF.h
new file mode 100644
index 000000000000..00c1889e8eb5
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRDF.h
@@ -0,0 +1,28 @@
+//===--- HexagonRDF.h -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef HEXAGON_RDF_H
+#define HEXAGON_RDF_H
+#include "RDFGraph.h"
+
+namespace llvm {
+ class TargetRegisterInfo;
+}
+
+namespace rdf {
+ struct HexagonRegisterAliasInfo : public RegisterAliasInfo {
+ HexagonRegisterAliasInfo(const TargetRegisterInfo &TRI)
+ : RegisterAliasInfo(TRI) {}
+ bool covers(RegisterRef RA, RegisterRef RR) const override;
+ bool covers(const RegisterSet &RRs, RegisterRef RR) const override;
+ };
+}
+
+#endif
+
diff --git a/lib/Target/Hexagon/HexagonRDFOpt.cpp b/lib/Target/Hexagon/HexagonRDFOpt.cpp
new file mode 100644
index 000000000000..3fcda984d265
--- /dev/null
+++ b/lib/Target/Hexagon/HexagonRDFOpt.cpp
@@ -0,0 +1,272 @@
+//===--- HexagonRDFOpt.cpp ------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "HexagonInstrInfo.h"
+#include "HexagonRDF.h"
+#include "HexagonSubtarget.h"
+#include "RDFCopy.h"
+#include "RDFDeadCode.h"
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+namespace llvm {
+ void initializeHexagonRDFOptPass(PassRegistry&);
+ FunctionPass *createHexagonRDFOpt();
+}
+
+namespace {
+ cl::opt<unsigned> RDFLimit("rdf-limit", cl::init(UINT_MAX));
+ unsigned RDFCount = 0;
+ cl::opt<bool> RDFDump("rdf-dump", cl::init(false));
+
+ class HexagonRDFOpt : public MachineFunctionPass {
+ public:
+ HexagonRDFOpt() : MachineFunctionPass(ID) {
+ initializeHexagonRDFOptPass(*PassRegistry::getPassRegistry());
+ }
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<MachineDominanceFrontier>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ const char *getPassName() const override {
+ return "Hexagon RDF optimizations";
+ }
+ bool runOnMachineFunction(MachineFunction &MF) override;
+
+ static char ID;
+
+ private:
+ MachineDominatorTree *MDT;
+ MachineRegisterInfo *MRI;
+ };
+
+ char HexagonRDFOpt::ID = 0;
+}
+
+INITIALIZE_PASS_BEGIN(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineDominanceFrontier)
+INITIALIZE_PASS_END(HexagonRDFOpt, "rdfopt", "Hexagon RDF opt", false, false)
+
+
+struct HexagonDCE : public DeadCodeElimination {
+ HexagonDCE(DataFlowGraph &G, MachineRegisterInfo &MRI)
+ : DeadCodeElimination(G, MRI) {}
+ bool rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove);
+ void removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum);
+
+ bool run();
+};
+
+
+bool HexagonDCE::run() {
+ bool Collected = collect();
+ if (!Collected)
+ return false;
+
+ const SetVector<NodeId> &DeadNodes = getDeadNodes();
+ const SetVector<NodeId> &DeadInstrs = getDeadInstrs();
+
+ typedef DenseMap<NodeId,NodeId> RefToInstrMap;
+ RefToInstrMap R2I;
+ SetVector<NodeId> PartlyDead;
+ DataFlowGraph &DFG = getDFG();
+
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (auto TA : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Stmt>, DFG)) {
+ NodeAddr<StmtNode*> SA = TA;
+ for (NodeAddr<RefNode*> RA : SA.Addr->members(DFG)) {
+ R2I.insert(std::make_pair(RA.Id, SA.Id));
+ if (DFG.IsDef(RA) && DeadNodes.count(RA.Id))
+ if (!DeadInstrs.count(SA.Id))
+ PartlyDead.insert(SA.Id);
+ }
+ }
+ }
+
+ // Nodes to remove.
+ SetVector<NodeId> Remove = DeadInstrs;
+
+ bool Changed = false;
+ for (NodeId N : PartlyDead) {
+ auto SA = DFG.addr<StmtNode*>(N);
+ if (trace())
+ dbgs() << "Partly dead: " << *SA.Addr->getCode();
+ Changed |= rewrite(SA, Remove);
+ }
+
+ return erase(Remove) || Changed;
+}
+
+
+void HexagonDCE::removeOperand(NodeAddr<InstrNode*> IA, unsigned OpNum) {
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+
+ auto getOpNum = [MI] (MachineOperand &Op) -> unsigned {
+ for (unsigned i = 0, n = MI->getNumOperands(); i != n; ++i)
+ if (&MI->getOperand(i) == &Op)
+ return i;
+ llvm_unreachable("Invalid operand");
+ };
+ DenseMap<NodeId,unsigned> OpMap;
+ NodeList Refs = IA.Addr->members(getDFG());
+ for (NodeAddr<RefNode*> RA : Refs)
+ OpMap.insert(std::make_pair(RA.Id, getOpNum(RA.Addr->getOp())));
+
+ MI->RemoveOperand(OpNum);
+
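+ // Re-point each RefNode at its operand's new position: operands after
+ // the removed one have shifted down by one index.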
+ for (NodeAddr<RefNode*> RA : Refs) {
+ unsigned N = OpMap[RA.Id];
+ if (N < OpNum)
+ RA.Addr->setRegRef(&MI->getOperand(N));
+ else if (N > OpNum)
+ RA.Addr->setRegRef(&MI->getOperand(N-1));
+ }
+}
+
+
+bool HexagonDCE::rewrite(NodeAddr<InstrNode*> IA, SetVector<NodeId> &Remove) {
+ if (!getDFG().IsCode<NodeAttrs::Stmt>(IA))
+ return false;
+ DataFlowGraph &DFG = getDFG();
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ auto &HII = static_cast<const HexagonInstrInfo&>(DFG.getTII());
+ if (HII.getAddrMode(MI) != HexagonII::PostInc)
+ return false;
+ unsigned Opc = MI->getOpcode();
+ unsigned OpNum, NewOpc;
+ switch (Opc) {
+ case Hexagon::L2_loadri_pi:
+ NewOpc = Hexagon::L2_loadri_io;
+ OpNum = 1;
+ break;
+ case Hexagon::L2_loadrd_pi:
+ NewOpc = Hexagon::L2_loadrd_io;
+ OpNum = 1;
+ break;
+ case Hexagon::V6_vL32b_pi:
+ NewOpc = Hexagon::V6_vL32b_ai;
+ OpNum = 1;
+ break;
+ case Hexagon::S2_storeri_pi:
+ NewOpc = Hexagon::S2_storeri_io;
+ OpNum = 0;
+ break;
+ case Hexagon::S2_storerd_pi:
+ NewOpc = Hexagon::S2_storerd_io;
+ OpNum = 0;
+ break;
+ case Hexagon::V6_vS32b_pi:
+ NewOpc = Hexagon::V6_vS32b_ai;
+ OpNum = 0;
+ break;
+ default:
+ return false;
+ }
+ auto IsDead = [this] (NodeAddr<DefNode*> DA) -> bool {
+ return getDeadNodes().count(DA.Id);
+ };
+ NodeList Defs;
+ MachineOperand &Op = MI->getOperand(OpNum);
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG)) {
+ if (&DA.Addr->getOp() != &Op)
+ continue;
+ Defs = DFG.getRelatedRefs(IA, DA);
+ if (!std::all_of(Defs.begin(), Defs.end(), IsDead))
+ return false;
+ break;
+ }
+
+ // Mark all nodes in Defs for removal.
+ for (auto D : Defs)
+ Remove.insert(D.Id);
+
+ if (trace())
+ dbgs() << "Rewriting: " << *MI;
+ MI->setDesc(HII.get(NewOpc));
+ MI->getOperand(OpNum+2).setImm(0);
+ removeOperand(IA, OpNum);
+ if (trace())
+ dbgs() << " to: " << *MI;
+
+ return true;
+}
+
+
+bool HexagonRDFOpt::runOnMachineFunction(MachineFunction &MF) {
+ if (RDFLimit.getPosition()) {
+ if (RDFCount >= RDFLimit)
+ return false;
+ RDFCount++;
+ }
+
+ MDT = &getAnalysis<MachineDominatorTree>();
+ const auto &MDF = getAnalysis<MachineDominanceFrontier>();
+ const auto &HII = *MF.getSubtarget<HexagonSubtarget>().getInstrInfo();
+ const auto &HRI = *MF.getSubtarget<HexagonSubtarget>().getRegisterInfo();
+ MRI = &MF.getRegInfo();
+
+ HexagonRegisterAliasInfo HAI(HRI);
+ TargetOperandInfo TOI(HII);
+
+ if (RDFDump)
+ MF.print(dbgs() << "Before " << getPassName() << "\n", nullptr);
+ DataFlowGraph G(MF, HII, HRI, *MDT, MDF, HAI, TOI);
+ G.build();
+ if (RDFDump) {
+ dbgs() << PrintNode<FuncNode*>(G.getFunc(), G) << '\n';
+ dbgs() << MF.getName() << '\n';
+ }
+
+ bool Changed;
+ CopyPropagation CP(G);
+ CP.trace(RDFDump);
+ Changed = CP.run();
+ if (Changed)
+ G.build();
+
+ HexagonDCE DCE(G, *MRI);
+ DCE.trace(RDFDump);
+ Changed |= DCE.run();
+
+ if (Changed) {
+ Liveness LV(*MRI, G);
+ LV.trace(RDFDump);
+ LV.computeLiveIns();
+ LV.resetLiveIns();
+ LV.resetKills();
+ }
+
+ if (RDFDump)
+ MF.print(dbgs() << "After " << getPassName() << "\n", nullptr);
+ return false;
+}
+
+
+FunctionPass *llvm::createHexagonRDFOpt() {
+ return new HexagonRDFOpt();
+}
+
+
diff --git a/lib/Target/Hexagon/HexagonRegisterInfo.cpp b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
index 61c0589fb5bf..6e5f7324aca8 100644
--- a/lib/Target/Hexagon/HexagonRegisterInfo.cpp
+++ b/lib/Target/Hexagon/HexagonRegisterInfo.cpp
@@ -103,6 +103,8 @@ BitVector HexagonRegisterInfo::getReservedRegs(const MachineFunction &MF)
Reserved.set(Hexagon::R30);
Reserved.set(Hexagon::R31);
Reserved.set(Hexagon::PC);
+ Reserved.set(Hexagon::GP);
+ Reserved.set(Hexagon::D14);
Reserved.set(Hexagon::D15);
Reserved.set(Hexagon::LC0);
Reserved.set(Hexagon::LC1);
diff --git a/lib/Target/Hexagon/HexagonTargetMachine.cpp b/lib/Target/Hexagon/HexagonTargetMachine.cpp
index 9dccd696c989..34b03fb74cef 100644
--- a/lib/Target/Hexagon/HexagonTargetMachine.cpp
+++ b/lib/Target/Hexagon/HexagonTargetMachine.cpp
@@ -26,7 +26,11 @@
using namespace llvm;
-static cl:: opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
+
+static cl::opt<bool> EnableRDFOpt("rdf-opt", cl::Hidden, cl::ZeroOrMore,
+ cl::init(true), cl::desc("Enable RDF-based optimizations"));
+
+static cl::opt<bool> DisableHardwareLoops("disable-hexagon-hwloops",
cl::Hidden, cl::desc("Disable Hardware Loops for Hexagon target"));
static cl::opt<bool> DisableHexagonCFGOpt("disable-hexagon-cfgopt",
@@ -111,6 +115,7 @@ namespace llvm {
FunctionPass *createHexagonOptimizeSZextends();
FunctionPass *createHexagonPacketizer();
FunctionPass *createHexagonPeephole();
+ FunctionPass *createHexagonRDFOpt();
FunctionPass *createHexagonSplitConst32AndConst64();
FunctionPass *createHexagonSplitDoubleRegs();
FunctionPass *createHexagonStoreWidening();
@@ -262,9 +267,12 @@ void HexagonPassConfig::addPreRegAlloc() {
}
void HexagonPassConfig::addPostRegAlloc() {
- if (getOptLevel() != CodeGenOpt::None)
+ if (getOptLevel() != CodeGenOpt::None) {
+ if (EnableRDFOpt)
+ addPass(createHexagonRDFOpt());
if (!DisableHexagonCFGOpt)
addPass(createHexagonCFGOptimizer(), false);
+ }
}
void HexagonPassConfig::addPreSched2() {
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
index c2c6275e7e8d..4b07ca7490a8 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp
@@ -334,21 +334,21 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI,
// The only relocs left should be GP relative:
default:
if (MCID.mayStore() || MCID.mayLoad()) {
- for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses;
- ++ImpUses) {
- if (*ImpUses == Hexagon::GP) {
- switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
- case HexagonII::MemAccessSize::ByteAccess:
- return fixup_Hexagon_GPREL16_0;
- case HexagonII::MemAccessSize::HalfWordAccess:
- return fixup_Hexagon_GPREL16_1;
- case HexagonII::MemAccessSize::WordAccess:
- return fixup_Hexagon_GPREL16_2;
- case HexagonII::MemAccessSize::DoubleWordAccess:
- return fixup_Hexagon_GPREL16_3;
- default:
- llvm_unreachable("unhandled fixup");
- }
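+ // Note that getImplicitUses() may return null when the instruction
+ // has no implicit uses.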
+ for (const MCPhysReg *ImpUses = MCID.getImplicitUses();
+ ImpUses && *ImpUses; ++ImpUses) {
+ if (*ImpUses != Hexagon::GP)
+ continue;
+ switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) {
+ case HexagonII::MemAccessSize::ByteAccess:
+ return fixup_Hexagon_GPREL16_0;
+ case HexagonII::MemAccessSize::HalfWordAccess:
+ return fixup_Hexagon_GPREL16_1;
+ case HexagonII::MemAccessSize::WordAccess:
+ return fixup_Hexagon_GPREL16_2;
+ case HexagonII::MemAccessSize::DoubleWordAccess:
+ return fixup_Hexagon_GPREL16_3;
+ default:
+ llvm_unreachable("unhandled fixup");
}
}
} else
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
index 6ceb848ba20c..4e1cce3bd7d1 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp
@@ -95,14 +95,7 @@ unsigned HexagonResource::setWeight(unsigned s) {
return (Weight);
}
-HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL;
-
-bool HexagonCVIResource::SetUp = HexagonCVIResource::setup();
-
-bool HexagonCVIResource::setup() {
- assert(!TUL);
- TUL = new (TypeUnitsAndLanes);
-
+void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) {
(*TUL)[HexagonII::TypeCVI_VA] =
UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1);
(*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2);
@@ -123,13 +116,12 @@ bool HexagonCVIResource::setup() {
(*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0);
(*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1);
(*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4);
-
- return true;
}
-HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s,
+HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL,
+ MCInstrInfo const &MCII, unsigned s,
MCInst const *id)
- : HexagonResource(s) {
+ : HexagonResource(s), TUL(TUL) {
unsigned T = HexagonMCInstrInfo::getType(MCII, *id);
if (TUL->count(T)) {
@@ -153,6 +145,7 @@ HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII,
MCSubtargetInfo const &STI)
: MCII(MCII), STI(STI) {
reset();
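+ // TUL is per-shuffler state now (rather than a lazily initialized
+ // static), so populate it for this subtarget here.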
+ HexagonCVIResource::SetupTUL(&TUL, STI.getCPU());
}
void HexagonShuffler::reset() {
@@ -163,7 +156,7 @@ void HexagonShuffler::reset() {
void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender,
unsigned S, bool X) {
- HexagonInstr PI(MCII, ID, Extender, S, X);
+ HexagonInstr PI(&TUL, MCII, ID, Extender, S, X);
Packet.push_back(PI);
}
diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
index 174f10fb2580..a093f8545132 100644
--- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
+++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h
@@ -20,6 +20,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
using namespace llvm;
@@ -53,9 +54,11 @@ public:
// HVX insn resources.
class HexagonCVIResource : public HexagonResource {
+public:
typedef std::pair<unsigned, unsigned> UnitsAndLanes;
typedef llvm::DenseMap<unsigned, UnitsAndLanes> TypeUnitsAndLanes;
+private:
// Available HVX slots.
enum {
CVI_NONE = 0,
@@ -65,9 +68,7 @@ class HexagonCVIResource : public HexagonResource {
CVI_MPY1 = 1 << 3
};
- static bool SetUp;
- static bool setup();
- static TypeUnitsAndLanes *TUL;
+ TypeUnitsAndLanes *TUL;
// Count of adjacent slots that the insn requires to be executed.
unsigned Lanes;
@@ -81,7 +82,9 @@ class HexagonCVIResource : public HexagonResource {
void setStore(bool f = true) { Store = f; };
public:
- HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id);
+ HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII,
+ unsigned s, MCInst const *id);
+ static void SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU);
bool isValid() const { return (Valid); };
unsigned getLanes() const { return (Lanes); };
@@ -100,10 +103,11 @@ class HexagonInstr {
bool SoloException;
public:
- HexagonInstr(MCInstrInfo const &MCII, MCInst const *id,
+ HexagonInstr(HexagonCVIResource::TypeUnitsAndLanes *T,
+ MCInstrInfo const &MCII, MCInst const *id,
MCInst const *Extender, unsigned s, bool x = false)
- : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id),
- SoloException(x){};
+ : ID(id), Extender(Extender), Core(s), CVI(T, MCII, s, id),
+ SoloException(x) {};
MCInst const *getDesc() const { return (ID); };
@@ -136,6 +140,8 @@ class HexagonShuffler {
// Shuffling error code.
unsigned Error;
+ HexagonCVIResource::TypeUnitsAndLanes TUL;
+
protected:
int64_t BundleFlags;
MCInstrInfo const &MCII;
diff --git a/lib/Target/Hexagon/RDFCopy.cpp b/lib/Target/Hexagon/RDFCopy.cpp
new file mode 100644
index 000000000000..c547c7195075
--- /dev/null
+++ b/lib/Target/Hexagon/RDFCopy.cpp
@@ -0,0 +1,180 @@
+//===--- RDFCopy.cpp ------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Simplistic RDF-based copy propagation.
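+//
+// As a sketch (illustrative virtual register names, not taken from this
+// patch):
+//   %1 = COPY %0
+//   ... = <use of %1>
+// If the def of %0 that reaches the use is the same def that reached the
+// COPY, the use of %1 can be rewritten to use %0; the COPY may then become
+// dead and is deleted.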
+
+#include "RDFCopy.h"
+#include "RDFGraph.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/Support/CommandLine.h"
+
+#include <atomic>
+
+#ifndef NDEBUG
+static cl::opt<unsigned> CpLimit("rdf-cp-limit", cl::init(0), cl::Hidden);
+static unsigned CpCount = 0;
+#endif
+
+using namespace llvm;
+using namespace rdf;
+
+void CopyPropagation::recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI) {
+ assert(MI->getOpcode() == TargetOpcode::COPY);
+ const MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1);
+ RegisterRef DstR = { Op0.getReg(), Op0.getSubReg() };
+ RegisterRef SrcR = { Op1.getReg(), Op1.getSubReg() };
+ auto FS = DefM.find(SrcR);
+ if (FS == DefM.end() || FS->second.empty())
+ return;
+ Copies.push_back(SA.Id);
+ RDefMap[SrcR][SA.Id] = FS->second.top()->Id;
+ // Insert DstR into the map.
+ RDefMap[DstR];
+}
+
+
+void CopyPropagation::updateMap(NodeAddr<InstrNode*> IA) {
+ RegisterSet RRs;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ RRs.insert(RA.Addr->getRegRef());
+ bool Common = false;
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ Common = true;
+ break;
+ }
+ if (!Common)
+ return;
+
+ for (auto &R : RDefMap) {
+ if (!RRs.count(R.first))
+ continue;
+ auto F = DefM.find(R.first);
+ if (F == DefM.end() || F->second.empty())
+ continue;
+ R.second[IA.Id] = F->second.top()->Id;
+ }
+}
+
+
+bool CopyPropagation::scanBlock(MachineBasicBlock *B) {
+ bool Changed = false;
+ auto BA = DFG.getFunc().Addr->findBlock(B, DFG);
+ DFG.markBlock(BA.Id, DefM);
+
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ if (DFG.IsCode<NodeAttrs::Stmt>(IA)) {
+ NodeAddr<StmtNode*> SA = IA;
+ MachineInstr *MI = SA.Addr->getCode();
+ if (MI->isCopy())
+ recordCopy(SA, MI);
+ }
+
+ updateMap(IA);
+ DFG.pushDefs(IA, DefM);
+ }
+
+ MachineDomTreeNode *N = MDT.getNode(B);
+ for (auto I : *N)
+ Changed |= scanBlock(I->getBlock());
+
+ DFG.releaseBlock(BA.Id, DefM);
+ return Changed;
+}
+
+
+bool CopyPropagation::run() {
+ scanBlock(&DFG.getMF().front());
+
+ if (trace()) {
+ dbgs() << "Copies:\n";
+ for (auto I : Copies)
+ dbgs() << *DFG.addr<StmtNode*>(I).Addr->getCode();
+ dbgs() << "\nRDef map:\n";
+ for (auto R : RDefMap) {
+ dbgs() << Print<RegisterRef>(R.first, DFG) << " -> {";
+ for (auto &M : R.second)
+ dbgs() << ' ' << Print<NodeId>(M.first, DFG) << ':'
+ << Print<NodeId>(M.second, DFG);
+ dbgs() << " }\n";
+ }
+ }
+
+ bool Changed = false;
+ NodeSet Deleted;
+#ifndef NDEBUG
+ bool HasLimit = CpLimit.getNumOccurrences() > 0;
+#endif
+
+ for (auto I : Copies) {
+#ifndef NDEBUG
+ if (HasLimit && CpCount >= CpLimit)
+ break;
+#endif
+ if (Deleted.count(I))
+ continue;
+ auto SA = DFG.addr<InstrNode*>(I);
+ NodeList Ds = SA.Addr->members_if(DFG.IsDef, DFG);
+ if (Ds.size() != 1)
+ continue;
+ NodeAddr<DefNode*> DA = Ds[0];
+ RegisterRef DR0 = DA.Addr->getRegRef();
+ NodeList Us = SA.Addr->members_if(DFG.IsUse, DFG);
+ if (Us.size() != 1)
+ continue;
+ NodeAddr<UseNode*> UA0 = Us[0];
+ RegisterRef UR0 = UA0.Addr->getRegRef();
+ NodeId RD0 = UA0.Addr->getReachingDef();
+
+ for (NodeId N = DA.Addr->getReachedUse(), NextN; N; N = NextN) {
+ auto UA = DFG.addr<UseNode*>(N);
+ NextN = UA.Addr->getSibling();
+ uint16_t F = UA.Addr->getFlags();
+ if ((F & NodeAttrs::PhiRef) || (F & NodeAttrs::Fixed))
+ continue;
+ if (UA.Addr->getRegRef() != DR0)
+ continue;
+ NodeAddr<InstrNode*> IA = UA.Addr->getOwner(DFG);
+ assert(DFG.IsCode<NodeAttrs::Stmt>(IA));
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
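+ // Replace only if the def of UR0 reaching this use is the same def
+ // that reached the copy; otherwise the source register may have been
+ // redefined in between.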
+ if (RDefMap[UR0][IA.Id] != RD0)
+ continue;
+ MachineOperand &Op = UA.Addr->getOp();
+ if (Op.isTied())
+ continue;
+ if (trace()) {
+ dbgs() << "can replace " << Print<RegisterRef>(DR0, DFG)
+ << " with " << Print<RegisterRef>(UR0, DFG) << " in "
+ << *NodeAddr<StmtNode*>(IA).Addr->getCode();
+ }
+
+ Op.setReg(UR0.Reg);
+ Op.setSubReg(UR0.Sub);
+ Changed = true;
+#ifndef NDEBUG
+ if (HasLimit && CpCount >= CpLimit)
+ break;
+ CpCount++;
+#endif
+
+ if (MI->isCopy()) {
+ MachineOperand &Op0 = MI->getOperand(0), &Op1 = MI->getOperand(1);
+ if (Op0.getReg() == Op1.getReg() && Op0.getSubReg() == Op1.getSubReg())
+ MI->eraseFromParent();
+ Deleted.insert(IA.Id);
+ }
+ }
+ }
+
+ return Changed;
+}
+
diff --git a/lib/Target/Hexagon/RDFCopy.h b/lib/Target/Hexagon/RDFCopy.h
new file mode 100644
index 000000000000..02531b94c9b0
--- /dev/null
+++ b/lib/Target/Hexagon/RDFCopy.h
@@ -0,0 +1,48 @@
+//===--- RDFCopy.h --------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef RDF_COPY_H
+#define RDF_COPY_H
+
+#include "RDFGraph.h"
+#include <map>
+#include <vector>
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineDominatorTree;
+ class MachineInstr;
+}
+
+namespace rdf {
+ struct CopyPropagation {
+ CopyPropagation(DataFlowGraph &dfg) : MDT(dfg.getDT()), DFG(dfg),
+ Trace(false) {}
+
+ bool run();
+ void trace(bool On) { Trace = On; }
+ bool trace() const { return Trace; }
+
+ private:
+ const MachineDominatorTree &MDT;
+ DataFlowGraph &DFG;
+ DataFlowGraph::DefStackMap DefM;
+ bool Trace;
+
+ // map: register -> (map: stmt -> reaching def)
+ std::map<RegisterRef,std::map<NodeId,NodeId>> RDefMap;
+ std::vector<NodeId> Copies;
+
+ void recordCopy(NodeAddr<StmtNode*> SA, MachineInstr *MI);
+ void updateMap(NodeAddr<InstrNode*> IA);
+ bool scanBlock(MachineBasicBlock *B);
+ };
+}
+
+#endif
diff --git a/lib/Target/Hexagon/RDFDeadCode.cpp b/lib/Target/Hexagon/RDFDeadCode.cpp
new file mode 100644
index 000000000000..95668577bd50
--- /dev/null
+++ b/lib/Target/Hexagon/RDFDeadCode.cpp
@@ -0,0 +1,204 @@
+//===--- RDFDeadCode.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RDF-based generic dead code elimination.
+
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "RDFDeadCode.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+// Check if the given instruction has observable side-effects, i.e. if
+// it should be considered "live". It is safe for this function to be
+// overly conservative (i.e. to return "true" for all instructions), but
+// it is not safe to return "false" for an instruction that must be
+// preserved.
+bool DeadCodeElimination::isLiveInstr(const MachineInstr *MI) const {
+ if (MI->mayStore() || MI->isBranch() || MI->isCall() || MI->isReturn())
+ return true;
+ if (MI->hasOrderedMemoryRef() || MI->hasUnmodeledSideEffects())
+ return true;
+ if (MI->isPHI())
+ return false;
+ for (auto &Op : MI->operands())
+ if (Op.isReg() && MRI.isReserved(Op.getReg()))
+ return true;
+ return false;
+}
+
+void DeadCodeElimination::scanInstr(NodeAddr<InstrNode*> IA,
+ SetVector<NodeId> &WorkQ) {
+ if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
+ return;
+ if (!isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ return;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG)) {
+ if (!LiveNodes.count(RA.Id))
+ WorkQ.insert(RA.Id);
+ }
+}
+
+void DeadCodeElimination::processDef(NodeAddr<DefNode*> DA,
+ SetVector<NodeId> &WorkQ) {
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
+ for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ if (!LiveNodes.count(UA.Id))
+ WorkQ.insert(UA.Id);
+ }
+ for (NodeAddr<DefNode*> TA : DFG.getRelatedRefs(IA, DA))
+ LiveNodes.insert(TA.Id);
+}
+
+void DeadCodeElimination::processUse(NodeAddr<UseNode*> UA,
+ SetVector<NodeId> &WorkQ) {
+ for (NodeAddr<DefNode*> DA : LV.getAllReachingDefs(UA)) {
+ if (!LiveNodes.count(DA.Id))
+ WorkQ.insert(DA.Id);
+ }
+}
+
+// Traverse the DFG and collect the set of dead RefNodes and the set of
+// dead instructions. Return "true" if any of these sets is non-empty,
+// "false" otherwise.
+bool DeadCodeElimination::collect() {
+ // This function works by first finding all live nodes. The dead nodes
+ // are then the complement of the set of live nodes.
+ //
+ // Assume that all nodes are dead. Identify instructions which must be
+ // considered live, i.e. instructions with observable side-effects, such
+ // as calls and stores. All arguments of such instructions are considered
+ // live. For each live def, all operands used in the corresponding
+ // instruction are considered live. For each live use, all its reaching
+ // defs are considered live.
+ LiveNodes.clear();
+ SetVector<NodeId> WorkQ;
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG))
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG))
+ scanInstr(IA, WorkQ);
+
+ while (!WorkQ.empty()) {
+ NodeId N = *WorkQ.begin();
+ WorkQ.remove(N);
+ LiveNodes.insert(N);
+ auto RA = DFG.addr<RefNode*>(N);
+ if (DFG.IsDef(RA))
+ processDef(RA, WorkQ);
+ else
+ processUse(RA, WorkQ);
+ }
+
+ if (trace()) {
+ dbgs() << "Live nodes:\n";
+ for (NodeId N : LiveNodes) {
+ auto RA = DFG.addr<RefNode*>(N);
+ dbgs() << PrintNode<RefNode*>(RA, DFG) << "\n";
+ }
+ }
+
+ auto IsDead = [this] (NodeAddr<InstrNode*> IA) -> bool {
+ for (NodeAddr<DefNode*> DA : IA.Addr->members_if(DFG.IsDef, DFG))
+ if (LiveNodes.count(DA.Id))
+ return false;
+ return true;
+ };
+
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ if (!LiveNodes.count(RA.Id))
+ DeadNodes.insert(RA.Id);
+ if (DFG.IsCode<NodeAttrs::Stmt>(IA))
+ if (isLiveInstr(NodeAddr<StmtNode*>(IA).Addr->getCode()))
+ continue;
+ if (IsDead(IA)) {
+ DeadInstrs.insert(IA.Id);
+ if (trace())
+ dbgs() << "Dead instr: " << PrintNode<InstrNode*>(IA, DFG) << "\n";
+ }
+ }
+ }
+
+ return !DeadNodes.empty();
+}
+
+// Erase the nodes given in the Nodes set from DFG. In addition to removing
+// them from the DFG, if a node corresponds to a statement, the corresponding
+// machine instruction is erased from the function.
+bool DeadCodeElimination::erase(const SetVector<NodeId> &Nodes) {
+ if (Nodes.empty())
+ return false;
+
+ // Prepare the actual set of ref nodes to remove: ref nodes from Nodes
+ // are included directly; for each InstrNode in Nodes, all of its
+ // RefNodes are included as well.
+ NodeList DRNs, DINs;
+ for (auto I : Nodes) {
+ auto BA = DFG.addr<NodeBase*>(I);
+ uint16_t Type = BA.Addr->getType();
+ if (Type == NodeAttrs::Ref) {
+ DRNs.push_back(DFG.addr<RefNode*>(I));
+ continue;
+ }
+
+ // If it's a code node, add all ref nodes from it.
+ uint16_t Kind = BA.Addr->getKind();
+ if (Kind == NodeAttrs::Stmt || Kind == NodeAttrs::Phi) {
+ for (auto N : NodeAddr<CodeNode*>(BA).Addr->members(DFG))
+ DRNs.push_back(N);
+ DINs.push_back(DFG.addr<InstrNode*>(I));
+ } else {
+ llvm_unreachable("Unexpected code node");
+ return false;
+ }
+ }
+
+ // Sort the list so that use nodes are removed first. This makes the
+ // "unlink" functions a bit faster.
+ auto UsesFirst = [] (NodeAddr<RefNode*> A, NodeAddr<RefNode*> B) -> bool {
+ uint16_t KindA = A.Addr->getKind(), KindB = B.Addr->getKind();
+ if (KindA == NodeAttrs::Use && KindB == NodeAttrs::Def)
+ return true;
+ if (KindA == NodeAttrs::Def && KindB == NodeAttrs::Use)
+ return false;
+ return A.Id < B.Id;
+ };
+ std::sort(DRNs.begin(), DRNs.end(), UsesFirst);
+
+ if (trace())
+ dbgs() << "Removing dead ref nodes:\n";
+ for (NodeAddr<RefNode*> RA : DRNs) {
+ if (trace())
+ dbgs() << " " << PrintNode<RefNode*>(RA, DFG) << '\n';
+ if (DFG.IsUse(RA))
+ DFG.unlinkUse(RA);
+ else if (DFG.IsDef(RA))
+ DFG.unlinkDef(RA);
+ }
+
+ // Now, remove all dead instruction nodes.
+ for (NodeAddr<InstrNode*> IA : DINs) {
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ BA.Addr->removeMember(IA, DFG);
+ if (!DFG.IsCode<NodeAttrs::Stmt>(IA))
+ continue;
+
+ MachineInstr *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ if (trace())
+ dbgs() << "erasing: " << *MI;
+ MI->eraseFromParent();
+ }
+ return true;
+}
diff --git a/lib/Target/Hexagon/RDFDeadCode.h b/lib/Target/Hexagon/RDFDeadCode.h
new file mode 100644
index 000000000000..f4373fb5007d
--- /dev/null
+++ b/lib/Target/Hexagon/RDFDeadCode.h
@@ -0,0 +1,65 @@
+//===--- RDFDeadCode.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// RDF-based generic dead code elimination.
+//
+// The main interface of this class consists of the functions "collect"
+// and "erase". This split allows custom processing of the function being
+// optimized by a particular consumer. The simplest way to use this class
+// is to instantiate an object, call "collect", and then call "erase",
+// passing it the result of "getDeadInstrs()".
+// A more complex scenario would be to call "collect" first, then visit
+// all post-increment instructions to see if the address update is dead,
+// and if it is, convert the instruction to a non-updating form.
+// After that, "erase" can be called with the set of nodes that includes
+// both the dead defs from the updating instructions and the nodes
+// corresponding to the dead instructions.
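+//
+// A minimal usage sketch (assuming an already-built DataFlowGraph G and a
+// MachineRegisterInfo MRI):
+//
+//   DeadCodeElimination DCE(G, MRI);
+//   if (DCE.collect())
+//     DCE.erase(DCE.getDeadInstrs());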
+
+#ifndef RDF_DEADCODE_H
+#define RDF_DEADCODE_H
+
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+
+namespace llvm {
+ class MachineRegisterInfo;
+}
+
+namespace rdf {
+ struct DeadCodeElimination {
+ DeadCodeElimination(DataFlowGraph &dfg, MachineRegisterInfo &mri)
+ : Trace(false), DFG(dfg), MRI(mri), LV(mri, dfg) {}
+
+ bool collect();
+ bool erase(const SetVector<NodeId> &Nodes);
+ void trace(bool On) { Trace = On; }
+ bool trace() const { return Trace; }
+
+ SetVector<NodeId> getDeadNodes() { return DeadNodes; }
+ SetVector<NodeId> getDeadInstrs() { return DeadInstrs; }
+ DataFlowGraph &getDFG() { return DFG; }
+
+ private:
+ bool Trace;
+ SetVector<NodeId> LiveNodes;
+ SetVector<NodeId> DeadNodes;
+ SetVector<NodeId> DeadInstrs;
+ DataFlowGraph &DFG;
+ MachineRegisterInfo &MRI;
+ Liveness LV;
+
+ bool isLiveInstr(const MachineInstr *MI) const;
+ void scanInstr(NodeAddr<InstrNode*> IA, SetVector<NodeId> &WorkQ);
+ void processDef(NodeAddr<DefNode*> DA, SetVector<NodeId> &WorkQ);
+ void processUse(NodeAddr<UseNode*> UA, SetVector<NodeId> &WorkQ);
+ };
+}
+
+#endif
diff --git a/lib/Target/Hexagon/RDFGraph.cpp b/lib/Target/Hexagon/RDFGraph.cpp
new file mode 100644
index 000000000000..9b47422153bb
--- /dev/null
+++ b/lib/Target/Hexagon/RDFGraph.cpp
@@ -0,0 +1,1716 @@
+//===--- RDFGraph.cpp -----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF).
+//
+#include "RDFGraph.h"
+
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+// Printing functions. Have them here first, so that the rest of the code
+// can use them.
+namespace rdf {
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterRef> &P) {
+ auto &TRI = P.G.getTRI();
+ if (P.Obj.Reg > 0 && P.Obj.Reg < TRI.getNumRegs())
+ OS << TRI.getName(P.Obj.Reg);
+ else
+ OS << '#' << P.Obj.Reg;
+ if (P.Obj.Sub > 0) {
+ OS << ':';
+ if (P.Obj.Sub < TRI.getNumSubRegIndices())
+ OS << TRI.getSubRegIndexName(P.Obj.Sub);
+ else
+ OS << '#' << P.Obj.Sub;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeId> &P) {
+ auto NA = P.G.addr<NodeBase*>(P.Obj);
+ uint16_t Attrs = NA.Addr->getAttrs();
+ uint16_t Kind = NodeAttrs::kind(Attrs);
+ uint16_t Flags = NodeAttrs::flags(Attrs);
+ switch (NodeAttrs::type(Attrs)) {
+ case NodeAttrs::Code:
+ switch (Kind) {
+ case NodeAttrs::Func: OS << 'f'; break;
+ case NodeAttrs::Block: OS << 'b'; break;
+ case NodeAttrs::Stmt: OS << 's'; break;
+ case NodeAttrs::Phi: OS << 'p'; break;
+ default: OS << "c?"; break;
+ }
+ break;
+ case NodeAttrs::Ref:
+ if (Flags & NodeAttrs::Preserving)
+ OS << '+';
+ if (Flags & NodeAttrs::Clobbering)
+ OS << '~';
+ switch (Kind) {
+ case NodeAttrs::Use: OS << 'u'; break;
+ case NodeAttrs::Def: OS << 'd'; break;
+ case NodeAttrs::Block: OS << 'b'; break;
+ default: OS << "r?"; break;
+ }
+ break;
+ default:
+ OS << '?';
+ break;
+ }
+ OS << P.Obj;
+ if (Flags & NodeAttrs::Shadow)
+ OS << '"';
+ return OS;
+}
+
+namespace {
+ void printRefHeader(raw_ostream &OS, const NodeAddr<RefNode*> RA,
+ const DataFlowGraph &G) {
+ OS << Print<NodeId>(RA.Id, G) << '<'
+ << Print<RegisterRef>(RA.Addr->getRegRef(), G) << '>';
+ if (RA.Addr->getFlags() & NodeAttrs::Fixed)
+ OS << '!';
+ }
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<DefNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getReachedUse())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<UseNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<PhiUseNode*>> &P) {
+ printRefHeader(OS, P.Obj, P.G);
+ OS << '(';
+ if (NodeId N = P.Obj.Addr->getReachingDef())
+ OS << Print<NodeId>(N, P.G);
+ OS << ',';
+ if (NodeId N = P.Obj.Addr->getPredecessor())
+ OS << Print<NodeId>(N, P.G);
+ OS << "):";
+ if (NodeId N = P.Obj.Addr->getSibling())
+ OS << Print<NodeId>(N, P.G);
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<RefNode*>> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Def:
+ OS << PrintNode<DefNode*>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Use:
+ if (P.Obj.Addr->getFlags() & NodeAttrs::PhiRef)
+ OS << PrintNode<PhiUseNode*>(P.Obj, P.G);
+ else
+ OS << PrintNode<UseNode*>(P.Obj, P.G);
+ break;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeList> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print<NodeId>(I.Id, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeSet> &P) {
+ unsigned N = P.Obj.size();
+ for (auto I : P.Obj) {
+ OS << Print<NodeId>(I, P.G);
+ if (--N)
+ OS << ' ';
+ }
+ return OS;
+}
+
+namespace {
+ template <typename T>
+ struct PrintListV {
+ PrintListV(const NodeList &L, const DataFlowGraph &G) : List(L), G(G) {}
+ typedef T Type;
+ const NodeList &List;
+ const DataFlowGraph &G;
+ };
+
+ template <typename T>
+ raw_ostream &operator<< (raw_ostream &OS, const PrintListV<T> &P) {
+ unsigned N = P.List.size();
+ for (NodeAddr<T> A : P.List) {
+ OS << PrintNode<T>(A, P.G);
+ if (--N)
+ OS << ", ";
+ }
+ return OS;
+ }
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<NodeAddr<PhiNode*>> &P) {
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": phi ["
+ << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<StmtNode*>> &P) {
+ unsigned Opc = P.Obj.Addr->getCode()->getOpcode();
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": " << P.G.getTII().getName(Opc)
+ << " [" << PrintListV<RefNode*>(P.Obj.Addr->members(P.G), P.G) << ']';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<InstrNode*>> &P) {
+ switch (P.Obj.Addr->getKind()) {
+ case NodeAttrs::Phi:
+ OS << PrintNode<PhiNode*>(P.Obj, P.G);
+ break;
+ case NodeAttrs::Stmt:
+ OS << PrintNode<StmtNode*>(P.Obj, P.G);
+ break;
+ default:
+ OS << "instr? " << Print<NodeId>(P.Obj.Id, P.G);
+ break;
+ }
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<BlockNode*>> &P) {
+ auto *BB = P.Obj.Addr->getCode();
+ unsigned NP = BB->pred_size();
+ std::vector<int> Ns;
+ auto PrintBBs = [&OS,&P] (std::vector<int> Ns) -> void {
+ unsigned N = Ns.size();
+ for (auto I : Ns) {
+ OS << "BB#" << I;
+ if (--N)
+ OS << ", ";
+ }
+ };
+
+ OS << Print<NodeId>(P.Obj.Id, P.G) << ": === BB#" << BB->getNumber()
+ << " === preds(" << NP << "): ";
+ for (auto I : BB->predecessors())
+ Ns.push_back(I->getNumber());
+ PrintBBs(Ns);
+
+ unsigned NS = BB->succ_size();
+ OS << " succs(" << NS << "): ";
+ Ns.clear();
+ for (auto I : BB->successors())
+ Ns.push_back(I->getNumber());
+ PrintBBs(Ns);
+ OS << '\n';
+
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<InstrNode*>(I, P.G) << '\n';
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<NodeAddr<FuncNode*>> &P) {
+ OS << "DFG dump:[\n" << Print<NodeId>(P.Obj.Id, P.G) << ": Function: "
+ << P.Obj.Addr->getCode()->getName() << '\n';
+ for (auto I : P.Obj.Addr->members(P.G))
+ OS << PrintNode<BlockNode*>(I, P.G) << '\n';
+ OS << "]\n";
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS, const Print<RegisterSet> &P) {
+ OS << '{';
+ for (auto I : P.Obj)
+ OS << ' ' << Print<RegisterRef>(I, P.G);
+ OS << " }";
+ return OS;
+}
+
+template<>
+raw_ostream &operator<< (raw_ostream &OS,
+ const Print<DataFlowGraph::DefStack> &P) {
+ for (auto I = P.Obj.top(), E = P.Obj.bottom(); I != E; ) {
+ OS << Print<NodeId>(I->Id, P.G)
+ << '<' << Print<RegisterRef>(I->Addr->getRegRef(), P.G) << '>';
+ I.down();
+ if (I != E)
+ OS << ' ';
+ }
+ return OS;
+}
+
+} // namespace rdf
+
+// Node allocation functions.
+//
+// The node allocator is like a slab memory allocator: it allocates blocks
+// of memory in sizes that are multiples of the size of a node. Each block
+// has the same size. Nodes are allocated from the currently active block,
+// and when it becomes full, a new one is created.
+// The mapping scheme between a node id and its location (the containing
+// block and the position within that block) is described in the header
+// file.
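+//
+// For example (illustrative numbers): a node id encoding block 2, index 17
+// refers to the node at Blocks[2] + 17*NodeMemSize, and makeId(2, 17)
+// reconstructs that id from the block and index.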
+//
+void NodeAllocator::startNewBlock() {
+ void *T = MemPool.Allocate(NodesPerBlock*NodeMemSize, NodeMemSize);
+ char *P = static_cast<char*>(T);
+ Blocks.push_back(P);
+ // Check if the block index is still within the allowed range, i.e. less
+ // than 2^N, where N is the number of bits in NodeId for the block index.
+ // BitsPerIndex is the number of bits per node index.
+ assert((Blocks.size() < (1U << (8*sizeof(NodeId)-BitsPerIndex))) &&
+ "Out of bits for block index");
+ ActiveEnd = P;
+}
+
+bool NodeAllocator::needNewBlock() {
+ if (Blocks.empty())
+ return true;
+
+ char *ActiveBegin = Blocks.back();
+ uint32_t Index = (ActiveEnd-ActiveBegin)/NodeMemSize;
+ return Index >= NodesPerBlock;
+}
+
+NodeAddr<NodeBase*> NodeAllocator::New() {
+ if (needNewBlock())
+ startNewBlock();
+
+ uint32_t ActiveB = Blocks.size()-1;
+ uint32_t Index = (ActiveEnd - Blocks[ActiveB])/NodeMemSize;
+ NodeAddr<NodeBase*> NA = { reinterpret_cast<NodeBase*>(ActiveEnd),
+ makeId(ActiveB, Index) };
+ ActiveEnd += NodeMemSize;
+ return NA;
+}
+
+NodeId NodeAllocator::id(const NodeBase *P) const {
+ uintptr_t A = reinterpret_cast<uintptr_t>(P);
+ for (unsigned i = 0, n = Blocks.size(); i != n; ++i) {
+ uintptr_t B = reinterpret_cast<uintptr_t>(Blocks[i]);
+ if (A < B || A >= B + NodesPerBlock*NodeMemSize)
+ continue;
+ uint32_t Idx = (A-B)/NodeMemSize;
+ return makeId(i, Idx);
+ }
+ llvm_unreachable("Invalid node address");
+}
+
+void NodeAllocator::clear() {
+ MemPool.Reset();
+ Blocks.clear();
+ ActiveEnd = nullptr;
+}
+
+
+// Insert node NA after "this" in the circular chain.
+void NodeBase::append(NodeAddr<NodeBase*> NA) {
+ NodeId Nx = Next;
+ // If NA is already "next", do nothing.
+ if (Next != NA.Id) {
+ Next = NA.Id;
+ NA.Addr->Next = Nx;
+ }
+}
+
+
+// Fundamental node manipulator functions.
+
+// Obtain the register reference from a reference node.
+RegisterRef RefNode::getRegRef() const {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ if (NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef)
+ return Ref.RR;
+ assert(Ref.Op != nullptr);
+ return { Ref.Op->getReg(), Ref.Op->getSubReg() };
+}
+
+// Set the register reference in the reference node directly (for references
+// in phi nodes).
+void RefNode::setRegRef(RegisterRef RR) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef);
+ Ref.RR = RR;
+}
+
+// Set the register reference in the reference node based on a machine
+// operand (for references in statement nodes).
+void RefNode::setRegRef(MachineOperand *Op) {
+ assert(NodeAttrs::type(Attrs) == NodeAttrs::Ref);
+ assert(!(NodeAttrs::flags(Attrs) & NodeAttrs::PhiRef));
+ Ref.Op = Op;
+}
+
+// Get the owner of a given reference node.
+NodeAddr<NodeBase*> RefNode::getOwner(const DataFlowGraph &G) {
+ NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Code)
+ return NA;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Connect the def node to the reaching def node.
+void DefNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+ Ref.RD = DA.Id;
+ Ref.Sib = DA.Addr->getReachedDef();
+ DA.Addr->setReachedDef(Self);
+}
+
+// Connect the use node to the reaching def node.
+void UseNode::linkToDef(NodeId Self, NodeAddr<DefNode*> DA) {
+ Ref.RD = DA.Id;
+ Ref.Sib = DA.Addr->getReachedUse();
+ DA.Addr->setReachedUse(Self);
+}
+
+// Get the first member of the code node.
+NodeAddr<NodeBase*> CodeNode::getFirstMember(const DataFlowGraph &G) const {
+ if (Code.FirstM == 0)
+ return NodeAddr<NodeBase*>();
+ return G.addr<NodeBase*>(Code.FirstM);
+}
+
+// Get the last member of the code node.
+NodeAddr<NodeBase*> CodeNode::getLastMember(const DataFlowGraph &G) const {
+ if (Code.LastM == 0)
+ return NodeAddr<NodeBase*>();
+ return G.addr<NodeBase*>(Code.LastM);
+}
+
+// Add node NA at the end of the member list of the given code node.
+void CodeNode::addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
+ auto ML = getLastMember(G);
+ if (ML.Id != 0) {
+ ML.Addr->append(NA);
+ } else {
+ Code.FirstM = NA.Id;
+ NodeId Self = G.id(this);
+ NA.Addr->setNext(Self);
+ }
+ Code.LastM = NA.Id;
+}
+
+// Add node NA after member node MA in the given code node.
+void CodeNode::addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
+ const DataFlowGraph &G) {
+ MA.Addr->append(NA);
+ if (Code.LastM == MA.Id)
+ Code.LastM = NA.Id;
+}
+
+// Remove member node NA from the given code node.
+void CodeNode::removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G) {
+ auto MA = getFirstMember(G);
+ assert(MA.Id != 0);
+
+ // Special handling if the member to remove is the first member.
+ if (MA.Id == NA.Id) {
+ if (Code.LastM == MA.Id) {
+ // If it is the only member, set both first and last to 0.
+ Code.FirstM = Code.LastM = 0;
+ } else {
+ // Otherwise, advance the first member.
+ Code.FirstM = MA.Addr->getNext();
+ }
+ return;
+ }
+
+ while (MA.Addr != this) {
+ NodeId MX = MA.Addr->getNext();
+ if (MX == NA.Id) {
+ MA.Addr->setNext(NA.Addr->getNext());
+ // If the member to remove happens to be the last one, update the
+ // LastM indicator.
+ if (Code.LastM == NA.Id)
+ Code.LastM = MA.Id;
+ return;
+ }
+ MA = G.addr<NodeBase*>(MX);
+ }
+ llvm_unreachable("No such member");
+}
+
+// Return the list of all members of the code node.
+NodeList CodeNode::members(const DataFlowGraph &G) const {
+ static auto True = [] (NodeAddr<NodeBase*>) -> bool { return true; };
+ return members_if(True, G);
+}
+
+// Return the owner of the given instr node.
+NodeAddr<NodeBase*> InstrNode::getOwner(const DataFlowGraph &G) {
+ NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ if (NA.Addr->getKind() == NodeAttrs::Block)
+ return NA;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ }
+ llvm_unreachable("No owner in circular list");
+}
+
+// Add the phi node PA to the given block node.
+void BlockNode::addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G) {
+ auto M = getFirstMember(G);
+ if (M.Id == 0) {
+ addMember(PA, G);
+ return;
+ }
+
+ assert(M.Addr->getType() == NodeAttrs::Code);
+ if (M.Addr->getKind() == NodeAttrs::Stmt) {
+ // If the first member of the block is a statement, insert the phi as
+ // the first member.
+ Code.FirstM = PA.Id;
+ PA.Addr->setNext(M.Id);
+ } else {
+    // If the first member is a phi, find the last phi, and insert PA after it.
+ assert(M.Addr->getKind() == NodeAttrs::Phi);
+ NodeAddr<NodeBase*> MN = M;
+ do {
+ M = MN;
+ MN = G.addr<NodeBase*>(M.Addr->getNext());
+ assert(MN.Addr->getType() == NodeAttrs::Code);
+ } while (MN.Addr->getKind() == NodeAttrs::Phi);
+
+ // M is the last phi.
+ addMemberAfter(M, PA, G);
+ }
+}
+
+// Find the block node corresponding to the machine basic block BB in the
+// given func node.
+NodeAddr<BlockNode*> FuncNode::findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const {
+ auto EqBB = [BB] (NodeAddr<NodeBase*> NA) -> bool {
+ return NodeAddr<BlockNode*>(NA).Addr->getCode() == BB;
+ };
+ NodeList Ms = members_if(EqBB, G);
+ if (!Ms.empty())
+ return Ms[0];
+ return NodeAddr<BlockNode*>();
+}
+
+// Get the block node for the entry block in the given function.
+NodeAddr<BlockNode*> FuncNode::getEntryBlock(const DataFlowGraph &G) {
+ MachineBasicBlock *EntryB = &getCode()->front();
+ return findBlock(EntryB, G);
+}
+
+
+// Register aliasing information.
+//
+// In theory, the lane information could be used to determine register
+// covering (and aliasing), but depending on the sub-register structure,
+// the lane mask information may be missing. The covering information
+// must be available for this framework to work, so relying solely on
+// the lane data is not sufficient.
+
+// Determine whether RA covers RB.
+bool RegisterAliasInfo::covers(RegisterRef RA, RegisterRef RB) const {
+ if (RA == RB)
+ return true;
+ if (TargetRegisterInfo::isVirtualRegister(RA.Reg)) {
+ assert(TargetRegisterInfo::isVirtualRegister(RB.Reg));
+ if (RA.Reg != RB.Reg)
+ return false;
+ if (RA.Sub == 0)
+ return true;
+ return TRI.composeSubRegIndices(RA.Sub, RB.Sub) == RA.Sub;
+ }
+
+ assert(TargetRegisterInfo::isPhysicalRegister(RA.Reg) &&
+ TargetRegisterInfo::isPhysicalRegister(RB.Reg));
+ unsigned A = RA.Sub != 0 ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+ unsigned B = RB.Sub != 0 ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+ return TRI.isSubRegister(A, B);
+}
+
+// Determine whether RR is covered by the set of references RRs.
+bool RegisterAliasInfo::covers(const RegisterSet &RRs, RegisterRef RR) const {
+ if (RRs.count(RR))
+ return true;
+
+ // For virtual registers, we cannot accurately determine covering based
+ // on subregisters. If RR itself is not present in RRs, but it has a sub-
+ // register reference, check for the super-register alone. Otherwise,
+ // assume non-covering.
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg)) {
+ if (RR.Sub != 0)
+ return RRs.count({RR.Reg, 0});
+ return false;
+ }
+
+ // If any super-register of RR is present, then RR is covered.
+ unsigned Reg = RR.Sub == 0 ? RR.Reg : TRI.getSubReg(RR.Reg, RR.Sub);
+ for (MCSuperRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
+ if (RRs.count({*SR, 0}))
+ return true;
+
+ return false;
+}
+
+// Get the list of references aliased to RR.
+std::vector<RegisterRef> RegisterAliasInfo::getAliasSet(RegisterRef RR) const {
+ // Do not include RR in the alias set. For virtual registers return an
+ // empty set.
+ std::vector<RegisterRef> AS;
+ if (TargetRegisterInfo::isVirtualRegister(RR.Reg))
+ return AS;
+ assert(TargetRegisterInfo::isPhysicalRegister(RR.Reg));
+ unsigned R = RR.Reg;
+ if (RR.Sub)
+ R = TRI.getSubReg(RR.Reg, RR.Sub);
+
+ for (MCRegAliasIterator AI(R, &TRI, false); AI.isValid(); ++AI)
+ AS.push_back(RegisterRef({*AI, 0}));
+ return AS;
+}
+
+// Check whether RA and RB are aliased.
+bool RegisterAliasInfo::alias(RegisterRef RA, RegisterRef RB) const {
+ bool VirtA = TargetRegisterInfo::isVirtualRegister(RA.Reg);
+ bool VirtB = TargetRegisterInfo::isVirtualRegister(RB.Reg);
+ bool PhysA = TargetRegisterInfo::isPhysicalRegister(RA.Reg);
+ bool PhysB = TargetRegisterInfo::isPhysicalRegister(RB.Reg);
+
+ if (VirtA != VirtB)
+ return false;
+
+ if (VirtA) {
+ if (RA.Reg != RB.Reg)
+ return false;
+    // RA and RB refer to the same register. If either of them refers to
+    // the whole register, they must be aliased.
+ if (RA.Sub == 0 || RB.Sub == 0)
+ return true;
+ unsigned SA = TRI.getSubRegIdxSize(RA.Sub);
+ unsigned OA = TRI.getSubRegIdxOffset(RA.Sub);
+ unsigned SB = TRI.getSubRegIdxSize(RB.Sub);
+ unsigned OB = TRI.getSubRegIdxOffset(RB.Sub);
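+    // The two sub-registers overlap (and therefore alias) iff the half-open
+    // intervals [OA, OA+SA) and [OB, OB+SB) intersect.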
+ if (OA <= OB && OA+SA > OB)
+ return true;
+ if (OB <= OA && OB+SB > OA)
+ return true;
+ return false;
+ }
+
+ assert(PhysA && PhysB);
+ (void)PhysA, (void)PhysB;
+ unsigned A = RA.Sub ? TRI.getSubReg(RA.Reg, RA.Sub) : RA.Reg;
+ unsigned B = RB.Sub ? TRI.getSubReg(RB.Reg, RB.Sub) : RB.Reg;
+ for (MCRegAliasIterator I(A, &TRI, true); I.isValid(); ++I)
+ if (B == *I)
+ return true;
+ return false;
+}
+
+
+// Target operand information.
+//
+
+// For a given def operand OpNum in the instruction In, check if there are
+// any bits of the defined register that can remain unchanged across this
+// def (e.g. when the instruction is predicated).
+bool TargetOperandInfo::isPreserving(const MachineInstr &In, unsigned OpNum)
+ const {
+ return TII.isPredicated(&In);
+}
+
+// Check if the definition in operand OpNum produces an unspecified value.
+bool TargetOperandInfo::isClobbering(const MachineInstr &In, unsigned OpNum)
+ const {
+ if (In.isCall())
+ if (In.getOperand(OpNum).isImplicit())
+ return true;
+ return false;
+}
+
+// Check if the given instruction specifically requires a fixed register for
+// operand OpNum (e.g. a register listed in the descriptor as an implicit
+// use or def, or any register operand of a call or return).
+bool TargetOperandInfo::isFixedReg(const MachineInstr &In, unsigned OpNum)
+ const {
+ if (In.isCall() || In.isReturn())
+ return true;
+ const MCInstrDesc &D = In.getDesc();
+ if (!D.getImplicitDefs() && !D.getImplicitUses())
+ return false;
+ const MachineOperand &Op = In.getOperand(OpNum);
+ // If there is a sub-register, treat the operand as non-fixed. Currently,
+ // fixed registers are those that are listed in the descriptor as implicit
+ // uses or defs, and those lists do not allow sub-registers.
+ if (Op.getSubReg() != 0)
+ return false;
+ unsigned Reg = Op.getReg();
+ const MCPhysReg *ImpR = Op.isDef() ? D.getImplicitDefs()
+ : D.getImplicitUses();
+ if (!ImpR)
+ return false;
+ while (*ImpR)
+ if (*ImpR++ == Reg)
+ return true;
+ return false;
+}
+
+
+//
+// The data flow graph construction.
+//
+
+DataFlowGraph::DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+ const TargetOperandInfo &toi)
+ : TimeG("rdf"), MF(mf), TII(tii), TRI(tri), MDT(mdt), MDF(mdf), RAI(rai),
+ TOI(toi) {
+}
+
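+// For illustration, a typical construction sequence (a sketch; the machine
+// function analyses are assumed to be available from the enclosing pass):
+//   RegisterAliasInfo RAI(TRI);
+//   TargetOperandInfo TOI(TII);
+//   DataFlowGraph G(MF, TII, TRI, MDT, MDF, RAI, TOI);
+//   G.build();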
+
+// The implementation of the definition stack.
+// Each register reference has its own definition stack. In particular,
+// the register references "Reg" and "Reg:subreg" will each have their
+// own definition stacks.
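+//
+// For illustration, a possible stack for some register reference after
+// visiting blocks B1 and B2 (delimiters marked with '|'):
+//
+//   bottom -> d1 d2 |B1 d3 |B2 d4 d5 <- top
+//
+// Here pop() removes d5, while clear_block(B2) removes d4, d5 and the |B2
+// delimiter in one step.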
+
+// Construct a stack iterator.
+DataFlowGraph::DefStack::Iterator::Iterator(const DataFlowGraph::DefStack &S,
+ bool Top) : DS(S) {
+ if (!Top) {
+ // Initialize to bottom.
+ Pos = 0;
+ return;
+ }
+ // Initialize to the top, i.e. top-most non-delimiter (or 0, if empty).
+ Pos = DS.Stack.size();
+ while (Pos > 0 && DS.isDelimiter(DS.Stack[Pos-1]))
+ Pos--;
+}
+
+// Return the size of the stack, including block delimiters.
+unsigned DataFlowGraph::DefStack::size() const {
+ unsigned S = 0;
+ for (auto I = top(), E = bottom(); I != E; I.down())
+ S++;
+ return S;
+}
+
+// Remove the top entry from the stack. Remove all intervening delimiters
+// so that after this, the stack is either empty, or the top of the stack
+// is a non-delimiter.
+void DataFlowGraph::DefStack::pop() {
+ assert(!empty());
+ unsigned P = nextDown(Stack.size());
+ Stack.resize(P);
+}
+
+// Push a delimiter for block node N on the stack.
+void DataFlowGraph::DefStack::start_block(NodeId N) {
+ assert(N != 0);
+ Stack.push_back(NodeAddr<DefNode*>(nullptr, N));
+}
+
+// Remove all nodes from the top of the stack, until the delimiter for
+// block node N is encountered. Remove the delimiter as well. In effect,
+// this will remove from the stack all definitions from block N.
+void DataFlowGraph::DefStack::clear_block(NodeId N) {
+ assert(N != 0);
+ unsigned P = Stack.size();
+ while (P > 0) {
+ bool Found = isDelimiter(Stack[P-1], N);
+ P--;
+ if (Found)
+ break;
+ }
+ // This will also remove the delimiter, if found.
+ Stack.resize(P);
+}
+
+// Move the stack iterator up by one.
+unsigned DataFlowGraph::DefStack::nextUp(unsigned P) const {
+ // Get the next valid position after P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ unsigned SS = Stack.size();
+ bool IsDelim;
+ assert(P < SS);
+ do {
+ P++;
+ IsDelim = isDelimiter(Stack[P-1]);
+ } while (P < SS && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
+
+// Move the stack iterator down by one.
+unsigned DataFlowGraph::DefStack::nextDown(unsigned P) const {
+ // Get the preceding valid position before P (skipping all delimiters).
+ // The input position P does not have to point to a non-delimiter.
+ assert(P > 0 && P <= Stack.size());
+ bool IsDelim = isDelimiter(Stack[P-1]);
+ do {
+ if (--P == 0)
+ break;
+ IsDelim = isDelimiter(Stack[P-1]);
+ } while (P > 0 && IsDelim);
+ assert(!IsDelim);
+ return P;
+}
+
+// Node management functions.
+
+// Get the pointer to the node with the id N.
+NodeBase *DataFlowGraph::ptr(NodeId N) const {
+ if (N == 0)
+ return nullptr;
+ return Memory.ptr(N);
+}
+
+// Get the id of the node at the address P.
+NodeId DataFlowGraph::id(const NodeBase *P) const {
+ if (P == nullptr)
+ return 0;
+ return Memory.id(P);
+}
+
+// Allocate a new node and set the attributes to Attrs.
+NodeAddr<NodeBase*> DataFlowGraph::newNode(uint16_t Attrs) {
+ NodeAddr<NodeBase*> P = Memory.New();
+ P.Addr->init();
+ P.Addr->setAttrs(Attrs);
+ return P;
+}
+
+// Make a copy of the given node B, except for the data-flow links, which
+// are set to 0.
+NodeAddr<NodeBase*> DataFlowGraph::cloneNode(const NodeAddr<NodeBase*> B) {
+ NodeAddr<NodeBase*> NA = newNode(0);
+ memcpy(NA.Addr, B.Addr, sizeof(NodeBase));
+ // Ref nodes need to have the data-flow links reset.
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ NodeAddr<RefNode*> RA = NA;
+ RA.Addr->setReachingDef(0);
+ RA.Addr->setSibling(0);
+ if (NA.Addr->getKind() == NodeAttrs::Def) {
+ NodeAddr<DefNode*> DA = NA;
+ DA.Addr->setReachedDef(0);
+ DA.Addr->setReachedUse(0);
+ }
+ }
+ return NA;
+}
+
+
+// Allocation routines for specific node types/kinds.
+
+NodeAddr<UseNode*> DataFlowGraph::newUse(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags) {
+ NodeAddr<UseNode*> UA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ UA.Addr->setRegRef(&Op);
+ return UA;
+}
+
+NodeAddr<PhiUseNode*> DataFlowGraph::newPhiUse(NodeAddr<PhiNode*> Owner,
+ RegisterRef RR, NodeAddr<BlockNode*> PredB, uint16_t Flags) {
+ NodeAddr<PhiUseNode*> PUA = newNode(NodeAttrs::Ref | NodeAttrs::Use | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ PUA.Addr->setRegRef(RR);
+ PUA.Addr->setPredecessor(PredB.Id);
+ return PUA;
+}
+
+NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags) {
+ NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ DA.Addr->setRegRef(&Op);
+ return DA;
+}
+
+NodeAddr<DefNode*> DataFlowGraph::newDef(NodeAddr<InstrNode*> Owner,
+ RegisterRef RR, uint16_t Flags) {
+ NodeAddr<DefNode*> DA = newNode(NodeAttrs::Ref | NodeAttrs::Def | Flags);
+ assert(Flags & NodeAttrs::PhiRef);
+ DA.Addr->setRegRef(RR);
+ return DA;
+}
+
+NodeAddr<PhiNode*> DataFlowGraph::newPhi(NodeAddr<BlockNode*> Owner) {
+ NodeAddr<PhiNode*> PA = newNode(NodeAttrs::Code | NodeAttrs::Phi);
+ Owner.Addr->addPhi(PA, *this);
+ return PA;
+}
+
+NodeAddr<StmtNode*> DataFlowGraph::newStmt(NodeAddr<BlockNode*> Owner,
+ MachineInstr *MI) {
+ NodeAddr<StmtNode*> SA = newNode(NodeAttrs::Code | NodeAttrs::Stmt);
+ SA.Addr->setCode(MI);
+ Owner.Addr->addMember(SA, *this);
+ return SA;
+}
+
+NodeAddr<BlockNode*> DataFlowGraph::newBlock(NodeAddr<FuncNode*> Owner,
+ MachineBasicBlock *BB) {
+ NodeAddr<BlockNode*> BA = newNode(NodeAttrs::Code | NodeAttrs::Block);
+ BA.Addr->setCode(BB);
+ Owner.Addr->addMember(BA, *this);
+ return BA;
+}
+
+NodeAddr<FuncNode*> DataFlowGraph::newFunc(MachineFunction *MF) {
+ NodeAddr<FuncNode*> FA = newNode(NodeAttrs::Code | NodeAttrs::Func);
+ FA.Addr->setCode(MF);
+ return FA;
+}
+
+// Build the data flow graph.
+void DataFlowGraph::build() {
+ reset();
+ Func = newFunc(&MF);
+
+ if (MF.empty())
+ return;
+
+ for (auto &B : MF) {
+ auto BA = newBlock(Func, &B);
+ for (auto &I : B) {
+ if (I.isDebugValue())
+ continue;
+ buildStmt(BA, I);
+ }
+ }
+
+ // Collect information about block references.
+ NodeAddr<BlockNode*> EA = Func.Addr->getEntryBlock(*this);
+ BlockRefsMap RefM;
+ buildBlockRefs(EA, RefM);
+
+ // Add function-entry phi nodes.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I) {
+ NodeAddr<PhiNode*> PA = newPhi(EA);
+ RegisterRef RR = { I->first, 0 };
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ }
+
+ // Build a map "PhiM" which will contain, for each block, the set
+ // of references that will require phi definitions in that block.
+ BlockRefsMap PhiM;
+ auto Blocks = Func.Addr->members(*this);
+ for (NodeAddr<BlockNode*> BA : Blocks)
+ recordDefsForDF(PhiM, RefM, BA);
+ for (NodeAddr<BlockNode*> BA : Blocks)
+ buildPhis(PhiM, RefM, BA);
+
+ // Link all the refs. This will recursively traverse the dominator tree.
+ DefStackMap DM;
+ linkBlockRefs(DM, EA);
+
+ // Finally, remove all unused phi nodes.
+ removeUnusedPhis();
+}
+
+// For each stack in the map DefM, push the delimiter for block B on it.
+void DataFlowGraph::markBlock(NodeId B, DefStackMap &DefM) {
+ // Push block delimiters.
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
+ I->second.start_block(B);
+}
+
+// Remove all definitions coming from block B from each stack in DefM.
+void DataFlowGraph::releaseBlock(NodeId B, DefStackMap &DefM) {
+ // Pop all defs from this block from the definition stack. Defs that were
+ // added to the map during the traversal of instructions will not have a
+ // delimiter, but for those, the whole stack will be emptied.
+ for (auto I = DefM.begin(), E = DefM.end(); I != E; ++I)
+ I->second.clear_block(B);
+
+ // Finally, remove empty stacks from the map.
+ for (auto I = DefM.begin(), E = DefM.end(), NextI = I; I != E; I = NextI) {
+ NextI = std::next(I);
+ // This preserves the validity of iterators other than I.
+ if (I->second.empty())
+ DefM.erase(I);
+ }
+}
+
+// Push all definitions from the instruction node IA onto the appropriate
+// stacks in DefM (one for the defined reference and one for each alias).
+void DataFlowGraph::pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DefM) {
+ NodeList Defs = IA.Addr->members_if(IsDef, *this);
+ NodeSet Visited;
+#ifndef NDEBUG
+ RegisterSet Defined;
+#endif
+
+ // The important objectives of this function are:
+ // - to be able to handle instructions both while the graph is being
+ // constructed, and after the graph has been constructed, and
+ // - maintain proper ordering of definitions on the stack for each
+ // register reference:
+ // - if there are two or more related defs in IA (i.e. coming from
+ // the same machine operand), then only push one def on the stack,
+ // - if there are multiple unrelated defs of non-overlapping
+ // subregisters of S, then the stack for S will have both (in an
+  //     unspecified order), but the order does not matter from the
+  //     data-flow perspective.
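+  //
+  // For example (with hypothetical registers), if IA defines the two
+  // non-overlapping sub-registers R0:lo and R0:hi, the stacks for R0:lo
+  // and R0:hi each receive one def, and the stack for their common alias
+  // R0 receives both.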
+
+ for (NodeAddr<DefNode*> DA : Defs) {
+ if (Visited.count(DA.Id))
+ continue;
+ NodeList Rel = getRelatedRefs(IA, DA);
+ NodeAddr<DefNode*> PDA = Rel.front();
+ // Push the definition on the stack for the register and all aliases.
+ RegisterRef RR = PDA.Addr->getRegRef();
+#ifndef NDEBUG
+ // Assert if the register is defined in two or more unrelated defs.
+ // This could happen if there are two or more def operands defining it.
+ if (!Defined.insert(RR).second) {
+ auto *MI = NodeAddr<StmtNode*>(IA).Addr->getCode();
+ dbgs() << "Multiple definitions of register: "
+ << Print<RegisterRef>(RR, *this) << " in\n " << *MI
+ << "in BB#" << MI->getParent()->getNumber() << '\n';
+ llvm_unreachable(nullptr);
+ }
+#endif
+ DefM[RR].push(DA);
+ for (auto A : RAI.getAliasSet(RR)) {
+ assert(A != RR);
+ DefM[A].push(DA);
+ }
+ // Mark all the related defs as visited.
+ for (auto T : Rel)
+ Visited.insert(T.Id);
+ }
+}
+
+// Return the list of all reference nodes related to RA, including RA itself.
+// See "getNextRelated" for the meaning of a "related reference".
+NodeList DataFlowGraph::getRelatedRefs(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ NodeList Refs;
+ NodeId Start = RA.Id;
+ do {
+ Refs.push_back(RA);
+ RA = getNextRelated(IA, RA);
+ } while (RA.Id != 0 && RA.Id != Start);
+ return Refs;
+}
+
+
+// Clear all information in the graph.
+void DataFlowGraph::reset() {
+ Memory.clear();
+ Func = NodeAddr<FuncNode*>();
+}
+
+
+// Return the next reference node in the instruction node IA that is related
+// to RA. Conceptually, two reference nodes are related if they refer to the
+// same instance of a register access, but differ in flags or other minor
+// characteristics. Specific examples of related nodes are shadow reference
+// nodes.
+// Return the equivalent of nullptr if there are no more related references.
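+// For example, a use of R0 and a shadow use of R0 in the same statement
+// are related (they differ only in flags), while a def and a use of R0,
+// or references to different registers, are not.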
+NodeAddr<RefNode*> DataFlowGraph::getNextRelated(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ auto Related = [RA](NodeAddr<RefNode*> TA) -> bool {
+ if (TA.Addr->getKind() != RA.Addr->getKind())
+ return false;
+ if (TA.Addr->getRegRef() != RA.Addr->getRegRef())
+ return false;
+ return true;
+ };
+ auto RelatedStmt = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+ return Related(TA) &&
+ &RA.Addr->getOp() == &TA.Addr->getOp();
+ };
+ auto RelatedPhi = [&Related,RA](NodeAddr<RefNode*> TA) -> bool {
+ if (!Related(TA))
+ return false;
+ if (TA.Addr->getKind() != NodeAttrs::Use)
+ return true;
+ // For phi uses, compare predecessor blocks.
+ const NodeAddr<const PhiUseNode*> TUA = TA;
+ const NodeAddr<const PhiUseNode*> RUA = RA;
+ return TUA.Addr->getPredecessor() == RUA.Addr->getPredecessor();
+ };
+
+ RegisterRef RR = RA.Addr->getRegRef();
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ return RA.Addr->getNextRef(RR, RelatedStmt, true, *this);
+ return RA.Addr->getNextRef(RR, RelatedPhi, true, *this);
+}
+
+// Find the next node related to RA in IA that satisfies condition P.
+// If such a node was found, return a pair where the second element is the
+// located node. If such a node does not exist, return a pair where the
+// first element is the element after which such a node should be inserted,
+// and the second element is a null-address.
+template <typename Predicate>
+std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+DataFlowGraph::locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ Predicate P) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ NodeAddr<RefNode*> NA;
+ NodeId Start = RA.Id;
+ while (true) {
+ NA = getNextRelated(IA, RA);
+ if (NA.Id == 0 || NA.Id == Start)
+ break;
+ if (P(NA))
+ break;
+ RA = NA;
+ }
+
+ if (NA.Id != 0 && NA.Id != Start)
+ return std::make_pair(RA, NA);
+ return std::make_pair(RA, NodeAddr<RefNode*>());
+}
+
+// Get the next shadow node in IA corresponding to RA, and optionally create
+// such a node if it does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create) {
+ assert(IA.Id != 0 && RA.Id != 0);
+
+ uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+ auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getFlags() == Flags;
+ };
+ auto Loc = locateNextRef(IA, RA, IsShadow);
+ if (Loc.second.Id != 0 || !Create)
+ return Loc.second;
+
+  // Create a copy of RA and mark it as a shadow.
+ NodeAddr<RefNode*> NA = cloneNode(RA);
+ NA.Addr->setFlags(Flags | NodeAttrs::Shadow);
+ IA.Addr->addMemberAfter(Loc.first, NA, *this);
+ return NA;
+}
+
+// Get the next shadow node in IA corresponding to RA. Return null-address
+// if such a node does not exist.
+NodeAddr<RefNode*> DataFlowGraph::getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const {
+ assert(IA.Id != 0 && RA.Id != 0);
+ uint16_t Flags = RA.Addr->getFlags() | NodeAttrs::Shadow;
+ auto IsShadow = [Flags] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getFlags() == Flags;
+ };
+ return locateNextRef(IA, RA, IsShadow).second;
+}
+
+// Create a new statement node in the block node BA that corresponds to
+// the machine instruction MI.
+void DataFlowGraph::buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In) {
+ auto SA = newStmt(BA, &In);
+
+ // Collect a set of registers that this instruction implicitly uses
+ // or defines. Implicit operands from an instruction will be ignored
+ // unless they are listed here.
+ RegisterSet ImpUses, ImpDefs;
+ if (const uint16_t *ImpD = In.getDesc().getImplicitDefs())
+ while (uint16_t R = *ImpD++)
+ ImpDefs.insert({R, 0});
+ if (const uint16_t *ImpU = In.getDesc().getImplicitUses())
+ while (uint16_t R = *ImpU++)
+ ImpUses.insert({R, 0});
+
+ bool IsCall = In.isCall(), IsReturn = In.isReturn();
+ bool IsPredicated = TII.isPredicated(&In);
+ unsigned NumOps = In.getNumOperands();
+
+ // Avoid duplicate implicit defs. This will not detect cases of implicit
+ // defs that define registers that overlap, but it is not clear how to
+ // interpret that in the absence of explicit defs. Overlapping explicit
+ // defs are likely illegal already.
+ RegisterSet DoneDefs;
+ // Process explicit defs first.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || Op.isImplicit())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN))
+ Flags |= NodeAttrs::Preserving;
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ DoneDefs.insert(RR);
+ }
+
+ // Process implicit defs, skipping those that have already been added
+ // as explicit.
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isDef() || !Op.isImplicit())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ if (!IsCall && !ImpDefs.count(RR))
+ continue;
+ if (DoneDefs.count(RR))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isPreserving(In, OpN))
+ Flags |= NodeAttrs::Preserving;
+ if (TOI.isClobbering(In, OpN))
+ Flags |= NodeAttrs::Clobbering;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<DefNode*> DA = newDef(SA, Op, Flags);
+ SA.Addr->addMember(DA, *this);
+ DoneDefs.insert(RR);
+ }
+
+ for (unsigned OpN = 0; OpN < NumOps; ++OpN) {
+ MachineOperand &Op = In.getOperand(OpN);
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ RegisterRef RR = { Op.getReg(), Op.getSubReg() };
+ // Add implicit uses on return and call instructions, and on predicated
+ // instructions regardless of whether or not they appear in the instruction
+ // descriptor's list.
+ bool Implicit = Op.isImplicit();
+ bool TakeImplicit = IsReturn || IsCall || IsPredicated;
+ if (Implicit && !TakeImplicit && !ImpUses.count(RR))
+ continue;
+ uint16_t Flags = NodeAttrs::None;
+ if (TOI.isFixedReg(In, OpN))
+ Flags |= NodeAttrs::Fixed;
+ NodeAddr<UseNode*> UA = newUse(SA, Op, Flags);
+ SA.Addr->addMember(UA, *this);
+ }
+}
+
+// Build a map that for each block will have the set of all references from
+// that block, and from all blocks dominated by it.
+void DataFlowGraph::buildBlockRefs(NodeAddr<BlockNode*> BA,
+ BlockRefsMap &RefM) {
+ auto &Refs = RefM[BA.Id];
+ MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+ assert(N);
+ for (auto I : *N) {
+ MachineBasicBlock *SB = I->getBlock();
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ buildBlockRefs(SBA, RefM);
+ const auto &SRs = RefM[SBA.Id];
+ Refs.insert(SRs.begin(), SRs.end());
+ }
+
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this))
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(*this))
+ Refs.insert(RA.Addr->getRegRef());
+}
+
+// Scan all defs in the block node BA and record in PhiM the locations of
+// phi nodes corresponding to these defs.
+void DataFlowGraph::recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA) {
+ // Check all defs from block BA and record them in each block in BA's
+ // iterated dominance frontier. This information will later be used to
+ // create phi nodes.
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ assert(BB);
+ auto DFLoc = MDF.find(BB);
+ if (DFLoc == MDF.end() || DFLoc->second.empty())
+ return;
+
+ // Traverse all instructions in the block and collect the set of all
+ // defined references. For each reference there will be a phi created
+ // in the block's iterated dominance frontier.
+ // This is done to make sure that each defined reference gets only one
+ // phi node, even if it is defined multiple times.
+ RegisterSet Defs;
+ for (auto I : BA.Addr->members(*this)) {
+ assert(I.Addr->getType() == NodeAttrs::Code);
+ assert(I.Addr->getKind() == NodeAttrs::Phi ||
+ I.Addr->getKind() == NodeAttrs::Stmt);
+ NodeAddr<InstrNode*> IA = I;
+ for (NodeAddr<RefNode*> RA : IA.Addr->members_if(IsDef, *this))
+ Defs.insert(RA.Addr->getRegRef());
+ }
+
+ // Finally, add the set of defs to each block in the iterated dominance
+ // frontier.
+ const MachineDominanceFrontier::DomSetType &DF = DFLoc->second;
+ SetVector<MachineBasicBlock*> IDF(DF.begin(), DF.end());
+ for (unsigned i = 0; i < IDF.size(); ++i) {
+ auto F = MDF.find(IDF[i]);
+ if (F != MDF.end())
+ IDF.insert(F->second.begin(), F->second.end());
+ }
+
+ // Get the register references that are reachable from this block.
+ RegisterSet &Refs = RefM[BA.Id];
+ for (auto DB : IDF) {
+ auto DBA = Func.Addr->findBlock(DB, *this);
+ const auto &Rs = RefM[DBA.Id];
+ Refs.insert(Rs.begin(), Rs.end());
+ }
+
+ for (auto DB : IDF) {
+ auto DBA = Func.Addr->findBlock(DB, *this);
+ PhiM[DBA.Id].insert(Defs.begin(), Defs.end());
+ }
+}
+
+// Given the locations of phi nodes in the map PhiM, create the phi nodes
+// that are located in the block node BA.
+void DataFlowGraph::buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA) {
+  // Check if this block has any DF defs, i.e. whether there are any defs
+  // for which this block is in the iterated dominance frontier.
+ auto HasDF = PhiM.find(BA.Id);
+ if (HasDF == PhiM.end() || HasDF->second.empty())
+ return;
+
+  // First, remove from Refs all R such that there exists T in Refs
+ // such that T covers R. In other words, only leave those refs that
+ // are not covered by another ref (i.e. maximal with respect to covering).
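+  // For example (with hypothetical registers), if the set is {R0, R0:lo}
+  // and R0 covers R0:lo, only R0 is kept.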
+
+ auto MaxCoverIn = [this] (RegisterRef RR, RegisterSet &RRs) -> RegisterRef {
+ for (auto I : RRs)
+ if (I != RR && RAI.covers(I, RR))
+ RR = I;
+ return RR;
+ };
+
+ RegisterSet MaxDF;
+ for (auto I : HasDF->second)
+ MaxDF.insert(MaxCoverIn(I, HasDF->second));
+
+ std::vector<RegisterRef> MaxRefs;
+ auto &RefB = RefM[BA.Id];
+ for (auto I : MaxDF)
+ MaxRefs.push_back(MaxCoverIn(I, RefB));
+
+ // Now, for each R in MaxRefs, get the alias closure of R. If the closure
+  // only has R in it, create a phi with a def for R. Otherwise, create a phi,
+ // and add a def for each S in the closure.
+
+ // Sort the refs so that the phis will be created in a deterministic order.
+ std::sort(MaxRefs.begin(), MaxRefs.end());
+ // Remove duplicates.
+ auto NewEnd = std::unique(MaxRefs.begin(), MaxRefs.end());
+ MaxRefs.erase(NewEnd, MaxRefs.end());
+
+ auto Aliased = [this,&MaxRefs](RegisterRef RR,
+ std::vector<unsigned> &Closure) -> bool {
+ for (auto I : Closure)
+ if (RAI.alias(RR, MaxRefs[I]))
+ return true;
+ return false;
+ };
+
+ // Prepare a list of NodeIds of the block's predecessors.
+ std::vector<NodeId> PredList;
+ const MachineBasicBlock *MBB = BA.Addr->getCode();
+ for (auto PB : MBB->predecessors()) {
+ auto B = Func.Addr->findBlock(PB, *this);
+ PredList.push_back(B.Id);
+ }
+
+ while (!MaxRefs.empty()) {
+ // Put the first element in the closure, and then add all subsequent
+ // elements from MaxRefs to it, if they alias at least one element
+ // already in the closure.
+ // ClosureIdx: vector of indices in MaxRefs of members of the closure.
+ std::vector<unsigned> ClosureIdx = { 0 };
+ for (unsigned i = 1; i != MaxRefs.size(); ++i)
+ if (Aliased(MaxRefs[i], ClosureIdx))
+ ClosureIdx.push_back(i);
+
+ // Build a phi for the closure.
+ unsigned CS = ClosureIdx.size();
+ NodeAddr<PhiNode*> PA = newPhi(BA);
+
+ // Add defs.
+ for (unsigned X = 0; X != CS; ++X) {
+ RegisterRef RR = MaxRefs[ClosureIdx[X]];
+ uint16_t PhiFlags = NodeAttrs::PhiRef | NodeAttrs::Preserving;
+ NodeAddr<DefNode*> DA = newDef(PA, RR, PhiFlags);
+ PA.Addr->addMember(DA, *this);
+ }
+ // Add phi uses.
+ for (auto P : PredList) {
+ auto PBA = addr<BlockNode*>(P);
+ for (unsigned X = 0; X != CS; ++X) {
+ RegisterRef RR = MaxRefs[ClosureIdx[X]];
+ NodeAddr<PhiUseNode*> PUA = newPhiUse(PA, RR, PBA);
+ PA.Addr->addMember(PUA, *this);
+ }
+ }
+
+ // Erase from MaxRefs all elements in the closure.
+ auto Begin = MaxRefs.begin();
+ for (unsigned i = ClosureIdx.size(); i != 0; --i)
+ MaxRefs.erase(Begin + ClosureIdx[i-1]);
+ }
+}
+
+// Remove any unneeded phi nodes that were created during the build process.
+void DataFlowGraph::removeUnusedPhis() {
+ // This will remove unused phis, i.e. phis where each def does not reach
+ // any uses or other defs. This will not detect or remove circular phi
+ // chains that are otherwise dead. Unused/dead phis are created during
+  // the build process, and this function is intended to remove the cases
+  // that are easily determined to be unnecessary.
+
+ SetVector<NodeId> PhiQ;
+ for (NodeAddr<BlockNode*> BA : Func.Addr->members(*this)) {
+ for (auto P : BA.Addr->members_if(IsPhi, *this))
+ PhiQ.insert(P.Id);
+ }
+
+ static auto HasUsedDef = [](NodeList &Ms) -> bool {
+ for (auto M : Ms) {
+ if (M.Addr->getKind() != NodeAttrs::Def)
+ continue;
+ NodeAddr<DefNode*> DA = M;
+ if (DA.Addr->getReachedDef() != 0 || DA.Addr->getReachedUse() != 0)
+ return true;
+ }
+ return false;
+ };
+
+ // Any phi, if it is removed, may affect other phis (make them dead).
+ // For each removed phi, collect the potentially affected phis and add
+ // them back to the queue.
+ while (!PhiQ.empty()) {
+ auto PA = addr<PhiNode*>(PhiQ[0]);
+ PhiQ.remove(PA.Id);
+ NodeList Refs = PA.Addr->members(*this);
+ if (HasUsedDef(Refs))
+ continue;
+ for (NodeAddr<RefNode*> RA : Refs) {
+ if (NodeId RD = RA.Addr->getReachingDef()) {
+ auto RDA = addr<DefNode*>(RD);
+ NodeAddr<InstrNode*> OA = RDA.Addr->getOwner(*this);
+ if (IsPhi(OA))
+ PhiQ.insert(OA.Id);
+ }
+ if (RA.Addr->isDef())
+ unlinkDef(RA);
+ else
+ unlinkUse(RA);
+ }
+ NodeAddr<BlockNode*> BA = PA.Addr->getOwner(*this);
+ BA.Addr->removeMember(PA, *this);
+ }
+}
+
+// For a given reference node TA in an instruction node IA, connect the
+// reaching def of TA to the appropriate def node. Create any shadow nodes
+// as appropriate.
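+// For example, if the def stack for a use of a super-register contains
+// defs of two non-overlapping sub-registers, the use is linked to the
+// first def, and a shadow use is created and linked to the second. The
+// traversal of the stack stops early once a covering def is found.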
+template <typename T>
+void DataFlowGraph::linkRefUp(NodeAddr<InstrNode*> IA, NodeAddr<T> TA,
+ DefStack &DS) {
+ if (DS.empty())
+ return;
+ RegisterRef RR = TA.Addr->getRegRef();
+ NodeAddr<T> TAP;
+
+ // References from the def stack that have been examined so far.
+ RegisterSet Defs;
+
+ for (auto I = DS.top(), E = DS.bottom(); I != E; I.down()) {
+ RegisterRef QR = I->Addr->getRegRef();
+ auto AliasQR = [QR,this] (RegisterRef RR) -> bool {
+ return RAI.alias(QR, RR);
+ };
+ bool PrecUp = RAI.covers(QR, RR);
+ // Skip all defs that are aliased to any of the defs that we have already
+ // seen. If we encounter a covering def, stop the stack traversal early.
+ if (std::any_of(Defs.begin(), Defs.end(), AliasQR)) {
+ if (PrecUp)
+ break;
+ continue;
+ }
+ // The reaching def.
+ NodeAddr<DefNode*> RDA = *I;
+
+ // Pick the reached node.
+ if (TAP.Id == 0) {
+ TAP = TA;
+ } else {
+ // Mark the existing ref as "shadow" and create a new shadow.
+ TAP.Addr->setFlags(TAP.Addr->getFlags() | NodeAttrs::Shadow);
+ TAP = getNextShadow(IA, TAP, true);
+ }
+
+ // Create the link.
+ TAP.Addr->linkToDef(TAP.Id, RDA);
+
+ if (PrecUp)
+ break;
+ Defs.insert(QR);
+ }
+}
+
+// Create data-flow links for all reference nodes in the statement node SA.
+void DataFlowGraph::linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA) {
+ RegisterSet Defs;
+
+ // Link all nodes (upwards in the data-flow) with their reaching defs.
+ for (NodeAddr<RefNode*> RA : SA.Addr->members(*this)) {
+ uint16_t Kind = RA.Addr->getKind();
+ assert(Kind == NodeAttrs::Def || Kind == NodeAttrs::Use);
+ RegisterRef RR = RA.Addr->getRegRef();
+ // Do not process multiple defs of the same reference.
+ if (Kind == NodeAttrs::Def && Defs.count(RR))
+ continue;
+ Defs.insert(RR);
+
+ auto F = DefM.find(RR);
+ if (F == DefM.end())
+ continue;
+ DefStack &DS = F->second;
+ if (Kind == NodeAttrs::Use)
+ linkRefUp<UseNode*>(SA, RA, DS);
+ else if (Kind == NodeAttrs::Def)
+ linkRefUp<DefNode*>(SA, RA, DS);
+ else
+ llvm_unreachable("Unexpected node in instruction");
+ }
+}
+
+// Create data-flow links for all instructions in the block node BA. This
+// will include updating any phi nodes in BA.
+void DataFlowGraph::linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA) {
+ // Push block delimiters.
+ markBlock(BA.Id, DefM);
+
+ // For each non-phi instruction in the block, link all the defs and uses
+ // to their reaching defs. For any member of the block (including phis),
+ // push the defs on the corresponding stacks.
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(*this)) {
+ // Ignore phi nodes here. They will be linked part by part from the
+ // predecessors.
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ linkStmtRefs(DefM, IA);
+
+ // Push the definitions on the stack.
+ pushDefs(IA, DefM);
+ }
+
+ // Recursively process all children in the dominator tree.
+ MachineDomTreeNode *N = MDT.getNode(BA.Addr->getCode());
+ for (auto I : *N) {
+ MachineBasicBlock *SB = I->getBlock();
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ linkBlockRefs(DefM, SBA);
+ }
+
+ // Link the phi uses from the successor blocks.
+ auto IsUseForBA = [BA](NodeAddr<NodeBase*> NA) -> bool {
+ if (NA.Addr->getKind() != NodeAttrs::Use)
+ return false;
+ assert(NA.Addr->getFlags() & NodeAttrs::PhiRef);
+ NodeAddr<PhiUseNode*> PUA = NA;
+ return PUA.Addr->getPredecessor() == BA.Id;
+ };
+ MachineBasicBlock *MBB = BA.Addr->getCode();
+ for (auto SB : MBB->successors()) {
+ auto SBA = Func.Addr->findBlock(SB, *this);
+ for (NodeAddr<InstrNode*> IA : SBA.Addr->members_if(IsPhi, *this)) {
+ // Go over each phi use associated with MBB, and link it.
+ for (auto U : IA.Addr->members_if(IsUseForBA, *this)) {
+ NodeAddr<PhiUseNode*> PUA = U;
+ RegisterRef RR = PUA.Addr->getRegRef();
+ linkRefUp<UseNode*>(IA, PUA, DefM[RR]);
+ }
+ }
+ }
+
+ // Pop all defs from this block from the definition stacks.
+ releaseBlock(BA.Id, DefM);
+}
+
+// Remove the use node UA from any data-flow and structural links.
+void DataFlowGraph::unlinkUse(NodeAddr<UseNode*> UA) {
+ NodeId RD = UA.Addr->getReachingDef();
+ NodeId Sib = UA.Addr->getSibling();
+
+ NodeAddr<InstrNode*> IA = UA.Addr->getOwner(*this);
+ IA.Addr->removeMember(UA, *this);
+
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ auto RDA = addr<DefNode*>(RD);
+ auto TA = addr<UseNode*>(RDA.Addr->getReachedUse());
+ if (TA.Id == UA.Id) {
+ RDA.Addr->setReachedUse(Sib);
+ return;
+ }
+
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == UA.Id) {
+ TA.Addr->setSibling(UA.Addr->getSibling());
+ return;
+ }
+ TA = addr<UseNode*>(S);
+ }
+}
+
+// Remove the def node DA from any data-flow and structural links.
+void DataFlowGraph::unlinkDef(NodeAddr<DefNode*> DA) {
+ //
+ // RD
+ // | reached
+ // | def
+ // :
+ // .
+ // +----+
+ // ... -- | DA | -- ... -- 0 : sibling chain of DA
+ // +----+
+ // | | reached
+ // | : def
+ // | .
+ // | ... : Siblings (defs)
+ // |
+ // : reached
+ // . use
+ // ... : sibling chain of reached uses
+
+ NodeId RD = DA.Addr->getReachingDef();
+
+ // Visit all siblings of the reached def and reset their reaching defs.
+ // Also, defs reached by DA are now "promoted" to being reached by RD,
+ // so all of them will need to be spliced into the sibling chain where
+ // DA belongs.
+ auto getAllNodes = [this] (NodeId N) -> NodeList {
+ NodeList Res;
+ while (N) {
+ auto RA = addr<RefNode*>(N);
+ // Keep the nodes in the exact sibling order.
+ Res.push_back(RA);
+ N = RA.Addr->getSibling();
+ }
+ return Res;
+ };
+ NodeList ReachedDefs = getAllNodes(DA.Addr->getReachedDef());
+ NodeList ReachedUses = getAllNodes(DA.Addr->getReachedUse());
+
+ if (RD == 0) {
+ for (NodeAddr<RefNode*> I : ReachedDefs)
+ I.Addr->setSibling(0);
+ for (NodeAddr<RefNode*> I : ReachedUses)
+ I.Addr->setSibling(0);
+ }
+ for (NodeAddr<DefNode*> I : ReachedDefs)
+ I.Addr->setReachingDef(RD);
+ for (NodeAddr<UseNode*> I : ReachedUses)
+ I.Addr->setReachingDef(RD);
+
+ NodeId Sib = DA.Addr->getSibling();
+ if (RD == 0) {
+ assert(Sib == 0);
+ return;
+ }
+
+ // Update the reaching def node and remove DA from the sibling list.
+ auto RDA = addr<DefNode*>(RD);
+ auto TA = addr<DefNode*>(RDA.Addr->getReachedDef());
+ if (TA.Id == DA.Id) {
+ // If DA is the first reached def, just update the RD's reached def
+ // to the DA's sibling.
+ RDA.Addr->setReachedDef(Sib);
+ } else {
+ // Otherwise, traverse the sibling list of the reached defs and remove
+ // DA from it.
+ while (TA.Id != 0) {
+ NodeId S = TA.Addr->getSibling();
+ if (S == DA.Id) {
+ TA.Addr->setSibling(Sib);
+ break;
+ }
+ TA = addr<DefNode*>(S);
+ }
+ }
+
+ // Splice the DA's reached defs into the RDA's reached def chain.
+ if (!ReachedDefs.empty()) {
+ auto Last = NodeAddr<DefNode*>(ReachedDefs.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedDef());
+ RDA.Addr->setReachedDef(ReachedDefs.front().Id);
+ }
+ // Splice the DA's reached uses into the RDA's reached use chain.
+ if (!ReachedUses.empty()) {
+ auto Last = NodeAddr<UseNode*>(ReachedUses.back());
+ Last.Addr->setSibling(RDA.Addr->getReachedUse());
+ RDA.Addr->setReachedUse(ReachedUses.front().Id);
+ }
+
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(*this);
+ IA.Addr->removeMember(DA, *this);
+}
diff --git a/lib/Target/Hexagon/RDFGraph.h b/lib/Target/Hexagon/RDFGraph.h
new file mode 100644
index 000000000000..7da7bb5973cf
--- /dev/null
+++ b/lib/Target/Hexagon/RDFGraph.h
@@ -0,0 +1,841 @@
+//===--- RDFGraph.h -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Target-independent, SSA-based data flow graph for register data flow (RDF)
+// for a non-SSA program representation (e.g. post-RA machine code).
+//
+//
+// *** Introduction
+//
+// The RDF graph is a collection of nodes, each of which denotes some element
+// of the program. There are two main types of such elements: code and
+// references. Conceptually, "code" is something that represents the structure
+// of the program, e.g. a basic block or a statement, while "reference" is an
+// instance of accessing a register, e.g. a definition or a use. Nodes are
+// connected with each other based on the structure of the program (such as
+// blocks, instructions, etc.), and based on the data flow (e.g. reaching
+// definitions, reached uses, etc.). The single-reaching-definition principle
+// of SSA is generally observed, although, due to the non-SSA representation
+// of the program, there are some differences between the graph and a "pure"
+// SSA representation.
+//
+//
+// *** Implementation remarks
+//
+// Since the graph can contain a large number of nodes, memory consumption
+// was one of the major design considerations. As a result, there is a single
+// base class NodeBase which defines all members used by all possible derived
+// classes. The members are arranged in a union, and a derived class cannot
+// add any data members of its own. Each derived class only defines the
+// functional interface, i.e. member functions. NodeBase must be a POD,
+// which implies that all of its members must also be PODs.
+// Since nodes need to be connected with other nodes, pointers have been
+// replaced with 32-bit identifiers: each node has an id of type NodeId.
+// There are mapping functions in the graph that translate between actual
+// memory addresses and the corresponding identifiers.
+// A node id of 0 is equivalent to nullptr.
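+//
+// For illustration, a sketch of the translation interface (for a graph G):
+//   NodeAddr<DefNode*> DA = G.addr<DefNode*>(Id);  // id to address
+//   NodeId N = G.id(DA.Addr);                      // address back to id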
+//
+//
+// *** Structure of the graph
+//
+// A code node is always a collection of other nodes. For example, a code
+// node corresponding to a basic block will contain code nodes corresponding
+// to instructions. In turn, a code node corresponding to an instruction will
+// contain a list of reference nodes that correspond to the definitions and
+// uses of registers in that instruction. The members are arranged into a
+// circular list, which is yet another consequence of the effort to save
+// memory: for each member node it should be possible to obtain its owner,
+// and it should be possible to access all other members. There are other
+// ways to accomplish that, but the circular list seemed the most natural.
+//
+// +- CodeNode -+
+// | | <---------------------------------------------------+
+// +-+--------+-+ |
+// |FirstM |LastM |
+// | +-------------------------------------+ |
+// | | |
+// V V |
+// +----------+ Next +----------+ Next Next +----------+ Next |
+// | |----->| |-----> ... ----->| |----->-+
+// +- Member -+ +- Member -+ +- Member -+
+//
+// The order of members is such that related reference nodes (see below)
+// should be contiguous on the member list.
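+//
+// A consequence of the circular arrangement is that the owner of a member
+// node can be found by following the Next links until a code node is
+// reached. A sketch of the lookup (cf. RefNode::getOwner in RDFGraph.cpp):
+//   NodeAddr<NodeBase*> NA = G.addr<NodeBase*>(M.Addr->getNext());
+//   while (NA.Addr->getType() != NodeAttrs::Code)
+//     NA = G.addr<NodeBase*>(NA.Addr->getNext());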
+//
+// A reference node is a node that encapsulates an access to a register,
+// in other words, data flowing into or out of a register. There are two
+// major kinds of reference nodes: defs and uses. A def node will contain
+// the id of the first reached use, and the id of the first reached def.
+// Each def and use will contain the id of the reaching def, and also the
+// id of the next reached def (for def nodes) or use (for use nodes).
+// The "next node sharing the same reaching def" is denoted as "sibling".
+// In summary:
+// - Def node contains: reaching def, sibling, first reached def, and first
+// reached use.
+// - Use node contains: reaching def and sibling.
+//
+// +-- DefNode --+
+// | R2 = ... | <---+--------------------+
+// ++---------+--+ | |
+// |Reached |Reached | |
+// |Def |Use | |
+// | | |Reaching |Reaching
+// | V |Def |Def
+// | +-- UseNode --+ Sib +-- UseNode --+ Sib Sib
+// | | ... = R2 |----->| ... = R2 |----> ... ----> 0
+// | +-------------+ +-------------+
+// V
+// +-- DefNode --+ Sib
+// | R2 = ... |----> ...
+// ++---------+--+
+// | |
+// | |
+// ... ...
+//
+// To get a full picture, the circular lists connecting blocks within a
+// function, instructions within a block, etc. should be superimposed with
+// the def-def, def-use links shown above.
+// To illustrate this, consider a small example in a pseudo-assembly:
+// foo:
+// add r2, r0, r1 ; r2 = r0+r1
+// addi r0, r2, 1 ; r0 = r2+1
+// ret r0 ; return value in r0
+//
+// The graph (in a format used by the debugging functions) would look like:
+//
+// DFG dump:[
+// f1: Function foo
+// b2: === BB#0 === preds(0), succs(0):
+// p3: phi [d4<r0>(,d12,u9):]
+// p5: phi [d6<r1>(,,u10):]
+// s7: add [d8<r2>(,,u13):, u9<r0>(d4):, u10<r1>(d6):]
+// s11: addi [d12<r0>(d4,,u15):, u13<r2>(d8):]
+// s14: ret [u15<r0>(d12):]
+// ]
+//
+// The f1, b2, p3, etc. are node ids. The letter is prepended to indicate the
+// kind of the node (i.e. f - function, b - basic block, p - phi,
+// s - statement, d - def, u - use).
+// The format of a def node is:
+// dN<R>(rd,d,u):sib,
+// where
+// N - numeric node id,
+// R - register being defined
+// rd - reaching def,
+// d - reached def,
+// u - reached use,
+// sib - sibling.
+// The format of a use node is:
+// uN<R>[!](rd):sib,
+// where
+// N - numeric node id,
+// R - register being used,
+// rd - reaching def,
+// sib - sibling.
+// Possible annotations (usually preceding the node id):
+// + - preserving def,
+// ~ - clobbering def,
+// " - shadow ref (follows the node id),
+// ! - fixed register (appears after register name).
+//
+// The circular lists are not explicit in the dump.
+//
+//
+// *** Node attributes
+//
+// NodeBase has a member "Attrs", which is the primary way of determining
+// the node's characteristics. The fields in this member decide whether
+// the node is a code node or a reference node (i.e. node's "type"), then
+// within each type, the "kind" determines what specifically this node
+// represents. The remaining bits, "flags", contain additional information
+// that is even more detailed than the "kind".
+// CodeNode's kinds are:
+// - Phi: Phi node, members are reference nodes.
+// - Stmt: Statement, members are reference nodes.
+// - Block: Basic block, members are instruction nodes (i.e. Phi or Stmt).
+// - Func: The whole function. The members are basic block nodes.
+// RefNode's kinds are:
+// - Use.
+// - Def.
+//
+// Meaning of flags:
+// - Preserving: applies only to defs. A preserving def is one that can
+// preserve some of the original bits among those that are included in
+// the register associated with that def. For example, if R0 is a 32-bit
+// register, but a def can only change the lower 16 bits, then it will
+// be marked as preserving.
+// - Shadow: a reference that has duplicates holding additional reaching
+// defs (see more below).
+// - Clobbering: applied only to defs, indicates that the value generated
+// by this def is unspecified. A typical example would be volatile registers
+// after function calls.
+//
+//
+// *** Shadow references
+//
+// It may happen that a super-register can have two (or more) non-overlapping
+// sub-registers. When both of these sub-registers are defined and followed
+// by a use of the super-register, the use of the super-register will not
+// have a unique reaching def: both defs of the sub-registers need to be
+// accounted for. In such cases, a duplicate use of the super-register is
+// added and it points to the extra reaching def. Both uses are marked with
+// a flag "shadow". Example:
+// Assume t0 is a super-register of r0 and r1, where r0 and r1 do not overlap:
+// set r0, 1 ; r0 = 1
+// set r1, 1 ; r1 = 1
+// addi t1, t0, 1 ; t1 = t0+1
+//
+// The DFG:
+// s1: set [d2<r0>(,,u9):]
+// s3: set [d4<r1>(,,u10):]
+// s5: addi [d6<t1>(,,):, u7"<t0>(d2):, u8"<t0>(d4):]
+//
+// The statement s5 has two use nodes for t0: u7" and u8". The quotation
+// mark " indicates that the node is a shadow.
+//
+#ifndef RDF_GRAPH_H
+#define RDF_GRAPH_H
+
+#include "llvm/ADT/BitVector.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <functional>
+#include <map>
+#include <set>
+#include <vector>
+
+using namespace llvm;
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineInstr;
+ class MachineOperand;
+ class MachineDominanceFrontier;
+ class MachineDominatorTree;
+ class TargetInstrInfo;
+ class TargetRegisterInfo;
+}
+
+namespace rdf {
+ typedef uint32_t NodeId;
+
+ struct NodeAttrs {
+ enum : uint16_t {
+ None = 0x0000, // Nothing
+
+ // Types: 2 bits
+ TypeMask = 0x0003,
+ Code = 0x0001, // 01, Container
+ Ref = 0x0002, // 10, Reference
+
+ // Kind: 3 bits
+ KindMask = 0x0007 << 2,
+ Def = 0x0001 << 2, // 001
+ Use = 0x0002 << 2, // 010
+ Phi = 0x0003 << 2, // 011
+ Stmt = 0x0004 << 2, // 100
+ Block = 0x0005 << 2, // 101
+ Func = 0x0006 << 2, // 110
+
+ // Flags: 5 bits for now
+ FlagMask = 0x001F << 5,
+ Shadow = 0x0001 << 5, // 00001, Has extra reaching defs.
+ Clobbering = 0x0002 << 5, // 00010, Produces unspecified values.
+ PhiRef = 0x0004 << 5, // 00100, Member of PhiNode.
+ Preserving = 0x0008 << 5, // 01000, Def can keep original bits.
+ Fixed = 0x0010 << 5, // 10000, Fixed register.
+ };
+
+ static uint16_t type(uint16_t T) { return T & TypeMask; }
+ static uint16_t kind(uint16_t T) { return T & KindMask; }
+ static uint16_t flags(uint16_t T) { return T & FlagMask; }
+
+ static uint16_t set_type(uint16_t A, uint16_t T) {
+ return (A & ~TypeMask) | T;
+ }
+ static uint16_t set_kind(uint16_t A, uint16_t K) {
+ return (A & ~KindMask) | K;
+ }
+ static uint16_t set_flags(uint16_t A, uint16_t F) {
+ return (A & ~FlagMask) | F;
+ }
+
+ // Test if A contains B.
+ static bool contains(uint16_t A, uint16_t B) {
+ if (type(A) != Code)
+ return false;
+ uint16_t KB = kind(B);
+ switch (kind(A)) {
+ case Func:
+ return KB == Block;
+ case Block:
+ return KB == Phi || KB == Stmt;
+ case Phi:
+ case Stmt:
+ return type(B) == Ref;
+ }
+ return false;
+ }
+ };
+
+ template <typename T> struct NodeAddr {
+ NodeAddr() : Addr(nullptr), Id(0) {}
+ NodeAddr(T A, NodeId I) : Addr(A), Id(I) {}
+ NodeAddr(const NodeAddr&) = default;
+ NodeAddr &operator= (const NodeAddr&) = default;
+
+ bool operator== (const NodeAddr<T> &NA) const {
+ assert((Addr == NA.Addr) == (Id == NA.Id));
+ return Addr == NA.Addr;
+ }
+ bool operator!= (const NodeAddr<T> &NA) const {
+ return !operator==(NA);
+ }
+    // Type cast (casting constructor). This conversion is the reason for
+    // having this class instead of std::pair.
+ template <typename S> NodeAddr(const NodeAddr<S> &NA)
+ : Addr(static_cast<T>(NA.Addr)), Id(NA.Id) {}
+
+ T Addr;
+ NodeId Id;
+ };
+
+ struct NodeBase;
+
+ // Fast memory allocation and translation between node id and node address.
+ // This is really the same idea as the one underlying the "bump pointer
+ // allocator", the difference being in the translation. A node id is
+ // composed of two components: the index of the block in which it was
+ // allocated, and the index within the block. With the default settings,
+ // where the number of nodes per block is 4096, the node id (minus 1) is:
+ //
+ // bit position: 11 0
+ // +----------------------------+--------------+
+ // | Index of the block |Index in block|
+ // +----------------------------+--------------+
+ //
+ // The actual node id is the above plus 1, to avoid creating a node id of 0.
+ //
+ // This method significantly improved the build time, compared to using maps
+ // (std::unordered_map or DenseMap) to translate between pointers and ids.
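+  //
+  // For example, with the default 4096 nodes per block (BitsPerIndex = 12,
+  // IndexMask = 0xFFF, NodeMemSize = 32), a node id N corresponds to block
+  // (N-1) >> 12, at byte offset ((N-1) & 0xFFF) * 32 within that block.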
+ struct NodeAllocator {
+ // Amount of storage for a single node.
+ enum { NodeMemSize = 32 };
+ NodeAllocator(uint32_t NPB = 4096)
+ : NodesPerBlock(NPB), BitsPerIndex(Log2_32(NPB)),
+ IndexMask((1 << BitsPerIndex)-1), ActiveEnd(nullptr) {
+ assert(isPowerOf2_32(NPB));
+ }
+ NodeBase *ptr(NodeId N) const {
+ uint32_t N1 = N-1;
+ uint32_t BlockN = N1 >> BitsPerIndex;
+ uint32_t Offset = (N1 & IndexMask) * NodeMemSize;
+ return reinterpret_cast<NodeBase*>(Blocks[BlockN]+Offset);
+ }
+ NodeId id(const NodeBase *P) const;
+ NodeAddr<NodeBase*> New();
+ void clear();
+
+ private:
+ void startNewBlock();
+ bool needNewBlock();
+ uint32_t makeId(uint32_t Block, uint32_t Index) const {
+ // Add 1 to the id, to avoid the id of 0, which is treated as "null".
+ return ((Block << BitsPerIndex) | Index) + 1;
+ }
+
+ const uint32_t NodesPerBlock;
+ const uint32_t BitsPerIndex;
+ const uint32_t IndexMask;
+ char *ActiveEnd;
+ std::vector<char*> Blocks;
+ typedef BumpPtrAllocatorImpl<MallocAllocator, 65536> AllocatorTy;
+ AllocatorTy MemPool;
+ };
+
+ struct RegisterRef {
+ unsigned Reg, Sub;
+
+ // No non-trivial constructors, since this will be a member of a union.
+ RegisterRef() = default;
+ RegisterRef(const RegisterRef &RR) = default;
+ RegisterRef &operator= (const RegisterRef &RR) = default;
+ bool operator== (const RegisterRef &RR) const {
+ return Reg == RR.Reg && Sub == RR.Sub;
+ }
+ bool operator!= (const RegisterRef &RR) const {
+ return !operator==(RR);
+ }
+ bool operator< (const RegisterRef &RR) const {
+ return Reg < RR.Reg || (Reg == RR.Reg && Sub < RR.Sub);
+ }
+ };
+ typedef std::set<RegisterRef> RegisterSet;
+
+ struct RegisterAliasInfo {
+ RegisterAliasInfo(const TargetRegisterInfo &tri) : TRI(tri) {}
+ virtual ~RegisterAliasInfo() {}
+
+ virtual std::vector<RegisterRef> getAliasSet(RegisterRef RR) const;
+ virtual bool alias(RegisterRef RA, RegisterRef RB) const;
+ virtual bool covers(RegisterRef RA, RegisterRef RB) const;
+ virtual bool covers(const RegisterSet &RRs, RegisterRef RR) const;
+
+ const TargetRegisterInfo &TRI;
+ };
+
+ struct TargetOperandInfo {
+ TargetOperandInfo(const TargetInstrInfo &tii) : TII(tii) {}
+ virtual ~TargetOperandInfo() {}
+ virtual bool isPreserving(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isClobbering(const MachineInstr &In, unsigned OpNum) const;
+ virtual bool isFixedReg(const MachineInstr &In, unsigned OpNum) const;
+
+ const TargetInstrInfo &TII;
+ };
+
+
+ struct DataFlowGraph;
+
+ struct NodeBase {
+ public:
+ // Make sure this is a POD.
+ NodeBase() = default;
+ uint16_t getType() const { return NodeAttrs::type(Attrs); }
+ uint16_t getKind() const { return NodeAttrs::kind(Attrs); }
+ uint16_t getFlags() const { return NodeAttrs::flags(Attrs); }
+ NodeId getNext() const { return Next; }
+
+ uint16_t getAttrs() const { return Attrs; }
+ void setAttrs(uint16_t A) { Attrs = A; }
+ void setFlags(uint16_t F) { setAttrs(NodeAttrs::set_flags(getAttrs(), F)); }
+
+ // Insert node NA after "this" in the circular chain.
+ void append(NodeAddr<NodeBase*> NA);
+ // Initialize all members to 0.
+ void init() { memset(this, 0, sizeof *this); }
+ void setNext(NodeId N) { Next = N; }
+
+ protected:
+ uint16_t Attrs;
+ uint16_t Reserved;
+ NodeId Next; // Id of the next node in the circular chain.
+ // Definitions of nested types. Using anonymous nested structs would make
+ // this class definition clearer, but unnamed structs are not a part of
+ // the standard.
+ struct Def_struct {
+ NodeId DD, DU; // Ids of the first reached def and use.
+ };
+ struct PhiU_struct {
+ NodeId PredB; // Id of the predecessor block for a phi use.
+ };
+ struct Code_struct {
+ void *CP; // Pointer to the actual code.
+ NodeId FirstM, LastM; // Id of the first member and last.
+ };
+ struct Ref_struct {
+ NodeId RD, Sib; // Ids of the reaching def and the sibling.
+ union {
+ Def_struct Def;
+ PhiU_struct PhiU;
+ };
+ union {
+ MachineOperand *Op; // Non-phi refs point to a machine operand.
+ RegisterRef RR; // Phi refs store register info directly.
+ };
+ };
+
+ // The actual payload.
+ union {
+ Ref_struct Ref;
+ Code_struct Code;
+ };
+ };
+ // The allocator allocates chunks of 32 bytes for each node. The fact that
+ // each node takes 32 bytes in memory is used for fast translation between
+ // the node id and the node address.
+ static_assert(sizeof(NodeBase) <= NodeAllocator::NodeMemSize,
+ "NodeBase must be at most NodeAllocator::NodeMemSize bytes");
+
+ typedef std::vector<NodeAddr<NodeBase*>> NodeList;
+ typedef std::set<NodeId> NodeSet;
+
+ struct RefNode : public NodeBase {
+ RefNode() = default;
+ RegisterRef getRegRef() const;
+ MachineOperand &getOp() {
+ assert(!(getFlags() & NodeAttrs::PhiRef));
+ return *Ref.Op;
+ }
+ void setRegRef(RegisterRef RR);
+ void setRegRef(MachineOperand *Op);
+ NodeId getReachingDef() const {
+ return Ref.RD;
+ }
+ void setReachingDef(NodeId RD) {
+ Ref.RD = RD;
+ }
+ NodeId getSibling() const {
+ return Ref.Sib;
+ }
+ void setSibling(NodeId Sib) {
+ Ref.Sib = Sib;
+ }
+ bool isUse() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Use;
+ }
+ bool isDef() const {
+ assert(getType() == NodeAttrs::Ref);
+ return getKind() == NodeAttrs::Def;
+ }
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> getNextRef(RegisterRef RR, Predicate P, bool NextOnly,
+ const DataFlowGraph &G);
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct DefNode : public RefNode {
+ NodeId getReachedDef() const {
+ return Ref.Def.DD;
+ }
+ void setReachedDef(NodeId D) {
+ Ref.Def.DD = D;
+ }
+ NodeId getReachedUse() const {
+ return Ref.Def.DU;
+ }
+ void setReachedUse(NodeId U) {
+ Ref.Def.DU = U;
+ }
+
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct UseNode : public RefNode {
+ void linkToDef(NodeId Self, NodeAddr<DefNode*> DA);
+ };
+
+ struct PhiUseNode : public UseNode {
+ NodeId getPredecessor() const {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ return Ref.PhiU.PredB;
+ }
+ void setPredecessor(NodeId B) {
+ assert(getFlags() & NodeAttrs::PhiRef);
+ Ref.PhiU.PredB = B;
+ }
+ };
+
+ struct CodeNode : public NodeBase {
+ template <typename T> T getCode() const {
+ return static_cast<T>(Code.CP);
+ }
+ void setCode(void *C) {
+ Code.CP = C;
+ }
+
+ NodeAddr<NodeBase*> getFirstMember(const DataFlowGraph &G) const;
+ NodeAddr<NodeBase*> getLastMember(const DataFlowGraph &G) const;
+ void addMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+ void addMemberAfter(NodeAddr<NodeBase*> MA, NodeAddr<NodeBase*> NA,
+ const DataFlowGraph &G);
+ void removeMember(NodeAddr<NodeBase*> NA, const DataFlowGraph &G);
+
+ NodeList members(const DataFlowGraph &G) const;
+ template <typename Predicate>
+ NodeList members_if(Predicate P, const DataFlowGraph &G) const;
+ };
+
+ struct InstrNode : public CodeNode {
+ NodeAddr<NodeBase*> getOwner(const DataFlowGraph &G);
+ };
+
+ struct PhiNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return nullptr;
+ }
+ };
+
+ struct StmtNode : public InstrNode {
+ MachineInstr *getCode() const {
+ return CodeNode::getCode<MachineInstr*>();
+ }
+ };
+
+ struct BlockNode : public CodeNode {
+ MachineBasicBlock *getCode() const {
+ return CodeNode::getCode<MachineBasicBlock*>();
+ }
+ void addPhi(NodeAddr<PhiNode*> PA, const DataFlowGraph &G);
+ };
+
+ struct FuncNode : public CodeNode {
+ MachineFunction *getCode() const {
+ return CodeNode::getCode<MachineFunction*>();
+ }
+ NodeAddr<BlockNode*> findBlock(const MachineBasicBlock *BB,
+ const DataFlowGraph &G) const;
+ NodeAddr<BlockNode*> getEntryBlock(const DataFlowGraph &G);
+ };
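+
+ // Taken together, the code nodes mirror the machine IR hierarchy: a
+ // FuncNode owns BlockNodes, each BlockNode owns InstrNodes (PhiNode or
+ // StmtNode), and each InstrNode owns its RefNodes (DefNode, UseNode,
+ // PhiUseNode).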
+
+ struct DataFlowGraph {
+ DataFlowGraph(MachineFunction &mf, const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri, const MachineDominatorTree &mdt,
+ const MachineDominanceFrontier &mdf, const RegisterAliasInfo &rai,
+ const TargetOperandInfo &toi);
+
+ NodeBase *ptr(NodeId N) const;
+ template <typename T> T ptr(NodeId N) const {
+ return static_cast<T>(ptr(N));
+ }
+ NodeId id(const NodeBase *P) const;
+
+ template <typename T> NodeAddr<T> addr(NodeId N) const {
+ return { ptr<T>(N), N };
+ }
+
+ NodeAddr<FuncNode*> getFunc() const {
+ return Func;
+ }
+ MachineFunction &getMF() const {
+ return MF;
+ }
+ const TargetInstrInfo &getTII() const {
+ return TII;
+ }
+ const TargetRegisterInfo &getTRI() const {
+ return TRI;
+ }
+ const MachineDominatorTree &getDT() const {
+ return MDT;
+ }
+ const MachineDominanceFrontier &getDF() const {
+ return MDF;
+ }
+ const RegisterAliasInfo &getRAI() const {
+ return RAI;
+ }
+
+ struct DefStack {
+ DefStack() = default;
+ bool empty() const { return Stack.empty() || top() == bottom(); }
+ private:
+ typedef NodeAddr<DefNode*> value_type;
+ struct Iterator {
+ typedef DefStack::value_type value_type;
+ Iterator &up() { Pos = DS.nextUp(Pos); return *this; }
+ Iterator &down() { Pos = DS.nextDown(Pos); return *this; }
+ value_type operator*() const {
+ assert(Pos >= 1);
+ return DS.Stack[Pos-1];
+ }
+ const value_type *operator->() const {
+ assert(Pos >= 1);
+ return &DS.Stack[Pos-1];
+ }
+ bool operator==(const Iterator &It) const { return Pos == It.Pos; }
+ bool operator!=(const Iterator &It) const { return Pos != It.Pos; }
+ private:
+ Iterator(const DefStack &S, bool Top);
+ // Pos-1 is the index in the StorageType object that corresponds to
+ // the top of the DefStack.
+ const DefStack &DS;
+ unsigned Pos;
+ friend struct DefStack;
+ };
+ public:
+ typedef Iterator iterator;
+ iterator top() const { return Iterator(*this, true); }
+ iterator bottom() const { return Iterator(*this, false); }
+ unsigned size() const;
+
+ void push(NodeAddr<DefNode*> DA) { Stack.push_back(DA); }
+ void pop();
+ void start_block(NodeId N);
+ void clear_block(NodeId N);
+ private:
+ friend struct Iterator;
+ typedef std::vector<value_type> StorageType;
+ bool isDelimiter(const StorageType::value_type &P, NodeId N = 0) const {
+ return (P.Addr == nullptr) && (N == 0 || P.Id == N);
+ }
+ unsigned nextUp(unsigned P) const;
+ unsigned nextDown(unsigned P) const;
+ StorageType Stack;
+ };
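+
+ // A sketch of the intended use, inferred from the interface: defs are
+ // pushed while walking down the dominator tree, and the delimiters
+ // written by start_block/clear_block let all defs recorded for a block
+ // be discarded at once when the walk leaves it.
+ //
+ //   DefStack S;
+ //   S.start_block(BlockId); // mark where this block's defs begin
+ //   S.push(DA1);
+ //   S.push(DA2);
+ //   S.clear_block(BlockId); // drop DA1 and DA2 together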
+
+ typedef std::map<RegisterRef,DefStack> DefStackMap;
+
+ void build();
+ void pushDefs(NodeAddr<InstrNode*> IA, DefStackMap &DM);
+ void markBlock(NodeId B, DefStackMap &DefM);
+ void releaseBlock(NodeId B, DefStackMap &DefM);
+
+ NodeAddr<RefNode*> getNextRelated(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+ NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create);
+ NodeAddr<RefNode*> getNextImp(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA, bool Create);
+ NodeAddr<RefNode*> getNextShadow(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ NodeList getRelatedRefs(NodeAddr<InstrNode*> IA,
+ NodeAddr<RefNode*> RA) const;
+
+ void unlinkUse(NodeAddr<UseNode*> UA);
+ void unlinkDef(NodeAddr<DefNode*> DA);
+
+ // Some useful filters.
+ template <uint16_t Kind>
+ static bool IsRef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == Kind;
+ }
+ template <uint16_t Kind>
+ static bool IsCode(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == Kind;
+ }
+ static bool IsDef(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Def;
+ }
+ static bool IsUse(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Ref &&
+ BA.Addr->getKind() == NodeAttrs::Use;
+ }
+ static bool IsPhi(const NodeAddr<NodeBase*> BA) {
+ return BA.Addr->getType() == NodeAttrs::Code &&
+ BA.Addr->getKind() == NodeAttrs::Phi;
+ }
+
+ private:
+ void reset();
+
+ NodeAddr<NodeBase*> newNode(uint16_t Attrs);
+ NodeAddr<NodeBase*> cloneNode(const NodeAddr<NodeBase*> B);
+ NodeAddr<UseNode*> newUse(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<PhiUseNode*> newPhiUse(NodeAddr<PhiNode*> Owner,
+ RegisterRef RR, NodeAddr<BlockNode*> PredB,
+ uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ MachineOperand &Op, uint16_t Flags = NodeAttrs::None);
+ NodeAddr<DefNode*> newDef(NodeAddr<InstrNode*> Owner,
+ RegisterRef RR, uint16_t Flags = NodeAttrs::PhiRef);
+ NodeAddr<PhiNode*> newPhi(NodeAddr<BlockNode*> Owner);
+ NodeAddr<StmtNode*> newStmt(NodeAddr<BlockNode*> Owner,
+ MachineInstr *MI);
+ NodeAddr<BlockNode*> newBlock(NodeAddr<FuncNode*> Owner,
+ MachineBasicBlock *BB);
+ NodeAddr<FuncNode*> newFunc(MachineFunction *MF);
+
+ template <typename Predicate>
+ std::pair<NodeAddr<RefNode*>,NodeAddr<RefNode*>>
+ locateNextRef(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ Predicate P) const;
+
+ typedef std::map<NodeId,RegisterSet> BlockRefsMap;
+
+ void buildStmt(NodeAddr<BlockNode*> BA, MachineInstr &In);
+ void buildBlockRefs(NodeAddr<BlockNode*> BA, BlockRefsMap &RefM);
+ void recordDefsForDF(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA);
+ void buildPhis(BlockRefsMap &PhiM, BlockRefsMap &RefM,
+ NodeAddr<BlockNode*> BA);
+ void removeUnusedPhis();
+
+ template <typename T> void linkRefUp(NodeAddr<InstrNode*> IA,
+ NodeAddr<T> TA, DefStack &DS);
+ void linkStmtRefs(DefStackMap &DefM, NodeAddr<StmtNode*> SA);
+ void linkBlockRefs(DefStackMap &DefM, NodeAddr<BlockNode*> BA);
+
+ TimerGroup TimeG;
+ NodeAddr<FuncNode*> Func;
+ NodeAllocator Memory;
+
+ MachineFunction &MF;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const RegisterAliasInfo &RAI;
+ const TargetOperandInfo &TOI;
+ }; // struct DataFlowGraph
+
+ template <typename Predicate>
+ NodeAddr<RefNode*> RefNode::getNextRef(RegisterRef RR, Predicate P,
+ bool NextOnly, const DataFlowGraph &G) {
+ // Get the "Next" reference in the circular list that references RR and
+ // satisfies predicate "Pred".
+ auto NA = G.addr<NodeBase*>(getNext());
+
+ while (NA.Addr != this) {
+ if (NA.Addr->getType() == NodeAttrs::Ref) {
+ NodeAddr<RefNode*> RA = NA;
+ if (RA.Addr->getRegRef() == RR && P(NA))
+ return NA;
+ if (NextOnly)
+ break;
+ NA = G.addr<NodeBase*>(NA.Addr->getNext());
+ } else {
+ // We've hit the beginning of the chain.
+ assert(NA.Addr->getType() == NodeAttrs::Code);
+ NodeAddr<CodeNode*> CA = NA;
+ NA = CA.Addr->getFirstMember(G);
+ }
+ }
+ // Return the equivalent of "nullptr" if such a node was not found.
+ return NodeAddr<RefNode*>();
+ }
+
+ template <typename Predicate>
+ NodeList CodeNode::members_if(Predicate P, const DataFlowGraph &G) const {
+ NodeList MM;
+ auto M = getFirstMember(G);
+ if (M.Id == 0)
+ return MM;
+
+ while (M.Addr != this) {
+ if (P(M))
+ MM.push_back(M);
+ M = G.addr<NodeBase*>(M.Addr->getNext());
+ }
+ return MM;
+ }
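+
+ // For example (hypothetical caller), collecting all phi nodes of a
+ // block BA uses one of the static filters from DataFlowGraph as the
+ // predicate:
+ //
+ //   NodeList Phis = BA.Addr->members_if(
+ //       DataFlowGraph::IsCode<NodeAttrs::Phi>, G);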
+
+
+ template <typename T> struct Print;
+ template <typename T>
+ raw_ostream &operator<< (raw_ostream &OS, const Print<T> &P);
+
+ template <typename T>
+ struct Print {
+ Print(const T &x, const DataFlowGraph &g) : Obj(x), G(g) {}
+ const T &Obj;
+ const DataFlowGraph &G;
+ };
+
+ template <typename T>
+ struct PrintNode : Print<NodeAddr<T>> {
+ PrintNode(const NodeAddr<T> &x, const DataFlowGraph &g)
+ : Print<NodeAddr<T>>(x, g) {}
+ };
+} // namespace rdf
+
+#endif // RDF_GRAPH_H
diff --git a/lib/Target/Hexagon/RDFLiveness.cpp b/lib/Target/Hexagon/RDFLiveness.cpp
new file mode 100644
index 000000000000..1d9bd372ff4e
--- /dev/null
+++ b/lib/Target/Hexagon/RDFLiveness.cpp
@@ -0,0 +1,848 @@
+//===--- RDFLiveness.cpp --------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Computation of the liveness information from the data-flow graph.
+//
+// The main functionality of this code is to compute block live-in
+// information. With the live-in information in place, the placement
+// of kill flags can also be recalculated.
+//
+// The block live-in calculation is based on the ideas from the following
+// publication:
+//
+// Dibyendu Das, Ramakrishna Upadrasta, Benoit Dupont de Dinechin.
+// "Efficient Liveness Computation Using Merge Sets and DJ-Graphs."
+// ACM Transactions on Architecture and Code Optimization, Association for
+// Computing Machinery, 2012, ACM TACO Special Issue on "High-Performance
+// and Embedded Architectures and Compilers", 8 (4),
+// <10.1145/2086696.2086706>. <hal-00647369>
+//
+#include "RDFGraph.h"
+#include "RDFLiveness.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineDominanceFrontier.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+using namespace rdf;
+
+namespace rdf {
+ template<>
+ raw_ostream &operator<< (raw_ostream &OS, const Print<Liveness::RefMap> &P) {
+ OS << '{';
+ for (auto I : P.Obj) {
+ OS << ' ' << Print<RegisterRef>(I.first, P.G) << '{';
+ for (auto J = I.second.begin(), E = I.second.end(); J != E; ) {
+ OS << Print<NodeId>(*J, P.G);
+ if (++J != E)
+ OS << ',';
+ }
+ OS << '}';
+ }
+ OS << " }";
+ return OS;
+ }
+}
+
+// The order in the returned sequence is the order of reaching defs in the
+// upward traversal: the first def is the closest to the given reference RefA,
+// the next one is further up, and so on.
+// The list ends at a reaching phi def, or when the reference from RefA is
+// covered by the defs in the list (see FullChain).
+// This function provides two modes of operation:
+// (1) Returning the sequence of reaching defs for a particular reference
+// node. This sequence will terminate at the first phi node [1].
+// (2) Returning a partial sequence of reaching defs, where the final goal
+// is to traverse past phi nodes to the actual defs arising from the code
+// itself.
+// In mode (2), the register reference for which the search was started
+// may be different from the reference node RefA, for which this call was
+// made, hence the argument RefRR, which holds the original register.
+// Also, some definitions may have already been encountered in a previous
+// call that will influence register covering. The register references
+// already defined are passed in through DefRRs.
+// In mode (1), the "continuation" considerations do not apply: RefRR
+// is the same as the register in RefA, and the set DefRRs is empty.
+//
+// [1] It is possible for multiple phi nodes to be included in the returned
+// sequence:
+// SubA = phi ...
+// SubB = phi ...
+// ... = SuperAB(rdef:SubA), SuperAB"(rdef:SubB)
+// However, these phi nodes are independent from one another in terms of
+// the data-flow.
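+//
+// As a usage sketch (hypothetical caller): mode (1) corresponds to
+//
+//   NodeList Defs = L.getAllReachingDefs(UA.Addr->getRegRef(), UA,
+//                                        false, RegisterSet());
+//
+// which is what the single-argument overload below reduces to, while
+// mode (2) passes FullChain = true together with the original register
+// and the DefRRs accumulated by earlier calls.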
+
+NodeList Liveness::getAllReachingDefs(RegisterRef RefRR,
+ NodeAddr<RefNode*> RefA, bool FullChain, const RegisterSet &DefRRs) {
+ SetVector<NodeId> DefQ;
+ SetVector<NodeId> Owners;
+
+ // The initial queue should not have reaching defs for shadows. The
+ // whole point of a shadow is that it will have a reaching def that
+ // is not aliased to the reaching defs of the related shadows.
+ NodeId Start = RefA.Id;
+ auto SNA = DFG.addr<RefNode*>(Start);
+ if (NodeId RD = SNA.Addr->getReachingDef())
+ DefQ.insert(RD);
+
+ // Collect all the reaching defs, going up until a phi node is encountered,
+ // or there are no more reaching defs. From this set, the actual set of
+ // reaching defs will be selected.
+ // The traversal upwards must go on until a covering def is encountered.
+ // It is possible that a collection of individually non-covering defs
+ // will be sufficient, but keep going until a covering one is found.
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ auto TA = DFG.addr<DefNode*>(DefQ[i]);
+ if (TA.Addr->getFlags() & NodeAttrs::PhiRef)
+ continue;
+ // Stop at the covering/overwriting def of the initial register reference.
+ RegisterRef RR = TA.Addr->getRegRef();
+ if (RAI.covers(RR, RefRR)) {
+ uint16_t Flags = TA.Addr->getFlags();
+ if (!(Flags & NodeAttrs::Preserving))
+ continue;
+ }
+ // Get the next level of reaching defs. This will include multiple
+ // reaching defs for shadows.
+ for (auto S : DFG.getRelatedRefs(TA.Addr->getOwner(DFG), TA))
+ if (auto RD = NodeAddr<RefNode*>(S).Addr->getReachingDef())
+ DefQ.insert(RD);
+ }
+
+ // Remove all non-phi defs that are not aliased to RefRR, and collect
+ // the owners of the remaining defs.
+ SetVector<NodeId> Defs;
+ for (auto N : DefQ) {
+ auto TA = DFG.addr<DefNode*>(N);
+ bool IsPhi = TA.Addr->getFlags() & NodeAttrs::PhiRef;
+ if (!IsPhi && !RAI.alias(RefRR, TA.Addr->getRegRef()))
+ continue;
+ Defs.insert(TA.Id);
+ Owners.insert(TA.Addr->getOwner(DFG).Id);
+ }
+
+ // Return the MachineBasicBlock containing a given instruction.
+ auto Block = [this] (NodeAddr<InstrNode*> IA) -> MachineBasicBlock* {
+ if (IA.Addr->getKind() == NodeAttrs::Stmt)
+ return NodeAddr<StmtNode*>(IA).Addr->getCode()->getParent();
+ assert(IA.Addr->getKind() == NodeAttrs::Phi);
+ NodeAddr<PhiNode*> PA = IA;
+ NodeAddr<BlockNode*> BA = PA.Addr->getOwner(DFG);
+ return BA.Addr->getCode();
+ };
+ // Less(A,B) iff instruction A is further down in the dominator tree than B.
+ auto Less = [&Block,this] (NodeId A, NodeId B) -> bool {
+ if (A == B)
+ return false;
+ auto OA = DFG.addr<InstrNode*>(A), OB = DFG.addr<InstrNode*>(B);
+ MachineBasicBlock *BA = Block(OA), *BB = Block(OB);
+ if (BA != BB)
+ return MDT.dominates(BB, BA);
+ // They are in the same block.
+ bool StmtA = OA.Addr->getKind() == NodeAttrs::Stmt;
+ bool StmtB = OB.Addr->getKind() == NodeAttrs::Stmt;
+ if (StmtA) {
+ if (!StmtB) // OB is a phi and phis dominate statements.
+ return true;
+ auto CA = NodeAddr<StmtNode*>(OA).Addr->getCode();
+ auto CB = NodeAddr<StmtNode*>(OB).Addr->getCode();
+ // The order must be linear, so tie-break such equalities.
+ if (CA == CB)
+ return A < B;
+ return MDT.dominates(CB, CA);
+ } else {
+ // OA is a phi.
+ if (StmtB)
+ return false;
+ // Both are phis. There is no ordering between phis (in terms of
+ // the data-flow), so tie-break this via node id comparison.
+ return A < B;
+ }
+ };
+
+ std::vector<NodeId> Tmp(Owners.begin(), Owners.end());
+ std::sort(Tmp.begin(), Tmp.end(), Less);
+
+ // The vector is a list of instructions, so that defs coming from
+ // the same instruction do not need to be artificially ordered.
+ // Then, while computing the initial segment and iterating over an
+ // instruction, pick the defs that contribute to the covering (i.e. are
+ // not covered by previously added defs). Check the defs individually:
+ // first determine for each def whether it is covered (without adding
+ // any of them to the tracking set), and only then add all the selected
+ // ones.
+
+ // The reason for this is this example:
+ // *d1<A>, *d2<B>, ... Assume A and B are aliased (can happen in phi nodes).
+ // *d3<C> If A \incl BuC, and B \incl AuC, then *d2 would be
+ // covered if we added A first, and A would be covered
+ // if we added B first.
+
+ NodeList RDefs;
+ RegisterSet RRs = DefRRs;
+
+ auto DefInSet = [&Defs] (NodeAddr<RefNode*> TA) -> bool {
+ return TA.Addr->getKind() == NodeAttrs::Def &&
+ Defs.count(TA.Id);
+ };
+ for (auto T : Tmp) {
+ if (!FullChain && RAI.covers(RRs, RefRR))
+ break;
+ auto TA = DFG.addr<InstrNode*>(T);
+ bool IsPhi = DFG.IsCode<NodeAttrs::Phi>(TA);
+ NodeList Ds;
+ for (NodeAddr<DefNode*> DA : TA.Addr->members_if(DefInSet, DFG)) {
+ auto QR = DA.Addr->getRegRef();
+ // Add phi defs even if they are covered by subsequent defs. This is
+ // for cases where the reached use is not covered by any of the defs
+ // encountered so far: the phi def is needed to expose the liveness
+ // of that use to the entry of the block.
+ // Example:
+ // phi d1<R3>(,d2,), ... Phi def d1 is covered by d2.
+ // d2<R3>(d1,,u3), ...
+ // ..., u3<D1>(d2) This use needs to be live on entry.
+ if (FullChain || IsPhi || !RAI.covers(RRs, QR))
+ Ds.push_back(DA);
+ }
+ RDefs.insert(RDefs.end(), Ds.begin(), Ds.end());
+ for (NodeAddr<DefNode*> DA : Ds) {
+ // When collecting a full chain of definitions, do not consider phi
+ // defs to actually define a register.
+ uint16_t Flags = DA.Addr->getFlags();
+ if (!FullChain || !(Flags & NodeAttrs::PhiRef))
+ if (!(Flags & NodeAttrs::Preserving))
+ RRs.insert(DA.Addr->getRegRef());
+ }
+ }
+
+ return RDefs;
+}
+
+
+static const RegisterSet NoRegs;
+
+NodeList Liveness::getAllReachingDefs(NodeAddr<RefNode*> RefA) {
+ return getAllReachingDefs(RefA.Addr->getRegRef(), RefA, false, NoRegs);
+}
+
+
+void Liveness::computePhiInfo() {
+ NodeList Phis;
+ NodeAddr<FuncNode*> FA = DFG.getFunc();
+ auto Blocks = FA.Addr->members(DFG);
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ auto Ps = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ Phis.insert(Phis.end(), Ps.begin(), Ps.end());
+ }
+
+ // phi use -> (map: reaching phi -> set of registers defined in between)
+ std::map<NodeId,std::map<NodeId,RegisterSet>> PhiUp;
+ std::vector<NodeId> PhiUQ; // Work list of phis for upward propagation.
+
+ // Go over all phis.
+ for (NodeAddr<PhiNode*> PhiA : Phis) {
+ // Go over all defs and collect the reached uses that are non-phi uses
+ // (i.e. the "real uses").
+ auto &RealUses = RealUseMap[PhiA.Id];
+ auto PhiRefs = PhiA.Addr->members(DFG);
+
+ // Have a work queue of defs whose reached uses need to be found.
+ // For each def, add to the queue all reached (non-phi) defs.
+ SetVector<NodeId> DefQ;
+ NodeSet PhiDefs;
+ for (auto R : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Def>(R))
+ continue;
+ DefQ.insert(R.Id);
+ PhiDefs.insert(R.Id);
+ }
+ for (unsigned i = 0; i < DefQ.size(); ++i) {
+ NodeAddr<DefNode*> DA = DFG.addr<DefNode*>(DefQ[i]);
+ NodeId UN = DA.Addr->getReachedUse();
+ while (UN != 0) {
+ NodeAddr<UseNode*> A = DFG.addr<UseNode*>(UN);
+ if (!(A.Addr->getFlags() & NodeAttrs::PhiRef))
+ RealUses[getRestrictedRegRef(A)].insert(A.Id);
+ UN = A.Addr->getSibling();
+ }
+ NodeId DN = DA.Addr->getReachedDef();
+ while (DN != 0) {
+ NodeAddr<DefNode*> A = DFG.addr<DefNode*>(DN);
+ for (auto T : DFG.getRelatedRefs(A.Addr->getOwner(DFG), A)) {
+ uint16_t Flags = NodeAddr<DefNode*>(T).Addr->getFlags();
+ // Must traverse the reached-def chain. Consider:
+ // def(D0) -> def(R0) -> def(R0) -> use(D0)
+ // The reachable use of D0 passes through a def of R0.
+ if (!(Flags & NodeAttrs::PhiRef))
+ DefQ.insert(T.Id);
+ }
+ DN = A.Addr->getSibling();
+ }
+ }
+ // Filter out those uses that appear to be reachable, but really
+ // are not. For example:
+ //
+ // R1:0 = d1
+ // = R1:0 u2 Reached by d1.
+ // R0 = d3
+ // = R1:0 u4 Still reached by d1: indirectly through
+ // the def d3.
+ // R1 = d5
+ // = R1:0 u6 Not reached by d1 (covered collectively
+ // by d3 and d5), but following reached
+ // defs and uses from d1 will lead here.
+ auto HasDef = [&PhiDefs] (NodeAddr<DefNode*> DA) -> bool {
+ return PhiDefs.count(DA.Id);
+ };
+ for (auto UI = RealUses.begin(), UE = RealUses.end(); UI != UE; ) {
+ // For each reached register UI->first, there is a set UI->second of
+ // uses of it. For each such use, check if it is reached by this phi,
+ // i.e. check if the set of its reaching defs intersects the set of
+ // this phi's defs.
+ auto &Uses = UI->second;
+ for (auto I = Uses.begin(), E = Uses.end(); I != E; ) {
+ auto UA = DFG.addr<UseNode*>(*I);
+ NodeList RDs = getAllReachingDefs(UI->first, UA);
+ if (std::any_of(RDs.begin(), RDs.end(), HasDef))
+ ++I;
+ else
+ I = Uses.erase(I);
+ }
+ if (Uses.empty())
+ UI = RealUses.erase(UI);
+ else
+ ++UI;
+ }
+
+ // If this phi reaches some "real" uses, add it to the queue for upward
+ // propagation.
+ if (!RealUses.empty())
+ PhiUQ.push_back(PhiA.Id);
+
+ // Go over all phi uses and check if the reaching def is another phi.
+ // Collect the phis that are among the reaching defs of these uses.
+ // While traversing the list of reaching defs for each phi use, collect
+ // the set of registers defined between this phi (Phi) and the owner phi
+ // of the reaching def.
+ for (auto I : PhiRefs) {
+ if (!DFG.IsRef<NodeAttrs::Use>(I))
+ continue;
+ NodeAddr<UseNode*> UA = I;
+ auto &UpMap = PhiUp[UA.Id];
+ RegisterSet DefRRs;
+ for (NodeAddr<DefNode*> DA : getAllReachingDefs(UA)) {
+ if (DA.Addr->getFlags() & NodeAttrs::PhiRef)
+ UpMap[DA.Addr->getOwner(DFG).Id] = DefRRs;
+ else
+ DefRRs.insert(DA.Addr->getRegRef());
+ }
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Phi-up-to-phi map:\n";
+ for (auto I : PhiUp) {
+ dbgs() << "phi " << Print<NodeId>(I.first, DFG) << " -> {";
+ for (auto R : I.second)
+ dbgs() << ' ' << Print<NodeId>(R.first, DFG)
+ << Print<RegisterSet>(R.second, DFG);
+ dbgs() << " }\n";
+ }
+ }
+
+ // Propagate the reached registers up in the phi chain.
+ //
+ // The following type of situation needs careful handling:
+ //
+ // phi d1<R1:0> (1)
+ // |
+ // ... d2<R1>
+ // |
+ // phi u3<R1:0> (2)
+ // |
+ // ... u4<R1>
+ //
+ // The phi node (2) defines a register pair R1:0, and reaches a "real"
+ // use u4 of just R1. The same phi node is also known to reach (upwards)
+ // the phi node (1). However, the use u4 is not reached by phi (1),
+ // because of the intervening definition d2 of R1. The data flow between
+ // phis (1) and (2) is restricted to R1:0 minus R1, i.e. R0.
+ //
+ // When propagating uses up the phi chains, get all the reaching defs
+ // for a given phi use, and traverse the list until the propagated ref
+ // is covered, or until reaching the final phi. Only assume that the
+ // reference reaches the phi in the latter case.
+
+ for (unsigned i = 0; i < PhiUQ.size(); ++i) {
+ auto PA = DFG.addr<PhiNode*>(PhiUQ[i]);
+ auto &RealUses = RealUseMap[PA.Id];
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ NodeAddr<UseNode*> UA = U;
+ auto &UpPhis = PhiUp[UA.Id];
+ for (auto UP : UpPhis) {
+ bool Changed = false;
+ auto &MidDefs = UP.second;
+ // Collect the set UpReached of registers whose uses are reached by the
+ // current phi PA, and are not covered by any intervening def between PA
+ // and the upward phi UP.
+ RegisterSet UpReached;
+ for (auto T : RealUses) {
+ if (!isRestricted(PA, UA, T.first))
+ continue;
+ if (!RAI.covers(MidDefs, T.first))
+ UpReached.insert(T.first);
+ }
+ if (UpReached.empty())
+ continue;
+ // For each register R in UpReached, add the real uses of R reached by
+ // the current phi PA to the set PRUs of real uses reached by the
+ // upward phi UP.
+ auto &PRUs = RealUseMap[UP.first];
+ for (auto R : UpReached) {
+ unsigned Z = PRUs[R].size();
+ PRUs[R].insert(RealUses[R].begin(), RealUses[R].end());
+ Changed |= (PRUs[R].size() != Z);
+ }
+ if (Changed)
+ PhiUQ.push_back(UP.first);
+ }
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "Real use map:\n";
+ for (auto I : RealUseMap) {
+ dbgs() << "phi " << Print<NodeId>(I.first, DFG);
+ NodeAddr<PhiNode*> PA = DFG.addr<PhiNode*>(I.first);
+ NodeList Ds = PA.Addr->members_if(DFG.IsRef<NodeAttrs::Def>, DFG);
+ if (!Ds.empty()) {
+ RegisterRef RR = NodeAddr<DefNode*>(Ds[0]).Addr->getRegRef();
+ dbgs() << '<' << Print<RegisterRef>(RR, DFG) << '>';
+ } else {
+ dbgs() << "<noreg>";
+ }
+ dbgs() << " -> " << Print<RefMap>(I.second, DFG) << '\n';
+ }
+ }
+}
+
+
+void Liveness::computeLiveIns() {
+ // Populate the node-to-block map. This speeds up the calculations
+ // significantly.
+ NBMap.clear();
+ for (NodeAddr<BlockNode*> BA : DFG.getFunc().Addr->members(DFG)) {
+ MachineBasicBlock *BB = BA.Addr->getCode();
+ for (NodeAddr<InstrNode*> IA : BA.Addr->members(DFG)) {
+ for (NodeAddr<RefNode*> RA : IA.Addr->members(DFG))
+ NBMap.insert(std::make_pair(RA.Id, BB));
+ NBMap.insert(std::make_pair(IA.Id, BB));
+ }
+ }
+
+ MachineFunction &MF = DFG.getMF();
+
+ // Compute IDF first, then the inverse.
+ decltype(IIDF) IDF;
+ for (auto &B : MF) {
+ auto F1 = MDF.find(&B);
+ if (F1 == MDF.end())
+ continue;
+ SetVector<MachineBasicBlock*> IDFB(F1->second.begin(), F1->second.end());
+ for (unsigned i = 0; i < IDFB.size(); ++i) {
+ auto F2 = MDF.find(IDFB[i]);
+ if (F2 != MDF.end())
+ IDFB.insert(F2->second.begin(), F2->second.end());
+ }
+ // Add B to the IDF(B). This will put B in the IIDF(B).
+ IDFB.insert(&B);
+ IDF[&B].insert(IDFB.begin(), IDFB.end());
+ }
+
+ for (auto I : IDF)
+ for (auto S : I.second)
+ IIDF[S].insert(I.first);
+
+ computePhiInfo();
+
+ NodeAddr<FuncNode*> FA = DFG.getFunc();
+ auto Blocks = FA.Addr->members(DFG);
+
+ // Build the phi live-on-entry map.
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ MachineBasicBlock *MB = BA.Addr->getCode();
+ auto &LON = PhiLON[MB];
+ for (auto P : BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG))
+ for (auto S : RealUseMap[P.Id])
+ LON[S.first].insert(S.second.begin(), S.second.end());
+ }
+
+ if (Trace) {
+ dbgs() << "Phi live-on-entry map:\n";
+ for (auto I : PhiLON)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print<RefMap>(I.second, DFG) << '\n';
+ }
+
+ // Build the phi live-on-exit map. Each phi node has some set of reached
+ // "real" uses. Propagate this set backwards into the block predecessors
+ // through the reaching defs of the corresponding phi uses.
+ for (NodeAddr<BlockNode*> BA : Blocks) {
+ auto Phis = BA.Addr->members_if(DFG.IsCode<NodeAttrs::Phi>, DFG);
+ for (NodeAddr<PhiNode*> PA : Phis) {
+ auto &RUs = RealUseMap[PA.Id];
+ if (RUs.empty())
+ continue;
+
+ for (auto U : PA.Addr->members_if(DFG.IsRef<NodeAttrs::Use>, DFG)) {
+ NodeAddr<PhiUseNode*> UA = U;
+ if (UA.Addr->getReachingDef() == 0)
+ continue;
+
+ // Mark all reached "real" uses of P as live on exit in the
+ // predecessor.
+ // Remap all the RUs so that they have a correct reaching def.
+ auto PrA = DFG.addr<BlockNode*>(UA.Addr->getPredecessor());
+ auto &LOX = PhiLOX[PrA.Addr->getCode()];
+ for (auto R : RUs) {
+ RegisterRef RR = R.first;
+ if (!isRestricted(PA, UA, RR))
+ RR = getRestrictedRegRef(UA);
+ // The restricted ref may be different from the ref that was
+ // accessed in the "real use". This means that this phi use
+ // is not the one that carries this reference, so skip it.
+ if (!RAI.alias(R.first, RR))
+ continue;
+ for (auto D : getAllReachingDefs(RR, UA))
+ LOX[RR].insert(D.Id);
+ }
+ } // for U : phi uses
+ } // for P : Phis
+ } // for B : Blocks
+
+ if (Trace) {
+ dbgs() << "Phi live-on-exit map:\n";
+ for (auto I : PhiLOX)
+ dbgs() << "block #" << I.first->getNumber() << " -> "
+ << Print<RefMap>(I.second, DFG) << '\n';
+ }
+
+ RefMap LiveIn;
+ traverse(&MF.front(), LiveIn);
+
+ // Add function live-ins to the live-in set of the function entry block.
+ auto &EntryIn = LiveMap[&MF.front()];
+ for (auto I = MRI.livein_begin(), E = MRI.livein_end(); I != E; ++I)
+ EntryIn.insert({I->first,0});
+
+ if (Trace) {
+ // Dump the liveness map
+ for (auto &B : MF) {
+ BitVector LV(TRI.getNumRegs());
+ for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
+ LV.set(I->PhysReg);
+ dbgs() << "BB#" << B.getNumber() << "\t rec = {";
+ for (int x = LV.find_first(); x >= 0; x = LV.find_next(x))
+ dbgs() << ' ' << Print<RegisterRef>({unsigned(x),0}, DFG);
+ dbgs() << " }\n";
+ dbgs() << "\tcomp = " << Print<RegisterSet>(LiveMap[&B], DFG) << '\n';
+ }
+ }
+}
+
+
+void Liveness::resetLiveIns() {
+ for (auto &B : DFG.getMF()) {
+ // Remove all live-ins.
+ std::vector<unsigned> T;
+ for (auto I = B.livein_begin(), E = B.livein_end(); I != E; ++I)
+ T.push_back(I->PhysReg);
+ for (auto I : T)
+ B.removeLiveIn(I);
+ // Add the newly computed live-ins.
+ auto &LiveIns = LiveMap[&B];
+ for (auto I : LiveIns) {
+ assert(I.Sub == 0);
+ B.addLiveIn(I.Reg);
+ }
+ }
+}
+
+
+void Liveness::resetKills() {
+ for (auto &B : DFG.getMF())
+ resetKills(&B);
+}
+
+
+void Liveness::resetKills(MachineBasicBlock *B) {
+ auto CopyLiveIns = [] (MachineBasicBlock *B, BitVector &LV) -> void {
+ for (auto I = B->livein_begin(), E = B->livein_end(); I != E; ++I)
+ LV.set(I->PhysReg);
+ };
+
+ BitVector LiveIn(TRI.getNumRegs()), Live(TRI.getNumRegs());
+ CopyLiveIns(B, LiveIn);
+ for (auto SI : B->successors())
+ CopyLiveIns(SI, Live);
+
+ for (auto I = B->rbegin(), E = B->rend(); I != E; ++I) {
+ MachineInstr *MI = &*I;
+ if (MI->isDebugValue())
+ continue;
+
+ MI->clearKillInfo();
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
+ Live.reset(*SR);
+ }
+ for (auto &Op : MI->operands()) {
+ if (!Op.isReg() || !Op.isUse())
+ continue;
+ unsigned R = Op.getReg();
+ if (!TargetRegisterInfo::isPhysicalRegister(R))
+ continue;
+ bool IsLive = false;
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR) {
+ if (!Live[*SR])
+ continue;
+ IsLive = true;
+ break;
+ }
+ if (IsLive)
+ continue;
+ Op.setIsKill(true);
+ for (MCSubRegIterator SR(R, &TRI, true); SR.isValid(); ++SR)
+ Live.set(*SR);
+ }
+ }
+}
+
+
+// For shadows, determine if RR is aliased to a reaching def of any other
+// shadow associated with RA. If it is not, then RR is "restricted" to RA,
+// and so it can be considered a value specific to RA. This is important
+// for accurately determining values associated with phi uses.
+// For non-shadows, this function returns "true".
+bool Liveness::isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ RegisterRef RR) const {
+ NodeId Start = RA.Id;
+ for (NodeAddr<RefNode*> TA = DFG.getNextShadow(IA, RA);
+ TA.Id != 0 && TA.Id != Start; TA = DFG.getNextShadow(IA, TA)) {
+ NodeId RD = TA.Addr->getReachingDef();
+ if (RD == 0)
+ continue;
+ if (RAI.alias(RR, DFG.addr<DefNode*>(RD).Addr->getRegRef()))
+ return false;
+ }
+ return true;
+}
+
+
+RegisterRef Liveness::getRestrictedRegRef(NodeAddr<RefNode*> RA) const {
+ assert(DFG.IsRef<NodeAttrs::Use>(RA));
+ if (RA.Addr->getFlags() & NodeAttrs::Shadow) {
+ NodeId RD = RA.Addr->getReachingDef();
+ assert(RD);
+ RA = DFG.addr<DefNode*>(RD);
+ }
+ return RA.Addr->getRegRef();
+}
+
+
+unsigned Liveness::getPhysReg(RegisterRef RR) const {
+ if (!TargetRegisterInfo::isPhysicalRegister(RR.Reg))
+ return 0;
+ return RR.Sub ? TRI.getSubReg(RR.Reg, RR.Sub) : RR.Reg;
+}
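+
+// For example (hypothetical values): on Hexagon, where D0 is the register
+// pair R1:0, a RegisterRef naming D0 with the subregister index of its
+// low half maps to R0; a RegisterRef with Sub == 0 maps to RR.Reg itself;
+// and any virtual register yields 0.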
+
+
+// Helper function to obtain the basic block containing the given
+// reference node (typically the reaching def of some use).
+MachineBasicBlock *Liveness::getBlockWithRef(NodeId RN) const {
+ auto F = NBMap.find(RN);
+ if (F != NBMap.end())
+ return F->second;
+ llvm_unreachable("Node id not in map");
+}
+
+
+void Liveness::traverse(MachineBasicBlock *B, RefMap &LiveIn) {
+ // The LiveIn map, for each (physical) register, contains the set of live
+ // reaching defs of that register that are live on entry to the associated
+ // block.
+
+ // The summary of the traversal algorithm:
+ //
+ // R is live-in in B, if there exists a U(R), such that rdef(R) dom B
+ // and (U \in IDF(B) or B dom U).
+ //
+ // for (C : children) {
+ // LU = {}
+ // traverse(C, LU)
+ // LiveUses += LU
+ // }
+ //
+ // LiveUses -= Defs(B);
+ // LiveUses += UpwardExposedUses(B);
+ // for (C : IIDF[B])
+ // for (U : LiveUses)
+ // if (Rdef(U) dom C)
+ // C.addLiveIn(U)
+ //
+
+ // Go up the dominator tree (depth-first).
+ MachineDomTreeNode *N = MDT.getNode(B);
+ for (auto I : *N) {
+ RefMap L;
+ MachineBasicBlock *SB = I->getBlock();
+ traverse(SB, L);
+
+ for (auto S : L)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+ }
+
+ if (Trace) {
+ dbgs() << LLVM_FUNCTION_NAME << " in BB#" << B->getNumber()
+ << " after recursion into";
+ for (auto I : *N)
+ dbgs() << ' ' << I->getBlock()->getNumber();
+ dbgs() << "\n LiveIn: " << Print<RefMap>(LiveIn, DFG);
+ dbgs() << "\n Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Add phi uses that are live on exit from this block.
+ RefMap &PUs = PhiLOX[B];
+ for (auto S : PUs)
+ LiveIn[S.first].insert(S.second.begin(), S.second.end());
+
+ if (Trace) {
+ dbgs() << "after LOX\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Stop tracking all uses defined in this block: erase those records
+ // where the reaching def is located in B and which cover all reached
+ // uses.
+ auto Copy = LiveIn;
+ LiveIn.clear();
+
+ for (auto I : Copy) {
+ auto &Defs = LiveIn[I.first];
+ NodeSet Rest;
+ for (auto R : I.second) {
+ auto DA = DFG.addr<DefNode*>(R);
+ RegisterRef DDR = DA.Addr->getRegRef();
+ NodeAddr<InstrNode*> IA = DA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ // Defs from a different block need to be preserved. Defs from this
+ // block will need to be processed further, except for phi defs, the
+ // liveness of which is handled through the PhiLON/PhiLOX maps.
+ if (B != BA.Addr->getCode())
+ Defs.insert(R);
+ else {
+ bool IsPreserving = DA.Addr->getFlags() & NodeAttrs::Preserving;
+ if (IA.Addr->getKind() != NodeAttrs::Phi && !IsPreserving) {
+ bool Covering = RAI.covers(DDR, I.first);
+ NodeId U = DA.Addr->getReachedUse();
+ while (U && Covering) {
+ auto DUA = DFG.addr<UseNode*>(U);
+ RegisterRef Q = DUA.Addr->getRegRef();
+ Covering = RAI.covers(DA.Addr->getRegRef(), Q);
+ U = DUA.Addr->getSibling();
+ }
+ if (!Covering)
+ Rest.insert(R);
+ }
+ }
+ }
+
+ // Non-covering defs from B.
+ for (auto R : Rest) {
+ auto DA = DFG.addr<DefNode*>(R);
+ RegisterRef DRR = DA.Addr->getRegRef();
+ RegisterSet RRs;
+ for (NodeAddr<DefNode*> TA : getAllReachingDefs(DA)) {
+ NodeAddr<InstrNode*> IA = TA.Addr->getOwner(DFG);
+ NodeAddr<BlockNode*> BA = IA.Addr->getOwner(DFG);
+ // Preserving defs do not count towards covering.
+ if (!(TA.Addr->getFlags() & NodeAttrs::Preserving))
+ RRs.insert(TA.Addr->getRegRef());
+ if (BA.Addr->getCode() == B)
+ continue;
+ if (RAI.covers(RRs, DRR))
+ break;
+ Defs.insert(TA.Id);
+ }
+ }
+ }
+
+ emptify(LiveIn);
+
+ if (Trace) {
+ dbgs() << "after defs in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Scan the block for upward-exposed uses and add them to the tracking set.
+ for (auto I : DFG.getFunc().Addr->findBlock(B, DFG).Addr->members(DFG)) {
+ NodeAddr<InstrNode*> IA = I;
+ if (IA.Addr->getKind() != NodeAttrs::Stmt)
+ continue;
+ for (NodeAddr<UseNode*> UA : IA.Addr->members_if(DFG.IsUse, DFG)) {
+ RegisterRef RR = UA.Addr->getRegRef();
+ for (auto D : getAllReachingDefs(UA))
+ if (getBlockWithRef(D.Id) != B)
+ LiveIn[RR].insert(D.Id);
+ }
+ }
+
+ if (Trace) {
+ dbgs() << "after uses in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(LiveMap[B], DFG) << '\n';
+ }
+
+ // Phi uses should not be propagated up the dominator tree, since they
+ // are not dominated by their corresponding reaching defs.
+ auto &Local = LiveMap[B];
+ auto &LON = PhiLON[B];
+ for (auto R : LON)
+ Local.insert(R.first);
+
+ if (Trace) {
+ dbgs() << "after phi uses in block\n";
+ dbgs() << " LiveIn: " << Print<RefMap>(LiveIn, DFG) << '\n';
+ dbgs() << " Local: " << Print<RegisterSet>(Local, DFG) << '\n';
+ }
+
+ for (auto C : IIDF[B]) {
+ auto &LiveC = LiveMap[C];
+ for (auto S : LiveIn)
+ for (auto R : S.second)
+ if (MDT.properlyDominates(getBlockWithRef(R), C))
+ LiveC.insert(S.first);
+ }
+}
+
+
+void Liveness::emptify(RefMap &M) {
+ for (auto I = M.begin(), E = M.end(); I != E; )
+ I = I->second.empty() ? M.erase(I) : std::next(I);
+}
+
diff --git a/lib/Target/Hexagon/RDFLiveness.h b/lib/Target/Hexagon/RDFLiveness.h
new file mode 100644
index 000000000000..4c1e8f3ee838
--- /dev/null
+++ b/lib/Target/Hexagon/RDFLiveness.h
@@ -0,0 +1,106 @@
+//===--- RDFLiveness.h ----------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Recalculate the liveness information given a data flow graph.
+// This includes block live-ins and kill flags.
+
+#ifndef RDF_LIVENESS_H
+#define RDF_LIVENESS_H
+
+#include "RDFGraph.h"
+#include "llvm/ADT/DenseMap.h"
+#include <map>
+
+using namespace llvm;
+
+namespace llvm {
+ class MachineBasicBlock;
+ class MachineFunction;
+ class MachineRegisterInfo;
+ class TargetRegisterInfo;
+ class MachineDominatorTree;
+ class MachineDominanceFrontier;
+}
+
+namespace rdf {
+ struct Liveness {
+ public:
+ typedef std::map<MachineBasicBlock*,RegisterSet> LiveMapType;
+ typedef std::map<RegisterRef,NodeSet> RefMap;
+
+ Liveness(MachineRegisterInfo &mri, const DataFlowGraph &g)
+ : DFG(g), TRI(g.getTRI()), MDT(g.getDT()), MDF(g.getDF()),
+ RAI(g.getRAI()), MRI(mri), Empty(), Trace(false) {}
+
+ NodeList getAllReachingDefs(RegisterRef RefRR, NodeAddr<RefNode*> RefA,
+ bool FullChain = false, const RegisterSet &DefRRs = RegisterSet());
+ NodeList getAllReachingDefs(NodeAddr<RefNode*> RefA);
+
+ LiveMapType &getLiveMap() { return LiveMap; }
+ const LiveMapType &getLiveMap() const { return LiveMap; }
+ const RefMap &getRealUses(NodeId P) const {
+ auto F = RealUseMap.find(P);
+ return F == RealUseMap.end() ? Empty : F->second;
+ }
+
+ void computePhiInfo();
+ void computeLiveIns();
+ void resetLiveIns();
+ void resetKills();
+ void resetKills(MachineBasicBlock *B);
+
+ void trace(bool T) { Trace = T; }
+
+ private:
+ const DataFlowGraph &DFG;
+ const TargetRegisterInfo &TRI;
+ const MachineDominatorTree &MDT;
+ const MachineDominanceFrontier &MDF;
+ const RegisterAliasInfo &RAI;
+ MachineRegisterInfo &MRI;
+ LiveMapType LiveMap;
+ const RefMap Empty;
+ bool Trace;
+
+ // Cache mapping node ids (of RefNodes) to their containing basic
+ // blocks. Not recomputing this for each node reduces the liveness
+ // calculation time by a large fraction.
+ typedef DenseMap<NodeId,MachineBasicBlock*> NodeBlockMap;
+ NodeBlockMap NBMap;
+
+ // Phi information:
+ //
+ // map: NodeId -> (map: RegisterRef -> NodeSet)
+ // phi id -> (map: register -> set of reached non-phi uses)
+ std::map<NodeId, RefMap> RealUseMap;
+
+ // Inverse iterated dominance frontier.
+ std::map<MachineBasicBlock*,std::set<MachineBasicBlock*>> IIDF;
+
+ // Live on entry.
+ std::map<MachineBasicBlock*,RefMap> PhiLON;
+
+ // Phi uses are considered to be located at the end of the block that
+ // they are associated with. The reaching def of a phi use dominates the
+ // block that the use corresponds to, but not the block that contains
+ // the phi itself. To include these uses in the liveness propagation (up
+ // the dominator tree), create a map: block -> set of uses live on exit.
+ std::map<MachineBasicBlock*,RefMap> PhiLOX;
+
+ bool isRestricted(NodeAddr<InstrNode*> IA, NodeAddr<RefNode*> RA,
+ RegisterRef RR) const;
+ RegisterRef getRestrictedRegRef(NodeAddr<RefNode*> RA) const;
+ unsigned getPhysReg(RegisterRef RR) const;
+ MachineBasicBlock *getBlockWithRef(NodeId RN) const;
+ void traverse(MachineBasicBlock *B, RefMap &LiveIn);
+ void emptify(RefMap &M);
+ };
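+
+ // A typical driver sequence (a sketch; assumes the DataFlowGraph G has
+ // already been built with G.build()):
+ //
+ //   Liveness L(MF.getRegInfo(), G);
+ //   L.computeLiveIns();  // also computes phi info and the inverse IDF
+ //   L.resetLiveIns();    // rewrite the block live-in lists
+ //   L.resetKills();      // recompute kill flags from the new live-ins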
+}
+
+#endif // RDF_LIVENESS_H
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 6756c1702f76..5680130b91b2 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -277,8 +277,6 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::SELECT, MVT::f32, Custom);
setOperationAction(ISD::SELECT, MVT::f64, Custom);
setOperationAction(ISD::SELECT, MVT::i32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
- setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
setOperationAction(ISD::SETCC, MVT::f32, Custom);
setOperationAction(ISD::SETCC, MVT::f64, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Custom);
@@ -327,6 +325,8 @@ MipsTargetLowering::MipsTargetLowering(const MipsTargetMachine &TM,
setOperationAction(ISD::BR_CC, MVT::i64, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i64, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
+ setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
setOperationAction(ISD::UINT_TO_FP, MVT::i64, Expand);
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
@@ -872,7 +872,6 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) const
case ISD::GlobalTLSAddress: return lowerGlobalTLSAddress(Op, DAG);
case ISD::JumpTable: return lowerJumpTable(Op, DAG);
case ISD::SELECT: return lowerSELECT(Op, DAG);
- case ISD::SELECT_CC: return lowerSELECT_CC(Op, DAG);
case ISD::SETCC: return lowerSETCC(Op, DAG);
case ISD::VASTART: return lowerVASTART(Op, DAG);
case ISD::VAARG: return lowerVAARG(Op, DAG);
@@ -1648,20 +1647,6 @@ lowerSELECT(SDValue Op, SelectionDAG &DAG) const
SDLoc(Op));
}
-SDValue MipsTargetLowering::
-lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const
-{
- SDLoc DL(Op);
- EVT Ty = Op.getOperand(0).getValueType();
- SDValue Cond =
- DAG.getNode(ISD::SETCC, DL, getSetCCResultType(DAG.getDataLayout(),
- *DAG.getContext(), Ty),
- Op.getOperand(0), Op.getOperand(1), Op.getOperand(4));
-
- return DAG.getNode(ISD::SELECT, DL, Op.getValueType(), Cond, Op.getOperand(2),
- Op.getOperand(3));
-}
-
SDValue MipsTargetLowering::lowerSETCC(SDValue Op, SelectionDAG &DAG) const {
assert(!Subtarget.hasMips32r6() && !Subtarget.hasMips64r6());
SDValue Cond = createFPCmp(DAG, Op);
diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h
index b33e125b81b7..0dc683e3df27 100644
--- a/lib/Target/Mips/MipsISelLowering.h
+++ b/lib/Target/Mips/MipsISelLowering.h
@@ -430,7 +430,6 @@ namespace llvm {
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSELECT(SDValue Op, SelectionDAG &DAG) const;
- SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerSETCC(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const;
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index d9fb8c890739..ffda491f0c86 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -1003,7 +1003,7 @@ class IndirectBranch<string opstr, RegisterOperand RO> : JumpFR<opstr, RO> {
let isCall=1, hasDelaySlot=1, Defs = [RA] in {
class JumpLink<string opstr, DAGOperand opnd> :
InstSE<(outs), (ins opnd:$target), !strconcat(opstr, "\t$target"),
- [(MipsJmpLink imm:$target)], II_JAL, FrmJ, opstr> {
+ [(MipsJmpLink tglobaladdr:$target)], II_JAL, FrmJ, opstr> {
let DecoderMethod = "DecodeJumpTarget";
}
@@ -2075,8 +2075,6 @@ def : MipsPat<(MipsSync (i32 immz)),
(SYNC 0)>, ISA_MIPS2;
// Call
-def : MipsPat<(MipsJmpLink (i32 tglobaladdr:$dst)),
- (JAL tglobaladdr:$dst)>;
def : MipsPat<(MipsJmpLink (i32 texternalsym:$dst)),
(JAL texternalsym:$dst)>;
//def : MipsPat<(MipsJmpLink GPR32:$dst),
diff --git a/lib/Target/Mips/MipsSEInstrInfo.cpp b/lib/Target/Mips/MipsSEInstrInfo.cpp
index e6f7fe9aae1d..d4aeaf928655 100644
--- a/lib/Target/Mips/MipsSEInstrInfo.cpp
+++ b/lib/Target/Mips/MipsSEInstrInfo.cpp
@@ -544,8 +544,6 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
const MachineOperand &SrcLo = I->getOperand(1), &SrcHi = I->getOperand(2);
MachineInstrBuilder LoInst = BuildMI(MBB, I, DL, get(LoOpc));
MachineInstrBuilder HiInst = BuildMI(MBB, I, DL, get(HiOpc));
- LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
- HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
// Add lo/hi registers if the mtlo/hi instructions created have explicit
// def registers.
@@ -556,6 +554,9 @@ void MipsSEInstrInfo::expandPseudoMTLoHi(MachineBasicBlock &MBB,
LoInst.addReg(DstLo, RegState::Define);
HiInst.addReg(DstHi, RegState::Define);
}
+
+ LoInst.addReg(SrcLo.getReg(), getKillRegState(SrcLo.isKill()));
+ HiInst.addReg(SrcHi.getReg(), getKillRegState(SrcHi.isKill()));
}
void MipsSEInstrInfo::expandCvtFPInt(MachineBasicBlock &MBB,
diff --git a/lib/Target/NVPTX/NVPTXISelLowering.cpp b/lib/Target/NVPTX/NVPTXISelLowering.cpp
index 766369631e14..be735f6c1bce 100644
--- a/lib/Target/NVPTX/NVPTXISelLowering.cpp
+++ b/lib/Target/NVPTX/NVPTXISelLowering.cpp
@@ -4549,6 +4549,7 @@ NVPTXTargetObjectFile::~NVPTXTargetObjectFile() {
delete static_cast<NVPTXSection *>(DwarfLocSection);
delete static_cast<NVPTXSection *>(DwarfARangesSection);
delete static_cast<NVPTXSection *>(DwarfRangesSection);
+ delete static_cast<NVPTXSection *>(DwarfMacinfoSection);
}
MCSection *
diff --git a/lib/Target/NVPTX/NVPTXTargetObjectFile.h b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
index 0f88ddfaa934..683b9a3f49f7 100644
--- a/lib/Target/NVPTX/NVPTXTargetObjectFile.h
+++ b/lib/Target/NVPTX/NVPTXTargetObjectFile.h
@@ -41,6 +41,7 @@ public:
DwarfLocSection = nullptr;
DwarfARangesSection = nullptr;
DwarfRangesSection = nullptr;
+ DwarfMacinfoSection = nullptr;
}
virtual ~NVPTXTargetObjectFile();
@@ -81,6 +82,8 @@ public:
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
DwarfRangesSection =
new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
+ DwarfMacinfoSection =
+ new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
}
MCSection *getSectionForConstant(const DataLayout &DL, SectionKind Kind,
diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 9a63c14b5053..ec354c209ca0 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -1092,8 +1092,28 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
}
// ELFv2 ABI - Normal entry label.
- if (Subtarget->isELFv2ABI())
+ if (Subtarget->isELFv2ABI()) {
+ // In the Large code model, we allow arbitrary displacements between
+ // the text section and its associated TOC section. We place the
+ // full 8-byte offset to the TOC in memory immediately preceding
+ // the function global entry point.
+ if (TM.getCodeModel() == CodeModel::Large
+ && !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
+
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ MCSymbol *GlobalEPSymbol = PPCFI->getGlobalEPSymbol();
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+ MCSymbolRefExpr::create(GlobalEPSymbol,
+ OutContext),
+ OutContext);
+
+ OutStreamer->EmitLabel(PPCFI->getTOCOffsetSymbol());
+ OutStreamer->EmitValue(TOCDeltaExpr, 8);
+ }
return AsmPrinter::EmitFunctionEntryLabel();
+ }
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer->getCurrentSection();
@@ -1160,10 +1180,25 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
// thus emit a prefix sequence along the following lines:
//
// func:
+ // .Lfunc_gepNN:
+ // # global entry point
+ // addis r2,r12,(.TOC.-.Lfunc_gepNN)@ha
+ // addi r2,r2,(.TOC.-.Lfunc_gepNN)@l
+ // .Lfunc_lepNN:
+ // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
+ // # local entry point, followed by function body
+ //
+ // For the Large code model, we create
+ //
+ // .Lfunc_tocNN:
+ // .quad .TOC.-.Lfunc_gepNN # done by EmitFunctionEntryLabel
+ // func:
+ // .Lfunc_gepNN:
// # global entry point
- // addis r2,r12,(.TOC.-func)@ha
- // addi r2,r2,(.TOC.-func)@l
- // .localentry func, .-func
+ // ld r2,.Lfunc_tocNN-.Lfunc_gepNN(r12)
+ // add r2,r2,r12
+ // .Lfunc_lepNN:
+ // .localentry func, .Lfunc_lepNN-.Lfunc_gepNN
// # local entry point, followed by function body
//
// This ensures we have r2 set up correctly while executing the function
@@ -1171,32 +1206,49 @@ void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
if (Subtarget->isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
+ const PPCFunctionInfo *PPCFI = MF->getInfo<PPCFunctionInfo>();
- MCSymbol *GlobalEntryLabel = OutContext.createTempSymbol();
+ MCSymbol *GlobalEntryLabel = PPCFI->getGlobalEPSymbol();
OutStreamer->EmitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::create(GlobalEntryLabel, OutContext);
- MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
- const MCExpr *TOCDeltaExpr =
- MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
- GlobalEntryLabelExp, OutContext);
+ if (TM.getCodeModel() != CodeModel::Large) {
+ MCSymbol *TOCSymbol = OutContext.getOrCreateSymbol(StringRef(".TOC."));
+ const MCExpr *TOCDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCSymbol, OutContext),
+ GlobalEntryLabelExp, OutContext);
- const MCExpr *TOCDeltaHi =
- PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
- .addReg(PPC::X2)
- .addReg(PPC::X12)
- .addExpr(TOCDeltaHi));
-
- const MCExpr *TOCDeltaLo =
- PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
- EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
- .addReg(PPC::X2)
- .addReg(PPC::X2)
- .addExpr(TOCDeltaLo));
-
- MCSymbol *LocalEntryLabel = OutContext.createTempSymbol();
+ const MCExpr *TOCDeltaHi =
+ PPCMCExpr::createHa(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDIS)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12)
+ .addExpr(TOCDeltaHi));
+
+ const MCExpr *TOCDeltaLo =
+ PPCMCExpr::createLo(TOCDeltaExpr, false, OutContext);
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADDI)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addExpr(TOCDeltaLo));
+ } else {
+ MCSymbol *TOCOffset = PPCFI->getTOCOffsetSymbol();
+ const MCExpr *TOCOffsetDeltaExpr =
+ MCBinaryExpr::createSub(MCSymbolRefExpr::create(TOCOffset, OutContext),
+ GlobalEntryLabelExp, OutContext);
+
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::LD)
+ .addReg(PPC::X2)
+ .addExpr(TOCOffsetDeltaExpr)
+ .addReg(PPC::X12));
+ EmitToStreamer(*OutStreamer, MCInstBuilder(PPC::ADD8)
+ .addReg(PPC::X2)
+ .addReg(PPC::X2)
+ .addReg(PPC::X12));
+ }
+
+ MCSymbol *LocalEntryLabel = PPCFI->getLocalEPSymbol();
OutStreamer->EmitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::create(LocalEntryLabel, OutContext);
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 075e093e41a1..79e4fe379c2d 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -299,22 +299,35 @@ def : Pat<(PPCtc_return CTRRC8:$dst, imm:$imm),
// 64-bit CR instructions
let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
let hasSideEffects = 0 in {
+// mtocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraDefRegAllocReq = 1 in {
def MTOCRF8: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins g8rc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
+// is dependent on the cr fields being set.
def MTCRF8 : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, g8rc:$rS),
"mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraDefRegAllocReq = 1
-let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+// mfocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraSrcRegAllocReq = 1 in {
def MFOCRF8: XFXForm_5a<31, 19, (outs g8rc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mfocrf, the mask for mfcrf must be prepared in a way that
+// is dependent on the cr fields being copied.
def MFCR8 : XFXForm_3<31, 19, (outs g8rc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
let hasSideEffects = 1, isBarrier = 1, usesCustomInserter = 1 in {
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index c17603a7718a..dcff6ad2486f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -744,20 +744,43 @@ void PPCInstrInfo::insertSelect(MachineBasicBlock &MBB,
"isel is for regular integer GPRs only");
unsigned OpCode = Is64Bit ? PPC::ISEL8 : PPC::ISEL;
- unsigned SelectPred = Cond[0].getImm();
+ auto SelectPred = static_cast<PPC::Predicate>(Cond[0].getImm());
unsigned SubIdx;
bool SwapOps;
switch (SelectPred) {
- default: llvm_unreachable("invalid predicate for isel");
- case PPC::PRED_EQ: SubIdx = PPC::sub_eq; SwapOps = false; break;
- case PPC::PRED_NE: SubIdx = PPC::sub_eq; SwapOps = true; break;
- case PPC::PRED_LT: SubIdx = PPC::sub_lt; SwapOps = false; break;
- case PPC::PRED_GE: SubIdx = PPC::sub_lt; SwapOps = true; break;
- case PPC::PRED_GT: SubIdx = PPC::sub_gt; SwapOps = false; break;
- case PPC::PRED_LE: SubIdx = PPC::sub_gt; SwapOps = true; break;
- case PPC::PRED_UN: SubIdx = PPC::sub_un; SwapOps = false; break;
- case PPC::PRED_NU: SubIdx = PPC::sub_un; SwapOps = true; break;
+ case PPC::PRED_EQ:
+ case PPC::PRED_EQ_MINUS:
+ case PPC::PRED_EQ_PLUS:
+ SubIdx = PPC::sub_eq; SwapOps = false; break;
+ case PPC::PRED_NE:
+ case PPC::PRED_NE_MINUS:
+ case PPC::PRED_NE_PLUS:
+ SubIdx = PPC::sub_eq; SwapOps = true; break;
+ case PPC::PRED_LT:
+ case PPC::PRED_LT_MINUS:
+ case PPC::PRED_LT_PLUS:
+ SubIdx = PPC::sub_lt; SwapOps = false; break;
+ case PPC::PRED_GE:
+ case PPC::PRED_GE_MINUS:
+ case PPC::PRED_GE_PLUS:
+ SubIdx = PPC::sub_lt; SwapOps = true; break;
+ case PPC::PRED_GT:
+ case PPC::PRED_GT_MINUS:
+ case PPC::PRED_GT_PLUS:
+ SubIdx = PPC::sub_gt; SwapOps = false; break;
+ case PPC::PRED_LE:
+ case PPC::PRED_LE_MINUS:
+ case PPC::PRED_LE_PLUS:
+ SubIdx = PPC::sub_gt; SwapOps = true; break;
+ case PPC::PRED_UN:
+ case PPC::PRED_UN_MINUS:
+ case PPC::PRED_UN_PLUS:
+ SubIdx = PPC::sub_un; SwapOps = false; break;
+ case PPC::PRED_NU:
+ case PPC::PRED_NU_MINUS:
+ case PPC::PRED_NU_PLUS:
+ SubIdx = PPC::sub_un; SwapOps = true; break;
case PPC::PRED_BIT_SET: SubIdx = 0; SwapOps = false; break;
case PPC::PRED_BIT_UNSET: SubIdx = 0; SwapOps = true; break;
}
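The hint-qualified predicates (the _MINUS/_PLUS variants) deliberately share cases with their base predicates above. A hypothetical helper could make that folding explicit - a sketch only, with enumerator names mirroring PPC::Predicate; the helper itself is not in the tree:

#include <cstdio>

enum Predicate {
  PRED_EQ, PRED_EQ_MINUS, PRED_EQ_PLUS,
  PRED_NE, PRED_NE_MINUS, PRED_NE_PLUS,
  // ... LT/GE/GT/LE/UN/NU follow the same pattern ...
};

// Collapse a branch-hint variant onto its base predicate so switches
// like the one above need only the eight base cases.
static Predicate stripHint(Predicate P) {
  switch (P) {
  case PRED_EQ_MINUS: case PRED_EQ_PLUS: return PRED_EQ;
  case PRED_NE_MINUS: case PRED_NE_PLUS: return PRED_NE;
  default: return P; // base predicates pass through unchanged
  }
}

int main() {
  std::printf("%d %d\n", stripHint(PRED_EQ_PLUS), stripHint(PRED_NE)); // 0 3
}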
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 6c4364aad331..ce0f9e6f52a7 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -2299,22 +2299,35 @@ def RESTORE_VRSAVE : Pseudo<(outs VRSAVERC:$vrsave), (ins memri:$F),
"#RESTORE_VRSAVE", []>;
let hasSideEffects = 0 in {
+// mtocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraDefRegAllocReq = 1 in {
def MTOCRF: XFXForm_5a<31, 144, (outs crbitm:$FXM), (ins gprc:$ST),
"mtocrf $FXM, $ST", IIC_BrMCRX>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mtocrf, the mask for mtcrf must be prepared in a way that
+// is dependent on the cr fields being set.
def MTCRF : XFXForm_5<31, 144, (outs), (ins i32imm:$FXM, gprc:$rS),
"mtcrf $FXM, $rS", IIC_BrMCRX>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraDefRegAllocReq = 1
-let hasExtraSrcRegAllocReq = 1 in // to enable post-ra anti-dep breaking.
+// mfocrf's input needs to be prepared by shifting by an amount dependent
+// on the cr register selected. Thus, post-ra anti-dep breaking must not
+// later change that register assignment.
+let hasExtraSrcRegAllocReq = 1 in {
def MFOCRF: XFXForm_5a<31, 19, (outs gprc:$rT), (ins crbitm:$FXM),
"mfocrf $rT, $FXM", IIC_SprMFCRF>,
PPC970_DGroup_First, PPC970_Unit_CRU;
+// Similarly to mfocrf, the mask for mfcr must be prepared in a way that
+// is dependent on the cr fields being copied.
def MFCR : XFXForm_3<31, 19, (outs gprc:$rT), (ins),
"mfcr $rT", IIC_SprMFCR>,
PPC970_MicroCode, PPC970_Unit_CRU;
+} // hasExtraSrcRegAllocReq = 1
} // hasSideEffects = 0
// Pseudo instruction to perform FADD in round-to-zero mode.
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
index 95f163153c74..9d91e31165de 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.cpp
@@ -23,3 +23,24 @@ MCSymbol *PPCFunctionInfo::getPICOffsetSymbol() const {
Twine(MF.getFunctionNumber()) +
"$poff");
}
+
+MCSymbol *PPCFunctionInfo::getGlobalEPSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_gep" +
+ Twine(MF.getFunctionNumber()));
+}
+
+MCSymbol *PPCFunctionInfo::getLocalEPSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_lep" +
+ Twine(MF.getFunctionNumber()));
+}
+
+MCSymbol *PPCFunctionInfo::getTOCOffsetSymbol() const {
+ const DataLayout &DL = MF.getDataLayout();
+ return MF.getContext().getOrCreateSymbol(Twine(DL.getPrivateGlobalPrefix()) +
+ "func_toc" +
+ Twine(MF.getFunctionNumber()));
+}
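All three getters follow the same naming scheme: private-global prefix, a fixed stem, then the function number. A small sketch of the resulting names, assuming the common ELF ".L" prefix (the real code queries the DataLayout):

#include <cstdio>
#include <string>

static std::string epSymbolName(const char *Stem, unsigned FnNum,
                                const char *PrivatePrefix = ".L") {
  return std::string(PrivatePrefix) + Stem + std::to_string(FnNum);
}

int main() {
  // For function number 0: .Lfunc_gep0, .Lfunc_lep0, .Lfunc_toc0.
  std::printf("%s %s %s\n", epSymbolName("func_gep", 0).c_str(),
              epSymbolName("func_lep", 0).c_str(),
              epSymbolName("func_toc", 0).c_str());
}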
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index 607cdf612eef..10a8ce068d40 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -197,6 +197,10 @@ public:
bool usesPICBase() const { return UsesPICBase; }
MCSymbol *getPICOffsetSymbol() const;
+
+ MCSymbol *getGlobalEPSymbol() const;
+ MCSymbol *getLocalEPSymbol() const;
+ MCSymbol *getTOCOffsetSymbol() const;
};
} // end of namespace llvm
diff --git a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
index 2dc0d825c80d..a9d2e888f4b7 100644
--- a/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
+++ b/lib/Target/PowerPC/PPCTLSDynamicCall.cpp
@@ -99,6 +99,11 @@ protected:
break;
}
+ // Don't really need to save data to the stack - the clobbered
+ // registers are already saved when the SDNode (e.g. PPCaddiTlsgdLAddr)
+ // gets translated to the pseudo instruction (e.g. ADDItlsgdLADDR).
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKDOWN)).addImm(0);
+
// Expand into two ops built prior to the existing instruction.
MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
.addReg(InReg);
@@ -113,6 +118,8 @@ protected:
.addReg(GPR3));
Call->addOperand(MI->getOperand(3));
+ BuildMI(MBB, I, DL, TII->get(PPC::ADJCALLSTACKUP)).addImm(0).addImm(0);
+
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
diff --git a/lib/Target/Sparc/SparcInstrInfo.cpp b/lib/Target/Sparc/SparcInstrInfo.cpp
index 733027a5d2be..05006ac5772b 100644
--- a/lib/Target/Sparc/SparcInstrInfo.cpp
+++ b/lib/Target/Sparc/SparcInstrInfo.cpp
@@ -83,7 +83,6 @@ static bool IsIntegerCC(unsigned CC)
return (CC <= SPCC::ICC_VC);
}
-
static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
{
switch(CC) {
@@ -124,106 +123,103 @@ static SPCC::CondCodes GetOppositeBranchCondition(SPCC::CondCodes CC)
llvm_unreachable("Invalid cond code");
}
-bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
- MachineBasicBlock *&TBB,
- MachineBasicBlock *&FBB,
- SmallVectorImpl<MachineOperand> &Cond,
- bool AllowModify) const
-{
-
- MachineBasicBlock::iterator I = MBB.end();
- MachineBasicBlock::iterator UnCondBrIter = MBB.end();
- while (I != MBB.begin()) {
- --I;
+static bool isUncondBranchOpcode(int Opc) { return Opc == SP::BA; }
- if (I->isDebugValue())
- continue;
+static bool isCondBranchOpcode(int Opc) {
+ return Opc == SP::FBCOND || Opc == SP::BCOND;
+}
- // When we see a non-terminator, we are done.
- if (!isUnpredicatedTerminator(I))
- break;
+static bool isIndirectBranchOpcode(int Opc) {
+ return Opc == SP::BINDrr || Opc == SP::BINDri;
+}
- // Terminator is not a branch.
- if (!I->isBranch())
- return true;
+static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
+ SmallVectorImpl<MachineOperand> &Cond) {
+ Cond.push_back(MachineOperand::CreateImm(LastInst->getOperand(1).getImm()));
+ Target = LastInst->getOperand(0).getMBB();
+}
- // Handle Unconditional branches.
- if (I->getOpcode() == SP::BA) {
- UnCondBrIter = I;
+bool SparcInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
+ MachineBasicBlock *&TBB,
+ MachineBasicBlock *&FBB,
+ SmallVectorImpl<MachineOperand> &Cond,
+ bool AllowModify) const {
+ MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
+ if (I == MBB.end())
+ return false;
+
+ if (!isUnpredicatedTerminator(I))
+ return false;
+
+ // Get the last instruction in the block.
+ MachineInstr *LastInst = I;
+ unsigned LastOpc = LastInst->getOpcode();
+
+ // If there is only one terminator instruction, process it.
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ if (isUncondBranchOpcode(LastOpc)) {
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
+ if (isCondBranchOpcode(LastOpc)) {
+ // Block ends with fall-through condbranch.
+ parseCondBranch(LastInst, TBB, Cond);
+ return false;
+ }
+ return true; // Can't handle indirect branch.
+ }
- if (!AllowModify) {
- TBB = I->getOperand(0).getMBB();
- continue;
+ // Get the instruction before it if it is a terminator.
+ MachineInstr *SecondLastInst = I;
+ unsigned SecondLastOpc = SecondLastInst->getOpcode();
+
+ // If AllowModify is true and the block ends with two or more unconditional
+ // branches, delete all but the first unconditional branch.
+ if (AllowModify && isUncondBranchOpcode(LastOpc)) {
+ while (isUncondBranchOpcode(SecondLastOpc)) {
+ LastInst->eraseFromParent();
+ LastInst = SecondLastInst;
+ LastOpc = LastInst->getOpcode();
+ if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) {
+ // Return now; the only terminator is an unconditional branch.
+ TBB = LastInst->getOperand(0).getMBB();
+ return false;
+ } else {
+ SecondLastInst = I;
+ SecondLastOpc = SecondLastInst->getOpcode();
}
+ }
+ }
- while (std::next(I) != MBB.end())
- std::next(I)->eraseFromParent();
-
- Cond.clear();
- FBB = nullptr;
+ // If there are three terminators, we don't know what sort of block this is.
+ if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
+ return true;
- if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
- TBB = nullptr;
- I->eraseFromParent();
- I = MBB.end();
- UnCondBrIter = MBB.end();
- continue;
- }
+ // If the block ends with a B and a Bcc, handle it.
+ if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ parseCondBranch(SecondLastInst, TBB, Cond);
+ FBB = LastInst->getOperand(0).getMBB();
+ return false;
+ }
- TBB = I->getOperand(0).getMBB();
- continue;
- }
+ // If the block ends with two unconditional branches, handle it. The second
+ // one is not executed.
+ if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ TBB = SecondLastInst->getOperand(0).getMBB();
+ return false;
+ }
- unsigned Opcode = I->getOpcode();
- if (Opcode != SP::BCOND && Opcode != SP::FBCOND)
- return true; // Unknown Opcode.
-
- SPCC::CondCodes BranchCode = (SPCC::CondCodes)I->getOperand(1).getImm();
-
- if (Cond.empty()) {
- MachineBasicBlock *TargetBB = I->getOperand(0).getMBB();
- if (AllowModify && UnCondBrIter != MBB.end() &&
- MBB.isLayoutSuccessor(TargetBB)) {
-
- // Transform the code
- //
- // brCC L1
- // ba L2
- // L1:
- // ..
- // L2:
- //
- // into
- //
- // brnCC L2
- // L1:
- // ...
- // L2:
- //
- BranchCode = GetOppositeBranchCondition(BranchCode);
- MachineBasicBlock::iterator OldInst = I;
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(Opcode))
- .addMBB(UnCondBrIter->getOperand(0).getMBB()).addImm(BranchCode);
- BuildMI(MBB, UnCondBrIter, MBB.findDebugLoc(I), get(SP::BA))
- .addMBB(TargetBB);
-
- OldInst->eraseFromParent();
- UnCondBrIter->eraseFromParent();
-
- UnCondBrIter = MBB.end();
- I = MBB.end();
- continue;
- }
- FBB = TBB;
- TBB = I->getOperand(0).getMBB();
- Cond.push_back(MachineOperand::CreateImm(BranchCode));
- continue;
- }
- // FIXME: Handle subsequent conditional branches.
- // For now, we can't handle multiple conditional branches.
+ // ...likewise if it ends with an indirect branch followed by an unconditional
+ // branch.
+ if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
return true;
}
- return false;
+
+ // Otherwise, can't handle this.
+ return true;
}
unsigned
@@ -277,6 +273,14 @@ unsigned SparcInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const
return Count;
}
+bool SparcInstrInfo::ReverseBranchCondition(
+ SmallVectorImpl<MachineOperand> &Cond) const {
+ assert(Cond.size() == 1);
+ SPCC::CondCodes CC = static_cast<SPCC::CondCodes>(Cond[0].getImm());
+ Cond[0].setImm(GetOppositeBranchCondition(CC));
+ return false;
+}
+
void SparcInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
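The rewritten AnalyzeBranch follows the standard last-two-terminators pattern used by several other backends. A condensed, target-neutral sketch of the contract (hypothetical Block/Term types; the real code operates on MachineInstr):

#include <cstdio>
#include <optional>
#include <vector>

struct Block { int Id; };
struct Term { bool IsCond; bool IsUncond; Block *Target; int Cond; };

struct Analysis {
  Block *TBB = nullptr;  // taken / unconditional destination
  Block *FBB = nullptr;  // explicit false destination, if any
  std::vector<int> Cond; // empty means unconditional or fallthrough
};

// nullopt mirrors AnalyzeBranch returning true ("can't analyze").
static std::optional<Analysis> analyze(const std::vector<Term> &Terms) {
  Analysis A;
  if (Terms.empty())
    return A; // pure fallthrough
  const Term &Last = Terms.back();
  if (Terms.size() == 1) {
    if (Last.IsUncond) { A.TBB = Last.Target; return A; }
    if (Last.IsCond) { A.TBB = Last.Target; A.Cond = {Last.Cond}; return A; }
    return std::nullopt; // e.g. an indirect branch
  }
  const Term &Prev = Terms[Terms.size() - 2];
  if (Prev.IsCond && Last.IsUncond) { // Bcc TBB; BA FBB
    A.TBB = Prev.Target;
    A.Cond = {Prev.Cond};
    A.FBB = Last.Target;
    return A;
  }
  return std::nullopt;
}

int main() {
  Block L1{1}, L2{2};
  std::vector<Term> Terms = {{true, false, &L1, 7}, {false, true, &L2, 0}};
  if (auto A = analyze(Terms))
    std::printf("TBB=%d FBB=%d\n", A->TBB->Id, A->FBB->Id); // TBB=1 FBB=2
}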
diff --git a/lib/Target/Sparc/SparcInstrInfo.h b/lib/Target/Sparc/SparcInstrInfo.h
index 15673f134d80..9de624cc9582 100644
--- a/lib/Target/Sparc/SparcInstrInfo.h
+++ b/lib/Target/Sparc/SparcInstrInfo.h
@@ -76,6 +76,9 @@ public:
MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
DebugLoc DL) const override;
+ bool
+ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
+
void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index b9f2eb5514a5..d5dabc2cd6ab 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -1219,6 +1219,9 @@ def PFDRL : PrefetchRILPC<"pfdrl", 0xC62, z_prefetch>;
// Atomic operations
//===----------------------------------------------------------------------===//
+// A serialization instruction that acts as a barrier for all memory
+// accesses, which expands to "bcr 14, 0".
+let hasSideEffects = 1 in
def Serialize : Alias<2, (outs), (ins), [(z_serialize)]>;
let Predicates = [FeatureInterlockedAccess1], Defs = [CC] in {
diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt
index 284a7d91bc62..e5c68e598479 100644
--- a/lib/Target/WebAssembly/CMakeLists.txt
+++ b/lib/Target/WebAssembly/CMakeLists.txt
@@ -39,6 +39,7 @@ add_llvm_target(WebAssemblyCodeGen
add_dependencies(LLVMWebAssemblyCodeGen intrinsics_gen)
+add_subdirectory(Disassembler)
add_subdirectory(InstPrinter)
-add_subdirectory(TargetInfo)
add_subdirectory(MCTargetDesc)
+add_subdirectory(TargetInfo)
diff --git a/lib/Target/WebAssembly/Disassembler/CMakeLists.txt b/lib/Target/WebAssembly/Disassembler/CMakeLists.txt
new file mode 100644
index 000000000000..5e55e2958aeb
--- /dev/null
+++ b/lib/Target/WebAssembly/Disassembler/CMakeLists.txt
@@ -0,0 +1,3 @@
+add_llvm_library(LLVMWebAssemblyDisassembler
+ WebAssemblyDisassembler.cpp
+ )
diff --git a/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt b/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
new file mode 100644
index 000000000000..a452ca1acd04
--- /dev/null
+++ b/lib/Target/WebAssembly/Disassembler/LLVMBuild.txt
@@ -0,0 +1,23 @@
+;===-- ./lib/Target/WebAssembly/Disassembler/LLVMBuild.txt -----*- Conf -*--===;
+;
+; The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+; http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = WebAssemblyDisassembler
+parent = WebAssembly
+required_libraries = MCDisassembler WebAssemblyInfo Support
+add_to_library_groups = WebAssembly
diff --git a/lib/Target/WebAssembly/Disassembler/Makefile b/lib/Target/WebAssembly/Disassembler/Makefile
new file mode 100644
index 000000000000..bcd36ba6f01f
--- /dev/null
+++ b/lib/Target/WebAssembly/Disassembler/Makefile
@@ -0,0 +1,16 @@
+##===-- lib/Target/WebAssembly/Disassembler/Makefile -------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMWebAssemblyDisassembler
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
new file mode 100644
index 000000000000..0143b10c0ab1
--- /dev/null
+++ b/lib/Target/WebAssembly/Disassembler/WebAssemblyDisassembler.cpp
@@ -0,0 +1,148 @@
+//==- WebAssemblyDisassembler.cpp - Disassembler for WebAssembly -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file is part of the WebAssembly Disassembler.
+///
+/// It contains code to translate the data produced by the decoder into
+/// MCInsts.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssembly.h"
+#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/TargetRegistry.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "wasm-disassembler"
+
+namespace {
+class WebAssemblyDisassembler final : public MCDisassembler {
+ std::unique_ptr<const MCInstrInfo> MCII;
+
+ DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes, uint64_t Address,
+ raw_ostream &VStream,
+ raw_ostream &CStream) const override;
+
+public:
+ WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
+ std::unique_ptr<const MCInstrInfo> MCII)
+ : MCDisassembler(STI, Ctx), MCII(std::move(MCII)) {}
+};
+} // end anonymous namespace
+
+static MCDisassembler *createWebAssemblyDisassembler(const Target &T,
+ const MCSubtargetInfo &STI,
+ MCContext &Ctx) {
+ std::unique_ptr<const MCInstrInfo> MCII(T.createMCInstrInfo());
+ return new WebAssemblyDisassembler(STI, Ctx, std::move(MCII));
+}
+
+extern "C" void LLVMInitializeWebAssemblyDisassembler() {
+ // Register the disassembler for each target.
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget32,
+ createWebAssemblyDisassembler);
+ TargetRegistry::RegisterMCDisassembler(TheWebAssemblyTarget64,
+ createWebAssemblyDisassembler);
+}
+
+MCDisassembler::DecodeStatus WebAssemblyDisassembler::getInstruction(
+ MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t /*Address*/,
+ raw_ostream &OS, raw_ostream &CS) const {
+ Size = 0;
+ uint64_t Pos = 0;
+
+ // Read the opcode.
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Opcode = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+
+ if (Opcode >= WebAssembly::INSTRUCTION_LIST_END)
+ return MCDisassembler::Fail;
+
+ MI.setOpcode(Opcode);
+ const MCInstrDesc &Desc = MCII->get(Opcode);
+ unsigned NumFixedOperands = Desc.NumOperands;
+
+ // If it's variadic, read the number of extra operands.
+ unsigned NumExtraOperands = 0;
+ if (Desc.isVariadic()) {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ NumExtraOperands = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ }
+
+ // Read the fixed operands. These are described by the MCInstrDesc.
+ for (unsigned i = 0; i < NumFixedOperands; ++i) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ switch (Info.OperandType) {
+ case MCOI::OPERAND_IMMEDIATE:
+ case WebAssembly::OPERAND_BASIC_BLOCK: {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ MI.addOperand(MCOperand::createImm(Imm));
+ break;
+ }
+ case MCOI::OPERAND_REGISTER: {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ MI.addOperand(MCOperand::createReg(Reg));
+ break;
+ }
+ case WebAssembly::OPERAND_FPIMM: {
+ // TODO: MC converts all floating point immediate operands to double.
+ // This is fine for numeric values, but may cause NaNs to change bits.
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ uint64_t Bits = support::endian::read64le(Bytes.data() + Pos);
+ Pos += sizeof(uint64_t);
+ double Imm;
+ memcpy(&Imm, &Bits, sizeof(Imm));
+ MI.addOperand(MCOperand::createFPImm(Imm));
+ break;
+ }
+ default:
+ llvm_unreachable("unimplemented operand kind");
+ }
+ }
+
+ // Read the extra operands.
+ assert(NumExtraOperands == 0 || Desc.isVariadic());
+ for (unsigned i = 0; i < NumExtraOperands; ++i) {
+ if (Pos + sizeof(uint64_t) > Bytes.size())
+ return MCDisassembler::Fail;
+ if (Desc.TSFlags & WebAssemblyII::VariableOpIsImmediate) {
+ // Decode extra immediate operands.
+ uint64_t Imm = support::endian::read64le(Bytes.data() + Pos);
+ MI.addOperand(MCOperand::createImm(Imm));
+ } else {
+ // Decode extra register operands.
+ uint64_t Reg = support::endian::read64le(Bytes.data() + Pos);
+ MI.addOperand(MCOperand::createReg(Reg));
+ }
+ Pos += sizeof(uint64_t);
+ }
+
+ Size = Pos;
+ return MCDisassembler::Success;
+}
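The layout this decoder consumes is deliberately simple: a little-endian uint64_t opcode, then a uint64_t operand count for variadic instructions, then one uint64_t slot per operand. A standalone sketch of reading that layout (illustrative values; real decoding is driven by the MCInstrDesc tables):

#include <cstdint>
#include <cstdio>
#include <vector>

static bool readU64(const std::vector<uint8_t> &B, size_t &Pos,
                    uint64_t &Out) {
  if (Pos + 8 > B.size())
    return false; // mirrors MCDisassembler::Fail on truncated input
  uint64_t V = 0;
  for (int i = 7; i >= 0; --i) // assemble little-endian
    V = (V << 8) | B[Pos + i];
  Out = V;
  Pos += 8;
  return true;
}

int main() {
  // Opcode 42 followed by one immediate operand (7), as the emitter
  // would lay out a non-variadic single-operand instruction.
  std::vector<uint8_t> Bytes(16, 0);
  Bytes[0] = 42; // opcode, LE
  Bytes[8] = 7;  // immediate, LE
  size_t Pos = 0;
  uint64_t Opcode, Imm;
  if (readU64(Bytes, Pos, Opcode) && readU64(Bytes, Pos, Imm))
    std::printf("opcode=%llu imm=%llu size=%zu\n",
                (unsigned long long)Opcode, (unsigned long long)Imm, Pos);
}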
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
index 7ce3a00ae360..9a95150cb557 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp
@@ -16,6 +16,8 @@
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
#include "WebAssembly.h"
#include "WebAssemblyMachineFunctionInfo.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
@@ -33,7 +35,7 @@ using namespace llvm;
WebAssemblyInstPrinter::WebAssemblyInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI)
- : MCInstPrinter(MAI, MII, MRI) {}
+ : MCInstPrinter(MAI, MII, MRI), ControlFlowCounter(0) {}
void WebAssemblyInstPrinter::printRegName(raw_ostream &OS,
unsigned RegNo) const {
@@ -59,6 +61,52 @@ void WebAssemblyInstPrinter::printInst(const MCInst *MI, raw_ostream &OS,
// Print any added annotation.
printAnnotation(OS, Annot);
+
+ if (CommentStream) {
+ // Observe any effects on the control flow stack, for use in annotating
+ // control flow label references.
+ switch (MI->getOpcode()) {
+ default:
+ break;
+ case WebAssembly::LOOP: {
+ // Grab the TopLabel value first so that labels print in numeric order.
+ uint64_t TopLabel = ControlFlowCounter++;
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ printAnnotation(OS, "label" + utostr(TopLabel) + ':');
+ ControlFlowStack.push_back(std::make_pair(TopLabel, true));
+ break;
+ }
+ case WebAssembly::BLOCK:
+ ControlFlowStack.push_back(std::make_pair(ControlFlowCounter++, false));
+ break;
+ case WebAssembly::END_LOOP:
+ ControlFlowStack.pop_back();
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ break;
+ case WebAssembly::END_BLOCK:
+ printAnnotation(
+ OS, "label" + utostr(ControlFlowStack.pop_back_val().first) + ':');
+ break;
+ }
+
+ // Annotate any control flow label references.
+ unsigned NumFixedOperands = Desc.NumOperands;
+ SmallSet<uint64_t, 8> Printed;
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ const MCOperandInfo &Info = Desc.OpInfo[i];
+ if (!(i < NumFixedOperands
+ ? (Info.OperandType == WebAssembly::OPERAND_BASIC_BLOCK)
+ : (Desc.TSFlags & WebAssemblyII::VariableOpImmediateIsLabel)))
+ continue;
+ uint64_t Depth = MI->getOperand(i).getImm();
+ if (!Printed.insert(Depth).second)
+ continue;
+ const auto &Pair = ControlFlowStack.rbegin()[Depth];
+ printAnnotation(OS, utostr(Depth) + ": " + (Pair.second ? "up" : "down") +
+ " to label" + utostr(Pair.first));
+ }
+ }
}
static std::string toString(const APFloat &FP) {
@@ -82,6 +130,9 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isReg()) {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ MII.get(MI->getOpcode()).TSFlags == 0) &&
+ "WebAssembly variable_ops register ops don't use TSFlags");
unsigned WAReg = Op.getReg();
if (int(WAReg) >= 0)
printRegName(O, WAReg);
@@ -95,19 +146,27 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
if (OpNo < MII.get(MI->getOpcode()).getNumDefs())
O << '=';
} else if (Op.isImm()) {
- switch (MI->getOpcode()) {
- case WebAssembly::PARAM:
- case WebAssembly::RESULT:
- case WebAssembly::LOCAL:
- O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm()));
- break;
- default:
- O << Op.getImm();
- break;
- }
- } else if (Op.isFPImm())
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ (MII.get(MI->getOpcode()).TSFlags &
+ WebAssemblyII::VariableOpIsImmediate)) &&
+ "WebAssemblyII::VariableOpIsImmediate should be set for "
+ "variable_ops immediate ops");
+ // TODO: (MII.get(MI->getOpcode()).TSFlags &
+ // WebAssemblyII::VariableOpImmediateIsLabel)
+ // can tell us whether this is an immediate referencing a label in the
+ // control flow stack, and it may be nice to pretty-print.
+ O << Op.getImm();
+ } else if (Op.isFPImm()) {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ MII.get(MI->getOpcode()).TSFlags == 0) &&
+ "WebAssembly variable_ops floating point ops don't use TSFlags");
O << toString(APFloat(Op.getFPImm()));
- else {
+ } else {
+ assert((OpNo < MII.get(MI->getOpcode()).getNumOperands() ||
+ (MII.get(MI->getOpcode()).TSFlags &
+ WebAssemblyII::VariableOpIsImmediate)) &&
+ "WebAssemblyII::VariableOpIsImmediate should be set for "
+ "variable_ops expr ops");
assert(Op.isExpr() && "unknown operand kind in printOperand");
Op.getExpr()->print(O, &MAI);
}
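The annotation logic keeps a stack of (label, isLoopTop) pairs: BLOCK pushes one entry, LOOP pushes two (its exit label, then its header label), and a branch operand is a depth indexing that stack from the top. A small sketch reproducing the bookkeeping with plain std types (not the printer's API):

#include <cstdint>
#include <cstdio>
#include <utility>
#include <vector>

int main() {
  uint64_t Counter = 0;
  std::vector<std::pair<uint64_t, bool>> Stack; // (label, isLoopTop)

  // A loop: the header gets label0, the exit gets label1.
  uint64_t Top = Counter++;
  Stack.push_back({Counter++, false}); // exit label; branching to it is "down"
  Stack.push_back({Top, true});        // header label; branching to it is "up"
  // A block inside the loop: label2.
  Stack.push_back({Counter++, false});

  // A branch with depth immediate 1 targets the loop header.
  uint64_t Depth = 1;
  const auto &Pair = Stack.rbegin()[Depth];
  std::printf("%llu: %s to label%llu\n", (unsigned long long)Depth,
              Pair.second ? "up" : "down", (unsigned long long)Pair.first);
}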
diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
index 39a16f59fd78..cd6c59a41c33 100644
--- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
+++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h
@@ -23,6 +23,9 @@ namespace llvm {
class MCSubtargetInfo;
class WebAssemblyInstPrinter final : public MCInstPrinter {
+ uint64_t ControlFlowCounter;
+ SmallVector<std::pair<uint64_t, bool>, 0> ControlFlowStack;
+
public:
WebAssemblyInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI);
diff --git a/lib/Target/WebAssembly/LLVMBuild.txt b/lib/Target/WebAssembly/LLVMBuild.txt
index 9c4d6dcb35a3..82737ea3a878 100644
--- a/lib/Target/WebAssembly/LLVMBuild.txt
+++ b/lib/Target/WebAssembly/LLVMBuild.txt
@@ -16,13 +16,14 @@
;===------------------------------------------------------------------------===;
[common]
-subdirectories = InstPrinter MCTargetDesc TargetInfo
+subdirectories = Disassembler InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = WebAssembly
parent = Target
has_asmprinter = 1
+has_disassembler = 1
[component_1]
type = Library
diff --git a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
index c8d1d821861a..fd41df7b9635 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
+++ b/lib/Target/WebAssembly/MCTargetDesc/CMakeLists.txt
@@ -4,4 +4,5 @@ add_llvm_library(LLVMWebAssemblyDesc
WebAssemblyMCAsmInfo.cpp
WebAssemblyMCCodeEmitter.cpp
WebAssemblyMCTargetDesc.cpp
+ WebAssemblyTargetStreamer.cpp
)
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
index b158ccb46f99..bba06f65e169 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyAsmBackend.cpp
@@ -95,9 +95,6 @@ WebAssemblyAsmBackend::createObjectWriter(raw_pwrite_stream &OS) const {
}
} // end anonymous namespace
-MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- const Triple &TT,
- StringRef CPU) {
+MCAsmBackend *llvm::createWebAssemblyAsmBackend(const Triple &TT) {
return new WebAssemblyAsmBackend(TT.isArch64Bit());
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
index c47a3d9094e5..2bb58b33934e 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyELFObjectWriter.cpp
@@ -30,19 +30,31 @@ protected:
};
} // end anonymous namespace
-// FIXME: Use EM_NONE as a temporary hack. Should we decide to pursue ELF
-// writing seriously, we should email generic-abi@googlegroups.com and ask
-// for our own ELF code.
WebAssemblyELFObjectWriter::WebAssemblyELFObjectWriter(bool Is64Bit,
uint8_t OSABI)
- : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_NONE,
- /*HasRelocationAddend=*/true) {}
+ : MCELFObjectTargetWriter(Is64Bit, OSABI, ELF::EM_WEBASSEMBLY,
+ /*HasRelocationAddend=*/false) {}
unsigned WebAssemblyELFObjectWriter::GetRelocType(const MCValue &Target,
const MCFixup &Fixup,
bool IsPCRel) const {
- // FIXME: Do we need our own relocs?
- return Fixup.getKind();
+ // WebAssembly functions are not allocated in the address space. To resolve a
+ // pointer to a function, we must use a special relocation type.
+ if (const MCSymbolRefExpr *SyExp =
+ dyn_cast<MCSymbolRefExpr>(Fixup.getValue()))
+ if (SyExp->getKind() == MCSymbolRefExpr::VK_WebAssembly_FUNCTION)
+ return ELF::R_WEBASSEMBLY_FUNCTION;
+
+ switch (Fixup.getKind()) {
+ case FK_Data_4:
+ assert(!is64Bit() && "4-byte relocations only supported on wasm32");
+ return ELF::R_WEBASSEMBLY_DATA;
+ case FK_Data_8:
+ assert(is64Bit() && "8-byte relocations only supported on wasm64");
+ return ELF::R_WEBASSEMBLY_DATA;
+ default:
+ llvm_unreachable("unimplemented fixup kind");
+ }
}
MCObjectWriter *llvm::createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
index d2617796ca99..02c717a92101 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCAsmInfo.cpp
@@ -27,11 +27,12 @@ WebAssemblyMCAsmInfo::WebAssemblyMCAsmInfo(const Triple &T) {
// TODO: What should MaxInstLength be?
- PrivateGlobalPrefix = "";
- PrivateLabelPrefix = "";
-
UseDataRegionDirectives = true;
+ // Use .skip instead of .zero because .zero is confusing when used with two
+ // arguments (it doesn't actually zero things out).
+ ZeroDirective = "\t.skip\t";
+
Data8bitsDirective = "\t.int8\t";
Data16bitsDirective = "\t.int16\t";
Data32bitsDirective = "\t.int32\t";
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
index 7c6c79eb5db2..f409bd77442c 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCCodeEmitter.cpp
@@ -13,6 +13,7 @@
//===----------------------------------------------------------------------===//
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCFixup.h"
@@ -26,75 +27,66 @@ using namespace llvm;
#define DEBUG_TYPE "mccodeemitter"
+STATISTIC(MCNumEmitted, "Number of MC instructions emitted.");
+STATISTIC(MCNumFixups, "Number of MC fixups created.");
+
namespace {
class WebAssemblyMCCodeEmitter final : public MCCodeEmitter {
- const MCRegisterInfo &MRI;
-
-public:
- WebAssemblyMCCodeEmitter(const MCInstrInfo &, const MCRegisterInfo &mri,
- MCContext &)
- : MRI(mri) {}
+ const MCInstrInfo &MCII;
+ const MCContext &Ctx;
- ~WebAssemblyMCCodeEmitter() override {}
-
- /// TableGen'erated function for getting the binary encoding for an
- /// instruction.
+ // Implementation generated by tablegen.
uint64_t getBinaryCodeForInstr(const MCInst &MI,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const;
- /// Return binary encoding of operand. If the machine operand requires
- /// relocation, record the relocation and return zero.
- unsigned getMachineOpValue(const MCInst &MI, const MCOperand &MO,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
- uint64_t getMemoryOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const;
-
void encodeInstruction(const MCInst &MI, raw_ostream &OS,
SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const override;
+
+public:
+ WebAssemblyMCCodeEmitter(const MCInstrInfo &mcii, MCContext &ctx)
+ : MCII(mcii), Ctx(ctx) {}
};
} // end anonymous namespace
MCCodeEmitter *llvm::createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx) {
- return new WebAssemblyMCCodeEmitter(MCII, MRI, Ctx);
-}
-
-unsigned WebAssemblyMCCodeEmitter::getMachineOpValue(
- const MCInst &MI, const MCOperand &MO, SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- if (MO.isReg())
- return MRI.getEncodingValue(MO.getReg());
- if (MO.isImm())
- return static_cast<unsigned>(MO.getImm());
-
- assert(MO.isExpr());
-
- assert(MO.getExpr()->getKind() == MCExpr::SymbolRef);
-
- assert(false && "FIXME: not implemented yet");
-
- return 0;
+ return new WebAssemblyMCCodeEmitter(MCII, Ctx);
}
void WebAssemblyMCCodeEmitter::encodeInstruction(
const MCInst &MI, raw_ostream &OS, SmallVectorImpl<MCFixup> &Fixups,
const MCSubtargetInfo &STI) const {
- assert(false && "FIXME: not implemented yet");
-}
-
-// Encode WebAssembly Memory Operand
-uint64_t
-WebAssemblyMCCodeEmitter::getMemoryOpValue(const MCInst &MI, unsigned Op,
- SmallVectorImpl<MCFixup> &Fixups,
- const MCSubtargetInfo &STI) const {
- assert(false && "FIXME: not implemented yet");
- return 0;
+ // FIXME: This is not the real binary encoding. This is an extremely
+ // over-simplified encoding where we just use uint64_t for everything. This
+ // is a temporary measure.
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MI.getOpcode());
+ const MCInstrDesc &Desc = MCII.get(MI.getOpcode());
+ if (Desc.isVariadic())
+ support::endian::Writer<support::little>(OS).write<uint64_t>(
+ MI.getNumOperands() - Desc.NumOperands);
+ for (unsigned i = 0, e = MI.getNumOperands(); i < e; ++i) {
+ const MCOperand &MO = MI.getOperand(i);
+ if (MO.isReg()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getReg());
+ } else if (MO.isImm()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(MO.getImm());
+ } else if (MO.isFPImm()) {
+ support::endian::Writer<support::little>(OS).write<double>(MO.getFPImm());
+ } else if (MO.isExpr()) {
+ support::endian::Writer<support::little>(OS).write<uint64_t>(0);
+ Fixups.push_back(MCFixup::create(
+ (1 + MCII.get(MI.getOpcode()).isVariadic() + i) * sizeof(uint64_t),
+ MO.getExpr(), STI.getTargetTriple().isArch64Bit() ? FK_Data_8 : FK_Data_4,
+ MI.getLoc()));
+ ++MCNumFixups;
+ } else {
+ llvm_unreachable("unexpected operand kind");
+ }
+ }
+
+ ++MCNumEmitted; // Keep track of the number of MCInsts emitted.
}
#include "WebAssemblyGenMCCodeEmitter.inc"
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
index 14cd295353d5..37000f1cd571 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.cpp
@@ -15,10 +15,10 @@
#include "WebAssemblyMCTargetDesc.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "WebAssemblyMCAsmInfo.h"
+#include "WebAssemblyTargetStreamer.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
-#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
@@ -35,52 +35,89 @@ using namespace llvm;
#define GET_REGINFO_MC_DESC
#include "WebAssemblyGenRegisterInfo.inc"
-static MCAsmInfo *createWebAssemblyMCAsmInfo(const MCRegisterInfo & /*MRI*/,
- const Triple &TT) {
+static MCAsmInfo *createMCAsmInfo(const MCRegisterInfo & /*MRI*/,
+ const Triple &TT) {
return new WebAssemblyMCAsmInfo(TT);
}
-static MCInstrInfo *createWebAssemblyMCInstrInfo() {
+static MCInstrInfo *createMCInstrInfo() {
MCInstrInfo *X = new MCInstrInfo();
InitWebAssemblyMCInstrInfo(X);
return X;
}
-static MCStreamer *createWebAssemblyMCStreamer(const Triple &T, MCContext &Ctx,
- MCAsmBackend &MAB,
- raw_pwrite_stream &OS,
- MCCodeEmitter *Emitter,
- bool RelaxAll) {
- return createELFStreamer(Ctx, MAB, OS, Emitter, RelaxAll);
+static MCRegisterInfo *createMCRegisterInfo(const Triple & /*T*/) {
+ MCRegisterInfo *X = new MCRegisterInfo();
+ InitWebAssemblyMCRegisterInfo(X, 0);
+ return X;
}
-static MCInstPrinter *
-createWebAssemblyMCInstPrinter(const Triple & /*T*/, unsigned SyntaxVariant,
- const MCAsmInfo &MAI, const MCInstrInfo &MII,
- const MCRegisterInfo &MRI) {
+static MCInstPrinter *createMCInstPrinter(const Triple & /*T*/,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ const MCInstrInfo &MII,
+ const MCRegisterInfo &MRI) {
assert(SyntaxVariant == 0);
return new WebAssemblyInstPrinter(MAI, MII, MRI);
}
+static MCCodeEmitter *createCodeEmitter(const MCInstrInfo &MCII,
+ const MCRegisterInfo & /*MRI*/,
+ MCContext &Ctx) {
+ return createWebAssemblyMCCodeEmitter(MCII, Ctx);
+}
+
+static MCAsmBackend *createAsmBackend(const Target & /*T*/,
+ const MCRegisterInfo & /*MRI*/,
+ const Triple &TT, StringRef /*CPU*/) {
+ return createWebAssemblyAsmBackend(TT);
+}
+
+static MCSubtargetInfo *createMCSubtargetInfo(const Triple &TT, StringRef CPU,
+ StringRef FS) {
+ return createWebAssemblyMCSubtargetInfoImpl(TT, CPU, FS);
+}
+
+static MCTargetStreamer *
+createObjectTargetStreamer(MCStreamer &S, const MCSubtargetInfo & /*STI*/) {
+ return new WebAssemblyTargetELFStreamer(S);
+}
+
+static MCTargetStreamer *createAsmTargetStreamer(MCStreamer &S,
+ formatted_raw_ostream &OS,
+ MCInstPrinter * /*InstPrint*/,
+ bool /*isVerboseAsm*/) {
+ return new WebAssemblyTargetAsmStreamer(S, OS);
+}
+
// Force static initialization.
extern "C" void LLVMInitializeWebAssemblyTargetMC() {
for (Target *T : {&TheWebAssemblyTarget32, &TheWebAssemblyTarget64}) {
// Register the MC asm info.
- RegisterMCAsmInfoFn X(*T, createWebAssemblyMCAsmInfo);
+ RegisterMCAsmInfoFn X(*T, createMCAsmInfo);
// Register the MC instruction info.
- TargetRegistry::RegisterMCInstrInfo(*T, createWebAssemblyMCInstrInfo);
+ TargetRegistry::RegisterMCInstrInfo(*T, createMCInstrInfo);
- // Register the object streamer
- TargetRegistry::RegisterELFStreamer(*T, createWebAssemblyMCStreamer);
+ // Register the MC register info.
+ TargetRegistry::RegisterMCRegInfo(*T, createMCRegisterInfo);
// Register the MCInstPrinter.
- TargetRegistry::RegisterMCInstPrinter(*T, createWebAssemblyMCInstPrinter);
+ TargetRegistry::RegisterMCInstPrinter(*T, createMCInstPrinter);
+
+ // Register the MC code emitter.
+ TargetRegistry::RegisterMCCodeEmitter(*T, createCodeEmitter);
+
+ // Register the ASM Backend.
+ TargetRegistry::RegisterMCAsmBackend(*T, createAsmBackend);
- // Register the MC code emitter
- TargetRegistry::RegisterMCCodeEmitter(*T, createWebAssemblyMCCodeEmitter);
+ // Register the MC subtarget info.
+ TargetRegistry::RegisterMCSubtargetInfo(*T, createMCSubtargetInfo);
- // Register the ASM Backend
- TargetRegistry::RegisterMCAsmBackend(*T, createWebAssemblyAsmBackend);
+ // Register the object target streamer.
+ TargetRegistry::RegisterObjectTargetStreamer(*T,
+ createObjectTargetStreamer);
+ // Register the asm target streamer.
+ TargetRegistry::RegisterAsmTargetStreamer(*T, createAsmTargetStreamer);
}
}
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
index e78f73e3da95..9bac4f82822a 100644
--- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h
@@ -15,40 +15,62 @@
#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H
+#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Support/DataTypes.h"
namespace llvm {
-class formatted_raw_ostream;
class MCAsmBackend;
class MCCodeEmitter;
class MCContext;
class MCInstrInfo;
-class MCRegisterInfo;
class MCObjectWriter;
-class MCStreamer;
class MCSubtargetInfo;
-class MCTargetStreamer;
-class StringRef;
class Target;
class Triple;
-class raw_ostream;
class raw_pwrite_stream;
extern Target TheWebAssemblyTarget32;
extern Target TheWebAssemblyTarget64;
MCCodeEmitter *createWebAssemblyMCCodeEmitter(const MCInstrInfo &MCII,
- const MCRegisterInfo &MRI,
MCContext &Ctx);
-MCAsmBackend *createWebAssemblyAsmBackend(const Target &T,
- const MCRegisterInfo &MRI,
- const Triple &TT, StringRef CPU);
+MCAsmBackend *createWebAssemblyAsmBackend(const Triple &TT);
MCObjectWriter *createWebAssemblyELFObjectWriter(raw_pwrite_stream &OS,
bool Is64Bit, uint8_t OSABI);
+namespace WebAssembly {
+enum OperandType {
+ /// Basic block label in a branch construct.
+ OPERAND_BASIC_BLOCK = MCOI::OPERAND_FIRST_TARGET,
+ /// Floating-point immediate.
+ OPERAND_FPIMM
+};
+
+/// WebAssembly-specific directive identifiers.
+enum Directive {
+ // FIXME: This is not the real binary encoding.
+ DotParam = UINT64_MAX - 0, ///< .param
+ DotResult = UINT64_MAX - 1, ///< .result
+ DotLocal = UINT64_MAX - 2, ///< .local
+ DotEndFunc = UINT64_MAX - 3, ///< .endfunc
+};
+
+} // end namespace WebAssembly
+
+namespace WebAssemblyII {
+enum {
+ // For variadic instructions, this flag indicates whether an operand
+ // in the variable_ops range is an immediate value.
+ VariableOpIsImmediate = (1 << 0),
+ // For immediate values in the variable_ops range, this flag indicates
+ // whether the value represents a control-flow label.
+ VariableOpImmediateIsLabel = (1 << 1),
+};
+} // end namespace WebAssemblyII
+
} // end namespace llvm
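A sketch of how these TSFlags bits would be tested (flag values copied from the enum above; variadicOpIsLabel is an illustrative name, not a function in the tree):

#include <cstdint>
#include <cstdio>

enum : std::uint64_t {
  VariableOpIsImmediate = 1 << 0,
  VariableOpImmediateIsLabel = 1 << 1,
};

static bool variadicOpIsLabel(std::uint64_t TSFlags) {
  // A variable_ops operand is a label only if it is first an immediate.
  return (TSFlags & VariableOpIsImmediate) &&
         (TSFlags & VariableOpImmediateIsLabel);
}

int main() {
  std::printf("%d\n", variadicOpIsLabel(VariableOpIsImmediate |
                                        VariableOpImmediateIsLabel)); // 1
}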
// Defines symbolic names for WebAssembly registers. This defines a mapping from
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
new file mode 100644
index 000000000000..1d2822869a15
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.cpp
@@ -0,0 +1,94 @@
+//==-- WebAssemblyTargetStreamer.cpp - WebAssembly Target Streamer Methods --=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file defines WebAssembly-specific target streamer classes.
+/// These are for implementing support for target-specific assembly directives.
+///
+//===----------------------------------------------------------------------===//
+
+#include "WebAssemblyTargetStreamer.h"
+#include "InstPrinter/WebAssemblyInstPrinter.h"
+#include "WebAssemblyMCTargetDesc.h"
+#include "WebAssemblyTargetObjectFile.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/ELF.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FormattedStream.h"
+using namespace llvm;
+
+WebAssemblyTargetStreamer::WebAssemblyTargetStreamer(MCStreamer &S)
+ : MCTargetStreamer(S) {}
+
+WebAssemblyTargetAsmStreamer::WebAssemblyTargetAsmStreamer(
+ MCStreamer &S, formatted_raw_ostream &OS)
+ : WebAssemblyTargetStreamer(S), OS(OS) {}
+
+WebAssemblyTargetELFStreamer::WebAssemblyTargetELFStreamer(MCStreamer &S)
+ : WebAssemblyTargetStreamer(S) {}
+
+static void PrintTypes(formatted_raw_ostream &OS, ArrayRef<MVT> Types) {
+ bool First = true;
+ for (MVT Type : Types) {
+ if (First)
+ First = false;
+ else
+ OS << ", ";
+ OS << WebAssembly::TypeToString(Type);
+ }
+ OS << '\n';
+}
+
+void WebAssemblyTargetAsmStreamer::emitParam(ArrayRef<MVT> Types) {
+ OS << "\t.param \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitResult(ArrayRef<MVT> Types) {
+ OS << "\t.result \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitLocal(ArrayRef<MVT> Types) {
+ OS << "\t.local \t";
+ PrintTypes(OS, Types);
+}
+
+void WebAssemblyTargetAsmStreamer::emitEndFunc() { OS << "\t.endfunc\n"; }
+
+// FIXME: What follows is not the real binary encoding.
+
+static void EncodeTypes(MCStreamer &Streamer, ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(Types.size(), sizeof(uint64_t));
+ for (MVT Type : Types)
+ Streamer.EmitIntValue(Type.SimpleTy, sizeof(uint64_t));
+}
+
+void WebAssemblyTargetELFStreamer::emitParam(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotParam, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitResult(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotResult, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitLocal(ArrayRef<MVT> Types) {
+ Streamer.EmitIntValue(WebAssembly::DotLocal, sizeof(uint64_t));
+ EncodeTypes(Streamer, Types);
+}
+
+void WebAssemblyTargetELFStreamer::emitEndFunc() {
+ Streamer.EmitIntValue(WebAssembly::DotEndFunc, sizeof(uint64_t));
+}
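On the textual side, emitParam() with {i32, i64} prints "\t.param \ti32, i64". A minimal sketch of that formatting, with type names as plain strings (the real code goes through WebAssembly::TypeToString):

#include <cstdio>
#include <string>
#include <vector>

static void emitParam(const std::vector<std::string> &Types) {
  std::printf("\t.param \t");
  for (size_t i = 0; i < Types.size(); ++i)
    std::printf("%s%s", i ? ", " : "", Types[i].c_str());
  std::printf("\n");
}

int main() {
  emitParam({"i32", "i64"}); // -> "\t.param \ti32, i64"
}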
diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
new file mode 100644
index 000000000000..c66a51574efb
--- /dev/null
+++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyTargetStreamer.h
@@ -0,0 +1,68 @@
+//==-- WebAssemblyTargetStreamer.h - WebAssembly Target Streamer -*- C++ -*-==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// \brief This file declares WebAssembly-specific target streamer classes.
+/// These are for implementing support for target-specific assembly directives.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+#define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYTARGETSTREAMER_H
+
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/MC/MCStreamer.h"
+
+namespace llvm {
+
+class MCELFStreamer;
+
+/// WebAssembly-specific streamer interface, to implement support for
+/// WebAssembly-specific assembly directives.
+class WebAssemblyTargetStreamer : public MCTargetStreamer {
+public:
+ explicit WebAssemblyTargetStreamer(MCStreamer &S);
+
+ /// .param
+ virtual void emitParam(ArrayRef<MVT> Types) = 0;
+ /// .result
+ virtual void emitResult(ArrayRef<MVT> Types) = 0;
+ /// .local
+ virtual void emitLocal(ArrayRef<MVT> Types) = 0;
+ /// .endfunc
+ virtual void emitEndFunc() = 0;
+};
+
+/// This part is for ASCII assembly output
+class WebAssemblyTargetAsmStreamer final : public WebAssemblyTargetStreamer {
+ formatted_raw_ostream &OS;
+
+public:
+ WebAssemblyTargetAsmStreamer(MCStreamer &S, formatted_raw_ostream &OS);
+
+ void emitParam(ArrayRef<MVT> Types) override;
+ void emitResult(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitEndFunc() override;
+};
+
+/// This part is for ELF object output
+class WebAssemblyTargetELFStreamer final : public WebAssemblyTargetStreamer {
+public:
+ explicit WebAssemblyTargetELFStreamer(MCStreamer &S);
+
+ void emitParam(ArrayRef<MVT> Types) override;
+ void emitResult(ArrayRef<MVT> Types) override;
+ void emitLocal(ArrayRef<MVT> Types) override;
+ void emitEndFunc() override;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/WebAssembly/Makefile b/lib/Target/WebAssembly/Makefile
index ccf63f0be554..c501a2b1ab15 100644
--- a/lib/Target/WebAssembly/Makefile
+++ b/lib/Target/WebAssembly/Makefile
@@ -21,6 +21,6 @@ BUILT_SOURCES = \
WebAssemblyGenRegisterInfo.inc \
WebAssemblyGenSubtargetInfo.inc
-DIRS = InstPrinter TargetInfo MCTargetDesc
+DIRS = InstPrinter TargetInfo MCTargetDesc Disassembler
include $(LEVEL)/Makefile.common
diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
index 0d2b4d9debb9..45ac99d90ed9 100644
--- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp
@@ -17,6 +17,7 @@
#include "WebAssembly.h"
#include "InstPrinter/WebAssemblyInstPrinter.h"
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
+#include "MCTargetDesc/WebAssemblyTargetStreamer.h"
#include "WebAssemblyMCInstLower.h"
#include "WebAssemblyMachineFunctionInfo.h"
#include "WebAssemblyRegisterInfo.h"
@@ -69,7 +70,9 @@ private:
void EmitJumpTableInfo() override;
void EmitConstantPool() override;
void EmitFunctionBodyStart() override;
+ void EmitFunctionBodyEnd() override;
void EmitInstruction(const MachineInstr *MI) override;
+ const MCExpr *lowerConstant(const Constant *CV) override;
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode,
raw_ostream &OS) override;
@@ -80,6 +83,7 @@ private:
MVT getRegType(unsigned RegNo) const;
const char *toString(MVT VT) const;
std::string regToString(const MachineOperand &MO);
+ WebAssemblyTargetStreamer *getTargetStreamer();
};
} // end anonymous namespace
@@ -90,9 +94,9 @@ private:
MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
const TargetRegisterClass *TRC =
- TargetRegisterInfo::isVirtualRegister(RegNo) ?
- MRI->getRegClass(RegNo) :
- MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
+ TargetRegisterInfo::isVirtualRegister(RegNo)
+ ? MRI->getRegClass(RegNo)
+ : MRI->getTargetRegisterInfo()->getMinimalPhysRegClass(RegNo);
for (MVT T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
if (TRC->hasType(T))
return T;
@@ -101,6 +105,10 @@ MVT WebAssemblyAsmPrinter::getRegType(unsigned RegNo) const {
return MVT::Other;
}
+const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
+ return WebAssembly::TypeToString(VT);
+}
+
std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
unsigned RegNo = MO.getReg();
assert(TargetRegisterInfo::isVirtualRegister(RegNo) &&
@@ -111,8 +119,10 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) {
return '$' + utostr(WAReg);
}
-const char *WebAssemblyAsmPrinter::toString(MVT VT) const {
- return WebAssembly::TypeToString(VT);
+WebAssemblyTargetStreamer *
+WebAssemblyAsmPrinter::getTargetStreamer() {
+ MCTargetStreamer *TS = OutStreamer->getTargetStreamer();
+ return static_cast<WebAssemblyTargetStreamer *>(TS);
}
//===----------------------------------------------------------------------===//
@@ -145,29 +155,20 @@ static void ComputeLegalValueVTs(const Function &F, const TargetMachine &TM,
}
void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
- if (!MFI->getParams().empty()) {
- MCInst Param;
- Param.setOpcode(WebAssembly::PARAM);
- for (MVT VT : MFI->getParams())
- Param.addOperand(MCOperand::createImm(VT.SimpleTy));
- EmitToStreamer(*OutStreamer, Param);
- }
+ if (!MFI->getParams().empty())
+ getTargetStreamer()->emitParam(MFI->getParams());
SmallVector<MVT, 4> ResultVTs;
const Function &F(*MF->getFunction());
ComputeLegalValueVTs(F, TM, F.getReturnType(), ResultVTs);
+
// If the return type needs to be legalized it will get converted into
// passing a pointer.
- if (ResultVTs.size() == 1) {
- MCInst Result;
- Result.setOpcode(WebAssembly::RESULT);
- Result.addOperand(MCOperand::createImm(ResultVTs.front().SimpleTy));
- EmitToStreamer(*OutStreamer, Result);
- }
+ if (ResultVTs.size() == 1)
+ getTargetStreamer()->emitResult(ResultVTs);
bool AnyWARegs = false;
- MCInst Local;
- Local.setOpcode(WebAssembly::LOCAL);
+ SmallVector<MVT, 16> LocalTypes;
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
unsigned VReg = TargetRegisterInfo::index2VirtReg(Idx);
unsigned WAReg = MFI->getWAReg(VReg);
@@ -180,22 +181,26 @@ void WebAssemblyAsmPrinter::EmitFunctionBodyStart() {
// Don't declare stackified registers.
if (int(WAReg) < 0)
continue;
- Local.addOperand(MCOperand::createImm(getRegType(VReg).SimpleTy));
+ LocalTypes.push_back(getRegType(VReg));
AnyWARegs = true;
}
auto &PhysRegs = MFI->getPhysRegs();
for (unsigned PReg = 0; PReg < PhysRegs.size(); ++PReg) {
if (PhysRegs[PReg] == -1U)
continue;
- Local.addOperand(MCOperand::createImm(getRegType(PReg).SimpleTy));
+ LocalTypes.push_back(getRegType(PReg));
AnyWARegs = true;
}
if (AnyWARegs)
- EmitToStreamer(*OutStreamer, Local);
+ getTargetStreamer()->emitLocal(LocalTypes);
AsmPrinter::EmitFunctionBodyStart();
}
+void WebAssemblyAsmPrinter::EmitFunctionBodyEnd() {
+ getTargetStreamer()->emitEndFunc();
+}
+
void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
DEBUG(dbgs() << "EmitInstruction: " << *MI << '\n');
@@ -207,10 +212,6 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
// These represent values which are live into the function entry, so there's
// no instruction to emit.
break;
- case WebAssembly::LOOP_END:
- // This is a no-op which just exists to tell AsmPrinter.cpp that there's a
- // fallthrough which nevertheless requires a label for the destination here.
- break;
default: {
WebAssemblyMCInstLower MCInstLowering(OutContext, *this);
MCInst TmpInst;
@@ -221,6 +222,14 @@ void WebAssemblyAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
}
+const MCExpr *WebAssemblyAsmPrinter::lowerConstant(const Constant *CV) {
+ if (const GlobalValue *GV = dyn_cast<GlobalValue>(CV))
+ if (GV->getValueType()->isFunctionTy())
+ return MCSymbolRefExpr::create(
+ getSymbol(GV), MCSymbolRefExpr::VK_WebAssembly_FUNCTION, OutContext);
+ return AsmPrinter::lowerConstant(CV);
+}
+
bool WebAssemblyAsmPrinter::PrintAsmOperand(const MachineInstr *MI,
unsigned OpNo, unsigned AsmVariant,
const char *ExtraCode,
diff --git a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
index e9671ee07e69..a39349c562fd 100644
--- a/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyCFGStackify.cpp
@@ -34,6 +34,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -256,7 +257,8 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
/// code) for a branch instruction to both branch to a block and fallthrough
/// to it, so we check the actual branch operands to see if there are any
/// explicit mentions.
-static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred, MachineBasicBlock *MBB) {
+static bool ExplicitlyBranchesTo(MachineBasicBlock *Pred,
+ MachineBasicBlock *MBB) {
for (MachineInstr &MI : Pred->terminators())
for (MachineOperand &MO : MI.explicit_operands())
if (MO.isMBB() && MO.getMBB() == MBB)
@@ -325,13 +327,21 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
InsertPos = Header->getFirstTerminator();
while (InsertPos != Header->begin() &&
prev(InsertPos)->definesRegister(WebAssembly::EXPR_STACK) &&
- prev(InsertPos)->getOpcode() != WebAssembly::LOOP)
+ prev(InsertPos)->getOpcode() != WebAssembly::LOOP &&
+ prev(InsertPos)->getOpcode() != WebAssembly::END_BLOCK &&
+ prev(InsertPos)->getOpcode() != WebAssembly::END_LOOP)
--InsertPos;
}
// Add the BLOCK.
- BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
- .addMBB(&MBB);
+ BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK));
+
+ // Mark the end of the block.
+ InsertPos = MBB.begin();
+ while (InsertPos != MBB.end() &&
+ InsertPos->getOpcode() == WebAssembly::END_LOOP)
+ ++InsertPos;
+ BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::END_BLOCK));
// Track the farthest-spanning scope that ends at this point.
int Number = MBB.getNumber();
@@ -341,10 +351,11 @@ static void PlaceBlockMarker(MachineBasicBlock &MBB, MachineFunction &MF,
}
/// Insert a LOOP marker for a loop starting at MBB (if it's a loop header).
-static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
- SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
- const WebAssemblyInstrInfo &TII,
- const MachineLoopInfo &MLI) {
+static void PlaceLoopMarker(
+ MachineBasicBlock &MBB, MachineFunction &MF,
+ SmallVectorImpl<MachineBasicBlock *> &ScopeTops,
+ DenseMap<const MachineInstr *, const MachineBasicBlock *> &LoopTops,
+ const WebAssemblyInstrInfo &TII, const MachineLoopInfo &MLI) {
MachineLoop *Loop = MLI.getLoopFor(&MBB);
if (!Loop || Loop->getHeader() != &MBB)
return;
@@ -361,14 +372,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
Iter = next(MachineFunction::iterator(Bottom));
}
MachineBasicBlock *AfterLoop = &*Iter;
- BuildMI(MBB, MBB.begin(), DebugLoc(), TII.get(WebAssembly::LOOP))
- .addMBB(AfterLoop);
- // Emit a special no-op telling the asm printer that we need a label to close
- // the loop scope, even though the destination is only reachable by
- // fallthrough.
- if (!Bottom->back().isBarrier())
- BuildMI(*Bottom, Bottom->end(), DebugLoc(), TII.get(WebAssembly::LOOP_END));
+ // Mark the beginning of the loop (after the end of any existing loop that
+ // ends here).
+ auto InsertPos = MBB.begin();
+ while (InsertPos != MBB.end() &&
+ InsertPos->getOpcode() == WebAssembly::END_LOOP)
+ ++InsertPos;
+ BuildMI(MBB, InsertPos, DebugLoc(), TII.get(WebAssembly::LOOP));
+
+ // Mark the end of the loop.
+ MachineInstr *End = BuildMI(*AfterLoop, AfterLoop->begin(), DebugLoc(),
+ TII.get(WebAssembly::END_LOOP));
+ LoopTops[End] = &MBB;
assert((!ScopeTops[AfterLoop->getNumber()] ||
ScopeTops[AfterLoop->getNumber()]->getNumber() < MBB.getNumber()) &&
@@ -377,6 +393,19 @@ static void PlaceLoopMarker(MachineBasicBlock &MBB, MachineFunction &MF,
ScopeTops[AfterLoop->getNumber()] = &MBB;
}
+static unsigned
+GetDepth(const SmallVectorImpl<const MachineBasicBlock *> &Stack,
+ const MachineBasicBlock *MBB) {
+ unsigned Depth = 0;
+ for (auto X : reverse(Stack)) {
+ if (X == MBB)
+ break;
+ ++Depth;
+ }
+ assert(Depth < Stack.size() && "Branch destination should be in scope");
+ return Depth;
+}
+
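As an aside, the depth computation above is self-contained enough to sketch in isolation. A minimal standalone rendering follows (std::vector and int block numbers stand in for the pass's SmallVector and MachineBasicBlock*; this is an illustration, not part of the patch):

    #include <cassert>
    #include <iostream>
    #include <vector>

    // A branch's immediate is the number of scopes between the top of the
    // scope stack and the entry for its target block (0 = innermost).
    static unsigned getDepth(const std::vector<int> &Stack, int Target) {
      unsigned Depth = 0;
      for (auto I = Stack.rbegin(), E = Stack.rend(); I != E; ++I, ++Depth)
        if (*I == Target)
          break;
      assert(Depth < Stack.size() && "Branch destination should be in scope");
      return Depth;
    }

    int main() {
      // Scopes from outermost to innermost: a block ending at BB4, a loop
      // headed at BB1.
      std::vector<int> Stack = {4, 1};
      std::cout << getDepth(Stack, 1) << '\n'; // 0: innermost (the loop)
      std::cout << getDepth(Stack, 4) << '\n'; // 1: the enclosing block
    }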
/// Insert LOOP and BLOCK markers at appropriate places.
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
const WebAssemblyInstrInfo &TII,
@@ -388,25 +417,57 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
// we may insert at the end.
SmallVector<MachineBasicBlock *, 8> ScopeTops(MF.getNumBlockIDs() + 1);
+  // For each LOOP_END, the corresponding LOOP.
+ DenseMap<const MachineInstr *, const MachineBasicBlock *> LoopTops;
+
for (auto &MBB : MF) {
// Place the LOOP for MBB if MBB is the header of a loop.
- PlaceLoopMarker(MBB, MF, ScopeTops, TII, MLI);
+ PlaceLoopMarker(MBB, MF, ScopeTops, LoopTops, TII, MLI);
// Place the BLOCK for MBB if MBB is branched to from above.
PlaceBlockMarker(MBB, MF, ScopeTops, TII, MLI, MDT);
}
-}
-#ifndef NDEBUG
-static bool
-IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
- const MachineBasicBlock *MBB) {
- for (const auto &Pair : Stack)
- if (Pair.first == MBB)
- return true;
- return false;
+ // Now rewrite references to basic blocks to be depth immediates.
+ SmallVector<const MachineBasicBlock *, 8> Stack;
+ for (auto &MBB : reverse(MF)) {
+ for (auto &MI : reverse(MBB)) {
+ switch (MI.getOpcode()) {
+ case WebAssembly::BLOCK:
+ assert(ScopeTops[Stack.back()->getNumber()] == &MBB &&
+ "Block should be balanced");
+ Stack.pop_back();
+ break;
+ case WebAssembly::LOOP:
+ assert(Stack.back() == &MBB && "Loop top should be balanced");
+ Stack.pop_back();
+ Stack.pop_back();
+ break;
+ case WebAssembly::END_BLOCK:
+ Stack.push_back(&MBB);
+ break;
+ case WebAssembly::END_LOOP:
+ Stack.push_back(&MBB);
+ Stack.push_back(LoopTops[&MI]);
+ break;
+ default:
+ if (MI.isTerminator()) {
+ // Rewrite MBB operands to be depth immediates.
+          // SmallVector has no iterator_range constructor in this revision,
+          // so copy the operands via the iterator-pair constructor.
+          SmallVector<MachineOperand, 4> Ops(MI.operands().begin(),
+                                             MI.operands().end());
+ while (MI.getNumOperands() > 0)
+ MI.RemoveOperand(MI.getNumOperands() - 1);
+ for (auto MO : Ops) {
+ if (MO.isMBB())
+ MO = MachineOperand::CreateImm(GetDepth(Stack, MO.getMBB()));
+ MI.addOperand(MF, MO);
+ }
+ }
+ break;
+ }
+ }
+ }
+ assert(Stack.empty() && "Control flow should be balanced");
}
-#endif
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "********** CFG Stackifying **********\n"
@@ -415,7 +476,9 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
const auto &MLI = getAnalysis<MachineLoopInfo>();
auto &MDT = getAnalysis<MachineDominatorTree>();
+ // Liveness is not tracked for EXPR_STACK physreg.
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
+ MF.getRegInfo().invalidateLiveness();
// RPO sorting needs all loops to be single-entry.
EliminateMultipleEntryLoops(MF, MLI);
@@ -426,43 +489,5 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
// Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
PlaceMarkers(MF, MLI, TII, MDT);
-#ifndef NDEBUG
- // Verify that block and loop beginnings and endings are in LIFO order, and
- // that all references to blocks are to blocks on the stack at the point of
- // the reference.
- SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack;
- for (auto &MBB : MF) {
- while (!Stack.empty() && Stack.back().first == &MBB)
- if (Stack.back().second) {
- assert(Stack.size() >= 2);
- Stack.pop_back();
- Stack.pop_back();
- } else {
- assert(Stack.size() >= 1);
- Stack.pop_back();
- }
- for (auto &MI : MBB)
- switch (MI.getOpcode()) {
- case WebAssembly::LOOP:
- Stack.push_back(std::make_pair(&MBB, false));
- Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true));
- break;
- case WebAssembly::BLOCK:
- Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false));
- break;
- default:
- // Verify that all referenced blocks are in scope. A reference to a
- // block with a negative number is invalid, but can happen with inline
- // asm, so we shouldn't assert on it, but instead let CodeGen properly
- // fail on it.
- for (const MachineOperand &MO : MI.explicit_operands())
- if (MO.isMBB() && MO.getMBB()->getNumber() >= 0)
- assert(IsOnStack(Stack, MO.getMBB()));
- break;
- }
- }
- assert(Stack.empty());
-#endif
-
return true;
}
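For orientation, here is a sketch of what the paired markers look like in printed output for a block enclosing a loop (entirely our illustration; operand spellings are not taken from the patch):

    block          # BLOCK: placed before the branch target's scope opens
      loop         # LOOP: placed at the loop header
      ...
      br_if 0, ... # depth 0 = innermost scope (branch back to the loop head)
      br 1         # depth 1 = next scope out (branch to the block's end)
      end_loop     # END_LOOP: closes the loop scope
    end_block      # END_BLOCK: the original branch target

Branch operands are now the depth immediates computed by GetDepth, counting scopes from innermost outward.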
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index 7a89f788c1ad..e9933b092988 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -573,7 +573,8 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
SDLoc DL(Op);
const auto *GA = cast<GlobalAddressSDNode>(Op);
EVT VT = Op.getValueType();
- assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ assert(GA->getTargetFlags() == 0 &&
+ "Unexpected target flags on generic GlobalAddressSDNode");
if (GA->getAddressSpace() != 0)
fail(DL, DAG, "WebAssembly only expects the 0 address space");
return DAG.getNode(
@@ -587,9 +588,16 @@ WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
SDLoc DL(Op);
const auto *ES = cast<ExternalSymbolSDNode>(Op);
EVT VT = Op.getValueType();
- assert(ES->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
+ assert(ES->getTargetFlags() == 0 &&
+ "Unexpected target flags on generic ExternalSymbolSDNode");
+  // Set the TargetFlags to 0x1, which indicates that this is a "function"
+ // symbol rather than a data symbol. We do this unconditionally even though
+ // we don't know anything about the symbol other than its name, because all
+ // external symbols used in target-independent SelectionDAG code are for
+ // functions.
return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT,
- DAG.getTargetExternalSymbol(ES->getSymbol(), VT));
+ DAG.getTargetExternalSymbol(ES->getSymbol(), VT,
+ /*TargetFlags=*/0x1));
}
SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
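A gloss on the flag convention introduced above: target-independent SelectionDAG code creates ExternalSymbolSDNodes essentially only for libcalls (memcpy and friends), which is why unconditionally treating them as functions is sound. On the consuming side the convention reduces to a bit test; a hypothetical helper for illustration:

    // Bit 0 of WebAssembly's symbol-operand target flags marks a function
    // symbol (vs. a data symbol); the helper name is ours, not the patch's.
    static bool isFunctionSymbolFlag(unsigned TargetFlags) {
      return (TargetFlags & 0x1) != 0;
    }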
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 05efe8903413..fda95953db81 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -41,28 +41,33 @@ let Defs = [ARGUMENTS] in {
// TODO: SelectionDAG's lowering insists on using a pointer as the index for
// jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode
// currently.
+// Set TSFlags{0} to 1 to indicate that the variable_ops are immediates.
+// Set TSFlags{1} to 1 to indicate that the immediates represent labels.
let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops),
[(WebAssemblytableswitch I32:$index, bb:$default)],
- "tableswitch\t$index, $default">;
+ "tableswitch\t$index, $default"> {
+ let TSFlags{0} = 1;
+ let TSFlags{1} = 1;
+}
def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops),
[(WebAssemblytableswitch I64:$index, bb:$default)],
- "tableswitch\t$index, $default">;
+ "tableswitch\t$index, $default"> {
+ let TSFlags{0} = 1;
+ let TSFlags{1} = 1;
+}
} // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1
-// Placemarkers to indicate the start of a block or loop scope. These
+// Placemarkers to indicate the start or end of a block or loop scope. These
// use/clobber EXPR_STACK to prevent them from being moved into the middle of
// an expression tree.
let Uses = [EXPR_STACK], Defs = [EXPR_STACK] in {
-def BLOCK : I<(outs), (ins bb_op:$dst), [], "block \t$dst">;
-def LOOP : I<(outs), (ins bb_op:$dst), [], "loop \t$dst">;
+def BLOCK : I<(outs), (ins), [], "block">;
+def LOOP : I<(outs), (ins), [], "loop">;
+def END_BLOCK : I<(outs), (ins), [], "end_block">;
+def END_LOOP : I<(outs), (ins), [], "end_loop">;
} // Uses = [EXPR_STACK], Defs = [EXPR_STACK]
-// No-op to indicate to the AsmPrinter that a loop ends here, so a
-// basic block label is needed even if it wouldn't otherwise appear so.
-let isTerminator = 1, hasCtrlDep = 1 in
-def LOOP_END : I<(outs), (ins), []>;
-
multiclass RETURN<WebAssemblyRegClass vt> {
def RETURN_#vt : I<(outs), (ins vt:$val), [(WebAssemblyreturn vt:$val)],
"return \t$val">;
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
index 5e7663cdb506..028e9af0834f 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp
@@ -74,6 +74,9 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_IF:
if (HaveCond)
return true;
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(1).isMBB())
+ return true;
Cond.push_back(MachineOperand::CreateImm(true));
Cond.push_back(MI.getOperand(0));
TBB = MI.getOperand(1).getMBB();
@@ -82,12 +85,18 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
case WebAssembly::BR_UNLESS:
if (HaveCond)
return true;
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(1).isMBB())
+ return true;
Cond.push_back(MachineOperand::CreateImm(false));
Cond.push_back(MI.getOperand(0));
TBB = MI.getOperand(1).getMBB();
HaveCond = true;
break;
case WebAssembly::BR:
+ // If we're running after CFGStackify, we can't optimize further.
+ if (!MI.getOperand(0).isMBB())
+ return true;
if (!HaveCond)
TBB = MI.getOperand(0).getMBB();
else
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index f0b4ce7caf51..2e682a475471 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -66,8 +66,18 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper",
// WebAssembly-specific Operands.
//===----------------------------------------------------------------------===//
+let OperandNamespace = "WebAssembly" in {
+
+let OperandType = "OPERAND_BASIC_BLOCK" in
def bb_op : Operand<OtherVT>;
+let OperandType = "OPERAND_FPIMM" in {
+def f32imm_op : Operand<f32>;
+def f64imm_op : Operand<f64>;
+} // OperandType = "OPERAND_FPIMM"
+
+} // OperandNamespace = "WebAssembly"
+
//===----------------------------------------------------------------------===//
// WebAssembly Instruction Format Definitions.
//===----------------------------------------------------------------------===//
@@ -120,31 +130,20 @@ def CONST_I32 : I<(outs I32:$res), (ins i32imm:$imm),
def CONST_I64 : I<(outs I64:$res), (ins i64imm:$imm),
[(set I64:$res, imm:$imm)],
"i64.const\t$res, $imm">;
-def CONST_F32 : I<(outs F32:$res), (ins f32imm:$imm),
+def CONST_F32 : I<(outs F32:$res), (ins f32imm_op:$imm),
[(set F32:$res, fpimm:$imm)],
"f32.const\t$res, $imm">;
-def CONST_F64 : I<(outs F64:$res), (ins f64imm:$imm),
+def CONST_F64 : I<(outs F64:$res), (ins f64imm_op:$imm),
[(set F64:$res, fpimm:$imm)],
"f64.const\t$res, $imm">;
} // isMoveImm = 1
} // Defs = [ARGUMENTS]
-def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)),
- (CONST_I32 tglobaladdr:$dst)>;
-def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)),
- (CONST_I32 texternalsym:$dst)>;
-def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)),
- (CONST_I32 tjumptable:$dst)>;
-
-let Defs = [ARGUMENTS] in {
-
-// Function signature and local variable declaration "instructions".
-def PARAM : I<(outs), (ins variable_ops), [], ".param \t">;
-def RESULT : I<(outs), (ins variable_ops), [], ".result \t">;
-def LOCAL : I<(outs), (ins variable_ops), [], ".local \t">;
-
-} // Defs = [ARGUMENTS]
+def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$addr)),
+ (CONST_I32 tglobaladdr:$addr)>;
+def : Pat<(i32 (WebAssemblywrapper texternalsym:$addr)),
+ (CONST_I32 texternalsym:$addr)>;
//===----------------------------------------------------------------------===//
// Additional sets of instructions.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
index 74ec45d58644..b39ac5212f87 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td
@@ -24,10 +24,25 @@
// WebAssembly constant offsets are performed as unsigned with infinite
// precision, so we need to check for NoUnsignedWrap so that we don't fold an
// offset for an add that needs wrapping.
-def regPlusImm : PatFrag<(ops node:$off, node:$addr),
+def regPlusImm : PatFrag<(ops node:$addr, node:$off),
(add node:$addr, node:$off),
[{ return N->getFlags()->hasNoUnsignedWrap(); }]>;
+// GlobalAddresses are conceptually unsigned values, so we can also fold them
+// into immediate values as long as their offsets are non-negative.
+def regPlusGA : PatFrag<(ops node:$addr, node:$off),
+ (add node:$addr, node:$off),
+ [{
+ return N->getFlags()->hasNoUnsignedWrap() ||
+ (N->getOperand(1)->getOpcode() == WebAssemblyISD::Wrapper &&
+ isa<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0)) &&
+ cast<GlobalAddressSDNode>(N->getOperand(1)->getOperand(0))
+ ->getOffset() >= 0);
+}]>;
+
+// We don't need a regPlusES because external symbols never have constant
+// offsets folded into them, so we can just use add.
+
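An aside on the predicate above, restated as plain C++ (illustrative only, not part of the patch):

    #include <cstdint>

    // Folding an add into the load/store offset field is safe only when the
    // unsigned address computation cannot wrap: either the add carries the
    // nuw flag, or the folded operand is a global address with a
    // non-negative constant offset (symbol addresses are unsigned values).
    static bool canFoldIntoOffsetField(bool AddHasNUW, bool IsGlobalAddress,
                                       int64_t GlobalAddrOffset) {
      return AddHasNUW || (IsGlobalAddress && GlobalAddrOffset >= 0);
    }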
let Defs = [ARGUMENTS] in {
// Basic load.
@@ -49,29 +64,33 @@ def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>;
def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>;
// Select loads with a constant offset.
-def : Pat<(i32 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_I32 imm:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_I64 imm:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(f32 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_F32 imm:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(f64 (load (regPlusImm I32:$addr, imm:$off))),
(LOAD_F64 imm:$off, $addr)>;
-def : Pat<(i32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(f32 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_F32 tglobaladdr:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(f64 (load (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD_F64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_I64 texternalsym:$off, $addr)>;
-def : Pat<(f32 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(f32 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_F32 texternalsym:$off, $addr)>;
-def : Pat<(f64 (load (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(f64 (load (add I32:$addr, (WebAssemblywrapper texternalsym:$off)))),
(LOAD_F64 texternalsym:$off, $addr)>;
// Select loads with just a constant offset.
@@ -135,65 +154,85 @@ def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>;
def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
// Select extending loads with a constant offset.
-def : Pat<(i32 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_S_I32 imm:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I32 imm:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_S_I32 imm:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I32 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_S_I64 imm:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_U_I64 imm:$off, $addr)>;
-def : Pat<(i32 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_S_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_S_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_S_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_S_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (sextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_S_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (zextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (sextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (sextloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_S_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (zextloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (zextloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_U_I64 texternalsym:$off, $addr)>;
// Select extending loads with just a constant offset.
@@ -259,35 +298,45 @@ def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>;
def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>;
// Select "don't care" extending loads with a constant offset.
-def : Pat<(i32 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I32 imm:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I32 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (regPlusImm I32:$addr, imm:$off))),
(LOAD8_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (regPlusImm I32:$addr, imm:$off))),
(LOAD16_U_I64 imm:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm imm:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (regPlusImm I32:$addr, imm:$off))),
(LOAD32_U_I64 imm:$off, $addr)>;
-def : Pat<(i32 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I32 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD8_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD16_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm tglobaladdr:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off)))),
(LOAD32_U_I64 tglobaladdr:$off, $addr)>;
-def : Pat<(i32 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (extloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i32 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i32 (extloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I32 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi8 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi8 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD8_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi16 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi16 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD16_U_I64 texternalsym:$off, $addr)>;
-def : Pat<(i64 (extloadi32 (regPlusImm texternalsym:$off, I32:$addr))),
+def : Pat<(i64 (extloadi32 (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off)))),
(LOAD32_U_I64 texternalsym:$off, $addr)>;
// Select "don't care" extending loads with just a constant offset.
@@ -343,29 +392,37 @@ def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>;
def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>;
// Select stores with a constant offset.
-def : Pat<(store I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store F32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_F32 imm:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(store F64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE_F64 imm:$off, I32:$addr, F64:$val)>;
-def : Pat<(store I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store I32:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store I64:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store F32:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_F32 tglobaladdr:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(store F64:$val, (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE_F64 tglobaladdr:$off, I32:$addr, F64:$val)>;
-def : Pat<(store I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store I32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(store I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store I64:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(store F32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store F32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_F32 texternalsym:$off, I32:$addr, F32:$val)>;
-def : Pat<(store F64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(store F64:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE_F64 texternalsym:$off, I32:$addr, F64:$val)>;
// Select stores with just a constant offset.
@@ -423,35 +480,54 @@ def : Pat<(truncstorei32 I64:$val, I32:$addr),
(STORE32_I64 0, I32:$addr, I64:$val)>;
// Select truncating stores with a constant offset.
-def : Pat<(truncstorei8 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE8_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE16_I32 imm:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE8_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE16_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm imm:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val, (regPlusImm I32:$addr, imm:$off)),
(STORE32_I64 imm:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei8 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE8_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE16_I32 tglobaladdr:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE8_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE16_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm tglobaladdr:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val,
+ (regPlusGA I32:$addr,
+ (WebAssemblywrapper tglobaladdr:$off))),
(STORE32_I64 tglobaladdr:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei8 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei8 I32:$val, (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE8_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei16 I32:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei16 I32:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE16_I32 texternalsym:$off, I32:$addr, I32:$val)>;
-def : Pat<(truncstorei8 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei8 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE8_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei16 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei16 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE16_I64 texternalsym:$off, I32:$addr, I64:$val)>;
-def : Pat<(truncstorei32 I64:$val, (regPlusImm texternalsym:$off, I32:$addr)),
+def : Pat<(truncstorei32 I64:$val,
+ (add I32:$addr,
+ (WebAssemblywrapper texternalsym:$off))),
(STORE32_I64 texternalsym:$off, I32:$addr, I64:$val)>;
// Select truncating stores with just a constant offset.
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
index a953f8247006..022a448590ec 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp
@@ -36,15 +36,17 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol(
return Printer.GetExternalSymbolSymbol(MO.getSymbolName());
}
-MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO,
- MCSymbol *Sym) const {
- assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags");
+MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(MCSymbol *Sym,
+ int64_t Offset,
+ bool IsFunc) const {
+ MCSymbolRefExpr::VariantKind VK =
+ IsFunc ? MCSymbolRefExpr::VK_WebAssembly_FUNCTION
+ : MCSymbolRefExpr::VK_None;
+ const MCExpr *Expr = MCSymbolRefExpr::create(Sym, VK, Ctx);
- const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx);
-
- int64_t Offset = MO.getOffset();
if (Offset != 0) {
- assert(!MO.isJTI() && "Unexpected offset with jump table index");
+ if (IsFunc)
+ report_fatal_error("Function addresses with offsets not supported");
Expr =
MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx);
}
@@ -64,6 +66,9 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
default:
MI->dump();
llvm_unreachable("unknown operand type");
+ case MachineOperand::MO_MachineBasicBlock:
+ MI->dump();
+ llvm_unreachable("MachineBasicBlock operand should have been rewritten");
case MachineOperand::MO_Register: {
// Ignore all implicit register operands.
if (MO.isImplicit())
@@ -89,15 +94,19 @@ void WebAssemblyMCInstLower::Lower(const MachineInstr *MI,
llvm_unreachable("unknown floating point immediate type");
break;
}
- case MachineOperand::MO_MachineBasicBlock:
- MCOp = MCOperand::createExpr(
- MCSymbolRefExpr::create(MO.getMBB()->getSymbol(), Ctx));
- break;
case MachineOperand::MO_GlobalAddress:
- MCOp = LowerSymbolOperand(MO, GetGlobalAddressSymbol(MO));
+ assert(MO.getTargetFlags() == 0 &&
+ "WebAssembly does not use target flags on GlobalAddresses");
+ MCOp = LowerSymbolOperand(GetGlobalAddressSymbol(MO), MO.getOffset(),
+ MO.getGlobal()->getValueType()->isFunctionTy());
break;
case MachineOperand::MO_ExternalSymbol:
- MCOp = LowerSymbolOperand(MO, GetExternalSymbolSymbol(MO));
+ // The target flag indicates whether this is a symbol for a
+ // variable or a function.
+ assert((MO.getTargetFlags() & -2) == 0 &&
+ "WebAssembly uses only one target flag bit on ExternalSymbols");
+ MCOp = LowerSymbolOperand(GetExternalSymbolSymbol(MO), /*Offset=*/0,
+ MO.getTargetFlags() & 1);
break;
}
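A quick gloss on the `& -2` check above: -2 in two's complement is all ones except bit 0, so the AND clears the "is function" bit and the assert requires every remaining bit to be zero. A minimal illustration:

    #include <cassert>

    static void checkOnlyFunctionBit(unsigned Flags) {
      // Masking with ~1u (the unsigned value of -2) keeps every bit above
      // bit 0; all of those must be clear.
      assert((Flags & ~1u) == 0 && "only the function bit may be set");
    }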
diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
index 6d704704f576..ab4ba1c28d53 100644
--- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
+++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.h
@@ -31,9 +31,10 @@ class LLVM_LIBRARY_VISIBILITY WebAssemblyMCInstLower {
MCContext &Ctx;
AsmPrinter &Printer;
- MCOperand LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const;
MCSymbol *GetGlobalAddressSymbol(const MachineOperand &MO) const;
MCSymbol *GetExternalSymbolSymbol(const MachineOperand &MO) const;
+ MCOperand LowerSymbolOperand(MCSymbol *Sym, int64_t Offset,
+ bool IsFunc) const;
public:
WebAssemblyMCInstLower(MCContext &ctx, AsmPrinter &printer)
diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
index 89ef5cdb2bef..537c147e6142 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp
@@ -147,8 +147,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
// block boundaries, and the blocks aren't ordered so the block visitation
// order isn't significant, but we may want to change this in the future.
for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : reverse(MBB)) {
- MachineInstr *Insert = &MI;
+ // Don't use a range-based for loop, because we modify the list as we're
+ // iterating over it and the end iterator may change.
+ for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
+ MachineInstr *Insert = &*MII;
// Don't nest anything inside a phi.
if (Insert->getOpcode() == TargetOpcode::PHI)
break;
@@ -221,7 +223,7 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
Insert = Def;
}
if (AnyStackified)
- ImposeStackOrdering(&MI);
+ ImposeStackOrdering(&*MII);
}
}
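The loop rewrite above sidesteps a subtle pitfall: iterating reverse(MBB) with a range-based for evaluates rend() once up front, so instructions inserted ahead of the old first instruction are silently skipped. A generic sketch, with std::list standing in for the instruction list:

    #include <iostream>
    #include <list>

    int main() {
      std::list<int> L = {1, 2, 3};
      // L.rend() is re-evaluated on every iteration, so an element pushed in
      // front of the old first element is still visited; a range-based for
      // over a reversed view would have captured rend() once and stopped at
      // the old front.
      for (auto It = L.rbegin(); It != L.rend(); ++It)
        if (*It == 1)
          L.push_front(0);
      for (int V : L)
        std::cout << V << ' '; // prints: 0 1 2 3
    }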
diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
index dcada45f96d1..90d8dda530ba 100644
--- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp
@@ -61,17 +61,23 @@ void WebAssemblyRegisterInfo::eliminateFrameIndex(
MachineFunction &MF = *MBB.getParent();
int FrameIndex = MI.getOperand(FIOperandNum).getIndex();
const MachineFrameInfo& MFI = *MF.getFrameInfo();
- int FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
+ int64_t FrameOffset = MFI.getStackSize() + MFI.getObjectOffset(FrameIndex);
if (MI.mayLoadOrStore()) {
// If this is a load or store, make it relative to SP and fold the frame
- // offset directly in
- assert(MI.getOperand(1).getImm() == 0 &&
- "Can't eliminate FI yet if offset is already set");
- MI.getOperand(1).setImm(FrameOffset);
+ // offset directly in.
+ assert(FrameOffset >= 0 && MI.getOperand(1).getImm() >= 0);
+ int64_t Offset = MI.getOperand(1).getImm() + FrameOffset;
+
+ if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max()) {
+      // If this happens, the program is invalid, but better to error here
+      // than generate broken code.
+ report_fatal_error("Memory offset field overflow");
+ }
+ MI.getOperand(1).setImm(Offset);
MI.getOperand(2).ChangeToRegister(WebAssembly::SP32, /*IsDef=*/false);
} else {
- // Otherwise create an i32.add SP, offset and make it the operand
+ // Otherwise create an i32.add SP, offset and make it the operand.
auto &MRI = MF.getRegInfo();
const auto *TII = MF.getSubtarget().getInstrInfo();
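A standalone rendering of the new overflow guard (names are ours; the real code calls report_fatal_error rather than throwing):

    #include <cstdint>
    #include <limits>
    #include <stdexcept>

    // WebAssembly memory-offset fields are 32-bit unsigned, so a folded
    // frame offset beyond UINT32_MAX must be rejected outright rather than
    // silently truncated into wrong code.
    static uint32_t foldFrameOffset(int64_t ExistingImm, int64_t FrameOffset) {
      int64_t Offset = ExistingImm + FrameOffset; // both asserted >= 0
      if (static_cast<uint64_t>(Offset) > std::numeric_limits<uint32_t>::max())
        throw std::runtime_error("Memory offset field overflow");
      return static_cast<uint32_t>(Offset);
    }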
diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
index e31ea46de9f5..b290b4bf7440 100644
--- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp
@@ -45,8 +45,9 @@ WebAssemblyTargetMachine::WebAssemblyTargetMachine(
const Target &T, const Triple &TT, StringRef CPU, StringRef FS,
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
- : LLVMTargetMachine(T, TT.isArch64Bit() ? "e-p:64:64-i64:64-n32:64-S128"
- : "e-p:32:32-i64:64-n32:64-S128",
+ : LLVMTargetMachine(T,
+ TT.isArch64Bit() ? "e-m:e-p:64:64-i64:64-n32:64-S128"
+ : "e-m:e-p:32:32-i64:64-n32:64-S128",
TT, CPU, FS, Options, RM, CM, OL),
TLOF(make_unique<WebAssemblyTargetObjectFile>()) {
// WebAssembly type-checks expressions, but a noreturn function with a return
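For readers decoding the datalayout change: the only new component is `m:e`, which selects ELF-style symbol mangling. Broken out (wasm32 variant; annotations ours):

    e          little-endian
    m:e        ELF name mangling
    p:32:32    32-bit pointers with 32-bit ABI alignment (64:64 for wasm64)
    i64:64     i64 aligned to 64 bits
    n32:64     native integer widths are 32 and 64 bits
    S128       natural stack alignment of 128 bits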
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 92ecde3f90d6..91b3fff05dca 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -5,23 +5,6 @@
pr38151.c
va-arg-22.c
-# WebAssemblyRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator, int, unsigned int, llvm::RegScavenger *) const: Assertion `MI.getOperand(1).getImm() == 0 && "Can't eliminate FI yet if offset is already set"'
-20030313-1.c
-20030916-1.c
-20031012-1.c
-20041126-1.c
-20060420-1.c
-20071202-1.c
-20120808-1.c
-pr20527-1.c
-pr27073.c
-pr36339.c
-pr37573.c
-pr43236.c
-pr43835.c
-pr45070.c
-pr51933.c
-
# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed.
struct-ret-1.c
va-arg-11.c
@@ -140,8 +123,6 @@ pr38051.c
pr39100.c
pr39339.c
-pr40022.c
-pr40657.c
pr43987.c
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index fbec6626d99d..01e65b89f480 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -29,7 +29,7 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
CodeGenOpt::Level OptLevel);
/// This pass initializes a global base register for PIC on x86-32.
-FunctionPass* createX86GlobalBaseRegPass();
+FunctionPass *createX86GlobalBaseRegPass();
/// This pass combines multiple accesses to local-dynamic TLS variables so that
/// the TLS base address for the module is only fetched once per execution path
@@ -49,12 +49,13 @@ FunctionPass *createX86IssueVZeroUpperPass();
/// This will prevent a stall when returning on the Atom.
FunctionPass *createX86PadShortFunctions();
-/// Return a a pass that selectively replaces certain instructions (like add,
+/// Return a pass that selectively replaces certain instructions (like add,
/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA
/// instructions, in order to eliminate execution delays in some processors.
FunctionPass *createX86FixupLEAs();
-/// Return a pass that removes redundant address recalculations.
+/// Return a pass that removes redundant LEA instructions and redundant address
+/// recalculations.
FunctionPass *createX86OptimizeLEAs();
/// Return a pass that optimizes the code-size of x86 call sequences. This is
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index 54d88cbb244e..e8b96e74a7af 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -831,6 +831,12 @@ def CSR_Win64 : CalleeSavedRegs<(add RBX, RBP, RDI, RSI, R12, R13, R14, R15,
def CSR_64_TLS_Darwin : CalleeSavedRegs<(add CSR_64, RCX, RDX, RSI,
R8, R9, R10, R11)>;
+// CSRs that are handled by the prologue and epilogue.
+def CSR_64_CXX_TLS_Darwin_PE : CalleeSavedRegs<(add)>;
+
+// CSRs that are handled explicitly via copies.
+def CSR_64_CXX_TLS_Darwin_ViaCopy : CalleeSavedRegs<(add CSR_64_TLS_Darwin)>;
+
// All GPRs - except r11
def CSR_64_RT_MostRegs : CalleeSavedRegs<(add CSR_64, RAX, RCX, RDX, RSI, RDI,
R8, R9, R10, RSP)>;
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 629d4d3565f2..f48b47934e03 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1002,6 +1002,9 @@ bool X86FastISel::X86SelectRet(const Instruction *I) {
if (!FuncInfo.CanLowerReturn)
return false;
+ if (TLI.supportSplitCSR(FuncInfo.MF))
+ return false;
+
CallingConv::ID CC = F.getCallingConv();
if (CC != CallingConv::C &&
CC != CallingConv::Fast &&
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d31aab0fa141..1ec93b5f2d23 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -265,7 +265,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Without SSE, i64->f64 goes through memory.
setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
}
- }
+ } else if (!Subtarget->is64Bit())
+ setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -2310,6 +2311,18 @@ X86TargetLowering::LowerReturn(SDValue Chain,
DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
}
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *I =
+ TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
+ if (I) {
+ for (; *I; ++I) {
+ if (X86::GR64RegClass.contains(*I))
+ RetOps.push_back(DAG.getRegister(*I, MVT::i64));
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+ }
+ }
+
RetOps[0] = Chain; // Update chain.
// Add the flag if we have it.
@@ -3907,6 +3920,7 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::PSHUFHW:
case X86ISD::PSHUFLW:
case X86ISD::SHUFP:
+ case X86ISD::INSERTPS:
case X86ISD::PALIGNR:
case X86ISD::MOVLHPS:
case X86ISD::MOVLHPD:
@@ -4157,6 +4171,35 @@ static bool hasFPCMov(unsigned X86CC) {
}
}
+
+bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
+ const CallInst &I,
+ unsigned Intrinsic) const {
+
+ const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
+ if (!IntrData)
+ return false;
+
+ switch (IntrData->Type) {
+ case LOADA:
+ case LOADU: {
+ Info.opc = ISD::INTRINSIC_W_CHAIN;
+ Info.memVT = MVT::getVT(I.getType());
+ Info.ptrVal = I.getArgOperand(0);
+ Info.offset = 0;
+ Info.align = (IntrData->Type == LOADA ? Info.memVT.getSizeInBits()/8 : 1);
+ Info.vol = false;
+ Info.readMem = true;
+ Info.writeMem = false;
+ return true;
+ }
+ default:
+ break;
+ }
+
+ return false;
+}
+
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -4743,8 +4786,7 @@ static SDValue getShuffleVectorZeroOrUndef(SDValue V2, unsigned Idx,
/// uses one source. Note that this will set IsUnary for shuffles which use a
/// single input multiple times, and in those cases it will
/// adjust the mask to only have indices within that single input.
-/// FIXME: Add support for Decode*Mask functions that return SM_SentinelZero.
-static bool getTargetShuffleMask(SDNode *N, MVT VT,
+static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
SmallVectorImpl<int> &Mask, bool &IsUnary) {
unsigned NumElems = VT.getVectorNumElements();
SDValue ImmN;
@@ -4761,6 +4803,11 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
DecodeSHUFPMask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
+ case X86ISD::INSERTPS:
+ ImmN = N->getOperand(N->getNumOperands()-1);
+ DecodeINSERTPSMask(cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
+ break;
case X86ISD::UNPCKH:
DecodeUNPCKHMask(VT, Mask);
IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
@@ -4870,10 +4917,7 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
case X86ISD::VPERM2X128:
ImmN = N->getOperand(N->getNumOperands()-1);
DecodeVPERM2X128Mask(VT, cast<ConstantSDNode>(ImmN)->getZExtValue(), Mask);
- // Mask only contains negative index if an element is zero.
- if (std::any_of(Mask.begin(), Mask.end(),
- [](int M){ return M == SM_SentinelZero; }))
- return false;
+ IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1);
break;
case X86ISD::MOVSLDUP:
DecodeMOVSLDUPMask(VT, Mask);
@@ -5008,6 +5052,12 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT,
if (Mask.empty())
return false;
+  // Check if we're getting a shuffle mask with zeroed elements.
+ if (!AllowSentinelZero)
+ if (std::any_of(Mask.begin(), Mask.end(),
+ [](int M){ return M == SM_SentinelZero; }))
+ return false;
+
// If we have a fake unary shuffle, the shuffle mask is spread across two
// inputs that are actually the same node. Re-map the mask to always point
// into the first input.
@@ -5046,19 +5096,19 @@ static SDValue getShuffleScalarElt(SDNode *N, unsigned Index, SelectionDAG &DAG,
// Recurse into target specific vector shuffles to find scalars.
if (isTargetShuffle(Opcode)) {
MVT ShufVT = V.getSimpleValueType();
- unsigned NumElems = ShufVT.getVectorNumElements();
+ int NumElems = (int)ShufVT.getVectorNumElements();
SmallVector<int, 16> ShuffleMask;
bool IsUnary;
- if (!getTargetShuffleMask(N, ShufVT, ShuffleMask, IsUnary))
+ if (!getTargetShuffleMask(N, ShufVT, false, ShuffleMask, IsUnary))
return SDValue();
int Elt = ShuffleMask[Index];
- if (Elt < 0)
+ if (Elt == SM_SentinelUndef)
return DAG.getUNDEF(ShufVT.getVectorElementType());
- SDValue NewV = (Elt < (int)NumElems) ? N->getOperand(0)
- : N->getOperand(1);
+ assert(0 <= Elt && Elt < (2*NumElems) && "Shuffle index out of range");
+ SDValue NewV = (Elt < NumElems) ? N->getOperand(0) : N->getOperand(1);
return getShuffleScalarElt(NewV.getNode(), Elt % NumElems, DAG,
Depth+1);
}
@@ -8165,6 +8215,13 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
DL, VT, V.getOperand(0), BroadcastIdx, Subtarget, DAG))
return TruncBroadcast;
+ MVT BroadcastVT = VT;
+
+ // Peek through any bitcast (only useful for loads).
+ SDValue BC = V;
+ while (BC.getOpcode() == ISD::BITCAST)
+ BC = BC.getOperand(0);
+
// Also check the simpler case, where we can directly reuse the scalar.
if (V.getOpcode() == ISD::BUILD_VECTOR ||
(V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0)) {
@@ -8174,13 +8231,17 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
// Only AVX2 has register broadcasts.
if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V))
return SDValue();
- } else if (MayFoldLoad(V) && !cast<LoadSDNode>(V)->isVolatile()) {
+ } else if (MayFoldLoad(BC) && !cast<LoadSDNode>(BC)->isVolatile()) {
+    // 32-bit targets need to load i64 as an f64 and then bitcast the result.
+ if (!Subtarget->is64Bit() && VT.getScalarType() == MVT::i64)
+ BroadcastVT = MVT::getVectorVT(MVT::f64, VT.getVectorNumElements());
+
// If we are broadcasting a load that is only used by the shuffle
// then we can reduce the vector load to the broadcasted scalar load.
- LoadSDNode *Ld = cast<LoadSDNode>(V);
+ LoadSDNode *Ld = cast<LoadSDNode>(BC);
SDValue BaseAddr = Ld->getOperand(1);
EVT AddrVT = BaseAddr.getValueType();
- EVT SVT = VT.getScalarType();
+ EVT SVT = BroadcastVT.getScalarType();
unsigned Offset = BroadcastIdx * SVT.getStoreSize();
SDValue NewAddr = DAG.getNode(
ISD::ADD, DL, AddrVT, BaseAddr,
@@ -8194,7 +8255,8 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V,
return SDValue();
}
- return DAG.getNode(X86ISD::VBROADCAST, DL, VT, V);
+ V = DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, V);
+ return DAG.getBitcast(VT, V);
}
// Check for whether we can use INSERTPS to perform the shuffle. We only use
@@ -12474,8 +12536,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
// location.
SDValue Chain = DAG.getEntryNode();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, DL, true), DL);
SDValue Args[] = { Chain, Offset };
Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args);
+ Chain =
+ DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), SDValue(), DL);
// TLSCALL will be codegen'ed as call. Inform MFI that function has calls.
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
@@ -12648,13 +12714,21 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
return Op;
}
+ SDValue ValueToStore = Op.getOperand(0);
+ if (SrcVT == MVT::i64 && isScalarFPTypeInSSEReg(Op.getValueType()) &&
+ !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
auto PtrVT = getPointerTy(MF.getDataLayout());
int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Chain = DAG.getStore(
- DAG.getEntryNode(), dl, Op.getOperand(0), StackSlot,
+ DAG.getEntryNode(), dl, ValueToStore, StackSlot,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), false,
false, 0);
return BuildFILD(Op, SrcVT, Chain, StackSlot, DAG);
@@ -13027,7 +13101,13 @@ SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
}
assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
+ SDValue ValueToStore = Op.getOperand(0);
+ if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget->is64Bit())
+ // Bitcasting to f64 here allows us to do a single 64-bit store from
+ // an SSE register, avoiding the store forwarding penalty that would come
+ // with two 32-bit stores.
+ ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, ValueToStore,
StackSlot, MachinePointerInfo(),
false, false, 0);
// For i64 source, we need to add the appropriate power of 2 if the input
@@ -17487,7 +17567,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
return DAG.getMergeValues(Results, dl);
}
case COMPRESS_TO_MEM: {
- SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue DataToCompress = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
@@ -17513,7 +17592,6 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
case TRUNCATE_TO_MEM_VI32:
return LowerINTRINSIC_TRUNCATE_TO_MEM(Op, DAG, MVT::i32);
case EXPAND_FROM_MEM: {
- SDLoc dl(Op);
SDValue Mask = Op.getOperand(4);
SDValue PassThru = Op.getOperand(3);
SDValue Addr = Op.getOperand(2);
@@ -17533,6 +17611,25 @@ static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget *Subtarget,
Mask, PassThru, Subtarget, DAG), Chain};
return DAG.getMergeValues(Results, dl);
}
+ case LOADU:
+ case LOADA: {
+ SDValue Mask = Op.getOperand(4);
+ SDValue PassThru = Op.getOperand(3);
+ SDValue Addr = Op.getOperand(2);
+ SDValue Chain = Op.getOperand(0);
+ MVT VT = Op.getSimpleValueType();
+
+ MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
+ assert(MemIntr && "Expected MemIntrinsicSDNode!");
+
+ if (isAllOnesConstant(Mask)) // return just a load
+ return DAG.getLoad(VT, dl, Chain, Addr, MemIntr->getMemOperand());
+
+ MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
+ SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
+ return DAG.getMaskedLoad(VT, dl, Chain, Addr, VMask, PassThru, VT,
+ MemIntr->getMemOperand(), ISD::NON_EXTLOAD);
+ }
}
}
@@ -19512,24 +19609,37 @@ static SDValue LowerBITCAST(SDValue Op, const X86Subtarget *Subtarget,
MVT SrcVT = Op.getOperand(0).getSimpleValueType();
MVT DstVT = Op.getSimpleValueType();
- if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) {
+ if (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
+ SrcVT == MVT::i64) {
assert(Subtarget->hasSSE2() && "Requires at least SSE2!");
if (DstVT != MVT::f64)
// This conversion needs to be expanded.
return SDValue();
- SDValue InVec = Op->getOperand(0);
- SDLoc dl(Op);
- unsigned NumElts = SrcVT.getVectorNumElements();
- MVT SVT = SrcVT.getVectorElementType();
-
- // Widen the vector in input in the case of MVT::v2i32.
- // Example: from MVT::v2i32 to MVT::v4i32.
+ SDValue Op0 = Op->getOperand(0);
SmallVector<SDValue, 16> Elts;
- for (unsigned i = 0, e = NumElts; i != e; ++i)
- Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, InVec,
- DAG.getIntPtrConstant(i, dl)));
-
+ SDLoc dl(Op);
+ unsigned NumElts;
+ MVT SVT;
+ if (SrcVT.isVector()) {
+ NumElts = SrcVT.getVectorNumElements();
+ SVT = SrcVT.getVectorElementType();
+
+      // Widen the input vector in the case of MVT::v2i32.
+ // Example: from MVT::v2i32 to MVT::v4i32.
+ for (unsigned i = 0, e = NumElts; i != e; ++i)
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, SVT, Op0,
+ DAG.getIntPtrConstant(i, dl)));
+ } else {
+ assert(SrcVT == MVT::i64 && !Subtarget->is64Bit() &&
+ "Unexpected source type in LowerBITCAST");
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(0, dl)));
+ Elts.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op0,
+ DAG.getIntPtrConstant(1, dl)));
+ NumElts = 2;
+ SVT = MVT::i32;
+ }
// Explicitly mark the extra elements as Undef.
Elts.append(NumElts, DAG.getUNDEF(SVT));
@@ -20685,6 +20795,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::VSHLI: return "X86ISD::VSHLI";
case X86ISD::VSRLI: return "X86ISD::VSRLI";
case X86ISD::VSRAI: return "X86ISD::VSRAI";
+ case X86ISD::VROTLI: return "X86ISD::VROTLI";
+ case X86ISD::VROTRI: return "X86ISD::VROTRI";
case X86ISD::CMPP: return "X86ISD::CMPP";
case X86ISD::PCMPEQ: return "X86ISD::PCMPEQ";
case X86ISD::PCMPGT: return "X86ISD::PCMPGT";
@@ -23184,7 +23296,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
return false;
SmallVector<int, 16> OpMask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, OpMask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(Op.getNode(), VT, true, OpMask, IsUnary);
// We only can combine unary shuffles which we can decode the mask for.
if (!HaveMask || !IsUnary)
return false;
@@ -23281,7 +23393,7 @@ static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) {
MVT VT = N.getSimpleValueType();
SmallVector<int, 4> Mask;
bool IsUnary;
- bool HaveMask = getTargetShuffleMask(N.getNode(), VT, Mask, IsUnary);
+ bool HaveMask = getTargetShuffleMask(N.getNode(), VT, false, Mask, IsUnary);
(void)HaveMask;
assert(HaveMask);
@@ -23854,6 +23966,7 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SDValue InVec = N->getOperand(0);
SDValue EltNo = N->getOperand(1);
+ EVT EltVT = N->getValueType(0);
if (!isa<ConstantSDNode>(EltNo))
return SDValue();
@@ -23882,14 +23995,22 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
SmallVector<int, 16> ShuffleMask;
bool UnaryShuffle;
- if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(),
+ if (!getTargetShuffleMask(InVec.getNode(), CurrentVT.getSimpleVT(), true,
ShuffleMask, UnaryShuffle))
return SDValue();
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = CurrentVT.getVectorNumElements();
int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
- int Idx = (Elt > (int)NumElems) ? -1 : ShuffleMask[Elt];
+ int Idx = (Elt > (int)NumElems) ? SM_SentinelUndef : ShuffleMask[Elt];
+
+ if (Idx == SM_SentinelZero)
+ return EltVT.isInteger() ? DAG.getConstant(0, SDLoc(N), EltVT)
+ : DAG.getConstantFP(+0.0, SDLoc(N), EltVT);
+ if (Idx == SM_SentinelUndef)
+ return DAG.getUNDEF(EltVT);
+
+ assert(0 <= Idx && Idx < (int)(2 * NumElems) && "Shuffle index out of range");
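+  // (Assumption from the names: with the new bool argument to
+  // getTargetShuffleMask, mask entries may be the sentinels SM_SentinelZero,
+  // a lane known to be zero, or SM_SentinelUndef, so fold those to a zero
+  // constant or undef before inspecting the shuffle operands. Past the
+  // assert, Idx is a real lane index, and values in [NumElems, 2*NumElems)
+  // select from the second shuffle operand.)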
SDValue LdNode = (Idx < (int)NumElems) ? InVec.getOperand(0)
: InVec.getOperand(1);
@@ -23914,7 +24035,6 @@ static SDValue XFormVExtractWithShuffleIntoLoad(SDNode *N, SelectionDAG &DAG,
if (!LN0 || !LN0->hasNUsesOfValue(AllowedUses, 0) || LN0->isVolatile())
return SDValue();
- EVT EltVT = N->getValueType(0);
// If there's a bitcast before the shuffle, check if the load type and
// alignment is valid.
unsigned Align = LN0->getAlignment();
@@ -27233,6 +27353,32 @@ static SDValue promoteSextBeforeAddNSW(SDNode *Sext, SelectionDAG &DAG,
return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewSext, NewConstant, &Flags);
}
+/// (i8,i32 {s/z}ext ({s/u}divrem (i8 x, i8 y))) ->
+/// (i8,i32 ({s/u}divrem8_{s/z}ext_hreg (i8 x, i8 y)))
+/// This exposes the {s/z}ext to the sdivrem lowering, so that it directly
+/// extends from AH (which we otherwise need to do contortions to access).
+static SDValue getDivRem8(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ auto OpcodeN = N->getOpcode();
+ auto OpcodeN0 = N0.getOpcode();
+ if (!((OpcodeN == ISD::SIGN_EXTEND && OpcodeN0 == ISD::SDIVREM) ||
+ (OpcodeN == ISD::ZERO_EXTEND && OpcodeN0 == ISD::UDIVREM)))
+ return SDValue();
+
+ EVT VT = N->getValueType(0);
+ EVT InVT = N0.getValueType();
+ if (N0.getResNo() != 1 || InVT != MVT::i8 || VT != MVT::i32)
+ return SDValue();
+
+ SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
+ auto DivRemOpcode = OpcodeN0 == ISD::SDIVREM ? X86ISD::SDIVREM8_SEXT_HREG
+ : X86ISD::UDIVREM8_ZEXT_HREG;
+ SDValue R = DAG.getNode(DivRemOpcode, SDLoc(N), NodeTys, N0.getOperand(0),
+ N0.getOperand(1));
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
+ return R.getValue(1);
+}
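+// Worked example (illustrative): for
+//   (i32 (sext (i8 (sdivrem x, y):1)))
+// this produces an SDIVREM8_SEXT_HREG node whose i32 result is the remainder
+// already sign-extended out of AH, avoiding the shift/mask sequence otherwise
+// needed to reach the high byte of AX.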
+
static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget *Subtarget) {
@@ -27243,18 +27389,8 @@ static SDValue PerformSExtCombine(SDNode *N, SelectionDAG &DAG,
EVT InSVT = InVT.getScalarType();
SDLoc DL(N);
- // (i8,i32 sext (sdivrem (i8 x, i8 y)) ->
- // (i8,i32 (sdivrem_sext_hreg (i8 x, i8 y)
- // This exposes the sext to the sdivrem lowering, so that it directly extends
- // from AH (which we otherwise need to do contortions to access).
- if (N0.getOpcode() == ISD::SDIVREM && N0.getResNo() == 1 &&
- InVT == MVT::i8 && VT == MVT::i32) {
- SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::SDIVREM8_SEXT_HREG, DL, NodeTys,
- N0.getOperand(0), N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- return R.getValue(1);
- }
+ if (SDValue DivRem8 = getDivRem8(N, DAG))
+ return DivRem8;
if (!DCI.isBeforeLegalizeOps()) {
if (InVT == MVT::i1) {
@@ -27413,19 +27549,8 @@ static SDValue PerformZExtCombine(SDNode *N, SelectionDAG &DAG,
if (SDValue R = WidenMaskArithmetic(N, DAG, DCI, Subtarget))
return R;
- // (i8,i32 zext (udivrem (i8 x, i8 y)) ->
- // (i8,i32 (udivrem_zext_hreg (i8 x, i8 y)
- // This exposes the zext to the udivrem lowering, so that it directly extends
- // from AH (which we otherwise need to do contortions to access).
- if (N0.getOpcode() == ISD::UDIVREM &&
- N0.getResNo() == 1 && N0.getValueType() == MVT::i8 &&
- VT == MVT::i32) {
- SDVTList NodeTys = DAG.getVTList(MVT::i8, VT);
- SDValue R = DAG.getNode(X86ISD::UDIVREM8_ZEXT_HREG, dl, NodeTys,
- N0.getOperand(0), N0.getOperand(1));
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(0), R.getValue(0));
- return R.getValue(1);
- }
+ if (SDValue DivRem8 = getDivRem8(N, DAG))
+ return DivRem8;
return SDValue();
}
@@ -27923,7 +28048,6 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget);
case X86ISD::BT: return PerformBTCombine(N, DAG, DCI);
case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG);
-// TODO: refactor the [SU]DIVREM8_[SZ]EXT_HREG code so that it's not duplicated.
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND: return PerformZExtCombine(N, DAG, DCI, Subtarget);
case ISD::SIGN_EXTEND: return PerformSExtCombine(N, DAG, DCI, Subtarget);
@@ -28763,3 +28887,51 @@ bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeSet Attr) const {
Attribute::MinSize);
return OptSize && !VT.isVector();
}
+
+void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
+ if (!Subtarget->is64Bit())
+ return;
+
+ // Update IsSplitCSR in X86MachineFunctionInfo.
+ X86MachineFunctionInfo *AFI =
+ Entry->getParent()->getInfo<X86MachineFunctionInfo>();
+ AFI->setIsSplitCSR(true);
+}
+
+void X86TargetLowering::insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
+ const X86RegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
+ if (!IStart)
+ return;
+
+ const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
+ for (const MCPhysReg *I = IStart; *I; ++I) {
+ const TargetRegisterClass *RC = nullptr;
+ if (X86::GR64RegClass.contains(*I))
+ RC = &X86::GR64RegClass;
+ else
+ llvm_unreachable("Unexpected register class in CSRsViaCopy!");
+
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ // Create copy from CSR to a virtual register.
+    // FIXME: this currently does not emit CFI pseudo-instructions; it works
+ // fine for CXX_FAST_TLS since the C++-style TLS access functions should be
+ // nounwind. If we want to generalize this later, we may need to emit
+ // CFI pseudo-instructions.
+ assert(Entry->getParent()->getFunction()->hasFnAttribute(
+ Attribute::NoUnwind) &&
+ "Function should be nounwind in insertCopiesSplitCSR!");
+ Entry->addLiveIn(*I);
+ BuildMI(*Entry, Entry->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ NewVR)
+ .addReg(*I);
+
+ for (auto *Exit : Exits)
+ BuildMI(*Exit, Exit->begin(), DebugLoc(), TII->get(TargetOpcode::COPY),
+ *I)
+ .addReg(NewVR);
+ }
+}
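+// Sketch of the resulting MIR (illustrative; the CSR shown is arbitrary):
+//   entry:   %vregN = COPY %rbx     ; save CSR into a virtual register
+//   ...
+//   return:  %rbx = COPY %vregN     ; restore CSR in every exit block
+// Leaving the save/restore as COPYs lets the register allocator place them,
+// instead of forcing stack spills in the prologue/epilogue.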
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 8bb0e5f8bd36..0ab786e08e02 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -316,6 +316,9 @@ namespace llvm {
// Vector shift elements by immediate
VSHLI, VSRLI, VSRAI,
+ // Bit rotate by immediate
+ VROTLI, VROTRI,
+
// Vector packed double/float comparison.
CMPP,
@@ -837,6 +840,13 @@ namespace llvm {
/// from i32 to i8 but not from i32 to i16.
bool isNarrowingProfitable(EVT VT1, EVT VT2) const override;
+  /// Given an intrinsic, checks whether the intrinsic will need to map to a
+  /// MemIntrinsicNode (touches memory) on this target. If so, it returns true
+  /// and stores the intrinsic information into the IntrinsicInfo that was
+  /// passed to the function.
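+  /// (For X86 the answer presumably comes from the intrinsic tables in
+  /// X86IntrinsicsInfo.h, e.g. the masked LOADA/LOADU entries this patch
+  /// adds; that linkage is inferred, not stated here.)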
+ bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
+ unsigned Intrinsic) const override;
+
/// Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -1057,6 +1067,15 @@ namespace llvm {
const SmallVectorImpl<SDValue> &OutVals,
SDLoc dl, SelectionDAG &DAG) const override;
+ bool supportSplitCSR(MachineFunction *MF) const override {
+ return MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getFunction()->hasFnAttribute(Attribute::NoUnwind);
+ }
+ void initializeSplitCSR(MachineBasicBlock *Entry) const override;
+ void insertCopiesSplitCSR(
+ MachineBasicBlock *Entry,
+ const SmallVectorImpl<MachineBasicBlock *> &Exits) const override;
+
bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const override;
bool mayBeEmittedAsTailCall(CallInst *CI) const override;
diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td
index 0a27c33f033e..49be64883939 100644
--- a/lib/Target/X86/X86InstrAVX512.td
+++ b/lib/Target/X86/X86InstrAVX512.td
@@ -188,7 +188,7 @@ multiclass AVX512_maskable_custom<bits<8> O, Format F,
let isCommutable = IsCommutable in
def NAME: AVX512<O, F, Outs, Ins,
OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
- "$dst , "#IntelSrcAsm#"}",
+ "$dst, "#IntelSrcAsm#"}",
Pattern, itin>;
// Prefer over VMOV*rrk Pat<>
@@ -323,18 +323,16 @@ multiclass AVX512_maskable_custom_cmp<bits<8> O, Format F,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
list<dag> Pattern,
- list<dag> MaskingPattern,
- string Round = "",
- InstrItinClass itin = NoItinerary> {
+ list<dag> MaskingPattern> {
def NAME: AVX512<O, F, Outs, Ins,
- OpcodeStr#"\t{"#AttSrcAsm#", $dst "#Round#"|"#
- "$dst "#Round#", "#IntelSrcAsm#"}",
- Pattern, itin>;
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst|"#
+ "$dst, "#IntelSrcAsm#"}",
+ Pattern, NoItinerary>;
def NAME#k: AVX512<O, F, Outs, MaskingIns,
- OpcodeStr#"\t{"#Round#AttSrcAsm#", $dst {${mask}}|"#
- "$dst {${mask}}, "#IntelSrcAsm#Round#"}",
- MaskingPattern, itin>, EVEX_K;
+ OpcodeStr#"\t{"#AttSrcAsm#", $dst {${mask}}|"#
+ "$dst {${mask}}, "#IntelSrcAsm#"}",
+ MaskingPattern, NoItinerary>, EVEX_K;
}
multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
@@ -342,33 +340,27 @@ multiclass AVX512_maskable_common_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Ins, dag MaskingIns,
string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, dag MaskingRHS,
- string Round = "",
- InstrItinClass itin = NoItinerary> :
+ dag RHS, dag MaskingRHS> :
AVX512_maskable_custom_cmp<O, F, Outs, Ins, MaskingIns, OpcodeStr,
AttSrcAsm, IntelSrcAsm,
[(set _.KRC:$dst, RHS)],
- [(set _.KRC:$dst, MaskingRHS)],
- Round, NoItinerary>;
+ [(set _.KRC:$dst, MaskingRHS)]>;
multiclass AVX512_maskable_cmp<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm,
- dag RHS, string Round = "",
- InstrItinClass itin = NoItinerary> :
+ dag RHS> :
AVX512_maskable_common_cmp<O, F, _, Outs, Ins,
!con((ins _.KRCWM:$mask), Ins),
OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS,
- (and _.KRCWM:$mask, RHS),
- Round, itin>;
+ (and _.KRCWM:$mask, RHS)>;
multiclass AVX512_maskable_cmp_alt<bits<8> O, Format F, X86VectorVTInfo _,
dag Outs, dag Ins, string OpcodeStr,
string AttSrcAsm, string IntelSrcAsm> :
AVX512_maskable_custom_cmp<O, F, Outs,
Ins, !con((ins _.KRCWM:$mask),Ins), OpcodeStr,
- AttSrcAsm, IntelSrcAsm,
- [],[],"", NoItinerary>;
+ AttSrcAsm, IntelSrcAsm, [],[]>;
// Bitcasts between 512-bit vector types. Return the original type since
// no instruction is needed for the conversion.
@@ -1294,7 +1286,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
def rr : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"),
+ "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V;
def rrk : AVX5128I<opc, MRMSrcReg, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
@@ -1311,7 +1303,7 @@ multiclass avx512_blendmask<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {
def rm : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.RC:$src1, _.MemOp:$src2),
!strconcat(OpcodeStr,
- "\t{$src2, $src1, ${dst} |${dst}, $src1, $src2}"),
+ "\t{$src2, $src1, ${dst}|${dst}, $src1, $src2}"),
[]>, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>;
def rmk : AVX5128I<opc, MRMSrcMem, (outs _.RC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, _.MemOp:$src2),
@@ -1426,7 +1418,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
(OpNodeRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
@@ -1449,7 +1441,7 @@ multiclass avx512_cmp_scalar<X86VectorVTInfo _, SDNode OpNode, SDNode OpNodeRnd>
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc,{sae}, $src2, $src1","$src1, $src2,{sae}, $cc">,
+ "$cc, {sae}, $src2, $src1","$src1, $src2, {sae}, $cc">,
EVEX_4V, EVEX_B;
}// let isAsmParserOnly = 1, hasSideEffects = 0
@@ -1831,7 +1823,7 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
defm rrib : AVX512_maskable_cmp<0xC2, MRMSrcReg, _,
(outs _.KRC:$dst),(ins _.RC:$src1, _.RC:$src2, AVXCC:$cc),
"vcmp${cc}"#_.Suffix,
- "{sae}, $src2, $src1", "$src1, $src2,{sae}",
+ "{sae}, $src2, $src1", "$src1, $src2, {sae}",
(X86cmpmRnd (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
imm:$cc,
@@ -1842,8 +1834,8 @@ multiclass avx512_vcmp_sae<X86VectorVTInfo _> {
(outs _.KRC:$dst),
(ins _.RC:$src1, _.RC:$src2, u8imm:$cc),
"vcmp"#_.Suffix,
- "$cc,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $cc">, EVEX_B;
+ "$cc, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $cc">, EVEX_B;
}
}
@@ -1889,13 +1881,13 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
let Predicates = [prd] in {
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1903,14 +1895,14 @@ multiclass avx512_scalar_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1925,13 +1917,13 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
X86VectorVTInfo _, string mem, string broadcast>{
def rr : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ OpcodeStr##_.Suffix#"\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2)))], NoItinerary>;
def rrk : AVX512<opc, MRMSrcReg, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.RC:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix#
- "\t{$src2, $src1, $dst {${mask}}| $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask,
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
@@ -1939,21 +1931,21 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rm : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
- "\t{$src2, $src1, $dst | $dst, $src1, $src2}",
+ "\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2)))], NoItinerary>;
def rmk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.MemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##mem#
- "\t{$src2, $src1, $dst {${mask}} | $dst {${mask}}, $src1, $src2}",
+ "\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}",
[(set _.KRC:$dst, (or _.KRCWM:$mask, (OpNode
(_.VT (bitconvert (_.LdFrag addr:$src1))),
(i32 imm:$src2))))], NoItinerary>, EVEX_K;
def rmb : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst | $dst, ${src1}"
+ _.BroadcastStr##", $dst|$dst, ${src1}"
##_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(OpNode
(_.VT (X86VBroadcast
@@ -1962,7 +1954,7 @@ multiclass avx512_vector_fpclass<bits<8> opc, string OpcodeStr, SDNode OpNode,
def rmbk : AVX512<opc, MRMSrcMem, (outs _.KRC:$dst),
(ins _.KRCWM:$mask, _.ScalarMemOp:$src1, i32u8imm:$src2),
OpcodeStr##_.Suffix##broadcast##"\t{$src2, ${src1}"##
- _.BroadcastStr##", $dst {${mask}} | $dst {${mask}}, ${src1}"##
+ _.BroadcastStr##", $dst {${mask}}|$dst {${mask}}, ${src1}"##
_.BroadcastStr##", $src2}",
[(set _.KRC:$dst,(or _.KRCWM:$mask, (OpNode
(_.VT (X86VBroadcast
@@ -2715,30 +2707,6 @@ defm VMOVUPD : avx512_load_vl<0x10, "vmovupd", avx512vl_f64_info, HasAVX512, 0>,
avx512_store_vl<0x11, "vmovupd", avx512vl_f64_info, HasAVX512>,
PD, VEX_W, EVEX_CD8<64, CD8VF>;
-def: Pat<(v8f64 (int_x86_avx512_mask_loadu_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVUPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_loadu_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
- (VMOVUPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
-
-def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), GR8:$mask)),
- (VMOVAPDZrmkz (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), GR16:$mask)),
- (VMOVAPSZrmkz (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), addr:$ptr)>;
-
-def: Pat<(v8f64 (int_x86_avx512_mask_load_pd_512 addr:$ptr,
- (bc_v8f64 (v16i32 immAllZerosV)), (i8 -1))),
- (VMOVAPDZrm addr:$ptr)>;
-
-def: Pat<(v16f32 (int_x86_avx512_mask_load_ps_512 addr:$ptr,
- (bc_v16f32 (v16i32 immAllZerosV)), (i16 -1))),
- (VMOVAPSZrm addr:$ptr)>;
-
def: Pat<(int_x86_avx512_mask_storeu_ps_512 addr:$ptr, (v16f32 VR512:$src),
GR16:$mask),
(VMOVUPSZmrk addr:$ptr, (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)),
@@ -4088,8 +4056,8 @@ defm VPSLL : avx512_shift_rmi_dq<0x72, 0x73, MRM6r, MRM6m, "vpsll", X86vshli>,
defm VPSRA : avx512_shift_rmi_dq<0x72, 0x72, MRM4r, MRM4m, "vpsra", X86vsrai>,
avx512_shift_rmi_w<0x71, MRM4r, MRM4m, "vpsraw", X86vsrai>, AVX512BIi8Base, EVEX_4V;
-defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", rotr>, AVX512BIi8Base, EVEX_4V;
-defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", rotl>, AVX512BIi8Base, EVEX_4V;
+defm VPROR : avx512_shift_rmi_dq<0x72, 0x72, MRM0r, MRM0m, "vpror", X86vrotri>, AVX512BIi8Base, EVEX_4V;
+defm VPROL : avx512_shift_rmi_dq<0x72, 0x72, MRM1r, MRM1m, "vprol", X86vrotli>, AVX512BIi8Base, EVEX_4V;
defm VPSLL : avx512_shift_types<0xF2, 0xF3, 0xF1, "vpsll", X86vshl>;
defm VPSRA : avx512_shift_types<0xE2, 0xE2, 0xE1, "vpsra", X86vsra>;
@@ -6057,12 +6025,12 @@ multiclass avx512_trunc_common<bits<8> opc, string OpcodeStr, SDNode OpNode,
let mayStore = 1 in {
def mr : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.RC:$src),
- OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX;
def mrk : AVX512XS8I<opc, MRMDestMem, (outs),
(ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[]>, EVEX, EVEX_K;
}//mayStore = 1
}
@@ -6666,12 +6634,12 @@ multiclass compress_by_vec_width<bits<8> opc, X86VectorVTInfo _,
let mayStore = 1 in {
def mr : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.RC:$src),
- OpcodeStr # "\t{$src, $dst |$dst, $src}",
+ OpcodeStr # "\t{$src, $dst|$dst, $src}",
[]>, EVEX_CD8<_.EltSize, CD8VT1>;
def mrk : AVX5128I<opc, MRMDestMem, (outs),
(ins _.MemOp:$dst, _.KRCWM:$mask, _.RC:$src),
- OpcodeStr # "\t{$src, $dst {${mask}} |$dst {${mask}}, $src}",
+ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}",
[(store (_.VT (vselect _.KRCWM:$mask,
(_.VT (X86compress _.RC:$src)), _.ImmAllZerosV)),
addr:$dst)]>,
@@ -6766,7 +6734,7 @@ multiclass avx512_unary_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, i32u8imm:$src2),
- OpcodeStr##_.Suffix, "$src2,{sae}, $src1",
+ OpcodeStr##_.Suffix, "$src2, {sae}, $src1",
"$src1, {sae}, $src2",
(OpNode (_.VT _.RC:$src1),
(i32 imm:$src2),
@@ -6895,8 +6863,8 @@ multiclass avx512_fp_sae_packed_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _>{
defm rrib : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $src3",
+ OpcodeStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
@@ -6907,8 +6875,8 @@ multiclass avx512_fp_sae_scalar_imm<bits<8> opc, string OpcodeStr,
SDNode OpNode, X86VectorVTInfo _> {
defm NAME#rrib : AVX512_maskable_scalar<opc, MRMSrcReg, _, (outs _.RC:$dst),
(ins _.RC:$src1, _.RC:$src2, i32u8imm:$src3),
- OpcodeStr, "$src3,{sae}, $src2, $src1",
- "$src1, $src2,{sae}, $src3",
+ OpcodeStr, "$src3, {sae}, $src2, $src1",
+ "$src1, $src2, {sae}, $src3",
(OpNode (_.VT _.RC:$src1),
(_.VT _.RC:$src2),
(i32 imm:$src3),
diff --git a/lib/Target/X86/X86InstrExtension.td b/lib/Target/X86/X86InstrExtension.td
index c4b2d6d3bb75..af43d9f53325 100644
--- a/lib/Target/X86/X86InstrExtension.td
+++ b/lib/Target/X86/X86InstrExtension.td
@@ -98,22 +98,22 @@ let hasSideEffects = 0, isCodeGenOnly = 1 in {
def MOVZX32_NOREXrr8 : I<0xB6, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVZX>, TB, Sched<[WriteALU]>;
+ [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVZX32_NOREXrm8 : I<0xB6, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movz{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVZX>, TB, Sched<[WriteALULd]>;
+ [], IIC_MOVZX>, TB, OpSize32, Sched<[WriteALULd]>;
def MOVSX32_NOREXrr8 : I<0xBE, MRMSrcReg,
(outs GR32_NOREX:$dst), (ins GR8_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVSX>, TB, Sched<[WriteALU]>;
+ [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALU]>;
let mayLoad = 1 in
def MOVSX32_NOREXrm8 : I<0xBE, MRMSrcMem,
(outs GR32_NOREX:$dst), (ins i8mem_NOREX:$src),
"movs{bl|x}\t{$src, $dst|$dst, $src} # NOREX",
- [], IIC_MOVSX>, TB, Sched<[WriteALULd]>;
+ [], IIC_MOVSX>, TB, OpSize32, Sched<[WriteALULd]>;
}
// MOVSX64rr8 always has a REX prefix and it has an 8-bit register
@@ -146,18 +146,22 @@ def MOVSX64rm32: RI<0x63, MRMSrcMem, (outs GR64:$dst), (ins i32mem:$src),
Sched<[WriteALULd]>, Requires<[In64BitMode]>;
// movzbq and movzwq encodings for the disassembler
-def MOVZX64rr8_Q : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALU]>;
-def MOVZX64rm8_Q : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
- "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALULd]>;
-def MOVZX64rr16_Q : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALU]>;
-def MOVZX64rm16_Q : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
- "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
- TB, Sched<[WriteALULd]>;
+let hasSideEffects = 0 in {
+def MOVZX64rr8 : RI<0xB6, MRMSrcReg, (outs GR64:$dst), (ins GR8:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX64rm8 : RI<0xB6, MRMSrcMem, (outs GR64:$dst), (ins i8mem:$src),
+ "movz{bq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+def MOVZX64rr16 : RI<0xB7, MRMSrcReg, (outs GR64:$dst), (ins GR16:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALU]>;
+let mayLoad = 1 in
+def MOVZX64rm16 : RI<0xB7, MRMSrcMem, (outs GR64:$dst), (ins i16mem:$src),
+ "movz{wq|x}\t{$src, $dst|$dst, $src}", [], IIC_MOVZX>,
+ TB, Sched<[WriteALULd]>;
+}
// 64-bit zero-extension patterns use SUBREG_TO_REG and an operation writing a
// 32-bit register.
diff --git a/lib/Target/X86/X86InstrFragmentsSIMD.td b/lib/Target/X86/X86InstrFragmentsSIMD.td
index 829cedd55fb3..643286324e25 100644
--- a/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -225,6 +225,9 @@ def X86vshli : SDNode<"X86ISD::VSHLI", SDTIntShiftOp>;
def X86vsrli : SDNode<"X86ISD::VSRLI", SDTIntShiftOp>;
def X86vsrai : SDNode<"X86ISD::VSRAI", SDTIntShiftOp>;
+def X86vrotli : SDNode<"X86ISD::VROTLI", SDTIntShiftOp>;
+def X86vrotri : SDNode<"X86ISD::VROTRI", SDTIntShiftOp>;
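+// Per-lane rotate by a uniform immediate, mirroring the VSHLI family above.
+// Illustrative semantics for a v4i32 operand:
+//   (X86vrotli V, 3) == ((V << 3) | (V >> 29)) in each 32-bit lane.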
+
def X86vprot : SDNode<"X86ISD::VPROT",
SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
SDTCisSameAs<0,2>]>>;
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index ea8e56206ce6..9c8339a841c9 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -1273,7 +1273,7 @@ def STOSW : I<0xAB, RawFrmDst, (outs dstidx16:$dst), (ins),
let Defs = [EDI], Uses = [EAX,EDI,EFLAGS] in
def STOSL : I<0xAB, RawFrmDst, (outs dstidx32:$dst), (ins),
"stos{l|d}\t{%eax, $dst|$dst, eax}", [], IIC_STOS>, OpSize32;
-let Defs = [RCX,RDI], Uses = [RAX,RCX,RDI,EFLAGS] in
+let Defs = [RDI], Uses = [RAX,RDI,EFLAGS] in
def STOSQ : RI<0xAB, RawFrmDst, (outs dstidx64:$dst), (ins),
"stosq\t{%rax, $dst|$dst, rax}", [], IIC_STOS>;
@@ -2755,56 +2755,56 @@ def : InstAlias<"aam", (AAM8i8 10)>, Requires<[Not64BitMode]>;
// Disambiguate the mem/imm form of bt-without-a-suffix as btl.
// Likewise for btc/btr/bts.
-def : InstAlias<"bt {$imm, $mem|$mem, $imm}",
+def : InstAlias<"bt\t{$imm, $mem|$mem, $imm}",
(BT32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"btc {$imm, $mem|$mem, $imm}",
+def : InstAlias<"btc\t{$imm, $mem|$mem, $imm}",
(BTC32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"btr {$imm, $mem|$mem, $imm}",
+def : InstAlias<"btr\t{$imm, $mem|$mem, $imm}",
(BTR32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
-def : InstAlias<"bts {$imm, $mem|$mem, $imm}",
+def : InstAlias<"bts\t{$imm, $mem|$mem, $imm}",
(BTS32mi8 i32mem:$mem, i32i8imm:$imm), 0>;
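// In an alias string the mnemonic ends at the first whitespace; using a
// literal tab here (rather than a space) presumably keeps the printed output
// in the same "mnemonic\toperands" form as regular instruction definitions.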
// clr aliases.
-def : InstAlias<"clrb $reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
-def : InstAlias<"clrw $reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
-def : InstAlias<"clrl $reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
-def : InstAlias<"clrq $reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
+def : InstAlias<"clrb\t$reg", (XOR8rr GR8 :$reg, GR8 :$reg), 0>;
+def : InstAlias<"clrw\t$reg", (XOR16rr GR16:$reg, GR16:$reg), 0>;
+def : InstAlias<"clrl\t$reg", (XOR32rr GR32:$reg, GR32:$reg), 0>;
+def : InstAlias<"clrq\t$reg", (XOR64rr GR64:$reg, GR64:$reg), 0>;
// lods aliases. Accept the destination being omitted because it's implicit
// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the destination.
-def : InstAlias<"lodsb $src", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lodsw $src", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods{l|d} $src", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lodsq $src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"lods {$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
-def : InstAlias<"lods {$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
-def : InstAlias<"lods {$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
-def : InstAlias<"lods {$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lodsb\t$src", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lodsw\t$src", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods{l|d}\t$src", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lodsq\t$src", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"lods\t{$src, %al|al, $src}", (LODSB srcidx8:$src), 0>;
+def : InstAlias<"lods\t{$src, %ax|ax, $src}", (LODSW srcidx16:$src), 0>;
+def : InstAlias<"lods\t{$src, %eax|eax, $src}", (LODSL srcidx32:$src), 0>;
+def : InstAlias<"lods\t{$src, %rax|rax, $src}", (LODSQ srcidx64:$src), 0>, Requires<[In64BitMode]>;
// stos aliases. Accept the source being omitted because it's implicit in
// the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the source.
-def : InstAlias<"stosb $dst", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stosw $dst", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos{l|d} $dst", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stosq $dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"stos {%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
-def : InstAlias<"stos {%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
-def : InstAlias<"stos {%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
-def : InstAlias<"stos {%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stosb\t$dst", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stosw\t$dst", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos{l|d}\t$dst", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stosq\t$dst", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"stos\t{%al, $dst|$dst, al}", (STOSB dstidx8:$dst), 0>;
+def : InstAlias<"stos\t{%ax, $dst|$dst, ax}", (STOSW dstidx16:$dst), 0>;
+def : InstAlias<"stos\t{%eax, $dst|$dst, eax}", (STOSL dstidx32:$dst), 0>;
+def : InstAlias<"stos\t{%rax, $dst|$dst, rax}", (STOSQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// scas aliases. Accept the destination being omitted because it's implicit
// in the mnemonic, or the mnemonic suffix being omitted because it's implicit
// in the destination.
-def : InstAlias<"scasb $dst", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scasw $dst", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas{l|d} $dst", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scasq $dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"scas {$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
-def : InstAlias<"scas {$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
-def : InstAlias<"scas {$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
-def : InstAlias<"scas {$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scasb\t$dst", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scasw\t$dst", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas{l|d}\t$dst", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scasq\t$dst", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"scas\t{$dst, %al|al, $dst}", (SCASB dstidx8:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %ax|ax, $dst}", (SCASW dstidx16:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %eax|eax, $dst}", (SCASL dstidx32:$dst), 0>;
+def : InstAlias<"scas\t{$dst, %rax|rax, $dst}", (SCASQ dstidx64:$dst), 0>, Requires<[In64BitMode]>;
// div and idiv aliases for explicit A register.
def : InstAlias<"div{b}\t{$src, %al|al, $src}", (DIV8r GR8 :$src)>;
@@ -2892,30 +2892,30 @@ def : InstAlias<"fnstsw" , (FNSTSW16r)>;
// lcall and ljmp aliases. This seems to be an odd mapping in 64-bit mode, but
// this is compatible with what GAS does.
-def : InstAlias<"lcall $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall {*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"ljmp {*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
-def : InstAlias<"lcall $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"lcall {*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"ljmp {*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
-
-def : InstAlias<"call {*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
-def : InstAlias<"call {*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
-def : InstAlias<"call {*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp {*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP32m opaque48mem:$dst), 0>, Requires<[Not16BitMode]>;
+def : InstAlias<"lcall\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"lcall\t{*}$dst", (FARCALL16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"ljmp\t{*}$dst", (FARJMP16m opaque32mem:$dst), 0>, Requires<[In16BitMode]>;
+
+def : InstAlias<"call\t{*}$dst", (CALL64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP64m i64mem:$dst), 0>, Requires<[In64BitMode]>;
+def : InstAlias<"call\t{*}$dst", (CALL32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP32m i32mem:$dst), 0>, Requires<[In32BitMode]>;
+def : InstAlias<"call\t{*}$dst", (CALL16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp\t{*}$dst", (JMP16m i16mem:$dst), 0>, Requires<[In16BitMode]>;
// "imul <imm>, B" is an alias for "imul <imm>, B, B".
-def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
-def : InstAlias<"imul{w} {$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
-def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
-def : InstAlias<"imul{l} {$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
-def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
-def : InstAlias<"imul{q} {$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri GR16:$r, GR16:$r, i16imm:$imm), 0>;
+def : InstAlias<"imul{w}\t{$imm, $r|$r, $imm}", (IMUL16rri8 GR16:$r, GR16:$r, i16i8imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri GR32:$r, GR32:$r, i32imm:$imm), 0>;
+def : InstAlias<"imul{l}\t{$imm, $r|$r, $imm}", (IMUL32rri8 GR32:$r, GR32:$r, i32i8imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri32 GR64:$r, GR64:$r, i64i32imm:$imm), 0>;
+def : InstAlias<"imul{q}\t{$imm, $r|$r, $imm}", (IMUL64rri8 GR64:$r, GR64:$r, i64i8imm:$imm), 0>;
// inb %dx -> inb %al, %dx
def : InstAlias<"inb\t{%dx|dx}", (IN8rr), 0>;
@@ -2927,46 +2927,46 @@ def : InstAlias<"inl\t$port", (IN32ri u8imm:$port), 0>;
// jmp and call aliases for lcall and ljmp. jmp $42,$5 -> ljmp
-def : InstAlias<"call $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"jmp $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
-def : InstAlias<"call $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"jmp $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
-def : InstAlias<"callw $seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
-def : InstAlias<"jmpw $seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
-def : InstAlias<"calll $seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
-def : InstAlias<"jmpl $seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"call\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>, Requires<[In16BitMode]>;
+def : InstAlias<"call\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
+def : InstAlias<"jmp\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>, Requires<[Not16BitMode]>;
+def : InstAlias<"callw\t$seg, $off", (FARCALL16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpw\t$seg, $off", (FARJMP16i i16imm:$off, i16imm:$seg)>;
+def : InstAlias<"calll\t$seg, $off", (FARCALL32i i32imm:$off, i16imm:$seg)>;
+def : InstAlias<"jmpl\t$seg, $off", (FARJMP32i i32imm:$off, i16imm:$seg)>;
// Force mov without a suffix with a segment and mem to prefer the 'l' form of
// the move. All segment/mem forms are equivalent, this has the shortest
// encoding.
-def : InstAlias<"mov {$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
-def : InstAlias<"mov {$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
+def : InstAlias<"mov\t{$mem, $seg|$seg, $mem}", (MOV32sm SEGMENT_REG:$seg, i32mem:$mem), 0>;
+def : InstAlias<"mov\t{$seg, $mem|$mem, $seg}", (MOV32ms i32mem:$mem, SEGMENT_REG:$seg), 0>;
// Match 'movq <largeimm>, <reg>' as an alias for movabsq.
-def : InstAlias<"movq {$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
+def : InstAlias<"movq\t{$imm, $reg|$reg, $imm}", (MOV64ri GR64:$reg, i64imm:$imm), 0>;
// Match 'movq GR64, MMX' as an alias for movd.
-def : InstAlias<"movq {$src, $dst|$dst, $src}",
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MMX_MOVD64to64rr VR64:$dst, GR64:$src), 0>;
-def : InstAlias<"movq {$src, $dst|$dst, $src}",
+def : InstAlias<"movq\t{$src, $dst|$dst, $src}",
(MMX_MOVD64from64rr GR64:$dst, VR64:$src), 0>;
// movsx aliases
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
-def : InstAlias<"movsx {$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr16 GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movsx\t{$src, $dst|$dst, $src}", (MOVSX64rr32 GR64:$dst, GR32:$src), 0>;
// movzx aliases
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr8_Q GR64:$dst, GR8:$src), 0>;
-def : InstAlias<"movzx {$src, $dst|$dst, $src}", (MOVZX64rr16_Q GR64:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rr8 GR16:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX16rm8 GR16:$dst, i8mem:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr8 GR32:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX32rr16 GR32:$dst, GR16:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr8 GR64:$dst, GR8:$src), 0>;
+def : InstAlias<"movzx\t{$src, $dst|$dst, $src}", (MOVZX64rr16 GR64:$dst, GR16:$src), 0>;
// Note: No GR32->GR64 movzx form.
// outb %dx -> outb %al, %dx
diff --git a/lib/Target/X86/X86InstrMPX.td b/lib/Target/X86/X86InstrMPX.td
index 31608cd4c128..71ab97374dd6 100644
--- a/lib/Target/X86/X86InstrMPX.td
+++ b/lib/Target/X86/X86InstrMPX.td
@@ -15,10 +15,10 @@
multiclass mpx_bound_make<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs BNDR:$dst), (ins i32mem:$src),
- OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- OpcodeStr#" \t{$src, $dst|$dst, $src}", []>,
+ OpcodeStr#"\t{$src, $dst|$dst, $src}", []>,
Requires<[HasMPX, In64BitMode]>;
}
@@ -26,16 +26,16 @@ defm BNDMK : mpx_bound_make<0x1B, "bndmk">, XS;
multiclass mpx_bound_check<bits<8> opc, string OpcodeStr> {
def 32rm: I<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i32mem:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rm: RI<opc, MRMSrcMem, (outs), (ins BNDR:$src1, i64mem:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
def 32rr: I<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR32:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, Not64BitMode]>;
def 64rr: RI<opc, MRMSrcReg, (outs), (ins BNDR:$src1, GR64:$src2),
- OpcodeStr#" \t{$src2, $src1|$src1, $src2}", []>,
+ OpcodeStr#"\t{$src2, $src1|$src1, $src2}", []>,
Requires<[HasMPX, In64BitMode]>;
}
defm BNDCL : mpx_bound_check<0x1A, "bndcl">, XS;
@@ -43,28 +43,28 @@ defm BNDCU : mpx_bound_check<0x1A, "bndcu">, XD;
defm BNDCN : mpx_bound_check<0x1B, "bndcn">, XD;
def BNDMOVRMrr : I<0x1A, MRMSrcReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
def BNDMOVRM32rm : I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVRM64rm : RI<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i128mem:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
def BNDMOVMRrr : I<0x1B, MRMDestReg, (outs BNDR:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX]>;
def BNDMOVMR32mr : I<0x1B, MRMDestMem, (outs i64mem:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, Not64BitMode]>;
def BNDMOVMR64mr : RI<0x1B, MRMDestMem, (outs i128mem:$dst), (ins BNDR:$src),
- "bndmov \t{$src, $dst|$dst, $src}", []>, PD,
+ "bndmov\t{$src, $dst|$dst, $src}", []>, PD,
Requires<[HasMPX, In64BitMode]>;
def BNDSTXmr: I<0x1B, MRMDestMem, (outs), (ins i64mem:$dst, BNDR:$src),
- "bndstx \t{$src, $dst|$dst, $src}", []>, PS,
+ "bndstx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
def BNDLDXrm: I<0x1A, MRMSrcMem, (outs BNDR:$dst), (ins i64mem:$src),
- "bndldx \t{$src, $dst|$dst, $src}", []>, PS,
+ "bndldx\t{$src, $dst|$dst, $src}", []>, PS,
Requires<[HasMPX]>;
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 624b9316e6fd..6a7c45665e9c 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -1808,7 +1808,7 @@ def : InstAlias<"cvtss2si{q}\t{$src, $dst|$dst, $src}",
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
(CVTSD2SI64rr GR64:$dst, VR128:$src), 0>;
def : InstAlias<"cvtsd2si{q}\t{$src, $dst|$dst, $src}",
- (CVTSD2SI64rm GR64:$dst, sdmem:$src)>;
+ (CVTSD2SI64rm GR64:$dst, sdmem:$src), 0>;
/// SSE 2 Only
@@ -7838,9 +7838,7 @@ class avx_broadcast_rm<bits<8> opc, string OpcodeStr, RegisterClass RC,
AVX8I<opc, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src),
!strconcat(OpcodeStr, "\t{$src, $dst|$dst, $src}"),
[(set RC:$dst, (VT (X86VBroadcast (ld_frag addr:$src))))]>,
- Sched<[Sched]>, VEX {
- let mayLoad = 1;
-}
+ Sched<[Sched]>, VEX;
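+// (Assumed rationale: TableGen can infer mayLoad from the (ld_frag addr:$src)
+// pattern above, so the explicit flag was redundant.)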
// AVX2 adds register forms
class avx2_broadcast_rr<bits<8> opc, string OpcodeStr, RegisterClass RC,
@@ -7871,7 +7869,7 @@ let ExeDomain = SSEPackedDouble in
def VBROADCASTSDYrr : avx2_broadcast_rr<0x19, "vbroadcastsd", VR256,
v4f64, v2f64, WriteFShuffle256>, VEX_L;
-let mayLoad = 1, Predicates = [HasAVX2] in
+let mayLoad = 1, hasSideEffects = 0, Predicates = [HasAVX2] in
def VBROADCASTI128 : AVX8I<0x5A, MRMSrcMem, (outs VR256:$dst),
(ins i128mem:$src),
"vbroadcasti128\t{$src, $dst|$dst, $src}", []>,
@@ -8259,6 +8257,9 @@ let Predicates = [HasF16C] in {
(VCVTPH2PSrm addr:$src)>;
def : Pat<(int_x86_vcvtph2ps_128 (vzload_v2i64 addr:$src)),
(VCVTPH2PSrm addr:$src)>;
+ def : Pat<(int_x86_vcvtph2ps_128 (bitconvert
+ (v2i64 (scalar_to_vector (loadi64 addr:$src))))),
+ (VCVTPH2PSrm addr:$src)>;
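+  // This extra pattern lets a plain 64-bit scalar load (seen here as a
+  // bitcast of a scalar_to_vector build) fold into VCVTPH2PSrm just like the
+  // vzload form above.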
def : Pat<(store (f64 (extractelt (bc_v2f64 (v8i16
(int_x86_vcvtps2ph_128 VR128:$src1, i32:$src2))), (iPTR 0))),
diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h
index 646b556faa8f..b525d5eb60a7 100644
--- a/lib/Target/X86/X86IntrinsicsInfo.h
+++ b/lib/Target/X86/X86IntrinsicsInfo.h
@@ -29,7 +29,7 @@ enum IntrinsicType {
INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC,
TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
- EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
+ EXPAND_FROM_MEM, LOADA, LOADU, BLEND, INSERT_SUBVEC,
TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, CONVERT_MASK_TO_VEC, CONVERT_TO_MASK
};
@@ -143,6 +143,18 @@ static const IntrinsicData IntrinsicsWithChain[] = {
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512,
EXPAND_FROM_MEM, X86ISD::EXPAND, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_128, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_256, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_pd_512, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_128, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_256, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_load_ps_512, LOADA, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_128, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_256, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_pd_512, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_128, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_256, LOADU, ISD::DELETED_NODE, 0),
+ X86_INTRINSIC_DATA(avx512_mask_loadu_ps_512, LOADU, ISD::DELETED_NODE, 0),
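+  // (Naming assumption: LOADA covers the aligned load_* masked-load
+  // intrinsics and LOADU the unaligned loadu_* ones; both are now handled in
+  // custom lowering rather than by the ISel patterns this patch removes from
+  // X86InstrAVX512.td.)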
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8,
X86ISD::VTRUNC, 0),
X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8,
@@ -1129,6 +1141,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovs_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxb_w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxd_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovsxw_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VSEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_128, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_db_256, INTR_TYPE_1OP_MASK,
@@ -1165,6 +1213,42 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86ISD::VTRUNCUS, 0),
X86_INTRINSIC_DATA(avx512_mask_pmovus_wb_512, INTR_TYPE_1OP_MASK,
X86ISD::VTRUNCUS, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxb_w_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxd_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_d_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_128, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_256, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pmovzxw_q_512, INTR_TYPE_1OP_MASK,
+ X86ISD::VZEXT, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_128, INTR_TYPE_2OP_MASK,
X86ISD::PMULDQ, 0),
X86_INTRINSIC_DATA(avx512_mask_pmul_dq_256, INTR_TYPE_2OP_MASK,
@@ -1201,12 +1285,54 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_por_q_128, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_256, INTR_TYPE_2OP_MASK, ISD::OR, 0),
X86_INTRINSIC_DATA(avx512_mask_por_q_512, INTR_TYPE_2OP_MASK, ISD::OR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prol_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prolv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_d_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_128, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_256, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pror_q_512, INTR_TYPE_2OP_MASK, X86ISD::VROTRI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_d_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_128, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_256, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
+ X86_INTRINSIC_DATA(avx512_mask_prorv_q_512, INTR_TYPE_2OP_MASK, ISD::ROTR, 0),
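+  // Note the split above: prol/pror (rotate by immediate) use the new
+  // X86ISD::VROTLI/VROTRI nodes, while prolv/prorv (variable per-element
+  // rotates) map onto the generic ISD::ROTL/ROTR opcodes.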
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_128, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_256, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
X86_INTRINSIC_DATA(avx512_mask_pshuf_b_512, INTR_TYPE_2OP_MASK,
X86ISD::PSHUFB, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshuf_d_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFD, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufh_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFHW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_128, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_256, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
+ X86_INTRINSIC_DATA(avx512_mask_pshufl_w_512, INTR_TYPE_2OP_MASK,
+ X86ISD::PSHUFLW, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
@@ -1219,8 +1345,21 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psll_qi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_psll_qi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_128, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_256, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_w_512, INTR_TYPE_2OP_MASK, X86ISD::VSHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_128, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_256, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psll_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_d, VSHIFT_MASK, X86ISD::VSHLI, 0),
X86_INTRINSIC_DATA(avx512_mask_pslli_q, VSHIFT_MASK, X86ISD::VSHLI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv16_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv2_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv32hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv4_di, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv4_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv8_hi, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psllv8_si, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_d, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psllv_q, INTR_TYPE_2OP_MASK, ISD::SHL, 0),
X86_INTRINSIC_DATA(avx512_mask_psra_d, INTR_TYPE_2OP_MASK, X86ISD::VSRA, 0),
@@ -1243,8 +1382,15 @@ static const IntrinsicData IntrinsicsWithoutChain[] = {
X86_INTRINSIC_DATA(avx512_mask_psra_wi_512, INTR_TYPE_2OP_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_d, VSHIFT_MASK, X86ISD::VSRAI, 0),
X86_INTRINSIC_DATA(avx512_mask_psrai_q, VSHIFT_MASK, X86ISD::VSRAI, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav16_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav32_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav4_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_hi, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav8_si, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_d, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrav_q, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_128, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
+ X86_INTRINSIC_DATA(avx512_mask_psrav_q_256, INTR_TYPE_2OP_MASK, ISD::SRA, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_128, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
X86_INTRINSIC_DATA(avx512_mask_psrl_d_256, INTR_TYPE_2OP_MASK, X86ISD::VSRL, 0),
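For orientation: each X86_INTRINSIC_DATA entry above ties an AVX-512 intrinsic ID to a lowering strategy (INTR_TYPE_2OP_MASK, VSHIFT_MASK, ...) and the DAG opcode it lowers to. A minimal sketch of the table's element type, with illustrative field names that only approximate the real X86IntrinsicsInfo.h:

    // Sketch only; field names are assumptions, not the exact header.
    enum IntrinsicType { INTR_TYPE_2OP_MASK, VSHIFT_MASK /*, ... */ };
    struct IntrinsicData {
      unsigned Id;         // e.g. Intrinsic::x86_avx512_mask_prorv_d_128
      IntrinsicType Type;  // how to lower: two operands plus a merge mask
      unsigned Opc0;       // target DAG node, e.g. ISD::ROTR or X86ISD::VSHL
      unsigned Opc1;       // secondary opcode, 0 when unused
    };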
diff --git a/lib/Target/X86/X86MachineFunctionInfo.h b/lib/Target/X86/X86MachineFunctionInfo.h
index 3a7a98db50f4..00515dde5568 100644
--- a/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/lib/Target/X86/X86MachineFunctionInfo.h
@@ -92,6 +92,10 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// used to address arguments in a function using a base pointer.
int SEHFramePtrSaveIndex = 0;
+ /// True if this function has a subset of CSRs that is handled explicitly via
+ /// copies.
+ bool IsSplitCSR = false;
+
private:
/// ForwardedMustTailRegParms - A list of virtual and physical registers
/// that must be forwarded to every musttail call.
@@ -160,6 +164,9 @@ public:
SmallVectorImpl<ForwardedRegister> &getForwardedMustTailRegParms() {
return ForwardedMustTailRegParms;
}
+
+ bool isSplitCSR() const { return IsSplitCSR; }
+ void setIsSplitCSR(bool s) { IsSplitCSR = s; }
};
} // End llvm namespace
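A hedged usage sketch for the new flag (the code that decides to set it is not part of this hunk; the CSR list names are taken from the X86RegisterInfo.cpp hunk further down, and LLVM headers are assumed):

    // Assumed consumer: pick a save list based on the per-function flag.
    const MCPhysReg *pickTLSSaveList(const MachineFunction &MF) {
      const auto *FI = MF.getInfo<X86MachineFunctionInfo>();
      return FI->isSplitCSR() ? CSR_64_CXX_TLS_Darwin_PE_SaveList
                              : CSR_64_TLS_Darwin_SaveList;
    }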
diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp
index 58020d909a43..45cc0aef1d93 100644
--- a/lib/Target/X86/X86OptimizeLEAs.cpp
+++ b/lib/Target/X86/X86OptimizeLEAs.cpp
@@ -9,8 +9,10 @@
//
// This file defines the pass that performs some optimizations with LEA
// instructions in order to improve code size.
-// Currently, it does one thing:
-// 1) Address calculations in load and store instructions are replaced by
+// Currently, it does two things:
+// 1) If there are two LEA instructions calculating addresses which only differ
+// by displacement inside a basic block, one of them is removed.
+// 2) Address calculations in load and store instructions are replaced by
// existing LEA def registers where possible.
//
//===----------------------------------------------------------------------===//
@@ -38,6 +40,7 @@ static cl::opt<bool> EnableX86LEAOpt("enable-x86-lea-opt", cl::Hidden,
cl::init(false));
STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions");
+STATISTIC(NumRedundantLEAs, "Number of redundant LEA instructions removed");
namespace {
class OptimizeLEAPass : public MachineFunctionPass {
@@ -71,6 +74,13 @@ private:
/// \brief Returns true if the instruction is LEA.
bool isLEA(const MachineInstr &MI);
+  /// \brief Returns true if the \p Last LEA instruction can be replaced by
+  /// \p First. The difference between the displacements of the addresses
+  /// calculated by these LEAs is returned in \p AddrDispShift; it is used to
+  /// correctly replace uses of \p Last's def register with \p First's.
+ bool isReplaceable(const MachineInstr &First, const MachineInstr &Last,
+ int64_t &AddrDispShift);
+
/// \brief Returns true if two instructions have memory operands that only
/// differ by displacement. The numbers of the first memory operands for both
/// instructions are specified through \p N1 and \p N2. The address
@@ -79,13 +89,20 @@ private:
const MachineInstr &MI2, unsigned N2,
int64_t &AddrDispShift);
- /// \brief Find all LEA instructions in the basic block.
+  /// \brief Find all LEA instructions in the basic block. Also, assign
+  /// position numbers to all instructions in the basic block to speed up
+  /// the calculation of distances between them.
void findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List);
/// \brief Removes redundant address calculations.
bool removeRedundantAddrCalc(const SmallVectorImpl<MachineInstr *> &List);
+ /// \brief Removes LEAs which calculate similar addresses.
+ bool removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List);
+
+ DenseMap<const MachineInstr *, unsigned> InstrPos;
+
MachineRegisterInfo *MRI;
const X86InstrInfo *TII;
const X86RegisterInfo *TRI;
@@ -99,14 +116,15 @@ FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); }
int OptimizeLEAPass::calcInstrDist(const MachineInstr &First,
const MachineInstr &Last) {
- const MachineBasicBlock *MBB = First.getParent();
-
- // Both instructions must be in the same basic block.
- assert(Last.getParent() == MBB &&
+  // Both instructions must be in the same basic block and must be present
+  // in InstrPos.
+ assert(Last.getParent() == First.getParent() &&
"Instructions are in different basic blocks");
+ assert(InstrPos.find(&First) != InstrPos.end() &&
+ InstrPos.find(&Last) != InstrPos.end() &&
+ "Instructions' positions are undefined");
- return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) -
- std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First));
+ return InstrPos[&Last] - InstrPos[&First];
}
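The point of InstrPos is complexity: std::distance walks the block, so measuring every LEA pair was quadratic, while a cached position makes the distance a constant-time subtraction. A standalone sketch of the numbering and lookup (plain ints stand in for MachineInstrs):

    #include <map>
    #include <vector>
    int main() {
      std::vector<int> Block(4);                 // four "instructions"
      std::map<const int *, unsigned> InstrPos;
      unsigned Pos = 0;
      for (const int &I : Block)
        InstrPos[&I] = Pos += 2;                 // 2, 4, 6, 8
      // O(1) distance, replacing two linear std::distance scans:
      int Dist = InstrPos[&Block[3]] - InstrPos[&Block[1]];
      return Dist == 4 ? 0 : 1;
    }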
// Find the best LEA instruction in the List to replace address recalculation in
@@ -189,6 +207,69 @@ bool OptimizeLEAPass::isLEA(const MachineInstr &MI) {
Opcode == X86::LEA64r || Opcode == X86::LEA64_32r;
}
+// Check that the Last LEA can be replaced by the First LEA. For this to be
+// possible, the following requirements must be met:
+// 1) Addresses calculated by LEAs differ only by displacement.
+// 2) Def registers of LEAs belong to the same class.
+// 3) All uses of the Last LEA def register are replaceable, thus the
+// register is used only as address base.
+bool OptimizeLEAPass::isReplaceable(const MachineInstr &First,
+ const MachineInstr &Last,
+ int64_t &AddrDispShift) {
+ assert(isLEA(First) && isLEA(Last) &&
+ "The function works only with LEA instructions");
+
+ // Compare instructions' memory operands.
+ if (!isSimilarMemOp(Last, 1, First, 1, AddrDispShift))
+ return false;
+
+ // Make sure that LEA def registers belong to the same class. There may be
+ // instructions (like MOV8mr_NOREX) which allow a limited set of registers to
+ // be used as their operands, so we must be sure that replacing one LEA
+ // with another won't lead to putting a wrong register in the instruction.
+ if (MRI->getRegClass(First.getOperand(0).getReg()) !=
+ MRI->getRegClass(Last.getOperand(0).getReg()))
+ return false;
+
+  // Loop over all uses of the Last LEA to check that its def register is
+  // used only as the address base for memory accesses. If so, it can be
+  // replaced; otherwise it cannot.
+ for (auto &MO : MRI->use_operands(Last.getOperand(0).getReg())) {
+ MachineInstr &MI = *MO.getParent();
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode());
+
+    // If the use instruction has no memory operand, the LEA is not
+    // replaceable.
+ if (MemOpNo < 0)
+ return false;
+
+ MemOpNo += X86II::getOperandBias(Desc);
+
+    // If the address base of the use instruction is not the LEA def
+    // register, the LEA is not replaceable.
+ if (!isIdenticalOp(MI.getOperand(MemOpNo + X86::AddrBaseReg), MO))
+ return false;
+
+    // If the LEA def register is used as any other operand of the use
+    // instruction, the LEA is not replaceable.
+ for (unsigned i = 0; i < MI.getNumOperands(); i++)
+ if (i != (unsigned)(MemOpNo + X86::AddrBaseReg) &&
+ isIdenticalOp(MI.getOperand(i), MO))
+ return false;
+
+    // Check that the new address displacement will fit in 4 bytes.
+ if (MI.getOperand(MemOpNo + X86::AddrDisp).isImm() &&
+ !isInt<32>(MI.getOperand(MemOpNo + X86::AddrDisp).getImm() +
+ AddrDispShift))
+ return false;
+ }
+
+ return true;
+}
+
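To make requirement (1) and the final 4-byte check concrete, a hedged standalone sketch follows; the Addr struct is an illustration, not the real memory-operand layout:

    #include <cstdint>
    #include <limits>
    struct Addr { unsigned Base, Index, Scale; int64_t Disp; }; // illustrative
    // Requirement 1: the two addresses may differ only in displacement.
    bool similarOnlyByDisp(const Addr &A, const Addr &B, int64_t &Shift) {
      if (A.Base != B.Base || A.Index != B.Index || A.Scale != B.Scale)
        return false;
      Shift = A.Disp - B.Disp;       // plays the role of AddrDispShift
      return true;
    }
    // The isInt<32>() test above: a rewritten displacement must still fit
    // in a signed 32-bit immediate.
    bool fitsImm32(int64_t UseDisp, int64_t Shift) {
      int64_t N = UseDisp + Shift;
      return N >= std::numeric_limits<int32_t>::min() &&
             N <= std::numeric_limits<int32_t>::max();
    }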
// Check if MI1 and MI2 have memory operands which represent addresses that
// differ only by displacement.
bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
@@ -219,7 +300,15 @@ bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1,
void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB,
SmallVectorImpl<MachineInstr *> &List) {
+ unsigned Pos = 0;
for (auto &MI : MBB) {
+    // Assign a position number to the instruction. Note that we are going to
+    // move some instructions during the optimization; however, there will
+    // never be a need to move two instructions before any selected one. So to
+    // avoid updating multiple positions on each move, we simply advance the
+    // position counter by two, leaving a free slot for a moved instruction.
+ InstrPos[&MI] = Pos += 2;
+
if (isLEA(MI))
List.push_back(const_cast<MachineInstr *>(&MI));
}
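The increment-by-two scheme gives every instruction an even position, so one moved instruction can later take the odd slot just below its anchor without renumbering the block. A tiny self-contained check:

    #include <cassert>
    #include <map>
    int main() {
      int A, B, C, D;                        // stand-ins for instructions
      std::map<int *, unsigned> InstrPos;
      unsigned Pos = 0;
      for (int *I : {&A, &B, &C})
        InstrPos[I] = Pos += 2;              // A=2, B=4, C=6
      InstrPos[&D] = InstrPos[&C] - 1;       // D inserted just before C: D=5
      assert(InstrPos[&B] < InstrPos[&D] && InstrPos[&D] < InstrPos[&C]);
      return 0;
    }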
@@ -270,6 +359,13 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
if (Dist < 0) {
DefMI->removeFromParent();
MBB->insert(MachineBasicBlock::iterator(&MI), DefMI);
+ InstrPos[DefMI] = InstrPos[&MI] - 1;
+
+ // Make sure the instructions' position numbers are sane.
+ assert(((InstrPos[DefMI] == 1 && DefMI == MBB->begin()) ||
+ InstrPos[DefMI] >
+ InstrPos[std::prev(MachineBasicBlock::iterator(DefMI))]) &&
+ "Instruction positioning is broken");
}
// Since we can possibly extend register lifetime, clear kill flags.
@@ -296,6 +392,81 @@ bool OptimizeLEAPass::removeRedundantAddrCalc(
return Changed;
}
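The removeRedundantLEAs function added below scans all LEA pairs and erases losers in place; the iterator discipline matters because erase() already yields the next element. Its generic shape, as a sketch:

    #include <iterator>
    #include <vector>
    // Replaceable is a stand-in for the isReplaceable check.
    template <class T, class Pred>
    void eraseRedundantPairs(std::vector<T *> &List, Pred Replaceable) {
      for (auto I1 = List.begin(); I1 != List.end(); ++I1)
        for (auto I2 = std::next(I1); I2 != List.end();)
          I2 = Replaceable(**I1, **I2) ? List.erase(I2) : std::next(I2);
    }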
+// Try to find similar LEAs in the list and replace one with another.
+bool
+OptimizeLEAPass::removeRedundantLEAs(SmallVectorImpl<MachineInstr *> &List) {
+ bool Changed = false;
+
+ // Loop over all LEA pairs.
+ auto I1 = List.begin();
+ while (I1 != List.end()) {
+ MachineInstr &First = **I1;
+ auto I2 = std::next(I1);
+ while (I2 != List.end()) {
+ MachineInstr &Last = **I2;
+ int64_t AddrDispShift;
+
+      // LEAs should be in occurrence order in the list, so we can freely
+      // replace later LEAs with earlier ones.
+      assert(calcInstrDist(First, Last) > 0 &&
+             "LEAs must be in occurrence order in the list");
+
+ // Check that the Last LEA instruction can be replaced by the First.
+ if (!isReplaceable(First, Last, AddrDispShift)) {
+ ++I2;
+ continue;
+ }
+
+ // Loop over all uses of the Last LEA and update their operands. Note that
+ // the correctness of this has already been checked in the isReplaceable
+ // function.
+ for (auto UI = MRI->use_begin(Last.getOperand(0).getReg()),
+ UE = MRI->use_end();
+ UI != UE;) {
+ MachineOperand &MO = *UI++;
+ MachineInstr &MI = *MO.getParent();
+
+ // Get the number of the first memory operand.
+ const MCInstrDesc &Desc = MI.getDesc();
+ int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) +
+ X86II::getOperandBias(Desc);
+
+ // Update address base.
+ MO.setReg(First.getOperand(0).getReg());
+
+ // Update address disp.
+ MachineOperand *Op = &MI.getOperand(MemOpNo + X86::AddrDisp);
+ if (Op->isImm())
+ Op->setImm(Op->getImm() + AddrDispShift);
+ else if (Op->isGlobal())
+ Op->setOffset(Op->getOffset() + AddrDispShift);
+ else
+ llvm_unreachable("Invalid address displacement operand");
+ }
+
+ // Since we can possibly extend register lifetime, clear kill flags.
+ MRI->clearKillFlags(First.getOperand(0).getReg());
+
+ ++NumRedundantLEAs;
+ DEBUG(dbgs() << "OptimizeLEAs: Remove redundant LEA: "; Last.dump(););
+
+      // By this point, all of the Last LEA's uses have been replaced, so we
+      // can safely remove it.
+ assert(MRI->use_empty(Last.getOperand(0).getReg()) &&
+ "The LEA's def register must have no uses");
+ Last.eraseFromParent();
+
+ // Erase removed LEA from the list.
+ I2 = List.erase(I2);
+
+ Changed = true;
+ }
+ ++I1;
+ }
+
+ return Changed;
+}
+
bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
@@ -310,6 +481,7 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
// Process all basic blocks.
for (auto &MBB : MF) {
SmallVector<MachineInstr *, 16> LEAs;
+ InstrPos.clear();
// Find all LEA instructions in basic block.
findLEAs(MBB, LEAs);
@@ -318,6 +490,11 @@ bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) {
if (LEAs.empty())
continue;
+ // Remove redundant LEA instructions. The optimization may have a negative
+ // effect on performance, so do it only for -Oz.
+ if (MF.getFunction()->optForMinSize())
+ Changed |= removeRedundantLEAs(LEAs);
+
// Remove redundant address calculations.
Changed |= removeRedundantAddrCalc(LEAs);
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 58404433e1ae..274b56688558 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -250,7 +250,8 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_64_RT_AllRegs_SaveList;
case CallingConv::CXX_FAST_TLS:
if (Is64Bit)
- return CSR_64_TLS_Darwin_SaveList;
+ return MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR() ?
+ CSR_64_CXX_TLS_Darwin_PE_SaveList : CSR_64_TLS_Darwin_SaveList;
break;
case CallingConv::Intel_OCL_BI: {
if (HasAVX512 && IsWin64)
@@ -305,6 +306,15 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
return CSR_32_SaveList;
}
+const MCPhysReg *X86RegisterInfo::getCalleeSavedRegsViaCopy(
+ const MachineFunction *MF) const {
+ assert(MF && "Invalid MachineFunction pointer.");
+ if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS &&
+ MF->getInfo<X86MachineFunctionInfo>()->isSplitCSR())
+ return CSR_64_CXX_TLS_Darwin_ViaCopy_SaveList;
+ return nullptr;
+}
+
const uint32_t *
X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const {
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index f014c8f6ff61..8d0094cbf3d6 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -99,6 +99,8 @@ public:
/// callee-save registers on this target.
const MCPhysReg *
getCalleeSavedRegs(const MachineFunction* MF) const override;
+ const MCPhysReg *
+ getCalleeSavedRegsViaCopy(const MachineFunction *MF) const override;
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID) const override;
const uint32_t *getNoPreservedMask() const override;
diff --git a/lib/Transforms/IPO/ForceFunctionAttrs.cpp b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
index 816291dac9e8..6df044762cf4 100644
--- a/lib/Transforms/IPO/ForceFunctionAttrs.cpp
+++ b/lib/Transforms/IPO/ForceFunctionAttrs.cpp
@@ -22,7 +22,7 @@ static cl::list<std::string>
ForceAttributes("force-attribute", cl::Hidden,
cl::desc("Add an attribute to a function. This should be a "
"pair of 'function-name:attribute-name', for "
- "example -force-add-attribute=foo:noinline. This "
+ "example -force-attribute=foo:noinline. This "
"option can be specified multiple times."));
static Attribute::AttrKind parseAttrKind(StringRef Kind) {
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 6dcfb3f83004..527fdd1885a4 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -6,16 +6,11 @@
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
-//
-// This file implements a simple interprocedural pass which walks the
-// call-graph, looking for functions which do not access or only read
-// non-local memory, and marking them readnone/readonly. It does the
-// same with function arguments independently, marking them readonly/
-// readnone/nocapture. Finally, well-known library call declarations
-// are marked with all attributes that are consistent with the
-// function's standard definition. This pass is implemented as a
-// bottom-up traversal of the call-graph.
-//
+///
+/// \file
+/// This file implements interprocedural passes which walk the
+/// call-graph deducing and/or propagating function attributes.
+///
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO.h"
@@ -57,19 +52,14 @@ typedef SmallSetVector<Function *, 8> SCCNodeSet;
}
namespace {
-struct FunctionAttrs : public CallGraphSCCPass {
+struct PostOrderFunctionAttrs : public CallGraphSCCPass {
static char ID; // Pass identification, replacement for typeid
- FunctionAttrs() : CallGraphSCCPass(ID) {
- initializeFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ PostOrderFunctionAttrs() : CallGraphSCCPass(ID) {
+ initializePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
}
bool runOnSCC(CallGraphSCC &SCC) override;
- bool doInitialization(CallGraph &CG) override {
- Revisit.clear();
- return false;
- }
- bool doFinalization(CallGraph &CG) override;
-
+
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<AssumptionCacheTracker>();
@@ -79,20 +69,19 @@ struct FunctionAttrs : public CallGraphSCCPass {
private:
TargetLibraryInfo *TLI;
- SmallVector<WeakVH,16> Revisit;
};
}
-char FunctionAttrs::ID = 0;
-INITIALIZE_PASS_BEGIN(FunctionAttrs, "functionattrs",
+char PostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
-INITIALIZE_PASS_END(FunctionAttrs, "functionattrs",
+INITIALIZE_PASS_END(PostOrderFunctionAttrs, "functionattrs",
"Deduce function attributes", false, false)
-Pass *llvm::createFunctionAttrsPass() { return new FunctionAttrs(); }
+Pass *llvm::createPostOrderFunctionAttrsPass() { return new PostOrderFunctionAttrs(); }
namespace {
/// The three kinds of memory access relevant to 'readonly' and
@@ -949,8 +938,7 @@ static bool setDoesNotRecurse(Function &F) {
return true;
}
-static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
- SmallVectorImpl<WeakVH> &Revisit) {
+static bool addNoRecurseAttrs(const CallGraphSCC &SCC) {
// Try and identify functions that do not recurse.
// If the SCC contains multiple nodes we know for sure there is recursion.
@@ -973,32 +961,11 @@ static bool addNoRecurseAttrs(const CallGraphSCC &SCC,
// Function calls a potentially recursive function.
return setDoesNotRecurse(*F);
- // We know that F is not obviously recursive, but we haven't been able to
- // prove that it doesn't actually recurse. Add it to the Revisit list to try
- // again top-down later.
- Revisit.push_back(F);
+ // Nothing else we can deduce usefully during the postorder traversal.
return false;
}
-static bool addNoRecurseAttrsTopDownOnly(Function *F) {
- // If F is internal and all uses are in norecurse functions, then F is also
- // norecurse.
- if (F->doesNotRecurse())
- return false;
- if (F->hasInternalLinkage()) {
- for (auto *U : F->users())
- if (auto *I = dyn_cast<Instruction>(U)) {
- if (!I->getParent()->getParent()->doesNotRecurse())
- return false;
- } else {
- return false;
- }
- return setDoesNotRecurse(*F);
- }
- return false;
-}
-
-bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
+bool PostOrderFunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
bool Changed = false;
@@ -1040,19 +1007,100 @@ bool FunctionAttrs::runOnSCC(CallGraphSCC &SCC) {
Changed |= addNoAliasAttrs(SCCNodes);
Changed |= addNonNullAttrs(SCCNodes, *TLI);
}
-
- Changed |= addNoRecurseAttrs(SCC, Revisit);
+
+ Changed |= addNoRecurseAttrs(SCC);
return Changed;
}
-bool FunctionAttrs::doFinalization(CallGraph &CG) {
+namespace {
+/// A pass to do RPO deduction and propagation of function attributes.
+///
+/// This pass provides a general RPO or "top down" propagation of
+/// function attributes. For a few (rare) cases, we can deduce significantly
+/// more about function attributes by working in RPO, so this pass
+/// provides the complement to the post-order pass above where the majority of
+/// deduction is performed.
+// FIXME: Currently there is no RPO CGSCC pass structure to slide into and so
+// this is a boring module pass, but eventually it should be an RPO CGSCC pass
+// when such infrastructure is available.
+struct ReversePostOrderFunctionAttrs : public ModulePass {
+ static char ID; // Pass identification, replacement for typeid
+ ReversePostOrderFunctionAttrs() : ModulePass(ID) {
+ initializeReversePostOrderFunctionAttrsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnModule(Module &M) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ AU.addRequired<CallGraphWrapperPass>();
+ }
+};
+}
+
+char ReversePostOrderFunctionAttrs::ID = 0;
+INITIALIZE_PASS_BEGIN(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
+INITIALIZE_PASS_END(ReversePostOrderFunctionAttrs, "rpo-functionattrs",
+ "Deduce function attributes in RPO", false, false)
+
+Pass *llvm::createReversePostOrderFunctionAttrsPass() {
+ return new ReversePostOrderFunctionAttrs();
+}
+
+static bool addNoRecurseAttrsTopDown(Function &F) {
+ // We check the preconditions for the function prior to calling this to avoid
+ // the cost of building up a reversible post-order list. We assert them here
+ // to make sure none of the invariants this relies on were violated.
+ assert(!F.isDeclaration() && "Cannot deduce norecurse without a definition!");
+ assert(!F.doesNotRecurse() &&
+         "This function has already been deduced as norecurse!");
+ assert(F.hasInternalLinkage() &&
+ "Can only do top-down deduction for internal linkage functions!");
+
+  // If F is internal and all of its uses are calls from non-recursive
+  // functions, then none of its calls could in fact recurse without going
+ // through a function marked norecurse, and so we can mark this function too
+ // as norecurse. Note that the uses must actually be calls -- otherwise
+ // a pointer to this function could be returned from a norecurse function but
+ // this function could be recursively (indirectly) called. Note that this
+ // also detects if F is directly recursive as F is not yet marked as
+ // a norecurse function.
+ for (auto *U : F.users()) {
+ auto *I = dyn_cast<Instruction>(U);
+ if (!I)
+ return false;
+ CallSite CS(I);
+ if (!CS || !CS.getParent()->getParent()->doesNotRecurse())
+ return false;
+ }
+ return setDoesNotRecurse(F);
+}
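A source-level sketch (hypothetical code, not from the patch) of why a non-call use blocks the deduction: once the address escapes, the function can recurse through the pointer even though its only direct call site looks harmless.

    static void f();
    void (*p)() = f;          // non-call use: the address escapes
    static void f() { p(); }  // indirectly recursive through the pointer
    void g() { f(); }         // the direct call alone looked fine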
+
+bool ReversePostOrderFunctionAttrs::runOnModule(Module &M) {
+ // We only have a post-order SCC traversal (because SCCs are inherently
+ // discovered in post-order), so we accumulate them in a vector and then walk
+ // it in reverse. This is simpler than using the RPO iterator infrastructure
+ // because we need to combine SCC detection and the PO walk of the call
+  // graph. We can also cheat egregiously because we're primarily interested
+  // in synthesizing norecurse, so we need only save the singleton SCCs; any
+  // SCC with multiple functions in it is clearly recursive.
+ auto &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
+ SmallVector<Function *, 16> Worklist;
+ for (scc_iterator<CallGraph *> I = scc_begin(&CG); !I.isAtEnd(); ++I) {
+ if (I->size() != 1)
+ continue;
+
+ Function *F = I->front()->getFunction();
+ if (F && !F->isDeclaration() && !F->doesNotRecurse() &&
+ F->hasInternalLinkage())
+ Worklist.push_back(F);
+ }
+
bool Changed = false;
- // When iterating over SCCs we visit functions in a bottom-up fashion. Some of
- // the rules we have for identifying norecurse functions work best with a
- // top-down walk, so look again at all the functions we previously marked as
- // worth revisiting, in top-down order.
- for (auto &F : reverse(Revisit))
- if (F)
- Changed |= addNoRecurseAttrsTopDownOnly(cast<Function>((Value*)F));
+ for (auto *F : reverse(Worklist))
+ Changed |= addNoRecurseAttrsTopDown(*F);
+
return Changed;
}
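The collect-then-reverse idiom is generic; a standalone sketch with ints standing in for single-function SCCs (the pass above iterates the vector with reverse() rather than reversing it, which is equivalent):

    #include <algorithm>
    #include <vector>
    int main() {
      // SCCs arrive bottom-up (post-order): callees before callers.
      std::vector<int> PostOrder = {3, 2, 1};    // 1 calls 2, 2 calls 3
      std::reverse(PostOrder.begin(), PostOrder.end());
      // Now 1, 2, 3: callers first, the top-down order needed to push
      // norecurse facts from callers into their callees.
      return PostOrder.front() == 1 ? 0 : 1;
    }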
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index d8b677b966f2..5e0df9505119 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -41,15 +41,16 @@ static std::unique_ptr<Module> loadFile(const std::string &FileName,
LLVMContext &Context) {
SMDiagnostic Err;
DEBUG(dbgs() << "Loading '" << FileName << "'\n");
- std::unique_ptr<Module> Result = getLazyIRFileModule(FileName, Err, Context);
+ // Metadata isn't loaded or linked until after all functions are
+ // imported, after which it will be materialized and linked.
+ std::unique_ptr<Module> Result =
+ getLazyIRFileModule(FileName, Err, Context,
+ /* ShouldLazyLoadMetadata = */ true);
if (!Result) {
Err.print("function-import", errs());
return nullptr;
}
- Result->materializeMetadata();
- UpgradeDebugInfo(*Result);
-
return Result;
}
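The new lifecycle, condensed from this hunk and the importFunctions() hunk below (error handling elided; these are the same entry points the patch calls):

    std::unique_ptr<Module> M = getLazyIRFileModule(
        FileName, Err, Context, /* ShouldLazyLoadMetadata = */ true);
    // ... import the needed function bodies first ...
    M->materializeMetadata();   // pull in the deferred metadata
    UpgradeDebugInfo(*M);       // then upgrade any old debug info
    // ... only now link metadata into the destination module ...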
@@ -132,6 +133,8 @@ static void findExternalCalls(const Module &DestModule, Function &F,
// Ignore functions already present in the destination module
auto *SrcGV = DestModule.getNamedValue(ImportedName);
if (SrcGV) {
+ if (GlobalAlias *SGA = dyn_cast<GlobalAlias>(SrcGV))
+ SrcGV = SGA->getBaseObject();
assert(isa<Function>(SrcGV) && "Name collision during import");
if (!cast<Function>(SrcGV)->isDeclaration()) {
DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring "
@@ -324,6 +327,10 @@ bool FunctionImporter::importFunctions(Module &DestModule) {
ModuleToTempMDValsMap) {
// Load the specified source module.
auto &SrcModule = ModuleLoaderCache(SME.getKey());
+ // The modules were created with lazy metadata loading. Materialize it
+ // now, before linking it.
+ SrcModule.materializeMetadata();
+ UpgradeDebugInfo(SrcModule);
// Link in all necessary metadata from this module.
if (TheLinker.linkInMetadata(SrcModule, SME.getValue().get()))
@@ -408,14 +415,19 @@ public:
Index = IndexPtr.get();
}
+ // First we need to promote to global scope and rename any local values that
+ // are potentially exported to other modules.
+ if (renameModuleForThinLTO(M, Index)) {
+ errs() << "Error renaming module\n";
+ return false;
+ }
+
// Perform the import now.
auto ModuleLoader = [&M](StringRef Identifier) {
return loadFile(Identifier, M.getContext());
};
FunctionImporter Importer(*Index, ModuleLoader);
return Importer.importFunctions(M);
-
- return false;
}
};
} // anonymous namespace
diff --git a/lib/Transforms/IPO/IPO.cpp b/lib/Transforms/IPO/IPO.cpp
index 7ea6c08b2e66..89629cf06e08 100644
--- a/lib/Transforms/IPO/IPO.cpp
+++ b/lib/Transforms/IPO/IPO.cpp
@@ -28,7 +28,6 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeDAEPass(Registry);
initializeDAHPass(Registry);
initializeForceFunctionAttrsLegacyPassPass(Registry);
- initializeFunctionAttrsPass(Registry);
initializeGlobalDCEPass(Registry);
initializeGlobalOptPass(Registry);
initializeIPCPPass(Registry);
@@ -42,6 +41,8 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeLowerBitSetsPass(Registry);
initializeMergeFunctionsPass(Registry);
initializePartialInlinerPass(Registry);
+ initializePostOrderFunctionAttrsPass(Registry);
+ initializeReversePostOrderFunctionAttrsPass(Registry);
initializePruneEHPass(Registry);
initializeStripDeadPrototypesLegacyPassPass(Registry);
initializeStripSymbolsPass(Registry);
@@ -71,7 +72,7 @@ void LLVMAddDeadArgEliminationPass(LLVMPassManagerRef PM) {
}
void LLVMAddFunctionAttrsPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createFunctionAttrsPass());
+ unwrap(PM)->add(createPostOrderFunctionAttrsPass());
}
void LLVMAddFunctionInliningPass(LLVMPassManagerRef PM) {
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index 8e4ad642ddd5..3c6a7bb7a17a 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -38,7 +38,7 @@ namespace {
static char ID; // Pass identification, replacement for typeid
unsigned NumLoops;
- explicit LoopExtractor(unsigned numLoops = ~0)
+ explicit LoopExtractor(unsigned numLoops = ~0)
: LoopPass(ID), NumLoops(numLoops) {
initializeLoopExtractorPass(*PassRegistry::getPassRegistry());
}
@@ -143,7 +143,7 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &) {
Changed = true;
// After extraction, the loop is replaced by a function call, so
// we shouldn't try to run any more loop passes on it.
- LI.updateUnloop(L);
+ LI.markAsRemoved(L);
}
++NumExtracted;
}
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9876efa7b235..faada9c2a7db 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -251,7 +251,7 @@ void PassManagerBuilder::populateModulePassManager(
Inliner = nullptr;
}
if (!DisableUnitAtATime)
- MPM.add(createFunctionAttrsPass()); // Set readonly/readnone attrs
+ MPM.add(createPostOrderFunctionAttrsPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -346,6 +346,9 @@ void PassManagerBuilder::populateModulePassManager(
// we must insert a no-op module pass to reset the pass manager.
MPM.add(createBarrierNoopPass());
+ if (!DisableUnitAtATime)
+ MPM.add(createReversePostOrderFunctionAttrsPass());
+
if (!DisableUnitAtATime && OptLevel > 1 && !PrepareForLTO) {
// Remove avail extern fns and globals definitions if we aren't
// compiling an object file for later LTO. For LTO we want to preserve
@@ -502,7 +505,8 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createIPSCCPPass());
// Now that we internalized some globals, see if we can hack on them!
- PM.add(createFunctionAttrsPass()); // Add norecurse if possible.
+ PM.add(createPostOrderFunctionAttrsPass());
+ PM.add(createReversePostOrderFunctionAttrsPass());
PM.add(createGlobalOptimizerPass());
// Promote any localized global vars.
PM.add(createPromoteMemoryToRegisterPass());
@@ -551,7 +555,7 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
PM.add(createScalarReplAggregatesPass());
// Run a few AA driven optimizations here and now, to cleanup the code.
- PM.add(createFunctionAttrsPass()); // Add nocapture.
+ PM.add(createPostOrderFunctionAttrsPass()); // Add nocapture.
PM.add(createGlobalsAAWrapperPass()); // IP alias analysis.
PM.add(createLICMPass()); // Hoist loop invariants.
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 7ad0efc42fb4..160792b0a000 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -636,7 +636,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// if pattern detected emit alternate sequence
if (OpX && OpY) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(Log2->getFastMathFlags());
+ Builder->setFastMathFlags(Log2->getFastMathFlags());
Log2->setArgOperand(0, OpY);
Value *FMulVal = Builder->CreateFMul(OpX, Log2);
Value *FSub = Builder->CreateFSub(FMulVal, OpX);
@@ -652,7 +652,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
bool IgnoreZeroSign = I.hasNoSignedZeros();
if (BinaryOperator::isFNeg(Opnd0, IgnoreZeroSign)) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *N0 = dyn_castFNegVal(Opnd0, IgnoreZeroSign);
Value *N1 = dyn_castFNegVal(Opnd1, IgnoreZeroSign);
@@ -693,7 +693,7 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
if (Y) {
BuilderTy::FastMathFlagGuard Guard(*Builder);
- Builder->SetFastMathFlags(I.getFastMathFlags());
+ Builder->setFastMathFlags(I.getFastMathFlags());
Value *T = Builder->CreateFMul(Opnd1, Opnd1);
Value *R = Builder->CreateFMul(T, Y);
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 776704d1efa9..51219bcb0b7b 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -930,7 +930,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, TrueVal, FalseVal,
FCI->getName() + ".inv");
@@ -973,7 +973,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
if (FCI->hasOneUse() && FCmpInst::isUnordered(FCI->getPredicate())) {
FCmpInst::Predicate InvPred = FCI->getInversePredicate();
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
- Builder->SetFastMathFlags(FCI->getFastMathFlags());
+ Builder->setFastMathFlags(FCI->getFastMathFlags());
Value *NewCond = Builder->CreateFCmp(InvPred, FalseVal, TrueVal,
FCI->getName() + ".inv");
@@ -1082,7 +1082,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
} else {
IRBuilder<>::FastMathFlagGuard FMFG(*Builder);
auto FMF = cast<FPMathOperator>(SI.getCondition())->getFastMathFlags();
- Builder->SetFastMathFlags(FMF);
+ Builder->setFastMathFlags(FMF);
Cmp = Builder->CreateFCmp(Pred, LHS, RHS);
}
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 54a9fbdbe82e..5cde31a9162e 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -384,23 +384,20 @@ static void replaceExtractElements(InsertElementInst *InsElt,
ConstantVector::get(ExtendMask));
// Insert the new shuffle after the vector operand of the extract is defined
- // or at the start of the basic block, so any subsequent extracts can use it.
- bool ReplaceAllExtUsers;
- if (auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp)) {
+ // (as long as it's not a PHI) or at the start of the basic block of the
+ // extract, so any subsequent extracts in the same basic block can use it.
+ // TODO: Insert before the earliest ExtractElementInst that is replaced.
+ auto *ExtVecOpInst = dyn_cast<Instruction>(ExtVecOp);
+ if (ExtVecOpInst && !isa<PHINode>(ExtVecOpInst))
WideVec->insertAfter(ExtVecOpInst);
- ReplaceAllExtUsers = true;
- } else {
- // TODO: Insert at start of function, so it's always safe to replace all?
+ else
IC.InsertNewInstWith(WideVec, *ExtElt->getParent()->getFirstInsertionPt());
- ReplaceAllExtUsers = false;
- }
// Replace extracts from the original narrow vector with extracts from the new
// wide vector.
for (User *U : ExtVecOp->users()) {
ExtractElementInst *OldExt = dyn_cast<ExtractElementInst>(U);
- if (!OldExt ||
- (!ReplaceAllExtUsers && OldExt->getParent() != WideVec->getParent()))
+ if (!OldExt || OldExt->getParent() != WideVec->getParent())
continue;
auto *NewExt = ExtractElementInst::Create(WideVec, OldExt->getOperand(1));
NewExt->insertAfter(WideVec);
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 51ff95d9a74c..28483e7e9b69 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -93,8 +93,8 @@ private:
/// Replace instrprof_increment with an increment of the appropriate value.
void lowerIncrement(InstrProfIncrementInst *Inc);
- /// Set up the section and uses for coverage data and its references.
- void lowerCoverageData(GlobalVariable *CoverageData);
+ /// Force emitting of name vars for unused functions.
+ void lowerCoverageData(GlobalVariable *CoverageNamesVar);
/// Get the region counters for an increment, creating them if necessary.
///
@@ -156,9 +156,9 @@ bool InstrProfiling::runOnModule(Module &M) {
}
}
- if (GlobalVariable *Coverage =
- M.getNamedGlobal(getCoverageMappingVarName())) {
- lowerCoverageData(Coverage);
+ if (GlobalVariable *CoverageNamesVar =
+ M.getNamedGlobal(getCoverageNamesVarName())) {
+ lowerCoverageData(CoverageNamesVar);
MadeChange = true;
}
@@ -233,28 +233,16 @@ void InstrProfiling::lowerIncrement(InstrProfIncrementInst *Inc) {
Inc->eraseFromParent();
}
-void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageData) {
-
- Constant *Init = CoverageData->getInitializer();
- // We're expecting { [4 x 32], [n x { i8*, i32, i32 }], [m x i8] }
- // for some C. If not, the frontend's given us something broken.
- assert(Init->getNumOperands() == 3 && "bad number of fields in coverage map");
- assert(isa<ConstantArray>(Init->getAggregateElement(1)) &&
- "invalid function list in coverage map");
- ConstantArray *Records = cast<ConstantArray>(Init->getAggregateElement(1));
- for (unsigned I = 0, E = Records->getNumOperands(); I < E; ++I) {
- Constant *Record = Records->getOperand(I);
- Value *V = const_cast<Value *>(Record->getOperand(0))->stripPointerCasts();
+void InstrProfiling::lowerCoverageData(GlobalVariable *CoverageNamesVar) {
+ ConstantArray *Names =
+ cast<ConstantArray>(CoverageNamesVar->getInitializer());
+ for (unsigned I = 0, E = Names->getNumOperands(); I < E; ++I) {
+ Constant *NC = Names->getOperand(I);
+ Value *V = NC->stripPointerCasts();
assert(isa<GlobalVariable>(V) && "Missing reference to function name");
GlobalVariable *Name = cast<GlobalVariable>(V);
- // If we have region counters for this name, we've already handled it.
- auto It = ProfileDataMap.find(Name);
- if (It != ProfileDataMap.end())
- if (It->second.RegionCounters)
- continue;
-
// Move the name variable to the right section.
Name->setSection(getNameSection());
Name->setAlignment(1);
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index 5a7bce5a5413..34aaa7f27d6e 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -692,7 +692,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
const DataLayout &DL = F.getParent()->getDataLayout();
unsigned OriginAlignment = std::max(kMinOriginAlignment, Alignment);
unsigned StoreSize = DL.getTypeStoreSize(Shadow->getType());
- if (isa<StructType>(Shadow->getType())) {
+ if (Shadow->getType()->isAggregateType()) {
paintOrigin(IRB, updateOrigin(Origin, IRB),
getOriginPtr(Addr, IRB, Alignment), StoreSize,
OriginAlignment);
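The point of the change: isa<StructType> missed array-typed shadows, while Type::isAggregateType() is true for both structs and arrays. A small standalone check (assuming an LLVM build to compile against):

    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"
    using namespace llvm;
    int main() {
      LLVMContext Ctx;
      Type *I8 = Type::getInt8Ty(Ctx);
      bool S = StructType::get(Ctx, {I8})->isAggregateType(); // true
      bool A = ArrayType::get(I8, 4)->isAggregateType();      // true, yet
      // isa<StructType> is false here, so origin painting was skipped.
      return (S && A) ? 0 : 1;
    }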
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 087ce8ac50d4..dcdcfed66e64 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -100,9 +100,9 @@ namespace {
std::unique_ptr<BranchProbabilityInfo> BPI;
bool HasProfileData;
#ifdef NDEBUG
- SmallPtrSet<BasicBlock*, 16> LoopHeaders;
+ SmallPtrSet<const BasicBlock *, 16> LoopHeaders;
#else
- SmallSet<AssertingVH<BasicBlock>, 16> LoopHeaders;
+ SmallSet<AssertingVH<const BasicBlock>, 16> LoopHeaders;
#endif
DenseSet<std::pair<Value*, BasicBlock*> > RecursionSet;
@@ -163,6 +163,7 @@ namespace {
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
bool TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB);
+ bool TryToUnfoldSelectInCurrBB(BasicBlock *BB);
private:
BasicBlock *SplitBlockPreds(BasicBlock *BB, ArrayRef<BasicBlock *> Preds,
@@ -210,11 +211,12 @@ bool JumpThreading::runOnFunction(Function &F) {
// we will loop forever. We take care of this issue by not jump threading for
// back edges. This works for normal cases but not for unreachable blocks as
// they may have a cycle with no back edge.
- removeUnreachableBlocks(F);
+ bool EverChanged = false;
+ EverChanged |= removeUnreachableBlocks(F, LVI);
FindLoopHeaders(F);
- bool Changed, EverChanged = false;
+ bool Changed;
do {
Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
@@ -363,8 +365,8 @@ void JumpThreading::FindLoopHeaders(Function &F) {
SmallVector<std::pair<const BasicBlock*,const BasicBlock*>, 32> Edges;
FindFunctionBackedges(F, Edges);
- for (unsigned i = 0, e = Edges.size(); i != e; ++i)
- LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
+ for (const auto &Edge : Edges)
+ LoopHeaders.insert(Edge.second);
}
/// getKnownConstant - Helper method to determine if we can thread over a
@@ -410,8 +412,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If V is a constant, then it is known in all predecessors.
if (Constant *KC = getKnownConstant(V, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
return true;
}
@@ -434,8 +436,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
// Perhaps getConstantOnEdge should be smart enough to do this?
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
Constant *PredCst = LVI->getConstantOnEdge(V, P, BB, CxtI);
@@ -491,22 +492,17 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// Scan for the sentinel. If we find an undef, force it to the
// interesting value: x|undef -> true and x&undef -> false.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
- if (LHSVals[i].first == InterestingVal ||
- isa<UndefValue>(LHSVals[i].first)) {
- Result.push_back(LHSVals[i]);
- Result.back().first = InterestingVal;
- LHSKnownBBs.insert(LHSVals[i].second);
+ for (const auto &LHSVal : LHSVals)
+ if (LHSVal.first == InterestingVal || isa<UndefValue>(LHSVal.first)) {
+ Result.emplace_back(InterestingVal, LHSVal.second);
+ LHSKnownBBs.insert(LHSVal.second);
}
- for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
- if (RHSVals[i].first == InterestingVal ||
- isa<UndefValue>(RHSVals[i].first)) {
+ for (const auto &RHSVal : RHSVals)
+ if (RHSVal.first == InterestingVal || isa<UndefValue>(RHSVal.first)) {
// If we already inferred a value for this block on the LHS, don't
// re-add it.
- if (!LHSKnownBBs.count(RHSVals[i].second)) {
- Result.push_back(RHSVals[i]);
- Result.back().first = InterestingVal;
- }
+ if (!LHSKnownBBs.count(RHSVal.second))
+ Result.emplace_back(InterestingVal, RHSVal.second);
}
return !Result.empty();
@@ -522,8 +518,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
return false;
// Invert the known values.
- for (unsigned i = 0, e = Result.size(); i != e; ++i)
- Result[i].first = ConstantExpr::getNot(Result[i].first);
+ for (auto &R : Result)
+ R.first = ConstantExpr::getNot(R.first);
return true;
}
@@ -538,12 +534,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
WantInteger, CxtI);
// Try to use constant folding to simplify the binary operator.
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::get(BO->getOpcode(), V, CI);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
}
@@ -591,8 +587,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
cast<Instruction>(Cmp->getOperand(0))->getParent() != BB) {
Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB);PI != E; ++PI){
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(BB)) {
// If the value is known by LazyValueInfo to be a constant in a
// predecessor, use that information to try to thread this block.
LazyValueInfo::Tristate Res =
@@ -615,12 +610,12 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals,
WantInteger, CxtI);
- for (unsigned i = 0, e = LHSVals.size(); i != e; ++i) {
- Constant *V = LHSVals[i].first;
+ for (const auto &LHSVal : LHSVals) {
+ Constant *V = LHSVal.first;
Constant *Folded = ConstantExpr::getCompare(Cmp->getPredicate(),
V, CmpConst);
if (Constant *KC = getKnownConstant(Folded, WantInteger))
- Result.push_back(std::make_pair(KC, LHSVals[i].second));
+ Result.push_back(std::make_pair(KC, LHSVal.second));
}
return !Result.empty();
@@ -637,8 +632,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
if ((TrueVal || FalseVal) &&
ComputeValueKnownInPredecessors(SI->getCondition(), BB, Conds,
WantInteger, CxtI)) {
- for (unsigned i = 0, e = Conds.size(); i != e; ++i) {
- Constant *Cond = Conds[i].first;
+ for (auto &C : Conds) {
+ Constant *Cond = C.first;
// Figure out what value to use for the condition.
bool KnownCond;
@@ -655,7 +650,7 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// See if the select has a known constant value for this predecessor.
if (Constant *Val = KnownCond ? TrueVal : FalseVal)
- Result.push_back(std::make_pair(Val, Conds[i].second));
+ Result.push_back(std::make_pair(Val, C.second));
}
return !Result.empty();
@@ -665,8 +660,8 @@ ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB, PredValueInfo &Result,
// If all else fails, see if LVI can figure out a constant value for us.
Constant *CI = LVI->getConstant(V, BB, CxtI);
if (Constant *KC = getKnownConstant(CI, Preference)) {
- for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
- Result.push_back(std::make_pair(KC, *PI));
+ for (BasicBlock *Pred : predecessors(BB))
+ Result.push_back(std::make_pair(KC, Pred));
}
return !Result.empty();
@@ -736,6 +731,9 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
}
+ if (TryToUnfoldSelectInCurrBB(BB))
+ return true;
+
// What kind of constant we're looking for.
ConstantPreference Preference = WantInteger;
@@ -988,10 +986,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// If we got here, the loaded value is transparent through to the start of the
// block. Check to see if it is available in any of the predecessor blocks.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *PredBB = *PI;
-
+ for (BasicBlock *PredBB : predecessors(LoadBB)) {
// If we already scanned this predecessor, skip it.
if (!PredsScanned.insert(PredBB).second)
continue;
@@ -1038,13 +1033,11 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
SmallVector<BasicBlock*, 8> PredsToSplit;
SmallPtrSet<BasicBlock*, 8> AvailablePredSet;
- for (unsigned i = 0, e = AvailablePreds.size(); i != e; ++i)
- AvailablePredSet.insert(AvailablePreds[i].first);
+ for (const auto &AvailablePred : AvailablePreds)
+ AvailablePredSet.insert(AvailablePred.first);
// Add all the unavailable predecessors to the PredsToSplit list.
- for (pred_iterator PI = pred_begin(LoadBB), PE = pred_end(LoadBB);
- PI != PE; ++PI) {
- BasicBlock *P = *PI;
+ for (BasicBlock *P : predecessors(LoadBB)) {
// If the predecessor is an indirect goto, we can't split the edge.
if (isa<IndirectBrInst>(P->getTerminator()))
return false;
@@ -1129,9 +1122,9 @@ FindMostPopularDest(BasicBlock *BB,
// blocks with known and real destinations to threading undef. We'll handle
// them later if interesting.
DenseMap<BasicBlock*, unsigned> DestPopularity;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second)
- DestPopularity[PredToDestList[i].second]++;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second)
+ DestPopularity[PredToDest.second]++;
// Find the most popular dest.
DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
@@ -1194,10 +1187,10 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
"ComputeValueKnownInPredecessors returned true with no values");
DEBUG(dbgs() << "IN BB: " << *BB;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ for (const auto &PredValue : PredValues) {
dbgs() << " BB '" << BB->getName() << "': FOUND condition = "
- << *PredValues[i].first
- << " for pred '" << PredValues[i].second->getName() << "'.\n";
+ << *PredValue.first
+ << " for pred '" << PredValue.second->getName() << "'.\n";
});
// Decide what we want to thread through. Convert our list of known values to
@@ -1210,8 +1203,8 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
BasicBlock *OnlyDest = nullptr;
BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
- for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
- BasicBlock *Pred = PredValues[i].second;
+ for (const auto &PredValue : PredValues) {
+ BasicBlock *Pred = PredValue.second;
if (!SeenPreds.insert(Pred).second)
continue; // Duplicate predecessor entry.
@@ -1220,7 +1213,7 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
if (isa<IndirectBrInst>(Pred->getTerminator()))
continue;
- Constant *Val = PredValues[i].first;
+ Constant *Val = PredValue.first;
BasicBlock *DestBB;
if (isa<UndefValue>(Val))
@@ -1260,16 +1253,15 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB,
// Now that we know what the most popular destination is, factor all
// predecessors that will jump to it into a single predecessor.
SmallVector<BasicBlock*, 16> PredsToFactor;
- for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
- if (PredToDestList[i].second == MostPopularDest) {
- BasicBlock *Pred = PredToDestList[i].first;
+ for (const auto &PredToDest : PredToDestList)
+ if (PredToDest.second == MostPopularDest) {
+ BasicBlock *Pred = PredToDest.first;
// This predecessor may be a switch or something else that has multiple
// edges to the block. Factor each of these edges by listing them
// according to # occurrences in PredsToFactor.
- TerminatorInst *PredTI = Pred->getTerminator();
- for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
- if (PredTI->getSuccessor(i) == BB)
+ for (BasicBlock *Succ : successors(Pred))
+ if (Succ == BB)
PredsToFactor.push_back(Pred);
}
@@ -1366,11 +1358,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (isa<UndefValue>(XorOpValue.first))
// Ignore undefs for the count.
continue;
- if (cast<ConstantInt>(XorOpValues[i].first)->isZero())
+ if (cast<ConstantInt>(XorOpValue.first)->isZero())
++NumFalse;
else
++NumTrue;
@@ -1386,12 +1378,11 @@ bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
- for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
- if (XorOpValues[i].first != SplitVal &&
- !isa<UndefValue>(XorOpValues[i].first))
+ for (const auto &XorOpValue : XorOpValues) {
+ if (XorOpValue.first != SplitVal && !isa<UndefValue>(XorOpValue.first))
continue;
- BlocksToFoldInto.push_back(XorOpValues[i].second);
+ BlocksToFoldInto.push_back(XorOpValue.second);
}
// If we inferred a value for all of the predecessors, then duplication won't
@@ -1543,10 +1534,10 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
// PHI insertion, which we are prepared to do; clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1561,14 +1552,14 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(NewBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1644,10 +1635,10 @@ void JumpThreading::UpdateBlockFreqAndEdgeWeight(BasicBlock *PredBB,
// Collect updated outgoing edges' frequencies from BB and use them to update
// edge probabilities.
SmallVector<uint64_t, 4> BBSuccFreq;
- for (auto I = succ_begin(BB), E = succ_end(BB); I != E; ++I) {
- auto SuccFreq = (*I == SuccBB)
+ for (BasicBlock *Succ : successors(BB)) {
+ auto SuccFreq = (Succ == SuccBB)
? BB2SuccBBFreq - NewBBFreq
- : BBOrigFreq * BPI->getEdgeProbability(BB, *I);
+ : BBOrigFreq * BPI->getEdgeProbability(BB, Succ);
BBSuccFreq.push_back(SuccFreq.getFrequency());
}
@@ -1783,10 +1774,10 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PHI insertion, which we are prepared to do; clean these up now.
SSAUpdater SSAUpdate;
SmallVector<Use*, 16> UsesToRename;
- for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
+ for (Instruction &I : *BB) {
// Scan all uses of this instruction to see if it is used outside of its
// block, and if so, record them in UsesToRename.
- for (Use &U : I->uses()) {
+ for (Use &U : I.uses()) {
Instruction *User = cast<Instruction>(U.getUser());
if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
if (UserPN->getIncomingBlock(U) == BB)
@@ -1801,14 +1792,14 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
if (UsesToRename.empty())
continue;
- DEBUG(dbgs() << "JT: Renaming non-local uses of: " << *I << "\n");
+ DEBUG(dbgs() << "JT: Renaming non-local uses of: " << I << "\n");
// We found a use of I outside of BB. Rename all uses of I that are outside
// its block to be uses of the appropriate PHI node etc. See ValuesInBlocks
// with the two values we know.
- SSAUpdate.Initialize(I->getType(), I->getName());
- SSAUpdate.AddAvailableValue(BB, &*I);
- SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&*I]);
+ SSAUpdate.Initialize(I.getType(), I.getName());
+ SSAUpdate.AddAvailableValue(BB, &I);
+ SSAUpdate.AddAvailableValue(PredBB, ValueMapping[&I]);
while (!UsesToRename.empty())
SSAUpdate.RewriteUse(*UsesToRename.pop_back_val());
@@ -1903,3 +1894,62 @@ bool JumpThreading::TryToUnfoldSelect(CmpInst *CondCmp, BasicBlock *BB) {
}
return false;
}
+
+/// TryToUnfoldSelectInCurrBB - Look for PHI/Select in the same BB of the form
+/// bb:
+/// %p = phi [false, %bb1], [true, %bb2], [false, %bb3], [true, %bb4], ...
+/// %s = select p, trueval, falseval
+///
+/// And expand the select into a branch structure. This later enables
+/// jump-threading over bb in this pass.
+///
+/// Using an approach similar to SimplifyCFG::FoldCondBranchOnPHI(), unfold
+/// the select if the associated PHI has at least one constant incoming value.
+/// If the unfolded select is not jump-threaded, it will be folded again by
+/// later optimizations.
+bool JumpThreading::TryToUnfoldSelectInCurrBB(BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't thread the edge.
+ // See the comments above FindLoopHeaders for justifications and caveats.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ // Look for a Phi/Select pair in the same basic block. The Phi feeds the
+ // condition of the Select and at least one of the incoming values is a
+ // constant.
+ for (BasicBlock::iterator BI = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(BI); ++BI) {
+ unsigned NumPHIValues = PN->getNumIncomingValues();
+ if (NumPHIValues == 0 || !PN->hasOneUse())
+ continue;
+
+ SelectInst *SI = dyn_cast<SelectInst>(PN->user_back());
+ if (!SI || SI->getParent() != BB)
+ continue;
+
+ Value *Cond = SI->getCondition();
+ if (!Cond || Cond != PN || !Cond->getType()->isIntegerTy(1))
+ continue;
+
+ bool HasConst = false;
+ for (unsigned i = 0; i != NumPHIValues; ++i) {
+ if (PN->getIncomingBlock(i) == BB)
+ return false;
+ if (isa<ConstantInt>(PN->getIncomingValue(i)))
+ HasConst = true;
+ }
+
+ if (HasConst) {
+ // Expand the select.
+ TerminatorInst *Term =
+ SplitBlockAndInsertIfThen(SI->getCondition(), SI, false);
+ PHINode *NewPN = PHINode::Create(SI->getType(), 2, "", SI);
+ NewPN->addIncoming(SI->getTrueValue(), Term->getParent());
+ NewPN->addIncoming(SI->getFalseValue(), BB);
+ SI->replaceAllUsesWith(NewPN);
+ SI->eraseFromParent();
+ return true;
+ }
+ }
+
+ return false;
+}
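
A minimal standalone sketch of the guard logic above (plain C++, not the LLVM API; the Incoming type is hypothetical): unfold only when no incoming edge of the PHI originates in BB itself and at least one incoming value is a constant.

    #include <optional>
    #include <vector>

    struct Incoming {
      bool FromCurrentBB;            // does this incoming edge originate in BB?
      std::optional<bool> ConstVal;  // set iff the incoming value is a ConstantInt
    };

    // Mirrors the scan in TryToUnfoldSelectInCurrBB: bail out entirely on a
    // self-edge, otherwise unfold as soon as one constant incoming exists.
    static bool shouldUnfoldSelect(const std::vector<Incoming> &PhiIncomings) {
      bool HasConst = false;
      for (const Incoming &In : PhiIncomings) {
        if (In.FromCurrentBB)
          return false;
        if (In.ConstVal)
          HasConst = true;
      }
      return HasConst;
    }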
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index e01e23f71732..8923ff74253c 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -203,9 +203,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
CurAST = new AliasSetTracker(*AA);
// Collect Alias info from subloops.
- for (Loop::iterator LoopItr = L->begin(), LoopItrE = L->end();
- LoopItr != LoopItrE; ++LoopItr) {
- Loop *InnerL = *LoopItr;
+ for (Loop *InnerL : L->getSubLoops()) {
AliasSetTracker *InnerAST = LoopToAliasSetMap[InnerL];
assert(InnerAST && "Where is my AST?");
@@ -227,9 +225,7 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Because subloops have already been incorporated into AST, we skip blocks in
// subloops.
//
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
+ for (BasicBlock *BB : L->blocks()) {
if (LI->getLoopFor(BB) == L) // Ignore blocks in subloops.
CurAST->add(*BB); // Incorporate the specified basic block
}
@@ -263,9 +259,8 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
PredIteratorCache PIC;
// Loop over all of the alias sets in the tracker object.
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I)
- Changed |= promoteLoopAccessesToScalars(*I, ExitBlocks, InsertPts,
+ for (AliasSet &AS : *CurAST)
+ Changed |= promoteLoopAccessesToScalars(AS, ExitBlocks, InsertPts,
PIC, LI, DT, CurLoop,
CurAST, &SafetyInfo);
@@ -324,9 +319,9 @@ bool llvm::sinkRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
// We are processing blocks in reverse dfo, so process children first.
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- sinkRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= sinkRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+
// Only need to process the contents of this block if it is not part of a
// subloop (which would already have been processed).
if (inSubLoop(BB,CurLoop,LI)) return Changed;
@@ -407,9 +402,8 @@ bool llvm::hoistRegion(DomTreeNode *N, AliasAnalysis *AA, LoopInfo *LI,
}
const std::vector<DomTreeNode*> &Children = N->getChildren();
- for (unsigned i = 0, e = Children.size(); i != e; ++i)
- Changed |=
- hoistRegion(Children[i], AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
+ for (DomTreeNode *Child : Children)
+ Changed |= hoistRegion(Child, AA, LI, DT, TLI, CurLoop, CurAST, SafetyInfo);
return Changed;
}
@@ -499,9 +493,7 @@ bool canSinkOrHoistInst(Instruction &I, AliasAnalysis *AA, DominatorTree *DT,
// If this call only reads from memory and there are no writes to memory
// in the loop, we can hoist or sink the call as appropriate.
bool FoundMod = false;
- for (AliasSetTracker::iterator I = CurAST->begin(), E = CurAST->end();
- I != E; ++I) {
- AliasSet &AS = *I;
+ for (AliasSet &AS : *CurAST) {
if (!AS.isForwardingAliasSet() && AS.isMod()) {
FoundMod = true;
break;
@@ -783,8 +775,8 @@ static bool isGuaranteedToExecute(const Instruction &Inst,
CurLoop->getExitBlocks(ExitBlocks);
// Verify that the block dominates each of the exit blocks of the loop.
- for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i)
- if (!DT->dominates(Inst.getParent(), ExitBlocks[i]))
+ for (BasicBlock *ExitBlock : ExitBlocks)
+ if (!DT->dominates(Inst.getParent(), ExitBlock))
return false;
// As a degenerate case, if the loop is statically infinite then we haven't
@@ -951,17 +943,17 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// If there is a non-load/store instruction in the loop, we can't promote
// it.
- if (const LoadInst *load = dyn_cast<LoadInst>(UI)) {
- assert(!load->isVolatile() && "AST broken");
- if (!load->isSimple())
+ if (const LoadInst *Load = dyn_cast<LoadInst>(UI)) {
+ assert(!Load->isVolatile() && "AST broken");
+ if (!Load->isSimple())
return Changed;
- } else if (const StoreInst *store = dyn_cast<StoreInst>(UI)) {
+ } else if (const StoreInst *Store = dyn_cast<StoreInst>(UI)) {
// Stores *of* the pointer are not interesting, only stores *to* the
// pointer.
if (UI->getOperand(1) != ASIV)
continue;
- assert(!store->isVolatile() && "AST broken");
- if (!store->isSimple())
+ assert(!Store->isVolatile() && "AST broken");
+ if (!Store->isSimple())
return Changed;
// Don't sink stores from loops without dedicated block exits. Exits
// containing indirect branches are not transformed by loop simplify,
@@ -979,7 +971,7 @@ bool llvm::promoteLoopAccessesToScalars(AliasSet &AS,
// restrictive (and performant) alignment and if we are sure this
// instruction will be executed, update the alignment.
// Larger is better, with the exception of 0 being the best alignment.
- unsigned InstAlignment = store->getAlignment();
+ unsigned InstAlignment = Store->getAlignment();
if ((InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0)
if (isGuaranteedToExecute(*UI, DT, CurLoop, SafetyInfo)) {
GuaranteedToExecute = true;
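
The guard in this hunk reads tersely; a standalone restatement of the same predicate, carrying over the convention from the comment above that larger alignments are better and an alignment of 0 is best of all:

    // Restates: (InstAlignment > Alignment || InstAlignment == 0) && Alignment != 0
    static bool shouldAdoptStoreAlignment(unsigned InstAlignment, unsigned Alignment) {
      if (Alignment == 0)
        return false;               // current alignment is already the best case
      return InstAlignment > Alignment || InstAlignment == 0;
    }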
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index bc00ff3f3a42..7b1940b48c31 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -245,7 +245,7 @@ bool LoopDeletion::runOnLoop(Loop *L, LPPassManager &) {
loopInfo.removeBlock(BB);
// The last step is to update LoopInfo now that we've eliminated this loop.
- loopInfo.updateUnloop(L);
+ loopInfo.markAsRemoved(L);
Changed = true;
++NumDeleted;
diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp
index 56ae5c010411..ecef6dbe24e6 100644
--- a/lib/Transforms/Scalar/LoopUnrollPass.cpp
+++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp
@@ -39,16 +39,16 @@ using namespace llvm;
#define DEBUG_TYPE "loop-unroll"
static cl::opt<unsigned>
- UnrollThreshold("unroll-threshold", cl::init(150), cl::Hidden,
+ UnrollThreshold("unroll-threshold", cl::Hidden,
cl::desc("The baseline cost threshold for loop unrolling"));
static cl::opt<unsigned> UnrollPercentDynamicCostSavedThreshold(
- "unroll-percent-dynamic-cost-saved-threshold", cl::init(20), cl::Hidden,
+ "unroll-percent-dynamic-cost-saved-threshold", cl::Hidden,
cl::desc("The percentage of estimated dynamic cost which must be saved by "
"unrolling to allow unrolling up to the max threshold."));
static cl::opt<unsigned> UnrollDynamicCostSavingsDiscount(
- "unroll-dynamic-cost-savings-discount", cl::init(2000), cl::Hidden,
+ "unroll-dynamic-cost-savings-discount", cl::Hidden,
cl::desc("This is the amount discounted from the total unroll cost when "
"the unrolled form has a high dynamic cost savings (triggered by "
"the '-unroll-perecent-dynamic-cost-saved-threshold' flag)."));
@@ -59,17 +59,17 @@ static cl::opt<unsigned> UnrollMaxIterationsCountToAnalyze(
"iterations when checking full unroll profitability"));
static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
+UnrollCount("unroll-count", cl::Hidden,
cl::desc("Use this unroll count for all loops including those with "
"unroll_count pragma values, for testing purposes"));
static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
+UnrollAllowPartial("unroll-allow-partial", cl::Hidden,
cl::desc("Allows loops to be partially unrolled until "
"-unroll-threshold loop size is reached."));
static cl::opt<bool>
-UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::init(false), cl::Hidden,
+UnrollRuntime("unroll-runtime", cl::ZeroOrMore, cl::Hidden,
cl::desc("Unroll loops with run-time trip counts"));
static cl::opt<unsigned>
@@ -77,182 +77,95 @@ PragmaUnrollThreshold("pragma-unroll-threshold", cl::init(16 * 1024), cl::Hidden
cl::desc("Unrolled size limit for loops with an unroll(full) or "
"unroll_count pragma."));
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll(int T = -1, int C = -1, int P = -1, int R = -1) : LoopPass(ID) {
- CurrentThreshold = (T == -1) ? UnrollThreshold : unsigned(T);
- CurrentPercentDynamicCostSavedThreshold =
- UnrollPercentDynamicCostSavedThreshold;
- CurrentDynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
- CurrentCount = (C == -1) ? UnrollCount : unsigned(C);
- CurrentAllowPartial = (P == -1) ? UnrollAllowPartial : (bool)P;
- CurrentRuntime = (R == -1) ? UnrollRuntime : (bool)R;
-
- UserThreshold = (T != -1) || (UnrollThreshold.getNumOccurrences() > 0);
- UserPercentDynamicCostSavedThreshold =
- (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0);
- UserDynamicCostSavingsDiscount =
- (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0);
- UserAllowPartial = (P != -1) ||
- (UnrollAllowPartial.getNumOccurrences() > 0);
- UserRuntime = (R != -1) || (UnrollRuntime.getNumOccurrences() > 0);
- UserCount = (C != -1) || (UnrollCount.getNumOccurrences() > 0);
-
- initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
- }
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- // Threshold to use when optsize is specified (and there is no
- // explicit -unroll-threshold).
- static const unsigned OptSizeUnrollThreshold = 50;
-
- // Default unroll count for loops with run-time trip count if
- // -unroll-count is not set
- static const unsigned UnrollRuntimeCount = 8;
-
- unsigned CurrentCount;
- unsigned CurrentThreshold;
- unsigned CurrentPercentDynamicCostSavedThreshold;
- unsigned CurrentDynamicCostSavingsDiscount;
- bool CurrentAllowPartial;
- bool CurrentRuntime;
-
- // Flags for whether the 'current' settings are user-specified.
- bool UserCount;
- bool UserThreshold;
- bool UserPercentDynamicCostSavedThreshold;
- bool UserDynamicCostSavingsDiscount;
- bool UserAllowPartial;
- bool UserRuntime;
-
- bool runOnLoop(Loop *L, LPPassManager &) override;
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<AssumptionCacheTracker>();
- AU.addRequired<DominatorTreeWrapperPass>();
- AU.addRequired<LoopInfoWrapperPass>();
- AU.addPreserved<LoopInfoWrapperPass>();
- AU.addRequiredID(LoopSimplifyID);
- AU.addPreservedID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addPreservedID(LCSSAID);
- AU.addRequired<ScalarEvolutionWrapperPass>();
- AU.addPreserved<ScalarEvolutionWrapperPass>();
- AU.addRequired<TargetTransformInfoWrapperPass>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTreeWrapperPass>();
- AU.addPreserved<GlobalsAAWrapperPass>();
- }
+/// A magic value for use with the Threshold parameter to indicate
+/// that the loop unroll should be performed regardless of how much
+/// code expansion would result.
+static const unsigned NoThreshold = UINT_MAX;
- // Fill in the UnrollingPreferences parameter with values from the
- // TargetTransformationInfo.
- void getUnrollingPreferences(Loop *L, const TargetTransformInfo &TTI,
- TargetTransformInfo::UnrollingPreferences &UP) {
- UP.Threshold = CurrentThreshold;
- UP.PercentDynamicCostSavedThreshold =
- CurrentPercentDynamicCostSavedThreshold;
- UP.DynamicCostSavingsDiscount = CurrentDynamicCostSavingsDiscount;
- UP.OptSizeThreshold = OptSizeUnrollThreshold;
- UP.PartialThreshold = CurrentThreshold;
- UP.PartialOptSizeThreshold = OptSizeUnrollThreshold;
- UP.Count = CurrentCount;
- UP.MaxCount = UINT_MAX;
- UP.Partial = CurrentAllowPartial;
- UP.Runtime = CurrentRuntime;
- UP.AllowExpensiveTripCount = false;
- TTI.getUnrollingPreferences(L, UP);
- }
+/// Default unroll count for loops with a run-time trip count, used when
+/// -unroll-count is not set.
+static const unsigned DefaultUnrollRuntimeCount = 8;
- // Select and return an unroll count based on parameters from
- // user, unroll preferences, unroll pragmas, or a heuristic.
- // SetExplicitly is set to true if the unroll count is is set by
- // the user or a pragma rather than selected heuristically.
- unsigned
- selectUnrollCount(const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount,
- const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly);
-
- // Select threshold values used to limit unrolling based on a
- // total unrolled size. Parameters Threshold and PartialThreshold
- // are set to the maximum unrolled size for fully and partially
- // unrolled loops respectively.
- void selectThresholds(const Loop *L, bool UsePragmaThreshold,
- const TargetTransformInfo::UnrollingPreferences &UP,
- unsigned &Threshold, unsigned &PartialThreshold,
- unsigned &PercentDynamicCostSavedThreshold,
- unsigned &DynamicCostSavingsDiscount) {
- // Determine the current unrolling threshold. While this is
- // normally set from UnrollThreshold, it is overridden to a
- // smaller value if the current function is marked as
- // optimize-for-size, and the unroll threshold was not user
- // specified.
- Threshold = UserThreshold ? CurrentThreshold : UP.Threshold;
- PartialThreshold = UserThreshold ? CurrentThreshold : UP.PartialThreshold;
- PercentDynamicCostSavedThreshold =
- UserPercentDynamicCostSavedThreshold
- ? CurrentPercentDynamicCostSavedThreshold
- : UP.PercentDynamicCostSavedThreshold;
- DynamicCostSavingsDiscount = UserDynamicCostSavingsDiscount
- ? CurrentDynamicCostSavingsDiscount
- : UP.DynamicCostSavingsDiscount;
-
- if (!UserThreshold &&
- // FIXME: Use Function::optForSize().
- L->getHeader()->getParent()->hasFnAttribute(
- Attribute::OptimizeForSize)) {
- Threshold = UP.OptSizeThreshold;
- PartialThreshold = UP.PartialOptSizeThreshold;
- }
- if (UsePragmaThreshold) {
- // If the loop has an unrolling pragma, we want to be more
- // aggressive with unrolling limits. Set thresholds to at
- // least the PragmaTheshold value which is larger than the
- // default limits.
- if (Threshold != NoThreshold)
- Threshold = std::max<unsigned>(Threshold, PragmaUnrollThreshold);
- if (PartialThreshold != NoThreshold)
- PartialThreshold =
- std::max<unsigned>(PartialThreshold, PragmaUnrollThreshold);
- }
- }
- bool canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost, uint64_t RolledDynamicCost);
- };
-}
+/// Gather the various unrolling parameters based on the defaults, compiler
+/// flags, TTI overrides, pragmas, and user-specified parameters.
+static TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(
+ Loop *L, const TargetTransformInfo &TTI, Optional<unsigned> UserThreshold,
+ Optional<unsigned> UserCount, Optional<bool> UserAllowPartial,
+ Optional<bool> UserRuntime, unsigned PragmaCount, bool PragmaFullUnroll,
+ bool PragmaEnableUnroll, unsigned TripCount) {
+ TargetTransformInfo::UnrollingPreferences UP;
-char LoopUnroll::ID = 0;
-INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
-INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
-INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
-INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
-INITIALIZE_PASS_DEPENDENCY(LCSSA)
-INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
-INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+ // Set up the defaults
+ UP.Threshold = 150;
+ UP.PercentDynamicCostSavedThreshold = 20;
+ UP.DynamicCostSavingsDiscount = 2000;
+ UP.OptSizeThreshold = 50;
+ UP.PartialThreshold = UP.Threshold;
+ UP.PartialOptSizeThreshold = UP.OptSizeThreshold;
+ UP.Count = 0;
+ UP.MaxCount = UINT_MAX;
+ UP.Partial = false;
+ UP.Runtime = false;
+ UP.AllowExpensiveTripCount = false;
+
+ // Override with any target specific settings
+ TTI.getUnrollingPreferences(L, UP);
+
+ // Apply size attributes
+ if (L->getHeader()->getParent()->optForSize()) {
+ UP.Threshold = UP.OptSizeThreshold;
+ UP.PartialThreshold = UP.PartialOptSizeThreshold;
+ }
-Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
- int Runtime) {
- return new LoopUnroll(Threshold, Count, AllowPartial, Runtime);
-}
+ // Apply unroll count pragmas
+ if (PragmaCount)
+ UP.Count = PragmaCount;
+ else if (PragmaFullUnroll)
+ UP.Count = TripCount;
-Pass *llvm::createSimpleLoopUnrollPass() {
- return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+ // Apply any user values specified by cl::opt
+ if (UnrollThreshold.getNumOccurrences() > 0) {
+ UP.Threshold = UnrollThreshold;
+ UP.PartialThreshold = UnrollThreshold;
+ }
+ if (UnrollPercentDynamicCostSavedThreshold.getNumOccurrences() > 0)
+ UP.PercentDynamicCostSavedThreshold =
+ UnrollPercentDynamicCostSavedThreshold;
+ if (UnrollDynamicCostSavingsDiscount.getNumOccurrences() > 0)
+ UP.DynamicCostSavingsDiscount = UnrollDynamicCostSavingsDiscount;
+ if (UnrollCount.getNumOccurrences() > 0)
+ UP.Count = UnrollCount;
+ if (UnrollAllowPartial.getNumOccurrences() > 0)
+ UP.Partial = UnrollAllowPartial;
+ if (UnrollRuntime.getNumOccurrences() > 0)
+ UP.Runtime = UnrollRuntime;
+
+ // Apply user values provided by argument
+ if (UserThreshold.hasValue()) {
+ UP.Threshold = *UserThreshold;
+ UP.PartialThreshold = *UserThreshold;
+ }
+ if (UserCount.hasValue())
+ UP.Count = *UserCount;
+ if (UserAllowPartial.hasValue())
+ UP.Partial = *UserAllowPartial;
+ if (UserRuntime.hasValue())
+ UP.Runtime = *UserRuntime;
+
+ if (PragmaCount > 0 ||
+ ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0)) {
+ // If the loop has an unrolling pragma, we want to be more aggressive with
+ // unrolling limits. Set thresholds to at least the PragmaUnrollThreshold
+ // value, which is larger than the default limits.
+ if (UP.Threshold != NoThreshold)
+ UP.Threshold = std::max<unsigned>(UP.Threshold, PragmaUnrollThreshold);
+ if (UP.PartialThreshold != NoThreshold)
+ UP.PartialThreshold =
+ std::max<unsigned>(UP.PartialThreshold, PragmaUnrollThreshold);
+ }
+
+ return UP;
}
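
The precedence implied by the function above, restated as a minimal sketch (plain C++; the Prefs struct and field subset are hypothetical, and the TTI and opt-for-size layers are elided): defaults, then TTI overrides, then size attributes, then pragmas, then cl::opt flags, then caller-provided values, with the pragma threshold floor applied last.

    #include <algorithm>
    #include <optional>

    struct Prefs { unsigned Threshold = 150; unsigned Count = 0; }; // 1. defaults

    static Prefs resolve(std::optional<unsigned> FlagThreshold, // -unroll-threshold
                         std::optional<unsigned> UserThreshold, // constructor arg
                         unsigned PragmaCount, unsigned PragmaThresholdFloor) {
      Prefs UP;
      if (PragmaCount)                                   // 2. pragma sets the count
        UP.Count = PragmaCount;
      if (FlagThreshold)                                 // 3. command-line flag
        UP.Threshold = *FlagThreshold;
      if (UserThreshold)                                 // 4. explicit caller value wins
        UP.Threshold = *UserThreshold;
      if (PragmaCount)                                   // 5. pragma loops get a floor
        UP.Threshold = std::max(UP.Threshold, PragmaThresholdFloor);
      return UP;
    }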
namespace {
@@ -793,12 +706,11 @@ static void SetLoopAlreadyUnrolled(Loop *L) {
L->setLoopID(NewLoopID);
}
-bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
- unsigned PercentDynamicCostSavedThreshold,
- unsigned DynamicCostSavingsDiscount,
- uint64_t UnrolledCost,
- uint64_t RolledDynamicCost) {
-
+static bool canUnrollCompletely(Loop *L, unsigned Threshold,
+ unsigned PercentDynamicCostSavedThreshold,
+ unsigned DynamicCostSavingsDiscount,
+ uint64_t UnrolledCost,
+ uint64_t RolledDynamicCost) {
if (Threshold == NoThreshold) {
DEBUG(dbgs() << " Can fully unroll, because no threshold is set.\n");
return true;
@@ -846,60 +758,13 @@ bool LoopUnroll::canUnrollCompletely(Loop *L, unsigned Threshold,
return false;
}
-unsigned LoopUnroll::selectUnrollCount(
- const Loop *L, unsigned TripCount, bool PragmaFullUnroll,
- unsigned PragmaCount, const TargetTransformInfo::UnrollingPreferences &UP,
- bool &SetExplicitly) {
- SetExplicitly = true;
-
- // User-specified count (either as a command-line option or
- // constructor parameter) has highest precedence.
- unsigned Count = UserCount ? CurrentCount : 0;
-
- // If there is no user-specified count, unroll pragmas have the next
- // highest precedence.
- if (Count == 0) {
- if (PragmaCount) {
- Count = PragmaCount;
- } else if (PragmaFullUnroll) {
- Count = TripCount;
- }
- }
-
- if (Count == 0)
- Count = UP.Count;
-
- if (Count == 0) {
- SetExplicitly = false;
- if (TripCount == 0)
- // Runtime trip count.
- Count = UnrollRuntimeCount;
- else
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- Count = TripCount;
- }
- if (TripCount && Count > TripCount)
- return TripCount;
- return Count;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
- if (skipOptnoneFunction(L))
- return false;
-
- Function &F = *L->getHeader()->getParent();
-
- auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- const TargetTransformInfo &TTI =
- getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
-
+static bool tryToUnrollLoop(Loop *L, DominatorTree &DT, LoopInfo *LI,
+ ScalarEvolution *SE, const TargetTransformInfo &TTI,
+ AssumptionCache &AC, bool PreserveLCSSA,
+ Optional<unsigned> ProvidedCount,
+ Optional<unsigned> ProvidedThreshold,
+ Optional<bool> ProvidedAllowPartial,
+ Optional<bool> ProvidedRuntime) {
BasicBlock *Header = L->getHeader();
DEBUG(dbgs() << "Loop Unroll: F[" << Header->getParent()->getName()
<< "] Loop %" << Header->getName() << "\n");
@@ -912,9 +777,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
unsigned PragmaCount = UnrollCountPragmaValue(L);
bool HasPragma = PragmaFullUnroll || PragmaEnableUnroll || PragmaCount > 0;
- TargetTransformInfo::UnrollingPreferences UP;
- getUnrollingPreferences(L, TTI, UP);
-
// Find trip count and trip multiple if count is not available
unsigned TripCount = 0;
unsigned TripMultiple = 1;
@@ -929,11 +791,18 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
TripMultiple = SE->getSmallConstantTripMultiple(L, ExitingBlock);
}
- // Select an initial unroll count. This may be reduced later based
- // on size thresholds.
- bool CountSetExplicitly;
- unsigned Count = selectUnrollCount(L, TripCount, PragmaFullUnroll,
- PragmaCount, UP, CountSetExplicitly);
+ TargetTransformInfo::UnrollingPreferences UP = gatherUnrollingPreferences(
+ L, TTI, ProvidedThreshold, ProvidedCount, ProvidedAllowPartial,
+ ProvidedRuntime, PragmaCount, PragmaFullUnroll, PragmaEnableUnroll,
+ TripCount);
+
+ unsigned Count = UP.Count;
+ bool CountSetExplicitly = Count != 0;
+ // Use a heuristic count if we didn't set anything explicitly.
+ if (!CountSetExplicitly)
+ Count = TripCount == 0 ? DefaultUnrollRuntimeCount : TripCount;
+ if (TripCount && Count > TripCount)
+ Count = TripCount;
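
Concrete behavior of the fallback above, under assumed inputs (a sketch, with DefaultUnrollRuntimeCount inlined as 8):

    static unsigned pickCount(unsigned ExplicitCount, unsigned TripCount) {
      unsigned Count = ExplicitCount;
      if (Count == 0) // nothing explicit: runtime default, or try full unrolling
        Count = TripCount == 0 ? 8 /*DefaultUnrollRuntimeCount*/ : TripCount;
      if (TripCount && Count > TripCount)
        Count = TripCount; // never unroll past the known trip count
      return Count; // pickCount(0,0)==8, pickCount(0,13)==13, pickCount(32,13)==13
    }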
unsigned NumInlineCandidates;
bool notDuplicatable;
@@ -955,21 +824,6 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return false;
}
- unsigned Threshold, PartialThreshold;
- unsigned PercentDynamicCostSavedThreshold;
- unsigned DynamicCostSavingsDiscount;
- // Only use the high pragma threshold when we have a target unroll factor such
- // as with "#pragma unroll N" or a pragma indicating full unrolling and the
- // trip count is known. Otherwise we rely on the standard threshold to
- // heuristically select a reasonable unroll count.
- bool UsePragmaThreshold =
- PragmaCount > 0 ||
- ((PragmaFullUnroll || PragmaEnableUnroll) && TripCount != 0);
-
- selectThresholds(L, UsePragmaThreshold, UP, Threshold, PartialThreshold,
- PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount);
-
// Given Count, TripCount and thresholds determine the type of
// unrolling which is to be performed.
enum { Full = 0, Partial = 1, Runtime = 2 };
@@ -977,19 +831,20 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
if (TripCount && Count == TripCount) {
Unrolling = Partial;
// If the loop is really small, we don't need to run an expensive analysis.
- if (canUnrollCompletely(L, Threshold, 100, DynamicCostSavingsDiscount,
+ if (canUnrollCompletely(L, UP.Threshold, 100, UP.DynamicCostSavingsDiscount,
UnrolledSize, UnrolledSize)) {
Unrolling = Full;
} else {
// The loop isn't that small, but we still can fully unroll it if that
// helps to remove a significant number of instructions.
// To check that, run additional analysis on the loop.
- if (Optional<EstimatedUnrollCost> Cost =
- analyzeLoopUnrollCost(L, TripCount, DT, *SE, TTI,
- Threshold + DynamicCostSavingsDiscount))
- if (canUnrollCompletely(L, Threshold, PercentDynamicCostSavedThreshold,
- DynamicCostSavingsDiscount, Cost->UnrolledCost,
- Cost->RolledDynamicCost)) {
+ if (Optional<EstimatedUnrollCost> Cost = analyzeLoopUnrollCost(
+ L, TripCount, DT, *SE, TTI,
+ UP.Threshold + UP.DynamicCostSavingsDiscount))
+ if (canUnrollCompletely(L, UP.Threshold,
+ UP.PercentDynamicCostSavedThreshold,
+ UP.DynamicCostSavingsDiscount,
+ Cost->UnrolledCost, Cost->RolledDynamicCost)) {
Unrolling = Full;
}
}
@@ -1001,23 +856,22 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
// Reduce count based on the type of unrolling and the threshold values.
unsigned OriginalCount = Count;
- bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) ||
- (UserRuntime ? CurrentRuntime : UP.Runtime);
+ bool AllowRuntime = PragmaEnableUnroll || (PragmaCount > 0) || UP.Runtime;
// Don't unroll a runtime trip count loop with unroll full pragma.
if (HasRuntimeUnrollDisablePragma(L) || PragmaFullUnroll) {
AllowRuntime = false;
}
if (Unrolling == Partial) {
- bool AllowPartial = PragmaEnableUnroll ||
- (UserAllowPartial ? CurrentAllowPartial : UP.Partial);
+ bool AllowPartial = PragmaEnableUnroll || UP.Partial;
if (!AllowPartial && !CountSetExplicitly) {
DEBUG(dbgs() << " will not try to unroll partially because "
<< "-unroll-allow-partial not given\n");
return false;
}
- if (PartialThreshold != NoThreshold && UnrolledSize > PartialThreshold) {
+ if (UP.PartialThreshold != NoThreshold &&
+ UnrolledSize > UP.PartialThreshold) {
// Reduce the unroll count so that it evenly divides TripCount for partial
- Count = (std::max(PartialThreshold, 3u)-2) / (LoopSize-2);
+ Count = (std::max(UP.PartialThreshold, 3u) - 2) / (LoopSize - 2);
while (Count != 0 && TripCount % Count != 0)
Count--;
}
@@ -1029,7 +883,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
}
// Reduce unroll count to be the largest power-of-two factor of
// the original count which satisfies the threshold limit.
- while (Count != 0 && UnrolledSize > PartialThreshold) {
+ while (Count != 0 && UnrolledSize > UP.PartialThreshold) {
Count >>= 1;
UnrolledSize = (LoopSize-2) * Count + 2;
}
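
A worked pass over the two reductions above, with assumed numbers (LoopSize = 12, TripCount = 10, UP.PartialThreshold = 50); the runtime case instead halves Count until UnrolledSize = (LoopSize - 2) * Count + 2 fits under the threshold:

    #include <algorithm>

    static unsigned reducePartialCount() {
      unsigned PartialThreshold = 50, LoopSize = 12, TripCount = 10;
      // (max(50, 3) - 2) / (12 - 2) = 48 / 10 = 4, then step down until it
      // divides the trip count: 10 % 4 != 0, 10 % 3 != 0, 10 % 2 == 0.
      unsigned Count = (std::max(PartialThreshold, 3u) - 2) / (LoopSize - 2);
      while (Count != 0 && TripCount % Count != 0)
        Count--;
      return Count; // 2, giving UnrolledSize = (12 - 2) * 2 + 2 = 22 <= 50
    }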
@@ -1086,3 +940,91 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &) {
return true;
}
+
+namespace {
+class LoopUnroll : public LoopPass {
+public:
+ static char ID; // Pass ID, replacement for typeid
+ LoopUnroll(Optional<unsigned> Threshold = None,
+ Optional<unsigned> Count = None,
+ Optional<bool> AllowPartial = None, Optional<bool> Runtime = None)
+ : LoopPass(ID), ProvidedCount(Count), ProvidedThreshold(Threshold),
+ ProvidedAllowPartial(AllowPartial), ProvidedRuntime(Runtime) {
+ initializeLoopUnrollPass(*PassRegistry::getPassRegistry());
+ }
+
+ Optional<unsigned> ProvidedCount;
+ Optional<unsigned> ProvidedThreshold;
+ Optional<bool> ProvidedAllowPartial;
+ Optional<bool> ProvidedRuntime;
+
+ bool runOnLoop(Loop *L, LPPassManager &) override {
+ if (skipOptnoneFunction(L))
+ return false;
+
+ Function &F = *L->getHeader()->getParent();
+
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ ScalarEvolution *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ const TargetTransformInfo &TTI =
+ getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ return tryToUnrollLoop(L, DT, LI, SE, TTI, AC, PreserveLCSSA, ProvidedCount,
+ ProvidedThreshold, ProvidedAllowPartial,
+ ProvidedRuntime);
+ }
+
+ /// This transformation requires natural loop information & requires that
+ /// loop preheaders be inserted into the CFG...
+ ///
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequiredID(LoopSimplifyID);
+ AU.addPreservedID(LoopSimplifyID);
+ AU.addRequiredID(LCSSAID);
+ AU.addPreservedID(LCSSAID);
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
+ // If loop unroll does not preserve dom info then LCSSA pass on next
+ // loop will receive invalid dom info.
+ // For now, recreate dom info, if loop is unrolled.
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addPreserved<GlobalsAAWrapperPass>();
+ }
+};
+}
+
+char LoopUnroll::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
+INITIALIZE_PASS_DEPENDENCY(LCSSA)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(LoopUnroll, "loop-unroll", "Unroll loops", false, false)
+
+Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial,
+ int Runtime) {
+ // TODO: It would make more sense for this function to take the optionals
+ // directly, but that's dangerous since it would silently break out-of-tree
+ // callers.
+ return new LoopUnroll(Threshold == -1 ? None : Optional<unsigned>(Threshold),
+ Count == -1 ? None : Optional<unsigned>(Count),
+ AllowPartial == -1 ? None
+ : Optional<bool>(AllowPartial),
+ Runtime == -1 ? None : Optional<bool>(Runtime));
+}
+
+Pass *llvm::createSimpleLoopUnrollPass() {
+ return llvm::createLoopUnrollPass(-1, -1, 0, 0);
+}
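
For an out-of-tree caller the preserved sentinel convention means, for example (a hypothetical call site, not from the commit):

    // Request a fixed unroll count of 4; every -1 maps to None above, leaving
    // the corresponding knob to flags, pragmas, TTI, or the defaults.
    Pass *P = llvm::createLoopUnrollPass(/*Threshold=*/-1, /*Count=*/4,
                                         /*AllowPartial=*/-1, /*Runtime=*/-1);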
diff --git a/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
index 7354016c2122..6b43b0f7a2ad 100644
--- a/lib/Transforms/Scalar/MemCpyOptimizer.cpp
+++ b/lib/Transforms/Scalar/MemCpyOptimizer.cpp
@@ -529,12 +529,13 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// We found an instruction that may write to the loaded memory.
// We can try to promote at this position instead of the store
- // position if nothing alias the store memory after this.
+ // position if nothing aliases the store memory after this and the store
+ // destination is not in the range.
P = &*I;
for (; I != E; ++I) {
MemoryLocation StoreLoc = MemoryLocation::get(SI);
- if (AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
- DEBUG(dbgs() << "Alias " << *I << "\n");
+ if (&*I == SI->getOperand(1) ||
+ AA.getModRefInfo(&*I, StoreLoc) != MRI_NoModRef) {
P = nullptr;
break;
}
@@ -628,13 +629,39 @@ bool MemCpyOpt::processStore(StoreInst *SI, BasicBlock::iterator &BBI) {
// Ensure that the value being stored is something that can be memset a byte
// at a time, such as "0" or "-1" of any width, as well as things like
// 0xA0A0A0A0 and 0.0.
- if (Value *ByteVal = isBytewiseValue(SI->getOperand(0)))
+ auto *V = SI->getOperand(0);
+ if (Value *ByteVal = isBytewiseValue(V)) {
if (Instruction *I = tryMergingIntoMemset(SI, SI->getPointerOperand(),
ByteVal)) {
BBI = I->getIterator(); // Don't invalidate iterator.
return true;
}
+ // If we have an aggregate, we try to promote it to a memset regardless of
+ // merging opportunities, as doing so can expose optimization opportunities
+ // to subsequent passes.
+ auto *T = V->getType();
+ if (T->isAggregateType()) {
+ uint64_t Size = DL.getTypeStoreSize(T);
+ unsigned Align = SI->getAlignment();
+ if (!Align)
+ Align = DL.getABITypeAlignment(T);
+ IRBuilder<> Builder(SI);
+ auto *M = Builder.CreateMemSet(SI->getPointerOperand(), ByteVal,
+ Size, Align, SI->isVolatile());
+
+ DEBUG(dbgs() << "Promoting " << *SI << " to " << *M << "\n");
+
+ MD->removeInstruction(SI);
+ SI->eraseFromParent();
+ NumMemSetInfer++;
+
+ // Make sure we do not invalidate the iterator.
+ BBI = M->getIterator();
+ return true;
+ }
+ }
+
return false;
}
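
A source-level illustration of the new aggregate case (an assumed example, not from the commit): a bytewise-uniform aggregate store with no neighboring store to merge into can now still become a memset.

    struct S { int a, b, c, d; };

    void zeroInit(S *P) {
      *P = S{};  // one aggregate store of a bytewise-zero value...
    }
    // ...which the hunk above can rewrite to the equivalent of
    //   memset(P, 0, sizeof(S));
    // with the alignment taken from the store, or from the ABI when unspecified.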
diff --git a/lib/Transforms/Scalar/PlaceSafepoints.cpp b/lib/Transforms/Scalar/PlaceSafepoints.cpp
index 28c610c2486a..b56b35599120 100644
--- a/lib/Transforms/Scalar/PlaceSafepoints.cpp
+++ b/lib/Transforms/Scalar/PlaceSafepoints.cpp
@@ -499,7 +499,7 @@ static bool isGCSafepointPoll(Function &F) {
static bool shouldRewriteFunction(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 401a740856e9..bcadd4e2bee6 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -2155,7 +2155,8 @@ void Reassociate::OptimizeInst(Instruction *I) {
// During the initial run we will get to the root of the tree.
// But if we get here while we are redoing instructions, there is no
// guarantee that the root will be visited. So redo it later.
- if (BO->user_back() != BO)
+ if (BO->user_back() != BO &&
+ BO->getParent() == BO->user_back()->getParent())
RedoInsts.insert(BO->user_back());
return;
}
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 5d253be1aa86..d77d5745e60c 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -78,6 +78,13 @@ static cl::opt<bool>
AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
cl::Hidden, cl::init(true));
+/// Should we split vectors of pointers into their individual elements? This
+/// is known to be buggy, but the alternate implementation isn't yet ready.
+/// This is purely to provide a debugging and diagnostic hook until the vector
+/// split is replaced with vector relocations.
+static cl::opt<bool> UseVectorSplit("rs4gc-split-vector-values", cl::Hidden,
+ cl::init(true));
+
namespace {
struct RewriteStatepointsForGC : public ModulePass {
static char ID; // Pass identification, replacement for typeid
@@ -357,10 +364,6 @@ static BaseDefiningValueResult findBaseDefiningValue(Value *I);
/// particular element in 'I'.
static BaseDefiningValueResult
findBaseDefiningValueOfVector(Value *I) {
- assert(I->getType()->isVectorTy() &&
- cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
-
// Each case parallels findBaseDefiningValue below; see that code for
// detailed motivation.
@@ -368,26 +371,10 @@ findBaseDefiningValueOfVector(Value *I) {
// An incoming argument to the function is a base pointer
return BaseDefiningValueResult(I, true);
- // We shouldn't see the address of a global as a vector value?
- assert(!isa<GlobalVariable>(I) &&
- "unexpected global variable found in base of vector");
-
- // inlining could possibly introduce phi node that contains
- // undef if callee has multiple returns
- if (isa<UndefValue>(I))
- // utterly meaningless, but useful for dealing with partially optimized
- // code.
+ if (isa<Constant>(I))
+ // Constant vectors consist only of constant pointers.
return BaseDefiningValueResult(I, true);
- // Due to inheritance, this must be _after_ the global variable and undef
- // checks
- if (Constant *Con = dyn_cast<Constant>(I)) {
- assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
- "order of checks wrong!");
- assert(Con->isNullValue() && "null is the only case which makes sense");
- return BaseDefiningValueResult(Con, true);
- }
-
if (isa<LoadInst>(I))
return BaseDefiningValueResult(I, true);
@@ -417,11 +404,11 @@ findBaseDefiningValueOfVector(Value *I) {
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
+ assert(I->getType()->isPtrOrPtrVectorTy() &&
+ "Illegal to ask for the base pointer of a non-pointer type");
+
if (I->getType()->isVectorTy())
return findBaseDefiningValueOfVector(I);
-
- assert(I->getType()->isPointerTy() &&
- "Illegal to ask for the base pointer of a non-pointer type");
if (isa<Argument>(I))
// An incoming argument to the function is a base pointer
@@ -1310,18 +1297,29 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
assert(Index < LiveVec.size() && "Bug in std::find?");
return Index;
};
-
- // All gc_relocate are set to i8 addrspace(1)* type. We originally generated
- // unique declarations for each pointer type, but this proved problematic
- // because the intrinsic mangling code is incomplete and fragile. Since
- // we're moving towards a single unified pointer type anyways, we can just
- // cast everything to an i8* of the right address space. A bitcast is added
- // later to convert gc_relocate to the actual value's type.
Module *M = StatepointToken->getModule();
- auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
- Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
- Value *GCRelocateDecl =
- Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);
+
+ // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
+ // element type is i8 addrspace(1)*). We originally generated unique
+ // declarations for each pointer type, but this proved problematic because
+ // the intrinsic mangling code is incomplete and fragile. Since we're moving
+ // towards a single unified pointer type anyway, we can just cast everything
+ // to an i8* of the right address space. A bitcast is added later to convert
+ // gc_relocate to the actual value's type.
+ auto getGCRelocateDecl = [&] (Type *Ty) {
+ assert(isHandledGCPointerType(Ty));
+ auto AS = Ty->getScalarType()->getPointerAddressSpace();
+ Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
+ if (auto *VT = dyn_cast<VectorType>(Ty))
+ NewTy = VectorType::get(NewTy, VT->getNumElements());
+ return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
+ {NewTy});
+ };
+
+ // Lazily populated map from input types to the canonicalized form mentioned
+ // in the comment above. This should probably be cached somewhere more
+ // broadly.
+ DenseMap<Type*, Value*> TypeToDeclMap;
for (unsigned i = 0; i < LiveVariables.size(); i++) {
// Generate the gc.relocate call and save the result
@@ -1329,6 +1327,11 @@ static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
Value *LiveIdx = Builder.getInt32(LiveStart + i);
+ Type *Ty = LiveVariables[i]->getType();
+ if (!TypeToDeclMap.count(Ty))
+ TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
+ Value *GCRelocateDecl = TypeToDeclMap[Ty];
+
// only specify a debug name if we can give a useful one
CallInst *Reloc = Builder.CreateCall(
GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
@@ -2380,15 +2383,19 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// Do a limited scalarization of any live at safepoint vector values which
// contain pointers. This enables this pass to run after vectorization at
- // the cost of some possible performance loss. TODO: it would be nice to
- // natively support vectors all the way through the backend so we don't need
- // to scalarize here.
- for (size_t i = 0; i < Records.size(); i++) {
- PartiallyConstructedSafepointRecord &Info = Records[i];
- Instruction *Statepoint = ToUpdate[i].getInstruction();
- splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
- Info.PointerToBase, DT);
- }
+ // the cost of some possible performance loss. Note: This is known not to
+ // update the side tables correctly, which can lead to relocation bugs when
+ // the same vector is live at multiple statepoints. We're in the process of
+ // implementing the alternate lowering - relocating the vector-of-pointers
+ // as a first-class item and updating the backend to understand that - but
+ // that's not yet complete.
+ if (UseVectorSplit)
+ for (size_t i = 0; i < Records.size(); i++) {
+ PartiallyConstructedSafepointRecord &Info = Records[i];
+ Instruction *Statepoint = ToUpdate[i].getInstruction();
+ splitVectorValues(cast<Instruction>(Statepoint), Info.LiveSet,
+ Info.PointerToBase, DT);
+ }
// In order to reduce live set of statepoint we might choose to rematerialize
// some values instead of relocating them. This is purely an optimization and
@@ -2467,7 +2474,8 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
#ifndef NDEBUG
// sanity check
for (auto *Ptr : Live)
- assert(isGCPointerType(Ptr->getType()) && "must be a gc pointer type");
+ assert(isHandledGCPointerType(Ptr->getType()) &&
+ "must be a gc pointer type");
#endif
relocationViaAlloca(F, DT, Live, Records);
@@ -2547,7 +2555,7 @@ void RewriteStatepointsForGC::stripNonValidAttributesFromBody(Function &F) {
static bool shouldRewriteStatepointsIn(Function &F) {
// TODO: This should check the GCStrategy
if (F.hasGC()) {
- const char *FunctionGCName = F.getGC();
+ const auto &FunctionGCName = F.getGC();
const StringRef StatepointExampleName("statepoint-example");
const StringRef CoreCLRName("coreclr");
return (StatepointExampleName == FunctionGCName) ||
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 2fca803adde8..8569e080873c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -757,9 +757,14 @@ void SCCPSolver::visitCastInst(CastInst &I) {
LatticeVal OpSt = getValueState(I.getOperand(0));
if (OpSt.isOverdefined()) // Inherit overdefinedness of operand
markOverdefined(&I);
- else if (OpSt.isConstant()) // Propagate constant value
- markConstant(&I, ConstantExpr::getCast(I.getOpcode(),
- OpSt.getConstant(), I.getType()));
+ else if (OpSt.isConstant()) {
+ Constant *C =
+ ConstantExpr::getCast(I.getOpcode(), OpSt.getConstant(), I.getType());
+ if (isa<UndefValue>(C))
+ return;
+ // Propagate constant value
+ markConstant(&I, C);
+ }
}
@@ -859,10 +864,14 @@ void SCCPSolver::visitBinaryOperator(Instruction &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I,
- ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::get(I.getOpcode(), V1State.getConstant(),
+ V2State.getConstant());
+ // X op Y -> undef.
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If something is undef, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -917,10 +926,13 @@ void SCCPSolver::visitCmpInst(CmpInst &I) {
LatticeVal &IV = ValueState[&I];
if (IV.isOverdefined()) return;
- if (V1State.isConstant() && V2State.isConstant())
- return markConstant(IV, &I, ConstantExpr::getCompare(I.getPredicate(),
- V1State.getConstant(),
- V2State.getConstant()));
+ if (V1State.isConstant() && V2State.isConstant()) {
+ Constant *C = ConstantExpr::getCompare(
+ I.getPredicate(), V1State.getConstant(), V2State.getConstant());
+ if (isa<UndefValue>(C))
+ return;
+ return markConstant(IV, &I, C);
+ }
// If operands are still undefined, wait for it to resolve.
if (!V1State.isOverdefined() && !V2State.isOverdefined())
@@ -1020,8 +1032,11 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
Constant *Ptr = Operands[0];
auto Indices = makeArrayRef(Operands.begin() + 1, Operands.end());
- markConstant(&I, ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr,
- Indices));
+ Constant *C =
+ ConstantExpr::getGetElementPtr(I.getSourceElementType(), Ptr, Indices);
+ if (isa<UndefValue>(C))
+ return;
+ markConstant(&I, C);
}
void SCCPSolver::visitStoreInst(StoreInst &SI) {
@@ -1061,9 +1076,9 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
Constant *Ptr = PtrVal.getConstant();
- // load null -> null
+ // load null is undefined.
if (isa<ConstantPointerNull>(Ptr) && I.getPointerAddressSpace() == 0)
- return markConstant(IV, &I, UndefValue::get(I.getType()));
+ return;
// Transform load (constant global) into the value loaded.
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Ptr)) {
@@ -1079,8 +1094,11 @@ void SCCPSolver::visitLoadInst(LoadInst &I) {
}
// Transform load from a constant into a constant if possible.
- if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL))
+ if (Constant *C = ConstantFoldLoadFromConstPtr(Ptr, DL)) {
+ if (isa<UndefValue>(C))
+ return;
return markConstant(IV, &I, C);
+ }
// Otherwise we cannot say for certain what value this load will produce.
// Bail out.
@@ -1122,8 +1140,12 @@ CallOverdefined:
// If we can constant fold this, mark the result of the call as a
// constant.
- if (Constant *C = ConstantFoldCall(F, Operands, TLI))
+ if (Constant *C = ConstantFoldCall(F, Operands, TLI)) {
+ // call -> undef.
+ if (isa<UndefValue>(C))
+ return;
return markConstant(I, C);
+ }
}
// Otherwise, we don't know anything about this call, mark it overdefined.
@@ -1379,6 +1401,11 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
// X % undef -> undef. No change.
if (Op1LV.isUndefined()) break;
+ // X / 0 -> undef. No change.
+ // X % 0 -> undef. No change.
+ if (Op1LV.isConstant() && Op1LV.getConstant()->isZeroValue())
+ break;
+
// undef / X -> 0. X could be maxint.
// undef % X -> 0. X could be 1.
markForcedConstant(&I, Constant::getNullValue(ITy));
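
The edits across this file share one shape; a compact restatement (plain C++ stand-ins, not the real lattice API): fold first, and if folding produced undef, return without pinning the cell so that ResolvedUndefsIn() can decide later. The new divisor check follows the same logic: forcing "undef / 0" or "undef % 0" to zero would manufacture a concrete value out of an operation that is undefined anyway.

    enum class Folded { Constant, Undef };

    // Stand-in for the markConstant(...) calls now guarded throughout SCCP.cpp.
    template <typename MarkConstantFn>
    void propagateFold(Folded R, MarkConstantFn MarkConstant) {
      if (R == Folded::Undef)
        return;          // leave the cell undefined; ResolvedUndefsIn handles it
      MarkConstant();    // safe: the folded result is a genuine constant
    }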
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index 0e0b00df85bb..4e84d72ae7bd 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -425,9 +425,7 @@ bool TailCallElim::runTRE(Function &F) {
// with themselves. Check to see if we did and clean up our mess if so. This
// occurs when a function passes an argument straight through to its tail
// call.
- for (unsigned i = 0, e = ArgumentPHIs.size(); i != e; ++i) {
- PHINode *PN = ArgumentPHIs[i];
-
+ for (PHINode *PN : ArgumentPHIs) {
// If the PHI Node is a dynamic constant, replace it with the value it is.
if (Value *PNV = SimplifyInstruction(PN, F.getParent()->getDataLayout())) {
PN->replaceAllUsesWith(PNV);
@@ -468,10 +466,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// return value of the call, it must only use things that are defined before
// the call, or movable instructions between the call and the instruction
// itself.
- for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
- if (I->getOperand(i) == CI)
- return false;
- return true;
+ return std::find(I->op_begin(), I->op_end(), CI) == I->op_end();
}
/// Return true if the specified value is the same when the return would exit
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index a5137e933e83..72db980cf572 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -626,11 +626,17 @@ void llvm::SplitLandingPadPredecessors(BasicBlock *OrigBB,
Clone2->setName(Twine("lpad") + Suffix2);
NewBB2->getInstList().insert(NewBB2->getFirstInsertionPt(), Clone2);
- // Create a PHI node for the two cloned landingpad instructions.
- PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
- PN->addIncoming(Clone1, NewBB1);
- PN->addIncoming(Clone2, NewBB2);
- LPad->replaceAllUsesWith(PN);
+ // Create a PHI node for the two cloned landingpad instructions only
+ // if the original landingpad instruction has some uses.
+ if (!LPad->use_empty()) {
+ assert(!LPad->getType()->isTokenTy() &&
+ "Split cannot be applied if LPad is token type. Otherwise an "
+ "invalid PHINode of token type would be created.");
+ PHINode *PN = PHINode::Create(LPad->getType(), 2, "lpad.phi", LPad);
+ PN->addIncoming(Clone1, NewBB1);
+ PN->addIncoming(Clone2, NewBB2);
+ LPad->replaceAllUsesWith(PN);
+ }
LPad->eraseFromParent();
} else {
// There is no second clone. Just replace the landing pad with the first
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 854a3b855f54..6454afb8bc42 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -266,27 +266,14 @@ namespace {
bool ModuleLevelChanges;
const char *NameSuffix;
ClonedCodeInfo *CodeInfo;
- CloningDirector *Director;
- ValueMapTypeRemapper *TypeMapper;
- ValueMaterializer *Materializer;
public:
PruningFunctionCloner(Function *newFunc, const Function *oldFunc,
ValueToValueMapTy &valueMap, bool moduleLevelChanges,
- const char *nameSuffix, ClonedCodeInfo *codeInfo,
- CloningDirector *Director)
+ const char *nameSuffix, ClonedCodeInfo *codeInfo)
: NewFunc(newFunc), OldFunc(oldFunc), VMap(valueMap),
ModuleLevelChanges(moduleLevelChanges), NameSuffix(nameSuffix),
- CodeInfo(codeInfo), Director(Director) {
- // These are optional components. The Director may return null.
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- } else {
- TypeMapper = nullptr;
- Materializer = nullptr;
- }
- }
+ CodeInfo(codeInfo) {}
/// The specified block is found to be reachable; clone it and
/// anything that it can reach.
@@ -332,23 +319,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// loop doesn't include the terminator.
for (BasicBlock::const_iterator II = StartingInst, IE = --BB->end();
II != IE; ++II) {
- // If the "Director" remaps the instruction, don't clone it.
- if (Director) {
- CloningDirector::CloningAction Action =
- Director->handleInstruction(VMap, &*II, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- // If the cloning director says skip, continue to the next instruction.
- // In this case, the cloning director is responsible for mapping the
- // skipped instruction to some value that is defined in the new
- // basic block.
- if (Action == CloningDirector::SkipInstruction)
- continue;
- }
Instruction *NewInst = II->clone();
@@ -356,8 +326,7 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// nodes for which we defer processing until we update the CFG.
if (!isa<PHINode>(NewInst)) {
RemapInstruction(NewInst, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges);
// If we can simplify this instruction to some other value, simply add
// a mapping to that value rather than inserting a new instruction into
@@ -397,26 +366,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
// Finally, clone over the terminator.
const TerminatorInst *OldTI = BB->getTerminator();
bool TerminatorDone = false;
- if (Director) {
- CloningDirector::CloningAction Action
- = Director->handleInstruction(VMap, OldTI, NewBB);
- // If the cloning director says stop, we want to stop everything, not
- // just break out of the loop (which would cause the terminator to be
- // cloned). The cloning director is responsible for inserting a proper
- // terminator into the new basic block in this case.
- if (Action == CloningDirector::StopCloningBB)
- return;
- if (Action == CloningDirector::CloneSuccessors) {
- // If the director says to skip with a terminate instruction, we still
- // need to clone this block's successors.
- const TerminatorInst *TI = NewBB->getTerminator();
- for (const BasicBlock *Succ : TI->successors())
- ToClone.push_back(Succ);
- return;
- }
- assert(Action != CloningDirector::SkipInstruction &&
- "SkipInstruction is not valid for terminators.");
- }
if (const BranchInst *BI = dyn_cast<BranchInst>(OldTI)) {
if (BI->isConditional()) {
// If the condition was a known constant in the callee...
@@ -485,19 +434,13 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
ValueToValueMapTy &VMap,
bool ModuleLevelChanges,
SmallVectorImpl<ReturnInst *> &Returns,
- const char *NameSuffix,
- ClonedCodeInfo *CodeInfo,
- CloningDirector *Director) {
+ const char *NameSuffix,
+ ClonedCodeInfo *CodeInfo) {
assert(NameSuffix && "NameSuffix cannot be null!");
ValueMapTypeRemapper *TypeMapper = nullptr;
ValueMaterializer *Materializer = nullptr;
- if (Director) {
- TypeMapper = Director->getTypeRemapper();
- Materializer = Director->getValueMaterializer();
- }
-
#ifndef NDEBUG
// If the cloning starts at the beginning of the function, verify that
// the function arguments are mapped.
@@ -507,7 +450,7 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
#endif
PruningFunctionCloner PFC(NewFunc, OldFunc, VMap, ModuleLevelChanges,
- NameSuffix, CodeInfo, Director);
+ NameSuffix, CodeInfo);
const BasicBlock *StartingBB;
if (StartingInst)
StartingBB = StartingInst->getParent();
@@ -731,8 +674,7 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
ClonedCodeInfo *CodeInfo,
Instruction *TheCall) {
CloneAndPruneIntoFromInst(NewFunc, OldFunc, &OldFunc->front().front(), VMap,
- ModuleLevelChanges, Returns, NameSuffix, CodeInfo,
- nullptr);
+ ModuleLevelChanges, Returns, NameSuffix, CodeInfo);
}
/// \brief Remaps instructions in \p Blocks using the mapping in \p VMap.
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 0e386ac83e9e..d2793e5ecb5b 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CFG.h"
#include "llvm/IR/Constants.h"
@@ -1051,9 +1052,31 @@ bool llvm::ConvertDebugDeclareToDebugValue(DbgDeclareInst *DDI,
ExtendedArg = dyn_cast<Argument>(ZExt->getOperand(0));
if (SExtInst *SExt = dyn_cast<SExtInst>(SI->getOperand(0)))
ExtendedArg = dyn_cast<Argument>(SExt->getOperand(0));
- if (ExtendedArg)
- Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar, DIExpr,
+ if (ExtendedArg) {
+ // We're now only describing a subset of the variable. The piece we're
+ // describing will always be smaller than the variable size, because
+ // VariableSize == Size of Alloca described by DDI. Since SI stores
+ // to the alloca described by DDI, if its first operand is an extend,
+ // we're guaranteed that before extension, the value was narrower than
+ // the size of the alloca, hence the size of the described variable.
+ SmallVector<uint64_t, 3> NewDIExpr;
+ unsigned PieceOffset = 0;
+ // If this already is a bit piece, we drop the bit piece from the expression
+ // and record the offset.
+ if (DIExpr->isBitPiece()) {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end()-3);
+ PieceOffset = DIExpr->getBitPieceOffset();
+ } else {
+ NewDIExpr.append(DIExpr->elements_begin(), DIExpr->elements_end());
+ }
+ NewDIExpr.push_back(dwarf::DW_OP_bit_piece);
+ NewDIExpr.push_back(PieceOffset); // Offset
+ const DataLayout &DL = DDI->getModule()->getDataLayout();
+ NewDIExpr.push_back(DL.getTypeSizeInBits(ExtendedArg->getType())); // Size
+ Builder.insertDbgValueIntrinsic(ExtendedArg, 0, DIVar,
+ Builder.createExpression(NewDIExpr),
DDI->getDebugLoc(), SI);
+ }
else
Builder.insertDbgValueIntrinsic(SI->getOperand(0), 0, DIVar, DIExpr,
DDI->getDebugLoc(), SI);
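
A worked example of the expression tail built above (assumed scenario: an i8 argument zero-extended to i32 and stored to a 32-bit alloca; the constant below matches DWARF's DW_OP_bit_piece encoding):

    #include <cstdint>
    #include <vector>

    constexpr uint64_t DW_OP_bit_piece = 0x9d; // the DWARF op appended above

    // For the assumed zext i8 -> i32 case: offset 0, size 8 bits, i.e. the
    // dbg.value describes only the low byte of the 32-bit variable.
    std::vector<uint64_t> makePieceTail(uint64_t PieceOffset, uint64_t SizeInBits) {
      return {DW_OP_bit_piece, PieceOffset, SizeInBits}; // e.g. {0x9d, 0, 8}
    }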
@@ -1407,7 +1430,7 @@ void llvm::removeUnwindEdge(BasicBlock *BB) {
/// removeUnreachableBlocksFromFn - Remove blocks that are not reachable, even
/// if they are in a dead cycle. Return true if a change was made, false
/// otherwise.
-bool llvm::removeUnreachableBlocks(Function &F) {
+bool llvm::removeUnreachableBlocks(Function &F, LazyValueInfo *LVI) {
SmallPtrSet<BasicBlock*, 128> Reachable;
bool Changed = markAliveBlocks(F, Reachable);
@@ -1428,6 +1451,8 @@ bool llvm::removeUnreachableBlocks(Function &F) {
++SI)
if (Reachable.count(*SI))
(*SI)->removePredecessor(&*BB);
+ if (LVI)
+ LVI->eraseBlock(&*BB);
BB->dropAllReferences();
}
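
A hypothetical call site for the widened signature (assuming the declaration defaults LVI to nullptr, so existing callers are unaffected):

    // A pass that holds LazyValueInfo can pass it in so per-block LVI caches
    // are dropped as dead blocks are erased, keeping the analysis consistent.
    bool Changed = removeUnreachableBlocks(F, LVI); // LVI may be nullptr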
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index 2499b88741fe..eea9237ba80c 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -528,7 +528,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
Loop *OuterL = L->getParentLoop();
// Update LoopInfo if the loop is completely removed.
if (CompletelyUnroll)
- LI->updateUnloop(L);;
+ LI->markAsRemoved(L);
// If we have a pass and a DominatorTree we should re-simplify impacted loops
// to ensure subsequent analyses can rely on this form. We want to simplify
@@ -542,7 +542,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
// LCSSA must be performed on the outermost affected loop. The unrolled
// loop's last loop latch is guaranteed to be in the outermost loop after
- // LoopInfo's been updated by updateUnloop.
+ // LoopInfo's been updated by markAsRemoved.
Loop *LatchLoop = LI->getLoopFor(Latches.back());
if (!OuterL->contains(LatchLoop))
while (OuterL->getParentLoop() != LatchLoop)
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index e03880526bfa..fa958e913b7b 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -599,7 +599,7 @@ Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
IRBuilder<>::FastMathFlagGuard FMFG(Builder);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
Value *Cmp;
if (RK == MRK_FloatMin || RK == MRK_FloatMax)
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 3bb3fa5a301f..3125a2c359b6 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -141,6 +141,8 @@ class SimplifyCFGOpt {
bool SimplifyReturn(ReturnInst *RI, IRBuilder<> &Builder);
bool SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
+ bool SimplifySingleResume(ResumeInst *RI);
+ bool SimplifyCommonResume(ResumeInst *RI);
bool SimplifyCleanupReturn(CleanupReturnInst *RI);
bool SimplifyUnreachable(UnreachableInst *UI);
bool SimplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
@@ -3239,14 +3241,101 @@ static bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
}
bool SimplifyCFGOpt::SimplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
- // If this is a trivial landing pad that just continues unwinding the caught
- // exception then zap the landing pad, turning its invokes into calls.
+ if (isa<PHINode>(RI->getValue()))
+ return SimplifyCommonResume(RI);
+ else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
+ RI->getValue() == RI->getParent()->getFirstNonPHI())
+ // The resume must unwind the exception that caused control to branch here.
+ return SimplifySingleResume(RI);
+
+ return false;
+}
+
+// Simplify resume that is shared by several landing pads (phi of landing pad).
+bool SimplifyCFGOpt::SimplifyCommonResume(ResumeInst *RI) {
+ BasicBlock *BB = RI->getParent();
+
+ // Check that there are no other instructions except for debug intrinsics
+ // between the phi of landing pads (RI->getValue()) and resume instruction.
+ BasicBlock::iterator I = cast<Instruction>(RI->getValue())->getIterator(),
+ E = RI->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I))
+ return false;
+
+ SmallSet<BasicBlock *, 4> TrivialUnwindBlocks;
+ auto *PhiLPInst = cast<PHINode>(RI->getValue());
+
+ // Check incoming blocks to see if any of them are trivial.
+ for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues();
+ Idx != End; Idx++) {
+ auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
+ auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
+
+ // If the block has other successors, we cannot delete it because
+ // those successors still depend on it.
+ if (IncomingBB->getUniqueSuccessor() != BB)
+ continue;
+
+ auto *LandingPad =
+ dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
+ // Not the landing pad that caused control to branch here.
+ if (IncomingValue != LandingPad)
+ continue;
+
+ bool isTrivial = true;
+
+ I = IncomingBB->getFirstNonPHI()->getIterator();
+ E = IncomingBB->getTerminator()->getIterator();
+ while (++I != E)
+ if (!isa<DbgInfoIntrinsic>(I)) {
+ isTrivial = false;
+ break;
+ }
+
+ if (isTrivial)
+ TrivialUnwindBlocks.insert(IncomingBB);
+ }
+
+ // If no trivial unwind blocks, don't do any simplifications.
+ if (TrivialUnwindBlocks.empty()) return false;
+
+ // Turn all invokes that unwind here into calls.
+ for (auto *TrivialBB : TrivialUnwindBlocks) {
+ // Blocks that will be simplified should be removed from the phi node.
+ // Note there could be multiple edges to the resume block, and we need
+ // to remove them all.
+ while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
+ BB->removePredecessor(TrivialBB, true);
+
+ for (pred_iterator PI = pred_begin(TrivialBB), PE = pred_end(TrivialBB);
+ PI != PE;) {
+ BasicBlock *Pred = *PI++;
+ removeUnwindEdge(Pred);
+ }
+
+ // In each SimplifyCFG run, only the currently processed block can be
+ // erased; erasing anything else would break the pass's iteration. So
+ // instead of erasing TrivialBB, we only remove its branch to the common
+ // resume block, which lets us erase the resume block later once it has
+ // no predecessors.
+ TrivialBB->getTerminator()->eraseFromParent();
+ new UnreachableInst(RI->getContext(), TrivialBB);
+ }
+
+ // Delete the resume block if all its predecessors have been removed.
+ if (pred_empty(BB))
+ BB->eraseFromParent();
+
+ return !TrivialUnwindBlocks.empty();
+}
+
+// Simplify resume that is only used by a single (non-phi) landing pad.
+bool SimplifyCFGOpt::SimplifySingleResume(ResumeInst *RI) {
BasicBlock *BB = RI->getParent();
LandingPadInst *LPInst = dyn_cast<LandingPadInst>(BB->getFirstNonPHI());
- if (RI->getValue() != LPInst)
- // Not a landing pad, or the resume is not unwinding the exception that
- // caused control to branch here.
- return false;
+  assert(RI->getValue() == LPInst &&
+         "Resume must unwind the exception that caused control to branch here");
// Check that there are no other instructions except for debug intrinsics.
BasicBlock::iterator I = LPInst->getIterator(), E = RI->getIterator();
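A minimal IR sketch of the shape the new SimplifyCommonResume targets (block and value names are illustrative): several landing pads feed one shared resume block through a phi. When an incoming block contains nothing but its landingpad and debug intrinsics, the invokes unwinding to it are turned into calls, and the shared resume block is erased once it loses all predecessors:

  lpad1:
    %lp1 = landingpad { i8*, i32 } cleanup
    br label %resume.common
  lpad2:
    %lp2 = landingpad { i8*, i32 } cleanup
    br label %resume.common
  resume.common:
    %lp = phi { i8*, i32 } [ %lp1, %lpad1 ], [ %lp2, %lpad2 ]
    resume { i8*, i32 } %lp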
diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp
index dc5fee523d4c..dc0744060142 100644
--- a/lib/Transforms/Utils/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp
@@ -997,7 +997,7 @@ Value *LibCallSimplifier::optimizeUnaryDoubleFP(CallInst *CI, IRBuilder<> &B,
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// floor((double)floatval) -> (double)floorf(floatval)
if (Callee->isIntrinsic()) {
@@ -1035,7 +1035,7 @@ Value *LibCallSimplifier::optimizeBinaryDoubleFP(CallInst *CI, IRBuilder<> &B) {
// Propagate fast-math flags from the existing call to the new call.
IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(CI->getFastMathFlags());
+ B.setFastMathFlags(CI->getFastMathFlags());
// fmin((double)floatval1, (double)floatval2)
// -> (double)fminf(floatval1, floatval2)
@@ -1127,29 +1127,26 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
Callee->getAttributes());
}
+ // FIXME: Use instruction-level FMF.
bool UnsafeFPMath = canUseUnsafeFPMath(CI->getParent()->getParent());
- // pow(exp(x), y) -> exp(x*y)
+ // pow(exp(x), y) -> exp(x * y)
// pow(exp2(x), y) -> exp2(x * y)
- // We enable these only under fast-math. Besides rounding
- // differences the transformation changes overflow and
- // underflow behavior quite dramatically.
+ // We enable these only with fast-math. Besides rounding differences, the
+ // transformation changes overflow and underflow behavior quite dramatically.
// Example: x = 1000, y = 0.001.
// pow(exp(x), y) = pow(inf, 0.001) = inf, whereas exp(x*y) = exp(1).
- if (UnsafeFPMath) {
- if (auto *OpC = dyn_cast<CallInst>(Op1)) {
+ auto *OpC = dyn_cast<CallInst>(Op1);
+ if (OpC && OpC->hasUnsafeAlgebra() && CI->hasUnsafeAlgebra()) {
+ LibFunc::Func Func;
+ Function *OpCCallee = OpC->getCalledFunction();
+ if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
+ TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2)) {
IRBuilder<>::FastMathFlagGuard Guard(B);
- FastMathFlags FMF;
- FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
-
- LibFunc::Func Func;
- Function *OpCCallee = OpC->getCalledFunction();
- if (OpCCallee && TLI->getLibFunc(OpCCallee->getName(), Func) &&
- TLI->has(Func) && (Func == LibFunc::exp || Func == LibFunc::exp2))
- return EmitUnaryFloatFnCall(
- B.CreateFMul(OpC->getArgOperand(0), Op2, "mul"),
- OpCCallee->getName(), B, OpCCallee->getAttributes());
+ B.setFastMathFlags(CI->getFastMathFlags());
+ Value *FMul = B.CreateFMul(OpC->getArgOperand(0), Op2, "mul");
+ return EmitUnaryFloatFnCall(FMul, OpCCallee->getName(), B,
+ OpCCallee->getAttributes());
}
}
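An illustrative before/after for the tightened gating (value names are hypothetical): the fold now requires fast-math flags on both the inner exp/exp2 call and the pow call, rather than a function-wide unsafe-fp-math attribute.

  %e = call fast double @exp(double %x)
  %p = call fast double @pow(double %e, double %y)

  ; folds to, roughly:
  %xy = fmul fast double %x, %y
  %p = call fast double @exp(double %xy)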
@@ -1167,9 +1164,12 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) {
LibFunc::fabsl)) {
// In -ffast-math, pow(x, 0.5) -> sqrt(x).
- if (UnsafeFPMath)
+ if (CI->hasUnsafeAlgebra()) {
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(CI->getFastMathFlags());
return EmitUnaryFloatFnCall(Op1, TLI->getName(LibFunc::sqrt), B,
Callee->getAttributes());
+ }
// Expand pow(x, 0.5) to (x == -infinity ? +infinity : fabs(sqrt(x))).
// This is faster than calling pow, and still handles negative zero
@@ -1328,7 +1328,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilder<> &B) {
FMF.setNoSignedZeros();
FMF.setNoNaNs();
}
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
// We have a relaxed floating-point environment. We can ignore NaN-handling
// and transform to a compare and select. We do not have to consider errno or
@@ -1354,11 +1354,13 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+ if (!CI->hasUnsafeAlgebra())
return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
- if (!OpC)
+
+ // The earlier call must also be unsafe in order to do these transforms.
+ if (!OpC || !OpC->hasUnsafeAlgebra())
return Ret;
// log(pow(x,y)) -> y*log(x)
@@ -1369,7 +1371,7 @@ Value *LibCallSimplifier::optimizeLog(CallInst *CI, IRBuilder<> &B) {
IRBuilder<>::FastMathFlagGuard Guard(B);
FastMathFlags FMF;
FMF.setUnsafeAlgebra();
- B.SetFastMathFlags(FMF);
+ B.setFastMathFlags(FMF);
LibFunc::Func Func;
Function *F = OpC->getCalledFunction();
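The log fold under the new per-instruction checks, as a hedged sketch (names illustrative); both the log call and the inner call must carry fast-math flags:

  %p = call fast double @pow(double %x, double %y)
  %l = call fast double @log(double %p)

  ; log(pow(x, y)) -> y * log(x), roughly:
  %lx = call fast double @log(double %x)
  %l2 = fmul fast double %y, %lx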
@@ -1397,66 +1399,67 @@ Value *LibCallSimplifier::optimizeSqrt(CallInst *CI, IRBuilder<> &B) {
if (TLI->has(LibFunc::sqrtf) && (Callee->getName() == "sqrt" ||
Callee->getIntrinsicID() == Intrinsic::sqrt))
Ret = optimizeUnaryDoubleFP(CI, B, true);
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
+
+ if (!CI->hasUnsafeAlgebra())
return Ret;
- Value *Op = CI->getArgOperand(0);
- if (Instruction *I = dyn_cast<Instruction>(Op)) {
- if (I->getOpcode() == Instruction::FMul && I->hasUnsafeAlgebra()) {
- // We're looking for a repeated factor in a multiplication tree,
- // so we can do this fold: sqrt(x * x) -> fabs(x);
- // or this fold: sqrt(x * x * y) -> fabs(x) * sqrt(y).
- Value *Op0 = I->getOperand(0);
- Value *Op1 = I->getOperand(1);
- Value *RepeatOp = nullptr;
- Value *OtherOp = nullptr;
- if (Op0 == Op1) {
- // Simple match: the operands of the multiply are identical.
- RepeatOp = Op0;
- } else {
- // Look for a more complicated pattern: one of the operands is itself
- // a multiply, so search for a common factor in that multiply.
- // Note: We don't bother looking any deeper than this first level or for
- // variations of this pattern because instcombine's visitFMUL and/or the
- // reassociation pass should give us this form.
- Value *OtherMul0, *OtherMul1;
- if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
- // Pattern: sqrt((x * y) * z)
- if (OtherMul0 == OtherMul1) {
- // Matched: sqrt((x * x) * z)
- RepeatOp = OtherMul0;
- OtherOp = Op1;
- }
- }
- }
- if (RepeatOp) {
- // Fast math flags for any created instructions should match the sqrt
- // and multiply.
- // FIXME: We're not checking the sqrt because it doesn't have
- // fast-math-flags (see earlier comment).
- IRBuilder<>::FastMathFlagGuard Guard(B);
- B.SetFastMathFlags(I->getFastMathFlags());
- // If we found a repeated factor, hoist it out of the square root and
- // replace it with the fabs of that factor.
- Module *M = Callee->getParent();
- Type *ArgType = Op->getType();
- Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
- Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
- if (OtherOp) {
- // If we found a non-repeated factor, we still need to get its square
- // root. We then multiply that by the value that was simplified out
- // of the square root calculation.
- Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
- Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
- return B.CreateFMul(FabsCall, SqrtCall);
- }
- return FabsCall;
+ Instruction *I = dyn_cast<Instruction>(CI->getArgOperand(0));
+ if (!I || I->getOpcode() != Instruction::FMul || !I->hasUnsafeAlgebra())
+ return Ret;
+
+ // We're looking for a repeated factor in a multiplication tree,
+ // so we can do this fold: sqrt(x * x) -> fabs(x);
+ // or this fold: sqrt((x * x) * y) -> fabs(x) * sqrt(y).
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+ Value *RepeatOp = nullptr;
+ Value *OtherOp = nullptr;
+ if (Op0 == Op1) {
+ // Simple match: the operands of the multiply are identical.
+ RepeatOp = Op0;
+ } else {
+ // Look for a more complicated pattern: one of the operands is itself
+ // a multiply, so search for a common factor in that multiply.
+ // Note: We don't bother looking any deeper than this first level or for
+ // variations of this pattern because instcombine's visitFMUL and/or the
+ // reassociation pass should give us this form.
+ Value *OtherMul0, *OtherMul1;
+ if (match(Op0, m_FMul(m_Value(OtherMul0), m_Value(OtherMul1)))) {
+ // Pattern: sqrt((x * y) * z)
+ if (OtherMul0 == OtherMul1 &&
+ cast<Instruction>(Op0)->hasUnsafeAlgebra()) {
+ // Matched: sqrt((x * x) * z)
+ RepeatOp = OtherMul0;
+ OtherOp = Op1;
}
}
}
- return Ret;
-}
+ if (!RepeatOp)
+ return Ret;
+ // Fast math flags for any created instructions should match the sqrt
+ // and multiply.
+ IRBuilder<>::FastMathFlagGuard Guard(B);
+ B.setFastMathFlags(I->getFastMathFlags());
+
+ // If we found a repeated factor, hoist it out of the square root and
+ // replace it with the fabs of that factor.
+ Module *M = Callee->getParent();
+ Type *ArgType = I->getType();
+ Value *Fabs = Intrinsic::getDeclaration(M, Intrinsic::fabs, ArgType);
+ Value *FabsCall = B.CreateCall(Fabs, RepeatOp, "fabs");
+ if (OtherOp) {
+ // If we found a non-repeated factor, we still need to get its square
+ // root. We then multiply that by the value that was simplified out
+ // of the square root calculation.
+ Value *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, ArgType);
+ Value *SqrtCall = B.CreateCall(Sqrt, OtherOp, "sqrt");
+ return B.CreateFMul(FabsCall, SqrtCall);
+ }
+ return FabsCall;
+}
+
+// TODO: Generalize to handle any trig function and its inverse.
Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
Function *Callee = CI->getCalledFunction();
Value *Ret = nullptr;
@@ -1471,13 +1474,15 @@ Value *LibCallSimplifier::optimizeTan(CallInst *CI, IRBuilder<> &B) {
!FT->getParamType(0)->isFloatingPointTy())
return Ret;
- if (!canUseUnsafeFPMath(CI->getParent()->getParent()))
- return Ret;
Value *Op1 = CI->getArgOperand(0);
auto *OpC = dyn_cast<CallInst>(Op1);
if (!OpC)
return Ret;
+ // Both calls must allow unsafe optimizations in order to remove them.
+ if (!CI->hasUnsafeAlgebra() || !OpC->hasUnsafeAlgebra())
+ return Ret;
+
// tan(atan(x)) -> x
// tanf(atanf(x)) -> x
// tanl(atanl(x)) -> x
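Hedged IR sketches of the two folds above (value names are hypothetical; every instruction involved must carry the fast flag). The repeated-factor sqrt fold:

  %xx = fmul fast double %x, %x
  %m  = fmul fast double %xx, %y
  %s  = call fast double @sqrt(double %m)

  ; sqrt((x * x) * y) -> fabs(x) * sqrt(y), roughly:
  %fx = call double @llvm.fabs.f64(double %x)
  %sy = call double @llvm.sqrt.f64(double %y)
  %s2 = fmul fast double %fx, %sy

and the tan fold, which now fires only when both calls are fast:

  %a = call fast double @atan(double %x)
  %t = call fast double @tan(double %a)   ; replaced by %x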
diff --git a/lib/Transforms/Utils/ValueMapper.cpp b/lib/Transforms/Utils/ValueMapper.cpp
index 2e361d38ed0b..f47ddb9f064f 100644
--- a/lib/Transforms/Utils/ValueMapper.cpp
+++ b/lib/Transforms/Utils/ValueMapper.cpp
@@ -222,8 +222,17 @@ static void resolveCycles(Metadata *MD, bool AllowTemps) {
if (auto *N = dyn_cast_or_null<MDNode>(MD)) {
if (AllowTemps && N->isTemporary())
return;
- if (!N->isResolved())
- N->resolveCycles(AllowTemps);
+ if (!N->isResolved()) {
+ if (AllowTemps)
+ // Note that this will drop RAUW support on any temporaries, which
+ // blocks uniquing. If this ends up being an issue, in the future
+ // we can experiment with delaying resolving these nodes until
+ // after metadata is fully materialized (i.e. when linking metadata
+ // as a postpass after function importing).
+ N->resolveNonTemporaries();
+ else
+ N->resolveCycles();
+ }
}
}
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 40abfc759e0a..f69a4e52c7e1 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -222,7 +222,7 @@ static void propagateIRFlags(Value *I, ArrayRef<Value *> VL) {
}
}
}
-
+
/// \returns \p I after propagating metadata from \p VL.
static Instruction *propagateMetadata(Instruction *I, ArrayRef<Value *> VL) {
Instruction *I0 = cast<Instruction>(VL[0]);
@@ -506,7 +506,7 @@ private:
}
return Last;
}
-
+
/// -- Vectorization State --
/// Holds all of the tree entries.
std::vector<TreeEntry> VectorizableTree;
@@ -884,7 +884,7 @@ private:
/// The current size of the scheduling region.
int ScheduleRegionSize;
-
+
/// The maximum size allowed for the scheduling region.
int ScheduleRegionSizeLimit;
@@ -1089,7 +1089,7 @@ void BoUpSLP::buildTree_rec(ArrayRef<Value *> VL, unsigned Depth) {
newTreeEntry(VL, false);
return;
}
-
+
// Check that every instructions appears once in this bundle.
for (unsigned i = 0, e = VL.size(); i < e; ++i)
for (unsigned j = i+1; j < e; ++j)
@@ -1713,7 +1713,7 @@ int BoUpSLP::getSpillCost() {
int Cost = 0;
SmallPtrSet<Instruction*, 4> LiveValues;
- Instruction *PrevInst = nullptr;
+ Instruction *PrevInst = nullptr;
for (unsigned N = 0; N < VectorizableTree.size(); ++N) {
Instruction *Inst = dyn_cast<Instruction>(VectorizableTree[N].Scalars[0]);
@@ -1738,7 +1738,7 @@ int BoUpSLP::getSpillCost() {
for (auto &J : PrevInst->operands()) {
if (isa<Instruction>(&*J) && ScalarToTreeEntry.count(&*J))
LiveValues.insert(cast<Instruction>(&*J));
- }
+ }
// Now find the sequence of instructions between PrevInst and Inst.
BasicBlock::reverse_iterator InstIt(Inst->getIterator()),
@@ -1782,30 +1782,29 @@ int BoUpSLP::getTreeCost() {
unsigned BundleWidth = VectorizableTree[0].Scalars.size();
- for (unsigned i = 0, e = VectorizableTree.size(); i != e; ++i) {
- int C = getEntryCost(&VectorizableTree[i]);
+ for (TreeEntry &TE : VectorizableTree) {
+ int C = getEntryCost(&TE);
DEBUG(dbgs() << "SLP: Adding cost " << C << " for bundle that starts with "
- << *VectorizableTree[i].Scalars[0] << " .\n");
+ << *TE.Scalars[0] << " .\n");
Cost += C;
}
SmallSet<Value *, 16> ExtractCostCalculated;
int ExtractCost = 0;
- for (UserList::iterator I = ExternalUses.begin(), E = ExternalUses.end();
- I != E; ++I) {
+ for (ExternalUser &EU : ExternalUses) {
// We only add extract cost once for the same scalar.
- if (!ExtractCostCalculated.insert(I->Scalar).second)
+ if (!ExtractCostCalculated.insert(EU.Scalar).second)
continue;
// Uses by ephemeral values are free (because the ephemeral value will be
// removed prior to code generation, and so the extraction will be
// removed as well).
- if (EphValues.count(I->User))
+ if (EphValues.count(EU.User))
continue;
- VectorType *VecTy = VectorType::get(I->Scalar->getType(), BundleWidth);
+ VectorType *VecTy = VectorType::get(EU.Scalar->getType(), BundleWidth);
ExtractCost += TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy,
- I->Lane);
+ EU.Lane);
}
Cost += getSpillCost();
@@ -2553,7 +2552,7 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
}
Value *BoUpSLP::vectorizeTree() {
-
+
// All blocks must be scheduled before any instructions are inserted.
for (auto &BSIter : BlocksSchedules) {
scheduleBlock(BSIter.second.get());
@@ -3074,10 +3073,10 @@ void BoUpSLP::BlockScheduling::resetSchedule() {
}
void BoUpSLP::scheduleBlock(BlockScheduling *BS) {
-
+
if (!BS->ScheduleStart)
return;
-
+
DEBUG(dbgs() << "SLP: schedule block " << BS->BB->getName() << "\n");
BS->resetSchedule();
@@ -3592,7 +3591,7 @@ bool SLPVectorizer::tryToVectorize(BinaryOperator *V, BoUpSLP &R) {
/// \param NumEltsToRdx The number of elements that should be reduced in the
/// vector.
/// \param IsPairwise Whether the reduction is a pairwise or splitting
-/// reduction. A pairwise reduction will generate a mask of
+/// reduction. A pairwise reduction will generate a mask of
/// <0,2,...> or <1,3,..> while a splitting reduction will generate
/// <2,3, undef,undef> for a vector of 4 and NumElts = 2.
/// \param IsLeft True will generate a mask of even elements, odd otherwise.
@@ -3775,7 +3774,7 @@ public:
IRBuilder<> Builder(ReductionRoot);
FastMathFlags Unsafe;
Unsafe.setUnsafeAlgebra();
- Builder.SetFastMathFlags(Unsafe);
+ Builder.setFastMathFlags(Unsafe);
unsigned i = 0;
for (; i < NumReducedVals - ReduxWidth + 1; i += ReduxWidth) {
@@ -4020,9 +4019,8 @@ bool SLPVectorizer::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
// Collect the incoming values from the PHIs.
Incoming.clear();
- for (BasicBlock::iterator instr = BB->begin(), ie = BB->end(); instr != ie;
- ++instr) {
- PHINode *P = dyn_cast<PHINode>(instr);
+ for (Instruction &I : *BB) {
+ PHINode *P = dyn_cast<PHINode>(&I);
if (!P)
break;
diff --git a/test/Analysis/GlobalsModRef/nocapture.ll b/test/Analysis/GlobalsModRef/nocapture.ll
deleted file mode 100644
index 0cb80a10f8da..000000000000
--- a/test/Analysis/GlobalsModRef/nocapture.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -globals-aa -aa-eval -print-all-alias-modref-info -disable-output 2>&1 | FileCheck %s
-
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.10.0"
-
-@a = internal global i32 0, align 4
-@b = internal global i32 0, align 4
-
-define void @g(i32* %p, void (i32*)* nocapture %ptr) {
-entry:
- tail call void %ptr(i32* %p) #1
- ret void
-}
-
-; CHECK-LABEL: Function: f
-; CHECK: MayAlias: i32* %p, i32* @a
-; CHECK: MayAlias: i32* %q, i32* @a
-define i32 @f(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
-entry:
- tail call void @g(i32* nonnull @a, void (i32*)* %ptr)
- %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
- %z1 = load i32, i32* %arrayidx, align 4
- %z2 = load i32, i32* %q, align 4
- %add = add nsw i32 %z2, %z1
- store i32 %add, i32* %q, align 4
- ret i32 4
-}
-
-define void @g2(i32* nocapture %p, void (i32*)* nocapture %ptr) {
-entry:
- tail call void %ptr(i32* %p) #1
- ret void
-}
-
-; CHECK-LABEL: Function: f2
-; CHECK: NoAlias: i32* %p, i32* @b
-; CHECK: NoAlias: i32* %q, i32* @b
-define i32 @f2(i32 %n, i32* nocapture readonly %p, i32* nocapture %q, void (i32*)* nocapture %ptr) {
-entry:
- tail call void @g2(i32* nonnull @b, void (i32*)* %ptr)
- %arrayidx = getelementptr inbounds i32, i32* %p, i64 0
- %z1 = load i32, i32* %arrayidx, align 4
- %z2 = load i32, i32* %q, align 4
- %add = add nsw i32 %z2, %z1
- store i32 %add, i32* %q, align 4
- ret i32 4
-}
-
-declare void @g3()
-
-; CHECK-LABEL: Function: f3
-; CHECK: NoAlias: i32* %p, i32* @b
-define void @f3(i32* nocapture readonly %p) {
-entry:
- tail call void @g3() [ "deopt"(i32* @b, i32 *%p) ]
- unreachable
-}
diff --git a/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll b/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll
new file mode 100644
index 000000000000..6d8288e8ce36
--- /dev/null
+++ b/test/Analysis/LoopAccessAnalysis/interleave-innermost.ll
@@ -0,0 +1,29 @@
+; RUN: opt -loop-vectorize -force-vector-interleave=1 -S < %s | FileCheck %s
+; CHECK-LABEL: TestFoo
+; CHECK-NOT: %wide.vec
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+define void @TestFoo(i1 %X, i1 %Y) {
+bb:
+ br label %.loopexit5.outer
+
+.loopexit5.outer:
+ br label %.lr.ph12
+
+.loopexit:
+ br i1 %X, label %.loopexit5.outer, label %.lr.ph12
+
+.lr.ph12:
+ %f.110 = phi i32* [ %tmp1, %.loopexit ], [ null, %.loopexit5.outer ]
+ %tmp1 = getelementptr inbounds i32, i32* %f.110, i64 -2
+ br i1 %Y, label %bb4, label %.loopexit
+
+bb4:
+ %j.27 = phi i32 [ 0, %.lr.ph12 ], [ %tmp7, %bb4 ]
+ %tmp5 = load i32, i32* %f.110, align 4
+ %tmp7 = add nsw i32 %j.27, 1
+ %exitcond = icmp eq i32 %tmp7, 0
+ br i1 %exitcond, label %.loopexit, label %bb4
+}
diff --git a/test/Bitcode/compatibility.ll b/test/Bitcode/compatibility.ll
index 9363f503be5c..ae12a24ede4b 100644
--- a/test/Bitcode/compatibility.ll
+++ b/test/Bitcode/compatibility.ll
@@ -47,10 +47,36 @@ $comdat.samesize = comdat samesize
; CHECK: @const.struct = constant %const.struct.type { i32 -1, i8 undef }
@const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
; CHECK: @const.struct.packed = constant %const.struct.type.packed <{ i32 -1, i8 1 }>
-@const.array = constant [2 x i32] [i32 -3, i32 -4]
-; CHECK: @const.array = constant [2 x i32] [i32 -3, i32 -4]
-@const.vector = constant <2 x i32> <i32 -5, i32 -6>
-; CHECK: @const.vector = constant <2 x i32> <i32 -5, i32 -6>
+
+; CHECK: @constant.array.i8 = constant [3 x i8] c"\00\01\00"
+@constant.array.i8 = constant [3 x i8] [i8 -0, i8 1, i8 0]
+; CHECK: @constant.array.i16 = constant [3 x i16] [i16 0, i16 1, i16 0]
+@constant.array.i16 = constant [3 x i16] [i16 -0, i16 1, i16 0]
+; CHECK: @constant.array.i32 = constant [3 x i32] [i32 0, i32 1, i32 0]
+@constant.array.i32 = constant [3 x i32] [i32 -0, i32 1, i32 0]
+; CHECK: @constant.array.i64 = constant [3 x i64] [i64 0, i64 1, i64 0]
+@constant.array.i64 = constant [3 x i64] [i64 -0, i64 1, i64 0]
+; CHECK: @constant.array.f16 = constant [3 x half] [half 0xH8000, half 0xH3C00, half 0xH0000]
+@constant.array.f16 = constant [3 x half] [half -0.0, half 1.0, half 0.0]
+; CHECK: @constant.array.f32 = constant [3 x float] [float -0.000000e+00, float 1.000000e+00, float 0.000000e+00]
+@constant.array.f32 = constant [3 x float] [float -0.0, float 1.0, float 0.0]
+; CHECK: @constant.array.f64 = constant [3 x double] [double -0.000000e+00, double 1.000000e+00, double 0.000000e+00]
+@constant.array.f64 = constant [3 x double] [double -0.0, double 1.0, double 0.0]
+
+; CHECK: @constant.vector.i8 = constant <3 x i8> <i8 0, i8 1, i8 0>
+@constant.vector.i8 = constant <3 x i8> <i8 -0, i8 1, i8 0>
+; CHECK: @constant.vector.i16 = constant <3 x i16> <i16 0, i16 1, i16 0>
+@constant.vector.i16 = constant <3 x i16> <i16 -0, i16 1, i16 0>
+; CHECK: @constant.vector.i32 = constant <3 x i32> <i32 0, i32 1, i32 0>
+@constant.vector.i32 = constant <3 x i32> <i32 -0, i32 1, i32 0>
+; CHECK: @constant.vector.i64 = constant <3 x i64> <i64 0, i64 1, i64 0>
+@constant.vector.i64 = constant <3 x i64> <i64 -0, i64 1, i64 0>
+; CHECK: @constant.vector.f16 = constant <3 x half> <half 0xH8000, half 0xH3C00, half 0xH0000>
+@constant.vector.f16 = constant <3 x half> <half -0.0, half 1.0, half 0.0>
+; CHECK: @constant.vector.f32 = constant <3 x float> <float -0.000000e+00, float 1.000000e+00, float 0.000000e+00>
+@constant.vector.f32 = constant <3 x float> <float -0.0, float 1.0, float 0.0>
+; CHECK: @constant.vector.f64 = constant <3 x double> <double -0.000000e+00, double 1.000000e+00, double 0.000000e+00>
+@constant.vector.f64 = constant <3 x double> <double -0.0, double 1.0, double 0.0>
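The checks above encode a deliberate asymmetry: an integer element written as -0 is canonicalized to plain 0 (two's complement has no negative zero), while a floating-point -0.0 keeps its sign bit, which is why half -0.0 round-trips as 0xH8000.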
;; Global Variables
; Format: [@<GlobalVarName> =] [Linkage] [Visibility] [DLLStorageClass]
@@ -859,7 +885,8 @@ catchpad:
; CHECK-NEXT: br label %body
body:
- invoke void @f.ccc() to label %continue unwind label %terminate.inner
+ invoke void @f.ccc() [ "funclet"(token %catch) ]
+ to label %continue unwind label %terminate.inner
catchret from %catch to label %return
; CHECK: catchret from %catch to label %return
diff --git a/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
new file mode 100644
index 000000000000..770521b75280
--- /dev/null
+++ b/test/CodeGen/AArch64/arm64-misched-memdep-bug.ll
@@ -0,0 +1,22 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=arm64-linux-gnu -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > /dev/null | FileCheck %s
+;
+; Test for bug in misched memory dependency calculation.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: misched_bug:BB#0 entry
+; CHECK: SU(2): %vreg2<def> = LDRWui %vreg0, 1; mem:LD4[%ptr1_plus1] GPR32:%vreg2 GPR64common:%vreg0
+; CHECK: Successors:
+; CHECK-NEXT: val SU(5): Latency=4 Reg=%vreg2
+; CHECK-NEXT: ch SU(4): Latency=0
+; CHECK: SU(4): STRWui %WZR, %vreg1, 0; mem:ST4[%ptr2] GPR64common:%vreg1
+; CHECK: SU(5): %W0<def> = COPY %vreg2; GPR32:%vreg2
+; CHECK: ** ScheduleDAGMI::schedule picking next node
+define i32 @misched_bug(i32* %ptr1, i32* %ptr2) {
+entry:
+ %ptr1_plus1 = getelementptr inbounds i32, i32* %ptr1, i64 1
+ %val1 = load i32, i32* %ptr1_plus1, align 4
+ store i32 0, i32* %ptr1, align 4
+ store i32 0, i32* %ptr2, align 4
+ ret i32 %val1
+}
diff --git a/test/CodeGen/AArch64/branch-folder-merge-mmos.ll b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
new file mode 100644
index 000000000000..3f9c0239fe41
--- /dev/null
+++ b/test/CodeGen/AArch64/branch-folder-merge-mmos.ll
@@ -0,0 +1,33 @@
+; RUN: llc -march=aarch64 -mtriple=aarch64-none-linux-gnu -stop-after branch-folder -o /dev/null < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Function Attrs: norecurse nounwind
+define void @foo(i32 %a, i32 %b, float* nocapture %foo_arr) #0 {
+; CHECK: (load 4 from %ir.arrayidx1.{{i[1-2]}}), (load 4 from %ir.arrayidx1.{{i[1-2]}})
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %0 = load float, float* %foo_arr, align 4
+ %arrayidx1.i1 = getelementptr inbounds float, float* %foo_arr, i64 1
+ %1 = load float, float* %arrayidx1.i1, align 4
+ %sub.i = fsub float %0, %1
+ store float %sub.i, float* %foo_arr, align 4
+ br label %if.end3
+
+if.end: ; preds = %entry
+ %cmp1 = icmp sgt i32 %b, 0
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2: ; preds = %if.end
+ %2 = load float, float* %foo_arr, align 4
+ %arrayidx1.i2 = getelementptr inbounds float, float* %foo_arr, i64 1
+ %3 = load float, float* %arrayidx1.i2, align 4
+ %sub.i3 = fsub float %2, %3
+ store float %sub.i3, float* %foo_arr, align 4
+ br label %if.end3
+
+if.end3: ; preds = %if.then2, %if.end, %if.then
+ ret void
+}
diff --git a/test/CodeGen/AArch64/machine-combiner.ll b/test/CodeGen/AArch64/machine-combiner.ll
new file mode 100644
index 000000000000..56a742fd6c3a
--- /dev/null
+++ b/test/CodeGen/AArch64/machine-combiner.ll
@@ -0,0 +1,258 @@
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math < %s | FileCheck %s
+
+; Verify that the first two adds are independent regardless of how the inputs are
+; commuted. The destination registers are used as source registers for the third add.
+
+define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %t1, %x3
+ ret float %t2
+}
+
+define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
+; produced because that would cost more compile time.
+
+define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s4, s5
+; CHECK-NEXT: fadd s1, s1, s6
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s7
+; CHECK-NEXT: ret
+ %t0 = fadd float %x0, %x1
+ %t1 = fadd float %t0, %x2
+ %t2 = fadd float %t1, %x3
+ %t3 = fadd float %t2, %x4
+ %t4 = fadd float %t3, %x5
+ %t5 = fadd float %t4, %x6
+ %t6 = fadd float %t5, %x7
+ ret float %t6
+}
+
+; Verify that we only need two associative operations to reassociate the operands.
+; Also, we should reassociate such that the result of the high latency division
+; is used by the final 'add' rather than reassociating the %x3 operand with the
+; division. The latter reassociation would not improve anything.
+
+define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds6:
+; CHECK: fdiv s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fdiv float %x0, %x1
+ %t1 = fadd float %x2, %t0
+ %t2 = fadd float %x3, %t1
+ ret float %t2
+}
+
+; Verify that scalar single-precision multiplies are reassociated.
+
+define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_muls1:
+; CHECK: fdiv s0, s0, s1
+; CHECK-NEXT: fmul s1, s2, s3
+; CHECK-NEXT: fmul s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fdiv float %x0, %x1
+ %t1 = fmul float %x2, %t0
+ %t2 = fmul float %x3, %t1
+ ret float %t2
+}
+
+; Verify that scalar double-precision adds are reassociated.
+
+define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_adds_double:
+; CHECK: fdiv d0, d0, d1
+; CHECK-NEXT: fadd d1, d2, d3
+; CHECK-NEXT: fadd d0, d0, d1
+; CHECK-NEXT: ret
+ %t0 = fdiv double %x0, %x1
+ %t1 = fadd double %x2, %t0
+ %t2 = fadd double %x3, %t1
+ ret double %t2
+}
+
+; Verify that scalar double-precision multiplies are reassociated.
+
+define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_muls_double:
+; CHECK: fdiv d0, d0, d1
+; CHECK-NEXT: fmul d1, d2, d3
+; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: ret
+ %t0 = fdiv double %x0, %x1
+ %t1 = fmul double %x2, %t0
+ %t2 = fmul double %x3, %t1
+ ret double %t2
+}
+
+; Verify that we reassociate vector instructions too.
+
+define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds1:
+; CHECK: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds2:
+; CHECK: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds3:
+; CHECK: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %t0, %x2
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds4:
+; CHECK: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fadd <4 x float> %x2, %t0
+ %t2 = fadd <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+; Verify that 128-bit vector single-precision multiplies are reassociated.
+
+define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: reassociate_muls_v4f32:
+; CHECK: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %t0 = fadd <4 x float> %x0, %x1
+ %t1 = fmul <4 x float> %x2, %t0
+ %t2 = fmul <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
+; Verify that 128-bit vector double-precision multiplies are reassociated.
+
+define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; CHECK-LABEL: reassociate_muls_v2f64:
+; CHECK: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v1.2d, v2.2d, v3.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ %t0 = fadd <2 x double> %x0, %x1
+ %t1 = fmul <2 x double> %x2, %t0
+ %t2 = fmul <2 x double> %x3, %t1
+ ret <2 x double> %t2
+}
+
+; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
+; Verify that reassociation is not happening needlessly or wrongly.
+
+declare double @bar()
+
+define double @reassociate_adds_from_calls() {
+; CHECK-LABEL: reassociate_adds_from_calls:
+; CHECK: bl bar
+; CHECK-NEXT: mov v8.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: mov v9.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: mov v10.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK: fadd d1, d8, d9
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+ %x0 = call double @bar()
+ %x1 = call double @bar()
+ %x2 = call double @bar()
+ %x3 = call double @bar()
+ %t0 = fadd double %x0, %x1
+ %t1 = fadd double %t0, %x2
+ %t2 = fadd double %t1, %x3
+ ret double %t2
+}
+
+define double @already_reassociated() {
+; CHECK-LABEL: already_reassociated:
+; CHECK: bl bar
+; CHECK-NEXT: mov v8.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: mov v9.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: mov v10.16b, v0.16b
+; CHECK-NEXT: bl bar
+; CHECK: fadd d1, d8, d9
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+ %x0 = call double @bar()
+ %x1 = call double @bar()
+ %x2 = call double @bar()
+ %x3 = call double @bar()
+ %t0 = fadd double %x0, %x1
+ %t1 = fadd double %x2, %x3
+ %t2 = fadd double %t0, %t1
+ ret double %t2
+}
+
diff --git a/test/CodeGen/AMDGPU/ctlz.ll b/test/CodeGen/AMDGPU/ctlz.ll
new file mode 100644
index 000000000000..baedf47eef0d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/ctlz.ll
@@ -0,0 +1,269 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+
+declare i7 @llvm.ctlz.i7(i7, i1) nounwind readnone
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+declare i16 @llvm.ctlz.i16(i16, i1) nounwind readnone
+
+declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
+declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
+; FUNC-LABEL: {{^}}s_ctlz_i32:
+; SI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: s_flbit_i32_b32 [[CTLZ:s[0-9]+]], [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e64 [[CMPZ:s\[[0-9]+:[0-9]+\]]], 0, [[VAL]]
+; SI-DAG: v_mov_b32_e32 [[VCTLZ:v[0-9]+]], [[CTLZ]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[VCTLZ]], 32, [[CMPZ]]
+; SI: buffer_store_dword [[RESULT]]
+; SI: s_endpgm
+
+; EG: FFBH_UINT
+; EG: CNDE_INT
+define void @s_ctlz_i32(i32 addrspace(1)* noalias %out, i32 %val) nounwind {
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: v_ffbh_u32_e32 [[CTLZ:v[0-9]+]], [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], [[CTLZ]], 32, vcc
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+
+; EG: FFBH_UINT
+; EG: CNDE_INT
+define void @v_ctlz_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr, align 4
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ store i32 %ctlz, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_v2i32:
+; SI: buffer_load_dwordx2
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx2
+; SI: s_endpgm
+
+; EG: FFBH_UINT
+; EG: CNDE_INT
+; EG: FFBH_UINT
+; EG: CNDE_INT
+define void @v_ctlz_v2i32(<2 x i32> addrspace(1)* noalias %out, <2 x i32> addrspace(1)* noalias %valptr) nounwind {
+ %val = load <2 x i32>, <2 x i32> addrspace(1)* %valptr, align 8
+ %ctlz = call <2 x i32> @llvm.ctlz.v2i32(<2 x i32> %val, i1 false) nounwind readnone
+ store <2 x i32> %ctlz, <2 x i32> addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_v4i32:
+; SI: buffer_load_dwordx4
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: v_ffbh_u32_e32
+; SI: buffer_store_dwordx4
+; SI: s_endpgm
+
+
+; EG-DAG: FFBH_UINT
+; EG-DAG: CNDE_INT
+
+; EG-DAG: FFBH_UINT
+; EG-DAG: CNDE_INT
+
+; EG-DAG: FFBH_UINT
+; EG-DAG: CNDE_INT
+
+; EG-DAG: FFBH_UINT
+; EG-DAG: CNDE_INT
+define void @v_ctlz_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x i32> addrspace(1)* noalias %valptr) nounwind {
+ %val = load <4 x i32>, <4 x i32> addrspace(1)* %valptr, align 16
+ %ctlz = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %val, i1 false) nounwind readnone
+ store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i8:
+; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
+; SI-DAG: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[CTLZ]]
+; SI-DAG: v_cndmask_b32_e64 [[CORRECTED_FFBH:v[0-9]+]], [[FFBH]], 32, vcc
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[CORRECTED_FFBH]]
+; SI: buffer_store_byte [[RESULT]],
+define void @v_ctlz_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i8, i8 addrspace(1)* %valptr
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
+ store i8 %ctlz, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_ctlz_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
+; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
+; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
+; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
+; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
+; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
+; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
+define void @s_ctlz_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
+ store i64 %ctlz, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_ctlz_i64_trunc:
+define void @s_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
+ %trunc = trunc i64 %ctlz to i32
+ store i32 %trunc, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i64:
+; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
+; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
+; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
+; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
+; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[ADD]], [[CMPHI]]
+; SI-DAG: v_or_b32_e32 [[OR:v[0-9]+]], v[[LO]], v[[HI]]
+; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[OR]]
+; SI-DAG: v_cndmask_b32_e64 v[[CTLZ_LO:[0-9]+]], v[[CTLZ:[0-9]+]], 64, vcc
+; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ_LO]]:[[CTLZ_HI]]{{\]}}
+define void @v_ctlz_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
+ store i64 %ctlz, i64 addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i64_trunc:
+define void @v_ctlz_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 false)
+ %trunc = trunc i64 %ctlz to i32
+ store i32 %trunc, i32 addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ %cmp = icmp eq i32 %val, 0
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI: buffer_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ %cmp = icmp ne i32 %val, 0
+ %sel = select i1 %cmp, i32 %ctlz, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; TODO: Should be able to eliminate select here as well.
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_eq_bitwidth:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_eq_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ %cmp = icmp eq i32 %ctlz, 32
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i32_sel_ne_bitwidth:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: s_endpgm
+define void @v_ctlz_i32_sel_ne_bitwidth(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 false) nounwind readnone
+ %cmp = icmp ne i32 %ctlz, 32
+ %sel = select i1 %cmp, i32 %ctlz, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i8_sel_eq_neg1:
+; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI: buffer_store_byte [[FFBH]],
+define void @v_ctlz_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i8, i8 addrspace(1)* %valptr
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 false) nounwind readnone
+ %cmp = icmp eq i8 %val, 0
+ %sel = select i1 %cmp, i8 -1, i8 %ctlz
+ store i8 %sel, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i16_sel_eq_neg1:
+; SI: buffer_load_ushort [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI: buffer_store_short [[FFBH]],
+define void @v_ctlz_i16_sel_eq_neg1(i16 addrspace(1)* noalias %out, i16 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i16, i16 addrspace(1)* %valptr
+ %ctlz = call i16 @llvm.ctlz.i16(i16 %val, i1 false) nounwind readnone
+ %cmp = icmp eq i16 %val, 0
+ %sel = select i1 %cmp, i16 -1, i16 %ctlz
+ store i16 %sel, i16 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_i7_sel_eq_neg1:
+; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI: v_and_b32_e32 [[TRUNC:v[0-9]+]], 0x7f, [[FFBH]]
+; SI: buffer_store_byte [[TRUNC]],
+define void @v_ctlz_i7_sel_eq_neg1(i7 addrspace(1)* noalias %out, i7 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i7, i7 addrspace(1)* %valptr
+ %ctlz = call i7 @llvm.ctlz.i7(i7 %val, i1 false) nounwind readnone
+ %cmp = icmp eq i7 %val, 0
+ %sel = select i1 %cmp, i7 -1, i7 %ctlz
+ store i7 %sel, i7 addrspace(1)* %out
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
index bd26c302fe5a..c1f84cd460cf 100644
--- a/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
+++ b/test/CodeGen/AMDGPU/ctlz_zero_undef.ll
@@ -2,10 +2,18 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+declare i8 @llvm.ctlz.i8(i8, i1) nounwind readnone
+
declare i32 @llvm.ctlz.i32(i32, i1) nounwind readnone
declare <2 x i32> @llvm.ctlz.v2i32(<2 x i32>, i1) nounwind readnone
declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readnone
+declare i64 @llvm.ctlz.i64(i64, i1) nounwind readnone
+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readnone
+declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readnone
+
+declare i32 @llvm.r600.read.tidig.x() nounwind readnone
+
; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i32:
; SI: s_load_dword [[VAL:s[0-9]+]],
; SI: s_flbit_i32_b32 [[SRESULT:s[0-9]+]], [[VAL]]
@@ -69,3 +77,192 @@ define void @v_ctlz_zero_undef_v4i32(<4 x i32> addrspace(1)* noalias %out, <4 x
store <4 x i32> %ctlz, <4 x i32> addrspace(1)* %out, align 16
ret void
}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8:
+; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI: v_add_i32_e32 [[RESULT:v[0-9]+]], vcc, 0xffffffe8, [[FFBH]]
+; SI: buffer_store_byte [[RESULT]],
+define void @v_ctlz_zero_undef_i8(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i8, i8 addrspace(1)* %valptr
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
+ store i8 %ctlz, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64:
+; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, {{0xb|0x2c}}
+; SI-DAG: v_cmp_eq_i32_e64 vcc, 0, s[[HI]]
+; SI-DAG: s_flbit_i32_b32 [[FFBH_LO:s[0-9]+]], s[[LO]]
+; SI-DAG: s_add_i32 [[ADD:s[0-9]+]], [[FFBH_LO]], 32
+; SI-DAG: s_flbit_i32_b32 [[FFBH_HI:s[0-9]+]], s[[HI]]
+; SI-DAG: v_mov_b32_e32 [[VFFBH_LO:v[0-9]+]], [[FFBH_LO]]
+; SI-DAG: v_mov_b32_e32 [[VFFBH_HI:v[0-9]+]], [[FFBH_HI]]
+; SI-DAG: v_cndmask_b32_e32 v[[CTLZ:[0-9]+]], [[VFFBH_HI]], [[VFFBH_LO]]
+; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
+define void @s_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 %val) nounwind {
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
+ store i64 %ctlz, i64 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_ctlz_zero_undef_i64_trunc:
+define void @s_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 %val) nounwind {
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
+ %trunc = trunc i64 %ctlz to i32
+ store i32 %trunc, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64:
+; SI: {{buffer|flat}}_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
+; SI-DAG: v_cmp_eq_i32_e64 [[CMPHI:s\[[0-9]+:[0-9]+\]]], 0, v[[HI]]
+; SI-DAG: v_ffbh_u32_e32 [[FFBH_LO:v[0-9]+]], v[[LO]]
+; SI-DAG: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, 32, [[FFBH_LO]]
+; SI-DAG: v_ffbh_u32_e32 [[FFBH_HI:v[0-9]+]], v[[HI]]
+; SI-DAG: v_cndmask_b32_e64 v[[CTLZ:[0-9]+]], [[FFBH_HI]], [[FFBH_LO]]
+; SI-DAG: v_mov_b32_e32 v[[CTLZ_HI:[0-9]+]], 0{{$}}
+; SI: {{buffer|flat}}_store_dwordx2 v{{\[}}[[CTLZ]]:[[CTLZ_HI]]{{\]}}
+define void @v_ctlz_zero_undef_i64(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i64, i64 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
+ store i64 %ctlz, i64 addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i64_trunc:
+define void @v_ctlz_zero_undef_i64_trunc(i32 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr i32, i32 addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %ctlz = call i64 @llvm.ctlz.i64(i64 %val, i1 true)
+ %trunc = trunc i64 %ctlz to i32
+ store i32 %trunc, i32 addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI-NEXT: buffer_store_dword [[RESULT]],
+define void @v_ctlz_zero_undef_i32_sel_eq_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp eq i32 %val, 0
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_neg1:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[RESULT:v[0-9]+]], [[VAL]]
+; SI-NEXT: buffer_store_dword [[RESULT]],
+define void @v_ctlz_zero_undef_i32_sel_ne_neg1(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp ne i32 %val, 0
+ %sel = select i1 %cmp, i32 %ctlz, i32 -1
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i8_sel_eq_neg1:
+; SI: buffer_load_ubyte [[VAL:v[0-9]+]],
+; SI: v_ffbh_u32_e32 [[FFBH:v[0-9]+]], [[VAL]]
+; SI: buffer_store_byte [[FFBH]],
+define void @v_ctlz_zero_undef_i8_sel_eq_neg1(i8 addrspace(1)* noalias %out, i8 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i8, i8 addrspace(1)* %valptr
+ %ctlz = call i8 @llvm.ctlz.i8(i8 %val, i1 true) nounwind readnone
+ %cmp = icmp eq i8 %val, 0
+ %sel = select i1 %cmp, i8 -1, i8 %ctlz
+ store i8 %sel, i8 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_neg1_two_use:
+; SI: buffer_load_dword [[VAL:v[0-9]+]],
+; SI-DAG: v_ffbh_u32_e32 [[RESULT0:v[0-9]+]], [[VAL]]
+; SI-DAG: v_cmp_eq_i32_e32 vcc, 0, [[VAL]]
+; SI-DAG: v_cndmask_b32_e64 [[RESULT1:v[0-9]+]], 0, 1, vcc
+; SI-DAG: buffer_store_dword [[RESULT0]]
+; SI-DAG: buffer_store_byte [[RESULT1]]
+; SI: s_endpgm
+define void @v_ctlz_zero_undef_i32_sel_eq_neg1_two_use(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp eq i32 %val, 0
+ %sel = select i1 %cmp, i32 -1, i32 %ctlz
+ store volatile i32 %sel, i32 addrspace(1)* %out
+ store volatile i1 %cmp, i1 addrspace(1)* undef
+ ret void
+}
+
+; Selected on wrong constant
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_0:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: buffer_store_dword
+define void @v_ctlz_zero_undef_i32_sel_eq_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp eq i32 %val, 0
+ %sel = select i1 %cmp, i32 0, i32 %ctlz
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; Selected on wrong constant
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_0:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: buffer_store_dword
+define void @v_ctlz_zero_undef_i32_sel_ne_0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp ne i32 %val, 0
+ %sel = select i1 %cmp, i32 %ctlz, i32 0
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; Compare on wrong constant
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_eq_cmp_non0:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: buffer_store_dword
+define void @v_ctlz_zero_undef_i32_sel_eq_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp eq i32 %val, 1
+ %sel = select i1 %cmp, i32 0, i32 %ctlz
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
+
+; Selected on wrong constant
+; FUNC-LABEL: {{^}}v_ctlz_zero_undef_i32_sel_ne_cmp_non0:
+; SI: buffer_load_dword
+; SI: v_ffbh_u32_e32
+; SI: v_cmp
+; SI: v_cndmask
+; SI: buffer_store_dword
+define void @v_ctlz_zero_undef_i32_sel_ne_cmp_non0(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %valptr) nounwind {
+ %val = load i32, i32 addrspace(1)* %valptr
+ %ctlz = call i32 @llvm.ctlz.i32(i32 %val, i1 true) nounwind readnone
+ %cmp = icmp ne i32 %val, 1
+ %sel = select i1 %cmp, i32 %ctlz, i32 0
+ store i32 %sel, i32 addrspace(1)* %out
+ ret void
+}
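
A note for readers of the sel_eq_neg1/sel_ne_neg1 checks above: they expect the compare-and-select to fold away because SI's v_ffbh_u32 returns -1 (all ones) when its input is zero, so a select that re-supplies -1 on zero adds nothing on top of the ctlz result. A small C model of that equivalence, offered as a sketch only (the helper name is invented, and the -1-on-zero behaviour is the assumption the fold relies on):

#include <assert.h>
#include <stdint.h>

/* Model of SI v_ffbh_u32: number of leading zeros, assuming the
 * hardware quirk that a zero input yields -1 (0xffffffff). */
static uint32_t v_ffbh_u32(uint32_t x) {
    if (x == 0)
        return 0xffffffffu;
    uint32_t n = 0;
    while (!(x & 0x80000000u)) {
        x <<= 1;
        ++n;
    }
    return n;
}

int main(void) {
    const uint32_t xs[] = { 0, 1, 42, 0x80000000u, 0xffffffffu };
    for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; ++i) {
        uint32_t x = xs[i];
        uint32_t ctlz = 0;  /* plain ctlz; well defined for x != 0 */
        for (uint32_t t = x; t != 0 && !(t & 0x80000000u); t <<= 1)
            ++ctlz;
        /* The IR pattern under test: select (x == 0), -1, ctlz_zero_undef(x). */
        uint32_t sel = (x == 0) ? 0xffffffffu : ctlz;
        assert(sel == v_ffbh_u32(x));
    }
    return 0;
}
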
diff --git a/test/CodeGen/AMDGPU/flat-scratch-reg.ll b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
index 9aea7c773431..b9489101f906 100644
--- a/test/CodeGen/AMDGPU/flat-scratch-reg.ll
+++ b/test/CodeGen/AMDGPU/flat-scratch-reg.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -march=amdgcn -mcpu=kaveri -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=CI --check-prefix=NO-XNACK
; RUN: llc < %s -march=amdgcn -mcpu=fiji -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=NO-XNACK
-; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=XNACK
+; RUN: llc < %s -march=amdgcn -mcpu=carrizo -mattr=+xnack -verify-machineinstrs | FileCheck %s --check-prefix=GCN --check-prefix=VI --check-prefix=XNACK
; GCN-LABEL: {{^}}no_vcc_no_flat:
; NO-XNACK: ; NumSgprs: 8
@@ -22,8 +22,7 @@ entry:
; GCN-LABEL: {{^}}no_vcc_flat:
; CI: ; NumSgprs: 12
-; VI: ; NumSgprs: 12
-; XNACK: ; NumSgprs: 14
+; VI: ; NumSgprs: 14
define void @no_vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{FLAT_SCR}"()
@@ -32,8 +31,7 @@ entry:
; GCN-LABEL: {{^}}vcc_flat:
; CI: ; NumSgprs: 12
-; VI: ; NumSgprs: 12
-; XNACK: ; NumSgprs: 14
+; VI: ; NumSgprs: 14
define void @vcc_flat() {
entry:
call void asm sideeffect "", "~{SGPR7},~{VCC},~{FLAT_SCR}"()
diff --git a/test/CodeGen/AMDGPU/fmin_legacy.ll b/test/CodeGen/AMDGPU/fmin_legacy.ll
index 52fc3d0d251a..69a0a520a476 100644
--- a/test/CodeGen/AMDGPU/fmin_legacy.ll
+++ b/test/CodeGen/AMDGPU/fmin_legacy.ll
@@ -8,8 +8,8 @@ declare i32 @llvm.r600.read.tidig.x() #1
; FUNC-LABEL: @test_fmin_legacy_f32
; EG: MIN *
-; SI-SAFE: v_min_legacy_f32_e32
-; SI-NONAN: v_min_f32_e32
+; SI-SAFE: v_min_legacy_f32_e64
+; SI-NONAN: v_min_f32_e64
define void @test_fmin_legacy_f32(<4 x float> addrspace(1)* %out, <4 x float> inreg %reg0) #0 {
%r0 = extractelement <4 x float> %reg0, i32 0
%r1 = extractelement <4 x float> %reg0, i32 1
diff --git a/test/CodeGen/AMDGPU/fsub.ll b/test/CodeGen/AMDGPU/fsub.ll
index dfe41cb5b111..38d573258a5e 100644
--- a/test/CodeGen/AMDGPU/fsub.ll
+++ b/test/CodeGen/AMDGPU/fsub.ll
@@ -32,9 +32,8 @@ declare void @llvm.AMDGPU.store.output(float, i32)
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, -KC0[3].Z
; R600-DAG: ADD {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, -KC0[3].Y
-; FIXME: Should be using SGPR directly for first operand
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @fsub_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
%sub = fsub <2 x float> %a, %b
store <2 x float> %sub, <2 x float> addrspace(1)* %out, align 8
@@ -60,13 +59,11 @@ define void @v_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(
ret void
}
-; FIXME: Should be using SGPR directly for first operand
-
; FUNC-LABEL: {{^}}s_fsub_v4f32:
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: v_subrev_f32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
; SI: s_endpgm
define void @s_fsub_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, <4 x float> %b) {
%result = fsub <4 x float> %a, %b
diff --git a/test/CodeGen/AMDGPU/hsa-globals.ll b/test/CodeGen/AMDGPU/hsa-globals.ll
index 1d76c40c042e..90322ac3dc01 100644
--- a/test/CodeGen/AMDGPU/hsa-globals.ll
+++ b/test/CodeGen/AMDGPU/hsa-globals.ll
@@ -17,41 +17,49 @@ define void @test() {
}
; ASM: .amdgpu_hsa_module_global internal_global
+; ASM: .size internal_global_program, 4
; ASM: .hsadata_global_program
; ASM: internal_global_program:
; ASM: .long 0
; ASM: .amdgpu_hsa_module_global common_global
+; ASM: .size common_global_program, 4
; ASM: .hsadata_global_program
; ASM: common_global_program:
; ASM: .long 0
; ASM: .amdgpu_hsa_program_global external_global
+; ASM: .size external_global_program, 4
; ASM: .hsadata_global_program
; ASM: external_global_program:
; ASM: .long 0
; ASM: .amdgpu_hsa_module_global internal_global
+; ASM: .size internal_global_agent, 4
; ASM: .hsadata_global_agent
; ASM: internal_global_agent:
; ASM: .long 0
; ASM: .amdgpu_hsa_module_global common_global
+; ASM: .size common_global_agent, 4
; ASM: .hsadata_global_agent
; ASM: common_global_agent:
; ASM: .long 0
; ASM: .amdgpu_hsa_program_global external_global
+; ASM: .size external_global_agent, 4
; ASM: .hsadata_global_agent
; ASM: external_global_agent:
; ASM: .long 0
; ASM: .amdgpu_hsa_module_global internal_readonly
+; ASM: .size internal_readonly, 4
; ASM: .hsatext
; ASM: internal_readonly:
; ASM: .long 0
; ASM: .amdgpu_hsa_program_global external_readonly
+; ASM: .size external_readonly, 4
; ASM: .hsatext
; ASM: external_readonly:
; ASM: .long 0
@@ -79,18 +87,21 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: common_global_agent
+; ELF: Size: 4
; ELF: Binding: Local
; ELF: Section: .hsadata_global_agent
; ELF: }
; ELF: Symbol {
; ELF: Name: common_global_program
+; ELF: Size: 4
; ELF: Binding: Local
; ELF: Section: .hsadata_global_program
; ELF: }
; ELF: Symbol {
; ELF: Name: internal_global_agent
+; ELF: Size: 4
; ELF: Binding: Local
; ELF: Type: Object
; ELF: Section: .hsadata_global_agent
@@ -98,6 +109,7 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: internal_global_program
+; ELF: Size: 4
; ELF: Binding: Local
; ELF: Type: Object
; ELF: Section: .hsadata_global_program
@@ -105,6 +117,7 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: internal_readonly
+; ELF: Size: 4
; ELF: Binding: Local
; ELF: Type: Object
; ELF: Section: .hsatext
@@ -112,6 +125,7 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: external_global_agent
+; ELF: Size: 4
; ELF: Binding: Global
; ELF: Type: Object
; ELF: Section: .hsadata_global_agent
@@ -119,6 +133,7 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: external_global_program
+; ELF: Size: 4
; ELF: Binding: Global
; ELF: Type: Object
; ELF: Section: .hsadata_global_program
@@ -126,6 +141,7 @@ define void @test() {
; ELF: Symbol {
; ELF: Name: external_readonly
+; ELF: Size: 4
; ELF: Binding: Global
; ELF: Type: Object
; ELF: Section: .hsatext
diff --git a/test/CodeGen/AMDGPU/hsa-note-no-func.ll b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
new file mode 100644
index 000000000000..0e4662231b4f
--- /dev/null
+++ b/test/CodeGen/AMDGPU/hsa-note-no-func.ll
@@ -0,0 +1,6 @@
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA --check-prefix=HSA-CI %s
+; RUN: llc < %s -mtriple=amdgcn--amdhsa -mcpu=carrizo | FileCheck --check-prefix=HSA --check-prefix=HSA-VI %s
+
+; HSA: .hsa_code_object_version 1,0
+; HSA-CI: .hsa_code_object_isa 7,0,0,"AMD","AMDGPU"
+; HSA-VI: .hsa_code_object_isa 8,0,1,"AMD","AMDGPU"
diff --git a/test/CodeGen/AMDGPU/hsa.ll b/test/CodeGen/AMDGPU/hsa.ll
index abc89b7fd837..c089dfd9a971 100644
--- a/test/CodeGen/AMDGPU/hsa.ll
+++ b/test/CodeGen/AMDGPU/hsa.ll
@@ -28,6 +28,7 @@
; ELF: Symbol {
; ELF: Name: simple
+; ELF: Size: 296
; ELF: Type: AMDGPU_HSA_KERNEL (0xA)
; ELF: }
@@ -52,6 +53,9 @@
; Make sure we generate flat store for HSA
; HSA: flat_store_dword v{{[0-9]+}}
+; HSA: .Lfunc_end0:
+; HSA: .size simple, .Lfunc_end0-simple
+
define void @simple(i32 addrspace(1)* %out) {
entry:
store i32 0, i32 addrspace(1)* %out
diff --git a/test/CodeGen/AMDGPU/inline-asm.ll b/test/CodeGen/AMDGPU/inline-asm.ll
index efc2292de3a5..9c8d3534f8ad 100644
--- a/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/test/CodeGen/AMDGPU/inline-asm.ll
@@ -10,3 +10,14 @@ entry:
call void asm sideeffect "s_endpgm", ""()
ret void
}
+
+; CHECK: {{^}}inline_asm_shader:
+; CHECK: s_endpgm
+; CHECK: s_endpgm
+define void @inline_asm_shader() #0 {
+entry:
+ call void asm sideeffect "s_endpgm", ""()
+ ret void
+}
+
+attributes #0 = { "ShaderType"="0" }
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
index dc95cd1ee012..d96ea743f6ed 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.dispatch.ptr.ll
@@ -1,4 +1,7 @@
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: not llc -mtriple=amdgcn-unknown-unknown -mcpu=kaveri -verify-machineinstrs < %s 2>&1 | FileCheck -check-prefix=ERROR %s
+
+; ERROR: error: unsupported hsa intrinsic without hsa target in test
; GCN-LABEL: {{^}}test:
; GCN: enable_sgpr_dispatch_ptr = 1
diff --git a/test/CodeGen/AMDGPU/llvm.round.f64.ll b/test/CodeGen/AMDGPU/llvm.round.f64.ll
index 6b365dc09e2a..98afbeee93e6 100644
--- a/test/CodeGen/AMDGPU/llvm.round.f64.ll
+++ b/test/CodeGen/AMDGPU/llvm.round.f64.ll
@@ -21,7 +21,7 @@ define void @round_f64(double addrspace(1)* %out, double %x) #0 {
; SI-DAG: v_cmp_eq_i32
; SI-DAG: s_mov_b32 [[BFIMASK:s[0-9]+]], 0x7fffffff
-; SI-DAG: v_cmp_gt_i32_e32
+; SI-DAG: v_cmp_gt_i32
; SI-DAG: v_bfi_b32 [[COPYSIGN:v[0-9]+]], [[BFIMASK]]
; SI: buffer_store_dwordx2
diff --git a/test/CodeGen/AMDGPU/ret.ll b/test/CodeGen/AMDGPU/ret.ll
new file mode 100644
index 000000000000..2bd9fd6858fe
--- /dev/null
+++ b/test/CodeGen/AMDGPU/ret.ll
@@ -0,0 +1,245 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
+
+attributes #0 = { "ShaderType"="1" }
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+; GCN-LABEL: {{^}}vgpr:
+; GCN: v_mov_b32_e32 v1, v0
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
+; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN: s_waitcnt expcnt(0)
+; GCN-NOT: s_endpgm
+define {float, float} @vgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+ %x = fadd float %3, 1.0
+ %a = insertvalue {float, float} undef, float %x, 0
+ %b = insertvalue {float, float} %a, float %3, 1
+ ret {float, float} %b
+}
+
+; GCN-LABEL: {{^}}vgpr_literal:
+; GCN: v_mov_b32_e32 v4, v0
+; GCN-DAG: v_mov_b32_e32 v0, 1.0
+; GCN-DAG: v_mov_b32_e32 v1, 2.0
+; GCN-DAG: v_mov_b32_e32 v2, 4.0
+; GCN-DAG: v_mov_b32_e32 v3, -1.0
+; GCN: exp 15, 0, 1, 1, 1, v4, v4, v4, v4
+; GCN: s_waitcnt expcnt(0)
+; GCN-NOT: s_endpgm
+define {float, float, float, float} @vgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+ ret {float, float, float, float} {float 1.0, float 2.0, float 4.0, float -1.0}
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 562
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 562
+; GCN-LABEL: {{^}}vgpr_ps_addr0:
+; GCN-NOT: v_mov_b32_e32 v0
+; GCN-NOT: v_mov_b32_e32 v1
+; GCN-NOT: v_mov_b32_e32 v2
+; GCN: v_mov_b32_e32 v3, v4
+; GCN: v_mov_b32_e32 v4, v6
+; GCN-NOT: s_endpgm
+attributes #1 = { "ShaderType"="0" "InitialPSInputAddr"="0" }
+define {float, float, float, float, float} @vgpr_ps_addr0([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+ %i0 = extractelement <2 x i32> %4, i32 0
+ %i1 = extractelement <2 x i32> %4, i32 1
+ %i2 = extractelement <2 x i32> %7, i32 0
+ %i3 = extractelement <2 x i32> %8, i32 0
+ %f0 = bitcast i32 %i0 to float
+ %f1 = bitcast i32 %i1 to float
+ %f2 = bitcast i32 %i2 to float
+ %f3 = bitcast i32 %i3 to float
+ %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
+ %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
+ %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
+ %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
+ %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
+ ret {float, float, float, float, float} %r4
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 1
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 1
+; GCN-LABEL: {{^}}ps_input_ena_no_inputs:
+; GCN: v_mov_b32_e32 v0, 1.0
+; GCN-NOT: s_endpgm
+define float @ps_input_ena_no_inputs([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+ ret float 1.0
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 2081
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 2081
+; GCN-LABEL: {{^}}ps_input_ena_pos_w:
+; GCN-DAG: v_mov_b32_e32 v0, v4
+; GCN-DAG: v_mov_b32_e32 v1, v2
+; GCN: v_mov_b32_e32 v2, v3
+; GCN-NOT: s_endpgm
+define {float, <2 x float>} @ps_input_ena_pos_w([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #1 {
+ %f = bitcast <2 x i32> %8 to <2 x float>
+ %s = insertvalue {float, <2 x float>} undef, float %14, 0
+ %s1 = insertvalue {float, <2 x float>} %s, <2 x float> %f, 1
+ ret {float, <2 x float>} %s1
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 562
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 563
+; GCN-LABEL: {{^}}vgpr_ps_addr1:
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mov_b32_e32 v2, v4
+; GCN-DAG: v_mov_b32_e32 v3, v6
+; GCN-DAG: v_mov_b32_e32 v4, v8
+; GCN-NOT: s_endpgm
+attributes #2 = { "ShaderType"="0" "InitialPSInputAddr"="1" }
+define {float, float, float, float, float} @vgpr_ps_addr1([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #2 {
+ %i0 = extractelement <2 x i32> %4, i32 0
+ %i1 = extractelement <2 x i32> %4, i32 1
+ %i2 = extractelement <2 x i32> %7, i32 0
+ %i3 = extractelement <2 x i32> %8, i32 0
+ %f0 = bitcast i32 %i0 to float
+ %f1 = bitcast i32 %i1 to float
+ %f2 = bitcast i32 %i2 to float
+ %f3 = bitcast i32 %i3 to float
+ %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
+ %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
+ %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
+ %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
+ %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
+ ret {float, float, float, float, float} %r4
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 562
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 631
+; GCN-LABEL: {{^}}vgpr_ps_addr119:
+; GCN-DAG: v_mov_b32_e32 v0, v2
+; GCN-DAG: v_mov_b32_e32 v1, v3
+; GCN: v_mov_b32_e32 v2, v6
+; GCN: v_mov_b32_e32 v3, v8
+; GCN: v_mov_b32_e32 v4, v12
+; GCN-NOT: s_endpgm
+attributes #3 = { "ShaderType"="0" "InitialPSInputAddr"="119" }
+define {float, float, float, float, float} @vgpr_ps_addr119([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #3 {
+ %i0 = extractelement <2 x i32> %4, i32 0
+ %i1 = extractelement <2 x i32> %4, i32 1
+ %i2 = extractelement <2 x i32> %7, i32 0
+ %i3 = extractelement <2 x i32> %8, i32 0
+ %f0 = bitcast i32 %i0 to float
+ %f1 = bitcast i32 %i1 to float
+ %f2 = bitcast i32 %i2 to float
+ %f3 = bitcast i32 %i3 to float
+ %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
+ %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
+ %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
+ %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
+ %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
+ ret {float, float, float, float, float} %r4
+}
+
+
+; GCN: .long 165580
+; GCN-NEXT: .long 562
+; GCN-NEXT: .long 165584
+; GCN-NEXT: .long 946
+; GCN-LABEL: {{^}}vgpr_ps_addr418:
+; GCN-NOT: v_mov_b32_e32 v0
+; GCN-NOT: v_mov_b32_e32 v1
+; GCN-NOT: v_mov_b32_e32 v2
+; GCN: v_mov_b32_e32 v3, v4
+; GCN: v_mov_b32_e32 v4, v8
+; GCN-NOT: s_endpgm
+attributes #4 = { "ShaderType"="0" "InitialPSInputAddr"="418" }
+define {float, float, float, float, float} @vgpr_ps_addr418([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, float, float, float) #4 {
+ %i0 = extractelement <2 x i32> %4, i32 0
+ %i1 = extractelement <2 x i32> %4, i32 1
+ %i2 = extractelement <2 x i32> %7, i32 0
+ %i3 = extractelement <2 x i32> %8, i32 0
+ %f0 = bitcast i32 %i0 to float
+ %f1 = bitcast i32 %i1 to float
+ %f2 = bitcast i32 %i2 to float
+ %f3 = bitcast i32 %i3 to float
+ %r0 = insertvalue {float, float, float, float, float} undef, float %f0, 0
+ %r1 = insertvalue {float, float, float, float, float} %r0, float %f1, 1
+ %r2 = insertvalue {float, float, float, float, float} %r1, float %f2, 2
+ %r3 = insertvalue {float, float, float, float, float} %r2, float %f3, 3
+ %r4 = insertvalue {float, float, float, float, float} %r3, float %12, 4
+ ret {float, float, float, float, float} %r4
+}
+
+
+; GCN-LABEL: {{^}}sgpr:
+; GCN: s_add_i32 s0, s3, 2
+; GCN: s_mov_b32 s2, s3
+; GCN-NOT: s_endpgm
+define {i32, i32, i32} @sgpr([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ %x = add i32 %2, 2
+ %a = insertvalue {i32, i32, i32} undef, i32 %x, 0
+ %b = insertvalue {i32, i32, i32} %a, i32 %1, 1
+ %c = insertvalue {i32, i32, i32} %a, i32 %2, 2
+ ret {i32, i32, i32} %c
+}
+
+
+; GCN-LABEL: {{^}}sgpr_literal:
+; GCN: s_mov_b32 s0, 5
+; GCN-NOT: s_mov_b32 s0, s0
+; GCN-DAG: s_mov_b32 s1, 6
+; GCN-DAG: s_mov_b32 s2, 7
+; GCN-DAG: s_mov_b32 s3, 8
+; GCN-NOT: s_endpgm
+define {i32, i32, i32, i32} @sgpr_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ %x = add i32 %2, 2
+ ret {i32, i32, i32, i32} {i32 5, i32 6, i32 7, i32 8}
+}
+
+
+; GCN-LABEL: {{^}}both:
+; GCN: v_mov_b32_e32 v1, v0
+; GCN-DAG: exp 15, 0, 1, 1, 1, v1, v1, v1, v1
+; GCN-DAG: v_add_f32_e32 v0, 1.0, v1
+; GCN-DAG: s_add_i32 s0, s3, 2
+; GCN-DAG: s_mov_b32 s1, s2
+; GCN: s_mov_b32 s2, s3
+; GCN: s_waitcnt expcnt(0)
+; GCN-NOT: s_endpgm
+define {float, i32, float, i32, i32} @both([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+ %v = fadd float %3, 1.0
+ %s = add i32 %2, 2
+ %a0 = insertvalue {float, i32, float, i32, i32} undef, float %v, 0
+ %a1 = insertvalue {float, i32, float, i32, i32} %a0, i32 %s, 1
+ %a2 = insertvalue {float, i32, float, i32, i32} %a1, float %3, 2
+ %a3 = insertvalue {float, i32, float, i32, i32} %a2, i32 %1, 3
+ %a4 = insertvalue {float, i32, float, i32, i32} %a3, i32 %2, 4
+ ret {float, i32, float, i32, i32} %a4
+}
+
+
+; GCN-LABEL: {{^}}structure_literal:
+; GCN: v_mov_b32_e32 v3, v0
+; GCN-DAG: v_mov_b32_e32 v0, 1.0
+; GCN-DAG: s_mov_b32 s0, 2
+; GCN-DAG: s_mov_b32 s1, 3
+; GCN-DAG: v_mov_b32_e32 v1, 2.0
+; GCN-DAG: v_mov_b32_e32 v2, 4.0
+; GCN-DAG: exp 15, 0, 1, 1, 1, v3, v3, v3, v3
+define {{float, i32}, {i32, <2 x float>}} @structure_literal([9 x <16 x i8>] addrspace(2)* byval, i32 inreg, i32 inreg, float) #0 {
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %3, float %3, float %3, float %3)
+ ret {{float, i32}, {i32, <2 x float>}} {{float, i32} {float 1.0, i32 2}, {i32, <2 x float>} {i32 3, <2 x float> <float 2.0, float 4.0>}}
+}
diff --git a/test/CodeGen/AMDGPU/si-scheduler.ll b/test/CodeGen/AMDGPU/si-scheduler.ll
new file mode 100644
index 000000000000..66a9571d75bf
--- /dev/null
+++ b/test/CodeGen/AMDGPU/si-scheduler.ll
@@ -0,0 +1,55 @@
+; RUN: llc -march=amdgcn -mcpu=SI --misched=si < %s | FileCheck %s
+
+; The test checks that the "si" machine scheduler pass works correctly.
+
+; CHECK-LABEL: {{^}}main:
+; CHECK: s_wqm
+; CHECK: s_load_dwordx4
+; CHECK: s_load_dwordx8
+; CHECK: s_waitcnt lgkmcnt(0)
+; CHECK: image_sample
+; CHECK: s_waitcnt vmcnt(0)
+; CHECK: exp
+; CHECK: s_endpgm
+
+define void @main([6 x <16 x i8>] addrspace(2)* byval, [17 x <16 x i8>] addrspace(2)* byval, [17 x <4 x i32>] addrspace(2)* byval, [34 x <8 x i32>] addrspace(2)* byval, float inreg, i32 inreg, <2 x i32>,
+<2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, float, i32, float, float) #0 {
+main_body:
+ %22 = bitcast [34 x <8 x i32>] addrspace(2)* %3 to <32 x i8> addrspace(2)*
+ %23 = load <32 x i8>, <32 x i8> addrspace(2)* %22, align 32, !tbaa !0
+ %24 = bitcast [17 x <4 x i32>] addrspace(2)* %2 to <16 x i8> addrspace(2)*
+ %25 = load <16 x i8>, <16 x i8> addrspace(2)* %24, align 16, !tbaa !0
+ %26 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %5, <2 x i32> %11)
+ %27 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %5, <2 x i32> %11)
+ %28 = bitcast float %26 to i32
+ %29 = bitcast float %27 to i32
+ %30 = insertelement <2 x i32> undef, i32 %28, i32 0
+ %31 = insertelement <2 x i32> %30, i32 %29, i32 1
+ %32 = call <4 x float> @llvm.SI.sample.v2i32(<2 x i32> %31, <32 x i8> %23, <16 x i8> %25, i32 2)
+ %33 = extractelement <4 x float> %32, i32 0
+ %34 = extractelement <4 x float> %32, i32 1
+ %35 = extractelement <4 x float> %32, i32 2
+ %36 = extractelement <4 x float> %32, i32 3
+ %37 = call i32 @llvm.SI.packf16(float %33, float %34)
+ %38 = bitcast i32 %37 to float
+ %39 = call i32 @llvm.SI.packf16(float %35, float %36)
+ %40 = bitcast i32 %39 to float
+ call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %38, float %40, float %38, float %40)
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
+
+; Function Attrs: nounwind readnone
+declare <4 x float> @llvm.SI.sample.v2i32(<2 x i32>, <32 x i8>, <16 x i8>, i32) #1
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.SI.packf16(float, float) #1
+
+declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
+
+attributes #0 = { "ShaderType"="0" "enable-no-nans-fp-math"="true" }
+attributes #1 = { nounwind readnone }
+
+!0 = !{!"const", null, i32 1}
diff --git a/test/CodeGen/AMDGPU/sint_to_fp.i64.ll b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
new file mode 100644
index 000000000000..138b93b16d8d
--- /dev/null
+++ b/test/CodeGen/AMDGPU/sint_to_fp.i64.ll
@@ -0,0 +1,62 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+
+; FIXME: This should be merged with sint_to_fp.ll, but s_sint_to_fp_v2i64 crashes on r600
+
+; FUNC-LABEL: {{^}}s_sint_to_fp_i64_to_f32:
+define void @s_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+ %result = sitofp i64 %in to float
+ store float %result, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sint_to_fp_i64_to_f32:
+; GCN: {{buffer|flat}}_load_dwordx2
+
+; SI: v_ashr_i64 {{v\[[0-9]+:[0-9]+\]}}, {{v\[[0-9]+:[0-9]+\]}}, 63
+; VI: v_ashrrev_i64 {{v\[[0-9]+:[0-9]+\]}}, 63, {{v\[[0-9]+:[0-9]+\]}}
+; GCN: v_xor_b32
+
+; GCN: v_ffbh_u32
+; GCN: v_ffbh_u32
+; GCN: v_cndmask
+; GCN: v_cndmask
+
+; GCN-DAG: v_cmp_eq_i64
+; GCN-DAG: v_cmp_lt_u64
+
+; GCN: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, v{{[0-9]+}}
+; GCN: v_cndmask_b32_e32 [[SIGN_SEL:v[0-9]+]],
+; GCN: {{buffer|flat}}_store_dword [[SIGN_SEL]]
+define void @v_sint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %result = sitofp i64 %val to float
+ store float %result, float addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_sint_to_fp_v2i64:
+define void @s_sint_to_fp_v2i64(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0 {
+ %result = sitofp <2 x i64> %in to <2 x float>
+ store <2 x float> %result, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_sint_to_fp_v4i64:
+define void @v_sint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
+ %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %result = sitofp <4 x i64> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
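
The v_sint_to_fp_i64_to_f32 checks sketch the signed path: form the absolute value with an arithmetic-shift/xor trick, run the unsigned i64-to-f32 conversion, then xor the sign back into bit 31 of the result. The C below mirrors that shape as a sanity check; it is an illustration under that reading of the checks, not a transcription of the selected DAG, and it borrows the host's uint64-to-float rounding instead of modelling the ffbh/cndmask normalization:

#include <assert.h>
#include <stdint.h>
#include <string.h>

static float sitofp_i64_to_f32(int64_t v) {
    int64_t sign = v >> 63;                /* v_ashr_i64 ..., 63; an
                                              arithmetic shift is assumed */
    uint64_t mag = (uint64_t)(v ^ sign) - (uint64_t)sign;  /* abs(v)      */
    float f = (float)mag;                  /* the unsigned conversion path */
    uint32_t bits;
    memcpy(&bits, &f, sizeof bits);
    bits ^= (uint32_t)sign & 0x80000000u;  /* v_xor_b32 ..., 0x80000000   */
    memcpy(&f, &bits, sizeof f);
    return f;
}

int main(void) {
    const int64_t xs[] = { 0, 1, -1, 123456789, -123456789, INT64_MIN + 1 };
    for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; ++i)
        assert(sitofp_i64_to_f32(xs[i]) == (float)xs[i]);
    return 0;
}
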
diff --git a/test/CodeGen/AMDGPU/sint_to_fp.ll b/test/CodeGen/AMDGPU/sint_to_fp.ll
index 8506441d1361..851085c9535d 100644
--- a/test/CodeGen/AMDGPU/sint_to_fp.ll
+++ b/test/CodeGen/AMDGPU/sint_to_fp.ll
@@ -2,63 +2,120 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-
; FUNC-LABEL: {{^}}s_sint_to_fp_i32_to_f32:
-; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{s[0-9]+$}}
-define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+define void @s_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
%result = sitofp i32 %in to float
store float %result, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}sint_to_fp_v2i32:
-; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+; FUNC-LABEL: {{^}}v_sint_to_fp_i32_to_f32:
+; SI: v_cvt_f32_i32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
+; R600: INT_TO_FLT
+define void @v_sint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %in.gep
+ %result = sitofp i32 %val to float
+ store float %result, float addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_sint_to_fp_v2i32:
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
-define void @sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: INT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+define void @s_sint_to_fp_v2i32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0 {
%result = sitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}sint_to_fp_v4i32:
+; FUNC-LABEL: {{^}}s_sint_to_fp_v4i32_to_v4f32:
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: v_cvt_f32_i32_e32
+; SI: s_endpgm
+
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @s_sint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
+ %result = sitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out
+ ret void
+}
+; FUNC-LABEL: {{^}}v_sint_to_fp_v4i32:
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
; SI: v_cvt_f32_i32_e32
-define void @sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
- %value = load <4 x i32>, <4 x i32> addrspace(1) * %in
+
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: INT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @v_sint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
+ %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
%result = sitofp <4 x i32> %value to <4 x float>
- store <4 x float> %result, <4 x float> addrspace(1)* %out
+ store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
ret void
}
-; FUNC-LABEL: {{^}}sint_to_fp_i1_f32:
+; FUNC-LABEL: {{^}}s_sint_to_fp_i1_f32:
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) {
+define void @s_sint_to_fp_i1_f32(float addrspace(1)* %out, i32 %in) #0 {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out, align 4
+ store float %fp, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}sint_to_fp_i1_f32_load:
+; FUNC-LABEL: {{^}}s_sint_to_fp_i1_f32_load:
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) {
+define void @s_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 %in) #0 {
%fp = sitofp i1 %in to float
- store float %fp, float addrspace(1)* %out, align 4
+ store float %fp, float addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}v_sint_to_fp_i1_f32_load:
+; SI: {{buffer|flat}}_load_ubyte
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: v_cmp_eq_i32
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1.0
+; SI: {{buffer|flat}}_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_sint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i1, i1 addrspace(1)* %in.gep
+ %fp = sitofp i1 %val to float
+ store float %fp, float addrspace(1)* %out.gep
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/udiv.ll b/test/CodeGen/AMDGPU/udiv.ll
index de22a22e5029..2a09e0b20498 100644
--- a/test/CodeGen/AMDGPU/udiv.ll
+++ b/test/CodeGen/AMDGPU/udiv.ll
@@ -1,12 +1,11 @@
-;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG %s
-;RUN: llc < %s -march=amdgcn -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI %s
-;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
+; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
-;EG-LABEL: {{^}}test:
-;EG-NOT: SETGE_INT
-;EG: CF_END
-
-define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}udiv_i32:
+; EG-NOT: SETGE_INT
+; EG: CF_END
+define void @udiv_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
%b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
%a = load i32, i32 addrspace(1) * %in
%b = load i32, i32 addrspace(1) * %b_ptr
@@ -15,16 +14,24 @@ define void @test(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
ret void
}
-;The code generated by udiv is long and complex and may frequently change.
-;The goal of this test is to make sure the ISel doesn't fail when it gets
-;a v4i32 udiv
+; FUNC-LABEL: {{^}}s_udiv_i32:
+
+define void @s_udiv_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+ %result = udiv i32 %a, %b
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; The code generated by udiv is long and complex and may frequently
+; change. The goal of this test is to make sure the ISel doesn't fail
+; when it gets a v4i32 udiv.
-;EG-LABEL: {{^}}test2:
-;EG: CF_END
-;SI-LABEL: {{^}}test2:
-;SI: s_endpgm
+; FUNC-LABEL: {{^}}udiv_v2i32:
+; EG: CF_END
-define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
+; SI: s_endpgm
+define void @udiv_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(1)* %in, i32 1
%a = load <2 x i32>, <2 x i32> addrspace(1) * %in
%b = load <2 x i32>, <2 x i32> addrspace(1) * %b_ptr
@@ -33,12 +40,10 @@ define void @test2(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) {
ret void
}
-;EG-LABEL: {{^}}test4:
-;EG: CF_END
-;SI-LABEL: {{^}}test4:
-;SI: s_endpgm
-
-define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+; FUNC-LABEL: {{^}}udiv_v4i32:
+; EG: CF_END
+; SI: s_endpgm
+define void @udiv_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
%b_ptr = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 1
%a = load <4 x i32>, <4 x i32> addrspace(1) * %in
%b = load <4 x i32>, <4 x i32> addrspace(1) * %b_ptr
@@ -46,3 +51,43 @@ define void @test4(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}
+
+; FUNC-LABEL: {{^}}udiv_i32_div_pow2:
+; SI: buffer_load_dword [[VAL:v[0-9]+]]
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 4, [[VAL]]
+; SI: buffer_store_dword [[RESULT]]
+define void @udiv_i32_div_pow2(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %result = udiv i32 %a, 16
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv_i32_div_k_even:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0xfabbd9c1
+; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]]
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 25, [[MULHI]]
+; SI: buffer_store_dword [[RESULT]]
+define void @udiv_i32_div_k_even(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %result = udiv i32 %a, 34259182
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}udiv_i32_div_k_odd:
+; SI-DAG: buffer_load_dword [[VAL:v[0-9]+]]
+; SI-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x7d5deca3
+; SI: v_mul_hi_u32 [[MULHI:v[0-9]+]], [[K]], [[VAL]]
+; SI: v_lshrrev_b32_e32 [[RESULT:v[0-9]+]], 24, [[MULHI]]
+; SI: buffer_store_dword [[RESULT]]
+define void @udiv_i32_div_k_odd(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
+ %b_ptr = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %a = load i32, i32 addrspace(1)* %in
+ %result = udiv i32 %a, 34259183
+ store i32 %result, i32 addrspace(1)* %out
+ ret void
+}
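
The two _div_k tests pin down the classic Granlund-Montgomery strengthening: an unsigned divide by a constant becomes v_mul_hi_u32 with a precomputed magic constant followed by a right shift. A quick C spot-check of the constants and shift amounts that appear in the CHECK lines above (a sampling of inputs, not a proof of exactness over all of u32):

#include <assert.h>
#include <stdint.h>

/* mul-hi then shift, i.e. (x * k) >> (32 + s) in 64-bit arithmetic. */
static uint32_t magic_udiv(uint32_t x, uint32_t k, unsigned s) {
    return (uint32_t)((((uint64_t)x * k) >> 32) >> s);
}

int main(void) {
    const uint32_t xs[] = { 0, 1, 34259181u, 34259182u,
                            123456789u, 4294967295u };
    for (unsigned i = 0; i < sizeof xs / sizeof xs[0]; ++i) {
        assert(magic_udiv(xs[i], 0xfabbd9c1u, 25) == xs[i] / 34259182u);
        assert(magic_udiv(xs[i], 0x7d5deca3u, 24) == xs[i] / 34259183u);
    }
    return 0;
}
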
diff --git a/test/CodeGen/AMDGPU/uint_to_fp.i64.ll b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
new file mode 100644
index 000000000000..3ab11442d5cc
--- /dev/null
+++ b/test/CodeGen/AMDGPU/uint_to_fp.i64.ll
@@ -0,0 +1,57 @@
+; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI -check-prefix=FUNC %s
+
+; FIXME: This should be merged with uint_to_fp.ll, but s_uint_to_fp_v2i64 crashes on r600
+
+; FUNC-LABEL: {{^}}s_uint_to_fp_i64_to_f32:
+define void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+ %result = uitofp i64 %in to float
+ store float %result, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_uint_to_fp_i64_to_f32:
+; GCN: {{buffer|flat}}_load_dwordx2
+
+; GCN: v_ffbh_u32
+; GCN: v_ffbh_u32
+; GCN: v_cndmask
+; GCN: v_cndmask
+
+; GCN-DAG: v_cmp_eq_i64
+; GCN-DAG: v_cmp_lt_u64
+
+; GCN: v_add_i32_e32 [[VR:v[0-9]+]]
+; GCN: {{buffer|flat}}_store_dword [[VR]]
+define void @v_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i64, i64 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i64, i64 addrspace(1)* %in.gep
+ %result = uitofp i64 %val to float
+ store float %result, float addrspace(1)* %out.gep
+ ret void
+}
+
+; FUNC-LABEL: {{^}}s_uint_to_fp_v2i64:
+define void @s_uint_to_fp_v2i64(<2 x float> addrspace(1)* %out, <2 x i64> %in) #0 {
+ %result = uitofp <2 x i64> %in to <2 x float>
+ store <2 x float> %result, <2 x float> addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_uint_to_fp_v4i64:
+define void @v_uint_to_fp_v4i64(<4 x float> addrspace(1)* %out, <4 x i64> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <4 x i64>, <4 x i64> addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
+ %value = load <4 x i64>, <4 x i64> addrspace(1)* %in.gep
+ %result = uitofp <4 x i64> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/AMDGPU/uint_to_fp.ll b/test/CodeGen/AMDGPU/uint_to_fp.ll
index 00fea80b1bc8..a3343d1e2d9c 100644
--- a/test/CodeGen/AMDGPU/uint_to_fp.ll
+++ b/test/CodeGen/AMDGPU/uint_to_fp.ll
@@ -2,81 +2,138 @@
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s
-; FUNC-LABEL: {{^}}uint_to_fp_i32_to_f32:
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
-
+; FUNC-LABEL: {{^}}s_uint_to_fp_i32_to_f32:
; SI: v_cvt_f32_u32_e32
-; SI: s_endpgm
-define void @uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) {
+
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].Z
+define void @s_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 %in) #0 {
%result = uitofp i32 %in to float
store float %result, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}uint_to_fp_v2i32_to_v2f32:
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
-; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+; FUNC-LABEL: {{^}}v_uint_to_fp_i32_to_f32:
+; SI: v_cvt_f32_u32_e32 {{v[0-9]+}}, {{v[0-9]+$}}
+
+; R600: UINT_TO_FLT
+define void @v_uint_to_fp_i32_to_f32(float addrspace(1)* %out, i32 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i32, i32 addrspace(1)* %in.gep
+ %result = uitofp i32 %val to float
+ store float %result, float addrspace(1)* %out.gep
+ ret void
+}
+; FUNC-LABEL: {{^}}s_uint_to_fp_v2i32_to_v2f32:
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
-; SI: s_endpgm
-define void @uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) {
+
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[2].W
+; R600-DAG: UINT_TO_FLT * T{{[0-9]+\.[XYZW]}}, KC0[3].X
+define void @s_uint_to_fp_v2i32_to_v2f32(<2 x float> addrspace(1)* %out, <2 x i32> %in) #0 {
%result = uitofp <2 x i32> %in to <2 x float>
store <2 x float> %result, <2 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}uint_to_fp_v4i32_to_v4f32:
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-
+; FUNC-LABEL: {{^}}s_uint_to_fp_v4i32_to_v4f32:
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
; SI: s_endpgm
-define void @uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
+
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @s_uint_to_fp_v4i32_to_v4f32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
%value = load <4 x i32>, <4 x i32> addrspace(1) * %in
%result = uitofp <4 x i32> %value to <4 x float>
store <4 x float> %result, <4 x float> addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}uint_to_fp_i64_to_f32:
-; R600: UINT_TO_FLT
-; R600: UINT_TO_FLT
-; R600: MULADD_IEEE
+; FUNC-LABEL: {{^}}v_uint_to_fp_v4i32:
; SI: v_cvt_f32_u32_e32
; SI: v_cvt_f32_u32_e32
-; SI: v_madmk_f32_e32 {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, 0x4f800000
-; SI: s_endpgm
-define void @uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) {
-entry:
- %0 = uitofp i64 %in to float
- store float %0, float addrspace(1)* %out
+; SI: v_cvt_f32_u32_e32
+; SI: v_cvt_f32_u32_e32
+
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; R600: UINT_TO_FLT * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+define void @v_uint_to_fp_v4i32(<4 x float> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr <4 x i32>, <4 x i32> addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr <4 x float>, <4 x float> addrspace(1)* %out, i32 %tid
+ %value = load <4 x i32>, <4 x i32> addrspace(1)* %in.gep
+ %result = uitofp <4 x i32> %value to <4 x float>
+ store <4 x float> %result, <4 x float> addrspace(1)* %out.gep
ret void
}
-; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32:
+; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f32:
; SI: v_cmp_eq_i32_e64 [[CMP:s\[[0-9]+:[0-9]\]]],
; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0, [[CMP]]
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) {
+define void @s_uint_to_fp_i1_to_f32(float addrspace(1)* %out, i32 %in) #0 {
%cmp = icmp eq i32 %in, 0
%fp = uitofp i1 %cmp to float
- store float %fp, float addrspace(1)* %out, align 4
+ store float %fp, float addrspace(1)* %out
ret void
}
-; FUNC-LABEL: {{^}}uint_to_fp_i1_to_f32_load:
+; FUNC-LABEL: {{^}}s_uint_to_fp_i1_to_f32_load:
; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
; SI: buffer_store_dword [[RESULT]],
; SI: s_endpgm
-define void @uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) {
+define void @s_uint_to_fp_i1_to_f32_load(float addrspace(1)* %out, i1 %in) #0 {
%fp = uitofp i1 %in to float
- store float %fp, float addrspace(1)* %out, align 4
+ store float %fp, float addrspace(1)* %out
+ ret void
+}
+
+; FUNC-LABEL: {{^}}v_uint_to_fp_i1_f32_load:
+; SI: {{buffer|flat}}_load_ubyte
+; SI: v_and_b32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}}
+; SI: v_cmp_eq_i32
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1.0
+; SI: {{buffer|flat}}_store_dword [[RESULT]],
+; SI: s_endpgm
+define void @v_uint_to_fp_i1_f32_load(float addrspace(1)* %out, i1 addrspace(1)* %in) #0 {
+ %tid = call i32 @llvm.r600.read.tidig.x()
+ %in.gep = getelementptr i1, i1 addrspace(1)* %in, i32 %tid
+ %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
+ %val = load i1, i1 addrspace(1)* %in.gep
+ %fp = uitofp i1 %val to float
+ store float %fp, float addrspace(1)* %out.gep
ret void
}
+
+; FIXME: Repeated here to test r600
+; FUNC-LABEL: {{^}}s_uint_to_fp_i64_to_f32:
+; R600: FFBH_UINT
+; R600: FFBH_UINT
+; R600: CNDE_INT
+; R600: CNDE_INT
+
+; R600-DAG: SETGT_UINT
+; R600-DAG: SETGT_UINT
+; R600-DAG: SETE_INT
+
+define void @s_uint_to_fp_i64_to_f32(float addrspace(1)* %out, i64 %in) #0 {
+entry:
+ %cvt = uitofp i64 %in to float
+ store float %cvt, float addrspace(1)* %out
+ ret void
+}
+
+declare i32 @llvm.r600.read.tidig.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/ARM/bit-reverse-to-rbit.ll b/test/CodeGen/ARM/bit-reverse-to-rbit.ll
new file mode 100644
index 000000000000..8482cbf69f2e
--- /dev/null
+++ b/test/CodeGen/ARM/bit-reverse-to-rbit.ll
@@ -0,0 +1,34 @@
+;RUN: opt -instcombine -S < %s | llc -mtriple=armv5e--linux-gnueabi | FileCheck %s
+;RUN: opt -instcombine -S < %s | llc -mtriple=thumbv4t--linux-gnueabi | FileCheck %s
+;RUN: opt -instcombine -S < %s | llc -mtriple=armv6--linux-gnueabi | FileCheck %s
+
+;RUN: opt -instcombine -S < %s | llc -mtriple=armv7--linux-gnueabi | FileCheck %s --check-prefix=RBIT
+;RUN: opt -instcombine -S < %s | llc -mtriple=thumbv8--linux-gnueabi | FileCheck %s --check-prefix=RBIT
+
+;CHECK-NOT: rbit
+;RBIT: rbit
+
+define void @byte_reversal(i8* %p, i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp ult i32 %i.0, %n
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %0 = sext i32 %i.0 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %0
+ %1 = load i8, i8* %arrayidx, align 1
+ %or19 = call i8 @llvm.bitreverse.i8(i8 %1)
+ store i8 %or19, i8* %arrayidx, align 1
+ %inc = add i32 %i.0, 1
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; Function Attrs: nounwind readnone
+declare i8 @llvm.bitreverse.i8(i8)
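
For orientation, llvm.bitreverse.i8 mirrors the whole bit pattern (bit 0 swaps with bit 7, and so on); ARMv7/Thumb2 and later can lower it to rbit (plus a shift to correct for the sub-word width), which is what the RBIT prefix checks, while the older triples in the first three RUN lines must expand it and so must not emit rbit. A plain C reference model of the i8 semantics:

#include <assert.h>

/* Reference model of llvm.bitreverse.i8: mirror the 8-bit pattern. */
static unsigned char bitreverse8(unsigned char v) {
    unsigned char r = 0;
    for (int i = 0; i < 8; ++i)
        r = (unsigned char)((r << 1) | ((v >> i) & 1));
    return r;
}

int main(void) {
    assert(bitreverse8(0x01) == 0x80);
    assert(bitreverse8(0x80) == 0x01);
    assert(bitreverse8(0xf0) == 0x0f);
    assert(bitreverse8(0xa5) == 0xa5);  /* a bitwise palindrome */
    return 0;
}
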
diff --git a/test/CodeGen/ARM/cxx-tlscc.ll b/test/CodeGen/ARM/cxx-tlscc.ll
new file mode 100644
index 000000000000..7b776d4b8e88
--- /dev/null
+++ b/test/CodeGen/ARM/cxx-tlscc.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=armv7k-apple-watchos2.0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7k-apple-watchos2.0 -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
+; RUN: llc < %s -mtriple=armv7-apple-ios8.0 | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-apple-ios8.0 -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s
+
+%struct.S = type { i8 }
+
+@sg = internal thread_local global %struct.S zeroinitializer, align 1
+@__dso_handle = external global i8
+@__tls_guard = internal thread_local unnamed_addr global i1 false
+
+declare %struct.S* @_ZN1SC1Ev(%struct.S* returned)
+declare %struct.S* @_ZN1SD1Ev(%struct.S* returned)
+declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
+
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
+ %.b.i = load i1, i1* @__tls_guard, align 1
+ br i1 %.b.i, label %__tls_init.exit, label %init.i
+
+init.i:
+ store i1 true, i1* @__tls_guard, align 1
+ %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg)
+ %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle)
+ br label %__tls_init.exit
+
+__tls_init.exit:
+ ret %struct.S* @sg
+}
+
+; CHECK-LABEL: _ZTW2sg
+; CHECK: push {lr}
+; CHECK-NOT: push {r1, r2, r3, r4, r7, lr}
+; CHECK-NOT: push {r9, r12}
+; CHECK-NOT: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: vpush {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK: blx
+; CHECK: bne [[BB_end:.?LBB0_[0-9]+]]
+; CHECK: blx
+; CHECK: tlv_atexit
+; CHECK: [[BB_end]]:
+; CHECK: blx
+; CHECK-NOT: vpop {d0, d1, d2, d3, d4, d5, d6, d7}
+; CHECK-NOT: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31}
+; CHECK-NOT: pop {r9, r12}
+; CHECK-NOT: pop {r1, r2, r3, r4, r7, pc}
+; CHECK: pop {lr}
diff --git a/test/CodeGen/ARM/darwin-tls.ll b/test/CodeGen/ARM/darwin-tls.ll
new file mode 100644
index 000000000000..e19953222020
--- /dev/null
+++ b/test/CodeGen/ARM/darwin-tls.ll
@@ -0,0 +1,165 @@
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - -fast-isel %s | FileCheck %s --check-prefix=T2-MOVT-PIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=T2-LIT-PIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=T2-MOVT-STATIC
+; RUN: llc -mtriple=thumbv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=T2-LIT-STATIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s | FileCheck %s --check-prefix=ARM-MOVT-PIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt | FileCheck %s --check-prefix=ARM-LIT-PIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -relocation-model=static | FileCheck %s --check-prefix=ARM-MOVT-STATIC
+; RUN: llc -mtriple=armv7s-apple-ios7.0 -o - %s -mattr=+no-movt -relocation-model=static | FileCheck %s --check-prefix=ARM-LIT-STATIC
+
+
+@local_tls_var = thread_local global i32 0
+@external_tls_var = external thread_local global i32
+
+define i32 @test_local_tls() {
+; T2-MOVT-PIC-LABEL: test_local_tls:
+; T2-MOVT-PIC: movw r0, :lower16:(_local_tls_var-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
+; T2-MOVT-PIC: movt r0, :upper16:(_local_tls_var-([[PCREL_LOC]]+4))
+; T2-MOVT-PIC: [[PCREL_LOC]]:
+; T2-MOVT-PIC-NEXT: add r0, pc
+; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-PIC: ldr r0, [r0]
+
+; T2-LIT-PIC-LABEL: test_local_tls:
+; T2-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; T2-LIT-PIC-NEXT: add r0, pc
+; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-PIC: ldr r0, [r0]
+; T2-LIT-PIC: [[LOCAL_VAR_ADDR]]:
+; T2-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+4)
+
+; T2-MOVT-STATIC-LABEL: test_local_tls:
+; T2-MOVT-STATIC: movw r0, :lower16:_local_tls_var
+; T2-MOVT-STATIC: movt r0, :upper16:_local_tls_var
+; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-STATIC: ldr r0, [r0]
+
+; T2-LIT-STATIC-LABEL: test_local_tls:
+; T2-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-STATIC: ldr r0, [r0]
+; T2-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
+; T2-LIT-STATIC-NEXT: .long _local_tls_var
+
+; ARM-MOVT-PIC-LABEL: test_local_tls:
+; ARM-MOVT-PIC: movw [[VARPC1:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC1:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt [[VARPC1]], :upper16:(_local_tls_var-([[PCREL_LOC1]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC1]]:
+; ARM-MOVT-PIC: add r0, pc, [[VARPC1]]
+; ARM-MOVT-PIC: movw [[VARPC2:r[0-9]+]], :lower16:(_local_tls_var-([[PCREL_LOC2:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt [[VARPC2]], :upper16:(_local_tls_var-([[PCREL_LOC2]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC2]]:
+; ARM-MOVT-PIC-NEXT: ldr [[TLV_GET_ADDR:r[0-9]+]], [pc, [[VARPC2]]]
+; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-PIC: ldr r0, [r0]
+
+; ARM-LIT-PIC-LABEL: test_local_tls:
+; ARM-LIT-PIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; ARM-LIT-PIC-NEXT: add r0, pc
+; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-PIC: ldr r0, [r0]
+; ARM-LIT-PIC: [[LOCAL_VAR_ADDR]]:
+; ARM-LIT-PIC-NEXT: .long _local_tls_var-([[PCREL_LOC]]+8)
+
+; ARM-MOVT-STATIC-LABEL: test_local_tls:
+; ARM-MOVT-STATIC: movw r0, :lower16:_local_tls_var
+; ARM-MOVT-STATIC: movt r0, :upper16:_local_tls_var
+; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-STATIC: ldr r0, [r0]
+
+; ARM-LIT-STATIC-LABEL: test_local_tls:
+; ARM-LIT-STATIC: ldr r0, [[LOCAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-STATIC: ldr r0, [r0]
+; ARM-LIT-STATIC: [[LOCAL_VAR_ADDR]]:
+; ARM-LIT-STATIC-NEXT: .long _local_tls_var
+
+
+ %val = load i32, i32* @local_tls_var, align 4
+ ret i32 %val
+}
+
+define i32 @test_external_tls() {
+; T2-MOVT-PIC-LABEL: test_external_tls:
+; T2-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+4))
+; T2-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4))
+; T2-MOVT-PIC: [[PCREL_LOC]]:
+; T2-MOVT-PIC-NEXT: add r[[EXTGOT]], pc
+; T2-MOVT-PIC: ldr r0, [r[[EXTGOT]]]
+; T2-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-PIC: ldr r0, [r0]
+
+; T2-LIT-PIC-LABEL: test_external_tls:
+; T2-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; T2-LIT-PIC-NEXT: add r[[EXTGOT]], pc
+; T2-LIT-PIC: ldr r0, [r[[EXTGOT]]]
+; T2-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-PIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-PIC: ldr r0, [r0]
+; T2-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
+; T2-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+4)
+
+; T2-MOVT-STATIC-LABEL: test_external_tls:
+; T2-MOVT-STATIC: movw r0, :lower16:_external_tls_var
+; T2-MOVT-STATIC: movt r0, :upper16:_external_tls_var
+; T2-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-MOVT-STATIC: ldr r0, [r0]
+
+; T2-LIT-STATIC-LABEL: test_external_tls:
+; T2-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; T2-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; T2-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; T2-LIT-STATIC: ldr r0, [r0]
+; T2-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
+; T2-LIT-STATIC-NEXT: .long _external_tls_var
+
+; ARM-MOVT-PIC-LABEL: test_external_tls:
+; ARM-MOVT-PIC: movw r[[EXTGOT:[0-9]+]], :lower16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC:LPC[0-9]+_[0-9]+]]+8))
+; ARM-MOVT-PIC: movt r[[EXTGOT]], :upper16:(L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8))
+; ARM-MOVT-PIC: [[PCREL_LOC]]:
+; ARM-MOVT-PIC-NEXT: ldr r0, [pc, r[[EXTGOT]]]
+; ARM-MOVT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-PIC: ldr r0, [r0]
+
+; ARM-LIT-PIC-LABEL: test_external_tls:
+; ARM-LIT-PIC: ldr r[[EXTGOT:[0-9]+]], [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-PIC: [[PCREL_LOC:LPC[0-9]+_[0-9]+]]:
+; ARM-LIT-PIC-NEXT: add r[[EXTGOT]], pc
+; ARM-LIT-PIC: ldr r0, [r[[EXTGOT]]]
+; ARM-LIT-PIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-PIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-PIC: ldr r0, [r0]
+; ARM-LIT-PIC: [[EXTERNAL_VAR_ADDR]]:
+; ARM-LIT-PIC-NEXT: .long L_external_tls_var$non_lazy_ptr-([[PCREL_LOC]]+8)
+
+; ARM-MOVT-STATIC-LABEL: test_external_tls:
+; ARM-MOVT-STATIC: movw r0, :lower16:_external_tls_var
+; ARM-MOVT-STATIC: movt r0, :upper16:_external_tls_var
+; ARM-MOVT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-MOVT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-MOVT-STATIC: ldr r0, [r0]
+
+; ARM-LIT-STATIC-LABEL: test_external_tls:
+; ARM-LIT-STATIC: ldr r0, [[EXTERNAL_VAR_ADDR:LCPI[0-9]+_[0-9]+]]
+; ARM-LIT-STATIC: ldr [[TLV_GET_ADDR:r[0-9]+]], [r0]
+; ARM-LIT-STATIC: blx [[TLV_GET_ADDR]]
+; ARM-LIT-STATIC: ldr r0, [r0]
+; ARM-LIT-STATIC: [[EXTERNAL_VAR_ADDR]]:
+; ARM-LIT-STATIC-NEXT: .long _external_tls_var
+
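+; All variants follow the Darwin TLV convention: materialize the address
+; of the variable's TLV descriptor, load the getter function pointer from
+; its first word, blx through it, and load the value from the address
+; returned in r0 (hence the trailing "ldr r0, [r0]" in every pattern).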
+ %val = load i32, i32* @external_tls_var, align 4
+ ret i32 %val
+}
diff --git a/test/CodeGen/ARM/fabs-to-bfc.ll b/test/CodeGen/ARM/fabs-to-bfc.ll
new file mode 100644
index 000000000000..1a2e04584a91
--- /dev/null
+++ b/test/CodeGen/ARM/fabs-to-bfc.ll
@@ -0,0 +1,14 @@
+; RUN: llc < %s -mtriple=armv5e-none-linux-gnueabi -mattr=+vfp2 | FileCheck %s -check-prefix=CHECK-VABS
+; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi -mattr=+vfp3 | FileCheck %s -check-prefix=CHECK-BFC
+
+
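+; fabs only clears the IEEE-754 sign bit. Under the soft-float ABI used
+; here the double is returned in core registers, so on ARMv7 a single bfc
+; (bit-field clear) of the sign bit presumably beats a round trip through
+; the VFP registers, whereas the ARMv5E/VFP2 target still emits vabs.f64.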
+define double @test(double %tx) {
+;CHECK-LABEL: test:
+ %call = tail call double @fabs(double %tx)
+ ret double %call
+;CHECK-VABS: vabs.f64
+;CHECK-BFC: bfc
+}
+
+declare double @fabs(double) readnone
+
diff --git a/test/CodeGen/ARM/fp16-args.ll b/test/CodeGen/ARM/fp16-args.ll
index 31a20f85483b..708fae7f9ffa 100644
--- a/test/CodeGen/ARM/fp16-args.ll
+++ b/test/CodeGen/ARM/fp16-args.ll
@@ -32,9 +32,10 @@ entry:
; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s1
; HARD: vcvtb.f32.f16 {{s[0-9]+}}, s0
; HARD: vadd.f32 {{s[0-9]+}}, {{s[0-9]+}}, {{s[0-9]+}}
-; HARD: vcvtb.f16.f32 s0, {{s[0-9]+}}
-; HARD-NOT: vmov
-; HARD-NOT: uxth
+; HARD: vcvtb.f16.f32 [[SREG:s[0-9]+]], {{s[0-9]+}}
+; HARD-NEXT: vmov [[REG0:r[0-9]+]], [[SREG]]
+; HARD-NEXT: uxth [[REG1:r[0-9]+]], [[REG0]]
+; HARD-NEXT: vmov s0, [[REG1]]
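+; (The f16 result is now round-tripped through a core register and
+; zero-extended with uxth before landing in s0, presumably so the upper
+; bits of the returned register are well-defined.)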
; CHECK: bx lr
}
diff --git a/test/CodeGen/ARM/fp16-v3.ll b/test/CodeGen/ARM/fp16-v3.ll
new file mode 100644
index 000000000000..6ed9c9d22c9d
--- /dev/null
+++ b/test/CodeGen/ARM/fp16-v3.ll
@@ -0,0 +1,28 @@
+; RUN: llc -mattr=+fp16 < %s | FileCheck %s --check-prefix=CHECK
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7a--none-eabi"
+
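+; Storing a <3 x half> is expected to split into a 32-bit vst1 of lanes
+; 0-1 (packed into one word with pkhbt) plus a scalar strh of lane 2, as
+; the checks below spell out.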
+; CHECK-LABEL: test_vec3:
+; CHECK: vcvtb.f32.f16
+; CHECK: vcvt.f32.s32
+; CHECK: vadd.f32
+; CHECK-NEXT: vcvtb.f16.f32 [[SREG:s[0-9]+]], {{.*}}
+; CHECK-NEXT: vmov [[RREG1:r[0-9]+]], [[SREG]]
+; CHECK-NEXT: uxth [[RREG2:r[0-9]+]], [[RREG1]]
+; CHECK-NEXT: pkhbt [[RREG3:r[0-9]+]], [[RREG1]], [[RREG1]], lsl #16
+; CHECK-DAG: strh [[RREG1]], [r0, #4]
+; CHECK-DAG: vmov [[DREG:d[0-9]+]], [[RREG3]], [[RREG2]]
+; CHECK-DAG: vst1.32 {[[DREG]][0]}, [r0:32]
+; CHECK-NEXT: bx lr
+define void @test_vec3(<3 x half>* %arr, i32 %i) #0 {
+ %H = sitofp i32 %i to half
+ %S = fadd half %H, 0xH4A00
+ %1 = insertelement <3 x half> undef, half %S, i32 0
+ %2 = insertelement <3 x half> %1, half %S, i32 1
+ %3 = insertelement <3 x half> %2, half %S, i32 2
+ store <3 x half> %3, <3 x half>* %arr, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/CodeGen/ARM/inlineasm-imm-thumb.ll b/test/CodeGen/ARM/inlineasm-imm-thumb.ll
new file mode 100644
index 000000000000..80be870743f5
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-imm-thumb.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=thumbv5-none-linux-gnueabi -no-integrated-as %s -o /dev/null
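+; The output is discarded, so this run only verifies that each immediate
+; below is accepted for its constraint without an inline-asm diagnostic.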
+
+; Test thumb-mode "I" constraint, for any Data Processing immediate.
+define void @testI() {
+ tail call void asm sideeffect ".word $0", "I"( i32 255 ) nounwind
+ ret void
+}
+
+; Test thumb-mode "J" constraint, for compatibility with unknown use in GCC.
+define void @testJ() {
+ tail call void asm sideeffect ".word $0", "J"( i32 -254 ) nounwind
+ ret void
+}
+
+; Test thumb-mode "L" constraint, for negated Data Processing immediates.
+define void @testL() {
+ tail call void asm sideeffect ".word $0", "L"( i32 -7 ) nounwind
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/inlineasm-imm-thumb2.ll b/test/CodeGen/ARM/inlineasm-imm-thumb2.ll
new file mode 100644
index 000000000000..c54f3b8aa5f9
--- /dev/null
+++ b/test/CodeGen/ARM/inlineasm-imm-thumb2.ll
@@ -0,0 +1,31 @@
+; RUN: llc -mtriple=thumbv7-linux-gnu -no-integrated-as %s -o /dev/null
+
+; Test thumb2-mode "I" constraint, for any Data Processing immediate.
+define i32 @testI(i32 %x) {
+ %y = call i32 asm "add $0, $1, $2", "=r,r,I"( i32 %x, i32 65280 ) nounwind
+ ret i32 %y
+}
+
+; Test thumb2-mode "J" constraint, for compatibility with unknown use in GCC.
+define void @testJ() {
+ tail call void asm sideeffect ".word $0", "J"( i32 4080 ) nounwind
+ ret void
+}
+
+; Test thumb2-mode "K" constraint, for bitwise inverted Data Processing immediates.
+define void @testK() {
+ tail call void asm sideeffect ".word $0", "K"( i32 16777215 ) nounwind
+ ret void
+}
+
+; Test thumb2-mode "L" constraint, for negated Data Processing immediates.
+define void @testL() {
+ tail call void asm sideeffect ".word $0", "L"( i32 -65280 ) nounwind
+ ret void
+}
+
+; Test thumb2-mode "M" constraint, for value between 0 and 32.
+define i32 @testM(i32 %x) {
+ %y = call i32 asm "lsl $0, $1, $2", "=r,r,M"( i32 %x, i32 31 ) nounwind
+ ret i32 %y
+}
diff --git a/test/CodeGen/ARM/zero-cycle-zero.ll b/test/CodeGen/ARM/zero-cycle-zero.ll
index 121a87f5b84d..4e8696f4418a 100644
--- a/test/CodeGen/ARM/zero-cycle-zero.ll
+++ b/test/CodeGen/ARM/zero-cycle-zero.ll
@@ -1,26 +1,19 @@
-; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK-CYCLONE
-; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK-SWIFT
+; RUN: llc -mtriple=armv8 -mcpu=cyclone < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTSWIFT
+; RUN: llc -mtriple=armv8 -mcpu=swift < %s | FileCheck %s --check-prefix=CHECK
+; RUN: llc -mtriple=armv8 -mcpu=cortex-a57 < %s | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-NOTSWIFT
declare arm_aapcs_vfpcc void @take_vec64(<2 x i32>)
define void @test_vec64() {
-; CHECK-CYCLONE-LABEL: test_vec64:
-; CHECK-SWIFT-LABEL: test_vec64:
+; CHECK-LABEL: test_vec64:
call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
call arm_aapcs_vfpcc void @take_vec64(<2 x i32> <i32 0, i32 0>)
-; CHECK-CYCLONE-NOT: vmov.f64 d0,
-; CHECK-CYCLONE: vmov.i32 d0, #0
-; CHECK-CYCLONE: bl
-; CHECK-CYCLONE: vmov.i32 d0, #0
-; CHECK-CYCLONE: bl
-
-; CHECK-SWIFT: vmov.f64 [[ZEROREG:d[0-9]+]],
-; CHECK-SWIFT: vmov.i32 [[ZEROREG]], #0
-; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
-; CHECK-SWIFT: bl
-; CHECK-SWIFT: vorr d0, [[ZEROREG]], [[ZEROREG]]
-; CHECK-SWIFT: bl
+; CHECK-NOTSWIFT-NOT: vmov.f64 d0,
+; CHECK: vmov.i32 d0, #0
+; CHECK: bl
+; CHECK: vmov.i32 d0, #0
+; CHECK: bl
ret void
}
@@ -28,23 +21,15 @@ define void @test_vec64() {
declare arm_aapcs_vfpcc void @take_vec128(<8 x i16>)
define void @test_vec128() {
-; CHECK-CYCLONE-LABEL: test_vec128:
-; CHECK-SWIFT-LABEL: test_vec128:
+; CHECK-LABEL: test_vec128:
call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
call arm_aapcs_vfpcc void @take_vec128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
-; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
-; CHECK-CYCLONE: vmov.i32 q0, #0
-; CHECK-CYCLONE: bl
-; CHECK-CYCLONE: vmov.i32 q0, #0
-; CHECK-CYCLONE: bl
-
-; CHECK-SWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
-; CHECK-SWIFT: vmov.i32 [[ZEROREG:q[0-9]+]], #0
-; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
-; CHECK-SWIFT: bl
-; CHECK-SWIFT: vorr q0, [[ZEROREG]], [[ZEROREG]]
-; CHECK-SWIFT: bl
+; CHECK-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK: vmov.i32 q0, #0
+; CHECK: bl
+; CHECK: vmov.i32 q0, #0
+; CHECK: bl
ret void
}
@@ -52,16 +37,15 @@ define void @test_vec128() {
declare void @take_i32(i32)
define void @test_i32() {
-; CHECK-CYCLONE-LABEL: test_i32:
-; CHECK-SWIFT-LABEL: test_i32:
+; CHECK-LABEL: test_i32:
call arm_aapcs_vfpcc void @take_i32(i32 0)
call arm_aapcs_vfpcc void @take_i32(i32 0)
-; CHECK-CYCLONE-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
-; CHECK-CYCLONE: mov r0, #0
-; CHECK-CYCLONE: bl
-; CHECK-CYCLONE: mov r0, #0
-; CHECK-CYCLONE: bl
+; CHECK-NOTSWIFT-NOT: vmov.f64 [[ZEROREG:d[0-9]+]],
+; CHECK: mov r0, #0
+; CHECK: bl
+; CHECK: mov r0, #0
+; CHECK: bl
; It doesn't particularly matter what Swift does here; there isn't carefully
; crafted behaviour that we might break in Cyclone.
diff --git a/test/CodeGen/Hexagon/bit-phi.ll b/test/CodeGen/Hexagon/bit-phi.ll
new file mode 100644
index 000000000000..86b18d8bf256
--- /dev/null
+++ b/test/CodeGen/Hexagon/bit-phi.ll
@@ -0,0 +1,58 @@
+; RUN: llc -march=hexagon < %s
+; REQUIRES: asserts
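+; No FileCheck: this is a compile-only test that (given the REQUIRES:
+; asserts line) presumably guards against an assertion failure on the phi
+; of the two equivalent "and" masks in %if.end38 below.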
+
+target datalayout = "e-m:e-p:32:32-i1:32-i64:64-a:0-v32:32-n16:32"
+target triple = "hexagon-unknown--elf"
+
+%struct.item = type { i32, i8*, i8*, i32, i8, i8, i16, i32, i8, i16, i32 }
+
+declare %struct.item* @foo(%struct.item*, i8*, i32) #1
+
+; Function Attrs: nounwind
+define i32 @bar(%struct.item** %ptr, i8* %buf, i32 %c, i8* %d, i32 %e) #1 {
+entry:
+ br i1 undef, label %return, label %if.end
+
+if.end: ; preds = %entry
+ br i1 undef, label %while.cond13.preheader, label %if.end3
+
+if.end3: ; preds = %if.end
+ br label %while.cond13.preheader
+
+while.cond13.preheader: ; preds = %if.end3, %if.end
+ br i1 undef, label %while.body20, label %return
+
+while.body20: ; preds = %if.end38, %while.cond13.preheader
+ %addr.0100 = phi i32 [ undef, %if.end38 ], [ %c, %while.cond13.preheader ]
+ %cond = select i1 undef, i32 %addr.0100, i32 undef
+ br i1 undef, label %while.body20.if.end38_crit_edge, label %if.then32
+
+while.body20.if.end38_crit_edge: ; preds = %while.body20
+ %conv39.pre = and i32 %cond, 65535
+ br label %if.end38
+
+if.then32: ; preds = %while.body20
+ %conv33 = and i32 %cond, 65535
+ %.pre = load %struct.item*, %struct.item** %ptr, align 4, !tbaa !1
+ br label %if.end38
+
+if.end38: ; preds = %if.then32, %while.body20.if.end38_crit_edge
+ %conv39.pre-phi = phi i32 [ %conv39.pre, %while.body20.if.end38_crit_edge ], [ %conv33, %if.then32 ]
+ %0 = phi %struct.item* [ undef, %while.body20.if.end38_crit_edge ], [ %.pre, %if.then32 ]
+ %add = add i32 %conv39.pre-phi, 0
+ %call52 = tail call %struct.item* @foo(%struct.item* %0, i8* %d, i32 %e) #1
+ br i1 undef, label %while.body20, label %return
+
+return: ; preds = %if.end38, %while.cond13.preheader, %entry
+ %retval.0 = phi i32 [ 0, %entry ], [ 0, %while.cond13.preheader ], [ %add, %if.end38 ]
+ ret i32 %retval.0
+}
+
+
+attributes #0 = { nounwind readonly }
+attributes #1 = { nounwind }
+
+!1 = !{!2, !2, i64 0}
+!2 = !{!"any pointer", !3, i64 0}
+!3 = !{!"omnipotent char", !4, i64 0}
+!4 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/postinc-offset.ll b/test/CodeGen/Hexagon/postinc-offset.ll
index 5e0f4751f305..cf8c4e5f71d2 100644
--- a/test/CodeGen/Hexagon/postinc-offset.ll
+++ b/test/CodeGen/Hexagon/postinc-offset.ll
@@ -1,4 +1,5 @@
-; RUN: llc -enable-aa-sched-mi -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s
+; RUN: llc -enable-aa-sched-mi -march=hexagon -mcpu=hexagonv5 -rdf-opt=0 \
+; RUN: < %s | FileCheck %s
; CHECK: {
; CHECK: ={{ *}}memd([[REG0:(r[0-9]+)]]{{ *}}++{{ *}}#8)
diff --git a/test/CodeGen/Hexagon/rdf-copy.ll b/test/CodeGen/Hexagon/rdf-copy.ll
new file mode 100644
index 000000000000..96153ca31fa4
--- /dev/null
+++ b/test/CodeGen/Hexagon/rdf-copy.ll
@@ -0,0 +1,54 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Check that
+; {
+; r1 = r0
+; }
+; {
+; r0 = memw(r1 + #0)
+; }
+; was copy-propagated to
+; {
+; r1 = r0
+; r0 = memw(r0 + #0)
+; }
+;
+; CHECK-LABEL: LBB0_1
+; CHECK: [[DST:r[0-9]+]] = [[SRC:r[0-9]+]]
+; CHECK-DAG: memw([[SRC]]
+; CHECK-NOT: memw([[DST]]
+; CHECK-LABEL: LBB0_2
+
+target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a0:0-n16:32"
+target triple = "hexagon"
+
+%union.t = type { %struct.t, [64 x i8] }
+%struct.t = type { [12 x i8], %struct.r*, double }
+%struct.r = type opaque
+
+define %union.t* @foo(%union.t* %chain) nounwind readonly {
+entry:
+ %tobool = icmp eq %union.t* %chain, null
+ br i1 %tobool, label %if.end, label %while.cond.preheader
+
+while.cond.preheader: ; preds = %entry
+ br label %while.cond
+
+while.cond: ; preds = %while.cond.preheader, %while.cond
+ %chain.addr.0 = phi %union.t* [ %0, %while.cond ], [ %chain, %while.cond.preheader ]
+ %chain1 = bitcast %union.t* %chain.addr.0 to %union.t**
+ %0 = load %union.t*, %union.t** %chain1, align 4, !tbaa !0
+ %tobool2 = icmp eq %union.t* %0, null
+ br i1 %tobool2, label %if.end.loopexit, label %while.cond
+
+if.end.loopexit: ; preds = %while.cond
+ br label %if.end
+
+if.end: ; preds = %if.end.loopexit, %entry
+ %chain.addr.1 = phi %union.t* [ null, %entry ], [ %chain.addr.0, %if.end.loopexit ]
+ ret %union.t* %chain.addr.1
+}
+
+!0 = !{!"any pointer", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Hexagon/rdf-dead-loop.ll b/test/CodeGen/Hexagon/rdf-dead-loop.ll
new file mode 100644
index 000000000000..3762c79d4f57
--- /dev/null
+++ b/test/CodeGen/Hexagon/rdf-dead-loop.ll
@@ -0,0 +1,31 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+; CHECK-NOT: ={{.*}}add
+; CHECK-NOT: mem{{[bdhwu]}}
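+;
+; The accumulator %ip_vec is only consumed by the dead extractelement in
+; %exit, so the whole load/add chain is dead; the CHECK-NOTs above verify
+; that no add and no memory access survive into the final assembly.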
+
+define void @main() #0 {
+entry:
+ br label %body
+
+body:
+ %ip_vec30 = phi <2 x i32> [ %ip_vec, %body ], [ zeroinitializer, %entry ]
+ %scevgep.phi = phi i32* [ %scevgep.inc, %body ], [ undef, %entry ]
+ %polly.indvar = phi i32 [ %polly.indvar_next, %body ], [ 0, %entry ]
+ %vector_ptr = bitcast i32* %scevgep.phi to <2 x i32>*
+ %_p_vec_full = load <2 x i32>, <2 x i32>* %vector_ptr, align 8
+ %ip_vec = add <2 x i32> %_p_vec_full, %ip_vec30
+ %polly.indvar_next = add nsw i32 %polly.indvar, 2
+ %polly.loop_cond = icmp slt i32 %polly.indvar, 4
+ %scevgep.inc = getelementptr i32, i32* %scevgep.phi, i32 2
+ br i1 %polly.loop_cond, label %body, label %exit
+
+exit:
+ %0 = extractelement <2 x i32> %ip_vec, i32 1
+ ret void
+
+}
+
+attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!0 = !{!"int", !1}
+!1 = !{!"omnipotent char", !2}
+!2 = !{!"Simple C/C++ TBAA"}
diff --git a/test/CodeGen/Mips/llvm-ir/call.ll b/test/CodeGen/Mips/llvm-ir/call.ll
index 22a44da0b069..a4b03405f72b 100644
--- a/test/CodeGen/Mips/llvm-ir/call.ll
+++ b/test/CodeGen/Mips/llvm-ir/call.ll
@@ -182,3 +182,18 @@ define hidden void @thunk_undef_double(i32 %this, double %volume) unnamed_addr a
tail call void @undef_double(i32 undef, double undef) #8
ret void
}
+
+; Check that immediate addresses do not use jal.
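+; (jal only encodes a symbolic target, so a call through a constant
+; integer address has to materialize the address with addiu and branch
+; through a register with jalr instead.)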
+define i32 @jal_only_allows_symbols() {
+; ALL-LABEL: jal_only_allows_symbols:
+
+; ALL-NOT: {{jal }}
+; ALL: addiu $[[TGT:[0-9]+]], $zero, 1234
+; ALL-NOT: {{jal }}
+; ALL: jalr $[[TGT]]
+; ALL-NOT: {{jal }}
+
+ call void () inttoptr (i32 1234 to void ()*)()
+ ret i32 0
+}
+
diff --git a/test/CodeGen/Mips/madd-msub.ll b/test/CodeGen/Mips/madd-msub.ll
index b84d94d31494..667676de5f33 100644
--- a/test/CodeGen/Mips/madd-msub.ll
+++ b/test/CodeGen/Mips/madd-msub.ll
@@ -18,7 +18,7 @@
; 32-DAG: [[m]]flo $3
; DSP-DAG: sra $[[T0:[0-9]+]], $6, 31
-; DSP-DAG: mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG: mtlo $6, $[[AC:ac[0-3]+]]
; DSP-DAG: madd $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
@@ -64,7 +64,7 @@ entry:
; 32-DAG: [[m]]flo $3
; DSP-DAG: addiu $[[T0:[0-9]+]], $zero, 0
-; DSP-DAG: mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG: mtlo $6, $[[AC:ac[0-3]+]]
; DSP-DAG: maddu $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
@@ -101,8 +101,8 @@ entry:
; 32-DAG: [[m]]fhi $2
; 32-DAG: [[m]]flo $3
-; DSP-DAG: mthi $[[AC:ac[0-3]+]], $6
-; DSP-DAG: mtlo $[[AC]], $7
+; DSP-DAG: mthi $6, $[[AC:ac[0-3]+]]
+; DSP-DAG: mtlo $7, $[[AC]]
; DSP-DAG: madd $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
@@ -143,7 +143,7 @@ entry:
; 32-DAG: [[m]]flo $3
; DSP-DAG: sra $[[T0:[0-9]+]], $6, 31
-; DSP-DAG: mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG: mtlo $6, $[[AC:ac[0-3]+]]
; DSP-DAG: msub $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
@@ -189,7 +189,7 @@ entry:
; 32-DAG: [[m]]flo $3
; DSP-DAG: addiu $[[T0:[0-9]+]], $zero, 0
-; DSP-DAG: mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG: mtlo $6, $[[AC:ac[0-3]+]]
; DSP-DAG: msubu $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
@@ -229,7 +229,7 @@ entry:
; 32-DAG: [[m]]flo $3
; DSP-DAG: addiu $[[T0:[0-9]+]], $zero, 0
-; DSP-DAG: mtlo $[[AC:ac[0-3]+]], $6
+; DSP-DAG: mtlo $6, $[[AC:ac[0-3]+]]
; DSP-DAG: msub $[[AC]], ${{[45]}}, ${{[45]}}
; DSP-DAG: mfhi $2, $[[AC]]
; DSP-DAG: mflo $3, $[[AC]]
diff --git a/test/CodeGen/PowerPC/2016-01-07-BranchWeightCrash.ll b/test/CodeGen/PowerPC/2016-01-07-BranchWeightCrash.ll
new file mode 100644
index 000000000000..65dff12f3115
--- /dev/null
+++ b/test/CodeGen/PowerPC/2016-01-07-BranchWeightCrash.ll
@@ -0,0 +1,35 @@
+; RUN: llc <%s | FileCheck %s
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [2 x i8] }
+
+declare i32 @__f1(i8*, %struct.buffer_t* noalias)
+
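+; The trigger is the !prof metadata below: a branch weight of 2^30 paired
+; with a weight of zero; branch-weight handling used to crash on such
+; lopsided inputs.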
+; CHECK-LABEL: f1:
+define i32 @f1(i8* %__user_context, %struct.buffer_t* noalias %f1.buffer) {
+entry:
+ br i1 undef, label %"assert succeeded", label %"assert failed", !prof !1
+
+"assert failed": ; preds = %entry
+ br label %destructor_block
+
+"assert succeeded": ; preds = %entry
+ %__f1_result = call i32 @__f1(i8* %__user_context, %struct.buffer_t* %f1.buffer) #5
+ %0 = icmp eq i32 %__f1_result, 0
+ br i1 %0, label %"assert succeeded11", label %"assert failed10", !prof !1
+
+destructor_block: ; preds = %"assert succeeded11", %"assert failed10", %"assert failed"
+ %1 = phi i32 [ undef, %"assert failed" ], [ %__f1_result, %"assert failed10" ], [ 0, %"assert succeeded11" ]
+ ret i32 %1
+
+"assert failed10": ; preds = %"assert succeeded"
+ br label %destructor_block
+
+"assert succeeded11": ; preds = %"assert succeeded"
+ br label %destructor_block
+}
+
+attributes #5 = { nounwind }
+
+!1 = !{!"branch_weights", i32 1073741824, i32 0}
diff --git a/test/CodeGen/PowerPC/ppc64le-localentry-large.ll b/test/CodeGen/PowerPC/ppc64le-localentry-large.ll
new file mode 100644
index 000000000000..4cf1b5a233d5
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-localentry-large.ll
@@ -0,0 +1,27 @@
+; RUN: llc -march=ppc64le -code-model=large < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@number64 = global i64 10, align 8
+
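+; With -code-model=large the usual addis/addi TOC setup may not reach, so
+; the .TOC. address is emitted in a per-function .Lfunc_toc slot and
+; loaded through the global entry point, as checked below.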
+; CHECK: .abiversion 2
+
+define i64 @use_toc(i64 %a) nounwind {
+entry:
+; CHECK: .Lfunc_toc[[FN:[0-9]+]]:
+; CHECK-NEXT: .quad .TOC.-.Lfunc_gep[[FN]]
+; CHECK: use_toc:
+; CHECK-NEXT: .L{{.*}}:
+; CHECK-NEXT: .Lfunc_gep[[FN]]:
+; CHECK-NEXT: ld 2, .Lfunc_toc[[FN]]-.Lfunc_gep[[FN]](12)
+; CHECK-NEXT: add 2, 2, 12
+; CHECK-NEXT: .Lfunc_lep[[FN]]:
+; CHECK-NEXT: .localentry use_toc, .Lfunc_lep[[FN]]-.Lfunc_gep[[FN]]
+; CHECK-NEXT: %entry
+ %0 = load i64, i64* @number64, align 8
+ %cmp = icmp eq i64 %0, %a
+ %conv1 = zext i1 %cmp to i64
+ ret i64 %conv1
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64le-localentry.ll b/test/CodeGen/PowerPC/ppc64le-localentry.ll
index be64f1151769..45cae6b17bc2 100644
--- a/test/CodeGen/PowerPC/ppc64le-localentry.ll
+++ b/test/CodeGen/PowerPC/ppc64le-localentry.ll
@@ -17,11 +17,11 @@ define i64 @use_toc(i64 %a) nounwind {
entry:
; CHECK-LABEL: @use_toc
; CHECK-NEXT: .L{{.*}}:
-; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
-; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
-; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
-; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
-; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
+; CHECK-NEXT: .Lfunc_gep[[FN:[0-9]+]]:
+; CHECK-NEXT: addis 2, 12, .TOC.-.Lfunc_gep[[FN]]@ha
+; CHECK-NEXT: addi 2, 2, .TOC.-.Lfunc_gep[[FN]]@l
+; CHECK-NEXT: .Lfunc_lep[[FN]]:
+; CHECK-NEXT: .localentry use_toc, .Lfunc_lep[[FN]]-.Lfunc_gep[[FN]]
; CHECK-NEXT: %entry
%0 = load i64, i64* @number64, align 8
%cmp = icmp eq i64 %0, %a
@@ -34,11 +34,11 @@ define void @use_toc_implicit() nounwind {
entry:
; CHECK-LABEL: @use_toc_implicit
; CHECK-NEXT: .L{{.*}}:
-; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
-; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
-; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
-; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
-; CHECK-NEXT: .localentry use_toc_implicit, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
+; CHECK-NEXT: .Lfunc_gep[[FN:[0-9]+]]:
+; CHECK-NEXT: addis 2, 12, .TOC.-.Lfunc_gep[[FN]]@ha
+; CHECK-NEXT: addi 2, 2, .TOC.-.Lfunc_gep[[FN]]@l
+; CHECK-NEXT: .Lfunc_lep[[FN]]:
+; CHECK-NEXT: .localentry use_toc_implicit, .Lfunc_lep[[FN]]-.Lfunc_gep[[FN]]
; CHECK-NEXT: %entry
call void @callee()
ret void
diff --git a/test/CodeGen/PowerPC/pr25802.ll b/test/CodeGen/PowerPC/pr25802.ll
new file mode 100644
index 000000000000..0631850be5fa
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr25802.ll
@@ -0,0 +1,52 @@
+; RUN: llc < %s | FileCheck %s
+; CHECK: .long .Ltmp6-.Ltmp12 # Call between .Ltmp12 and .Ltmp6
+
+; We used to crash with -filetype=obj when computing a negative value
+; (such as the .Ltmp6-.Ltmp12 difference checked above).
+; RUN: llc -filetype=obj < %s
+
+target triple = "powerpc--netbsd"
+@_ZTI1I = external constant { i8*, i8* }
+define void @f(i8 %foo, i32 %bar) personality i8* bitcast (void ()* @g to i8*) {
+ invoke void @g()
+ to label %try.cont unwind label %lpad
+lpad: ; preds = %0
+ %tmp = landingpad { i8*, i32 }
+ catch i8* bitcast ({ i8*, i8* }* @_ZTI1I to i8*)
+ br i1 undef, label %catch10, label %catch
+catch10: ; preds = %lpad
+ %tmp8 = load i32, i32* undef, align 4
+ %conv.i.i = zext i8 %foo to i32
+ %cond.i.i = select i1 undef, i32 %conv.i.i, i32 %tmp8
+ invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i)
+ to label %invoke.cont20 unwind label %lpad15
+invoke.cont20: ; preds = %catch10
+ ret void
+try.cont: ; preds = %0
+ ret void
+catch: ; preds = %lpad
+ %tmp14 = load i32, i32* undef, align 4
+ %conv.i.i34 = zext i8 %foo to i32
+ %cond.i.i35 = select i1 undef, i32 %conv.i.i34, i32 %tmp14
+ invoke void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32 %cond.i.i35)
+ to label %invoke.cont8 unwind label %lpad3
+invoke.cont8: ; preds = %call2.i.i.noexc36
+ ret void
+lpad3: ; preds = %call2.i.i.noexc36, %catch
+ %tmp16 = landingpad { i8*, i32 }
+ cleanup
+ invoke void @g()
+ to label %eh.resume unwind label %terminate.lpad
+lpad15: ; preds = %catch10
+ %tmp19 = landingpad { i8*, i32 }
+ cleanup
+ invoke void @g()
+ to label %eh.resume unwind label %terminate.lpad
+eh.resume: ; preds = %lpad15, %lpad3
+ ret void
+terminate.lpad: ; preds = %lpad15, %lpad3
+ %tmp22 = landingpad { i8*, i32 }
+ catch i8* null
+ ret void
+}
+declare void @g()
+declare void @_Z24__put_character_sequenceIccEvR1AIT_T0_Ej(i32)
diff --git a/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll b/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll
new file mode 100644
index 000000000000..400a1f297f00
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls_get_addr_clobbers.ll
@@ -0,0 +1,54 @@
+; RUN: llc -mtriple="powerpc64le-unknown-linux-gnu" -relocation-model=pic < %s | FileCheck %s
+
+@a = thread_local global i32* null, align 8
+
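+; __tls_get_addr is an ordinary call and clobbers the volatile registers,
+; including the argument registers r3-r10, so all eight incoming pointers
+; have to be saved before the call and used for the stores afterwards;
+; the mr/std pairs and the stw list below pin that down.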
+define void @test_foo(i32* nocapture %x01, i32* nocapture %x02, i32* nocapture %x03, i32* nocapture %x04, i32* nocapture %x05, i32* nocapture %x06, i32* nocapture %x07, i32* nocapture %x08) #0 {
+entry:
+
+; CHECK-LABEL: test_foo:
+; CHECK: stdu 1, {{-?[0-9]+}}(1)
+; CHECK-DAG: mr [[BACKUP_3:[0-9]+]], 3
+; CHECK-DAG: mr [[BACKUP_4:[0-9]+]], 4
+; CHECK-DAG: mr [[BACKUP_5:[0-9]+]], 5
+; CHECK-DAG: mr [[BACKUP_6:[0-9]+]], 6
+; CHECK-DAG: mr [[BACKUP_7:[0-9]+]], 7
+; CHECK-DAG: mr [[BACKUP_8:[0-9]+]], 8
+; CHECK-DAG: mr [[BACKUP_9:[0-9]+]], 9
+; CHECK-DAG: mr [[BACKUP_10:[0-9]+]], 10
+; CHECK-DAG: std [[BACKUP_3]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_4]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_5]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_6]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_7]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_8]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_9]], {{[0-9]+}}(1)
+; CHECK-DAG: std [[BACKUP_10]], {{[0-9]+}}(1)
+; CHECK: bl __tls_get_addr
+; CHECK-DAG: stw 3, 0([[BACKUP_3]])
+; CHECK-DAG: stw 3, 0([[BACKUP_4]])
+; CHECK-DAG: stw 3, 0([[BACKUP_5]])
+; CHECK-DAG: stw 3, 0([[BACKUP_6]])
+; CHECK-DAG: stw 3, 0([[BACKUP_7]])
+; CHECK-DAG: stw 3, 0([[BACKUP_8]])
+; CHECK-DAG: stw 3, 0([[BACKUP_9]])
+; CHECK-DAG: stw 3, 0([[BACKUP_10]])
+; CHECK: blr
+
+ %0 = load i32*, i32** @a, align 8
+ %cmp = icmp eq i32* %0, null
+ br i1 %cmp, label %return, label %if.end
+
+if.end: ; preds = %entry
+ store i32 0, i32* %x01, align 4
+ store i32 0, i32* %x02, align 4
+ store i32 0, i32* %x03, align 4
+ store i32 0, i32* %x04, align 4
+ store i32 0, i32* %x05, align 4
+ store i32 0, i32* %x06, align 4
+ store i32 0, i32* %x07, align 4
+ store i32 0, i32* %x08, align 4
+ br label %return
+
+return: ; preds = %entry, %if.end
+ ret void
+}
diff --git a/test/CodeGen/PowerPC/tls_get_addr_stackframe.ll b/test/CodeGen/PowerPC/tls_get_addr_stackframe.ll
new file mode 100644
index 000000000000..4a235983e6f7
--- /dev/null
+++ b/test/CodeGen/PowerPC/tls_get_addr_stackframe.ll
@@ -0,0 +1,32 @@
+; RUN: llc -mtriple="powerpc64le-unknown-linux-gnu" -relocation-model=pic < %s | FileCheck %s
+; CHECK-LABEL: foo_test:
+; CHECK: mflr 0
+; CHECK: __tls_get_addr
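+; The checks only require that the link register is saved (mflr 0) before
+; the __tls_get_addr call, i.e. that the call gets a proper stack frame.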
+
+%struct1.2.41 = type { %struct2.0.39, %struct3.1.40, %struct1.2.41* }
+%struct2.0.39 = type { i64, i32, i32, i32, i32 }
+%struct3.1.40 = type { [160 x i8] }
+
+@tls_var = external thread_local global %struct1.2.41*, align 8
+
+define void @foo_test() {
+ %1 = load %struct1.2.41*, %struct1.2.41** @tls_var, align 8
+ br i1 undef, label %foo.exit, label %2
+
+; <label>:2 ; preds = %0
+ br i1 undef, label %foo.exit, label %3
+
+; <label>:3 ; preds = %2
+ %4 = getelementptr inbounds %struct1.2.41, %struct1.2.41* %1, i64 0, i32 0, i32 3
+ %5 = load i32, i32* %4, align 8
+ %6 = add nsw i32 %5, -1
+ %7 = icmp eq i32 %6, 0
+ br i1 %7, label %8, label %foo.exit
+
+; <label>:8 ; preds = %3
+ tail call void undef(%struct1.2.41* undef, %struct1.2.41* nonnull undef)
+ br label %foo.exit
+
+foo.exit: ; preds = %8, %3, %2, %0
+ ret void
+}
diff --git a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
index 29bca67e2d24..b38c955e1f63 100644
--- a/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
+++ b/test/CodeGen/SPARC/2011-01-19-DelaySlot.ll
@@ -59,7 +59,7 @@ entry:
;CHECK: sethi
;CHECK: !NO_APP
;CHECK-NEXT: cmp
-;CHECK-NEXT: bg
+;CHECK-NEXT: ble
;CHECK-NEXT: mov
tail call void asm sideeffect "sethi 0, %g0", ""() nounwind
%0 = icmp slt i32 %a, 0
diff --git a/test/CodeGen/SPARC/analyze-branch.ll b/test/CodeGen/SPARC/analyze-branch.ll
new file mode 100644
index 000000000000..7d2096033a03
--- /dev/null
+++ b/test/CodeGen/SPARC/analyze-branch.ll
@@ -0,0 +1,58 @@
+; RUN: llc -mtriple=sparc-none-linux-gnu < %s | FileCheck %s
+
+; This test checks that LLVM can do basic stripping and reapplying of branches
+; to basic blocks.
+
+declare void @test_true()
+declare void @test_false()
+
+; !0 corresponds to a branch being taken, !1 to not being taken.
+!0 = !{!"branch_weights", i32 64, i32 4}
+!1 = !{!"branch_weights", i32 4, i32 64}
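+
+; With !0 the "true" successor is hot, with !1 the "false" one; the two
+; functions below check that block placement keeps the hot successor on
+; the fallthrough path, inverting the branch condition as needed.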
+
+define void @test_Bcc_fallthrough_taken(i32 %in) nounwind {
+; CHECK-LABEL: test_Bcc_fallthrough_taken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !0
+
+; CHECK: cmp {{%[goli][0-9]+}}, 42
+; CHECK: bne [[FALSE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! BB#
+; CHECK-NEXT: call test_true
+
+; CHECK: [[FALSE]]:
+; CHECK: call test_false
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
+
+define void @test_Bcc_fallthrough_nottaken(i32 %in) nounwind {
+; CHECK-LABEL: test_Bcc_fallthrough_nottaken:
+ %tst = icmp eq i32 %in, 42
+ br i1 %tst, label %true, label %false, !prof !1
+
+; CHECK: cmp {{%[goli][0-9]+}}, 42
+
+; CHECK: be [[TRUE:.LBB[0-9]+_[0-9]+]]
+; CHECK-NEXT: nop
+; CHECK-NEXT: ! BB#
+; CHECK-NEXT: call test_false
+
+; CHECK: [[TRUE]]:
+; CHECK: call test_true
+
+true:
+ call void @test_true()
+ ret void
+
+false:
+ call void @test_false()
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/call.ll b/test/CodeGen/WebAssembly/call.ll
index 9158ccec0979..6d5542c89d3d 100644
--- a/test/CodeGen/WebAssembly/call.ll
+++ b/test/CodeGen/WebAssembly/call.ll
@@ -2,7 +2,7 @@
; Test that basic call operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare i32 @i32_nullary()
@@ -15,7 +15,7 @@ declare void @void_nullary()
; CHECK-LABEL: call_i32_nullary:
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_nullary{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_nullary@FUNCTION{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @call_i32_nullary() {
%r = call i32 @i32_nullary()
@@ -24,7 +24,7 @@ define i32 @call_i32_nullary() {
; CHECK-LABEL: call_i64_nullary:
; CHECK-NEXT: .result i64{{$}}
-; CHECK-NEXT: {{^}} i64.call $push[[NUM:[0-9]+]]=, i64_nullary{{$}}
+; CHECK-NEXT: {{^}} i64.call $push[[NUM:[0-9]+]]=, i64_nullary@FUNCTION{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i64 @call_i64_nullary() {
%r = call i64 @i64_nullary()
@@ -33,7 +33,7 @@ define i64 @call_i64_nullary() {
; CHECK-LABEL: call_float_nullary:
; CHECK-NEXT: .result f32{{$}}
-; CHECK-NEXT: {{^}} f32.call $push[[NUM:[0-9]+]]=, float_nullary{{$}}
+; CHECK-NEXT: {{^}} f32.call $push[[NUM:[0-9]+]]=, float_nullary@FUNCTION{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define float @call_float_nullary() {
%r = call float @float_nullary()
@@ -42,7 +42,7 @@ define float @call_float_nullary() {
; CHECK-LABEL: call_double_nullary:
; CHECK-NEXT: .result f64{{$}}
-; CHECK-NEXT: {{^}} f64.call $push[[NUM:[0-9]+]]=, double_nullary{{$}}
+; CHECK-NEXT: {{^}} f64.call $push[[NUM:[0-9]+]]=, double_nullary@FUNCTION{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define double @call_double_nullary() {
%r = call double @double_nullary()
@@ -50,7 +50,7 @@ define double @call_double_nullary() {
}
; CHECK-LABEL: call_void_nullary:
-; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: {{^}} call void_nullary@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
define void @call_void_nullary() {
call void @void_nullary()
@@ -60,7 +60,7 @@ define void @call_void_nullary() {
; CHECK-LABEL: call_i32_unary:
; CHECK-NEXT: .param i32{{$}}
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_unary, $0{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_unary@FUNCTION, $0{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @call_i32_unary(i32 %a) {
%r = call i32 @i32_unary(i32 %a)
@@ -70,7 +70,7 @@ define i32 @call_i32_unary(i32 %a) {
; CHECK-LABEL: call_i32_binary:
; CHECK-NEXT: .param i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_binary, $0, $1{{$}}
+; CHECK-NEXT: {{^}} i32.call $push[[NUM:[0-9]+]]=, i32_binary@FUNCTION, $0, $1{{$}}
; CHECK-NEXT: return $pop[[NUM]]{{$}}
define i32 @call_i32_binary(i32 %a, i32 %b) {
%r = call i32 @i32_binary(i32 %a, i32 %b)
@@ -97,7 +97,7 @@ define i32 @call_indirect_i32(i32 ()* %callee) {
}
; CHECK-LABEL: tail_call_void_nullary:
-; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: {{^}} call void_nullary@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
define void @tail_call_void_nullary() {
tail call void @void_nullary()
@@ -105,7 +105,7 @@ define void @tail_call_void_nullary() {
}
; CHECK-LABEL: fastcc_tail_call_void_nullary:
-; CHECK-NEXT: {{^}} call void_nullary{{$}}
+; CHECK-NEXT: {{^}} call void_nullary@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
define void @fastcc_tail_call_void_nullary() {
tail call fastcc void @void_nullary()
@@ -113,7 +113,7 @@ define void @fastcc_tail_call_void_nullary() {
}
; CHECK-LABEL: coldcc_tail_call_void_nullary:
-; CHECK-NEXT: {{^}} call void_nullary
+; CHECK-NEXT: {{^}} call void_nullary@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
define void @coldcc_tail_call_void_nullary() {
tail call coldcc void @void_nullary()
diff --git a/test/CodeGen/WebAssembly/cfg-stackify.ll b/test/CodeGen/WebAssembly/cfg-stackify.ll
index 71f3551347bf..f0e5f4471678 100644
--- a/test/CodeGen/WebAssembly/cfg-stackify.ll
+++ b/test/CodeGen/WebAssembly/cfg-stackify.ll
@@ -3,7 +3,7 @@
; Test the CFG stackifier pass.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
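+
+; br and br_if now take a relative depth immediate (0 = the innermost
+; enclosing block or loop) rather than a label operand, and every block
+; and loop is closed by an explicit end_block/end_loop marker.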
target triple = "wasm32-unknown-unknown"
declare void @something()
@@ -18,7 +18,7 @@ declare void @something()
; CHECK-NEXT: br_if
; CHECK-NOT: br
; CHECK: call
-; CHECK: br BB0_1{{$}}
+; CHECK: br 0{{$}}
; CHECK: return{{$}}
; OPT-LABEL: test0:
; OPT: loop
@@ -28,7 +28,7 @@ declare void @something()
; OPT-NEXT: br_if
; OPT-NOT: br
; OPT: call
-; OPT: br BB0_1{{$}}
+; OPT: br 0{{$}}
; OPT: return{{$}}
define void @test0(i32 %n) {
entry:
@@ -59,7 +59,7 @@ back:
; CHECK-NEXT: br_if
; CHECK-NOT: br
; CHECK: call
-; CHECK: br BB1_1{{$}}
+; CHECK: br 0{{$}}
; CHECK: return{{$}}
; OPT-LABEL: test1:
; OPT: loop
@@ -69,7 +69,7 @@ back:
; OPT-NEXT: br_if
; OPT-NOT: br
; OPT: call
-; OPT: br BB1_1{{$}}
+; OPT: br 0{{$}}
; OPT: return{{$}}
define void @test1(i32 %n) {
entry:
@@ -93,18 +93,20 @@ back:
; Test that a simple loop is handled as expected.
; CHECK-LABEL: test2:
-; CHECK: block BB2_2{{$}}
-; CHECK: br_if {{[^,]*}}, BB2_2{{$}}
-; CHECK: BB2_1:
-; CHECK: br_if ${{[0-9]+}}, BB2_1{{$}}
-; CHECK: BB2_2:
+; CHECK-NOT: local
+; CHECK: block{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK: .LBB2_1:
+; CHECK: br_if ${{[0-9]+}}, 0{{$}}
+; CHECK: .LBB2_2:
; CHECK: return{{$}}
; OPT-LABEL: test2:
-; OPT: block BB2_2{{$}}
-; OPT: br_if {{[^,]*}}, BB2_2{{$}}
-; OPT: BB2_1:
-; OPT: br_if ${{[0-9]+}}, BB2_1{{$}}
-; OPT: BB2_2:
+; OPT-NOT: local
+; OPT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT: .LBB2_1:
+; OPT: br_if ${{[0-9]+}}, 0{{$}}
+; OPT: .LBB2_2:
; OPT: return{{$}}
define void @test2(double* nocapture %p, i32 %n) {
entry:
@@ -132,24 +134,29 @@ for.end:
}
; CHECK-LABEL: doublediamond:
-; CHECK: block BB3_5{{$}}
-; CHECK: block BB3_2{{$}}
-; CHECK: br_if $0, BB3_2{{$}}
-; CHECK: block BB3_4{{$}}
-; CHECK: br_if $1, BB3_4{{$}}
-; CHECK: br BB3_5{{$}}
-; CHECK: BB3_4:
-; CHECK: BB3_5:
+; CHECK: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK: br_if ${{[^,]*}}, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB3_2:
+; CHECK-NEXT: end_block{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if ${{[^,]*}}, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB3_4:
+; CHECK-NEXT: end_block{{$}}
+; CHECK: .LBB3_5:
+; CHECK-NEXT: end_block{{$}}
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: doublediamond:
-; OPT: block BB3_5{{$}}
-; OPT: block BB3_4{{$}}
-; OPT: br_if {{[^,]*}}, BB3_4{{$}}
-; OPT: block BB3_3{{$}}
-; OPT: br_if {{[^,]*}}, BB3_3{{$}}
-; OPT: br BB3_5{{$}}
-; OPT: BB3_4:
-; OPT: BB3_5:
+; OPT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT: br_if ${{[^,]*}}, 0{{$}}
+; OPT: block{{$}}
+; OPT: br_if ${{[^,]*}}, 0{{$}}
+; OPT: br 1{{$}}
+; OPT: .LBB3_4:
+; OPT: .LBB3_5:
; OPT: return ${{[0-9]+}}{{$}}
define i32 @doublediamond(i32 %a, i32 %b, i32* %p) {
entry:
@@ -175,14 +182,14 @@ exit:
}
; CHECK-LABEL: triangle:
-; CHECK: block BB4_2{{$}}
-; CHECK: br_if $1, BB4_2{{$}}
-; CHECK: BB4_2:
+; CHECK: block{{$}}
+; CHECK: br_if $1, 0{{$}}
+; CHECK: .LBB4_2:
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: triangle:
-; OPT: block BB4_2{{$}}
-; OPT: br_if $1, BB4_2{{$}}
-; OPT: BB4_2:
+; OPT: block{{$}}
+; OPT: br_if $1, 0{{$}}
+; OPT: .LBB4_2:
; OPT: return ${{[0-9]+}}{{$}}
define i32 @triangle(i32* %p, i32 %a) {
entry:
@@ -198,20 +205,20 @@ exit:
}
; CHECK-LABEL: diamond:
-; CHECK: block BB5_3{{$}}
-; CHECK: block BB5_2{{$}}
-; CHECK: br_if $1, BB5_2{{$}}
-; CHECK: br BB5_3{{$}}
-; CHECK: BB5_2:
-; CHECK: BB5_3:
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $1, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB5_2:
+; CHECK: .LBB5_3:
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: diamond:
-; OPT: block BB5_3{{$}}
-; OPT: block BB5_2{{$}}
-; OPT: br_if {{[^,]*}}, BB5_2{{$}}
-; OPT: br BB5_3{{$}}
-; OPT: BB5_2:
-; OPT: BB5_3:
+; OPT: block{{$}}
+; OPT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT: br 1{{$}}
+; OPT: .LBB5_2:
+; OPT: .LBB5_3:
; OPT: return ${{[0-9]+}}{{$}}
define i32 @diamond(i32* %p, i32 %a) {
entry:
@@ -243,16 +250,16 @@ entry:
; CHECK-LABEL: minimal_loop:
; CHECK-NOT: br
-; CHECK: BB7_1:
+; CHECK: .LBB7_1:
; CHECK: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}}
-; CHECK: br BB7_1{{$}}
-; CHECK: BB7_2:
+; CHECK: br 0{{$}}
+; CHECK: .LBB7_2:
; OPT-LABEL: minimal_loop:
; OPT-NOT: br
-; OPT: BB7_1:
+; OPT: .LBB7_1:
; OPT: i32.store $discard=, 0($0), $pop{{[0-9]+}}{{$}}
-; OPT: br BB7_1{{$}}
-; OPT: BB7_2:
+; OPT: br 0{{$}}
+; OPT: .LBB7_2:
define i32 @minimal_loop(i32* %p) {
entry:
store volatile i32 0, i32* %p
@@ -264,17 +271,17 @@ loop:
; CHECK-LABEL: simple_loop:
; CHECK-NOT: br
-; CHECK: BB8_1:
-; CHECK: loop BB8_2{{$}}
-; CHECK: br_if $pop{{[0-9]+}}, BB8_1{{$}}
-; CHECK: BB8_2:
+; CHECK: .LBB8_1:
+; CHECK: loop{{$}}
+; CHECK: br_if $pop{{[0-9]+}}, 0{{$}}
+; CHECK-NEXT: end_loop{{$}}
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: simple_loop:
; OPT-NOT: br
-; OPT: BB8_1:
-; OPT: loop BB8_2{{$}}
-; OPT: br_if {{[^,]*}}, BB8_1{{$}}
-; OPT: BB8_2:
+; OPT: .LBB8_1:
+; OPT: loop{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: end_loop{{$}}
; OPT: return ${{[0-9]+}}{{$}}
define i32 @simple_loop(i32* %p, i32 %a) {
entry:
@@ -290,20 +297,20 @@ exit:
}
; CHECK-LABEL: doubletriangle:
-; CHECK: block BB9_4{{$}}
-; CHECK: br_if $0, BB9_4{{$}}
-; CHECK: block BB9_3{{$}}
-; CHECK: br_if $1, BB9_3{{$}}
-; CHECK: BB9_3:
-; CHECK: BB9_4:
+; CHECK: block{{$}}
+; CHECK: br_if $0, 0{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $1, 0{{$}}
+; CHECK: .LBB9_3:
+; CHECK: .LBB9_4:
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: doubletriangle:
-; OPT: block BB9_4{{$}}
-; OPT: br_if $0, BB9_4{{$}}
-; OPT: block BB9_3{{$}}
-; OPT: br_if $1, BB9_3{{$}}
-; OPT: BB9_3:
-; OPT: BB9_4:
+; OPT: block{{$}}
+; OPT: br_if $0, 0{{$}}
+; OPT: block{{$}}
+; OPT: br_if $1, 0{{$}}
+; OPT: .LBB9_3:
+; OPT: .LBB9_4:
; OPT: return ${{[0-9]+}}{{$}}
define i32 @doubletriangle(i32 %a, i32 %b, i32* %p) {
entry:
@@ -326,22 +333,22 @@ exit:
}
; CHECK-LABEL: ifelse_earlyexits:
-; CHECK: block BB10_4{{$}}
-; CHECK: block BB10_2{{$}}
-; CHECK: br_if $0, BB10_2{{$}}
-; CHECK: br BB10_4{{$}}
-; CHECK: BB10_2:
-; CHECK: br_if $1, BB10_4{{$}}
-; CHECK: BB10_4:
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $0, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB10_2:
+; CHECK: br_if $1, 0{{$}}
+; CHECK: .LBB10_4:
; CHECK: return ${{[0-9]+}}{{$}}
; OPT-LABEL: ifelse_earlyexits:
-; OPT: block BB10_4{{$}}
-; OPT: block BB10_3{{$}}
-; OPT: br_if {{[^,]*}}, BB10_3{{$}}
-; OPT: br_if $1, BB10_4{{$}}
-; OPT: br BB10_4{{$}}
-; OPT: BB10_3:
-; OPT: BB10_4:
+; OPT: block{{$}}
+; OPT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT: br_if $1, 1{{$}}
+; OPT: br 1{{$}}
+; OPT: .LBB10_3:
+; OPT: .LBB10_4:
; OPT: return ${{[0-9]+}}{{$}}
define i32 @ifelse_earlyexits(i32 %a, i32 %b, i32* %p) {
entry:
@@ -364,35 +371,40 @@ exit:
}
; CHECK-LABEL: doublediamond_in_a_loop:
-; CHECK: BB11_1:
-; CHECK: loop BB11_7{{$}}
-; CHECK: block BB11_6{{$}}
-; CHECK: block BB11_3{{$}}
-; CHECK: br_if $0, BB11_3{{$}}
-; CHECK: br BB11_6{{$}}
-; CHECK: BB11_3:
-; CHECK: block BB11_5{{$}}
-; CHECK: br_if $1, BB11_5{{$}}
-; CHECK: br BB11_6{{$}}
-; CHECK: BB11_5:
-; CHECK: BB11_6:
-; CHECK: br BB11_1{{$}}
-; CHECK: BB11_7:
+; CHECK: .LBB11_1:
+; CHECK: loop{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $0, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB11_3:
+; CHECK: block{{$}}
+; CHECK: br_if $1, 0{{$}}
+; CHECK: br 1{{$}}
+; CHECK: .LBB11_5:
+; CHECK: .LBB11_6:
+; CHECK: br 0{{$}}
+; CHECK: .LBB11_7:
+; CHECK-NEXT: end_loop{{$}}
; OPT-LABEL: doublediamond_in_a_loop:
-; OPT: BB11_1:
-; OPT: loop BB11_7{{$}}
-; OPT: block BB11_6{{$}}
-; OPT: block BB11_5{{$}}
-; OPT: br_if {{[^,]*}}, BB11_5{{$}}
-; OPT: block BB11_4{{$}}
-; OPT: br_if {{[^,]*}}, BB11_4{{$}}
-; OPT: br BB11_6{{$}}
-; OPT: BB11_4:
-; OPT: br BB11_6{{$}}
-; OPT: BB11_5:
-; OPT: BB11_6:
-; OPT: br BB11_1{{$}}
-; OPT: BB11_7:
+; OPT: .LBB11_1:
+; OPT: loop{{$}}
+; OPT: block{{$}}
+; OPT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT: br 2{{$}}
+; OPT: .LBB11_4:
+; OPT-NEXT: end_block{{$}}
+; OPT: br 1{{$}}
+; OPT: .LBB11_5:
+; OPT-NEXT: end_block{{$}}
+; OPT: .LBB11_6:
+; OPT-NEXT: end_block{{$}}
+; OPT: br 0{{$}}
+; OPT: .LBB11_7:
+; OPT-NEXT: end_loop{{$}}
define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) {
entry:
br label %header
@@ -423,12 +435,12 @@ exit:
; CHECK-LABEL: test3:
; CHECK: loop
; CHECK-NEXT: br_if
-; CHECK-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
+; CHECK-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}:
; CHECK-NEXT: loop
; OPT-LABEL: test3:
; OPT: loop
; OPT-NEXT: br_if
-; OPT-NEXT: BB{{[0-9]+}}_{{[0-9]+}}:
+; OPT-NEXT: .LBB{{[0-9]+}}_{{[0-9]+}}:
; OPT-NEXT: loop
declare void @bar()
define void @test3(i32 %w) {
@@ -460,42 +472,48 @@ if.end:
; Test switch lowering and block placement.
; CHECK-LABEL: test4:
-; CHECK-NEXT: .param i32{{$}}
-; CHECK: block BB13_8{{$}}
-; CHECK-NEXT: block BB13_7{{$}}
-; CHECK-NEXT: block BB13_4{{$}}
-; CHECK: br_if $pop{{[0-9]*}}, BB13_4{{$}}
-; CHECK-NEXT: block BB13_3{{$}}
-; CHECK: br_if $pop{{[0-9]*}}, BB13_3{{$}}
-; CHECK: br_if $pop{{[0-9]*}}, BB13_7{{$}}
-; CHECK-NEXT: BB13_3:
+; CHECK-NEXT: .param i32{{$}}
+; CHECK: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, 0{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, 0{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, 2{{$}}
+; CHECK-NEXT: .LBB13_3:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB13_4:
-; CHECK: br_if $pop{{[0-9]*}}, BB13_8{{$}}
-; CHECK: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; CHECK-NEXT: .LBB13_4:
+; CHECK: br_if $pop{{[0-9]*}}, 1{{$}}
+; CHECK: br_if $pop{{[0-9]*}}, 0{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB13_7:
+; CHECK-NEXT: .LBB13_7:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB13_8:
+; CHECK-NEXT: .LBB13_8:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
; OPT-LABEL: test4:
-; OPT-NEXT: .param i32{{$}}
-; OPT: block BB13_8{{$}}
-; OPT-NEXT: block BB13_7{{$}}
-; OPT-NEXT: block BB13_4{{$}}
-; OPT: br_if $pop{{[0-9]*}}, BB13_4{{$}}
-; OPT-NEXT: block BB13_3{{$}}
-; OPT: br_if $pop{{[0-9]*}}, BB13_3{{$}}
-; OPT: br_if $pop{{[0-9]*}}, BB13_7{{$}}
-; OPT-NEXT: BB13_3:
+; OPT-NEXT: .param i32{{$}}
+; OPT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT: br_if $pop{{[0-9]*}}, 0{{$}}
+; OPT-NEXT: block{{$}}
+; OPT: br_if $pop{{[0-9]*}}, 0{{$}}
+; OPT: br_if $pop{{[0-9]*}}, 2{{$}}
+; OPT-NEXT: .LBB13_3:
+; OPT-NEXT: end_block{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB13_4:
-; OPT: br_if $pop{{[0-9]*}}, BB13_8{{$}}
-; OPT: br_if $pop{{[0-9]*}}, BB13_7{{$}}
+; OPT-NEXT: .LBB13_4:
+; OPT: br_if $pop{{[0-9]*}}, 1{{$}}
+; OPT: br_if $pop{{[0-9]*}}, 0{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB13_7:
+; OPT-NEXT: .LBB13_7:
+; OPT-NEXT: end_block{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB13_8:
+; OPT-NEXT: .LBB13_8:
+; OPT-NEXT: end_block{{$}}
; OPT-NEXT: return{{$}}
define void @test4(i32 %t) {
entry:
@@ -523,24 +541,24 @@ default:
; same basic block.
; CHECK-LABEL: test5:
-; CHECK: BB14_1:
-; CHECK-NEXT: block BB14_4{{$}}
-; CHECK-NEXT: loop BB14_3{{$}}
-; CHECK: br_if {{[^,]*}}, BB14_4{{$}}
-; CHECK: br_if {{[^,]*}}, BB14_1{{$}}
-; CHECK-NEXT: BB14_3:
+; CHECK: .LBB14_1:
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: loop{{$}}
+; CHECK: br_if {{[^,]*}}, 2{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: end_loop{{$}}
; CHECK: return{{$}}
-; CHECK-NEXT: BB14_4:
+; CHECK-NEXT: .LBB14_4:
; CHECK: return{{$}}
; OPT-LABEL: test5:
-; OPT: BB14_1:
-; OPT-NEXT: block BB14_4{{$}}
-; OPT-NEXT: loop BB14_3{{$}}
-; OPT: br_if {{[^,]*}}, BB14_4{{$}}
-; OPT: br_if {{[^,]*}}, BB14_1{{$}}
-; OPT-NEXT: BB14_3:
+; OPT: .LBB14_1:
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: loop{{$}}
+; OPT: br_if {{[^,]*}}, 2{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: end_loop{{$}}
; OPT: return{{$}}
-; OPT-NEXT: BB14_4:
+; OPT-NEXT: .LBB14_4:
; OPT: return{{$}}
define void @test5(i1 %p, i1 %q) {
entry:
@@ -568,41 +586,45 @@ return:
; which has another predecessor.
; CHECK-LABEL: test6:
-; CHECK: BB15_1:
-; CHECK-NEXT: block BB15_6{{$}}
-; CHECK-NEXT: block BB15_5{{$}}
-; CHECK-NEXT: loop BB15_4{{$}}
+; CHECK: .LBB15_1:
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB15_6{{$}}
+; CHECK: br_if {{[^,]*}}, 3{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB15_5{{$}}
+; CHECK: br_if {{[^,]*}}, 2{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB15_1{{$}}
-; CHECK-NEXT: BB15_4:
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: end_loop{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
-; CHECK-NEXT: BB15_5:
+; CHECK-NEXT: .LBB15_5:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: BB15_6:
+; CHECK: .LBB15_6:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
; OPT-LABEL: test6:
-; OPT: BB15_1:
-; OPT-NEXT: block BB15_6{{$}}
-; OPT-NEXT: block BB15_5{{$}}
-; OPT-NEXT: loop BB15_4{{$}}
+; OPT: .LBB15_1:
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB15_6{{$}}
+; OPT: br_if {{[^,]*}}, 3{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB15_5{{$}}
+; OPT: br_if {{[^,]*}}, 2{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB15_1{{$}}
-; OPT-NEXT: BB15_4:
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: end_loop{{$}}
; OPT-NOT: block
; OPT: return{{$}}
-; OPT-NEXT: BB15_5:
+; OPT-NEXT: .LBB15_5:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: BB15_6:
+; OPT: .LBB15_6:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
; OPT: return{{$}}
define void @test6(i1 %p, i1 %q) {
@@ -638,36 +660,38 @@ second:
; that end in unreachable.
; CHECK-LABEL: test7:
-; CHECK: BB16_1:
-; CHECK-NEXT: loop BB16_5{{$}}
+; CHECK: .LBB16_1:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: block BB16_4{{$}}
-; CHECK: br_if {{[^,]*}}, BB16_4{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB16_1{{$}}
+; CHECK: br_if {{[^,]*}}, 1{{$}}
; CHECK-NOT: block
; CHECK: unreachable
-; CHECK_NEXT: BB16_4:
+; CHECK-NEXT: .LBB16_4:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB16_1{{$}}
-; CHECK-NEXT: BB16_5:
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: end_loop{{$}}
; CHECK-NOT: block
; CHECK: unreachable
; OPT-LABEL: test7:
-; OPT: BB16_1:
-; OPT-NEXT: loop BB16_5{{$}}
+; OPT: .LBB16_1:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: block BB16_4{{$}}
+; OPT: block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB16_4{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB16_1{{$}}
+; OPT: br_if {{[^,]*}}, 1{{$}}
; OPT-NOT: block
; OPT: unreachable
-; OPT_NEXT: BB16_4:
+; OPT-NEXT: .LBB16_4:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB16_1{{$}}
-; OPT-NEXT: BB16_5:
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: end_loop{{$}}
; OPT-NOT: block
; OPT: unreachable
define void @test7(i1 %tobool2, i1 %tobool9) {
@@ -699,31 +723,33 @@ u1:
; Test an interesting case using nested loops and switches.
; CHECK-LABEL: test8:
-; CHECK: BB17_1:
-; CHECK-NEXT: loop BB17_4{{$}}
-; CHECK-NEXT: block BB17_3{{$}}
+; CHECK: .LBB17_1:
+; CHECK-NEXT: loop{{$}}
+; CHECK-NEXT: block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB17_3{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB17_1{{$}}
-; CHECK-NEXT: BB17_3:
-; CHECK-NEXT: loop BB17_4{{$}}
-; CHECK-NEXT: br_if {{[^,]*}}, BB17_3{{$}}
-; CHECK-NEXT: br BB17_1{{$}}
-; CHECK-NEXT: BB17_4:
+; CHECK: br_if {{[^,]*}}, 1{{$}}
+; CHECK-NEXT: .LBB17_3:
+; CHECK-NEXT: end_block{{$}}
+; CHECK-NEXT: loop{{$}}
+; CHECK-NEXT: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: br 2{{$}}
+; CHECK-NEXT: .LBB17_4:
; OPT-LABEL: test8:
-; OPT: BB17_1:
-; OPT-NEXT: loop BB17_4{{$}}
-; OPT-NEXT: block BB17_3{{$}}
+; OPT: .LBB17_1:
+; OPT-NEXT: loop{{$}}
+; OPT-NEXT: block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB17_3{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB17_1{{$}}
-; OPT-NEXT: BB17_3:
-; OPT-NEXT: loop BB17_4{{$}}
-; OPT-NEXT: br_if {{[^,]*}}, BB17_3{{$}}
-; OPT-NEXT: br BB17_1{{$}}
-; OPT-NEXT: BB17_4:
+; OPT: br_if {{[^,]*}}, 1{{$}}
+; OPT-NEXT: .LBB17_3:
+; OPT-NEXT: end_block{{$}}
+; OPT-NEXT: loop{{$}}
+; OPT-NEXT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: br 2{{$}}
+; OPT-NEXT: .LBB17_4:
define i32 @test8() {
bb:
br label %bb1
@@ -745,45 +771,47 @@ bb3:
; Test an interesting case using nested loops that share a bottom block.
; CHECK-LABEL: test9:
-; CHECK: BB18_1:
-; CHECK-NEXT: loop BB18_5{{$}}
+; CHECK: .LBB18_1:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB18_5{{$}}
-; CHECK-NEXT: BB18_2:
-; CHECK-NEXT: loop BB18_5{{$}}
+; CHECK: br_if {{[^,]*}}, 1{{$}}
+; CHECK-NEXT: .LBB18_2:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: block BB18_4{{$}}
+; CHECK: block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB18_4{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB18_2{{$}}
-; CHECK-NEXT: br BB18_1{{$}}
-; CHECK-NEXT: BB18_4:
+; CHECK: br_if {{[^,]*}}, 1{{$}}
+; CHECK-NEXT: br 3{{$}}
+; CHECK-NEXT: .LBB18_4:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB18_2{{$}}
-; CHECK-NEXT: br BB18_1{{$}}
-; CHECK-NEXT: BB18_5:
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: br 2{{$}}
+; CHECK-NEXT: .LBB18_5:
; CHECK-NOT: block
; CHECK: return{{$}}
; OPT-LABEL: test9:
-; OPT: BB18_1:
-; OPT-NEXT: loop BB18_5{{$}}
+; OPT: .LBB18_1:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB18_5{{$}}
-; OPT-NEXT: BB18_2:
-; OPT-NEXT: loop BB18_5{{$}}
+; OPT: br_if {{[^,]*}}, 1{{$}}
+; OPT-NEXT: .LBB18_2:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: block BB18_4{{$}}
+; OPT: block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB18_4{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB18_2{{$}}
-; OPT-NEXT: br BB18_1{{$}}
-; OPT-NEXT: BB18_4:
+; OPT: br_if {{[^,]*}}, 1{{$}}
+; OPT-NEXT: br 3{{$}}
+; OPT-NEXT: .LBB18_4:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB18_2{{$}}
-; OPT-NEXT: br BB18_1{{$}}
-; OPT-NEXT: BB18_5:
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: br 2{{$}}
+; OPT-NEXT: .LBB18_5:
; OPT-NOT: block
; OPT: return{{$}}
declare i1 @a()
@@ -821,47 +849,53 @@ end:
; and loop exits to a block with unreachable.
; CHECK-LABEL: test10:
-; CHECK: BB19_1:
-; CHECK-NEXT: loop BB19_7{{$}}
+; CHECK: .LBB19_1:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB19_1{{$}}
-; CHECK-NEXT: BB19_2:
-; CHECK-NEXT: block BB19_6{{$}}
-; CHECK-NEXT: loop BB19_5{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: .LBB19_2:
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: BB19_3:
-; CHECK-NEXT: loop BB19_5{{$}}
+; CHECK: .LBB19_3:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB19_1{{$}}
+; CHECK: br_if {{[^,]*}}, 5{{$}}
; CHECK-NOT: block
-; CHECK: tableswitch {{[^,]*}}, BB19_3, BB19_3, BB19_5, BB19_1, BB19_2, BB19_6{{$}}
-; CHECK-NEXT: BB19_5:
+; CHECK: tableswitch {{[^,]*}}, 0, 0, 1, 5, 2, 4{{$}}
+; CHECK-NEXT: .LBB19_5:
+; CHECK-NEXT: end_loop{{$}}
+; CHECK-NEXT: end_loop{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB19_6:
+; CHECK-NEXT: .LBB19_6:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br BB19_1{{$}}
-; CHECK-NEXT: BB19_7:
+; CHECK: br 0{{$}}
+; CHECK-NEXT: .LBB19_7:
; OPT-LABEL: test10:
-; OPT: BB19_1:
-; OPT-NEXT: loop BB19_7{{$}}
+; OPT: .LBB19_1:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB19_1{{$}}
-; OPT-NEXT: BB19_2:
-; OPT-NEXT: block BB19_6{{$}}
-; OPT-NEXT: loop BB19_5{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
+; OPT-NEXT: .LBB19_2:
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: BB19_3:
-; OPT-NEXT: loop BB19_5{{$}}
+; OPT: .LBB19_3:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB19_1{{$}}
+; OPT: br_if {{[^,]*}}, 5{{$}}
; OPT-NOT: block
-; OPT: tableswitch {{[^,]*}}, BB19_3, BB19_3, BB19_5, BB19_1, BB19_2, BB19_6{{$}}
-; OPT-NEXT: BB19_5:
+; OPT: tableswitch {{[^,]*}}, 0, 0, 1, 5, 2, 4{{$}}
+; OPT-NEXT: .LBB19_5:
+; OPT-NEXT: end_loop{{$}}
+; OPT-NEXT: end_loop{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB19_6:
+; OPT-NEXT: .LBB19_6:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br BB19_1{{$}}
-; OPT-NEXT: BB19_7:
+; OPT: br 0{{$}}
+; OPT-NEXT: .LBB19_7:
define void @test10() {
bb0:
br label %bb1
@@ -900,58 +934,67 @@ bb6:
; Test a CFG DAG with interesting merging.
; CHECK-LABEL: test11:
-; CHECK: block BB20_8{{$}}
-; CHECK-NEXT: block BB20_7{{$}}
-; CHECK-NEXT: block BB20_6{{$}}
-; CHECK-NEXT: block BB20_4{{$}}
-; CHECK-NEXT: br_if {{[^,]*}}, BB20_4{{$}}
+; CHECK: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: br_if {{[^,]*}}, 0{{$}}
+; CHECK-NEXT: block{{$}}
; CHECK-NOT: block
-; CHECK: block BB20_3{{$}}
-; CHECK: br_if {{[^,]*}}, BB20_3{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB20_6{{$}}
-; CHECK-NEXT: BB20_3:
+; CHECK: br_if {{[^,]*}}, 2{{$}}
+; CHECK-NEXT: .LBB20_3:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
-; CHECK-NEXT: BB20_4:
+; CHECK-NEXT: .LBB20_4:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB20_8{{$}}
+; CHECK: br_if {{[^,]*}}, 2{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB20_7{{$}}
-; CHECK-NEXT: BB20_6:
+; CHECK: br_if {{[^,]*}}, 1{{$}}
+; CHECK-NEXT: .LBB20_6:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
-; CHECK-NEXT: BB20_7:
+; CHECK-NEXT: .LBB20_7:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
-; CHECK-NEXT: BB20_8:
+; CHECK-NEXT: .LBB20_8:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
; CHECK: return{{$}}
; OPT-LABEL: test11:
-; OPT: block BB20_8{{$}}
-; OPT-NEXT: block BB20_4{{$}}
-; OPT-NEXT: br_if $0, BB20_4{{$}}
+; OPT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: br_if $0, 0{{$}}
+; OPT-NEXT: block{{$}}
; OPT-NOT: block
-; OPT: block BB20_3{{$}}
-; OPT: br_if $0, BB20_3{{$}}
+; OPT: br_if $0, 0{{$}}
; OPT-NOT: block
-; OPT: br_if $0, BB20_8{{$}}
-; OPT-NEXT: BB20_3:
+; OPT: br_if $0, 2{{$}}
+; OPT-NEXT: .LBB20_3:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
; OPT: return{{$}}
-; OPT-NEXT: BB20_4:
+; OPT-NEXT: .LBB20_4:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: block BB20_6{{$}}
+; OPT: block{{$}}
; OPT-NOT: block
-; OPT: br_if $pop9, BB20_6{{$}}
+; OPT: br_if $pop9, 0{{$}}
; OPT-NOT: block
; OPT: return{{$}}
-; OPT-NEXT: BB20_6:
+; OPT-NEXT: .LBB20_6:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br_if $0, BB20_8{{$}}
+; OPT: br_if $0, 0{{$}}
; OPT-NOT: block
; OPT: return{{$}}
-; OPT-NEXT: BB20_8:
+; OPT-NEXT: .LBB20_8:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
; OPT: return{{$}}
define void @test11() {
@@ -985,53 +1028,59 @@ bb8:
}
; CHECK-LABEL: test12:
-; CHECK: BB21_1:
-; CHECK-NEXT: loop BB21_8{{$}}
+; CHECK: .LBB21_1:
+; CHECK-NEXT: loop{{$}}
; CHECK-NOT: block
-; CHECK: block BB21_7{{$}}
-; CHECK-NEXT: block BB21_6{{$}}
-; CHECK-NEXT: block BB21_4{{$}}
-; CHECK: br_if {{[^,]*}}, BB21_4{{$}}
+; CHECK: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK-NEXT: block{{$}}
+; CHECK: br_if {{[^,]*}}, 0{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK: br_if {{[^,]*}}, 2{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
-; CHECK-NEXT: br BB21_6{{$}}
-; CHECK-NEXT: BB21_4:
+; CHECK: br_if {{[^,]*}}, 2{{$}}
+; CHECK-NEXT: br 1{{$}}
+; CHECK-NEXT: .LBB21_4:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
+; CHECK: br_if {{[^,]*}}, 1{{$}}
; CHECK-NOT: block
-; CHECK: br_if {{[^,]*}}, BB21_7{{$}}
-; CHECK-NEXT: BB21_6:
+; CHECK: br_if {{[^,]*}}, 1{{$}}
+; CHECK-NEXT: .LBB21_6:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB21_7:
+; CHECK-NEXT: .LBB21_7:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NOT: block
-; CHECK: br BB21_1{{$}}
-; CHECK-NEXT: BB21_8:
+; CHECK: br 0{{$}}
+; CHECK-NEXT: .LBB21_8:
; OPT-LABEL: test12:
-; OPT: BB21_1:
-; OPT-NEXT: loop BB21_8{{$}}
+; OPT: .LBB21_1:
+; OPT-NEXT: loop{{$}}
; OPT-NOT: block
-; OPT: block BB21_7{{$}}
-; OPT-NEXT: block BB21_6{{$}}
-; OPT-NEXT: block BB21_4{{$}}
-; OPT: br_if {{[^,]*}}, BB21_4{{$}}
+; OPT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT-NEXT: block{{$}}
+; OPT: br_if {{[^,]*}}, 0{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT: br_if {{[^,]*}}, 2{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB21_7{{$}}
-; OPT-NEXT: br BB21_6{{$}}
-; OPT-NEXT: BB21_4:
+; OPT: br_if {{[^,]*}}, 2{{$}}
+; OPT-NEXT: br 1{{$}}
+; OPT-NEXT: .LBB21_4:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB21_7{{$}}
+; OPT: br_if {{[^,]*}}, 1{{$}}
; OPT-NOT: block
-; OPT: br_if {{[^,]*}}, BB21_7{{$}}
-; OPT-NEXT: BB21_6:
+; OPT: br_if {{[^,]*}}, 1{{$}}
+; OPT-NEXT: .LBB21_6:
+; OPT-NEXT: end_block{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB21_7:
+; OPT-NEXT: .LBB21_7:
+; OPT-NEXT: end_block{{$}}
; OPT-NOT: block
-; OPT: br BB21_1{{$}}
-; OPT-NEXT: BB21_8:
+; OPT: br 0{{$}}
+; OPT-NEXT: .LBB21_8:
define void @test12(i8* %arg) {
bb:
br label %bb1
@@ -1060,30 +1109,34 @@ bb7:
; optnone to disable optimizations to test this case.
; CHECK-LABEL: test13:
-; CHECK-NEXT: .local i32{{$}}
-; CHECK: block BB22_2{{$}}
-; CHECK: br_if $pop4, BB22_2{{$}}
+; CHECK-NEXT: local i32{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $pop4, 0{{$}}
; CHECK-NEXT: return{{$}}
-; CHECK-NEXT: BB22_2:
-; CHECK: block BB22_4{{$}}
-; CHECK-NEXT: br_if $0, BB22_4{{$}}
-; CHECK: BB22_4:
-; CHECK: block BB22_5{{$}}
-; CHECK: br_if $pop6, BB22_5{{$}}
-; CHECK-NEXT: BB22_5:
+; CHECK-NEXT: .LBB22_2:
+; CHECK-NEXT: end_block{{$}}
+; CHECK: block{{$}}
+; CHECK-NEXT: br_if $0, 0{{$}}
+; CHECK: .LBB22_4:
+; CHECK-NEXT: end_block{{$}}
+; CHECK: block{{$}}
+; CHECK: br_if $pop6, 0{{$}}
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: unreachable{{$}}
; OPT-LABEL: test13:
-; OPT-NEXT: .local i32{{$}}
-; OPT: block BB22_2{{$}}
-; OPT: br_if $pop4, BB22_2{{$}}
+; OPT-NEXT: local i32{{$}}
+; OPT: block{{$}}
+; OPT: br_if $pop4, 0{{$}}
; OPT-NEXT: return{{$}}
-; OPT-NEXT: BB22_2:
-; OPT: block BB22_4{{$}}
-; OPT-NEXT: br_if $0, BB22_4{{$}}
-; OPT: BB22_4:
-; OPT: block BB22_5{{$}}
-; OPT: br_if $pop6, BB22_5{{$}}
-; OPT-NEXT: BB22_5:
+; OPT-NEXT: .LBB22_2:
+; OPT-NEXT: end_block{{$}}
+; OPT: block{{$}}
+; OPT-NEXT: br_if $0, 0{{$}}
+; OPT: .LBB22_4:
+; OPT-NEXT: end_block{{$}}
+; OPT: block{{$}}
+; OPT: br_if $pop6, 0{{$}}
+; OPT-NEXT: end_block{{$}}
; OPT-NEXT: unreachable{{$}}
define void @test13() noinline optnone {
bb:
@@ -1100,3 +1153,65 @@ bb4:
bb5:
ret void
}
+
+; Test a case with a single-block loop that has another loop
+; as a successor. The end_loop for the first loop should go
+; before the loop for the second.
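+; A minimal sketch of the desired shape (mirroring the CHECK lines below):
+;   loop ... br_if 0 ... end_loop   ; first loop fully closed,
+;   loop ... br_if 0 ... end_loop   ; then the second one opens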
+
+; CHECK-LABEL: test14:
+; CHECK-NEXT: local i32{{$}}
+; CHECK-NEXT: i32.const $0=, 0{{$}}
+; CHECK-NEXT: .LBB23_1:{{$}}
+; CHECK-NEXT: loop{{$}}
+; CHECK-NEXT: br_if $0, 0{{$}}
+; CHECK-NEXT: .LBB23_2:{{$}}
+; CHECK-NEXT: end_loop{{$}}
+; CHECK-NEXT: loop{{$}}
+; CHECK-NEXT: br_if $0, 0{{$}}
+; CHECK-NEXT: end_loop{{$}}
+; CHECK-NEXT: return{{$}}
+define void @test14() {
+bb:
+ br label %bb1
+
+bb1:
+ %tmp = bitcast i1 undef to i1
+ br i1 %tmp, label %bb3, label %bb1
+
+bb3:
+ br label %bb4
+
+bb4:
+ br i1 undef, label %bb7, label %bb48
+
+bb7:
+ br i1 undef, label %bb12, label %bb12
+
+bb12:
+ br i1 undef, label %bb17, label %bb17
+
+bb17:
+ br i1 undef, label %bb22, label %bb22
+
+bb22:
+ br i1 undef, label %bb27, label %bb27
+
+bb27:
+ br i1 undef, label %bb30, label %bb30
+
+bb30:
+ br i1 undef, label %bb35, label %bb35
+
+bb35:
+ br i1 undef, label %bb38, label %bb38
+
+bb38:
+ br i1 undef, label %bb48, label %bb48
+
+bb48:
+ %tmp49 = bitcast i1 undef to i1
+ br i1 %tmp49, label %bb3, label %bb50
+
+bb50:
+ ret void
+}
diff --git a/test/CodeGen/WebAssembly/comparisons_f32.ll b/test/CodeGen/WebAssembly/comparisons_f32.ll
index 6df37ea1c6dd..2d324f7f2083 100644
--- a/test/CodeGen/WebAssembly/comparisons_f32.ll
+++ b/test/CodeGen/WebAssembly/comparisons_f32.ll
@@ -3,7 +3,7 @@
; Test that basic 32-bit floating-point comparison operations assemble as
; expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: ord_f32:
diff --git a/test/CodeGen/WebAssembly/comparisons_f64.ll b/test/CodeGen/WebAssembly/comparisons_f64.ll
index f5acc64b667c..22fbc1ae4c1f 100644
--- a/test/CodeGen/WebAssembly/comparisons_f64.ll
+++ b/test/CodeGen/WebAssembly/comparisons_f64.ll
@@ -3,7 +3,7 @@
; Test that basic 64-bit floating-point comparison operations assemble as
; expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: ord_f64:
diff --git a/test/CodeGen/WebAssembly/comparisons_i32.ll b/test/CodeGen/WebAssembly/comparisons_i32.ll
index b724cec1cc63..db81ef36e270 100644
--- a/test/CodeGen/WebAssembly/comparisons_i32.ll
+++ b/test/CodeGen/WebAssembly/comparisons_i32.ll
@@ -2,7 +2,7 @@
; Test that basic 32-bit integer comparison operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: eq_i32:
diff --git a/test/CodeGen/WebAssembly/comparisons_i64.ll b/test/CodeGen/WebAssembly/comparisons_i64.ll
index 898591999bec..19e5cf8603bf 100644
--- a/test/CodeGen/WebAssembly/comparisons_i64.ll
+++ b/test/CodeGen/WebAssembly/comparisons_i64.ll
@@ -2,7 +2,7 @@
; Test that basic 64-bit integer comparison operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: eq_i64:
diff --git a/test/CodeGen/WebAssembly/conv.ll b/test/CodeGen/WebAssembly/conv.ll
index e1acaca2c9ec..1a4bd72d72d6 100644
--- a/test/CodeGen/WebAssembly/conv.ll
+++ b/test/CodeGen/WebAssembly/conv.ll
@@ -2,7 +2,7 @@
; Test that basic conversion operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: i32_wrap_i64:
diff --git a/test/CodeGen/WebAssembly/copysign-casts.ll b/test/CodeGen/WebAssembly/copysign-casts.ll
index 760e49133018..f8e50d043ca9 100644
--- a/test/CodeGen/WebAssembly/copysign-casts.ll
+++ b/test/CodeGen/WebAssembly/copysign-casts.ll
@@ -3,7 +3,7 @@
; DAGCombiner oddly folds casts into the rhs of copysign. Test that they get
; unfolded.
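+; For example (hypothetical IR), given
+;   %e = fpext float %y to double
+;   %r = call double @copysign(double %x, double %e)
+; DAGCombiner drops the cast on the sign operand (only its sign bit
+; matters), leaving an FCOPYSIGN with mixed operand types; unfolding
+; reinstates the conversion so f64.copysign sees matching f64 operands.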
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare double @copysign(double, double) nounwind readnone
diff --git a/test/CodeGen/WebAssembly/cpus.ll b/test/CodeGen/WebAssembly/cpus.ll
index 2b77c5f475c8..51856fcd12c2 100644
--- a/test/CodeGen/WebAssembly/cpus.ll
+++ b/test/CodeGen/WebAssembly/cpus.ll
@@ -1,13 +1,13 @@
; This tests that llc accepts all valid WebAssembly CPUs.
-; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
-; RUN: llc < %s -mtriple=wasm32-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
-; RUN: llc < %s -mtriple=wasm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm32-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm64-unknown-unknown -mcpu=mvp 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm32-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm64-unknown-unknown -mcpu=generic 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm32-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm64-unknown-unknown -mcpu=bleeding-edge 2>&1 | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm32-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
+; RUN: llc < %s -asm-verbose=false -mtriple=wasm64-unknown-unknown -mcpu=invalidcpu 2>&1 | FileCheck %s --check-prefix=INVALID
; CHECK-NOT: {{.*}} is not a recognized processor for this target
; INVALID: {{.*}} is not a recognized processor for this target
diff --git a/test/CodeGen/WebAssembly/dead-vreg.ll b/test/CodeGen/WebAssembly/dead-vreg.ll
index b03e1569fde6..29a41990961d 100644
--- a/test/CodeGen/WebAssembly/dead-vreg.ll
+++ b/test/CodeGen/WebAssembly/dead-vreg.ll
@@ -2,7 +2,7 @@
; Check that unused vregs aren't assigned registers.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
define void @foo(i32* nocapture %a, i32 %w, i32 %h) {
diff --git a/test/CodeGen/WebAssembly/f32.ll b/test/CodeGen/WebAssembly/f32.ll
index 777010064cdb..c32a7c3dc7d9 100644
--- a/test/CodeGen/WebAssembly/f32.ll
+++ b/test/CodeGen/WebAssembly/f32.ll
@@ -2,7 +2,7 @@
; Test that basic 32-bit floating-point operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare float @llvm.fabs.f32(float)
@@ -146,7 +146,7 @@ define float @fmax32(float %x) {
}
; CHECK-LABEL: fma32:
-; CHECK: {{^}} f32.call $push0=, fmaf, $0, $1, $2{{$}}
+; CHECK: {{^}} f32.call $push0=, fmaf@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define float @fma32(float %a, float %b, float %c) {
%d = call float @llvm.fma.f32(float %a, float %b, float %c)
diff --git a/test/CodeGen/WebAssembly/f64.ll b/test/CodeGen/WebAssembly/f64.ll
index 302ee79389b3..92284999cbf7 100644
--- a/test/CodeGen/WebAssembly/f64.ll
+++ b/test/CodeGen/WebAssembly/f64.ll
@@ -2,7 +2,7 @@
; Test that basic 64-bit floating-point operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare double @llvm.fabs.f64(double)
@@ -146,7 +146,7 @@ define double @fmax64(double %x) {
}
; CHECK-LABEL: fma64:
-; CHECK: {{^}} f64.call $push0=, fma, $0, $1, $2{{$}}
+; CHECK: {{^}} f64.call $push0=, fma@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
define double @fma64(double %a, double %b, double %c) {
%d = call double @llvm.fma.f64(double %a, double %b, double %c)
diff --git a/test/CodeGen/WebAssembly/fast-isel.ll b/test/CodeGen/WebAssembly/fast-isel.ll
index 07d78c1415e5..7f9f20fa7083 100644
--- a/test/CodeGen/WebAssembly/fast-isel.ll
+++ b/test/CodeGen/WebAssembly/fast-isel.ll
@@ -2,7 +2,7 @@
; RUN: -fast-isel -fast-isel-abort=1 -verify-machineinstrs \
; RUN: | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; This tests very minimal fast-isel functionality.
diff --git a/test/CodeGen/WebAssembly/frem.ll b/test/CodeGen/WebAssembly/frem.ll
index 688370313b48..b8c80fbe6997 100644
--- a/test/CodeGen/WebAssembly/frem.ll
+++ b/test/CodeGen/WebAssembly/frem.ll
@@ -2,13 +2,13 @@
; Test that the frem instruction works.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: frem32:
; CHECK-NEXT: .param f32, f32{{$}}
; CHECK-NEXT: .result f32{{$}}
-; CHECK-NEXT: {{^}} f32.call $push0=, fmodf, $0, $1{{$}}
+; CHECK-NEXT: {{^}} f32.call $push0=, fmodf@FUNCTION, $0, $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define float @frem32(float %x, float %y) {
%a = frem float %x, %y
@@ -18,7 +18,7 @@ define float @frem32(float %x, float %y) {
; CHECK-LABEL: frem64:
; CHECK-NEXT: .param f64, f64{{$}}
; CHECK-NEXT: .result f64{{$}}
-; CHECK-NEXT: {{^}} f64.call $push0=, fmod, $0, $1{{$}}
+; CHECK-NEXT: {{^}} f64.call $push0=, fmod@FUNCTION, $0, $1{{$}}
; CHECK-NEXT: return $pop0{{$}}
define double @frem64(double %x, double %y) {
%a = frem double %x, %y
diff --git a/test/CodeGen/WebAssembly/func.ll b/test/CodeGen/WebAssembly/func.ll
index 6f42dc744ac7..9857dadee414 100644
--- a/test/CodeGen/WebAssembly/func.ll
+++ b/test/CodeGen/WebAssembly/func.ll
@@ -2,11 +2,12 @@
; Test that basic functions assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: f0:
; CHECK: return{{$}}
+; CHECK: .endfunc{{$}}
; CHECK: .size f0,
define void @f0() {
ret void
diff --git a/test/CodeGen/WebAssembly/global.ll b/test/CodeGen/WebAssembly/global.ll
index 5f149ed067c8..85fe5c896565 100644
--- a/test/CodeGen/WebAssembly/global.ll
+++ b/test/CodeGen/WebAssembly/global.ll
@@ -2,7 +2,7 @@
; Test that globals assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-NOT: llvm.used
@@ -21,7 +21,7 @@ define i32 @foo() {
; CHECK-LABEL: call_memcpy:
; CHECK-NEXT: .param i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: call memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: call memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $0{{$}}
declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture readonly, i32, i32, i1)
define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
@@ -29,15 +29,15 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
ret i8* %p
}
-; CHECK: .type g,@object
+; CHECK: .type .Lg,@object
; CHECK: .align 2{{$}}
-; CHECK-NEXT: g:
+; CHECK-NEXT: .Lg:
; CHECK-NEXT: .int32 1337{{$}}
-; CHECK-NEXT: .size g, 4{{$}}
+; CHECK-NEXT: .size .Lg, 4{{$}}
@g = private global i32 1337
; CHECK-LABEL: ud:
-; CHECK-NEXT: .zero 4{{$}}
+; CHECK-NEXT: .skip 4{{$}}
; CHECK-NEXT: .size ud, 4{{$}}
@ud = internal global i32 undef
@@ -73,7 +73,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .type ud64,@object
; CHECK: .align 3{{$}}
; CHECK-NEXT: ud64:
-; CHECK-NEXT: .zero 8{{$}}
+; CHECK-NEXT: .skip 8{{$}}
; CHECK-NEXT: .size ud64, 8{{$}}
@ud64 = internal global i64 undef
@@ -102,7 +102,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .type f32ud,@object
; CHECK: .align 2{{$}}
; CHECK-NEXT: f32ud:
-; CHECK-NEXT: .zero 4{{$}}
+; CHECK-NEXT: .skip 4{{$}}
; CHECK-NEXT: .size f32ud, 4{{$}}
@f32ud = internal global float undef
@@ -131,7 +131,7 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .type f64ud,@object
; CHECK: .align 3{{$}}
; CHECK-NEXT: f64ud:
-; CHECK-NEXT: .zero 8{{$}}
+; CHECK-NEXT: .skip 8{{$}}
; CHECK-NEXT: .size f64ud, 8{{$}}
@f64ud = internal global double undef
@@ -172,6 +172,6 @@ define i8* @call_memcpy(i8* %p, i8* nocapture readonly %q, i32 %n) {
; CHECK: .globl rom{{$}}
; CHECK: .align 4{{$}}
; CHECK: rom:
-; CHECK: .zero 512{{$}}
+; CHECK: .skip 512{{$}}
; CHECK: .size rom, 512{{$}}
@rom = constant [128 x i32] zeroinitializer, align 16
diff --git a/test/CodeGen/WebAssembly/globl.ll b/test/CodeGen/WebAssembly/globl.ll
index a5dc028c1db4..91d3ade4666b 100644
--- a/test/CodeGen/WebAssembly/globl.ll
+++ b/test/CodeGen/WebAssembly/globl.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK: .globl foo
diff --git a/test/CodeGen/WebAssembly/i32.ll b/test/CodeGen/WebAssembly/i32.ll
index ab29b0472bf2..10d97ad9e6d1 100644
--- a/test/CodeGen/WebAssembly/i32.ll
+++ b/test/CodeGen/WebAssembly/i32.ll
@@ -2,7 +2,7 @@
; Test that basic 32-bit integer operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare i32 @llvm.ctlz.i32(i32, i1)
diff --git a/test/CodeGen/WebAssembly/i64.ll b/test/CodeGen/WebAssembly/i64.ll
index 769f74266754..6dd46a91fad0 100644
--- a/test/CodeGen/WebAssembly/i64.ll
+++ b/test/CodeGen/WebAssembly/i64.ll
@@ -2,7 +2,7 @@
; Test that basic 64-bit integer operations assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare i64 @llvm.ctlz.i64(i64, i1)
diff --git a/test/CodeGen/WebAssembly/ident.ll b/test/CodeGen/WebAssembly/ident.ll
index 1e0dc2aa6725..49c188ec2578 100644
--- a/test/CodeGen/WebAssembly/ident.ll
+++ b/test/CodeGen/WebAssembly/ident.ll
@@ -2,7 +2,7 @@
; Test llvm.ident.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK: .ident "hello world"
diff --git a/test/CodeGen/WebAssembly/immediates.ll b/test/CodeGen/WebAssembly/immediates.ll
index abab11f2254e..735b386b4fc0 100644
--- a/test/CodeGen/WebAssembly/immediates.ll
+++ b/test/CodeGen/WebAssembly/immediates.ll
@@ -2,7 +2,7 @@
; Test that basic immediates assemble as expected.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: zero_i32:
diff --git a/test/CodeGen/WebAssembly/inline-asm.ll b/test/CodeGen/WebAssembly/inline-asm.ll
index fc066c4b812f..f35042e64f86 100644
--- a/test/CodeGen/WebAssembly/inline-asm.ll
+++ b/test/CodeGen/WebAssembly/inline-asm.ll
@@ -1,8 +1,9 @@
-; RUN: llc < %s -asm-verbose=false | FileCheck %s
+; RUN: llc < %s -asm-verbose=false -no-integrated-as | FileCheck %s
-; Test basic inline assembly.
+; Test basic inline assembly. Pass -no-integrated-as since these snippets
+; aren't actually valid assembly syntax.
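+; (with the integrated assembler enabled, llc would try to parse those
+; strings and reject them)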
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: foo:
diff --git a/test/CodeGen/WebAssembly/legalize.ll b/test/CodeGen/WebAssembly/legalize.ll
index e780b2ee36ca..5feb2e8c8c75 100644
--- a/test/CodeGen/WebAssembly/legalize.ll
+++ b/test/CodeGen/WebAssembly/legalize.ll
@@ -2,7 +2,7 @@
; Test various types and operators that need to be legalized.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: shl_i3:
diff --git a/test/CodeGen/WebAssembly/load-ext.ll b/test/CodeGen/WebAssembly/load-ext.ll
index 0ffcd38a8666..d52df3361a38 100644
--- a/test/CodeGen/WebAssembly/load-ext.ll
+++ b/test/CodeGen/WebAssembly/load-ext.ll
@@ -2,7 +2,7 @@
; Test that extending loads are assembled properly.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: sext_i8_i32:
diff --git a/test/CodeGen/WebAssembly/load-store-i1.ll b/test/CodeGen/WebAssembly/load-store-i1.ll
index 37b514729479..47e2e8cb254f 100644
--- a/test/CodeGen/WebAssembly/load-store-i1.ll
+++ b/test/CodeGen/WebAssembly/load-store-i1.ll
@@ -2,7 +2,7 @@
; Test that i1 extending loads and truncating stores are assembled properly.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: load_u_i1_i32:
diff --git a/test/CodeGen/WebAssembly/load.ll b/test/CodeGen/WebAssembly/load.ll
index aa8ae689e0d1..243fa9d50ad6 100644
--- a/test/CodeGen/WebAssembly/load.ll
+++ b/test/CodeGen/WebAssembly/load.ll
@@ -2,7 +2,7 @@
; Test that basic loads are assembled properly.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: ldi32:
diff --git a/test/CodeGen/WebAssembly/loop-idiom.ll b/test/CodeGen/WebAssembly/loop-idiom.ll
index 2906df20a229..2a233c406900 100644
--- a/test/CodeGen/WebAssembly/loop-idiom.ll
+++ b/test/CodeGen/WebAssembly/loop-idiom.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-idiom -S < %s -march=wasm32 | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
diff --git a/test/CodeGen/WebAssembly/memory-addr32.ll b/test/CodeGen/WebAssembly/memory-addr32.ll
index e2dd556bddc0..e6c15633fd63 100644
--- a/test/CodeGen/WebAssembly/memory-addr32.ll
+++ b/test/CodeGen/WebAssembly/memory-addr32.ll
@@ -2,7 +2,7 @@
; Test that basic memory operations assemble as expected with 32-bit addresses.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare i32 @llvm.wasm.memory.size.i32() nounwind readonly
diff --git a/test/CodeGen/WebAssembly/memory-addr64.ll b/test/CodeGen/WebAssembly/memory-addr64.ll
index 5de1f2b11cfd..d504c277f306 100644
--- a/test/CodeGen/WebAssembly/memory-addr64.ll
+++ b/test/CodeGen/WebAssembly/memory-addr64.ll
@@ -2,7 +2,7 @@
; Test that basic memory operations assemble as expected with 64-bit addresses.
-target datalayout = "e-p:64:64-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:64:64-i64:64-n32:64-S128"
target triple = "wasm64-unknown-unknown"
declare i64 @llvm.wasm.memory.size.i64() nounwind readonly
diff --git a/test/CodeGen/WebAssembly/offset-folding.ll b/test/CodeGen/WebAssembly/offset-folding.ll
index 2b4e8a90b0f0..159a25eba358 100644
--- a/test/CodeGen/WebAssembly/offset-folding.ll
+++ b/test/CodeGen/WebAssembly/offset-folding.ll
@@ -2,7 +2,7 @@
; Test that constant offsets can be folded into global addresses.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; FIXME: make this 'external' and make sure it still works. WebAssembly
diff --git a/test/CodeGen/WebAssembly/offset.ll b/test/CodeGen/WebAssembly/offset.ll
index 901801d7dbbe..828f40206a96 100644
--- a/test/CodeGen/WebAssembly/offset.ll
+++ b/test/CodeGen/WebAssembly/offset.ll
@@ -2,7 +2,7 @@
; Test constant load and store address offsets.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; With an nuw add, we can fold an offset.
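+; For example (hypothetical IR), in
+;   %q = ptrtoint i32* %p to i32
+;   %r = add nuw i32 %q, 24
+;   %s = inttoptr i32 %r to i32*
+;   %t = load i32, i32* %s
+; the nuw guarantees the add cannot wrap, so the 24 can be folded into
+; the load's immediate offset: i32.load $..., 24($...)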
diff --git a/test/CodeGen/WebAssembly/phi.ll b/test/CodeGen/WebAssembly/phi.ll
index bae8a7c9e3b8..00e5859b75cf 100644
--- a/test/CodeGen/WebAssembly/phi.ll
+++ b/test/CodeGen/WebAssembly/phi.ll
@@ -2,7 +2,7 @@
; Test that phis are lowered.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; Basic phi triangle.
@@ -25,7 +25,7 @@ done:
; Swap phis.
; CHECK-LABEL: test1:
-; CHECK: BB1_1:
+; CHECK: .LBB1_1:
; CHECK: copy_local $[[NUM0:[0-9]+]]=, $[[NUM1:[0-9]+]]{{$}}
; CHECK: copy_local $[[NUM1]]=, $[[NUM2:[0-9]+]]{{$}}
; CHECK: copy_local $[[NUM2]]=, $[[NUM0]]{{$}}
diff --git a/test/CodeGen/WebAssembly/reg-stackify.ll b/test/CodeGen/WebAssembly/reg-stackify.ll
index 1c1b1e193f7a..f8cae7f92404 100644
--- a/test/CodeGen/WebAssembly/reg-stackify.ll
+++ b/test/CodeGen/WebAssembly/reg-stackify.ll
@@ -2,7 +2,7 @@
; Test the register stackifier pass.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; No because of pointer aliasing.
@@ -55,7 +55,7 @@ define i32 @yes1(i32* %q) {
; CHECK-NEXT: .local i32, i32{{$}}
; CHECK-NEXT: i32.const $5=, 2{{$}}
; CHECK-NEXT: i32.const $4=, 1{{$}}
-; CHECK-NEXT: block BB4_2{{$}}
+; CHECK-NEXT: block{{$}}
; CHECK-NEXT: i32.lt_s $push0=, $0, $4{{$}}
; CHECK-NEXT: i32.lt_s $push1=, $1, $5{{$}}
; CHECK-NEXT: i32.xor $push4=, $pop0, $pop1{{$}}
@@ -64,10 +64,11 @@ define i32 @yes1(i32* %q) {
; CHECK-NEXT: i32.xor $push5=, $pop2, $pop3{{$}}
; CHECK-NEXT: i32.xor $push6=, $pop4, $pop5{{$}}
; CHECK-NEXT: i32.ne $push7=, $pop6, $4{{$}}
-; CHECK-NEXT: br_if $pop7, BB4_2{{$}}
+; CHECK-NEXT: br_if $pop7, 0{{$}}
; CHECK-NEXT: i32.const $push8=, 0{{$}}
; CHECK-NEXT: return $pop8{{$}}
-; CHECK-NEXT: BB4_2:
+; CHECK-NEXT: .LBB4_2:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return $4{{$}}
define i32 @stack_uses(i32 %x, i32 %y, i32 %z, i32 %w) {
entry:
@@ -89,16 +90,17 @@ false:
; be trivially stackified.
; CHECK-LABEL: multiple_uses:
-; CHECK-NEXT: .param i32, i32, i32{{$}}
-; CHECK-NEXT: .local i32{{$}}
+; CHECK-NEXT: .param i32, i32, i32{{$}}
+; CHECK-NEXT: .local i32{{$}}
; CHECK-NEXT: i32.load $3=, 0($2){{$}}
-; CHECK-NEXT: block BB5_3{{$}}
+; CHECK-NEXT: block{{$}}
; CHECK-NEXT: i32.ge_u $push0=, $3, $1{{$}}
-; CHECK-NEXT: br_if $pop0, BB5_3{{$}}
+; CHECK-NEXT: br_if $pop0, 0{{$}}
; CHECK-NEXT: i32.lt_u $push1=, $3, $0{{$}}
-; CHECK-NEXT: br_if $pop1, BB5_3{{$}}
+; CHECK-NEXT: br_if $pop1, 0{{$}}
; CHECK-NEXT: i32.store $discard=, 0($2), $3{{$}}
-; CHECK-NEXT: BB5_3:
+; CHECK-NEXT: .LBB5_3:
+; CHECK-NEXT: end_block{{$}}
; CHECK-NEXT: return{{$}}
define void @multiple_uses(i32* %arg0, i32* %arg1, i32* %arg2) nounwind {
bb:
diff --git a/test/CodeGen/WebAssembly/return-int32.ll b/test/CodeGen/WebAssembly/return-int32.ll
index 663cef4e459d..a93a0f6c438b 100644
--- a/test/CodeGen/WebAssembly/return-int32.ll
+++ b/test/CodeGen/WebAssembly/return-int32.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: return_i32:
diff --git a/test/CodeGen/WebAssembly/return-void.ll b/test/CodeGen/WebAssembly/return-void.ll
index 4933bfcb87e6..65ff5f325719 100644
--- a/test/CodeGen/WebAssembly/return-void.ll
+++ b/test/CodeGen/WebAssembly/return-void.ll
@@ -1,6 +1,6 @@
; RUN: llc < %s -asm-verbose=false | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: return_void:
diff --git a/test/CodeGen/WebAssembly/returned.ll b/test/CodeGen/WebAssembly/returned.ll
index e208e198c73d..9c892bb3ecea 100644
--- a/test/CodeGen/WebAssembly/returned.ll
+++ b/test/CodeGen/WebAssembly/returned.ll
@@ -2,14 +2,14 @@
; Test that the "returned" attribute is optimized effectively.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: _Z3foov:
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.const $push0=, 1{{$}}
-; CHECK-NEXT: {{^}} i32.call $push1=, _Znwm, $pop0{{$}}
-; CHECK-NEXT: {{^}} i32.call $push2=, _ZN5AppleC1Ev, $pop1{{$}}
+; CHECK-NEXT: {{^}} i32.call $push1=, _Znwm@FUNCTION, $pop0{{$}}
+; CHECK-NEXT: {{^}} i32.call $push2=, _ZN5AppleC1Ev@FUNCTION, $pop1{{$}}
; CHECK-NEXT: return $pop2{{$}}
%class.Apple = type { i8 }
declare noalias i8* @_Znwm(i32)
@@ -25,7 +25,7 @@ entry:
; CHECK-LABEL: _Z3barPvS_l:
; CHECK-NEXT: .param i32, i32, i32{{$}}
; CHECK-NEXT: .result i32{{$}}
-; CHECK-NEXT: {{^}} i32.call $push0=, memcpy, $0, $1, $2{{$}}
+; CHECK-NEXT: {{^}} i32.call $push0=, memcpy@FUNCTION, $0, $1, $2{{$}}
; CHECK-NEXT: return $pop0{{$}}
declare i8* @memcpy(i8* returned, i8*, i32)
define i8* @_Z3barPvS_l(i8* %p, i8* %s, i32 %n) {
@@ -38,7 +38,7 @@ entry:
; CHECK-LABEL: test_constant_arg:
; CHECK-NEXT: i32.const $push0=, global{{$}}
-; CHECK-NEXT: {{^}} i32.call $discard=, returns_arg, $pop0{{$}}
+; CHECK-NEXT: {{^}} i32.call $discard=, returns_arg@FUNCTION, $pop0{{$}}
; CHECK-NEXT: return{{$}}
@global = external global i32
@addr = global i32* @global
diff --git a/test/CodeGen/WebAssembly/select.ll b/test/CodeGen/WebAssembly/select.ll
index 1b1d7aed7154..416f58cac0d3 100644
--- a/test/CodeGen/WebAssembly/select.ll
+++ b/test/CodeGen/WebAssembly/select.ll
@@ -3,7 +3,7 @@
; Test that the wasm select instruction is selected from the LLVM select instruction.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: select_i32_bool:
diff --git a/test/CodeGen/WebAssembly/signext-zeroext.ll b/test/CodeGen/WebAssembly/signext-zeroext.ll
index 40d49af0ccc7..f6f56363c1af 100644
--- a/test/CodeGen/WebAssembly/signext-zeroext.ll
+++ b/test/CodeGen/WebAssembly/signext-zeroext.ll
@@ -2,7 +2,7 @@
; Test zeroext and signext ABI keywords
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: z2s_func:
@@ -32,7 +32,7 @@ define zeroext i8 @s2z_func(i8 signext %t) {
; CHECK-NEXT: .result i32{{$}}
; CHECK-NEXT: i32.const $push[[NUM0:[0-9]+]]=, 255{{$}}
; CHECK-NEXT: i32.and $push[[NUM1:[0-9]+]]=, $0, $pop[[NUM0]]{{$}}
-; CHECK-NEXT: call $push[[NUM2:[0-9]+]]=, z2s_func, $pop[[NUM1]]{{$}}
+; CHECK-NEXT: call $push[[NUM2:[0-9]+]]=, z2s_func@FUNCTION, $pop[[NUM1]]{{$}}
; CHECK-NEXT: return $pop[[NUM2]]{{$}}
define i32 @z2s_call(i32 %t) {
%s = trunc i32 %t to i8
@@ -48,7 +48,7 @@ define i32 @z2s_call(i32 %t) {
; CHECK-NEXT: i32.const $[[NUM0:[0-9]+]]=, 24{{$}}
; CHECK-NEXT: i32.shl $push[[NUM1:[0-9]+]]=, $0, $[[NUM0]]{{$}}
; CHECK-NEXT: i32.shr_s $push[[NUM2:[0-9]+]]=, $pop[[NUM1]], $[[NUM0]]{{$}}
-; CHECK-NEXT: call $push[[NUM3:[0-9]]]=, s2z_func, $pop[[NUM2]]{{$}}
+; CHECK-NEXT: call $push[[NUM3:[0-9]]]=, s2z_func@FUNCTION, $pop[[NUM2]]{{$}}
; CHECK-NEXT: i32.shl $push[[NUM4:[0-9]+]]=, $pop[[NUM3]], $[[NUM0]]{{$}}
; CHECK-NEXT: i32.shr_s $push[[NUM5:[0-9]+]]=, $pop[[NUM4]], $[[NUM0]]{{$}}
; CHECK-NEXT: return $pop[[NUM5]]{{$}}
diff --git a/test/CodeGen/WebAssembly/store-results.ll b/test/CodeGen/WebAssembly/store-results.ll
index 73479e544db9..ae74133fe386 100644
--- a/test/CodeGen/WebAssembly/store-results.ll
+++ b/test/CodeGen/WebAssembly/store-results.ll
@@ -3,7 +3,7 @@
; Test that the wasm-store-results pass makes users of stored values use the
; result of store expressions to reduce get_local/set_local traffic.
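+; As a sketch: a wasm store yields the stored value as its result, so
+; after "store i32 %v, i32* %p" a later use of %v can consume the store's
+; result directly instead of re-reading %v through get_local.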
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: single_block:
diff --git a/test/CodeGen/WebAssembly/store-trunc.ll b/test/CodeGen/WebAssembly/store-trunc.ll
index c12b716dfd59..d069af1da7bc 100644
--- a/test/CodeGen/WebAssembly/store-trunc.ll
+++ b/test/CodeGen/WebAssembly/store-trunc.ll
@@ -2,7 +2,7 @@
; Test that truncating stores are assembled properly.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: trunc_i8_i32:
diff --git a/test/CodeGen/WebAssembly/store.ll b/test/CodeGen/WebAssembly/store.ll
index 442caedef3a7..dc93ebbbadb4 100644
--- a/test/CodeGen/WebAssembly/store.ll
+++ b/test/CodeGen/WebAssembly/store.ll
@@ -2,7 +2,7 @@
; Test that basic stores are assembled properly.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: sti32:
diff --git a/test/CodeGen/WebAssembly/switch.ll b/test/CodeGen/WebAssembly/switch.ll
index 7f6f6efff7d6..3df5e7f9cf6f 100644
--- a/test/CodeGen/WebAssembly/switch.ll
+++ b/test/CodeGen/WebAssembly/switch.ll
@@ -3,7 +3,7 @@
; Test switch instructions. Block placement is disabled because it reorders
; the blocks in a way that isn't interesting here.
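+; In the tableswitch checks below, case operands are relative depths as
+; well: the eight cases that used to name BB0_2 now all carry depth 0,
+; the next eight depth 1, and so on.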
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare void @foo0()
@@ -14,27 +14,27 @@ declare void @foo4()
declare void @foo5()
; CHECK-LABEL: bar32:
-; CHECK: block BB0_8{{$}}
-; CHECK: block BB0_7{{$}}
-; CHECK: block BB0_6{{$}}
-; CHECK: block BB0_5{{$}}
-; CHECK: block BB0_4{{$}}
-; CHECK: block BB0_3{{$}}
-; CHECK: block BB0_2{{$}}
-; CHECK: tableswitch {{[^,]*}}, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_2, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_3, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_4, BB0_5, BB0_6, BB0_7{{$}}
-; CHECK: BB0_2:
-; CHECK: call foo0
-; CHECK: BB0_3:
-; CHECK: call foo1
-; CHECK: BB0_4:
-; CHECK: call foo2
-; CHECK: BB0_5:
-; CHECK: call foo3
-; CHECK: BB0_6:
-; CHECK: call foo4
-; CHECK: BB0_7:
-; CHECK: call foo5
-; CHECK: BB0_8:
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: tableswitch {{[^,]*}}, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5{{$}}
+; CHECK: .LBB0_2:
+; CHECK: call foo0@FUNCTION{{$}}
+; CHECK: .LBB0_3:
+; CHECK: call foo1@FUNCTION{{$}}
+; CHECK: .LBB0_4:
+; CHECK: call foo2@FUNCTION{{$}}
+; CHECK: .LBB0_5:
+; CHECK: call foo3@FUNCTION{{$}}
+; CHECK: .LBB0_6:
+; CHECK: call foo4@FUNCTION{{$}}
+; CHECK: .LBB0_7:
+; CHECK: call foo5@FUNCTION{{$}}
+; CHECK: .LBB0_8:
; CHECK: return{{$}}
define void @bar32(i32 %n) {
entry:
@@ -94,27 +94,27 @@ sw.epilog: ; preds = %entry, %sw.bb.5, %s
}
; CHECK-LABEL: bar64:
-; CHECK: block BB1_8{{$}}
-; CHECK: block BB1_7{{$}}
-; CHECK: block BB1_6{{$}}
-; CHECK: block BB1_5{{$}}
-; CHECK: block BB1_4{{$}}
-; CHECK: block BB1_3{{$}}
-; CHECK: block BB1_2{{$}}
-; CHECK: tableswitch {{[^,]*}}, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_2, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_3, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_4, BB1_5, BB1_6, BB1_7{{$}}
-; CHECK: BB1_2:
-; CHECK: call foo0
-; CHECK: BB1_3:
-; CHECK: call foo1
-; CHECK: BB1_4:
-; CHECK: call foo2
-; CHECK: BB1_5:
-; CHECK: call foo3
-; CHECK: BB1_6:
-; CHECK: call foo4
-; CHECK: BB1_7:
-; CHECK: call foo5
-; CHECK: BB1_8:
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: block{{$}}
+; CHECK: tableswitch {{[^,]*}}, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 4, 5{{$}}
+; CHECK: .LBB1_2:
+; CHECK: call foo0@FUNCTION{{$}}
+; CHECK: .LBB1_3:
+; CHECK: call foo1@FUNCTION{{$}}
+; CHECK: .LBB1_4:
+; CHECK: call foo2@FUNCTION{{$}}
+; CHECK: .LBB1_5:
+; CHECK: call foo3@FUNCTION{{$}}
+; CHECK: .LBB1_6:
+; CHECK: call foo4@FUNCTION{{$}}
+; CHECK: .LBB1_7:
+; CHECK: call foo5@FUNCTION{{$}}
+; CHECK: .LBB1_8:
; CHECK: return{{$}}
define void @bar64(i64 %n) {
entry:
diff --git a/test/CodeGen/WebAssembly/unreachable.ll b/test/CodeGen/WebAssembly/unreachable.ll
index 414767e5c35d..7b23bf3cecfb 100644
--- a/test/CodeGen/WebAssembly/unreachable.ll
+++ b/test/CodeGen/WebAssembly/unreachable.ll
@@ -4,7 +4,7 @@
; Test that the LLVM unreachable instruction and the trap intrinsics are
; lowered to wasm unreachable.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
declare void @llvm.trap()
@@ -12,7 +12,7 @@ declare void @llvm.debugtrap()
declare void @abort()
; CHECK-LABEL: f1:
-; CHECK: call abort
+; CHECK: call abort@FUNCTION{{$}}
; CHECK: unreachable
define i32 @f1() {
call void @abort()
diff --git a/test/CodeGen/WebAssembly/unused-argument.ll b/test/CodeGen/WebAssembly/unused-argument.ll
index e7851b216cb4..00dea769ee86 100644
--- a/test/CodeGen/WebAssembly/unused-argument.ll
+++ b/test/CodeGen/WebAssembly/unused-argument.ll
@@ -2,7 +2,7 @@
; Make sure that argument offsets are correct even if some arguments are unused.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: unused_first:
@@ -22,7 +22,7 @@ define i32 @unused_second(i32 %x, i32 %y) {
}
; CHECK-LABEL: call_something:
-; CHECK-NEXT: {{^}} i32.call $discard=, return_something{{$}}
+; CHECK-NEXT: {{^}} i32.call $discard=, return_something@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
declare i32 @return_something()
define void @call_something() {
diff --git a/test/CodeGen/WebAssembly/userstack.ll b/test/CodeGen/WebAssembly/userstack.ll
index 6e01e36cf9fa..cc50192b66db 100644
--- a/test/CodeGen/WebAssembly/userstack.ll
+++ b/test/CodeGen/WebAssembly/userstack.ll
@@ -2,7 +2,7 @@
; RUN: llc < %s -asm-verbose=false -fast-isel | FileCheck %s
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; CHECK-LABEL: alloca32:
@@ -72,6 +72,27 @@ define void @allocarray() {
ret void
}
+define void @allocarray_inbounds() {
+ ; CHECK: i32.const [[L1:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.load [[L1]]=, 0([[L1]])
+ ; CHECK-NEXT: i32.const [[L2:.+]]=, 32
+ ; CHECK-NEXT: i32.sub [[SP:.+]]=, [[L1]], [[L2]]
+ %r = alloca [5 x i32]
+ ; CHECK: i32.const $push[[L3:.+]]=, 1
+ ; CHECK: i32.store {{.*}}=, 12([[SP]]), $pop[[L3]]
+ %p = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 0
+ store i32 1, i32* %p
+ ; This store should have both the GEP and the FI folded into it.
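+ ; (offset 16 = 12, the array's frame offset used by the store above,
+ ; plus 4 bytes for element index 1)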
+ ; CHECK-NEXT: i32.store {{.*}}=, 16([[SP]]), $pop
+ %p2 = getelementptr inbounds [5 x i32], [5 x i32]* %r, i32 0, i32 1
+ store i32 1, i32* %p2
+ ; CHECK: i32.const [[L7:.+]]=, 32
+ ; CHECK-NEXT: i32.add [[SP]]=, [[SP]], [[L7]]
+ ; CHECK-NEXT: i32.const [[L8:.+]]=, __stack_pointer
+ ; CHECK-NEXT: i32.store [[SP]]=, 0([[L8]]), [[SP]]
+ ret void
+}
+
define void @dynamic_alloca(i32 %alloc) {
; TODO: Support frame pointers
;%r = alloca i32, i32 %alloc
diff --git a/test/CodeGen/WebAssembly/varargs.ll b/test/CodeGen/WebAssembly/varargs.ll
index c564d9420742..c12264625c37 100644
--- a/test/CodeGen/WebAssembly/varargs.ll
+++ b/test/CodeGen/WebAssembly/varargs.ll
@@ -2,7 +2,7 @@
; Test varargs constructs.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
; Test va_start.
@@ -103,7 +103,7 @@ entry:
declare void @callee(...)
; CHECK-LABEL: caller_none:
-; CHECK-NEXT: call callee{{$}}
+; CHECK-NEXT: call callee@FUNCTION{{$}}
; CHECK-NEXT: return{{$}}
define void @caller_none() {
call void (...) @callee()
diff --git a/test/CodeGen/WebAssembly/vtable.ll b/test/CodeGen/WebAssembly/vtable.ll
index 38298bc474b5..739ba2aaf5a5 100644
--- a/test/CodeGen/WebAssembly/vtable.ll
+++ b/test/CodeGen/WebAssembly/vtable.ll
@@ -11,7 +11,7 @@
; struct D : public B;
; Each with a virtual dtor and method foo.
-target datalayout = "e-p:32:32-i64:64-n32:64-S128"
+target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"
%struct.A = type { i32 (...)** }
diff --git a/test/CodeGen/WinEH/wineh-cloning.ll b/test/CodeGen/WinEH/wineh-cloning.ll
index 3c1793a3bd7f..748c07df1730 100644
--- a/test/CodeGen/WinEH/wineh-cloning.ll
+++ b/test/CodeGen/WinEH/wineh-cloning.ll
@@ -233,48 +233,6 @@ exit:
; CHECK-NEXT: br label %outer.ret
-define void @test9() personality i32 (...)* @__C_specific_handler {
-entry:
- invoke void @f()
- to label %invoke.cont unwind label %left
-invoke.cont:
- invoke void @f()
- to label %unreachable unwind label %right
-left:
- %cp.left = cleanuppad within none []
- call void @llvm.foo(i32 1)
- invoke void @f() [ "funclet"(token %cp.left) ]
- to label %unreachable unwind label %right
-right:
- %cp.right = cleanuppad within none []
- call void @llvm.foo(i32 2)
- invoke void @f() [ "funclet"(token %cp.right) ]
- to label %unreachable unwind label %left
-unreachable:
- unreachable
-}
-; This is an irreducible loop with two funclets that enter each other.
-; CHECK-LABEL: define void @test9(
-; CHECK: entry:
-; CHECK: to label %invoke.cont unwind label %[[LEFT:.+]]
-; CHECK: invoke.cont:
-; CHECK: to label %[[UNREACHABLE_ENTRY:.+]] unwind label %[[RIGHT:.+]]
-; CHECK: [[LEFT]]:
-; CHECK: call void @llvm.foo(i32 1)
-; CHECK: invoke void @f()
-; CHECK: to label %[[UNREACHABLE_LEFT:.+]] unwind label %[[RIGHT]]
-; CHECK: [[RIGHT]]:
-; CHECK: call void @llvm.foo(i32 2)
-; CHECK: invoke void @f()
-; CHECK: to label %[[UNREACHABLE_RIGHT:.+]] unwind label %[[LEFT]]
-; CHECK: [[UNREACHABLE_RIGHT]]:
-; CHECK: unreachable
-; CHECK: [[UNREACHABLE_LEFT]]:
-; CHECK: unreachable
-; CHECK: [[UNREACHABLE_ENTRY]]:
-; CHECK: unreachable
-
-
define void @test10() personality i32 (...)* @__CxxFrameHandler3 {
entry:
invoke void @f()
diff --git a/test/CodeGen/WinEH/wineh-no-demotion.ll b/test/CodeGen/WinEH/wineh-no-demotion.ll
index 4fb84db89093..0901e27c301d 100644
--- a/test/CodeGen/WinEH/wineh-no-demotion.ll
+++ b/test/CodeGen/WinEH/wineh-no-demotion.ll
@@ -33,7 +33,7 @@ right:
shared:
%x = call i32 @g()
- invoke void @f() [ "funclet"(token %0) ]
+ invoke void @f()
to label %shared.cont unwind label %inner
shared.cont:
@@ -72,7 +72,7 @@ right:
shared:
%x = call i32 @g()
- invoke void @f() [ "funclet"(token %0) ]
+ invoke void @f()
to label %shared.cont unwind label %inner
shared.cont:
diff --git a/test/CodeGen/WinEH/wineh-statenumbering.ll b/test/CodeGen/WinEH/wineh-statenumbering.ll
index dab7fde61a66..4e7c36943a01 100644
--- a/test/CodeGen/WinEH/wineh-statenumbering.ll
+++ b/test/CodeGen/WinEH/wineh-statenumbering.ll
@@ -44,7 +44,7 @@ catch: ; preds = %catch.dispatch
; CHECK: catch:
; CHECK: store i32 2
; CHECK: invoke void @_CxxThrowException(
- invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) #1
+ invoke void @_CxxThrowException(i8* null, %eh.ThrowInfo* null) [ "funclet"(token %1) ]
to label %unreachable unwind label %catch.dispatch.1
catch.dispatch.1: ; preds = %catch
diff --git a/test/CodeGen/X86/2008-11-03-F80VAARG.ll b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
index 507799b7304f..97c046c86426 100644
--- a/test/CodeGen/X86/2008-11-03-F80VAARG.ll
+++ b/test/CodeGen/X86/2008-11-03-F80VAARG.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -o - | not grep 10
+; RUN: llc < %s -march=x86 -o - | FileCheck %s
declare void @llvm.va_start(i8*) nounwind
@@ -6,6 +6,8 @@ declare void @llvm.va_copy(i8*, i8*) nounwind
declare void @llvm.va_end(i8*) nounwind
+; CHECK-LABEL: test:
+; CHECK-NOT: 10
define x86_fp80 @test(...) nounwind {
%ap = alloca i8* ; <i8**> [#uses=3]
%v1 = bitcast i8** %ap to i8* ; <i8*> [#uses=1]
diff --git a/test/CodeGen/X86/2012-01-12-extract-sv.ll b/test/CodeGen/X86/2012-01-12-extract-sv.ll
index 92ec107a0079..6950641a08ae 100644
--- a/test/CodeGen/X86/2012-01-12-extract-sv.ll
+++ b/test/CodeGen/X86/2012-01-12-extract-sv.ll
@@ -3,9 +3,7 @@
define void @endless_loop() {
; CHECK-LABEL: endless_loop:
; CHECK-NEXT: # BB#0:
-; CHECK-NEXT: vmovaps (%eax), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
+; CHECK-NEXT: vbroadcastss (%eax), %ymm0
; CHECK-NEXT: vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
diff --git a/test/CodeGen/X86/avx-vbroadcast.ll b/test/CodeGen/X86/avx-vbroadcast.ll
index 86b0628aa0bc..0c92f4884fb7 100644
--- a/test/CodeGen/X86/avx-vbroadcast.ll
+++ b/test/CodeGen/X86/avx-vbroadcast.ll
@@ -1,11 +1,24 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7-avx -mattr=+avx | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx | FileCheck %s --check-prefix=X64
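+; The X32 runs take the pointer argument on the stack, so their checks
+; begin with a movl from (%esp) before the broadcast; the X64 runs take
+; it in %rdi.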
define <4 x i64> @A(i64* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: A:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: A:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: A:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -16,10 +29,16 @@ entry:
}
define <8 x i32> @B(i32* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: B:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: B:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: B:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
@@ -30,10 +49,16 @@ entry:
}
define <4 x double> @C(double* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: C:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: C:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: C:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load double, double* %ptr, align 8
%vecinit.i = insertelement <4 x double> undef, double %q, i32 0
@@ -44,10 +69,16 @@ entry:
}
define <8 x float> @D(float* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: D:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: D:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: D:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <8 x float> undef, float %q, i32 0
@@ -60,10 +91,16 @@ entry:
;;;; 128-bit versions
define <4 x float> @e(float* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: e:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: e:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: e:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -75,10 +112,15 @@ entry:
; Don't broadcast constants on pre-AVX2 hardware.
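+; (the splat is instead materialized as a full-width constant-pool load,
+; the vmovaps of four -7.812500e-03 values below)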
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _e2:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
-; CHECK-NEXT: retq
+; X32-LABEL: _e2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
+; X32-NEXT: retl
+;
+; X64-LABEL: _e2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [-7.812500e-03,-7.812500e-03,-7.812500e-03,-7.812500e-03]
+; X64-NEXT: retq
entry:
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
@@ -89,10 +131,16 @@ entry:
define <4 x i32> @F(i32* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: F:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: F:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: F:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
@@ -105,10 +153,16 @@ entry:
; FIXME: Pointer adjusted broadcasts
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i32_4i32_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i32_4i32_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i32_4i32_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[1,1,1,1]
+; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -116,11 +170,18 @@ entry:
}
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8i32_4i32_33333333:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8i32_4i32_33333333:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8i32_4i32_33333333:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,3,3]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -128,13 +189,16 @@ entry:
}
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8i32_8i32_55555555:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8i32_8i32_55555555:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 20(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8i32_8i32_55555555:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <8 x i32>, <8 x i32>* %ptr
%ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -142,10 +206,16 @@ entry:
}
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f32_4f32_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f32_4f32_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 4(%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f32_4f32_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
%ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -153,10 +223,16 @@ entry:
}
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8f32_4f32_33333333:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8f32_4f32_33333333:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 12(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8f32_4f32_33333333:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
%ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -164,10 +240,16 @@ entry:
}
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8f32_8f32_55555555:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8f32_8f32_55555555:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 20(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8f32_8f32_55555555:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
%ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -175,10 +257,16 @@ entry:
}
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_2i64_2i64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_2i64_2i64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_2i64_2i64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = mem[2,3,2,3]
+; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
@@ -186,12 +274,20 @@ entry:
}
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i64_2i64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i64_2i64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps (%eax), %xmm0
+; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i64_2i64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
+; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -199,13 +295,16 @@ entry:
}
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i64_4i64_2222:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovapd (%rdi), %ymm0
-; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i64_4i64_2222:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i64_4i64_2222:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x i64>, <4 x i64>* %ptr
%ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -213,11 +312,18 @@ entry:
}
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_2f64_2f64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_2f64_2f64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps (%eax), %xmm0
+; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_2f64_2f64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
%ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -225,10 +331,16 @@ entry:
}
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f64_2f64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f64_2f64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f64_2f64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
%ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -236,10 +348,16 @@ entry:
}
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f64_4f64_2222:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f64_4f64_2222:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f64_4f64_2222:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x double>, <4 x double>* %ptr
%ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -249,11 +367,22 @@ entry:
; Unsupported vbroadcasts
define <2 x i64> @G(i64* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: G:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: retq
+; X32-LABEL: G:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: G:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
+; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 8
%vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
@@ -262,20 +391,31 @@ entry:
}
define <4 x i32> @H(<4 x i32> %a) {
-; CHECK-LABEL: H:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
-; CHECK-NEXT: retq
+; X32-LABEL: H:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X32-NEXT: retl
+;
+; X64-LABEL: H:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
+; X64-NEXT: retq
entry:
%x = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
ret <4 x i32> %x
}
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: I:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; CHECK-NEXT: retq
+; X32-LABEL: I:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: I:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: retq
entry:
%q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
@@ -284,12 +424,21 @@ entry:
}
define <4 x float> @_RR(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _RR:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
-; CHECK-NEXT: movl (%rsi), %eax
-; CHECK-NEXT: movl %eax, (%rax)
-; CHECK-NEXT: retq
+; X32-LABEL: _RR:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: vbroadcastss (%ecx), %xmm0
+; X32-NEXT: movl (%eax), %eax
+; X32-NEXT: movl %eax, (%eax)
+; X32-NEXT: retl
+;
+; X64-LABEL: _RR:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %xmm0
+; X64-NEXT: movl (%rsi), %eax
+; X64-NEXT: movl %eax, (%rax)
+; X64-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%vecinit.i = insertelement <4 x float> undef, float %q, i32 0
@@ -303,10 +452,16 @@ entry:
}
define <4 x float> @_RR2(float* %ptr, i32* %k) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _RR2:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _RR2:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _RR2:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load float, float* %ptr, align 4
%v = insertelement <4 x float> undef, float %q, i32 0
@@ -319,10 +474,16 @@ entry:
; (via the insertelements).
define <8 x float> @splat_concat1(float* %p) {
-; CHECK-LABEL: splat_concat1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat1:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat1:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
%1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
@@ -333,10 +494,16 @@ define <8 x float> @splat_concat1(float* %p) {
}
define <8 x float> @splat_concat2(float* %p) {
-; CHECK-LABEL: splat_concat2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat2:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat2:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
%1 = load float, float* %p, align 4
%2 = insertelement <4 x float> undef, float %1, i32 0
%3 = insertelement <4 x float> %2, float %1, i32 1
@@ -351,10 +518,16 @@ define <8 x float> @splat_concat2(float* %p) {
}
define <4 x double> @splat_concat3(double* %p) {
-; CHECK-LABEL: splat_concat3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat3:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat3:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
%1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
@@ -363,10 +536,16 @@ define <4 x double> @splat_concat3(double* %p) {
}
define <4 x double> @splat_concat4(double* %p) {
-; CHECK-LABEL: splat_concat4:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat4:
+; X32: ## BB#0:
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat4:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
%1 = load double, double* %p, align 8
%2 = insertelement <2 x double> undef, double %1, i32 0
%3 = insertelement <2 x double> %2, double %1, i32 1
diff --git a/test/CodeGen/X86/avx2-vbroadcast.ll b/test/CodeGen/X86/avx2-vbroadcast.ll
index 6b77edb155a4..8fd50ae3015d 100644
--- a/test/CodeGen/X86/avx2-vbroadcast.ll
+++ b/test/CodeGen/X86/avx2-vbroadcast.ll
@@ -1,11 +1,18 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: BB16:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastb (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: BB16:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastb (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: BB16:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastb (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <16 x i8> undef, i8 %q, i32 0
@@ -28,10 +35,16 @@ entry:
}
define <32 x i8> @BB32(i8* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: BB32:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastb (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: BB32:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastb (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: BB32:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastb (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i8, i8* %ptr, align 4
%q0 = insertelement <32 x i8> undef, i8 %q, i32 0
@@ -71,10 +84,16 @@ entry:
}
define <8 x i16> @W16(i16* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: W16:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastw (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: W16:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastw (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: W16:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastw (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <8 x i16> undef, i16 %q, i32 0
@@ -89,10 +108,16 @@ entry:
}
define <16 x i16> @WW16(i16* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: WW16:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastw (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: WW16:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastw (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: WW16:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastw (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i16, i16* %ptr, align 4
%q0 = insertelement <16 x i16> undef, i16 %q, i32 0
@@ -115,10 +140,16 @@ entry:
}
define <4 x i32> @D32(i32* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: D32:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: D32:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: D32:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <4 x i32> undef, i32 %q, i32 0
@@ -129,10 +160,16 @@ entry:
}
define <8 x i32> @DD32(i32* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: DD32:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: DD32:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss (%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: DD32:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i32, i32* %ptr, align 4
%q0 = insertelement <8 x i32> undef, i32 %q, i32 0
@@ -147,10 +184,21 @@ entry:
}
define <2 x i64> @Q64(i64* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: Q64:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq (%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: Q64:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: Q64:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastq (%rdi), %xmm0
+; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <2 x i64> undef, i64 %q, i32 0
@@ -159,10 +207,22 @@ entry:
}
define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: QQ64:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: QQ64:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl (%eax), %ecx
+; X32-NEXT: movl 4(%eax), %eax
+; X32-NEXT: vmovd %ecx, %xmm0
+; X32-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: QQ64:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd (%rdi), %ymm0
+; X64-NEXT: retq
entry:
%q = load i64, i64* %ptr, align 4
%q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -175,10 +235,16 @@ entry:
; FIXME: Pointer adjusted broadcasts
define <16 x i8> @load_splat_16i8_16i8_1111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_16i8_16i8_1111111111111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastb 1(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_16i8_16i8_1111111111111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastb 1(%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_16i8_16i8_1111111111111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastb 1(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
%ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -186,10 +252,16 @@ entry:
}
define <32 x i8> @load_splat_32i8_16i8_11111111111111111111111111111111(<16 x i8>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_32i8_16i8_11111111111111111111111111111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <16 x i8>, <16 x i8>* %ptr
%ret = shufflevector <16 x i8> %ld, <16 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -197,10 +269,16 @@ entry:
}
define <32 x i8> @load_splat_32i8_32i8_11111111111111111111111111111111(<32 x i8>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastb 1(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastb 1(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_32i8_32i8_11111111111111111111111111111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastb 1(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <32 x i8>, <32 x i8>* %ptr
%ret = shufflevector <32 x i8> %ld, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -208,10 +286,16 @@ entry:
}
define <8 x i16> @load_splat_8i16_8i16_11111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8i16_8i16_11111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastw 2(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8i16_8i16_11111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastw 2(%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8i16_8i16_11111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastw 2(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
%ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -219,10 +303,16 @@ entry:
}
define <16 x i16> @load_splat_16i16_8i16_1111111111111111(<8 x i16>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_16i16_8i16_1111111111111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_16i16_8i16_1111111111111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_16i16_8i16_1111111111111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <8 x i16>, <8 x i16>* %ptr
%ret = shufflevector <8 x i16> %ld, <8 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -230,10 +320,16 @@ entry:
}
define <16 x i16> @load_splat_16i16_16i16_1111111111111111(<16 x i16>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_16i16_16i16_1111111111111111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastw 2(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_16i16_16i16_1111111111111111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vpbroadcastw 2(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_16i16_16i16_1111111111111111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastw 2(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <16 x i16>, <16 x i16>* %ptr
%ret = shufflevector <16 x i16> %ld, <16 x i16> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
@@ -241,10 +337,16 @@ entry:
}
define <4 x i32> @load_splat_4i32_4i32_1111(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i32_4i32_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i32_4i32_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 4(%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i32_4i32_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -252,10 +354,16 @@ entry:
}
define <8 x i32> @load_splat_8i32_4i32_33333333(<4 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8i32_4i32_33333333:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8i32_4i32_33333333:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 12(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8i32_4i32_33333333:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x i32>, <4 x i32>* %ptr
%ret = shufflevector <4 x i32> %ld, <4 x i32> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -263,10 +371,16 @@ entry:
}
define <8 x i32> @load_splat_8i32_8i32_55555555(<8 x i32>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8i32_8i32_55555555:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8i32_8i32_55555555:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 20(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8i32_8i32_55555555:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <8 x i32>, <8 x i32>* %ptr
%ret = shufflevector <8 x i32> %ld, <8 x i32> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -274,10 +388,16 @@ entry:
}
define <4 x float> @load_splat_4f32_4f32_1111(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f32_4f32_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 4(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f32_4f32_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 4(%eax), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f32_4f32_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 4(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
%ret = shufflevector <4 x float> %ld, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -285,10 +405,16 @@ entry:
}
define <8 x float> @load_splat_8f32_4f32_33333333(<4 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8f32_4f32_33333333:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 12(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8f32_4f32_33333333:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 12(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8f32_4f32_33333333:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 12(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x float>, <4 x float>* %ptr
%ret = shufflevector <4 x float> %ld, <4 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
@@ -296,10 +422,16 @@ entry:
}
define <8 x float> @load_splat_8f32_8f32_55555555(<8 x float>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_8f32_8f32_55555555:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss 20(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_8f32_8f32_55555555:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastss 20(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_8f32_8f32_55555555:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss 20(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <8 x float>, <8 x float>* %ptr
%ret = shufflevector <8 x float> %ld, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
@@ -307,10 +439,17 @@ entry:
}
define <2 x i64> @load_splat_2i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_2i64_2i64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastq 8(%rdi), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_2i64_2i64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_2i64_2i64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastq 8(%rdi), %xmm0
+; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
@@ -318,10 +457,16 @@ entry:
}
define <4 x i64> @load_splat_4i64_2i64_1111(<2 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i64_2i64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i64_2i64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i64_2i64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <2 x i64>, <2 x i64>* %ptr
%ret = shufflevector <2 x i64> %ld, <2 x i64> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -329,10 +474,16 @@ entry:
}
define <4 x i64> @load_splat_4i64_4i64_2222(<4 x i64>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4i64_4i64_2222:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4i64_4i64_2222:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4i64_4i64_2222:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x i64>, <4 x i64>* %ptr
%ret = shufflevector <4 x i64> %ld, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -340,11 +491,18 @@ entry:
}
define <2 x double> @load_splat_2f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_2f64_2f64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovaps (%rdi), %xmm0
-; CHECK-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_2f64_2f64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovaps (%eax), %xmm0
+; X32-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_2f64_2f64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovaps (%rdi), %xmm0
+; X64-NEXT: vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
+; X64-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
%ret = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 1>
@@ -352,10 +510,16 @@ entry:
}
define <4 x double> @load_splat_4f64_2f64_1111(<2 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f64_2f64_1111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 8(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f64_2f64_1111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 8(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f64_2f64_1111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 8(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <2 x double>, <2 x double>* %ptr
%ret = shufflevector <2 x double> %ld, <2 x double> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1>
@@ -363,10 +527,16 @@ entry:
}
define <4 x double> @load_splat_4f64_4f64_2222(<4 x double>* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: load_splat_4f64_4f64_2222:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastsd 16(%rdi), %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: load_splat_4f64_4f64_2222:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vbroadcastsd 16(%eax), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: load_splat_4f64_4f64_2222:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastsd 16(%rdi), %ymm0
+; X64-NEXT: retq
entry:
%ld = load <4 x double>, <4 x double>* %ptr
%ret = shufflevector <4 x double> %ld, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
@@ -376,10 +546,16 @@ entry:
; make sure that we still don't support broadcast double into 128-bit vector
; this used to crash
define <2 x double> @I(double* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: I:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
-; CHECK-NEXT: retq
+; X32-LABEL: I:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: I:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
+; X64-NEXT: retq
entry:
%q = load double, double* %ptr, align 4
%vecinit.i = insertelement <2 x double> undef, double %q, i32 0
@@ -388,32 +564,49 @@ entry:
}
define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
-; CHECK-LABEL: V111:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
-; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: V111:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: vpbroadcastd LCPI27_0, %ymm1
+; X32-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: V111:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vpbroadcastd {{.*}}(%rip), %ymm1
+; X64-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; X64-NEXT: retq
entry:
%g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
ret <8 x i32> %g
}
define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
-; CHECK-LABEL: V113:
-; CHECK: ## BB#0: ## %entry
-; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
-; CHECK-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: V113:
+; X32: ## BB#0: ## %entry
+; X32-NEXT: vbroadcastss LCPI28_0, %ymm1
+; X32-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: V113:
+; X64: ## BB#0: ## %entry
+; X64-NEXT: vbroadcastss {{.*}}(%rip), %ymm1
+; X64-NEXT: vaddps %ymm1, %ymm0, %ymm0
+; X64-NEXT: retq
entry:
%g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
ret <8 x float> %g
}
define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _e2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _e2:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss LCPI29_0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _e2:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss {{.*}}(%rip), %xmm0
+; X64-NEXT: retq
%vecinit.i = insertelement <4 x float> undef, float 0xbf80000000000000, i32 0
%vecinit2.i = insertelement <4 x float> %vecinit.i, float 0xbf80000000000000, i32 1
%vecinit4.i = insertelement <4 x float> %vecinit2.i, float 0xbf80000000000000, i32 2
@@ -422,10 +615,15 @@ define <4 x float> @_e2(float* %ptr) nounwind uwtable readnone ssp {
}
define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _e4:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
-; CHECK-NEXT: retq
+; X32-LABEL: _e4:
+; X32: ## BB#0:
+; X32-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
+; X32-NEXT: retl
+;
+; X64-LABEL: _e4:
+; X64: ## BB#0:
+; X64-NEXT: vmovaps {{.*#+}} xmm0 = [52,52,52,52,52,52,52,52]
+; X64-NEXT: retq
%vecinit0.i = insertelement <8 x i8> undef, i8 52, i32 0
%vecinit1.i = insertelement <8 x i8> %vecinit0.i, i8 52, i32 1
%vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 52, i32 2
@@ -437,19 +635,30 @@ define <8 x i8> @_e4(i8* %ptr) nounwind uwtable readnone ssp {
ret <8 x i8> %vecinit7.i
}
-
define void @crash() nounwind alwaysinline {
-; CHECK-LABEL: crash:
-; CHECK: ## BB#0: ## %WGLoopsEntry
-; CHECK-NEXT: xorl %eax, %eax
-; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je LBB31_1
-; CHECK-NEXT: ## BB#2: ## %ret
-; CHECK-NEXT: retq
-; CHECK-NEXT: .align 4, 0x90
-; CHECK-NEXT: LBB31_1: ## %footer349VF
-; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: jmp LBB31_1
+; X32-LABEL: crash:
+; X32: ## BB#0: ## %WGLoopsEntry
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: testb %al, %al
+; X32-NEXT: je LBB31_1
+; X32-NEXT: ## BB#2: ## %ret
+; X32-NEXT: retl
+; X32-NEXT: .align 4, 0x90
+; X32-NEXT: LBB31_1: ## %footer349VF
+; X32-NEXT: ## =>This Inner Loop Header: Depth=1
+; X32-NEXT: jmp LBB31_1
+;
+; X64-LABEL: crash:
+; X64: ## BB#0: ## %WGLoopsEntry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: testb %al, %al
+; X64-NEXT: je LBB31_1
+; X64-NEXT: ## BB#2: ## %ret
+; X64-NEXT: retq
+; X64-NEXT: .align 4, 0x90
+; X64-NEXT: LBB31_1: ## %footer349VF
+; X64-NEXT: ## =>This Inner Loop Header: Depth=1
+; X64-NEXT: jmp LBB31_1
WGLoopsEntry:
br i1 undef, label %ret, label %footer329VF
@@ -477,150 +686,230 @@ ret:
}
define <8 x i32> @_inreg0(i32 %scalar) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _inreg0:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vmovd %edi, %xmm0
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg0:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg0:
+; X64: ## BB#0:
+; X64-NEXT: vmovd %edi, %xmm0
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%in = insertelement <8 x i32> undef, i32 %scalar, i32 0
%wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %wide
}
define <8 x float> @_inreg1(float %scalar) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _inreg1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg1:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg1:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%in = insertelement <8 x float> undef, float %scalar, i32 0
%wide = shufflevector <8 x float> %in, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %wide
}
define <4 x float> @_inreg2(float %scalar) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _inreg2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg2:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg2:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %xmm0
+; X64-NEXT: retq
%in = insertelement <4 x float> undef, float %scalar, i32 0
%wide = shufflevector <4 x float> %in, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %wide
}
define <4 x double> @_inreg3(double %scalar) nounwind uwtable readnone ssp {
-; CHECK-LABEL: _inreg3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg3:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg3:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-NEXT: retq
%in = insertelement <4 x double> undef, double %scalar, i32 0
%wide = shufflevector <4 x double> %in, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %wide
}
define <8 x float> @_inreg8xfloat(<8 x float> %a) {
-; CHECK-LABEL: _inreg8xfloat:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg8xfloat:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg8xfloat:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> zeroinitializer
ret <8 x float> %b
}
define <4 x float> @_inreg4xfloat(<4 x float> %a) {
-; CHECK-LABEL: _inreg4xfloat:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg4xfloat:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg4xfloat:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %xmm0
+; X64-NEXT: retq
%b = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> zeroinitializer
ret <4 x float> %b
}
define <16 x i16> @_inreg16xi16(<16 x i16> %a) {
-; CHECK-LABEL: _inreg16xi16:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg16xi16:
+; X32: ## BB#0:
+; X32-NEXT: vpbroadcastw %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg16xi16:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastw %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> zeroinitializer
ret <16 x i16> %b
}
define <8 x i16> @_inreg8xi16(<8 x i16> %a) {
-; CHECK-LABEL: _inreg8xi16:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg8xi16:
+; X32: ## BB#0:
+; X32-NEXT: vpbroadcastw %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg8xi16:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastw %xmm0, %xmm0
+; X64-NEXT: retq
%b = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> zeroinitializer
ret <8 x i16> %b
}
define <4 x i64> @_inreg4xi64(<4 x i64> %a) {
-; CHECK-LABEL: _inreg4xi64:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg4xi64:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg4xi64:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> zeroinitializer
ret <4 x i64> %b
}
define <2 x i64> @_inreg2xi64(<2 x i64> %a) {
-; CHECK-LABEL: _inreg2xi64:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg2xi64:
+; X32: ## BB#0:
+; X32-NEXT: vpbroadcastq %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg2xi64:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastq %xmm0, %xmm0
+; X64-NEXT: retq
%b = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> zeroinitializer
ret <2 x i64> %b
}
define <4 x double> @_inreg4xdouble(<4 x double> %a) {
-; CHECK-LABEL: _inreg4xdouble:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg4xdouble:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastsd %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg4xdouble:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <4 x double> %a, <4 x double> undef, <4 x i32> zeroinitializer
ret <4 x double> %b
}
define <2 x double> @_inreg2xdouble(<2 x double> %a) {
-; CHECK-LABEL: _inreg2xdouble:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg2xdouble:
+; X32: ## BB#0:
+; X32-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg2xdouble:
+; X64: ## BB#0:
+; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X64-NEXT: retq
%b = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> zeroinitializer
ret <2 x double> %b
}
define <8 x i32> @_inreg8xi32(<8 x i32> %a) {
-; CHECK-LABEL: _inreg8xi32:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg8xi32:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg8xi32:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> zeroinitializer
ret <8 x i32> %b
}
define <4 x i32> @_inreg4xi32(<4 x i32> %a) {
-; CHECK-LABEL: _inreg4xi32:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg4xi32:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg4xi32:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %xmm0
+; X64-NEXT: retq
%b = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> zeroinitializer
ret <4 x i32> %b
}
define <32 x i8> @_inreg32xi8(<32 x i8> %a) {
-; CHECK-LABEL: _inreg32xi8:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg32xi8:
+; X32: ## BB#0:
+; X32-NEXT: vpbroadcastb %xmm0, %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg32xi8:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastb %xmm0, %ymm0
+; X64-NEXT: retq
%b = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> zeroinitializer
ret <32 x i8> %b
}
define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
-; CHECK-LABEL: _inreg16xi8:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0
-; CHECK-NEXT: retq
+; X32-LABEL: _inreg16xi8:
+; X32: ## BB#0:
+; X32-NEXT: vpbroadcastb %xmm0, %xmm0
+; X32-NEXT: retl
+;
+; X64-LABEL: _inreg16xi8:
+; X64: ## BB#0:
+; X64-NEXT: vpbroadcastb %xmm0, %xmm0
+; X64-NEXT: retq
%b = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> zeroinitializer
ret <16 x i8> %b
}
@@ -630,10 +919,15 @@ define <16 x i8> @_inreg16xi8(<16 x i8> %a) {
; (via the insertelements).
define <8 x float> @splat_concat1(float %f) {
-; CHECK-LABEL: splat_concat1:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat1:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat1:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -643,10 +937,15 @@ define <8 x float> @splat_concat1(float %f) {
}
define <8 x float> @splat_concat2(float %f) {
-; CHECK-LABEL: splat_concat2:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat2:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat2:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastss %xmm0, %ymm0
+; X64-NEXT: retq
%1 = insertelement <4 x float> undef, float %f, i32 0
%2 = insertelement <4 x float> %1, float %f, i32 1
%3 = insertelement <4 x float> %2, float %f, i32 2
@@ -660,10 +959,15 @@ define <8 x float> @splat_concat2(float %f) {
}
define <4 x double> @splat_concat3(double %d) {
-; CHECK-LABEL: splat_concat3:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat3:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat3:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
@@ -671,10 +975,15 @@ define <4 x double> @splat_concat3(double %d) {
}
define <4 x double> @splat_concat4(double %d) {
-; CHECK-LABEL: splat_concat4:
-; CHECK: ## BB#0:
-; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
-; CHECK-NEXT: retq
+; X32-LABEL: splat_concat4:
+; X32: ## BB#0:
+; X32-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0
+; X32-NEXT: retl
+;
+; X64-LABEL: splat_concat4:
+; X64: ## BB#0:
+; X64-NEXT: vbroadcastsd %xmm0, %ymm0
+; X64-NEXT: retq
%1 = insertelement <2 x double> undef, double %d, i32 0
%2 = insertelement <2 x double> %1, double %d, i32 1
%3 = insertelement <2 x double> undef, double %d, i32 0
@@ -805,9 +1114,9 @@ eintry:
ret void
}
-; CHECK-LABEL: isel_crash_2q
-; CHECK: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
-; CHECK: ret
+; X64-LABEL: isel_crash_2q
+; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
+; X64: ret
define void @isel_crash_2q(i64* %cV_R.addr) {
entry:
%__a.addr.i = alloca <2 x i64>, align 16
@@ -823,9 +1132,9 @@ entry:
ret void
}
-; CHECK-LABEL: isel_crash_4q
-; CHECK: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
-; CHECK: ret
+; X64-LABEL: isel_crash_4q
+; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
+; X64: ret
define void @isel_crash_4q(i64* %cV_R.addr) {
eintry:
%__a.addr.i = alloca <4 x i64>, align 16
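
Note the Q64 and QQ64 hunks above: i686 has no 64-bit GPR, so the scalar i64 load is type-legalized into two 32-bit loads and the splat is reassembled with vmovd/vpinsrd (plus vinserti128 for the 256-bit case) instead of a single vpbroadcastq or vbroadcastsd. Loading the element as a vector lane avoids the scalar split entirely; a sketch, illustrative and not part of the patch, whose expected output follows from the load_splat_4i64_2i64_1111 checks above:

define <4 x i64> @splat_q_from_vector(<2 x i64>* %p) {
; Under the RUN lines of avx2-vbroadcast.ll, both targets should keep one
; memory broadcast (vbroadcastsd (%eax) on X32, vbroadcastsd (%rdi) on X64),
; because no i64 scalar ever has to live in a 32-bit GPR.
  %v = load <2 x i64>, <2 x i64>* %p
  %s = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %s
}
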
diff --git a/test/CodeGen/X86/avx512-intrinsics.ll b/test/CodeGen/X86/avx512-intrinsics.ll
index 5a17cdb29216..7179f742cc66 100644
--- a/test/CodeGen/X86/avx512-intrinsics.ll
+++ b/test/CodeGen/X86/avx512-intrinsics.ll
@@ -907,49 +907,79 @@ define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask)
declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)
-define <16 x float> @test_maskz_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
-; CHECK-LABEL: test_maskz_load_aligned_ps:
+define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: vmovaps (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: vmovaps (%rdi), %zmm1 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
- ret <16 x float> %res
+ %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
+ %res4 = fadd <16 x float> %res2, %res1
+ ret <16 x float> %res4
}
declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)
-define <8 x double> @test_maskz_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
-; CHECK-LABEL: test_maskz_load_aligned_pd:
+define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_ps:
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %esi, %k1
-; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1} {z}
+; CHECK-NEXT: vmovups (%rdi), %zmm0
+; CHECK-NEXT: vmovups (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: vmovups (%rdi), %zmm1 {%k1} {z}
+; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
- ret <8 x double> %res
+ %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
+ %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
+ %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
+ %res4 = fadd <16 x float> %res2, %res1
+ ret <16 x float> %res4
}
-declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
+declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)
-define <16 x float> @test_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
-; CHECK-LABEL: test_load_aligned_ps:
+define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_pd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovaps (%rdi), %zmm0
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovapd (%rdi), %zmm0
+; CHECK-NEXT: vmovapd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: vmovapd (%rdi), %zmm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
- ret <16 x float> %res
+ %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x double> %res2, %res1
+ ret <8 x double> %res4
}
-define <8 x double> @test_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
-; CHECK-LABEL: test_load_aligned_pd:
+declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)
+
+define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_pd:
; CHECK: ## BB#0:
-; CHECK-NEXT: vmovapd (%rdi), %zmm0
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovupd (%rdi), %zmm0
+; CHECK-NEXT: vmovupd (%rdi), %zmm0 {%k1}
+; CHECK-NEXT: vmovupd (%rdi), %zmm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
- %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
- ret <8 x double> %res
+ %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
+ %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
+ %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x double> %res2, %res1
+ ret <8 x double> %res4
}
-declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*)
+declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)
define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
; CHECK-LABEL: test_valign_q:
@@ -5731,7 +5761,7 @@ define <8 x double>@test_int_x86_avx512_mask_getmant_pd_512(<8 x double> %x0, <8
; CHECK-NEXT: movzbl %dil, %eax
; CHECK-NEXT: kmovw %eax, %k1
; CHECK-NEXT: vgetmantpd $11, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vgetmantpd $11,{sae}, %zmm0, %zmm0
+; CHECK-NEXT: vgetmantpd $11, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <8 x double> @llvm.x86.avx512.mask.getmant.pd.512(<8 x double> %x0, i32 11, <8 x double> %x2, i8 %x3, i32 4)
@@ -5747,7 +5777,7 @@ define <16 x float>@test_int_x86_avx512_mask_getmant_ps_512(<16 x float> %x0, <1
; CHECK: ## BB#0:
; CHECK-NEXT: kmovw %edi, %k1
; CHECK-NEXT: vgetmantps $11, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vgetmantps $11,{sae}, %zmm0, %zmm0
+; CHECK-NEXT: vgetmantps $11, {sae}, %zmm0, %zmm0
; CHECK-NEXT: vaddps %zmm0, %zmm1, %zmm0
; CHECK-NEXT: retq
%res = call <16 x float> @llvm.x86.avx512.mask.getmant.ps.512(<16 x float> %x0, i32 11, <16 x float> %x2, i16 %x3, i32 4)
@@ -5767,7 +5797,7 @@ define <2 x double>@test_int_x86_avx512_mask_getmant_sd(<2 x double> %x0, <2 x d
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm3 {%k1}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm4 {%k1} {z}
; CHECK-NEXT: vgetmantsd $11, %xmm1, %xmm0, %xmm5
-; CHECK-NEXT: vgetmantsd $11,{sae}, %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vgetmantsd $11, {sae}, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vaddpd %xmm4, %xmm3, %xmm0
; CHECK-NEXT: vaddpd %xmm5, %xmm2, %xmm1
; CHECK-NEXT: vaddpd %xmm1, %xmm0, %xmm0
@@ -5792,7 +5822,7 @@ define <4 x float>@test_int_x86_avx512_mask_getmant_ss(<4 x float> %x0, <4 x flo
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm2 {%k1}
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm3 {%k1} {z}
; CHECK-NEXT: vgetmantss $11, %xmm1, %xmm0, %xmm4
-; CHECK-NEXT: vgetmantss $11,{sae}, %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vgetmantss $11, {sae}, %xmm1, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm1
; CHECK-NEXT: vaddps %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
@@ -6542,3 +6572,316 @@ define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i8 %x1, <8
ret <8 x i64> %res4
}
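
Every intrinsic test added below, like the rewritten mask_load tests earlier in this file, follows one shape: call the intrinsic with merge masking, with zero masking, and with an all-ones mask, then combine the three results so FileCheck can verify the {%k1}, {%k1} {z}, and unmasked encodings inside a single function. An annotated restatement of that skeleton, using the prorv.d declaration from the hunk below (the comments mapping calls to encodings are editorial, not from the patch):

declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32> @mask_coverage_shape(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %passthru, i16 %m) {
  ; merge masking: lanes cleared in %m keep %passthru -> vprorvd ..., %zmm2 {%k1}
  %merge = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %passthru, i16 %m)
  ; zero masking: zeroinitializer passthru -> vprorvd ..., %zmm3 {%k1} {z}
  %zero = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %m)
  ; all-ones mask: the unmasked encoding -> vprorvd ..., %zmm0
  %plain = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %passthru, i16 -1)
  ; sum the three results so none of the calls can be dead-code eliminated
  %t = add <16 x i32> %merge, %zero
  %r = add <16 x i32> %t, %plain
  ret <16 x i32> %r
}
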
+declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i16, <16 x i32>, i8)
+
+define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i16 %x1, <16 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpshufd $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpshufd $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpshufd $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i16 3, <16 x i32> %x2, i8 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm2 {%k1}
+; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %zmm1, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i8, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i8 %x1, <16 x i32> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vprold $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vprold $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 %x3)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i8 3, <16 x i32> %x2, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i8, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i8 %x1, <8 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolq $3, %zmm0, %zmm1 {%k1}
+; CHECK-NEXT: vprolq $3, %zmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %zmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 %x3)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i8 3, <8 x i64> %x2, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxdq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %ymm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxwd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %ymm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxdq %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovsxdq %ymm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxdq %ymm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+
+declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)
+
+define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxwd %ymm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %ymm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %ymm0, %zmm0
+; CHECK-NEXT: vpaddd %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
+ %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
+ %res3 = add <16 x i32> %res, %res1
+ %res4 = add <16 x i32> %res3, %res2
+ ret <16 x i32> %res4
+}
+
+
+declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)
+
+define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %zmm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %zmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %zmm0
+; CHECK-NEXT: vpaddq %zmm2, %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
+ %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
+ %res3 = add <8 x i64> %res, %res1
+ %res4 = add <8 x i64> %res3, %res2
+ ret <8 x i64> %res4
+}
+
+
diff --git a/test/CodeGen/X86/avx512bw-intrinsics.ll b/test/CodeGen/X86/avx512bw-intrinsics.ll
index 064652aa470d..7cf6edafbcc8 100644
--- a/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -2910,15 +2910,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i8 %x1, <
declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_psrlv32hi:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrlv32hi:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsrlvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -2930,15 +2930,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16>
declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %edi, %k1
-; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
-; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
-; CHECK-NEXT: vpsraw %xmm1, %zmm0, %zmm0
-; CHECK-NEXT: vpaddw %zmm3, %zmm2, %zmm1
-; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsraw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
@@ -2950,15 +2950,15 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16>
declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i8, <32 x i16>, i32)
define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
-; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_512:
-; CHECK: ## BB#0:
-; CHECK-NEXT: kmovd %esi, %k1
-; CHECK-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
-; CHECK-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
-; CHECK-NEXT: vpsraw $3, %zmm0, %zmm0
-; CHECK-NEXT: vpaddw %zmm2, %zmm1, %zmm1
-; CHECK-NEXT: vpaddw %zmm0, %zmm1, %zmm0
-; CHECK-NEXT: retq
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psra_wi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpsraw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
%res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
%res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
%res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
@@ -2966,3 +2966,164 @@ define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i8 %x1, <
%res4 = add <32 x i16> %res3, %res2
ret <32 x i16> %res4
}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i8, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpshufhw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpshufhw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpshufhw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i8, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpshuflw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psrav32_hi:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsravw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i8, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i8 %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psll_wi_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %esi, %k1
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpsllw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i8 3, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_psllv32hi:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm2 {%k1}
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm3 {%k1} {z}
+; AVX512BW-NEXT: vpsllvw %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm3, %zmm2, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpmovzxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
+
+
+declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
+
+define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
+; AVX512BW-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
+; AVX512BW: ## BB#0:
+; AVX512BW-NEXT: kmovd %edi, %k1
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm1 {%k1}
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm2 {%k1} {z}
+; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
+; AVX512BW-NEXT: vpaddw %zmm2, %zmm1, %zmm1
+; AVX512BW-NEXT: vpaddw %zmm0, %zmm1, %zmm0
+; AVX512BW-NEXT: retq
+ %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
+ %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
+ %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
+ %res3 = add <32 x i16> %res, %res1
+ %res4 = add <32 x i16> %res3, %res2
+ ret <32 x i16> %res4
+}
\ No newline at end of file
diff --git a/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
index 6b2cb432f1cd..4cbb9ba6c56a 100644
--- a/test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -4714,3 +4714,424 @@ define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i8 %x1, <
%res4 = add <16 x i16> %res3, %res2
ret <16 x i16> %res4
}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i16, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i16 %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpshufd $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpshufd $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpshufd $3, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[3,0,0,0]
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i16 3, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i16, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i16 %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpshufd $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpshufd $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpshufd $3, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0,7,4,4,4]
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i16 3, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpshufhw $3, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[0,1,2,3,7,4,4,4]
+; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpshufhw $3, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
+; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpshuflw $3, %xmm0, %xmm0
+; CHECK-NEXT: ## xmm0 = xmm0[3,0,0,0,4,5,6,7]
+; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpshuflw $3, %ymm0, %ymm0
+; CHECK-NEXT: ## ymm0 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
+; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsravw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsravw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+
+declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsllw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsllw %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i8, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i8 %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllw $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpsllw $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpsllw $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i8 3, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i8, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i8 %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %esi, %k1
+; CHECK-NEXT: vpsllw $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vpsllw $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpsllw $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i8 3, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsllvw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsllvw %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbw %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovzxbw %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbw %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbw %xmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+
+declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
+
+define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbw %xmm0, %xmm0
+; CHECK-NEXT: vpaddw %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
+ %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
+ %res3 = add <8 x i16> %res, %res1
+ %res4 = add <8 x i16> %res3, %res2
+ ret <8 x i16> %res4
+}
+
+declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
+
+define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovw %edi, %k1
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
+; CHECK-NEXT: vpaddw %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
+ %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
+ %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
+ %res3 = add <16 x i16> %res, %res1
+ %res4 = add <16 x i16> %res3, %res2
+ ret <16 x i16> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxdq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxdq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxdq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxdq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
diff --git a/test/CodeGen/X86/avx512vl-intrinsics.ll b/test/CodeGen/X86/avx512vl-intrinsics.ll
index 8ab34bd8c436..a4f3e666833a 100644
--- a/test/CodeGen/X86/avx512vl-intrinsics.ll
+++ b/test/CodeGen/X86/avx512vl-intrinsics.ll
@@ -6387,3 +6387,1036 @@ define <4 x i64>@test_int_x86_avx512_mask_psll_qi_256(<4 x i64> %x0, i8 %x1, <4
%res4 = add <4 x i64> %res3, %res2
ret <4 x i64> %res4
}
+
+define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps (%rdi), %ymm0
+; CHECK-NEXT: vmovaps (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: vmovaps (%rdi), %ymm1 {%k1} {z}
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x float> %res2, %res1
+ ret <8 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
+
+define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_ps_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovups (%rdi), %ymm0
+; CHECK-NEXT: vmovups (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: vmovups (%rdi), %ymm1 {%k1} {z}
+; CHECK-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
+ %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
+ %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
+ %res4 = fadd <8 x float> %res2, %res1
+ ret <8 x float> %res4
+}
+
+declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
+
+define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovapd (%rdi), %ymm0
+; CHECK-NEXT: vmovapd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: vmovapd (%rdi), %ymm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <4 x double> %res2, %res1
+ ret <4 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
+
+define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_pd_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovupd (%rdi), %ymm0
+; CHECK-NEXT: vmovupd (%rdi), %ymm0 {%k1}
+; CHECK-NEXT: vmovupd (%rdi), %ymm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
+ %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
+ %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <4 x double> %res2, %res1
+ ret <4 x double> %res4
+}
+
+declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
+
+define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovaps (%rdi), %xmm0
+; CHECK-NEXT: vmovaps (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: vmovaps (%rdi), %xmm1 {%k1} {z}
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
+ %res4 = fadd <4 x float> %res2, %res1
+ ret <4 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
+
+define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_ps_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovups (%rdi), %xmm0
+; CHECK-NEXT: vmovups (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: vmovups (%rdi), %xmm1 {%k1} {z}
+; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
+ %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
+ %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
+ %res4 = fadd <4 x float> %res2, %res1
+ ret <4 x float> %res4
+}
+
+declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
+
+define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_aligned_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovapd (%rdi), %xmm0
+; CHECK-NEXT: vmovapd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: vmovapd (%rdi), %xmm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <2 x double> %res2, %res1
+ ret <2 x double> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
+
+define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
+; CHECK-LABEL: test_mask_load_unaligned_pd_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vmovupd (%rdi), %xmm0
+; CHECK-NEXT: vmovupd (%rdi), %xmm0 {%k1}
+; CHECK-NEXT: vmovupd (%rdi), %xmm1 {%k1} {z}
+; CHECK-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
+ %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
+ %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
+ %res4 = fadd <2 x double> %res2, %res1
+ ret <2 x double> %res4
+}
+
+declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
+
+declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsravd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsravd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsravq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psrav_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsravq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv2_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsllvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_di:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsllvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv4_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_si:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vpsllvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prorv_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprorvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i8, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprold $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i8, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprold $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprold $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i8, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i8, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prol_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprolq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprolvd %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prolv_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvd %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm2 {%k1}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %xmm1, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_prolv_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm2 {%k1}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm3 {%k1} {z}
+; CHECK-NEXT: vprolvq %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i8, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i8 %x1, <4 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprord $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 %x3)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i8 3, <4 x i32> %x2, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i8, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i8 %x1, <8 x i32> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pror_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprord $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprord $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 %x3)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> zeroinitializer, i8 %x3)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i8 3, <8 x i32> %x2, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i8, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i8 %x1, <2 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 %x3)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i8 3, <2 x i64> %x2, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i8, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i8 %x1, <4 x i64> %x2, i8 %x3) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pror_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %sil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm1 {%k1}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vprorq $3, %ymm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 %x3)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> zeroinitializer, i8 %x3)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i8 3, <4 x i64> %x2, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxdq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovzxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxbq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
+
+declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)
+
+define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %xmm0
+; CHECK-NEXT: vpaddd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
+ %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
+ %res3 = add <4 x i32> %res, %res1
+ %res4 = add <4 x i32> %res3, %res2
+ ret <4 x i32> %res4
+}
+
+declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)
+
+define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwd %xmm0, %ymm0
+; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
+ %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
+ %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
+ %res3 = add <8 x i32> %res, %res1
+ %res4 = add <8 x i32> %res3, %res2
+ ret <8 x i32> %res4
+}
+
+declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)
+
+define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %xmm0
+; CHECK-NEXT: vpaddq %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
+ %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
+ %res3 = add <2 x i64> %res, %res1
+ %res4 = add <2 x i64> %res3, %res2
+ ret <2 x i64> %res4
+}
+
+declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)
+
+define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
+; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: kmovw %eax, %k1
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm1 {%k1}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm2 {%k1} {z}
+; CHECK-NEXT: vpmovsxwq %xmm0, %ymm0
+; CHECK-NEXT: vpaddq %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: retq
+ %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
+ %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
+ %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
+ %res3 = add <4 x i64> %res, %res1
+ %res4 = add <4 x i64> %res3, %res2
+ ret <4 x i64> %res4
+}
diff --git a/test/CodeGen/X86/catchpad-lifetime.ll b/test/CodeGen/X86/catchpad-lifetime.ll
new file mode 100644
index 000000000000..dfd75334561f
--- /dev/null
+++ b/test/CodeGen/X86/catchpad-lifetime.ll
@@ -0,0 +1,91 @@
+; RUN: llc < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "x86_64-pc-windows-msvc"
+
+declare void @throw()
+
+declare i32 @__CxxFrameHandler3(...)
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %alloca2 = alloca i8*, align 4
+ %alloca1 = alloca i8*, align 4
+ store volatile i8* null, i8** %alloca1
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+; CHECK-LABEL: test1:
+; CHECK: movq $0, -16(%rbp)
+; CHECK: callq throw
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %catch.pad] unwind to caller
+
+catch.pad: ; preds = %catch.dispatch
+ %cp = catchpad within %cs [i8* null, i32 0, i8** %alloca1]
+ store volatile i8* null, i8** %alloca1
+ %bc1 = bitcast i8** %alloca1 to i8*
+ call void @llvm.lifetime.end(i64 4, i8* nonnull %bc1)
+ %bc2 = bitcast i8** %alloca2 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %bc2)
+ store volatile i8* null, i8** %alloca1
+ unreachable
+
+; CHECK-LABEL: "?catch$2@?0?test1@4HA"
+; CHECK: movq $0, -16(%rbp)
+; CHECK: movq $0, -16(%rbp)
+; CHECK: ud2
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: $cppxdata$test1:
+; CHECK: .long 32 # CatchObjOffset
+
+define void @test2() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %alloca2 = alloca i8*, align 4
+ %alloca1 = alloca i8*, align 4
+ store volatile i8* null, i8** %alloca1
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+; CHECK-LABEL: test2:
+; CHECK: movq $0, -16(%rbp)
+; CHECK: callq throw
+
+catch.dispatch: ; preds = %entry
+ %cs = catchswitch within none [label %catch.pad] unwind to caller
+
+catch.pad: ; preds = %catch.dispatch
+ %cp = catchpad within %cs [i8* null, i32 0, i8** null]
+ store volatile i8* null, i8** %alloca1
+ %bc1 = bitcast i8** %alloca1 to i8*
+ call void @llvm.lifetime.end(i64 4, i8* nonnull %bc1)
+ %bc2 = bitcast i8** %alloca2 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %bc2)
+ store volatile i8* null, i8** %alloca1
+ unreachable
+
+; CHECK-LABEL: "?catch$2@?0?test2@4HA"
+; CHECK: movq $0, -16(%rbp)
+; CHECK: movq $0, -16(%rbp)
+; CHECK: ud2
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: $cppxdata$test2:
+; CHECK: .long 0 # CatchObjOffset
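+; test1 passes %alloca1 as its catch object, so its CatchObjOffset is that
+; alloca's offset within the frame (32); test2 passes a null catch object,
+; which is encoded as a CatchObjOffset of 0.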
+
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #0
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #0
+
+attributes #0 = { argmemonly nounwind }
diff --git a/test/CodeGen/X86/cxx_tlscc64.ll b/test/CodeGen/X86/cxx_tlscc64.ll
index c229521cc9a4..70fe501040bf 100644
--- a/test/CodeGen/X86/cxx_tlscc64.ll
+++ b/test/CodeGen/X86/cxx_tlscc64.ll
@@ -1,5 +1,9 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck --check-prefix=SHRINK %s
+; TLS functions were wrongly modeled and, after fixing that, shrink-wrapping
+; cannot help here. To achieve the expected lowering, we need to play
+; tricks similar to the AArch64 fast TLS calling convention (r255821);
+; the same tricks are now applied on x86-64.
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -enable-shrink-wrap=true | FileCheck %s
%struct.S = type { i8 }
@sg = internal thread_local global %struct.S zeroinitializer, align 1
@@ -12,51 +16,28 @@ declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*)
; Every GPR should be saved - except rdi, rax, and rsp
; CHECK-LABEL: _ZTW2sg
-; CHECK: pushq %r11
-; CHECK: pushq %r10
-; CHECK: pushq %r9
-; CHECK: pushq %r8
-; CHECK: pushq %rsi
-; CHECK: pushq %rdx
-; CHECK: pushq %rcx
-; CHECK: pushq %rbx
+; CHECK-NOT: pushq %r11
+; CHECK-NOT: pushq %r10
+; CHECK-NOT: pushq %r9
+; CHECK-NOT: pushq %r8
+; CHECK-NOT: pushq %rsi
+; CHECK-NOT: pushq %rdx
+; CHECK-NOT: pushq %rcx
+; CHECK-NOT: pushq %rbx
; CHECK: callq
; CHECK: jne
; CHECK: callq
; CHECK: tlv_atexit
; CHECK: callq
-; CHECK: popq %rbx
-; CHECK: popq %rcx
-; CHECK: popq %rdx
-; CHECK: popq %rsi
-; CHECK: popq %r8
-; CHECK: popq %r9
-; CHECK: popq %r10
-; CHECK: popq %r11
-; SHRINK-LABEL: _ZTW2sg
-; SHRINK: callq
-; SHRINK: jne
-; SHRINK: pushq %r11
-; SHRINK: pushq %r10
-; SHRINK: pushq %r9
-; SHRINK: pushq %r8
-; SHRINK: pushq %rsi
-; SHRINK: pushq %rdx
-; SHRINK: pushq %rcx
-; SHRINK: pushq %rbx
-; SHRINK: callq
-; SHRINK: tlv_atexit
-; SHRINK: popq %rbx
-; SHRINK: popq %rcx
-; SHRINK: popq %rdx
-; SHRINK: popq %rsi
-; SHRINK: popq %r8
-; SHRINK: popq %r9
-; SHRINK: popq %r10
-; SHRINK: popq %r11
-; SHRINK: LBB{{.*}}:
-; SHRINK: callq
-define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() {
+; CHECK-NOT: popq %rbx
+; CHECK-NOT: popq %rcx
+; CHECK-NOT: popq %rdx
+; CHECK-NOT: popq %rsi
+; CHECK-NOT: popq %r8
+; CHECK-NOT: popq %r9
+; CHECK-NOT: popq %r10
+; CHECK-NOT: popq %r11
+define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() nounwind {
%.b.i = load i1, i1* @__tls_guard, align 1
br i1 %.b.i, label %__tls_init.exit, label %init.i
diff --git a/test/CodeGen/X86/dagcombine-cse.ll b/test/CodeGen/X86/dagcombine-cse.ll
index be1dcff7ae85..bff0e64910bf 100644
--- a/test/CodeGen/X86/dagcombine-cse.ll
+++ b/test/CodeGen/X86/dagcombine-cse.ll
@@ -1,5 +1,5 @@
; REQUIRES: asserts
-; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 14
+; RUN: llc < %s -march=x86 -mattr=+sse2 -mtriple=i386-apple-darwin -stats 2>&1 | grep asm-printer | grep 13
define i32 @t(i8* %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind {
entry:
diff --git a/test/CodeGen/X86/f16c-intrinsics.ll b/test/CodeGen/X86/f16c-intrinsics.ll
index 485592aeac38..a78022ac5505 100644
--- a/test/CodeGen/X86/f16c-intrinsics.ll
+++ b/test/CodeGen/X86/f16c-intrinsics.ll
@@ -61,6 +61,18 @@ define <4 x float> @test_x86_vcvtps2ph_128_scalar(i64* %ptr) {
ret <4 x float> %res
}
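+; The 64-bit load feeding the conversion below should be folded into the
+; memory operand of vcvtph2ps rather than first moved into a register,
+; hence the CHECK-NOT on vmov.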
+define <4 x float> @test_x86_vcvtps2ph_128_scalar2(i64* %ptr) {
+; CHECK-LABEL: test_x86_vcvtps2ph_128_scalar2:
+; CHECK-NOT: vmov
+; CHECK: vcvtph2ps (%
+
+ %load = load i64, i64* %ptr
+ %ins = insertelement <2 x i64> undef, i64 %load, i32 0
+ %bc = bitcast <2 x i64> %ins to <8 x i16>
+ %res = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %bc)
+ ret <4 x float> %res
+}
+
define void @test_x86_vcvtps2ph_256_m(<8 x i16>* nocapture %d, <8 x float> %a) nounwind {
entry:
; CHECK-LABEL: test_x86_vcvtps2ph_256_m:
diff --git a/test/CodeGen/X86/insertps-combine.ll b/test/CodeGen/X86/insertps-combine.ll
index 655f8f49f838..f2596b6347b9 100644
--- a/test/CodeGen/X86/insertps-combine.ll
+++ b/test/CodeGen/X86/insertps-combine.ll
@@ -109,3 +109,36 @@ define <4 x float> @shuffle_v4f32_0z6z(<4 x float> %A, <4 x float> %B) {
%vecinit4 = insertelement <4 x float> %vecinit3, float 0.000000e+00, i32 3
ret <4 x float> %vecinit4
}
+
+define float @extract_zero_insertps_z0z7(<4 x float> %a0, <4 x float> %a1) {
+; SSE-LABEL: extract_zero_insertps_z0z7:
+; SSE: # BB#0:
+; SSE-NEXT: xorps %xmm0, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extract_zero_insertps_z0z7:
+; AVX: # BB#0:
+; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 21)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+define float @extract_lane_insertps_5123(<4 x float> %a0, <4 x float> *%p1) {
+; SSE-LABEL: extract_lane_insertps_5123:
+; SSE: # BB#0:
+; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; SSE-NEXT: retq
+;
+; AVX-LABEL: extract_lane_insertps_5123:
+; AVX: # BB#0:
+; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; AVX-NEXT: retq
+ %a1 = load <4 x float>, <4 x float> *%p1
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %a0, <4 x float> %a1, i8 128)
+ %ext = extractelement <4 x float> %res, i32 0
+ ret float %ext
+}
+
+declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
diff --git a/test/CodeGen/X86/lea-opt.ll b/test/CodeGen/X86/lea-opt.ll
index 571f2d9084c4..8096bfabd6cf 100644
--- a/test/CodeGen/X86/lea-opt.ll
+++ b/test/CodeGen/X86/lea-opt.ll
@@ -129,3 +129,41 @@ sw.epilog: ; preds = %sw.bb.2, %sw.bb.1,
; CHECK: movl ${{[1-4]+}}, ([[REG2]])
; CHECK: movl ${{[1-4]+}}, ([[REG3]])
}
+
+define void @test4(i64 %x) nounwind minsize {
+entry:
+ %a = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 0
+ %tmp = load i32, i32* %a, align 4
+ %b = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 1
+ %tmp1 = load i32, i32* %b, align 4
+ %sub = sub i32 %tmp, %tmp1
+ %c = getelementptr inbounds [65 x %struct.anon1], [65 x %struct.anon1]* @arr1, i64 0, i64 %x, i32 2
+ %tmp2 = load i32, i32* %c, align 4
+ %add = add nsw i32 %sub, %tmp2
+ switch i32 %add, label %sw.epilog [
+ i32 1, label %sw.bb.1
+ i32 2, label %sw.bb.2
+ ]
+
+sw.bb.1: ; preds = %entry
+ store i32 111, i32* %b, align 4
+ store i32 222, i32* %c, align 4
+ br label %sw.epilog
+
+sw.bb.2: ; preds = %entry
+ store i32 333, i32* %b, align 4
+ store i32 444, i32* %c, align 4
+ br label %sw.epilog
+
+sw.epilog: ; preds = %sw.bb.2, %sw.bb.1, %entry
+ ret void
+; CHECK-LABEL: test4:
+; CHECK: leaq arr1+4({{.*}}), [[REG2:%[a-z]+]]
+; CHECK: movl -4([[REG2]]), {{.*}}
+; CHECK: subl ([[REG2]]), {{.*}}
+; CHECK: addl 4([[REG2]]), {{.*}}
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
+; CHECK: movl ${{[1-4]+}}, ([[REG2]])
+; CHECK: movl ${{[1-4]+}}, 4([[REG2]])
+}
diff --git a/test/CodeGen/X86/pr13577.ll b/test/CodeGen/X86/pr13577.ll
index 2228fbbaa53b..691d75b0e501 100644
--- a/test/CodeGen/X86/pr13577.ll
+++ b/test/CodeGen/X86/pr13577.ll
@@ -18,3 +18,19 @@ define x86_fp80 @foo(x86_fp80 %a) {
}
declare x86_fp80 @copysignl(x86_fp80, x86_fp80) nounwind readnone
+
+; This would crash:
+; https://llvm.org/bugs/show_bug.cgi?id=26070
+
+define float @pr26070() {
+ %c = call float @copysignf(float 1.0, float undef) readnone
+ ret float %c
+
+; CHECK-LABEL: pr26070:
+; CHECK: andps
+; CHECK-NEXT: orps
+; CHECK-NEXT: retq
+}
+
+declare float @copysignf(float, float)
+
diff --git a/test/CodeGen/X86/scalar-int-to-fp.ll b/test/CodeGen/X86/scalar-int-to-fp.ll
index 93039859cdfb..4a16c3198aa5 100644
--- a/test/CodeGen/X86/scalar-int-to-fp.ll
+++ b/test/CodeGen/X86/scalar-int-to-fp.ll
@@ -74,9 +74,16 @@ define x86_fp80 @s32_to_x(i32 %a) nounwind {
}
; CHECK-LABEL: u64_to_f
+; AVX512_32: vmovq {{.*#+}} xmm0 = mem[0],zero
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
; AVX512_32: fildll
+
; AVX512_64: vcvtusi2ssq
+
+; SSE2_32: movq {{.*#+}} xmm0 = mem[0],zero
+; SSE2_32: movq %xmm0, {{[0-9]+}}(%esp)
; SSE2_32: fildll
+
; SSE2_64: cvtsi2ssq
; X87: fildll
define float @u64_to_f(i64 %a) nounwind {
@@ -95,6 +102,24 @@ define float @s64_to_f(i64 %a) nounwind {
ret float %r
}
+; CHECK-LABEL: s64_to_f_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll {{[0-9]+}}(%esp)
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll {{[0-9]+}}(%esp)
+
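+; For the 32-bit targets, the checks above expect the 64-bit sum to be
+; reassembled in an XMM register, spilled to the stack, and then converted
+; with fildll.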
+define float @s64_to_f_2(i64 %a) nounwind {
+ %a1 = add i64 %a, 5
+ %r = sitofp i64 %a1 to float
+ ret float %r
+}
+
; CHECK-LABEL: u64_to_d
; AVX512_32: vpunpckldq
; AVX512_64: vcvtusi2sdq
@@ -117,6 +142,24 @@ define double @s64_to_d(i64 %a) nounwind {
ret double %r
}
+; CHECK-LABEL: s64_to_d_2
+; SSE2_32: movd %ecx, %xmm0
+; SSE2_32: movd %eax, %xmm1
+; SSE2_32: punpckldq %xmm0, %xmm1
+; SSE2_32: movq %xmm1, {{[0-9]+}}(%esp)
+; SSE2_32: fildll
+
+; AVX512_32: vmovd %eax, %xmm0
+; AVX512_32: vpinsrd $1, %ecx, %xmm0, %xmm0
+; AVX512_32: vmovlpd %xmm0, {{[0-9]+}}(%esp)
+; AVX512_32: fildll
+
+define double @s64_to_d_2(i64 %a) nounwind {
+ %b = add i64 %a, 5
+ %f = sitofp i64 %b to double
+ ret double %f
+}
+
; CHECK-LABEL: u64_to_x
; CHECK: fildll
define x86_fp80 @u64_to_x(i64 %a) nounwind {
diff --git a/test/CodeGen/X86/shrinkwrap-hang.ll b/test/CodeGen/X86/shrinkwrap-hang.ll
new file mode 100644
index 000000000000..e1e4eefb3efa
--- /dev/null
+++ b/test/CodeGen/X86/shrinkwrap-hang.ll
@@ -0,0 +1,32 @@
+; RUN: llc %s -o - -enable-shrink-wrap=true | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+target triple = "i686-pc-linux"
+
+@b = global i32 1, align 4
+@a = common global i32 0, align 4
+
+declare void @fn1() #0
+
+; CHECK-LABEL: fn2:
+define void @fn2() #0 {
+entry:
+ %0 = load i32, i32* @b, align 4
+ %tobool = icmp eq i32 %0, 0
+ br i1 %tobool, label %if.end, label %lbl
+
+lbl: ; preds = %if.end, %entry
+ store i32 0, i32* @b, align 4
+ br label %if.end
+
+if.end: ; preds = %entry, %lbl
+ tail call void @fn1()
+ %1 = load i32, i32* @b, align 4
+ %tobool1 = icmp eq i32 %1, 0
+ br i1 %tobool1, label %if.end3, label %lbl
+
+if.end3: ; preds = %if.end
+ ret void
+}
+
+attributes #0 = { norecurse nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/CodeGen/X86/stack-folding-fp-sse42.ll b/test/CodeGen/X86/stack-folding-fp-sse42.ll
index 105115bc7d25..9f689cfe85e5 100644
--- a/test/CodeGen/X86/stack-folding-fp-sse42.ll
+++ b/test/CodeGen/X86/stack-folding-fp-sse42.ll
@@ -307,7 +307,7 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>) nounwind readnone
define i64 @stack_fold_cvtsd2si64_int(<2 x double> %a0) {
;CHECK-LABEL: stack_fold_cvtsd2si64_int
- ;CHECK: cvtsd2siq {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
+ ;CHECK: cvtsd2si {{-?[0-9]*}}(%rsp), %rax {{.*#+}} 16-byte Folded Reload
%1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %a0)
ret i64 %2
diff --git a/test/CodeGen/X86/statepoint-vector.ll b/test/CodeGen/X86/statepoint-vector.ll
new file mode 100644
index 000000000000..9d80e9217b49
--- /dev/null
+++ b/test/CodeGen/X86/statepoint-vector.ll
@@ -0,0 +1,162 @@
+; RUN: llc -mcpu=core-avx -debug-only=stackmaps < %s | FileCheck %s
+; REQUIRES: asserts
+
+target triple = "x86_64-pc-linux-gnu"
+
+; Can we lower a statepoint with a single vector-of-pointers operand?
+define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @test
+; CHECK: subq $24, %rsp
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: callq do_safepoint
+; CHECK: movaps (%rsp), %xmm0
+; CHECK: addq $24, %rsp
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
+ %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+; Can we lower the base, derived pairs if both are vectors?
+define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @test2
+; CHECK: subq $40, %rsp
+; CHECK: movd %rdi, %xmm1
+; CHECK: pshufd $68, %xmm1, %xmm1 # xmm1 = xmm1[0,1,0,1]
+; CHECK: paddq %xmm0, %xmm1
+; CHECK: movdqa %xmm0, 16(%rsp)
+; CHECK: movdqa %xmm1, (%rsp)
+; CHECK: callq do_safepoint
+; CHECK: movaps (%rsp), %xmm0
+; CHECK: addq $40, %rsp
+ %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)
+ %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 8) ; (%obj, %derived)
+ ret <2 x i8 addrspace(1)*> %derived.relocated
+}
+
+; Originally, this was just a variant of @test2 above, but it ends up
+; covering a bunch of interesting missed optimizations. Specifically:
+; - We waste a stack slot for a value that a backend transform pass
+; CSEd to another spilled one.
+; - We don't remove the testb even though it serves no purpose.
+; - We could in principle reuse the argument memory (%rsi) and do away
+; with stack slots entirely.
+define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @test3
+; CHECK: subq $40, %rsp
+; CHECK: testb $1, %dil
+; CHECK: movaps (%rsi), %xmm0
+; CHECK: movaps %xmm0, 16(%rsp)
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: callq do_safepoint
+; CHECK: movaps (%rsp), %xmm0
+; CHECK: addq $40, %rsp
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+ %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)
+ %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 7) ; (%obj.base, %obj)
+ %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
+ %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 8, i32 8) ; (%obj.base, %obj.base)
+ %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
+ ret <2 x i64 addrspace(1)*> %obj.relocated.casted
+}
+
+; Can we handle vector constants? At the moment, we don't appear to actually
+; get selection dag nodes for these.
+define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @test4
+; CHECK: subq $24, %rsp
+; CHECK: xorps %xmm0, %xmm0
+; CHECK: movaps %xmm0, (%rsp)
+; CHECK: callq do_safepoint
+; CHECK: movaps (%rsp), %xmm0
+; CHECK: addq $24, %rsp
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> zeroinitializer)
+ %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+; Check that we can lower a constant typed as i128 correctly. Note that the
+; actual value is representable in 64 bits. We don't have a representation
+; for constants wider than 64 bits in the StackMap format.
+define void @test5() gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @test5
+; CHECK: push
+; CHECK: callq do_safepoint
+; CHECK: pop
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 1, i128 0)
+ ret void
+}
+
+; CHECK: __LLVM_StackMaps:
+
+; CHECK: .Ltmp1-test
+; Check for the two spill slots
+; Stack Maps: Loc 3: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
+; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 0
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 0
+
+; CHECK: .Ltmp3-test2
+; Check for the two spill slots
+; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
+; Stack Maps: Loc 4: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 16
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 0
+
+; CHECK: .Ltmp5-test3
+; Check for the four spill slots
+; Stack Maps: Loc 3: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
+; Stack Maps: Loc 4: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
+; Stack Maps: Loc 5: Indirect 7+16 [encoding: .byte 3, .byte 16, .short 7, .int 16]
+; Stack Maps: Loc 6: Indirect 7+0 [encoding: .byte 3, .byte 16, .short 7, .int 0]
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 16
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 16
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 16
+; CHECK: .byte 3
+; CHECK: .byte 16
+; CHECK: .short 7
+; CHECK: .long 0
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
diff --git a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
index 1f36d064f873..dfc186bef052 100644
--- a/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
+++ b/test/CodeGen/X86/vec_uint_to_fp-fastmath.ll
@@ -6,6 +6,10 @@
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=CST
; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx2 \
; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512f \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=x86_64 -enable-unsafe-fp-math -mattr=+avx512vl \
+; RUN: | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512VL
; CST: [[MASKCSTADDR:.LCPI[0-9_]+]]:
; CST-NEXT: .long 65535 # 0xffff
@@ -58,6 +62,16 @@ define <4 x float> @test_uitofp_v4i32_to_v4f32(<4 x i32> %arg) {
; AVX2-NEXT: vcvtdq2ps %xmm0, %xmm0
; AVX2-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_uitofp_v4i32_to_v4f32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: test_uitofp_v4i32_to_v4f32:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vcvtudq2ps %xmm0, %xmm0
+; AVX512VL-NEXT: retq
%tmp = uitofp <4 x i32> %arg to <4 x float>
ret <4 x float> %tmp
}
@@ -125,6 +139,16 @@ define <8 x float> @test_uitofp_v8i32_to_v8f32(<8 x i32> %arg) {
; AVX2-NEXT: vcvtdq2ps %ymm0, %ymm0
; AVX2-NEXT: vaddps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: retq
+;
+; AVX512F-LABEL: test_uitofp_v8i32_to_v8f32:
+; AVX512F: # BB#0:
+; AVX512F-NEXT: vcvtudq2ps %zmm0, %zmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: test_uitofp_v8i32_to_v8f32:
+; AVX512VL: # BB#0:
+; AVX512VL-NEXT: vcvtudq2ps %ymm0, %ymm0
+; AVX512VL-NEXT: retq
%tmp = uitofp <8 x i32> %arg to <8 x float>
ret <8 x float> %tmp
}
diff --git a/test/CodeGen/X86/version_directive.ll b/test/CodeGen/X86/version_directive.ll
new file mode 100644
index 000000000000..8e4e6dc70e61
--- /dev/null
+++ b/test/CodeGen/X86/version_directive.ll
@@ -0,0 +1,4 @@
+; RUN: llc -mtriple x86_64-apple-darwin15.0.0 -o - /dev/null | FileCheck %s
+; RUN: llc -mtriple x86_64-apple-macosx10.11.0 -o - /dev/null | FileCheck %s
+
+; CHECK: .macosx_version_min 10, 11
diff --git a/test/CodeGen/X86/x86-shrink-wrapping.ll b/test/CodeGen/X86/x86-shrink-wrapping.ll
index 34e56919468b..609e2cc1158c 100644
--- a/test/CodeGen/X86/x86-shrink-wrapping.ll
+++ b/test/CodeGen/X86/x86-shrink-wrapping.ll
@@ -878,4 +878,109 @@ for.end: ; preds = %for.cond.for.end_cr
declare i32 @varfunc(i8* nocapture readonly)
+@sum1 = external hidden thread_local global i32, align 4
+
+
+; Function Attrs: nounwind
+; Make sure the TLS call used to access @sum1 happens after the prologue
+; and before the epilogue.
+; TLS calls used to be wrongly modeled, and shrink-wrapping would have inserted
+; the prologue and epilogue just around the call to doSomething.
+; PR25820.
+;
+; CHECK-LABEL: tlsCall:
+; CHECK: pushq
+; CHECK: testb $1, %dil
+; CHECK: je [[ELSE_LABEL:LBB[0-9_]+]]
+;
+; master bb
+; CHECK: movq _sum1@TLVP(%rip), %rdi
+; CHECK-NEXT: callq *(%rdi)
+; CHECK: jmp [[EXIT_LABEL:LBB[0-9_]+]]
+;
+; [[ELSE_LABEL]]:
+; CHECK: callq _doSomething
+;
+; [[EXIT_LABEL]]:
+; CHECK: popq
+; CHECK-NEXT: retq
+define i32 @tlsCall(i1 %bool1, i32 %arg, i32* readonly dereferenceable(4) %sum1) #3 {
+entry:
+ br i1 %bool1, label %master, label %else
+
+master:
+ %tmp1 = load i32, i32* %sum1, align 4
+ store i32 %tmp1, i32* @sum1, align 4
+ br label %exit
+
+else:
+ %call = call i32 @doSomething(i32 0, i32* null)
+ br label %exit
+
+exit:
+ %res = phi i32 [ %arg, %master], [ %call, %else ]
+ ret i32 %res
+}
+
attributes #3 = { nounwind }
+
+@irreducibleCFGa = common global i32 0, align 4
+@irreducibleCFGf = common global i8 0, align 1
+@irreducibleCFGb = common global i32 0, align 4
+
+; Check that we do not run shrink-wrapping on irreducible CFGs until
+; it is actually supported.
+; At the moment, on those CFGs the loop information may be incorrect,
+; and since we use that information to do the placement, we may end up
+; inserting the prologue/epilogue at incorrect places.
+; PR25988.
+;
+; CHECK-LABEL: irreducibleCFG:
+; CHECK: %entry
+; Make sure the prologue happens in the entry block.
+; CHECK-NEXT: pushq
+; ...
+; Make sure the epilogue happens in the exit block.
+; CHECK-NOT: popq
+; CHECK: popq
+; CHECK-NEXT: popq
+; CHECK-NEXT: retq
+define i32 @irreducibleCFG() #4 {
+entry:
+ %i0 = load i32, i32* @irreducibleCFGa, align 4
+ %.pr = load i8, i8* @irreducibleCFGf, align 1
+ %bool = icmp eq i8 %.pr, 0
+ br i1 %bool, label %split, label %preheader
+
+preheader:
+ br label %preheader
+
+split:
+ %i1 = load i32, i32* @irreducibleCFGb, align 4
+ %tobool1.i = icmp ne i32 %i1, 0
+ br i1 %tobool1.i, label %for.body4.i, label %for.cond8.i.preheader
+
+for.body4.i:
+ %call.i = tail call i32 (...) @something(i32 %i0)
+ br label %for.cond8
+
+for.cond8:
+ %p1 = phi i32 [ %inc18.i, %for.inc ], [ 0, %for.body4.i ]
+ %.pr1.pr = load i32, i32* @irreducibleCFGb, align 4
+ br label %for.cond8.i.preheader
+
+for.cond8.i.preheader:
+ %.pr1 = phi i32 [ %.pr1.pr, %for.cond8 ], [ %i1, %split ]
+ %p13 = phi i32 [ %p1, %for.cond8 ], [ 0, %split ]
+ br label %for.inc
+
+fn1.exit:
+ ret i32 0
+
+for.inc:
+ %inc18.i = add nuw nsw i32 %p13, 1
+ %cmp = icmp slt i32 %inc18.i, 7
+ br i1 %cmp, label %for.cond8, label %fn1.exit
+}
+
+attributes #4 = { "no-frame-pointer-elim"="true" }
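As background for the test above: a CFG is irreducible when some cycle can be entered at more than one block, so no single header dominates the loop body and LoopInfo cannot describe the loop faithfully. That is exactly the shape built here, since both %for.cond8 and %for.cond8.i.preheader are reachable from %split. A hypothetical C++ analogue of that shape (illustrative only):

    // The cycle between 'head' and 'latch' has two entry points, so no
    // single block dominates it -- the classic irreducible-loop pattern.
    int irreducible(bool EnterAtLatch) {
      int I = 0;
      if (EnterAtLatch)
        goto latch; // second entry into the cycle
    head:
      ++I;
    latch:
      if (I < 7)
        goto head;
      return I;
    }

    int main() { return irreducible(true) == 7 ? 0 : 1; }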
diff --git a/test/DebugInfo/COFF/asm.ll b/test/DebugInfo/COFF/asm.ll
index b67100c87fdb..1baeb7eddb75 100644
--- a/test/DebugInfo/COFF/asm.ll
+++ b/test/DebugInfo/COFF/asm.ll
@@ -57,11 +57,11 @@
; X86-NEXT: .long 3
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[ASM_LINE]]-_f
-; X86-NEXT: .long 4
+; X86-NEXT: .long -2147483644
; X86-NEXT: .long [[CALL_LINE]]-_f
-; X86-NEXT: .long 5
+; X86-NEXT: .long -2147483643
; X86-NEXT: .long [[RETURN_STMT]]-_f
-; X86-NEXT: .long 6
+; X86-NEXT: .long -2147483642
; X86-NEXT: .short 0
; X86-NEXT: .short 0
; X86-NEXT: .short 0
@@ -110,17 +110,28 @@
; OBJ32-NEXT: Flags: 0x1
; OBJ32-NEXT: CodeSize: 0x6
; OBJ32-NEXT: FilenameSegment [
-; OBJ32-NEXT: Filename: D:\asm.c
+; OBJ32-NEXT: Filename: D:\asm.c
; FIXME: An empty __asm stmt creates an extra entry.
; We seem to know that these offsets are the same statically during the
; execution of endModule(). See PR18679 for the details.
-; OBJ32-NEXT: +0x0: 4
-; OBJ32-NEXT: +0x0: 5
-; OBJ32-NEXT: +0x5: 6
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 4
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
; OBJ32-NEXT: ColStart: 0
; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 5
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
; OBJ32-NEXT: ColStart: 0
; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 6
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
; OBJ32-NEXT: ColStart: 0
; OBJ32-NEXT: ColEnd: 0
; OBJ32-NEXT: ]
@@ -175,13 +186,13 @@
; X64-NEXT: .long 4
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[START]]-f
-; X64-NEXT: .long 3
+; X64-NEXT: .long -2147483645
; X64-NEXT: .long [[ASM_LINE]]-f
-; X64-NEXT: .long 4
+; X64-NEXT: .long -2147483644
; X64-NEXT: .long [[CALL_LINE]]-f
-; X64-NEXT: .long 5
+; X64-NEXT: .long -2147483643
; X64-NEXT: .long [[EPILOG_AND_RET]]-f
-; X64-NEXT: .long 6
+; X64-NEXT: .long -2147483642
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 0
@@ -232,20 +243,36 @@
; OBJ64-NEXT: CodeSize: 0xE
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\asm.c
-; OBJ64-NEXT: +0x0: 3
; FIXME: An empty __asm stmt creates an extra entry.
; See PR18679 for the details.
-; OBJ64-NEXT: +0x4: 4
-; OBJ64-NEXT: +0x4: 5
-; OBJ64-NEXT: +0x9: 6
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 3
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 4
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 5
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 6
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
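For readers checking the arithmetic: the new negative .long values are the old line numbers with CodeView's per-entry flags packed in, which is also why the dumper output gains LineNumberStart/LineNumberEndDelta/IsStatement fields. Bits 0-23 of each entry carry the starting line, bits 24-30 the line-end delta, and bit 31 the statement flag, so line 4 becomes 0x80000004, i.e. -2147483644 as a signed 32-bit value. A small sketch of the packing (the helper name is an assumption):

    #include <cstdint>
    #include <cstdio>

    // Pack a CodeView line-table entry: 24-bit start line, 7-bit end delta,
    // IsStatement flag in bit 31.
    int32_t encodeCodeViewLine(uint32_t Line, uint32_t EndDelta, bool IsStatement) {
      uint32_t V = (Line & 0x00FFFFFFu) | ((EndDelta & 0x7Fu) << 24) |
                   (IsStatement ? 0x80000000u : 0u);
      return static_cast<int32_t>(V);
    }

    int main() {
      // Line 4, statement entry -> -2147483644, as in the .long checks above.
      std::printf("%d\n", encodeCodeViewLine(4, 0, true));
      return 0;
    }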
diff --git a/test/DebugInfo/COFF/multifile.ll b/test/DebugInfo/COFF/multifile.ll
index 70bc0022cfb2..6758718e2fcc 100644
--- a/test/DebugInfo/COFF/multifile.ll
+++ b/test/DebugInfo/COFF/multifile.ll
@@ -65,7 +65,7 @@
; X86-NEXT: .long 1
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[CALL_LINE_1]]-_f
-; X86-NEXT: .long 1
+; X86-NEXT: .long -2147483647
; X86-NEXT: .short 0
; X86-NEXT: .short 0
; X86-NEXT: [[FILE_SEGMENT_END]]:
@@ -75,7 +75,7 @@
; X86-NEXT: .long 1
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[CALL_LINE_2]]-_f
-; X86-NEXT: .long 2
+; X86-NEXT: .long -2147483646
; X86-NEXT: .short 0
; X86-NEXT: .short 0
; X86-NEXT: [[FILE_SEGMENT_END]]:
@@ -85,9 +85,9 @@
; X86-NEXT: .long 2
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[CALL_LINE_3]]-_f
-; X86-NEXT: .long 7
+; X86-NEXT: .long -2147483641
; X86-NEXT: .long [[RETURN_STMT]]-_f
-; X86-NEXT: .long 8
+; X86-NEXT: .long -2147483640
; X86-NEXT: .short 0
; X86-NEXT: .short 0
; X86-NEXT: .short 0
@@ -137,24 +137,40 @@
; OBJ32-NEXT: CodeSize: 0x10
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\one.c
-; OBJ32-NEXT: +0x0: 1
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 1
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\two.c
-; OBJ32-NEXT: +0x5: 2
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 2
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\one.c
-; OBJ32-NEXT: +0xA: 7
-; OBJ32-NEXT: +0xF: 8
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: +0xA [
+; OBJ32-NEXT: LineNumberStart: 7
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0xF [
+; OBJ32-NEXT: LineNumberStart: 8
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
@@ -211,7 +227,7 @@
; X64-NEXT: .long 1
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[START]]-f
-; X64-NEXT: .long 3
+; X64-NEXT: .long -2147483645
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
@@ -221,7 +237,7 @@
; X64-NEXT: .long 1
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[CALL_LINE_1]]-f
-; X64-NEXT: .long 1
+; X64-NEXT: .long -2147483647
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
@@ -231,7 +247,7 @@
; X64-NEXT: .long 1
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[CALL_LINE_2]]-f
-; X64-NEXT: .long 2
+; X64-NEXT: .long -2147483646
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
@@ -241,9 +257,9 @@
; X64-NEXT: .long 2
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[CALL_LINE_3]]-f
-; X64-NEXT: .long 7
+; X64-NEXT: .long -2147483641
; X64-NEXT: .long [[EPILOG_AND_RET]]-f
-; X64-NEXT: .long 8
+; X64-NEXT: .long -2147483640
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 0
@@ -297,30 +313,50 @@
; OBJ64-NEXT: CodeSize: 0x18
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\input.c
-; OBJ64-NEXT: +0x0: 3
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 3
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\one.c
-; OBJ64-NEXT: +0x4: 1
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 1
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\two.c
-; OBJ64-NEXT: +0x9: 2
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 2
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\one.c
-; OBJ64-NEXT: +0xE: 7
-; OBJ64-NEXT: +0x13: 8
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0xE [
+; OBJ64-NEXT: LineNumberStart: 7
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x13 [
+; OBJ64-NEXT: LineNumberStart: 8
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
diff --git a/test/DebugInfo/COFF/multifunction.ll b/test/DebugInfo/COFF/multifunction.ll
index 4c044fa9c4fa..a292af3585d1 100644
--- a/test/DebugInfo/COFF/multifunction.ll
+++ b/test/DebugInfo/COFF/multifunction.ll
@@ -86,13 +86,13 @@
; X86-NEXT: .long 2
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[X_CALL]]-_x
-; X86-NEXT: .long 4
+; X86-NEXT: .long -2147483644
; X86-NEXT: .long [[X_RETURN]]-_x
-; X86-NEXT: .long 5
-; X86-NEXT: .short 42
+; X86-NEXT: .long -2147483643
; X86-NEXT: .short 42
+; X86-NEXT: .short 0
; X86-NEXT: .short 43
-; X86-NEXT: .short 43
+; X86-NEXT: .short 0
; X86-NEXT: [[FILE_SEGMENT_END]]:
; X86-NEXT: [[F2_END]]:
; Symbol subsection for y
@@ -129,13 +129,13 @@
; X86-NEXT: .long 2
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[Y_CALL]]-_y
-; X86-NEXT: .long 8
+; X86-NEXT: .long -2147483640
; X86-NEXT: .long [[Y_RETURN]]-_y
-; X86-NEXT: .long 9
-; X86-NEXT: .short 52
+; X86-NEXT: .long -2147483639
; X86-NEXT: .short 52
+; X86-NEXT: .short 0
; X86-NEXT: .short 53
-; X86-NEXT: .short 53
+; X86-NEXT: .short 0
; X86-NEXT: [[FILE_SEGMENT_END]]:
; X86-NEXT: [[F2_END]]:
; Symbol subsection for f
@@ -172,21 +172,21 @@
; X86-NEXT: .long 4
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[F_CALLS_X]]-_f
-; X86-NEXT: .long 12
+; X86-NEXT: .long -2147483636
; X86-NEXT: .long [[F_CALLS_Y]]-_f
-; X86-NEXT: .long 13
+; X86-NEXT: .long -2147483635
; X86-NEXT: .long [[F_CALLS_Z]]-_f
-; X86-NEXT: .long 14
+; X86-NEXT: .long -2147483634
; X86-NEXT: .long [[F_RETURN]]-_f
-; X86-NEXT: .long 15
+; X86-NEXT: .long -2147483633
; X86-NEXT: .short 62
-; X86-NEXT: .short 62
-; X86-NEXT: .short 63
+; X86-NEXT: .short 0
; X86-NEXT: .short 63
+; X86-NEXT: .short 0
; X86-NEXT: .short 72
-; X86-NEXT: .short 72
-; X86-NEXT: .short 73
+; X86-NEXT: .short 0
; X86-NEXT: .short 73
+; X86-NEXT: .short 0
; X86-NEXT: [[FILE_SEGMENT_END]]:
; X86-NEXT: [[F2_END]]:
; File index to string table offset subsection
@@ -265,49 +265,82 @@
; OBJ32-NEXT: CodeSize: 0x6
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\source.c
-; OBJ32-NEXT: +0x0: 4
-; OBJ32-NEXT: +0x5: 5
-; OBJ32-NEXT: ColStart: 42
-; OBJ32-NEXT: ColEnd: 42
-; OBJ32-NEXT: ColStart: 43
-; OBJ32-NEXT: ColEnd: 43
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 4
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 42
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 5
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 43
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: FunctionLineTable [
-; OBJ32-NEXT: Name: _y
+; OBJ32-NEXT: LinkageName: _y
; OBJ32-NEXT: Flags: 0x1
; OBJ32-NEXT: CodeSize: 0x6
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\source.c
-; OBJ32-NEXT: +0x0: 8
-; OBJ32-NEXT: +0x5: 9
-; OBJ32-NEXT: ColStart: 52
-; OBJ32-NEXT: ColEnd: 52
-; OBJ32-NEXT: ColStart: 53
-; OBJ32-NEXT: ColEnd: 53
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 8
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 52
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 9
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 53
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: FunctionLineTable [
-; OBJ32-NEXT: Name: _f
+; OBJ32-NEXT: LinkageName: _f
; OBJ32-NEXT: Flags: 0x1
; OBJ32-NEXT: CodeSize: 0x10
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\source.c
-; OBJ32-NEXT: +0x0: 12
-; OBJ32-NEXT: +0x5: 13
-; OBJ32-NEXT: +0xA: 14
-; OBJ32-NEXT: +0xF: 15
-; OBJ32-NEXT: ColStart: 62
-; OBJ32-NEXT: ColEnd: 62
-; OBJ32-NEXT: ColStart: 63
-; OBJ32-NEXT: ColEnd: 63
-; OBJ32-NEXT: ColStart: 72
-; OBJ32-NEXT: ColEnd: 72
-; OBJ32-NEXT: ColStart: 73
-; OBJ32-NEXT: ColEnd: 73
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 12
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 62
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 13
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 63
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0xA [
+; OBJ32-NEXT: LineNumberStart: 14
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 72
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0xF [
+; OBJ32-NEXT: LineNumberStart: 15
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 73
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
+
; X64-LABEL: x:
; X64-NEXT: .L{{.*}}:
; X64-NEXT: [[X_START:.*]]:{{$}}
@@ -384,17 +417,17 @@
; X64-NEXT: .long 3
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[X_START]]-x
-; X64-NEXT: .long 3
+; X64-NEXT: .long -2147483645
; X64-NEXT: .long [[X_CALL_LINE]]-x
-; X64-NEXT: .long 4
+; X64-NEXT: .long -2147483644
; X64-NEXT: .long [[X_EPILOG_AND_RET]]-x
-; X64-NEXT: .long 5
+; X64-NEXT: .long -2147483643
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 42
-; X64-NEXT: .short 42
-; X64-NEXT: .short 43
+; X64-NEXT: .short 0
; X64-NEXT: .short 43
+; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
; X64-NEXT: [[F2_END]]:
; Symbol subsection for y
@@ -431,17 +464,17 @@
; X64-NEXT: .long 3
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[Y_START]]-y
-; X64-NEXT: .long 7
+; X64-NEXT: .long -2147483641
; X64-NEXT: .long [[Y_CALL_LINE]]-y
-; X64-NEXT: .long 8
+; X64-NEXT: .long -2147483640
; X64-NEXT: .long [[Y_EPILOG_AND_RET]]-y
-; X64-NEXT: .long 9
+; X64-NEXT: .long -2147483639
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 52
-; X64-NEXT: .short 52
-; X64-NEXT: .short 53
+; X64-NEXT: .short 0
; X64-NEXT: .short 53
+; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
; X64-NEXT: [[F2_END]]:
; Symbol subsection for f
@@ -478,25 +511,25 @@
; X64-NEXT: .long 5
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[F_START]]-f
-; X64-NEXT: .long 11
+; X64-NEXT: .long -2147483637
; X64-NEXT: .long [[F_CALLS_X]]-f
-; X64-NEXT: .long 12
+; X64-NEXT: .long -2147483636
; X64-NEXT: .long [[F_CALLS_Y]]-f
-; X64-NEXT: .long 13
+; X64-NEXT: .long -2147483635
; X64-NEXT: .long [[F_CALLS_Z]]-f
-; X64-NEXT: .long 14
+; X64-NEXT: .long -2147483634
; X64-NEXT: .long [[F_EPILOG_AND_RET]]-f
-; X64-NEXT: .long 15
+; X64-NEXT: .long -2147483633
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 62
-; X64-NEXT: .short 62
-; X64-NEXT: .short 63
+; X64-NEXT: .short 0
; X64-NEXT: .short 63
+; X64-NEXT: .short 0
; X64-NEXT: .short 72
-; X64-NEXT: .short 72
-; X64-NEXT: .short 73
+; X64-NEXT: .short 0
; X64-NEXT: .short 73
+; X64-NEXT: .short 0
; X64-NEXT: [[FILE_SEGMENT_END]]:
; X64-NEXT: [[F2_END]]:
; File index to string table offset subsection
@@ -570,60 +603,104 @@
; OBJ64-NEXT: Type: 0xF2
; OBJ64: ]
; OBJ64: FunctionLineTable [
-; OBJ64-NEXT: Name: x
+; OBJ64-NEXT: LinkageName: x
; OBJ64-NEXT: Flags: 0x1
; OBJ64-NEXT: CodeSize: 0xE
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\source.c
-; OBJ64-NEXT: +0x0: 3
-; OBJ64-NEXT: +0x4: 4
-; OBJ64-NEXT: +0x9: 5
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 42
-; OBJ64-NEXT: ColEnd: 42
-; OBJ64-NEXT: ColStart: 43
-; OBJ64-NEXT: ColEnd: 43
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 3
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 4
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 42
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 5
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 43
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: FunctionLineTable [
-; OBJ64-NEXT: Name: y
+; OBJ64-NEXT: LinkageName: y
; OBJ64-NEXT: Flags: 0x1
; OBJ64-NEXT: CodeSize: 0xE
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\source.c
-; OBJ64-NEXT: +0x0: 7
-; OBJ64-NEXT: +0x4: 8
-; OBJ64-NEXT: +0x9: 9
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 52
-; OBJ64-NEXT: ColEnd: 52
-; OBJ64-NEXT: ColStart: 53
-; OBJ64-NEXT: ColEnd: 53
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 7
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 8
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 52
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 9
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 53
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: FunctionLineTable [
-; OBJ64-NEXT: Name: f
+; OBJ64-NEXT: LinkageName: f
; OBJ64-NEXT: Flags: 0x1
; OBJ64-NEXT: CodeSize: 0x18
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\source.c
-; OBJ64-NEXT: +0x0: 11
-; OBJ64-NEXT: +0x4: 12
-; OBJ64-NEXT: +0x9: 13
-; OBJ64-NEXT: +0xE: 14
-; OBJ64-NEXT: +0x13: 15
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 62
-; OBJ64-NEXT: ColEnd: 62
-; OBJ64-NEXT: ColStart: 63
-; OBJ64-NEXT: ColEnd: 63
-; OBJ64-NEXT: ColStart: 72
-; OBJ64-NEXT: ColEnd: 72
-; OBJ64-NEXT: ColStart: 73
-; OBJ64-NEXT: ColEnd: 73
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 11
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 12
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 62
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 13
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 63
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0xE [
+; OBJ64-NEXT: LineNumberStart: 14
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 72
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x13 [
+; OBJ64-NEXT: LineNumberStart: 15
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 73
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
diff --git a/test/DebugInfo/COFF/simple.ll b/test/DebugInfo/COFF/simple.ll
index 2103df07f6dc..128b9efba545 100644
--- a/test/DebugInfo/COFF/simple.ll
+++ b/test/DebugInfo/COFF/simple.ll
@@ -55,9 +55,9 @@
; X86-NEXT: .long 2
; X86-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X86-NEXT: .long [[CALL_LINE]]-_f
-; X86-NEXT: .long 4
+; X86-NEXT: .long -2147483644
; X86-NEXT: .long [[RETURN_STMT]]-_f
-; X86-NEXT: .long 5
+; X86-NEXT: .long -2147483643
; X86-NEXT: .short 0
; X86-NEXT: .short 0
; X86-NEXT: .short 0
@@ -104,12 +104,20 @@
; OBJ32-NEXT: CodeSize: 0x6
; OBJ32-NEXT: FilenameSegment [
; OBJ32-NEXT: Filename: D:\test.c
-; OBJ32-NEXT: +0x0: 4
-; OBJ32-NEXT: +0x5: 5
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
-; OBJ32-NEXT: ColStart: 0
-; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: +0x0 [
+; OBJ32-NEXT: LineNumberStart: 4
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
+; OBJ32-NEXT: +0x5 [
+; OBJ32-NEXT: LineNumberStart: 5
+; OBJ32-NEXT: LineNumberEndDelta: 0
+; OBJ32-NEXT: IsStatement: Yes
+; OBJ32-NEXT: ColStart: 0
+; OBJ32-NEXT: ColEnd: 0
+; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
; OBJ32-NEXT: ]
@@ -161,11 +169,11 @@
; X64-NEXT: .long 3
; X64-NEXT: .long [[FILE_SEGMENT_END:.*]]-[[FILE_SEGMENT_START]]
; X64-NEXT: .long [[START]]-f
-; X64-NEXT: .long 3
+; X64-NEXT: .long -2147483645
; X64-NEXT: .long [[CALL_LINE]]-f
-; X64-NEXT: .long 4
+; X64-NEXT: .long -2147483644
; X64-NEXT: .long [[EPILOG_AND_RET]]-f
-; X64-NEXT: .long 5
+; X64-NEXT: .long -2147483643
; X64-NEXT: .short 0
; X64-NEXT: .short 0
; X64-NEXT: .short 0
@@ -214,15 +222,27 @@
; OBJ64-NEXT: CodeSize: 0xE
; OBJ64-NEXT: FilenameSegment [
; OBJ64-NEXT: Filename: D:\test.c
-; OBJ64-NEXT: +0x0: 3
-; OBJ64-NEXT: +0x4: 4
-; OBJ64-NEXT: +0x9: 5
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
-; OBJ64-NEXT: ColStart: 0
-; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: +0x0 [
+; OBJ64-NEXT: LineNumberStart: 3
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x4 [
+; OBJ64-NEXT: LineNumberStart: 4
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
+; OBJ64-NEXT: +0x9 [
+; OBJ64-NEXT: LineNumberStart: 5
+; OBJ64-NEXT: LineNumberEndDelta: 0
+; OBJ64-NEXT: IsStatement: Yes
+; OBJ64-NEXT: ColStart: 0
+; OBJ64-NEXT: ColEnd: 0
+; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
; OBJ64-NEXT: ]
diff --git a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
index 28b8d2859efa..a21c8bb0d973 100644
--- a/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
+++ b/test/DebugInfo/COFF/tail-call-without-lexical-scopes.ll
@@ -30,7 +30,7 @@
; X86-NEXT: .long 1
; X86-NEXT: .long {{.*}}
; X86-NEXT: .long [[JMP_LINE]]-"?bar@@YAXHZZ"
-; X86-NEXT: .long 4
+; X86-NEXT: .long -2147483644
; X86-LABEL: .long 244
diff --git a/test/DebugInfo/X86/debug-macro.ll b/test/DebugInfo/X86/debug-macro.ll
new file mode 100644
index 000000000000..b79e2de5ca45
--- /dev/null
+++ b/test/DebugInfo/X86/debug-macro.ll
@@ -0,0 +1,67 @@
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=info - | FileCheck --check-prefix=CHECK-INFO %s
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=macro - | FileCheck --check-prefix=CHECK-MACRO %s
+; RUN: %llc_dwarf -O0 -filetype=obj < %s | llvm-dwarfdump -debug-dump=line - | FileCheck --check-prefix=CHECK-LINE %s
+
+
+; CHECK-INFO: .debug_info contents:
+; CHECK-INFO: DW_TAG_compile_unit
+; CHECK-INFO-NOT: DW_TAG
+; CHECK-INFO: DW_AT_name {{.*}}"debug-macro.cpp")
+; CHECK-INFO: DW_AT_macro_info {{.*}}(0x00000000)
+; CHECK-INFO: DW_TAG_compile_unit
+; CHECK-INFO-NOT: DW_TAG
+; CHECK-INFO: DW_AT_name {{.*}}"debug-macro1.cpp")
+; CHECK-INFO: DW_AT_macro_info {{.*}}(0x00000044)
+; CHECK-INFO: DW_TAG_compile_unit
+; CHECK-INFO-NOT: DW_TAG
+; CHECK-INFO: DW_AT_name {{.*}}"debug-macro2.cpp")
+; CHECK-INFO-NOT: DW_AT_macro_info
+
+; CHECK-MACRO: .debug_macinfo contents:
+; CHECK-MACRO-NEXT: DW_MACINFO_define - lineno: 0 macro: NameCMD ValueCMD
+; CHECK-MACRO-NEXT: DW_MACINFO_start_file - lineno: 0 filenum: 1
+; CHECK-MACRO-NEXT: DW_MACINFO_start_file - lineno: 9 filenum: 2
+; CHECK-MACRO-NEXT: DW_MACINFO_define - lineno: 1 macro: NameDef Value
+; CHECK-MACRO-NEXT: DW_MACINFO_undef - lineno: 11 macro: NameUndef
+; CHECK-MACRO-NEXT: DW_MACINFO_end_file
+; CHECK-MACRO-NEXT: DW_MACINFO_undef - lineno: 10 macro: NameUndef2
+; CHECK-MACRO-NEXT: DW_MACINFO_end_file
+; CHECK-MACRO-NEXT: DW_MACINFO_start_file - lineno: 0 filenum: 1
+; CHECK-MACRO-NEXT: DW_MACINFO_end_file
+
+; CHECK-LINE: .debug_line contents:
+; CHECK-LINE: Dir Mod Time File Len File Name
+; CHECK-LINE: file_names[ 1] {{.*}}debug-macro.cpp
+; CHECK-LINE: file_names[ 2] {{.*}}debug-macro.h
+; CHECK-LINE: Dir Mod Time File Len File Name
+; CHECK-LINE: file_names[ 1] {{.*}}debug-macro1.cpp
+
+!llvm.dbg.cu = !{!0, !16, !20}
+!llvm.module.flags = !{!13, !14}
+!llvm.ident = !{!15}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, macros: !3)
+!1 = !DIFile(filename: "debug-macro.cpp", directory: "/")
+!2 = !{}
+!3 = !{!4, !5}
+!4 = !DIMacro(type: DW_MACINFO_define, line: 0, name: "NameCMD", value: "ValueCMD")
+!5 = !DIMacroFile(line: 0, file: !1, nodes: !6)
+!6 = !{!7, !12}
+!7 = !DIMacroFile(line: 9, file: !8, nodes: !9)
+!8 = !DIFile(filename: "debug-macro.h", directory: "/")
+!9 = !{!10, !11}
+!10 = !DIMacro(type: DW_MACINFO_define, line: 1, name: "NameDef", value: "Value")
+!11 = !DIMacro(type: DW_MACINFO_undef, line: 11, name: "NameUndef")
+!12 = !DIMacro(type: DW_MACINFO_undef, line: 10, name: "NameUndef2")
+
+!13 = !{i32 2, !"Dwarf Version", i32 4}
+!14 = !{i32 1, !"Debug Info Version", i32 3}
+!15 = !{!"clang version 3.5.0 "}
+
+!16 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !17, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, macros: !18)
+!17 = !DIFile(filename: "debug-macro1.cpp", directory: "/")
+!18 = !{!19}
+!19 = !DIMacroFile(line: 0, file: !17, nodes: !2)
+
+!20 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !21, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2)
+!21 = !DIFile(filename: "debug-macro2.cpp", directory: "/")
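A hedged reconstruction of the preprocessor activity encoded by the metadata above (the source layout and line numbers are assumptions read off the lineno: fields, not part of the test):

    // Models -DNameCMD=ValueCMD on the command line (lineno 0).
    #define NameCMD ValueCMD
    // In debug-macro.h: a define at line 1 and an undef at line 11.
    #define NameDef Value
    #undef NameUndef
    // Back in debug-macro.cpp, after the include at line 9: undef at line 10.
    #undef NameUndef2

    int main() { return 0; }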
diff --git a/test/DebugInfo/X86/debugger-tune.ll b/test/DebugInfo/X86/debugger-tune.ll
index b685612d4a47..19a37199ceb1 100644
--- a/test/DebugInfo/X86/debugger-tune.ll
+++ b/test/DebugInfo/X86/debugger-tune.ll
@@ -6,7 +6,7 @@
; Verify defaults for various targets.
; RUN: llc -mtriple=x86_64-scei-ps4 -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=SCE %s
; RUN: llc -mtriple=x86_64-apple-darwin12 -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=LLDB %s
-; RUN: llc -mtriple=x86_64-pc-freebsd -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=LLDB %s
+; RUN: llc -mtriple=x86_64-pc-freebsd -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=GDB %s
; RUN: llc -mtriple=x86_64-pc-linux -filetype=obj < %s | llvm-readobj -sections - | FileCheck --check-prefix=GDB %s
; We can override defaults.
diff --git a/test/DebugInfo/X86/tls.ll b/test/DebugInfo/X86/tls.ll
index 754ed25debc6..633096ba956e 100644
--- a/test/DebugInfo/X86/tls.ll
+++ b/test/DebugInfo/X86/tls.ll
@@ -14,7 +14,7 @@
; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=DARWIN --check-prefix=STDOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-freebsd \
-; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=STDOP %s
+; RUN: | FileCheck --check-prefix=NOEMU --check-prefix=SINGLE --check-prefix=SINGLE-64 --check-prefix=GNUOP %s
; RUN: llc %s -o - -filetype=asm -O0 -mtriple=x86_64-unknown-linux-gnu -emulated-tls \
; RUN: | FileCheck --check-prefix=SINGLE --check-prefix=EMUSINGLE-64 \
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
index c31572361d3d..7df88b1ec5e0 100644
--- a/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/cross-module-a.ll
@@ -1,4 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
deleted file mode 100644
index 415fd25409d6..000000000000
--- a/test/ExecutionEngine/MCJIT/remote/cross-module-sm-pic-a.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: %lli -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, i686, i386, arm
-
-declare i32 @FB()
-
-define i32 @FA() {
- ret i32 0
-}
-
-define i32 @main() {
- %r = call i32 @FB( ) ; <i32> [#uses=1]
- ret i32 %r
-}
-
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
index 0fd363b4447a..d35418b19c7f 100644
--- a/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
+++ b/test/ExecutionEngine/MCJIT/remote/multi-module-a.ll
@@ -1,4 +1,5 @@
; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
declare i32 @FB()
diff --git a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
deleted file mode 100644
index 3e020dc853a6..000000000000
--- a/test/ExecutionEngine/MCJIT/remote/multi-module-sm-pic-a.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: %lli -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, i686, i386, arm
-
-declare i32 @FB()
-
-define i32 @main() {
- %r = call i32 @FB( ) ; <i32> [#uses=1]
- ret i32 %r
-}
-
diff --git a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
index 30b4dd8e7abc..0d1a1ec6871a 100644
--- a/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/simpletest-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
define i32 @bar() nounwind {
ret i32 0
diff --git a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
index a834ac5c9850..31ed7523db43 100644
--- a/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/stubs-remote.ll
@@ -1,5 +1,5 @@
; RUN: %lli -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
-; XFAIL: *
+; XFAIL: win32
; This test should fail until remote symbol resolution is supported.
define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
index fb7750adf497..bbeab10cd788 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-common-symbols-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
; The intention of this test is to verify that symbols mapped to COMMON in ELF
; work as expected.
diff --git a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
index 435c21a4a9fa..0aa19b244c04 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-data-align-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
; Check that a variable is always aligned as specified.
diff --git a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
index 6134b8b24641..13bac29a3628 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
define double @test(double* %DP, double %Arg) nounwind {
%D = load double, double* %DP ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
index 5b7999c067e4..5d5480e9d459 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
index e350b85a8bce..ef74fa02e6a9 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
+; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
+; RUN: -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
index ad1af93ffdef..c2260fc2f1ff 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
index 7162e927de0a..2a45472b25a1 100644
--- a/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/MCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
+; RUN: %lli -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
+; RUN: -O0 -relocation-model=pic -code-model=small %s
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll
index 16553ebd2ade..249aad2d4b48 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-a.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll
deleted file mode 100644
index 4326fc1e526b..000000000000
--- a/test/ExecutionEngine/OrcMCJIT/remote/cross-module-sm-pic-a.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/cross-module-b.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, i686, i386, arm
-
-declare i32 @FB()
-
-define i32 @FA() {
- ret i32 0
-}
-
-define i32 @main() {
- %r = call i32 @FB( ) ; <i32> [#uses=1]
- ret i32 %r
-}
-
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll
index aa3434862305..32c58ee6237b 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-a.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
declare i32 @FB()
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll b/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll
deleted file mode 100644
index 18a2d7e87d09..000000000000
--- a/test/ExecutionEngine/OrcMCJIT/remote/multi-module-sm-pic-a.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: %lli -jit-kind=orc-mcjit -extra-module=%p/Inputs/multi-module-b.ll -extra-module=%p/Inputs/multi-module-c.ll -disable-lazy-compilation=true -remote-mcjit -mcjit-remote-process=lli-child-target%exeext -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, i686, i386, arm
-
-declare i32 @FB()
-
-define i32 @main() {
- %r = call i32 @FB( ) ; <i32> [#uses=1]
- ret i32 %r
-}
-
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll
index 45e11fce5a6e..aaf3ebc9bc7f 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/simpletest-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
define i32 @bar() nounwind {
ret i32 0
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll
index aeff011d5edb..a0d941049c4a 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/stubs-remote.ll
@@ -1,5 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
-; XFAIL: *
+; XFAIL: win32
; This test should fail until remote symbol resolution is supported.
define i32 @main() nounwind {
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll
index 7e4dc056027c..9b4e2469665f 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-common-symbols-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -disable-lazy-compilation=false -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
; The intention of this test is to verify that symbols mapped to COMMON in ELF
; work as expected.
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll
index 95c9b825a8d3..88a561b613ef 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-data-align-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
; Check that a variable is always aligned as specified.
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll
index 286710338841..484541ab4807 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-fp-no-external-funcs-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
define double @test(double* %DP, double %Arg) nounwind {
%D = load double, double* %DP ; <double> [#uses=1]
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll
index 5dabb8f44d68..adc3e944b639 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext %s > /dev/null
+; XFAIL: win32
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
index f1e93133b226..8ab3fd591388 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-global-init-nonzero-sm-pic.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -relocation-model=pic -code-model=small %s > /dev/null
-; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
+; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
+; RUN: -relocation-model=pic -code-model=small %s > /dev/null
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
@count = global i32 1, align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll
index aa07db9f512d..a47c801e799b 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-remote.ll
@@ -1,4 +1,5 @@
; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -mcjit-remote-process=lli-child-target%exeext %s
+; XFAIL: win32
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
index cac800ef5e76..210ac6f6ed1c 100644
--- a/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
+++ b/test/ExecutionEngine/OrcMCJIT/remote/test-ptr-reloc-sm-pic.ll
@@ -1,5 +1,6 @@
-; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -O0 -relocation-model=pic -code-model=small %s
-; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386
+; RUN: %lli -jit-kind=orc-mcjit -remote-mcjit -mcjit-remote-process=lli-child-target%exeext \
+; RUN: -O0 -relocation-model=pic -code-model=small %s
+; XFAIL: mips-, mipsel-, aarch64, arm, i686, i386, win32
@.str = private unnamed_addr constant [6 x i8] c"data1\00", align 1
@ptr = global i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i32 0, i32 0), align 4
diff --git a/test/Feature/exception.ll b/test/Feature/exception.ll
index 2634692f4252..cbe2d0353cc3 100644
--- a/test/Feature/exception.ll
+++ b/test/Feature/exception.ll
@@ -43,7 +43,7 @@ entry:
invoke void @_Z3quxv() optsize
to label %exit unwind label %pad
cleanup:
- cleanupret from %cp unwind label %pad
+ cleanupret from %cp unwind to caller
pad:
%cp = cleanuppad within none []
br label %cleanup
@@ -57,7 +57,7 @@ entry:
invoke void @_Z3quxv() optsize
to label %exit unwind label %pad
cleanup:
- cleanupret from %0 unwind label %pad
+ cleanupret from %0 unwind to caller
pad:
%0 = cleanuppad within none []
br label %cleanup
diff --git a/test/Instrumentation/MemorySanitizer/origin-array.ll b/test/Instrumentation/MemorySanitizer/origin-array.ll
new file mode 100644
index 000000000000..d9936ff8e78a
--- /dev/null
+++ b/test/Instrumentation/MemorySanitizer/origin-array.ll
@@ -0,0 +1,23 @@
+; RUN: opt < %s -msan -msan-check-access-address=0 -msan-track-origins=2 -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64-unknown-linux-gnu"
+
+; Check origin handling of array types.
+
+define void @foo([2 x i64] %v, [2 x i64]* %p) sanitize_memory {
+entry:
+ store [2 x i64] %v, [2 x i64]* %p, align 8
+ ret void
+}
+
+; CHECK-LABEL: @foo
+; CHECK: [[PARAM:%[01-9a-z]+]] = load {{.*}} @__msan_param_tls
+; CHECK: [[ORIGIN:%[01-9a-z]+]] = load {{.*}} @__msan_param_origin_tls
+
+; CHECK: [[TMP1:%[01-9a-z]+]] = ptrtoint
+; CHECK: [[TMP2:%[01-9a-z]+]] = xor i64 [[TMP1]]
+; CHECK: [[TMP3:%[01-9a-z]+]] = inttoptr i64 [[TMP2]] to [2 x i64]*
+; CHECK: store [2 x i64] [[PARAM]], [2 x i64]* [[TMP3]]
+
+; CHECK: {{.*}} call i32 @__msan_chain_origin(i32 {{.*}}[[ORIGIN]])
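For orientation, the ptrtoint/xor/inttoptr sequence matched above is MemorySanitizer's shadow-address computation: the shadow word for an application address lives at that address XORed with a target-specific constant, and with -msan-track-origins=2 each stored origin id is chained through __msan_chain_origin. A minimal sketch, assuming some mask value (the real constant depends on the target's memory layout):

    #include <cstdint>

    // Illustrative only: MSan's real XOR constant is target-specific.
    constexpr std::uintptr_t kShadowXorMask = 0x6000000000ULL; // assumed value

    void *shadowFor(void *App) {
      // ptrtoint -> xor -> inttoptr, as in the CHECK lines above.
      return reinterpret_cast<void *>(
          reinterpret_cast<std::uintptr_t>(App) ^ kShadowXorMask);
    }

    int main() { return shadowFor(nullptr) == nullptr ? 1 : 0; }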
diff --git a/test/Linker/Inputs/pr26037.ll b/test/Linker/Inputs/pr26037.ll
new file mode 100644
index 000000000000..ed05b08089a4
--- /dev/null
+++ b/test/Linker/Inputs/pr26037.ll
@@ -0,0 +1,23 @@
+define i32 @main() #0 !dbg !4 {
+entry:
+ %retval = alloca i32, align 4
+ store i32 0, i32* %retval, align 4
+ ret i32 0, !dbg !11
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256934) (llvm/trunk 256936)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!1 = !DIFile(filename: "main.cc", directory: "")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.8.0 (trunk 256934) (llvm/trunk 256936)"}
+!11 = !DILocation(line: 1, column: 14, scope: !4)
diff --git a/test/Linker/pr26037.ll b/test/Linker/pr26037.ll
new file mode 100644
index 000000000000..aa089a8922c0
--- /dev/null
+++ b/test/Linker/pr26037.ll
@@ -0,0 +1,38 @@
+; RUN: llvm-as %s -o %t.bc
+; RUN: llvm-as %p/Inputs/pr26037.ll -o %t2.bc
+; RUN: llvm-link -S -only-needed %t2.bc %t.bc | FileCheck %s
+
+; CHECK: [[A:![0-9]+]] = distinct !DISubprogram(name: "a"
+; CHECK: [[B:![0-9]+]] = distinct !DISubprogram(name: "b"
+; CHECK: !DIImportedEntity({{.*}}, scope: [[B]], entity: [[A]]
+
+define void @_ZN1A1aEv() #0 !dbg !4 {
+entry:
+ ret void, !dbg !14
+}
+
+define void @_ZN1A1bEv() #0 !dbg !8 {
+entry:
+ ret void, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!11, !12}
+!llvm.ident = !{!13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256934) (llvm/trunk 256936)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, imports: !9)
+!1 = !DIFile(filename: "a2.cc", directory: "")
+!2 = !{}
+!3 = !{!4, !8}
+!4 = distinct !DISubprogram(name: "a", linkageName: "_ZN1A1aEv", scope: !5, file: !1, line: 7, type: !6, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = !DINamespace(name: "A", scope: null, file: !1, line: 1)
+!6 = !DISubroutineType(types: !7)
+!7 = !{null}
+!8 = distinct !DISubprogram(name: "b", linkageName: "_ZN1A1bEv", scope: !5, file: !1, line: 8, type: !6, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = !{!10}
+!10 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !8, entity: !4, line: 8)
+!11 = !{i32 2, !"Dwarf Version", i32 4}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{!"clang version 3.8.0 (trunk 256934) (llvm/trunk 256936)"}
+!14 = !DILocation(line: 7, column: 12, scope: !4)
+!15 = !DILocation(line: 8, column: 24, scope: !8)
diff --git a/test/MC/ARM/twice.ll b/test/MC/ARM/twice.ll
new file mode 100644
index 000000000000..8811632dd560
--- /dev/null
+++ b/test/MC/ARM/twice.ll
@@ -0,0 +1,9 @@
+; Check for state persistence bugs in the ARM MC backend.
+; This should neither fail (in the comparison verifying that the second
+; object is bit-identical to the first) nor crash. Either failure would
+; most likely indicate some state that is not properly reset in the
+; appropriate ::reset method.
+; RUN: llc -compile-twice -filetype=obj %s -o -
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv4t-unknown-linux-gnueabi"
diff --git a/test/MC/COFF/timestamp.s b/test/MC/COFF/timestamp.s
index a2761575789d..140225acf7e8 100644
--- a/test/MC/COFF/timestamp.s
+++ b/test/MC/COFF/timestamp.s
@@ -1,5 +1,4 @@
// RUN: llvm-mc -filetype=obj -triple i686-pc-win32 -incremental-linker-compatible %s -o - | llvm-readobj -h | FileCheck %s
-// REQUIRES: timestamps
// CHECK: ImageFileHeader {
// CHECK: TimeDateStamp:
diff --git a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
index 570f8ddb47c3..a0b766a5dc1b 100644
--- a/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
+++ b/test/MC/Disassembler/Mips/mips2/valid-mips2.txt
@@ -83,6 +83,7 @@
0x08 0x00 0x00 0x01 # CHECK: j 4
0x09 0x33 0x00 0x2a # CHECK: j 80478376
0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
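The jal checks added here (and mirrored in the mips3/mips4 disassembler tests and the assembler tests below) are easy to verify by hand: MIPS J-type instructions pack the 6-bit opcode (jal = 0b000011) with the jump target shifted right by two, so jal 1328 encodes as 0x0c00014c and jal 21100 as 0x0c00149b. A quick sketch (the helper name is an assumption):

    #include <cstdint>
    #include <cstdio>

    // J-type encoding: opcode in bits 31-26, (Target >> 2) in bits 25-0.
    uint32_t encodeJal(uint32_t Target) {
      return (0x03u << 26) | ((Target >> 2) & 0x03FFFFFFu);
    }

    int main() {
      // Prints 0x0c00014c 0x0c00149b, matching the byte patterns checked.
      std::printf("0x%08x 0x%08x\n", encodeJal(1328), encodeJal(21100));
      return 0;
    }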
diff --git a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
index db83b50aa01a..fb244e2f1543 100644
--- a/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
+++ b/test/MC/Disassembler/Mips/mips3/valid-mips3.txt
@@ -114,6 +114,7 @@
0x08 0x00 0x00 0x01 # CHECK: j 4
0x09 0x33 0x00 0x2a # CHECK: j 80478376
0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
diff --git a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
index 5e8253dfef2d..47ab90809ec6 100644
--- a/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
+++ b/test/MC/Disassembler/Mips/mips4/valid-mips4.txt
@@ -118,6 +118,7 @@
0x08 0x00 0x00 0x01 # CHECK: j 4
0x09 0x33 0x00 0x2a # CHECK: j 80478376
0x0b 0x2a 0xd1 0x44 # CHECK: j 212550928
+0x0c 0x00 0x01 0x4c # CHECK: jal 1328
0x21 0x08 0xff 0xfe # CHECK: addi $8, $8, -2
0x21 0x2d 0x66 0xd2 # CHECK: addi $13, $9, 26322
0x21 0xad 0xe6 0x90 # CHECK: addi $13, $13, -6512
diff --git a/test/MC/Disassembler/X86/avx-512.txt b/test/MC/Disassembler/X86/avx-512.txt
index d618e7e59335..9e57d4e7dd3b 100644
--- a/test/MC/Disassembler/X86/avx-512.txt
+++ b/test/MC/Disassembler/X86/avx-512.txt
@@ -137,5 +137,5 @@
# CHECK: vpcmpd $8, %zmm10, %zmm25, %k5
0x62 0xd3 0x35 0x40 0x1f 0xea 0x8
-# CHECK: vcmppd $127,{sae}, %zmm27, %zmm11, %k4
+# CHECK: vcmppd $127, {sae}, %zmm27, %zmm11, %k4
0x62 0x91 0xa5 0x58 0xc2 0xe3 0x7f
diff --git a/test/MC/Mips/mips1/valid.s b/test/MC/Mips/mips1/valid.s
index 80f0f8b047c6..5080e77cae10 100644
--- a/test/MC/Mips/mips1/valid.s
+++ b/test/MC/Mips/mips1/valid.s
@@ -46,6 +46,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
lh $11,-8556($s5)
diff --git a/test/MC/Mips/mips2/valid.s b/test/MC/Mips/mips2/valid.s
index c57d386d9d05..026a3a28558a 100644
--- a/test/MC/Mips/mips2/valid.s
+++ b/test/MC/Mips/mips2/valid.s
@@ -62,6 +62,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ldc1 $f11,16391($s0)
diff --git a/test/MC/Mips/mips3/valid.s b/test/MC/Mips/mips3/valid.s
index cf51753712e6..2e62343c3342 100644
--- a/test/MC/Mips/mips3/valid.s
+++ b/test/MC/Mips/mips3/valid.s
@@ -117,6 +117,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
diff --git a/test/MC/Mips/mips32/valid.s b/test/MC/Mips/mips32/valid.s
index 2fdbdfe65223..5f0769d92837 100644
--- a/test/MC/Mips/mips32/valid.s
+++ b/test/MC/Mips/mips32/valid.s
@@ -71,6 +71,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ldc1 $f11,16391($s0)
diff --git a/test/MC/Mips/mips32r2/valid.s b/test/MC/Mips/mips32r2/valid.s
index 7ebc60d946a8..d8e491ecd0b1 100644
--- a/test/MC/Mips/mips32r2/valid.s
+++ b/test/MC/Mips/mips32r2/valid.s
@@ -77,6 +77,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips32r3/valid.s b/test/MC/Mips/mips32r3/valid.s
index 3431e1cbc8d5..93859dbd1d51 100644
--- a/test/MC/Mips/mips32r3/valid.s
+++ b/test/MC/Mips/mips32r3/valid.s
@@ -77,6 +77,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips32r5/valid.s b/test/MC/Mips/mips32r5/valid.s
index 0c477f4fa2ae..e0c1711fbe6e 100644
--- a/test/MC/Mips/mips32r5/valid.s
+++ b/test/MC/Mips/mips32r5/valid.s
@@ -78,6 +78,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips32r6/valid.s b/test/MC/Mips/mips32r6/valid.s
index 226acd96a5a6..e4786d0e4ec4 100644
--- a/test/MC/Mips/mips32r6/valid.s
+++ b/test/MC/Mips/mips32r6/valid.s
@@ -159,6 +159,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x09]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips4/valid.s b/test/MC/Mips/mips4/valid.s
index 9bf98d1c29ff..fcea8ead8a7d 100644
--- a/test/MC/Mips/mips4/valid.s
+++ b/test/MC/Mips/mips4/valid.s
@@ -121,6 +121,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
diff --git a/test/MC/Mips/mips5/valid.s b/test/MC/Mips/mips5/valid.s
index cb30de38c295..36f3ce14dc63 100644
--- a/test/MC/Mips/mips5/valid.s
+++ b/test/MC/Mips/mips5/valid.s
@@ -121,6 +121,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
diff --git a/test/MC/Mips/mips64/valid.s b/test/MC/Mips/mips64/valid.s
index 24ed1ffc8d60..92afbb395465 100644
--- a/test/MC/Mips/mips64/valid.s
+++ b/test/MC/Mips/mips64/valid.s
@@ -128,6 +128,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
lb $24,-14515($10)
lbu $8,30195($v1)
ld $sp,-28645($s1)
diff --git a/test/MC/Mips/mips64r2/valid.s b/test/MC/Mips/mips64r2/valid.s
index e571d9365913..9057fcef60b4 100644
--- a/test/MC/Mips/mips64r2/valid.s
+++ b/test/MC/Mips/mips64r2/valid.s
@@ -141,6 +141,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips64r3/valid.s b/test/MC/Mips/mips64r3/valid.s
index 4bde82eb8ec1..4e2717bdd15d 100644
--- a/test/MC/Mips/mips64r3/valid.s
+++ b/test/MC/Mips/mips64r3/valid.s
@@ -141,6 +141,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips64r5/valid.s b/test/MC/Mips/mips64r5/valid.s
index 029dfa9438c7..574dc60163db 100644
--- a/test/MC/Mips/mips64r5/valid.s
+++ b/test/MC/Mips/mips64r5/valid.s
@@ -142,6 +142,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x08]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
diff --git a/test/MC/Mips/mips64r6/valid.s b/test/MC/Mips/mips64r6/valid.s
index 0b4b6b187afd..cbe3e8232dce 100644
--- a/test/MC/Mips/mips64r6/valid.s
+++ b/test/MC/Mips/mips64r6/valid.s
@@ -138,6 +138,7 @@ a:
j a # CHECK: j a # encoding: [0b000010AA,A,A,A]
# CHECK: # fixup A - offset: 0, value: a, kind: fixup_Mips_26
j 1328 # CHECK: j 1328 # encoding: [0x08,0x00,0x01,0x4c]
+ jal 21100 # CHECK: jal 21100 # encoding: [0x0c,0x00,0x14,0x9b]
jr.hb $4 # CHECK: jr.hb $4 # encoding: [0x00,0x80,0x04,0x09]
jalr.hb $4 # CHECK: jalr.hb $4 # encoding: [0x00,0x80,0xfc,0x09]
jalr.hb $4, $5 # CHECK: jalr.hb $4, $5 # encoding: [0x00,0xa0,0x24,0x09]
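
The jal additions repeated across these MIPS valid.s files (and the mips4 disassembler test above) all pin the same J-type encoding. As a quick cross-check, here is a minimal Python sketch, not part of the patch and with an invented helper name, showing that the CHECK bytes follow from opcode << 26 | (target >> 2), emitted big-endian:

def mips_j_type(opcode: int, target: int) -> bytes:
    # J-type: 6-bit opcode, then the 26-bit word index of the target address.
    assert target % 4 == 0 and target < (1 << 28)
    return ((opcode << 26) | (target >> 2)).to_bytes(4, "big")

assert mips_j_type(0b000011, 21100) == bytes([0x0C, 0x00, 0x14, 0x9B])  # jal 21100
assert mips_j_type(0b000010, 1328)  == bytes([0x08, 0x00, 0x01, 0x4C])  # j 1328
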
diff --git a/test/MC/X86/avx512-encodings.s b/test/MC/X86/avx512-encodings.s
index 658ca715a32a..7191058692eb 100644
--- a/test/MC/X86/avx512-encodings.s
+++ b/test/MC/X86/avx512-encodings.s
@@ -6184,7 +6184,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
vcmppd $0xab, %zmm26, %zmm12, %k2 {%k3}
-// CHECK: vcmppd $171,{sae}, %zmm26, %zmm12, %k2
+// CHECK: vcmppd $171, {sae}, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
vcmppd $0xab,{sae}, %zmm26, %zmm12, %k2
@@ -6192,7 +6192,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
vcmppd $0x7b, %zmm26, %zmm12, %k2
-// CHECK: vcmppd $123,{sae}, %zmm26, %zmm12, %k2
+// CHECK: vcmppd $123, {sae}, %zmm26, %zmm12, %k2
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
vcmppd $0x7b,{sae}, %zmm26, %zmm12, %k2
@@ -6248,7 +6248,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
vcmpps $0xab, %zmm22, %zmm17, %k2 {%k3}
-// CHECK: vcmpps $171,{sae}, %zmm22, %zmm17, %k2
+// CHECK: vcmpps $171, {sae}, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
vcmpps $0xab,{sae}, %zmm22, %zmm17, %k2
@@ -6256,7 +6256,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
vcmpps $0x7b, %zmm22, %zmm17, %k2
-// CHECK: vcmpps $123,{sae}, %zmm22, %zmm17, %k2
+// CHECK: vcmpps $123, {sae}, %zmm22, %zmm17, %k2
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
vcmpps $0x7b,{sae}, %zmm22, %zmm17, %k2
@@ -7812,7 +7812,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0x2d,0xc4,0x54,0xfa,0xab]
vfixupimmps $0xab, %zmm2, %zmm26, %zmm15 {%k4} {z}
-// CHECK: vfixupimmps $171,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $171, {sae}, %zmm2, %zmm26, %zmm15
// CHECK: encoding: [0x62,0x73,0x2d,0x10,0x54,0xfa,0xab]
vfixupimmps $0xab,{sae}, %zmm2, %zmm26, %zmm15
@@ -7820,7 +7820,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x54,0xfa,0x7b]
vfixupimmps $0x7b, %zmm2, %zmm26, %zmm15
-// CHECK: vfixupimmps $123,{sae}, %zmm2, %zmm26, %zmm15
+// CHECK: vfixupimmps $123, {sae}, %zmm2, %zmm26, %zmm15
// CHECK: encoding: [0x62,0x73,0x2d,0x10,0x54,0xfa,0x7b]
vfixupimmps $0x7b,{sae}, %zmm2, %zmm26, %zmm15
@@ -7880,7 +7880,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x33,0xd5,0xc2,0x54,0xcb,0xab]
vfixupimmpd $0xab, %zmm19, %zmm21, %zmm9 {%k2} {z}
-// CHECK: vfixupimmpd $171,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $171, {sae}, %zmm19, %zmm21, %zmm9
// CHECK: encoding: [0x62,0x33,0xd5,0x10,0x54,0xcb,0xab]
vfixupimmpd $0xab,{sae}, %zmm19, %zmm21, %zmm9
@@ -7888,7 +7888,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x33,0xd5,0x40,0x54,0xcb,0x7b]
vfixupimmpd $0x7b, %zmm19, %zmm21, %zmm9
-// CHECK: vfixupimmpd $123,{sae}, %zmm19, %zmm21, %zmm9
+// CHECK: vfixupimmpd $123, {sae}, %zmm19, %zmm21, %zmm9
// CHECK: encoding: [0x62,0x33,0xd5,0x10,0x54,0xcb,0x7b]
vfixupimmpd $0x7b,{sae}, %zmm19, %zmm21, %zmm9
@@ -7948,7 +7948,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x13,0x6d,0x85,0x55,0xfc,0xab]
vfixupimmss $0xab, %xmm28, %xmm18, %xmm15 {%k5} {z}
-// CHECK: vfixupimmss $171,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $171, {sae}, %xmm28, %xmm18, %xmm15
// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0xab]
vfixupimmss $0xab,{sae}, %xmm28, %xmm18, %xmm15
@@ -7956,7 +7956,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0x7b]
vfixupimmss $0x7b, %xmm28, %xmm18, %xmm15
-// CHECK: vfixupimmss $123,{sae}, %xmm28, %xmm18, %xmm15
+// CHECK: vfixupimmss $123, {sae}, %xmm28, %xmm18, %xmm15
// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0x7b]
vfixupimmss $0x7b,{sae}, %xmm28, %xmm18, %xmm15
@@ -7996,7 +7996,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0xad,0x86,0x55,0xed,0xab]
vfixupimmsd $0xab, %xmm5, %xmm26, %xmm13 {%k6} {z}
-// CHECK: vfixupimmsd $171,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $171, {sae}, %xmm5, %xmm26, %xmm13
// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0xab]
vfixupimmsd $0xab,{sae}, %xmm5, %xmm26, %xmm13
@@ -8004,7 +8004,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0x7b]
vfixupimmsd $0x7b, %xmm5, %xmm26, %xmm13
-// CHECK: vfixupimmsd $123,{sae}, %xmm5, %xmm26, %xmm13
+// CHECK: vfixupimmsd $123, {sae}, %xmm5, %xmm26, %xmm13
// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0x7b]
vfixupimmsd $0x7b,{sae}, %xmm5, %xmm26, %xmm13
@@ -12962,7 +12962,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xe3,0xfd,0xc9,0x09,0xf7,0xab]
vrndscalepd $0xab, %zmm7, %zmm22 {%k1} {z}
-// CHECK: vrndscalepd $171,{sae}, %zmm7, %zmm22
+// CHECK: vrndscalepd $171, {sae}, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0xab]
vrndscalepd $0xab,{sae}, %zmm7, %zmm22
@@ -12970,7 +12970,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xe3,0xfd,0x48,0x09,0xf7,0x7b]
vrndscalepd $0x7b, %zmm7, %zmm22
-// CHECK: vrndscalepd $123,{sae}, %zmm7, %zmm22
+// CHECK: vrndscalepd $123, {sae}, %zmm7, %zmm22
// CHECK: encoding: [0x62,0xe3,0xfd,0x18,0x09,0xf7,0x7b]
vrndscalepd $0x7b,{sae}, %zmm7, %zmm22
@@ -13030,7 +13030,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0x7d,0xc9,0x08,0xef,0xab]
vrndscaleps $0xab, %zmm7, %zmm13 {%k1} {z}
-// CHECK: vrndscaleps $171,{sae}, %zmm7, %zmm13
+// CHECK: vrndscaleps $171, {sae}, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0xab]
vrndscaleps $0xab,{sae}, %zmm7, %zmm13
@@ -13038,7 +13038,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x73,0x7d,0x48,0x08,0xef,0x7b]
vrndscaleps $0x7b, %zmm7, %zmm13
-// CHECK: vrndscaleps $123,{sae}, %zmm7, %zmm13
+// CHECK: vrndscaleps $123, {sae}, %zmm7, %zmm13
// CHECK: encoding: [0x62,0x73,0x7d,0x18,0x08,0xef,0x7b]
vrndscaleps $0x7b,{sae}, %zmm7, %zmm13
@@ -14966,7 +14966,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd1,0x06,0x0d,0xc2,0xe4,0xab]
vcmpss $0xab, %xmm12, %xmm15, %k4 {%k5}
-// CHECK: vcmpss $171,{sae}, %xmm12, %xmm15, %k4
+// CHECK: vcmpss $171, {sae}, %xmm12, %xmm15, %k4
// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0xab]
vcmpss $0xab,{sae}, %xmm12, %xmm15, %k4
@@ -14974,7 +14974,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd1,0x06,0x08,0xc2,0xe4,0x7b]
vcmpss $0x7b, %xmm12, %xmm15, %k4
-// CHECK: vcmpss $123,{sae}, %xmm12, %xmm15, %k4
+// CHECK: vcmpss $123, {sae}, %xmm12, %xmm15, %k4
// CHECK: encoding: [0x62,0xd1,0x06,0x18,0xc2,0xe4,0x7b]
vcmpss $0x7b,{sae}, %xmm12, %xmm15, %k4
@@ -15010,7 +15010,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf1,0xe7,0x01,0xc2,0xec,0xab]
vcmpsd $0xab, %xmm4, %xmm19, %k5 {%k1}
-// CHECK: vcmpsd $171,{sae}, %xmm4, %xmm19, %k5
+// CHECK: vcmpsd $171, {sae}, %xmm4, %xmm19, %k5
// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0xab]
vcmpsd $0xab,{sae}, %xmm4, %xmm19, %k5
@@ -15018,7 +15018,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xf1,0xe7,0x00,0xc2,0xec,0x7b]
vcmpsd $0x7b, %xmm4, %xmm19, %k5
-// CHECK: vcmpsd $123,{sae}, %xmm4, %xmm19, %k5
+// CHECK: vcmpsd $123, {sae}, %xmm4, %xmm19, %k5
// CHECK: encoding: [0x62,0xf1,0xe7,0x10,0xc2,0xec,0x7b]
vcmpsd $0x7b,{sae}, %xmm4, %xmm19, %k5
@@ -15518,7 +15518,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd3,0x6d,0x8f,0x27,0xdc,0xab]
vgetmantss $0xab, %xmm12, %xmm2, %xmm3 {%k7} {z}
-// CHECK: vgetmantss $171,{sae}, %xmm12, %xmm2, %xmm3
+// CHECK: vgetmantss $171, {sae}, %xmm12, %xmm2, %xmm3
// CHECK: encoding: [0x62,0xd3,0x6d,0x18,0x27,0xdc,0xab]
vgetmantss $0xab,{sae}, %xmm12, %xmm2, %xmm3
@@ -15526,7 +15526,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd3,0x6d,0x08,0x27,0xdc,0x7b]
vgetmantss $0x7b, %xmm12, %xmm2, %xmm3
-// CHECK: vgetmantss $123,{sae}, %xmm12, %xmm2, %xmm3
+// CHECK: vgetmantss $123, {sae}, %xmm12, %xmm2, %xmm3
// CHECK: encoding: [0x62,0xd3,0x6d,0x18,0x27,0xdc,0x7b]
vgetmantss $0x7b,{sae}, %xmm12, %xmm2, %xmm3
@@ -15566,7 +15566,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd3,0xa5,0x86,0x27,0xd8,0xab]
vgetmantsd $0xab, %xmm8, %xmm27, %xmm3 {%k6} {z}
-// CHECK: vgetmantsd $171,{sae}, %xmm8, %xmm27, %xmm3
+// CHECK: vgetmantsd $171, {sae}, %xmm8, %xmm27, %xmm3
// CHECK: encoding: [0x62,0xd3,0xa5,0x10,0x27,0xd8,0xab]
vgetmantsd $0xab,{sae}, %xmm8, %xmm27, %xmm3
@@ -15574,7 +15574,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0xd3,0xa5,0x00,0x27,0xd8,0x7b]
vgetmantsd $0x7b, %xmm8, %xmm27, %xmm3
-// CHECK: vgetmantsd $123,{sae}, %xmm8, %xmm27, %xmm3
+// CHECK: vgetmantsd $123, {sae}, %xmm8, %xmm27, %xmm3
// CHECK: encoding: [0x62,0xd3,0xa5,0x10,0x27,0xd8,0x7b]
vgetmantsd $0x7b,{sae}, %xmm8, %xmm27, %xmm3
@@ -15614,7 +15614,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x26,0xf4,0xab]
vgetmantps $0xab, %zmm28, %zmm22 {%k3} {z}
-// CHECK: vgetmantps $171,{sae}, %zmm28, %zmm22
+// CHECK: vgetmantps $171, {sae}, %zmm28, %zmm22
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x26,0xf4,0xab]
vgetmantps $0xab,{sae}, %zmm28, %zmm22
@@ -15622,7 +15622,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x26,0xf4,0x7b]
vgetmantps $0x7b, %zmm28, %zmm22
-// CHECK: vgetmantps $123,{sae}, %zmm28, %zmm22
+// CHECK: vgetmantps $123, {sae}, %zmm28, %zmm22
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x26,0xf4,0x7b]
vgetmantps $0x7b,{sae}, %zmm28, %zmm22
@@ -15682,7 +15682,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x93,0xfd,0xcf,0x26,0xd2,0xab]
vgetmantpd $0xab, %zmm26, %zmm2 {%k7} {z}
-// CHECK: vgetmantpd $171,{sae}, %zmm26, %zmm2
+// CHECK: vgetmantpd $171, {sae}, %zmm26, %zmm2
// CHECK: encoding: [0x62,0x93,0xfd,0x18,0x26,0xd2,0xab]
vgetmantpd $0xab,{sae}, %zmm26, %zmm2
@@ -15690,7 +15690,7 @@ vpermilpd $0x23, 0x400(%rbx), %zmm2
// CHECK: encoding: [0x62,0x93,0xfd,0x48,0x26,0xd2,0x7b]
vgetmantpd $0x7b, %zmm26, %zmm2
-// CHECK: vgetmantpd $123,{sae}, %zmm26, %zmm2
+// CHECK: vgetmantpd $123, {sae}, %zmm26, %zmm2
// CHECK: encoding: [0x62,0x93,0xfd,0x18,0x26,0xd2,0x7b]
vgetmantpd $0x7b,{sae}, %zmm26, %zmm2
diff --git a/test/MC/X86/intel-syntax-avx512.s b/test/MC/X86/intel-syntax-avx512.s
index c5ab7dde1106..1ad2b86bb8ca 100644
--- a/test/MC/X86/intel-syntax-avx512.s
+++ b/test/MC/X86/intel-syntax-avx512.s
@@ -1,10 +1,10 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -x86-asm-syntax=intel -output-asm-variant=1 -mcpu=knl --show-encoding %s | FileCheck %s
-// CHECK: vaddps zmm1 , zmm1, zmmword ptr [rax]
+// CHECK: vaddps zmm1, zmm1, zmmword ptr [rax]
// CHECK: encoding: [0x62,0xf1,0x74,0x48,0x58,0x08]
vaddps zmm1, zmm1, zmmword ptr [rax]
-// CHECK: vaddpd zmm1 , zmm1, zmm2
+// CHECK: vaddpd zmm1, zmm1, zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
vaddpd zmm1,zmm1,zmm2
@@ -16,23 +16,23 @@ vaddpd zmm1{k5},zmm1,zmm2
// CHECK: encoding: [0x62,0xf1,0xf5,0xcd,0x58,0xca]
vaddpd zmm1{k5} {z},zmm1,zmm2
-// CHECK: vaddpd zmm1 , zmm1, zmm2, {rn-sae}
+// CHECK: vaddpd zmm1, zmm1, zmm2, {rn-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x18,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rn-sae}
-// CHECK: vaddpd zmm1 , zmm1, zmm2, {ru-sae}
+// CHECK: vaddpd zmm1, zmm1, zmm2, {ru-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x58,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{ru-sae}
-// CHECK: vaddpd zmm1 , zmm1, zmm2, {rd-sae}
+// CHECK: vaddpd zmm1, zmm1, zmm2, {rd-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x38,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rd-sae}
-// CHECK: vaddpd zmm1 , zmm1, zmm2, {rz-sae}
+// CHECK: vaddpd zmm1, zmm1, zmm2, {rz-sae}
// CHECK: encoding: [0x62,0xf1,0xf5,0x78,0x58,0xca]
vaddpd zmm1,zmm1,zmm2,{rz-sae}
-// CHECK: vcmppd k2 , zmm12, zmm26, 171
+// CHECK: vcmppd k2, zmm12, zmm26, 171
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0xab]
vcmppd k2,zmm12,zmm26,0xab
@@ -40,63 +40,63 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0x91,0x9d,0x4b,0xc2,0xd2,0xab]
vcmppd k2{k3},zmm12,zmm26,0xab
-// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 171
+// CHECK: vcmppd k2, zmm12, zmm26, {sae}, 171
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0xab]
vcmppd k2,zmm12,zmm26,{sae},0xab
-// CHECK: vcmppd k2 , zmm12, zmm26, 123
+// CHECK: vcmppd k2, zmm12, zmm26, 123
// CHECK: encoding: [0x62,0x91,0x9d,0x48,0xc2,0xd2,0x7b]
vcmppd k2 ,zmm12,zmm26,0x7b
-// CHECK: vcmppd k2 , zmm12, zmm26,{sae}, 123
+// CHECK: vcmppd k2, zmm12, zmm26, {sae}, 123
// CHECK: encoding: [0x62,0x91,0x9d,0x18,0xc2,0xd2,0x7b]
vcmppd k2,zmm12,zmm26,{sae},0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rcx], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x11,0x7b]
vcmppd k2,zmm12,zmmword PTR [rcx],0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x9d,0x48,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k2 ,zmm12,zmmword PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmppd k2 , zmm12, qword ptr [rcx]{1to8}, 123
+// CHECK: vcmppd k2, zmm12, qword ptr [rcx]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x11,0x7b]
vcmppd k2,zmm12,QWORD PTR [rcx]{1to8},0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8128], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rdx + 8128], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x7f,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx+0x1fc0],0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx + 8192], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rdx + 8192], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx+0x2000],0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8192], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rdx - 8192], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x52,0x80,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx-0x2000],0x7b
-// CHECK: vcmppd k2 , zmm12, zmmword ptr [rdx - 8256], 123
+// CHECK: vcmppd k2, zmm12, zmmword ptr [rdx - 8256], 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x48,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmppd k2,zmm12,zmmword PTR [rdx-0x2040],0x7b
-// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1016]{1to8}, 123
+// CHECK: vcmppd k2, zmm12, qword ptr [rdx + 1016]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x7f,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx+0x3f8]{1to8},0x7b
-// CHECK: vcmppd k2 , zmm12, qword ptr [rdx + 1024]{1to8}, 123
+// CHECK: vcmppd k2, zmm12, qword ptr [rdx + 1024]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0x00,0x04,0x00,0x00,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx+0x400]{1to8},0x7b
-// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1024]{1to8}, 123
+// CHECK: vcmppd k2, zmm12, qword ptr [rdx - 1024]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x52,0x80,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx-0x400]{1to8},0x7b
-// CHECK: vcmppd k2 , zmm12, qword ptr [rdx - 1032]{1to8}, 123
+// CHECK: vcmppd k2, zmm12, qword ptr [rdx - 1032]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x9d,0x58,0xc2,0x92,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k2,zmm12,QWORD PTR [rdx-0x408]{1to8},0x7b
-// CHECK: vcmpps k2 , zmm17, zmm22, 171
+// CHECK: vcmpps k2, zmm17, zmm22, 171
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0xab]
vcmpps k2,zmm17,zmm22,0xab
@@ -104,64 +104,64 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0xb1,0x74,0x43,0xc2,0xd6,0xab]
vcmpps k2{k3},zmm17,zmm22,0xab
-// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 171
+// CHECK: vcmpps k2, zmm17, zmm22, {sae}, 171
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0xab]
vcmpps k2,zmm17,zmm22,{sae},0xab
-// CHECK: vcmpps k2 , zmm17, zmm22, 123
+// CHECK: vcmpps k2, zmm17, zmm22, 123
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0xd6,0x7b]
vcmpps k2,zmm17,zmm22,0x7b
-// CHECK: vcmpps k2 , zmm17, zmm22,{sae}, 123
+// CHECK: vcmpps k2, zmm17, zmm22, {sae}, 123
// CHECK: encoding: [0x62,0xb1,0x74,0x10,0xc2,0xd6,0x7b]
vcmpps k2,zmm17,zmm22,{sae},0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rcx], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x11,0x7b]
vcmpps k2,zmm17,zmmword PTR [rcx],0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x74,0x40,0xc2,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmpps k2,zmm17,zmmword PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmpps k2 , zmm17, dword ptr [rcx]{1to16}, 123
+// CHECK: vcmpps k2, zmm17, dword ptr [rcx]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x11,0x7b]
vcmpps k2,zmm17,DWORD PTR [rcx]{1to16},0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8128], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rdx + 8128], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x7f,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx+0x1fc0],0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx + 8192], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rdx + 8192], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0x00,0x20,0x00,0x00,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx+0x2000],0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8192], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rdx - 8192], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x52,0x80,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx-0x2000],0x7b
-// CHECK: vcmpps k2 , zmm17, zmmword ptr [rdx - 8256], 123
+// CHECK: vcmpps k2, zmm17, zmmword ptr [rdx - 8256], 123
// CHECK: encoding: [0x62,0xf1,0x74,0x40,0xc2,0x92,0xc0,0xdf,0xff,0xff,0x7b]
vcmpps k2,zmm17,zmmword PTR [rdx-0x2040],0x7b
-// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 508]{1to16}, 123
+// CHECK: vcmpps k2, zmm17, dword ptr [rdx + 508]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x7f,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx+0x1fc]{1to16},0x7b
-// CHECK: vcmpps k2 , zmm17, dword ptr [rdx + 512]{1to16}, 123
+// CHECK: vcmpps k2, zmm17, dword ptr [rdx + 512]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0x00,0x02,0x00,0x00,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx+0x200]{1to16},0x7b
-// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 512]{1to16}, 123
+// CHECK: vcmpps k2, zmm17, dword ptr [rdx - 512]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x52,0x80,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx-0x200]{1to16},0x7b
-// CHECK: vcmpps k2 , zmm17, dword ptr [rdx - 516]{1to16}, 123
+// CHECK: vcmpps k2, zmm17, dword ptr [rdx - 516]{1to16}, 123
// CHECK: encoding: [0x62,0xf1,0x74,0x50,0xc2,0x92,0xfc,0xfd,0xff,0xff,0x7b]
vcmpps k2,zmm17,DWORD PTR [rdx-0x204]{1to16},0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, xmm28, 171
+// CHECK: vfixupimmss xmm15, xmm18, xmm28, 171
// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0xab]
vfixupimmss xmm15,xmm18,xmm28,0xab
@@ -173,43 +173,43 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0x13,0x6d,0x85,0x55,0xfc,0xab]
vfixupimmss xmm15{k5} {z},xmm18,xmm28,0xab
-// CHECK: vfixupimmss xmm15 , xmm18, xmm28,{sae}, 171
+// CHECK: vfixupimmss xmm15, xmm18, xmm28, {sae}, 171
// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0xab]
vfixupimmss xmm15,xmm18,xmm28,{sae},0xab
-// CHECK: vfixupimmss xmm15 , xmm18, xmm28, 123
+// CHECK: vfixupimmss xmm15, xmm18, xmm28, 123
// CHECK: encoding: [0x62,0x13,0x6d,0x00,0x55,0xfc,0x7b]
vfixupimmss xmm15,xmm18,xmm28,0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, xmm28,{sae}, 123
+// CHECK: vfixupimmss xmm15, xmm18, xmm28, {sae}, 123
// CHECK: encoding: [0x62,0x13,0x6d,0x10,0x55,0xfc,0x7b]
vfixupimmss xmm15,xmm18,xmm28,{sae},0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rcx], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rcx], 123
// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x39,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rcx],0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rax + 8*r14 + 291], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0x33,0x6d,0x00,0x55,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx + 508], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rdx + 508], 123
// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x7f,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rdx+0x1fc],0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx + 512], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rdx + 512], 123
// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0x00,0x02,0x00,0x00,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rdx+0x200],0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx - 512], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rdx - 512], 123
// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0x7a,0x80,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rdx-0x200],0x7b
-// CHECK: vfixupimmss xmm15 , xmm18, dword ptr [rdx - 516], 123
+// CHECK: vfixupimmss xmm15, xmm18, dword ptr [rdx - 516], 123
// CHECK: encoding: [0x62,0x73,0x6d,0x00,0x55,0xba,0xfc,0xfd,0xff,0xff,0x7b]
vfixupimmss xmm15,xmm18,DWORD PTR [rdx-0x204],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, xmm5, 171
+// CHECK: vfixupimmsd xmm13, xmm26, xmm5, 171
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0xab]
vfixupimmsd xmm13,xmm26,xmm5,0xab
@@ -221,39 +221,39 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0x73,0xad,0x86,0x55,0xed,0xab]
vfixupimmsd xmm13{k6} {z},xmm26,xmm5,0xab
-// CHECK: vfixupimmsd xmm13 , xmm26, xmm5,{sae}, 171
+// CHECK: vfixupimmsd xmm13, xmm26, xmm5, {sae}, 171
// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0xab]
vfixupimmsd xmm13,xmm26,xmm5,{sae},0xab
-// CHECK: vfixupimmsd xmm13 , xmm26, xmm5, 123
+// CHECK: vfixupimmsd xmm13, xmm26, xmm5, 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xed,0x7b]
vfixupimmsd xmm13,xmm26,xmm5,0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, xmm5,{sae}, 123
+// CHECK: vfixupimmsd xmm13, xmm26, xmm5, {sae}, 123
// CHECK: encoding: [0x62,0x73,0xad,0x10,0x55,0xed,0x7b]
vfixupimmsd xmm13,xmm26,xmm5,{sae},0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rcx], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rcx], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x29,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rcx],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rax + 8*r14 + 291], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0x33,0xad,0x00,0x55,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx + 1016], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rdx + 1016], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x7f,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rdx+0x3f8],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx + 1024], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rdx + 1024], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0x00,0x04,0x00,0x00,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rdx+0x400],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx - 1024], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rdx - 1024], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0x6a,0x80,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rdx-0x400],0x7b
-// CHECK: vfixupimmsd xmm13 , xmm26, qword ptr [rdx - 1032], 123
+// CHECK: vfixupimmsd xmm13, xmm26, qword ptr [rdx - 1032], 123
// CHECK: encoding: [0x62,0x73,0xad,0x00,0x55,0xaa,0xf8,0xfb,0xff,0xff,0x7b]
vfixupimmsd xmm13,xmm26,QWORD PTR [rdx-0x408],0x7b
@@ -321,7 +321,7 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0xf1,0x7e,0x8c,0x10,0x11]
vmovss xmm2{k4} {z}, dword ptr [rcx]
-// CHECK: vmovsd xmm25 , qword ptr [rcx]
+// CHECK: vmovsd xmm25, qword ptr [rcx]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x09]
vmovsd xmm25, qword ptr [rcx]
@@ -333,22 +333,22 @@ vaddpd zmm1,zmm1,zmm2,{rz-sae}
// CHECK: encoding: [0x62,0x61,0xff,0x8b,0x10,0x09]
vmovsd xmm25{k3} {z}, qword ptr [rcx]
-// CHECK: vmovsd xmm25 , qword ptr [rax + 8*r14 + 291]
+// CHECK: vmovsd xmm25, qword ptr [rax + 8*r14 + 291]
// CHECK: encoding: [0x62,0x21,0xff,0x08,0x10,0x8c,0xf0,0x23,0x01,0x00,0x00]
vmovsd xmm25, qword ptr [rax+r14*8+0x123]
-// CHECK: vmovsd xmm25 , qword ptr [rdx + 1016]
+// CHECK: vmovsd xmm25, qword ptr [rdx + 1016]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x7f]
vmovsd xmm25, qword ptr [rdx+0x3f8]
-// CHECK: vmovsd xmm25 , qword ptr [rdx + 1024]
+// CHECK: vmovsd xmm25, qword ptr [rdx + 1024]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0x00,0x04,0x00,0x00]
vmovsd xmm25, qword ptr [rdx+0x400]
-// CHECK: vmovsd xmm25 , qword ptr [rdx - 1024]
+// CHECK: vmovsd xmm25, qword ptr [rdx - 1024]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x4a,0x80]
vmovsd xmm25, qword ptr [rdx-0x400]
-// CHECK: vmovsd xmm25 , qword ptr [rdx - 1032]
+// CHECK: vmovsd xmm25, qword ptr [rdx - 1032]
// CHECK: encoding: [0x62,0x61,0xff,0x08,0x10,0x8a,0xf8,0xfb,0xff,0xff]
vmovsd xmm25, qword ptr [rdx-0x408]
diff --git a/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s b/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
index 6de59da40b30..80e26f1ef026 100644
--- a/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
+++ b/test/MC/X86/intel-syntax-x86-64-avx512f_vl.s
@@ -1,6 +1,6 @@
// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
-// CHECK: vcmppd k3 , xmm27, xmm23, 171
+// CHECK: vcmppd k3, xmm27, xmm23, 171
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0xab]
vcmppd k3,xmm27,xmm23,0xab
@@ -8,55 +8,55 @@
// CHECK: encoding: [0x62,0xb1,0xa5,0x05,0xc2,0xdf,0xab]
vcmppd k3{k5},xmm27,xmm23,0xab
-// CHECK: vcmppd k3 , xmm27, xmm23, 123
+// CHECK: vcmppd k3, xmm27, xmm23, 123
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0xdf,0x7b]
vcmppd k3,xmm27,xmm23,0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rcx], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x19,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rcx],0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0xa5,0x00,0xc2,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmppd k3 , xmm27, qword ptr [rcx]{1to2}, 123
+// CHECK: vcmppd k3, xmm27, qword ptr [rcx]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x19,0x7b]
vcmppd k3,xmm27,QWORD PTR [rcx]{1to2},0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2032], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rdx + 2032], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x7f,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx+0x7f0],0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx + 2048], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rdx + 2048], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0x00,0x08,0x00,0x00,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx+0x800],0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2048], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rdx - 2048], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x5a,0x80,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx-0x800],0x7b
-// CHECK: vcmppd k3 , xmm27, xmmword ptr [rdx - 2064], 123
+// CHECK: vcmppd k3, xmm27, xmmword ptr [rdx - 2064], 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x00,0xc2,0x9a,0xf0,0xf7,0xff,0xff,0x7b]
vcmppd k3,xmm27,XMMWORD PTR [rdx-0x810],0x7b
-// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1016]{1to2}, 123
+// CHECK: vcmppd k3, xmm27, qword ptr [rdx + 1016]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x7f,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx+0x3f8]{1to2},0x7b
-// CHECK: vcmppd k3 , xmm27, qword ptr [rdx + 1024]{1to2}, 123
+// CHECK: vcmppd k3, xmm27, qword ptr [rdx + 1024]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0x00,0x04,0x00,0x00,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx+0x400]{1to2},0x7b
-// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1024]{1to2}, 123
+// CHECK: vcmppd k3, xmm27, qword ptr [rdx - 1024]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x5a,0x80,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx-0x400]{1to2},0x7b
-// CHECK: vcmppd k3 , xmm27, qword ptr [rdx - 1032]{1to2}, 123
+// CHECK: vcmppd k3, xmm27, qword ptr [rdx - 1032]{1to2}, 123
// CHECK: encoding: [0x62,0xf1,0xa5,0x10,0xc2,0x9a,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k3,xmm27,QWORD PTR [rdx-0x408]{1to2},0x7b
-// CHECK: vcmppd k4 , ymm17, ymm27, 171
+// CHECK: vcmppd k4, ymm17, ymm27, 171
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0xab]
vcmppd k4,ymm17,ymm27,0xab
@@ -64,55 +64,55 @@
// CHECK: encoding: [0x62,0x91,0xf5,0x27,0xc2,0xe3,0xab]
vcmppd k4{k7},ymm17,ymm27,0xab
-// CHECK: vcmppd k4 , ymm17, ymm27, 123
+// CHECK: vcmppd k4, ymm17, ymm27, 123
// CHECK: encoding: [0x62,0x91,0xf5,0x20,0xc2,0xe3,0x7b]
vcmppd k4,ymm17,ymm27,0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rcx], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x21,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rcx],0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0xf5,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmppd k4 , ymm17, qword ptr [rcx]{1to4}, 123
+// CHECK: vcmppd k4, ymm17, qword ptr [rcx]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x21,0x7b]
vcmppd k4,ymm17,QWORD PTR [rcx]{1to4},0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4064], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rdx + 4064], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x7f,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx+0xfe0],0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx + 4096], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rdx + 4096], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx+0x1000],0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4096], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rdx - 4096], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0x62,0x80,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1000],0x7b
-// CHECK: vcmppd k4 , ymm17, ymmword ptr [rdx - 4128], 123
+// CHECK: vcmppd k4, ymm17, ymmword ptr [rdx - 4128], 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
vcmppd k4,ymm17,YMMWORD PTR [rdx-0x1020],0x7b
-// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1016]{1to4}, 123
+// CHECK: vcmppd k4, ymm17, qword ptr [rdx + 1016]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x7f,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx+0x3f8]{1to4},0x7b
-// CHECK: vcmppd k4 , ymm17, qword ptr [rdx + 1024]{1to4}, 123
+// CHECK: vcmppd k4, ymm17, qword ptr [rdx + 1024]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0x00,0x04,0x00,0x00,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx+0x400]{1to4},0x7b
-// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1024]{1to4}, 123
+// CHECK: vcmppd k4, ymm17, qword ptr [rdx - 1024]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0x62,0x80,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx-0x400]{1to4},0x7b
-// CHECK: vcmppd k4 , ymm17, qword ptr [rdx - 1032]{1to4}, 123
+// CHECK: vcmppd k4, ymm17, qword ptr [rdx - 1032]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0xf5,0x30,0xc2,0xa2,0xf8,0xfb,0xff,0xff,0x7b]
vcmppd k4,ymm17,QWORD PTR [rdx-0x408]{1to4},0x7b
-// CHECK: vcmpps k4 , xmm29, xmm28, 171
+// CHECK: vcmpps k4, xmm29, xmm28, 171
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0xab]
vcmpps k4,xmm29,xmm28,0xab
@@ -120,55 +120,55 @@
// CHECK: encoding: [0x62,0x91,0x14,0x02,0xc2,0xe4,0xab]
vcmpps k4{k2},xmm29,xmm28,0xab
-// CHECK: vcmpps k4 , xmm29, xmm28, 123
+// CHECK: vcmpps k4, xmm29, xmm28, 123
// CHECK: encoding: [0x62,0x91,0x14,0x00,0xc2,0xe4,0x7b]
vcmpps k4,xmm29,xmm28,0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rcx], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x21,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rcx],0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x14,0x00,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmpps k4 , xmm29, dword ptr [rcx]{1to4}, 123
+// CHECK: vcmpps k4, xmm29, dword ptr [rcx]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x21,0x7b]
vcmpps k4,xmm29,DWORD PTR [rcx]{1to4},0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2032], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rdx + 2032], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x7f,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rdx+0x7f0],0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx + 2048], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rdx + 2048], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0x00,0x08,0x00,0x00,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rdx+0x800],0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2048], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rdx - 2048], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0x62,0x80,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rdx-0x800],0x7b
-// CHECK: vcmpps k4 , xmm29, xmmword ptr [rdx - 2064], 123
+// CHECK: vcmpps k4, xmm29, xmmword ptr [rdx - 2064], 123
// CHECK: encoding: [0x62,0xf1,0x14,0x00,0xc2,0xa2,0xf0,0xf7,0xff,0xff,0x7b]
vcmpps k4,xmm29,XMMWORD PTR [rdx-0x810],0x7b
-// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 508]{1to4}, 123
+// CHECK: vcmpps k4, xmm29, dword ptr [rdx + 508]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x7f,0x7b]
vcmpps k4,xmm29,DWORD PTR [rdx+0x1fc]{1to4},0x7b
-// CHECK: vcmpps k4 , xmm29, dword ptr [rdx + 512]{1to4}, 123
+// CHECK: vcmpps k4, xmm29, dword ptr [rdx + 512]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
vcmpps k4,xmm29,DWORD PTR [rdx+0x200]{1to4},0x7b
-// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 512]{1to4}, 123
+// CHECK: vcmpps k4, xmm29, dword ptr [rdx - 512]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0x62,0x80,0x7b]
vcmpps k4,xmm29,DWORD PTR [rdx-0x200]{1to4},0x7b
-// CHECK: vcmpps k4 , xmm29, dword ptr [rdx - 516]{1to4}, 123
+// CHECK: vcmpps k4, xmm29, dword ptr [rdx - 516]{1to4}, 123
// CHECK: encoding: [0x62,0xf1,0x14,0x10,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vcmpps k4,xmm29,DWORD PTR [rdx-0x204]{1to4},0x7b
-// CHECK: vcmpps k4 , ymm19, ymm18, 171
+// CHECK: vcmpps k4, ymm19, ymm18, 171
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0xab]
vcmpps k4,ymm19,ymm18,0xab
@@ -176,50 +176,50 @@
// CHECK: encoding: [0x62,0xb1,0x64,0x21,0xc2,0xe2,0xab]
vcmpps k4{k1},ymm19,ymm18,0xab
-// CHECK: vcmpps k4 , ymm19, ymm18, 123
+// CHECK: vcmpps k4, ymm19, ymm18, 123
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xe2,0x7b]
vcmpps k4,ymm19,ymm18,0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rcx], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rcx], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x21,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rcx],0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rax + 8*r14 + 291], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rax + 8*r14 + 291], 123
// CHECK: encoding: [0x62,0xb1,0x64,0x20,0xc2,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rax+r14*8+0x123],0x7b
-// CHECK: vcmpps k4 , ymm19, dword ptr [rcx]{1to8}, 123
+// CHECK: vcmpps k4, ymm19, dword ptr [rcx]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x21,0x7b]
vcmpps k4,ymm19,DWORD PTR [rcx]{1to8},0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4064], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rdx + 4064], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x7f,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rdx+0xfe0],0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx + 4096], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rdx + 4096], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0x00,0x10,0x00,0x00,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rdx+0x1000],0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4096], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rdx - 4096], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0x62,0x80,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1000],0x7b
-// CHECK: vcmpps k4 , ymm19, ymmword ptr [rdx - 4128], 123
+// CHECK: vcmpps k4, ymm19, ymmword ptr [rdx - 4128], 123
// CHECK: encoding: [0x62,0xf1,0x64,0x20,0xc2,0xa2,0xe0,0xef,0xff,0xff,0x7b]
vcmpps k4,ymm19,YMMWORD PTR [rdx-0x1020],0x7b
-// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 508]{1to8}, 123
+// CHECK: vcmpps k4, ymm19, dword ptr [rdx + 508]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x7f,0x7b]
vcmpps k4,ymm19,DWORD PTR [rdx+0x1fc]{1to8},0x7b
-// CHECK: vcmpps k4 , ymm19, dword ptr [rdx + 512]{1to8}, 123
+// CHECK: vcmpps k4, ymm19, dword ptr [rdx + 512]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0x00,0x02,0x00,0x00,0x7b]
vcmpps k4,ymm19,DWORD PTR [rdx+0x200]{1to8},0x7b
-// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 512]{1to8}, 123
+// CHECK: vcmpps k4, ymm19, dword ptr [rdx - 512]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0x62,0x80,0x7b]
vcmpps k4,ymm19,DWORD PTR [rdx-0x200]{1to8},0x7b
-// CHECK: vcmpps k4 , ymm19, dword ptr [rdx - 516]{1to8}, 123
+// CHECK: vcmpps k4, ymm19, dword ptr [rdx - 516]{1to8}, 123
// CHECK: encoding: [0x62,0xf1,0x64,0x30,0xc2,0xa2,0xfc,0xfd,0xff,0xff,0x7b]
vcmpps k4,ymm19,DWORD PTR [rdx-0x204]{1to8},0x7b
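
The memory-operand CHECK lines above also document EVEX compressed-disp8 behavior: the one-byte displacement is implicitly scaled by the access size (16 for an xmmword, 8 for a {1to2} qword broadcast), and anything that does not divide evenly or overflows a signed 8-bit value after scaling falls back to a 4-byte disp32. A small Python sketch (hypothetical helper) reproducing the boundary cases visible in these encodings:

def compress_disp8(disp: int, n: int):
    # EVEX disp8*N: store disp/n when it divides evenly and fits in int8.
    q, r = divmod(disp, n)
    if r == 0 and -128 <= q <= 127:
        return q & 0xFF   # single disp8 byte
    return None           # encoder must emit a full disp32 instead

assert compress_disp8(2032, 16) == 0x7F    # xmmword ptr [rdx + 2032]
assert compress_disp8(-2048, 16) == 0x80   # xmmword ptr [rdx - 2048]
assert compress_disp8(1016, 8)  == 0x7F    # qword ptr [rdx + 1016]{1to2}
assert compress_disp8(2048, 16) is None    # [rdx + 2048] needs disp32
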
diff --git a/test/MC/X86/x86-64-avx512dq.s b/test/MC/X86/x86-64-avx512dq.s
index d0b91d69ebde..ed46ae7c5997 100644
--- a/test/MC/X86/x86-64-avx512dq.s
+++ b/test/MC/X86/x86-64-avx512dq.s
@@ -1171,7 +1171,7 @@
// CHECK: encoding: [0x62,0xa3,0xe5,0xc6,0x50,0xc9,0xab]
vrangepd $0xab, %zmm17, %zmm19, %zmm17 {%k6} {z}
-// CHECK: vrangepd $171,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $171, {sae}, %zmm17, %zmm19, %zmm17
// CHECK: encoding: [0x62,0xa3,0xe5,0x10,0x50,0xc9,0xab]
vrangepd $0xab,{sae}, %zmm17, %zmm19, %zmm17
@@ -1179,7 +1179,7 @@
// CHECK: encoding: [0x62,0xa3,0xe5,0x40,0x50,0xc9,0x7b]
vrangepd $0x7b, %zmm17, %zmm19, %zmm17
-// CHECK: vrangepd $123,{sae}, %zmm17, %zmm19, %zmm17
+// CHECK: vrangepd $123, {sae}, %zmm17, %zmm19, %zmm17
// CHECK: encoding: [0x62,0xa3,0xe5,0x10,0x50,0xc9,0x7b]
vrangepd $0x7b,{sae}, %zmm17, %zmm19, %zmm17
@@ -1239,7 +1239,7 @@
// CHECK: encoding: [0x62,0x23,0x55,0xc6,0x50,0xc1,0xab]
vrangeps $0xab, %zmm17, %zmm21, %zmm24 {%k6} {z}
-// CHECK: vrangeps $171,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $171, {sae}, %zmm17, %zmm21, %zmm24
// CHECK: encoding: [0x62,0x23,0x55,0x10,0x50,0xc1,0xab]
vrangeps $0xab,{sae}, %zmm17, %zmm21, %zmm24
@@ -1247,7 +1247,7 @@
// CHECK: encoding: [0x62,0x23,0x55,0x40,0x50,0xc1,0x7b]
vrangeps $0x7b, %zmm17, %zmm21, %zmm24
-// CHECK: vrangeps $123,{sae}, %zmm17, %zmm21, %zmm24
+// CHECK: vrangeps $123, {sae}, %zmm17, %zmm21, %zmm24
// CHECK: encoding: [0x62,0x23,0x55,0x10,0x50,0xc1,0x7b]
vrangeps $0x7b,{sae}, %zmm17, %zmm21, %zmm24
@@ -1307,7 +1307,7 @@
// CHECK: encoding: [0x62,0xa3,0xf5,0x85,0x51,0xcd,0xab]
vrangesd $0xab, %xmm21, %xmm17, %xmm17 {%k5} {z}
-// CHECK: vrangesd $171,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $171, {sae}, %xmm21, %xmm17, %xmm17
// CHECK: encoding: [0x62,0xa3,0xf5,0x10,0x51,0xcd,0xab]
vrangesd $0xab,{sae}, %xmm21, %xmm17, %xmm17
@@ -1315,7 +1315,7 @@
// CHECK: encoding: [0x62,0xa3,0xf5,0x00,0x51,0xcd,0x7b]
vrangesd $0x7b, %xmm21, %xmm17, %xmm17
-// CHECK: vrangesd $123,{sae}, %xmm21, %xmm17, %xmm17
+// CHECK: vrangesd $123, {sae}, %xmm21, %xmm17, %xmm17
// CHECK: encoding: [0x62,0xa3,0xf5,0x10,0x51,0xcd,0x7b]
vrangesd $0x7b,{sae}, %xmm21, %xmm17, %xmm17
@@ -1355,7 +1355,7 @@
// CHECK: encoding: [0x62,0x23,0x3d,0x85,0x51,0xcc,0xab]
vrangess $0xab, %xmm20, %xmm24, %xmm25 {%k5} {z}
-// CHECK: vrangess $171,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $171, {sae}, %xmm20, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x51,0xcc,0xab]
vrangess $0xab,{sae}, %xmm20, %xmm24, %xmm25
@@ -1363,7 +1363,7 @@
// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x51,0xcc,0x7b]
vrangess $0x7b, %xmm20, %xmm24, %xmm25
-// CHECK: vrangess $123,{sae}, %xmm20, %xmm24, %xmm25
+// CHECK: vrangess $123, {sae}, %xmm20, %xmm24, %xmm25
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x51,0xcc,0x7b]
vrangess $0x7b,{sae}, %xmm20, %xmm24, %xmm25
@@ -1403,7 +1403,7 @@
// CHECK: encoding: [0x62,0xa3,0xfd,0xce,0x56,0xdb,0xab]
vreducepd $0xab, %zmm19, %zmm19 {%k6} {z}
-// CHECK: vreducepd $171,{sae}, %zmm19, %zmm19
+// CHECK: vreducepd $171, {sae}, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0xab]
vreducepd $0xab,{sae}, %zmm19, %zmm19
@@ -1411,7 +1411,7 @@
// CHECK: encoding: [0x62,0xa3,0xfd,0x48,0x56,0xdb,0x7b]
vreducepd $0x7b, %zmm19, %zmm19
-// CHECK: vreducepd $123,{sae}, %zmm19, %zmm19
+// CHECK: vreducepd $123, {sae}, %zmm19, %zmm19
// CHECK: encoding: [0x62,0xa3,0xfd,0x18,0x56,0xdb,0x7b]
vreducepd $0x7b,{sae}, %zmm19, %zmm19
@@ -1471,7 +1471,7 @@
// CHECK: encoding: [0x62,0x83,0x7d,0xcb,0x56,0xdd,0xab]
vreduceps $0xab, %zmm29, %zmm19 {%k3} {z}
-// CHECK: vreduceps $171,{sae}, %zmm29, %zmm19
+// CHECK: vreduceps $171, {sae}, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0xab]
vreduceps $0xab,{sae}, %zmm29, %zmm19
@@ -1479,7 +1479,7 @@
// CHECK: encoding: [0x62,0x83,0x7d,0x48,0x56,0xdd,0x7b]
vreduceps $0x7b, %zmm29, %zmm19
-// CHECK: vreduceps $123,{sae}, %zmm29, %zmm19
+// CHECK: vreduceps $123, {sae}, %zmm29, %zmm19
// CHECK: encoding: [0x62,0x83,0x7d,0x18,0x56,0xdd,0x7b]
vreduceps $0x7b,{sae}, %zmm29, %zmm19
@@ -1539,7 +1539,7 @@
// CHECK: encoding: [0x62,0x83,0xf5,0x86,0x57,0xc9,0xab]
vreducesd $0xab, %xmm25, %xmm17, %xmm17 {%k6} {z}
-// CHECK: vreducesd $171,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK: vreducesd $171, {sae}, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0xab]
vreducesd $0xab,{sae}, %xmm25, %xmm17, %xmm17
@@ -1547,7 +1547,7 @@
// CHECK: encoding: [0x62,0x83,0xf5,0x00,0x57,0xc9,0x7b]
vreducesd $0x7b, %xmm25, %xmm17, %xmm17
-// CHECK: vreducesd $123,{sae}, %xmm25, %xmm17, %xmm17
+// CHECK: vreducesd $123, {sae}, %xmm25, %xmm17, %xmm17
// CHECK: encoding: [0x62,0x83,0xf5,0x10,0x57,0xc9,0x7b]
vreducesd $0x7b,{sae}, %xmm25, %xmm17, %xmm17
@@ -1587,7 +1587,7 @@
// CHECK: encoding: [0x62,0x23,0x15,0x81,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm29, %xmm30 {%k1} {z}
-// CHECK: vreducess $171,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK: vreducess $171, {sae}, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0xab]
vreducess $0xab,{sae}, %xmm21, %xmm29, %xmm30
@@ -1595,7 +1595,7 @@
// CHECK: encoding: [0x62,0x23,0x15,0x00,0x57,0xf5,0x7b]
vreducess $0x7b, %xmm21, %xmm29, %xmm30
-// CHECK: vreducess $123,{sae}, %xmm21, %xmm29, %xmm30
+// CHECK: vreducess $123, {sae}, %xmm21, %xmm29, %xmm30
// CHECK: encoding: [0x62,0x23,0x15,0x10,0x57,0xf5,0x7b]
vreducess $0x7b,{sae}, %xmm21, %xmm29, %xmm30
@@ -1635,7 +1635,7 @@
// CHECK: encoding: [0x62,0x83,0xfd,0xcd,0x56,0xd4,0xab]
vreducepd $0xab, %zmm28, %zmm18 {%k5} {z}
-// CHECK: vreducepd $171,{sae}, %zmm28, %zmm18
+// CHECK: vreducepd $171, {sae}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0xab]
vreducepd $0xab,{sae}, %zmm28, %zmm18
@@ -1643,7 +1643,7 @@
// CHECK: encoding: [0x62,0x83,0xfd,0x48,0x56,0xd4,0x7b]
vreducepd $0x7b, %zmm28, %zmm18
-// CHECK: vreducepd $123,{sae}, %zmm28, %zmm18
+// CHECK: vreducepd $123, {sae}, %zmm28, %zmm18
// CHECK: encoding: [0x62,0x83,0xfd,0x18,0x56,0xd4,0x7b]
vreducepd $0x7b,{sae}, %zmm28, %zmm18
@@ -1703,7 +1703,7 @@
// CHECK: encoding: [0x62,0x03,0x7d,0xcb,0x56,0xd1,0xab]
vreduceps $0xab, %zmm25, %zmm26 {%k3} {z}
-// CHECK: vreduceps $171,{sae}, %zmm25, %zmm26
+// CHECK: vreduceps $171, {sae}, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0xab]
vreduceps $0xab,{sae}, %zmm25, %zmm26
@@ -1711,7 +1711,7 @@
// CHECK: encoding: [0x62,0x03,0x7d,0x48,0x56,0xd1,0x7b]
vreduceps $0x7b, %zmm25, %zmm26
-// CHECK: vreduceps $123,{sae}, %zmm25, %zmm26
+// CHECK: vreduceps $123, {sae}, %zmm25, %zmm26
// CHECK: encoding: [0x62,0x03,0x7d,0x18,0x56,0xd1,0x7b]
vreduceps $0x7b,{sae}, %zmm25, %zmm26
@@ -1771,7 +1771,7 @@
// CHECK: encoding: [0x62,0x03,0xe5,0x83,0x57,0xc8,0xab]
vreducesd $0xab, %xmm24, %xmm19, %xmm25 {%k3} {z}
-// CHECK: vreducesd $171,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK: vreducesd $171, {sae}, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0xab]
vreducesd $0xab,{sae}, %xmm24, %xmm19, %xmm25
@@ -1779,7 +1779,7 @@
// CHECK: encoding: [0x62,0x03,0xe5,0x00,0x57,0xc8,0x7b]
vreducesd $0x7b, %xmm24, %xmm19, %xmm25
-// CHECK: vreducesd $123,{sae}, %xmm24, %xmm19, %xmm25
+// CHECK: vreducesd $123, {sae}, %xmm24, %xmm19, %xmm25
// CHECK: encoding: [0x62,0x03,0xe5,0x10,0x57,0xc8,0x7b]
vreducesd $0x7b,{sae}, %xmm24, %xmm19, %xmm25
@@ -1819,7 +1819,7 @@
// CHECK: encoding: [0x62,0x23,0x3d,0x82,0x57,0xf5,0xab]
vreducess $0xab, %xmm21, %xmm24, %xmm30 {%k2} {z}
-// CHECK: vreducess $171,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK: vreducess $171, {sae}, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0xab]
vreducess $0xab,{sae}, %xmm21, %xmm24, %xmm30
@@ -1827,7 +1827,7 @@
// CHECK: encoding: [0x62,0x23,0x3d,0x00,0x57,0xf5,0x7b]
vreducess $0x7b, %xmm21, %xmm24, %xmm30
-// CHECK: vreducess $123,{sae}, %xmm21, %xmm24, %xmm30
+// CHECK: vreducess $123, {sae}, %xmm21, %xmm24, %xmm30
// CHECK: encoding: [0x62,0x23,0x3d,0x10,0x57,0xf5,0x7b]
vreducess $0x7b,{sae}, %xmm21, %xmm24, %xmm30
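
One pattern worth noting in the CHECK encodings throughout these AVX-512 files: the plain and {sae} forms of an instruction differ in exactly one byte of the EVEX prefix. Byte 3 carries EVEX.b (bit 4), which selects SAE/embedded-rounding semantics, and in these encoder outputs the L'L vector-length bits (6:5) are zeroed when it is set. A short Python sketch, with the bytes copied from the vrangepd lines above:

plain = [0x62, 0xA3, 0xE5, 0x40, 0x50, 0xC9, 0x7B]  # vrangepd $0x7b, %zmm17, ...
sae   = [0x62, 0xA3, 0xE5, 0x10, 0x50, 0xC9, 0x7B]  # vrangepd $0x7b, {sae}, ...

assert [i for i in range(7) if plain[i] != sae[i]] == [3]  # only EVEX byte 3
assert (sae[3] >> 4) & 1 == 1     # EVEX.b set for {sae}
assert (sae[3] >> 5) & 0b11 == 0  # L'L cleared in the {sae} encoding
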
diff --git a/test/Other/2010-05-06-Printer.ll b/test/Other/2010-05-06-Printer.ll
index e57b9825b334..dcc0e752bb5a 100644
--- a/test/Other/2010-05-06-Printer.ll
+++ b/test/Other/2010-05-06-Printer.ll
@@ -1,7 +1,19 @@
; RUN: llc -O2 -print-after-all < %s 2>/dev/null
+; RUN: llc -O2 -print-after-all < %s 2>&1 | FileCheck %s --check-prefix=ALL
+; RUN: llc -O2 -print-after-all -filter-print-funcs=foo < %s 2>&1 | FileCheck %s --check-prefix=FOO
; REQUIRES: default_triple
-
define void @tester(){
ret void
}
+define void @foo(){
+ ret void
+}
+
+;ALL: define void @tester()
+;ALL: define void @foo()
+;ALL: ModuleID =
+
+;FOO: IR Dump After
+;FOO-NEXT: define void @foo()
+;FOO-NOT: define void @tester
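
The new FOO RUN line exercises the gating that -filter-print-funcs applies to per-function IR dumps. A one-line Python model of that predicate (hypothetical names, illustrative only):

def should_print(func: str, filter_funcs: set) -> bool:
    # An empty filter means -print-after-all dumps everything (the ALL case).
    return not filter_funcs or func in filter_funcs

assert should_print("foo", {"foo"}) and should_print("tester", set())
assert not should_print("tester", {"foo"})   # matches the FOO-NOT check
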
diff --git a/test/TableGen/TwoLevelName.td b/test/TableGen/TwoLevelName.td
index 9c502f475507..77c7a6796ee4 100644
--- a/test/TableGen/TwoLevelName.td
+++ b/test/TableGen/TwoLevelName.td
@@ -21,6 +21,21 @@ multiclass OT2<string ss, int w> {
defm i8 : OT2<"i8", 8>;
+multiclass OT3<string ss, int w> {
+ defm v32#NAME : OT1<!strconcat("v32", ss), 32, w>;
+}
+
+multiclass OT4<string ss, int w> {
+ defm v64#NAME : OT1<!strconcat("v64", ss), 64, w>;
+}
+
+multiclass OT5<string ss, int w> {
+ defm NAME : OT3<ss, w>;
+ defm NAME : OT4<ss, w>;
+}
+
+defm i16 : OT5<"i16", 16>;
+
// CHECK: _v16i8
// CHECK: Length = 16
// CHECK: Width = 8
@@ -33,6 +48,10 @@ defm i8 : OT2<"i8", 8>;
// CHECK: Length = 2
// CHECK: Width = 8
+// CHECK: def _v32i16
+// CHECK: Length = 32
+// CHECK: Width = 16
+
// CHECK: _v3i8
// CHECK: Length = 3
// CHECK: Width = 8
@@ -41,6 +60,11 @@ defm i8 : OT2<"i8", 8>;
// CHECK: Length = 4
// CHECK: Width = 8
+// CHECK: _v64i16
+// CHECK: Length = 64
+// CHECK: Width = 16
+
// CHECK: _v8i8
// CHECK: Length = 8
// CHECK: Width = 8
+
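
The new OT3/OT4/OT5 multiclasses exercise two-level NAME concatenation: defm i16 binds NAME to i16, OT5 forwards it unchanged, and OT3/OT4 paste v32/v64 in front before the records pick up their leading underscore. A tiny Python model of the expected record names, illustrative only and checked against the new CHECK lines:

def ot5(name: str) -> list:
    # OT5 -> OT3/OT4 -> OT1: "v32"/"v64" pasted ahead of NAME, "_" prefixed.
    return [f"_v32{name}", f"_v64{name}"]

assert ot5("i16") == ["_v32i16", "_v64i16"]
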
diff --git a/test/Transforms/FunctionAttrs/norecurse.ll b/test/Transforms/FunctionAttrs/norecurse.ll
index 47481191d278..d5a2d8208402 100644
--- a/test/Transforms/FunctionAttrs/norecurse.ll
+++ b/test/Transforms/FunctionAttrs/norecurse.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -functionattrs -S | FileCheck %s
+; RUN: opt < %s -basicaa -functionattrs -rpo-functionattrs -S | FileCheck %s
; CHECK: define i32 @leaf() #0
define i32 @leaf() {
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport.ll b/test/Transforms/FunctionImport/Inputs/funcimport.ll
index 96555892fe3c..79b766b386df 100644
--- a/test/Transforms/FunctionImport/Inputs/funcimport.ll
+++ b/test/Transforms/FunctionImport/Inputs/funcimport.ll
@@ -10,6 +10,7 @@
define void @globalfunc1() #0 {
entry:
+ call void @funcwithpersonality()
ret void
}
@@ -79,6 +80,20 @@ entry:
ret i32 1
}
+declare i32 @__gxx_personality_v0(...)
+
+; Add enough instructions to prevent import with inst limit of 5
+define internal void @funcwithpersonality() #2 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ ret void
+}
+
define internal void @staticfunc2() #0 {
entry:
ret void
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport_alias.ll b/test/Transforms/FunctionImport/Inputs/funcimport_alias.ll
new file mode 100644
index 000000000000..f897aeda6ce1
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/funcimport_alias.ll
@@ -0,0 +1,7 @@
+declare void @analias()
+
+define void @callanalias() #0 {
+entry:
+ call void @analias()
+ ret void
+}
diff --git a/test/Transforms/FunctionImport/funcimport.ll b/test/Transforms/FunctionImport/funcimport.ll
index c099b9766477..52fd53d3f31f 100644
--- a/test/Transforms/FunctionImport/funcimport.ll
+++ b/test/Transforms/FunctionImport/funcimport.ll
@@ -73,3 +73,5 @@ declare void @callfuncptr(...) #1
; CHECK-DAG: declare void @weakfunc(...)
declare void @weakfunc(...) #1
+; INSTLIMDEF-DAG: define available_externally hidden void @funcwithpersonality.llvm.2() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; INSTLIM5-DAG: declare hidden void @funcwithpersonality.llvm.2()
diff --git a/test/Transforms/FunctionImport/funcimport_alias.ll b/test/Transforms/FunctionImport/funcimport_alias.ll
new file mode 100644
index 000000000000..8c7f00fe37b3
--- /dev/null
+++ b/test/Transforms/FunctionImport/funcimport_alias.ll
@@ -0,0 +1,25 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: llvm-as -function-summary %s -o %t.bc
+; RUN: llvm-as -function-summary %p/Inputs/funcimport_alias.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Do the import now. Ensures that the importer handles an external call
+; from imported callanalias() to a function that is defined already in
+; the dest module, but as an alias.
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
+
+define i32 @main() #0 {
+entry:
+ call void @callanalias()
+ ret i32 0
+}
+
+@analias = alias void (), void ()* @globalfunc
+
+define void @globalfunc() #0 {
+entry:
+ ret void
+}
+
+declare void @callanalias() #1
+; CHECK-DAG: define available_externally void @callanalias()
diff --git a/test/Transforms/FunctionImport/funcimport_debug.ll b/test/Transforms/FunctionImport/funcimport_debug.ll
index c57b5e14af1b..96b73a3f6bc7 100644
--- a/test/Transforms/FunctionImport/funcimport_debug.ll
+++ b/test/Transforms/FunctionImport/funcimport_debug.ll
@@ -7,8 +7,18 @@
; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
; CHECK: define available_externally void @func()
-; CHECK: distinct !DISubprogram(name: "main"
-; CHECK: distinct !DISubprogram(name: "func"
+
+; Check that we have exactly two subprograms (i.e. that func's subprogram was
+; not linked more than once), and that each is connected to
+; the subprogram list of its compile unit.
+; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs1:[0-9]+]]
+; CHECK: ![[SPs1]] = !{![[MAINSP:[0-9]+]]}
+; CHECK: ![[MAINSP]] = distinct !DISubprogram(name: "main"
+; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs2:[0-9]+]]
+; CHECK-NOT: ![[SPs2]] = !{{{.*}}null{{.*}}}
+; CHECK: ![[SPs2]] = !{![[FUNCSP:[0-9]+]]}
+; CHECK: ![[FUNCSP]] = distinct !DISubprogram(name: "func"
+; CHECK-NOT: distinct !DISubprogram
; ModuleID = 'funcimport_debug.o'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Transforms/IPConstantProp/PR16052.ll b/test/Transforms/IPConstantProp/PR16052.ll
new file mode 100644
index 000000000000..959074d771f9
--- /dev/null
+++ b/test/Transforms/IPConstantProp/PR16052.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -S -ipsccp | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i64 @fn2() {
+entry:
+ %conv = sext i32 undef to i64
+ %div = sdiv i64 8, %conv
+ %call2 = call i64 @fn1(i64 %div)
+ ret i64 %call2
+}
+
+; CHECK-DAG: define i64 @fn2(
+; CHECK: %[[CALL:.*]] = call i64 @fn1(i64 undef)
+
+define internal i64 @fn1(i64 %p1) {
+entry:
+ %tobool = icmp ne i64 %p1, 0
+ %cond = select i1 %tobool, i64 %p1, i64 %p1
+ ret i64 %cond
+}
+
+; CHECK-DAG: define internal i64 @fn1(
+; CHECK: %[[SEL:.*]] = select i1 undef, i64 undef, i64 undef
+; CHECK: ret i64 %[[SEL]]
diff --git a/test/Transforms/IPConstantProp/PR26044.ll b/test/Transforms/IPConstantProp/PR26044.ll
new file mode 100644
index 000000000000..9e8c61eb53bd
--- /dev/null
+++ b/test/Transforms/IPConstantProp/PR26044.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -ipsccp | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn2() {
+entry:
+ br label %if.end
+
+for.cond1: ; preds = %if.end, %for.end
+ br i1 undef, label %if.end, label %if.end
+
+if.end: ; preds = %lbl, %for.cond1
+ %e.2 = phi i32* [ undef, %entry ], [ null, %for.cond1 ], [ null, %for.cond1 ]
+ %0 = load i32, i32* %e.2, align 4
+ %call = call i32 @fn1(i32 %0)
+ br label %for.cond1
+}
+
+define internal i32 @fn1(i32 %p1) {
+entry:
+ %tobool = icmp ne i32 %p1, 0
+ %cond = select i1 %tobool, i32 %p1, i32 %p1
+ ret i32 %cond
+}
+
+; CHECK-LABEL: define void @fn2(
+; CHECK: call i32 @fn1(i32 undef)
+
+; CHECK-LABEL: define internal i32 @fn1(
+; CHECK:%[[COND:.*]] = select i1 undef, i32 undef, i32 undef
+; CHECK: ret i32 %[[COND]]
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
index a97e6a60de75..0458fa23f795 100644
--- a/test/Transforms/Inline/attributes.ll
+++ b/test/Transforms/Inline/attributes.ll
@@ -160,3 +160,87 @@ define i32 @test_target_features1(i32 %i) "target-features"="+sse4.2" {
; CHECK-NEXT: @test_target_features_callee1
; CHECK-NEXT: ret i32
}
+
+define i32 @less-precise-fpmad_callee0(i32 %i) "less-precise-fpmad"="false" {
+ ret i32 %i
+; CHECK: @less-precise-fpmad_callee0(i32 %i) [[FPMAD_FALSE:#[0-9]+]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @less-precise-fpmad_callee1(i32 %i) "less-precise-fpmad"="true" {
+ ret i32 %i
+; CHECK: @less-precise-fpmad_callee1(i32 %i) [[FPMAD_TRUE:#[0-9]+]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_less-precise-fpmad0(i32 %i) "less-precise-fpmad"="false" {
+ %1 = call i32 @less-precise-fpmad_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_less-precise-fpmad0(i32 %i) [[FPMAD_FALSE]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_less-precise-fpmad1(i32 %i) "less-precise-fpmad"="false" {
+ %1 = call i32 @less-precise-fpmad_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_less-precise-fpmad1(i32 %i) [[FPMAD_FALSE]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_less-precise-fpmad2(i32 %i) "less-precise-fpmad"="true" {
+ %1 = call i32 @less-precise-fpmad_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_less-precise-fpmad2(i32 %i) [[FPMAD_FALSE]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_less-precise-fpmad3(i32 %i) "less-precise-fpmad"="true" {
+ %1 = call i32 @less-precise-fpmad_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_less-precise-fpmad3(i32 %i) [[FPMAD_TRUE]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @no-implicit-float_callee0(i32 %i) {
+ ret i32 %i
+; CHECK: @no-implicit-float_callee0(i32 %i) {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @no-implicit-float_callee1(i32 %i) noimplicitfloat {
+ ret i32 %i
+; CHECK: @no-implicit-float_callee1(i32 %i) [[NOIMPLICITFLOAT:#[0-9]+]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-implicit-float0(i32 %i) {
+ %1 = call i32 @no-implicit-float_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-implicit-float0(i32 %i) {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-implicit-float1(i32 %i) {
+ %1 = call i32 @no-implicit-float_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-implicit-float1(i32 %i) [[NOIMPLICITFLOAT]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-implicit-float2(i32 %i) noimplicitfloat {
+ %1 = call i32 @no-implicit-float_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-implicit-float2(i32 %i) [[NOIMPLICITFLOAT]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-implicit-float3(i32 %i) noimplicitfloat {
+ %1 = call i32 @no-implicit-float_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-implicit-float3(i32 %i) [[NOIMPLICITFLOAT]] {
+; CHECK-NEXT: ret i32
+}
+
+; CHECK: attributes [[FPMAD_FALSE]] = { "less-precise-fpmad"="false" }
+; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" }
+; CHECK: attributes [[NOIMPLICITFLOAT]] = { noimplicitfloat }
diff --git a/test/Transforms/InstCombine/fast-math.ll b/test/Transforms/InstCombine/fast-math.ll
index 5bdf48b85ce7..6ccf6e9fa774 100644
--- a/test/Transforms/InstCombine/fast-math.ll
+++ b/test/Transforms/InstCombine/fast-math.ll
@@ -555,18 +555,12 @@ define float @fact_div6(float %x) {
; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.
-; We have to rely on a function-level attribute to enable this optimization
-; because intrinsics don't currently have access to IR-level fast-math
-; flags. If that changes, we can relax the requirement on all of these
-; tests to just specify 'fast' on the sqrt.
-
-attributes #0 = { "unsafe-fp-math" = "true" }
declare double @llvm.sqrt.f64(double)
-define double @sqrt_intrinsic_arg_squared(double %x) #0 {
+define double @sqrt_intrinsic_arg_squared(double %x) {
%mul = fmul fast double %x, %x
- %sqrt = call double @llvm.sqrt.f64(double %mul)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_arg_squared(
@@ -577,10 +571,10 @@ define double @sqrt_intrinsic_arg_squared(double %x) #0 {
; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.
-define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args1(double %x, double %y) {
%mul = fmul fast double %y, %x
%mul2 = fmul fast double %mul, %x
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args1(
@@ -590,10 +584,10 @@ define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args2(double %x, double %y) {
%mul = fmul fast double %x, %y
%mul2 = fmul fast double %mul, %x
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args2(
@@ -603,10 +597,10 @@ define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args3(double %x, double %y) {
%mul = fmul fast double %x, %x
%mul2 = fmul fast double %mul, %y
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args3(
@@ -616,10 +610,10 @@ define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args4(double %x, double %y) {
%mul = fmul fast double %y, %x
%mul2 = fmul fast double %x, %mul
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args4(
@@ -629,10 +623,10 @@ define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args5(double %x, double %y) {
%mul = fmul fast double %x, %y
%mul2 = fmul fast double %x, %mul
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args5(
@@ -642,10 +636,10 @@ define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
+define double @sqrt_intrinsic_three_args6(double %x, double %y) {
%mul = fmul fast double %x, %x
%mul2 = fmul fast double %y, %mul
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_three_args6(
@@ -655,10 +649,25 @@ define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
; CHECK-NEXT: ret double %1
}
-define double @sqrt_intrinsic_arg_4th(double %x) #0 {
+; If any operation is not 'fast', we can't simplify.
+
+define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
+ %mul = fmul double %x, %x
+ %mul2 = fmul fast double %mul, %y
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+ ret double %sqrt
+
+; CHECK-LABEL: sqrt_intrinsic_not_so_fast(
+; CHECK-NEXT: %mul = fmul double %x, %x
+; CHECK-NEXT: %mul2 = fmul fast double %mul, %y
+; CHECK-NEXT: %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
+; CHECK-NEXT: ret double %sqrt
+}
+
+define double @sqrt_intrinsic_arg_4th(double %x) {
%mul = fmul fast double %x, %x
%mul2 = fmul fast double %mul, %mul
- %sqrt = call double @llvm.sqrt.f64(double %mul2)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_arg_4th(
@@ -666,11 +675,11 @@ define double @sqrt_intrinsic_arg_4th(double %x) #0 {
; CHECK-NEXT: ret double %mul
}
-define double @sqrt_intrinsic_arg_5th(double %x) #0 {
+define double @sqrt_intrinsic_arg_5th(double %x) {
%mul = fmul fast double %x, %x
%mul2 = fmul fast double %mul, %x
%mul3 = fmul fast double %mul2, %mul
- %sqrt = call double @llvm.sqrt.f64(double %mul3)
+ %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
ret double %sqrt
; CHECK-LABEL: sqrt_intrinsic_arg_5th(
@@ -686,9 +695,9 @@ declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)
-define float @sqrt_call_squared_f32(float %x) #0 {
+define float @sqrt_call_squared_f32(float %x) {
%mul = fmul fast float %x, %x
- %sqrt = call float @sqrtf(float %mul)
+ %sqrt = call fast float @sqrtf(float %mul)
ret float %sqrt
; CHECK-LABEL: sqrt_call_squared_f32(
@@ -696,9 +705,9 @@ define float @sqrt_call_squared_f32(float %x) #0 {
; CHECK-NEXT: ret float %fabs
}
-define double @sqrt_call_squared_f64(double %x) #0 {
+define double @sqrt_call_squared_f64(double %x) {
%mul = fmul fast double %x, %x
- %sqrt = call double @sqrt(double %mul)
+ %sqrt = call fast double @sqrt(double %mul)
ret double %sqrt
; CHECK-LABEL: sqrt_call_squared_f64(
@@ -706,9 +715,9 @@ define double @sqrt_call_squared_f64(double %x) #0 {
; CHECK-NEXT: ret double %fabs
}
-define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
+define fp128 @sqrt_call_squared_f128(fp128 %x) {
%mul = fmul fast fp128 %x, %x
- %sqrt = call fp128 @sqrtl(fp128 %mul)
+ %sqrt = call fast fp128 @sqrtl(fp128 %mul)
ret fp128 %sqrt
; CHECK-LABEL: sqrt_call_squared_f128(
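
For context, the fold these updated tests pin down is sqrt(x*x) -> fabs(x): a squared operand is never negative, so its square root reduces to a magnitude. A minimal standalone sketch of the input pattern (hypothetical function name; assumes, as the tests above now require, that both the fmul and the sqrt call carry the fast flag):

    declare double @llvm.sqrt.f64(double)

    ; sqrt_of_square is a hypothetical name, not part of the suite above
    define double @sqrt_of_square(double %x) {
      %mul = fmul fast double %x, %x                        ; x*x is non-negative
      %sqrt = call fast double @llvm.sqrt.f64(double %mul)  ; sqrt(x*x)
      ret double %sqrt                                      ; expected to fold to fabs(x)
    }

Running this through opt -instcombine -S should leave a single @llvm.fabs.f64 call on %x, which is what the CHECK lines above assert for the in-tree tests.
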
diff --git a/test/Transforms/InstCombine/inline-intrinsic-assert.ll b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
index c6446d43cffd..8eecb3fd40aa 100644
--- a/test/Transforms/InstCombine/inline-intrinsic-assert.ll
+++ b/test/Transforms/InstCombine/inline-intrinsic-assert.ll
@@ -4,7 +4,7 @@
; The inliner should not add an edge to an intrinsic and
; then assert that it did not add an edge to an intrinsic!
-define float @foo(float %f1) #0 {
+define float @foo(float %f1) {
%call = call float @bar(float %f1)
ret float %call
@@ -13,18 +13,16 @@ define float @foo(float %f1) #0 {
; CHECK-NEXT: ret float
}
-define float @bar(float %f1) #0 {
+define float @bar(float %f1) {
%call = call float @sqr(float %f1)
- %call1 = call float @sqrtf(float %call) #0
+ %call1 = call fast float @sqrtf(float %call)
ret float %call1
}
-define float @sqr(float %f) #0 {
+define float @sqr(float %f) {
%mul = fmul fast float %f, %f
ret float %mul
}
-declare float @sqrtf(float) #0
-
-attributes #0 = { "unsafe-fp-math"="true" }
+declare float @sqrtf(float)
diff --git a/test/Transforms/InstCombine/insert-extract-shuffle.ll b/test/Transforms/InstCombine/insert-extract-shuffle.ll
index 4223660db3d6..47c2a139a479 100644
--- a/test/Transforms/InstCombine/insert-extract-shuffle.ll
+++ b/test/Transforms/InstCombine/insert-extract-shuffle.ll
@@ -125,3 +125,53 @@ end:
ret <8 x i16> %t6
}
+; The widening shuffle must be inserted at a valid point (after the PHIs).
+
+define <4 x double> @pr25999_phis1(i1 %c, <2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @pr25999_phis1(
+; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x double> %tmp1, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x double> %tmp4
+bb1:
+ br i1 %c, label %bb2, label %bb3
+
+bb2:
+ %r = call <2 x double> @dummy(<2 x double> %a)
+ br label %bb3
+
+bb3:
+ %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+ %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+ %tmp3 = extractelement <2 x double> %tmp1, i32 0
+ %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
+ ret <4 x double> %tmp4
+}
+
+declare <2 x double> @dummy(<2 x double>)
+
+define <4 x double> @pr25999_phis2(i1 %c, <2 x double> %a, <4 x double> %b) {
+; CHECK-LABEL: @pr25999_phis2(
+; CHECK: %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+; CHECK-NEXT: %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+; CHECK-NEXT: %d = fadd <2 x double> %tmp1, %tmp1
+; CHECK-NEXT: %[[WIDEVEC:.*]] = shufflevector <2 x double> %d, <2 x double> undef, <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: %tmp4 = shufflevector <4 x double> %tmp2, <4 x double> %[[WIDEVEC]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
+; CHECK-NEXT: ret <4 x double> %tmp4
+bb1:
+ br i1 %c, label %bb2, label %bb3
+
+bb2:
+ %r = call <2 x double> @dummy(<2 x double> %a)
+ br label %bb3
+
+bb3:
+ %tmp1 = phi <2 x double> [ %a, %bb1 ], [ %r, %bb2 ]
+ %tmp2 = phi <4 x double> [ %b, %bb1 ], [ zeroinitializer, %bb2 ]
+ %d = fadd <2 x double> %tmp1, %tmp1
+ %tmp3 = extractelement <2 x double> %d, i32 0
+ %tmp4 = insertelement <4 x double> %tmp2, double %tmp3, i32 2
+ ret <4 x double> %tmp4
+}
+
diff --git a/test/Transforms/InstCombine/log-pow.ll b/test/Transforms/InstCombine/log-pow.ll
index c5ca1688d34a..a0c10d0a0b8c 100644
--- a/test/Transforms/InstCombine/log-pow.ll
+++ b/test/Transforms/InstCombine/log-pow.ll
@@ -1,41 +1,61 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define double @mylog(double %x, double %y) #0 {
-entry:
+define double @log_pow(double %x, double %y) {
+ %pow = call fast double @llvm.pow.f64(double %x, double %y)
+ %call = call fast double @log(double %pow)
+ ret double %call
+}
+
+; CHECK-LABEL: define double @log_pow(
+; CHECK-NEXT: %log = call fast double @log(double %x)
+; CHECK-NEXT: %mul = fmul fast double %log, %y
+; CHECK-NEXT: ret double %mul
+
+define double @log_pow_not_fast(double %x, double %y) {
%pow = call double @llvm.pow.f64(double %x, double %y)
- %call = call double @log(double %pow) #0
+ %call = call fast double @log(double %pow)
ret double %call
}
-; CHECK-LABEL: define double @mylog(
-; CHECK: %log = call fast double @log(double %x) #0
-; CHECK: %mul = fmul fast double %log, %y
-; CHECK: ret double %mul
-; CHECK: }
+; CHECK-LABEL: define double @log_pow_not_fast(
+; CHECK-NEXT: %pow = call double @llvm.pow.f64(double %x, double %y)
+; CHECK-NEXT: %call = call fast double @log(double %pow)
+; CHECK-NEXT: ret double %call
-define double @test2(double ()* %fptr, double %p1) #0 {
+define double @function_pointer(double ()* %fptr, double %p1) {
%call1 = call double %fptr()
%pow = call double @log(double %call1)
ret double %pow
}
-; CHECK-LABEL: @test2
-; CHECK: log
+; CHECK-LABEL: @function_pointer
+; CHECK-NEXT: %call1 = call double %fptr()
+; CHECK-NEXT: %pow = call double @log(double %call1)
+; CHECK-NEXT: ret double %pow
+
+define double @log_exp2(double %x) {
+ %call2 = call fast double @exp2(double %x)
+ %call3 = call fast double @log(double %call2)
+ ret double %call3
+}
+
+; CHECK-LABEL: @log_exp2
+; CHECK-NEXT: %call2 = call fast double @exp2(double %x)
+; CHECK-NEXT: %logmul = fmul fast double %x, 0x3FE62E42FEFA39EF
+; CHECK-NEXT: ret double %logmul
-define double @test3(double %x) #0 {
- %call2 = call double @exp2(double %x) #0
- %call3 = call double @log(double %call2) #0
+define double @log_exp2_not_fast(double %x) {
+ %call2 = call double @exp2(double %x)
+ %call3 = call fast double @log(double %call2)
ret double %call3
}
-; CHECK-LABEL: @test3
-; CHECK: %call2 = call double @exp2(double %x) #0
-; CHECK: %logmul = fmul fast double %x, 0x3FE62E42FEFA39EF
-; CHECK: ret double %logmul
-; CHECK: }
+; CHECK-LABEL: @log_exp2_not_fast
+; CHECK-NEXT: %call2 = call double @exp2(double %x)
+; CHECK-NEXT: %call3 = call fast double @log(double %call2)
+; CHECK-NEXT: ret double %call3
-declare double @log(double) #0
-declare double @exp2(double) #0
+declare double @log(double)
+declare double @exp2(double)
declare double @llvm.pow.f64(double, double)
-attributes #0 = { "unsafe-fp-math"="true" }
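
The renamed tests encode two log identities: log(x^y) = y * log(x) and log(exp2(x)) = x * ln(2); the constant 0x3FE62E42FEFA39EF in the CHECK lines is the IEEE double encoding of ln(2) ~ 0.693147. A minimal sketch of the first pattern (hypothetical name; assumes fast on both calls, which the not_fast variants show is required):

    declare double @log(double)
    declare double @llvm.pow.f64(double, double)

    ; log_of_pow is a hypothetical name for illustration
    define double @log_of_pow(double %x, double %y) {
      %pow = call fast double @llvm.pow.f64(double %x, double %y)  ; x^y
      %res = call fast double @log(double %pow)                    ; log(x^y)
      ret double %res            ; expected: fmul fast of log(x) and %y
    }
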
diff --git a/test/Transforms/InstCombine/no_cgscc_assert.ll b/test/Transforms/InstCombine/no_cgscc_assert.ll
index 3df04d2c8902..677066fa2ab0 100644
--- a/test/Transforms/InstCombine/no_cgscc_assert.ll
+++ b/test/Transforms/InstCombine/no_cgscc_assert.ll
@@ -6,7 +6,7 @@
define float @bar(float %f) #0 {
%mul = fmul fast float %f, %f
- %call1 = call float @sqrtf(float %mul) #0
+ %call1 = call fast float @sqrtf(float %mul)
ret float %call1
; CHECK-LABEL: @bar(
@@ -14,6 +14,5 @@ define float @bar(float %f) #0 {
; CHECK-NEXT: ret float
}
-declare float @sqrtf(float) #0
+declare float @sqrtf(float)
-attributes #0 = { readnone "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-exp.ll b/test/Transforms/InstCombine/pow-exp.ll
index acc512734ec5..594594abd7d1 100644
--- a/test/Transforms/InstCombine/pow-exp.ll
+++ b/test/Transforms/InstCombine/pow-exp.ll
@@ -1,28 +1,49 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define double @mypow(double %x, double %y) #0 {
-entry:
+define double @pow_exp(double %x, double %y) {
+ %call = call fast double @exp(double %x) nounwind readnone
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @pow_exp(
+; CHECK-NEXT: %mul = fmul fast double %x, %y
+; CHECK-NEXT: %exp = call fast double @exp(double %mul)
+; CHECK-NEXT: ret double %exp
+
+define double @pow_exp2(double %x, double %y) {
+ %call = call fast double @exp2(double %x) nounwind readnone
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
+ ret double %pow
+}
+
+; CHECK-LABEL: define double @pow_exp2(
+; CHECK-NEXT: %mul = fmul fast double %x, %y
+; CHECK-NEXT: %exp2 = call fast double @exp2(double %mul)
+; CHECK-NEXT: ret double %exp2
+
+define double @pow_exp_not_fast(double %x, double %y) {
%call = call double @exp(double %x)
- %pow = call double @llvm.pow.f64(double %call, double %y)
+ %pow = call fast double @llvm.pow.f64(double %call, double %y)
ret double %pow
}
-; CHECK-LABEL: define double @mypow(
-; CHECK: %mul = fmul fast double %x, %y
-; CHECK: %exp = call fast double @exp(double %mul) #0
-; CHECK: ret double %exp
-; CHECK: }
+; CHECK-LABEL: define double @pow_exp_not_fast(
+; CHECK-NEXT: %call = call double @exp(double %x)
+; CHECK-NEXT: %pow = call fast double @llvm.pow.f64(double %call, double %y)
+; CHECK-NEXT: ret double %pow
-define double @test2(double ()* %fptr, double %p1) #0 {
- %call1 = call double %fptr()
- %pow = call double @llvm.pow.f64(double %call1, double %p1)
+define double @function_pointer(double ()* %fptr, double %p1) {
+ %call1 = call fast double %fptr()
+ %pow = call fast double @llvm.pow.f64(double %call1, double %p1)
ret double %pow
}
-; CHECK-LABEL: @test2
-; CHECK: llvm.pow.f64
+; CHECK-LABEL: @function_pointer
+; CHECK-NEXT: %call1 = call fast double %fptr()
+; CHECK-NEXT: %pow = call fast double @llvm.pow.f64(double %call1, double %p1)
-declare double @exp(double) #1
+declare double @exp(double)
+declare double @exp2(double)
declare double @llvm.pow.f64(double, double)
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" }
+
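
The tests above rest on the identity (e^x)^y = e^(x*y), and likewise for exp2; as pow_exp_not_fast demonstrates, the fold fires only when both the exp call and the pow call are fast. A minimal sketch of the qualifying input (hypothetical name):

    declare double @exp(double)
    declare double @llvm.pow.f64(double, double)

    ; pow_of_exp is a hypothetical name for illustration
    define double @pow_of_exp(double %x, double %y) {
      %e = call fast double @exp(double %x)                       ; e^x
      %p = call fast double @llvm.pow.f64(double %e, double %y)   ; (e^x)^y
      ret double %p              ; expected: exp(x*y)
    }
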
diff --git a/test/Transforms/InstCombine/pow-exp2.ll b/test/Transforms/InstCombine/pow-exp2.ll
deleted file mode 100644
index c42cab391e64..000000000000
--- a/test/Transforms/InstCombine/pow-exp2.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define double @mypow(double %x, double %y) #0 {
-entry:
- %call = call double @exp2(double %x)
- %pow = call double @llvm.pow.f64(double %call, double %y)
- ret double %pow
-}
-
-; CHECK-LABEL: define double @mypow(
-; CHECK: %mul = fmul fast double %x, %y
-; CHECK: %exp2 = call fast double @exp2(double %mul) #0
-; CHECK: ret double %exp2
-; CHECK: }
-
-declare double @exp2(double) #1
-declare double @llvm.pow.f64(double, double)
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/InstCombine/pow-sqrt.ll b/test/Transforms/InstCombine/pow-sqrt.ll
index 8fc74e4a0024..1e6166c5f114 100644
--- a/test/Transforms/InstCombine/pow-sqrt.ll
+++ b/test/Transforms/InstCombine/pow-sqrt.ll
@@ -1,15 +1,13 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define double @mypow(double %x) #0 {
-entry:
- %pow = call double @llvm.pow.f64(double %x, double 5.000000e-01)
+define double @pow_half(double %x) {
+ %pow = call fast double @llvm.pow.f64(double %x, double 5.000000e-01)
ret double %pow
}
-; CHECK-LABEL: define double @mypow(
-; CHECK: %sqrt = call double @sqrt(double %x) #1
-; CHECK: ret double %sqrt
-; CHECK: }
+; CHECK-LABEL: define double @pow_half(
+; CHECK-NEXT: %sqrt = call fast double @sqrt(double %x)
+; CHECK-NEXT: ret double %sqrt
declare double @llvm.pow.f64(double, double)
-attributes #0 = { "unsafe-fp-math"="true" }
+
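
pow(x, 0.5) matches sqrt(x) only up to IEEE corner cases (for instance pow(-0.0, 0.5) is +0.0 while sqrt(-0.0) is -0.0, and pow(-inf, 0.5) is +inf while sqrt(-inf) is NaN), which is presumably why the fold now keys off the fast flag on the call rather than a function-level attribute. A minimal sketch of the qualifying input (hypothetical name):

    declare double @llvm.pow.f64(double, double)

    ; half_power is a hypothetical name for illustration
    define double @half_power(double %x) {
      %p = call fast double @llvm.pow.f64(double %x, double 5.000000e-01)
      ret double %p              ; expected: call fast double @sqrt(double %x)
    }
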
diff --git a/test/Transforms/InstCombine/printf-3.ll b/test/Transforms/InstCombine/printf-3.ll
new file mode 100644
index 000000000000..8f3a36a13465
--- /dev/null
+++ b/test/Transforms/InstCombine/printf-3.ll
@@ -0,0 +1,39 @@
+; Test that the printf library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+@.str = private unnamed_addr constant [2 x i8] c"\0A\00", align 1
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch:
+ %cs = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %cp = catchpad within %cs [i8* null, i32 64, i8* null]
+ %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i32 0, i32 0)) [ "funclet"(token %cp) ]
+ catchret from %cp to label %try.cont
+
+try.cont:
+ ret void
+
+unreachable:
+ unreachable
+}
+
+; CHECK-DAG: define void @test1(
+; CHECK: %[[CS:.*]] = catchswitch within none
+; CHECK: %[[CP:.*]] = catchpad within %[[CS]] [i8* null, i32 64, i8* null]
+; CHECK: call i32 @putchar(i32 10) [ "funclet"(token %[[CP]]) ]
+
+declare void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare i32 @printf(i8*, ...)
diff --git a/test/Transforms/InstCombine/tan.ll b/test/Transforms/InstCombine/tan.ll
index 15a832f253a9..6ea116839fe9 100644
--- a/test/Transforms/InstCombine/tan.ll
+++ b/test/Transforms/InstCombine/tan.ll
@@ -1,24 +1,23 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define float @mytan(float %x) #0 {
-entry:
- %call = call float @atanf(float %x)
- %call1 = call float @tanf(float %call)
+define float @mytan(float %x) {
+ %call = call fast float @atanf(float %x)
+ %call1 = call fast float @tanf(float %call)
ret float %call1
}
; CHECK-LABEL: define float @mytan(
; CHECK: ret float %x
-define float @test2(float ()* %fptr) #0 {
- %call1 = call float %fptr()
- %tan = call float @tanf(float %call1)
+define float @test2(float ()* %fptr) {
+ %call1 = call fast float %fptr()
+ %tan = call fast float @tanf(float %call1)
ret float %tan
}
; CHECK-LABEL: @test2
; CHECK: tanf
-declare float @tanf(float) #0
-declare float @atanf(float) #0
-attributes #0 = { "unsafe-fp-math"="true" }
+declare float @tanf(float)
+declare float @atanf(float)
+
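
The first test relies on tan(atan(x)) == x, which is exact over the reals but only approximate in floating point, hence the fast flags; the second confirms the fold must not fire through an unknown function pointer. A minimal sketch of the foldable pattern (hypothetical name):

    declare float @atanf(float)
    declare float @tanf(float)

    ; tan_of_atan is a hypothetical name for illustration
    define float @tan_of_atan(float %x) {
      %a = call fast float @atanf(float %x)   ; atan(x)
      %t = call fast float @tanf(float %a)    ; tan(atan(x))
      ret float %t                            ; expected: ret float %x
    }
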
diff --git a/test/Transforms/InstSimplify/floating-point-compare.ll b/test/Transforms/InstSimplify/floating-point-compare.ll
index 8174f5834533..b148d9961d33 100644
--- a/test/Transforms/InstSimplify/floating-point-compare.ll
+++ b/test/Transforms/InstSimplify/floating-point-compare.ll
@@ -8,6 +8,8 @@ declare float @llvm.fabs.f32(float)
declare float @llvm.sqrt.f32(float)
declare double @llvm.powi.f64(double,i32)
declare float @llvm.exp.f32(float)
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
declare double @llvm.exp2.f64(double)
declare float @llvm.fma.f32(float,float,float)
@@ -58,6 +60,45 @@ define i1 @orderedLessZeroPowi(double,double) {
ret i1 %olt
}
+; CHECK-LABEL: @orderedLessZeroUIToFP(
+define i1 @orderedLessZeroUIToFP(i32) {
+ %a = uitofp i32 %0 to float
+ %uge = fcmp uge float %a, 0.000000e+00
+; CHECK: ret i1 true
+ ret i1 %uge
+}
+
+; CHECK-LABEL: @orderedLessZeroSelect(
+define i1 @orderedLessZeroSelect(float, float) {
+ %a = call float @llvm.exp.f32(float %0)
+ %b = call float @llvm.fabs.f32(float %1)
+ %c = fcmp olt float %0, %1
+ %d = select i1 %c, float %a, float %b
+ %e = fadd float %d, 1.0
+ %uge = fcmp uge float %e, 0.000000e+00
+; CHECK: ret i1 true
+ ret i1 %uge
+}
+
+; CHECK-LABEL: @orderedLessZeroMinNum(
+define i1 @orderedLessZeroMinNum(float, float) {
+ %a = call float @llvm.exp.f32(float %0)
+ %b = call float @llvm.fabs.f32(float %1)
+ %c = call float @llvm.minnum.f32(float %a, float %b)
+ %uge = fcmp uge float %c, 0.000000e+00
+; CHECK: ret i1 true
+ ret i1 %uge
+}
+
+; CHECK-LABEL: @orderedLessZeroMaxNum(
+define i1 @orderedLessZeroMaxNum(float, float) {
+ %a = call float @llvm.exp.f32(float %0)
+ %b = call float @llvm.maxnum.f32(float %a, float %1)
+ %uge = fcmp uge float %b, 0.000000e+00
+; CHECK: ret i1 true
+ ret i1 %uge
+}
+
define i1 @nonans1(double %in1, double %in2) {
%cmp = fcmp nnan uno double %in1, %in2
ret i1 %cmp
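
The new cases rest on a short interval argument: exp and fabs never produce a negative value, minnum/maxnum of non-negative inputs stays non-negative, and the unordered uge predicate is also true for NaN, so each compare folds to true. A minimal sketch of the same reasoning (hypothetical name):

    declare float @llvm.fabs.f32(float)
    declare float @llvm.minnum.f32(float, float)

    ; nonneg_min is a hypothetical name for illustration
    define i1 @nonneg_min(float %x, float %y) {
      %a = call float @llvm.fabs.f32(float %x)              ; >= 0 (or NaN)
      %b = call float @llvm.fabs.f32(float %y)              ; >= 0 (or NaN)
      %m = call float @llvm.minnum.f32(float %a, float %b)  ; still >= 0 (or NaN)
      %cmp = fcmp uge float %m, 0.000000e+00                ; true even for NaN
      ret i1 %cmp                                           ; expected: ret i1 true
    }
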
diff --git a/test/Transforms/JumpThreading/pr26096.ll b/test/Transforms/JumpThreading/pr26096.ll
new file mode 100644
index 000000000000..2671e82b6177
--- /dev/null
+++ b/test/Transforms/JumpThreading/pr26096.ll
@@ -0,0 +1,68 @@
+; RUN: opt -prune-eh -inline -jump-threading -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@d = external global i32*, align 8
+
+define void @fn3(i1 %B) {
+entry:
+ br i1 %B, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ call void @fn2()
+ ret void
+
+if.end: ; preds = %entry
+ call void @fn2()
+ ret void
+}
+
+define internal void @fn2() unnamed_addr {
+entry:
+ call void @fn1()
+ call void @fn1()
+ call void @fn1()
+ unreachable
+}
+
+; CHECK-LABEL: define internal void @fn2(
+; CHECK: %[[LOAD:.*]] = load i32*, i32** @d, align 8
+; CHECK: %tobool1.i = icmp eq i32* %[[LOAD]], null
+
+define internal void @fn1() unnamed_addr {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry
+ %0 = load i32*, i32** @d, align 8
+ %tobool1 = icmp eq i32* %0, null
+ br i1 %tobool1, label %cond.false, label %cond.end
+
+cond.false: ; preds = %for.body
+ call void @__assert_fail(i8* null)
+ unreachable
+
+cond.end: ; preds = %for.body
+ %1 = load i32*, i32** @d, align 8
+ %cmp = icmp eq i32* %1, null
+ br i1 %cmp, label %cond.end4, label %cond.false3
+
+cond.false3: ; preds = %cond.end
+ call void @__assert_fail(i8* null)
+ unreachable
+
+cond.end4: ; preds = %cond.end
+ call void @__assert_fail(i8* null)
+ unreachable
+
+for.end: ; No predecessors!
+ ret void
+}
+
+declare void @__assert_fail(i8*)
+
+; Function Attrs: noreturn nounwind
+declare void @llvm.trap() #0
+
+attributes #0 = { noreturn nounwind }
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 595cacbcbf54..6a3cf7edd7dc 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -250,3 +250,40 @@ if.end: ; preds = %if.then, %cond.end4
; CHECK: br i1 %cmp6, label %if.then, label %if.end
; CHECK: br label %if.end
}
+
+
+define i32 @unfold3(i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z, i32 %j) nounwind {
+entry:
+ %add3 = add nsw i32 %j, 2
+ %cmp.i = icmp slt i32 %u, %v
+ br i1 %cmp.i, label %.exit, label %cond.false.i
+
+cond.false.i: ; preds = %entry
+ %cmp4.i = icmp sgt i32 %u, %v
+ br i1 %cmp4.i, label %.exit, label %cond.false.6.i
+
+cond.false.6.i: ; preds = %cond.false.i
+ %cmp8.i = icmp slt i32 %w, %x
+ br i1 %cmp8.i, label %.exit, label %cond.false.10.i
+
+cond.false.10.i: ; preds = %cond.false.6.i
+ %cmp13.i = icmp sgt i32 %w, %x
+ br i1 %cmp13.i, label %.exit, label %cond.false.15.i
+
+cond.false.15.i: ; preds = %cond.false.10.i
+ %phitmp = icmp sge i32 %y, %z
+ br label %.exit
+
+.exit: ; preds = %entry, %cond.false.i, %cond.false.6.i, %cond.false.10.i, %cond.false.15.i
+ %cond23.i = phi i1 [ false, %entry ], [ true, %cond.false.i ], [ false, %cond.false.6.i ], [ %phitmp, %cond.false.15.i ], [ true, %cond.false.10.i ]
+ %j.add3 = select i1 %cond23.i, i32 %j, i32 %add3
+ ret i32 %j.add3
+
+; CHECK-LABEL: @unfold3
+; CHECK: br i1 %cmp.i, label %.exit.thread2, label %cond.false.i
+; CHECK: br i1 %cmp4.i, label %.exit.thread, label %cond.false.6.i
+; CHECK: br i1 %cmp8.i, label %.exit.thread2, label %cond.false.10.i
+; CHECK: br i1 %cmp13.i, label %.exit.thread, label %.exit
+; CHECK: br i1 %phitmp, label %.exit.thread, label %.exit.thread2
+; CHECK: br label %.exit.thread2
+}
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
index a650317f3df7..e5e0151761bf 100644
--- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
+++ b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
+; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s
+
; Loop size = 3. When the function has the optsize attribute, the
; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
; 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64) + 2 is not).
@@ -49,4 +51,3 @@ exit:
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
-
diff --git a/test/Transforms/LoopUnroll/unloop.ll b/test/Transforms/LoopUnroll/unloop.ll
index b98b4a3fffba..720b2ae1bdbc 100644
--- a/test/Transforms/LoopUnroll/unloop.ll
+++ b/test/Transforms/LoopUnroll/unloop.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -S -loop-unroll -verify-loop-info | FileCheck %s
;
-; Unit tests for LoopInfo::updateUnloop.
+; Unit tests for LoopInfo::markAsRemoved.
declare i1 @check() nounwind
diff --git a/test/Transforms/MemCpyOpt/fca2memcpy.ll b/test/Transforms/MemCpyOpt/fca2memcpy.ll
index 75a1a8f96e2b..c8a126848b06 100644
--- a/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -3,7 +3,7 @@
target datalayout = "e-i64:64-f80:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
-%S = type { i8*, i32 }
+%S = type { i8*, i8, i32 }
define void @copy(%S* %src, %S* %dst) {
; CHECK-LABEL: copy
@@ -37,8 +37,10 @@ define void @noaliasdst(%S* %src, %S* noalias %dst) {
define void @destroysrc(%S* %src, %S* %dst) {
; CHECK-LABEL: destroysrc
-; CHECK-NOT: call
-; CHECK: ret void
+; CHECK: load %S, %S* %src
+; CHECK: call void @llvm.memset.p0i8.i64
+; CHECK-NEXT: store %S %1, %S* %dst
+; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S zeroinitializer, %S* %src
store %S %1, %S* %dst
@@ -49,7 +51,7 @@ define void @destroynoaliassrc(%S* noalias %src, %S* %dst) {
; CHECK-LABEL: destroynoaliassrc
; CHECK-NOT: load
; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64
-; CHECK-NEXT: store %S zeroinitializer, %S* %src
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S zeroinitializer, %S* %src
@@ -70,3 +72,17 @@ define void @copyalias(%S* %src, %S* %dst) {
store %S %2, %S* %dst
ret void
}
+
+
+; The GEP appears after the aliasing store, preventing the memcpy from being
+; moved earlier (without further analysis/transformation).
+define void @copyaliaswithproducerinbetween(%S* %src, %S* %dst) {
+; CHECK-LABEL: copyaliaswithproducerinbetween
+; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
+; CHECK-NOT: call
+ %1 = load %S, %S* %src
+ store %S undef, %S* %dst
+ %dst2 = getelementptr %S , %S* %dst, i64 1
+ store %S %1, %S* %dst2
+ ret void
+}
diff --git a/test/Transforms/Reassociate/add_across_block_crash.ll b/test/Transforms/Reassociate/add_across_block_crash.ll
new file mode 100644
index 000000000000..07be75242b59
--- /dev/null
+++ b/test/Transforms/Reassociate/add_across_block_crash.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+; CHECK-LABEL: main
+; This test makes sure that, while processing a block, uses of instructions
+; from a different basic block are not added back for re-optimization.
+define void @main() {
+entry:
+ %0 = fadd fast float undef, undef
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ %1 = fmul fast float undef, -2.000000e+00
+ %2 = fmul fast float %1, 2.000000e+00
+ %3 = fadd fast float %2, 2.000000e+00
+ %4 = fadd fast float %3, %0
+ %mul351 = fmul fast float %4, 5.000000e-01
+ ret void
+
+bb2:
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/constants.ll b/test/Transforms/RewriteStatepointsForGC/constants.ll
index b30f64beba09..0f600f215718 100644
--- a/test/Transforms/RewriteStatepointsForGC/constants.ll
+++ b/test/Transforms/RewriteStatepointsForGC/constants.ll
@@ -92,4 +92,13 @@ use:
ret i8 addrspace(1)* %res
}
-
+; Globals don't move and thus don't get relocated
+define i8 addrspace(1)* @test5(i1 %always_true) gc "statepoint-example" {
+; CHECK-LABEL: @test5
+; CHECK: gc.statepoint
+; CHECK-NEXT: %res = extractelement <2 x i8 addrspace(1)*> <i8 addrspace(1)* @G, i8 addrspace(1)* @G>, i32 0
+entry:
+ call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ %res = extractelement <2 x i8 addrspace(1)*> <i8 addrspace(1)* @G, i8 addrspace(1)* @G>, i32 0
+ ret i8 addrspace(1)* %res
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll
new file mode 100644
index 000000000000..ee578eb3d309
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll
@@ -0,0 +1,112 @@
+; Test that we can correctly handle vectors of pointers in statepoint
+; rewriting.
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -rs4gc-split-vector-values=0 -S | FileCheck %s
+
+; A non-vector relocation for comparison
+define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
+; CHECK-LABEL: test
+; CHECK: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret i64 addrspace(1)*
+; A base vector from an argument
+entry:
+ call void @do_safepoint() [ "deopt"() ]
+ ret i64 addrspace(1)* %obj
+}
+
+; A vector argument
+define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
+; CHECK-LABEL: test2
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+; A load
+define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test3
+; CHECK: load
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare i32 @fake_personality_function()
+
+; When a statepoint is an invoke rather than a call
+define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+; CHECK-LABEL: test4
+; CHECK: load
+; CHECK-NEXT: gc.statepoint
+entry:
+ %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %entry
+; CHECK-LABEL: normal_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ ret <2 x i64 addrspace(1)*> %obj
+
+exceptional_return: ; preds = %entry
+; CHECK-LABEL: exceptional_return:
+; CHECK: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ %landing_pad4 = landingpad token
+ cleanup
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+; A newly created vector
+define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: test5
+; CHECK: insertelement
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %vec.relocated.casted
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %vec
+}
+
+; A merge point
+define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: test6
+entry:
+ br i1 %cnd, label %taken, label %untaken
+
+taken: ; preds = %entry
+ %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+untaken: ; preds = %entry
+ %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
+ br label %merge
+
+merge: ; preds = %untaken, %taken
+; CHECK-LABEL: merge:
+; CHECK-NEXT: = phi
+; CHECK-NEXT: gc.statepoint
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
+; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
+ %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
+ call void @do_safepoint() [ "deopt"() ]
+ ret <2 x i64 addrspace(1)*> %obj
+}
+
+declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
index 00f28938cee9..284a993bae29 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
@@ -1,6 +1,6 @@
; Test that we can correctly handle vectors of pointers in statepoint
; rewriting. Currently, we scalarize, but that's an implementation detail.
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -rs4gc-split-vector-values -S | FileCheck %s
; A non-vector relocation for comparison
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
index 584fd7add1b6..2ec09d6acae6 100644
--- a/test/Transforms/RewriteStatepointsForGC/live-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
@@ -1,6 +1,6 @@
; Test that we can correctly handle vectors of pointers in statepoint
; rewriting. Currently, we scalarize, but that's an implementation detail.
-; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-split-vector-values -S | FileCheck %s
; A non-vector relocation for comparison
define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
diff --git a/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll b/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll
new file mode 100644
index 000000000000..d3d3c5a8d1ab
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll
@@ -0,0 +1,33 @@
+; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+
+declare void @some_call(i64 addrspace(1)*)
+
+declare i32 @"dummy_personality_function"()
+
+define i64 addrspace(1)* @test(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
+ gc "statepoint-example"
+ personality i32 ()* @"dummy_personality_function" {
+entry:
+ invoke void @some_call(i64 addrspace(1)* %obj) [ "deopt"() ]
+ to label %second_invoke unwind label %exceptional_return
+
+second_invoke: ; preds = %entry
+ invoke void @some_call(i64 addrspace(1)* %obj) [ "deopt"() ]
+ to label %normal_return unwind label %exceptional_return
+
+normal_return: ; preds = %second_invoke
+ ret i64 addrspace(1)* %obj
+
+; CHECK: exceptional_return1:
+; CHECK-NEXT: %lpad2 = landingpad token
+
+; CHECK: exceptional_return.split-lp:
+; CHECK-NEXT: %lpad.split-lp = landingpad token
+
+; CHECK: exceptional_return:
+; CHECK-NOT: phi token
+
+exceptional_return: ; preds = %second_invoke, %entry
+ %lpad = landingpad token cleanup
+ ret i64 addrspace(1)* %obj1
+}
diff --git a/test/Transforms/SimplifyCFG/bug-25299.ll b/test/Transforms/SimplifyCFG/bug-25299.ll
new file mode 100644
index 000000000000..706afbe540cf
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/bug-25299.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+;; Test case for bug 25299, contributed by David Majnemer.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i1 %B) personality i1 undef {
+entry:
+;CHECK: entry
+;CHECK-NEXT: call void @g()
+ invoke void @g()
+ to label %continue unwind label %unwind
+
+unwind: ; preds = %entry
+ %tmp101 = landingpad { i8*, i32 }
+ cleanup
+ br i1 %B, label %resume, label %then
+
+then: ; preds = %unwind
+ br label %resume
+
+resume: ; preds = %cleanup2, %then, %unwind
+ %tmp104 = phi { i8*, i32 } [ %tmp101, %then ], [ %tmp106, %cleanup2 ], [ %tmp101, %unwind ]
+;CHECK-NOT: resume { i8*, i32 } %tmp104
+ resume { i8*, i32 } %tmp104
+
+continue: ; preds = %entry, %continue
+;CHECK: continue: ; preds = %entry, %continue
+;CHECK-NEXT: call void @g()
+ invoke void @g()
+ to label %continue unwind label %cleanup2
+
+cleanup2: ; preds = %continue
+ %tmp106 = landingpad { i8*, i32 }
+ cleanup
+ br label %resume
+}
+
+declare void @g() \ No newline at end of file
diff --git a/test/Transforms/SimplifyCFG/invoke_unwind.ll b/test/Transforms/SimplifyCFG/invoke_unwind.ll
index 100bfd4e9e3e..b11b7c15fa34 100644
--- a/test/Transforms/SimplifyCFG/invoke_unwind.ll
+++ b/test/Transforms/SimplifyCFG/invoke_unwind.ll
@@ -30,4 +30,46 @@ Rethrow:
resume { i8*, i32 } %exn
}
+declare i64 @dummy1()
+declare i64 @dummy2()
+
+; This testcase checks that the simplifycfg pass can convert two invoke
+; instructions to call instructions when they share a common trivial unwind
+; block.
+define i64 @test3(i1 %cond) personality i32 (...)* @__gxx_personality_v0 {
+entry:
+; CHECK-LABEL: @test3(
+; CHECK: %call1 = call i64 @dummy1()
+; CHECK: %call2 = call i64 @dummy2()
+; CHECK-NOT: resume { i8*, i32 } %lp
+ br i1 %cond, label %br1, label %br2
+
+br1:
+ %call1 = invoke i64 @dummy1()
+ to label %invoke.cont unwind label %lpad1
+
+br2:
+ %call2 = invoke i64 @dummy2()
+ to label %invoke.cont unwind label %lpad2
+
+invoke.cont:
+ %c = phi i64 [%call1, %br1], [%call2, %br2]
+ ret i64 %c
+
+
+lpad1:
+ %0 = landingpad { i8*, i32 }
+ cleanup
+ br label %rethrow
+
+rethrow:
+ %lp = phi { i8*, i32 } [%0, %lpad1], [%1, %lpad2]
+ resume { i8*, i32 } %lp
+
+lpad2:
+ %1 = landingpad { i8*, i32 }
+ cleanup
+ br label %rethrow
+}
+
declare i32 @__gxx_personality_v0(...)
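
In test3, each landing pad only runs a cleanup and immediately resumes the same exception value, so the unwind edges do no real work and SimplifyCFG can demote both invokes to plain calls. A reduced single-invoke sketch of the pattern (hypothetical names):

    declare i64 @callee()
    declare i32 @__gxx_personality_v0(...)

    ; demote is a hypothetical name for illustration
    define i64 @demote() personality i32 (...)* @__gxx_personality_v0 {
    entry:
      %v = invoke i64 @callee()
              to label %cont unwind label %lpad   ; expected to become a plain call
    cont:
      ret i64 %v
    lpad:
      %exn = landingpad { i8*, i32 }
              cleanup
      resume { i8*, i32 } %exn                    ; trivial rethrow, pad removable
    }
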
diff --git a/test/Transforms/Util/split-bit-piece.ll b/test/Transforms/Util/split-bit-piece.ll
new file mode 100644
index 000000000000..6945beca84b1
--- /dev/null
+++ b/test/Transforms/Util/split-bit-piece.ll
@@ -0,0 +1,45 @@
+; Checks that the llvm.dbg.declare -> llvm.dbg.value conversion utility
+; (here exercised through the SROA pass) properly inserts bit_piece expressions
+; when a value describes only part of the variable.
+; RUN: opt -S -sroa %s | FileCheck %s
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+
+; Function Attrs: nounwind uwtable
+define hidden void @_ZN6__tsan9FastState14SetHistorySizeEi(i32 %hs) #1 align 2 {
+entry:
+ %hs.addr = alloca i32, align 4
+ %v1 = alloca i64, align 8
+ %v2 = alloca i64, align 8
+ store i32 %hs, i32* %hs.addr, align 4
+; CHECK: call void @llvm.dbg.value(metadata i32 %hs, i64 0, metadata !{{[0-9]+}}, metadata ![[EXPR:[0-9]+]])
+; CHECK: ![[EXPR]] = !DIExpression(DW_OP_bit_piece, 0
+ call void @llvm.dbg.declare(metadata i64* %v1, metadata !9, metadata !12), !dbg !13
+ %0 = load i32, i32* %hs.addr, align 4
+ %conv = sext i32 %0 to i64
+ store i64 %conv, i64* %v1, align 8
+ %1 = load i64, i64* %v2, align 8
+ unreachable
+}
+
+attributes #0 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7}
+!llvm.ident = !{!8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256979) (llvm/trunk 257107)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, retainedTypes: !2)
+!1 = !DIFile(filename: "tsan_shadow_test.cc", directory: "/tmp")
+!2 = !{!3, !5}
+!3 = !DICompositeType(tag: DW_TAG_class_type, name: "FastState", file: !4, line: 91, size: 64, align: 64, identifier: "_ZTSN6__tsan9FastStateE")
+!4 = !DIFile(filename: "/mnt/extra/llvm/projects/compiler-rt/lib/tsan/rtl/tsan_rtl.h", directory: "/tmp")
+!5 = distinct !DIDerivedType(tag: DW_TAG_typedef, name: "u64", line: 78, baseType: !6)
+!6 = !DIBasicType(name: "long long unsigned int", size: 64, align: 64, encoding: DW_ATE_unsigned)
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{!"clang version 3.8.0 (trunk 256979) (llvm/trunk 257107)"}
+!9 = !DILocalVariable(name: "v1", scope: !10, file: !4, line: 136, type: !5)
+!10 = distinct !DILexicalBlock(scope: !11, file: !4, line: 136, column: 5)
+!11 = distinct !DISubprogram(name: "SetHistorySize", linkageName: "_ZN6__tsan9FastState14SetHistorySizeEi", scope: !"_ZTSN6__tsan9FastStateE", file: !4, line: 135, isLocal: false, isDefinition: true, scopeLine: 135, flags: DIFlagPrototyped, isOptimized: false)
+!12 = !DIExpression()
+!13 = !DILocation(line: 136, column: 5, scope: !10)
diff --git a/test/Verifier/gc_relocate_return.ll b/test/Verifier/gc_relocate_return.ll
index 77207f6c47b2..788978b14c60 100644
--- a/test/Verifier/gc_relocate_return.ll
+++ b/test/Verifier/gc_relocate_return.ll
@@ -1,8 +1,7 @@
; RUN: not llvm-as -disable-output <%s 2>&1 | FileCheck %s
-; This is to verify that gc_relocate must return a pointer type, which is defined
-; in intrinsics.td.
+; This is to verify that gc_relocate must return a pointer type.
-; CHECK: Intrinsic has incorrect return type!
+; CHECK: gc.relocate must return a pointer or a vector of pointers
declare void @foo()
diff --git a/test/Verifier/invalid-eh.ll b/test/Verifier/invalid-eh.ll
index 21e88d4dcb3d..0f27198af536 100644
--- a/test/Verifier/invalid-eh.ll
+++ b/test/Verifier/invalid-eh.ll
@@ -6,6 +6,19 @@
; RUN: sed -e s/.T6:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK6 %s
; RUN: sed -e s/.T7:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK7 %s
; RUN: sed -e s/.T8:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK8 %s
+; RUN: sed -e s/.T9:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK9 %s
+; RUN: sed -e s/.T10:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK10 %s
+; RUN: sed -e s/.T11:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK11 %s
+; RUN: sed -e s/.T12:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK12 %s
+; RUN: sed -e s/.T13:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK13 %s
+; RUN: sed -e s/.T14:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK14 %s
+; RUN: sed -e s/.T15:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK15 %s
+; RUN: sed -e s/.T16:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK16 %s
+; RUN: sed -e s/.T17:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK17 %s
+; RUN: sed -e s/.T18:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK18 %s
+; RUN: sed -e s/.T19:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK19 %s
+; RUN: sed -e s/.T20:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK20 %s
+; RUN: sed -e s/.T21:// %s | not opt -verify -disable-output 2>&1 | FileCheck --check-prefix=CHECK21 %s
declare void @g()
@@ -96,3 +109,264 @@ declare void @g()
;T8: %cs1 = catchswitch within none [ label %switch1 ] unwind to caller
;T8: ; CHECK8: CatchSwitchInst handlers must be catchpads
;T8: }
+
+;T9: define void @f() personality void ()* @g {
+;T9: entry:
+;T9: ret void
+;T9: cleanup:
+;T9: %cp = cleanuppad within none []
+;T9: invoke void @g() [ "funclet"(token %cp) ]
+;T9: to label %exit unwind label %cleanup
+;T9: ; CHECK9: EH pad cannot handle exceptions raised within it
+;T9: ; CHECK9-NEXT: %cp = cleanuppad within none []
+;T9: ; CHECK9-NEXT: invoke void @g() [ "funclet"(token %cp) ]
+;T9: exit:
+;T9: ret void
+;T9: }
+
+;T10: define void @f() personality void ()* @g {
+;T10: entry:
+;T10: ret void
+;T10: cleanup1:
+;T10: %cp1 = cleanuppad within none []
+;T10: unreachable
+;T10: switch:
+;T10: %cs = catchswitch within %cp1 [label %catch] unwind to caller
+;T10: catch:
+;T10: %catchp1 = catchpad within %cs [i32 1]
+;T10: unreachable
+;T10: cleanup2:
+;T10: %cp2 = cleanuppad within %catchp1 []
+;T10: unreachable
+;T10: cleanup3:
+;T10: %cp3 = cleanuppad within %cp2 []
+;T10: cleanupret from %cp3 unwind label %switch
+;T10: ; CHECK10: EH pad cannot handle exceptions raised within it
+;T10: ; CHECK10-NEXT: %cs = catchswitch within %cp1 [label %catch] unwind to caller
+;T10: ; CHECK10-NEXT: cleanupret from %cp3 unwind label %switch
+;T10: }
+
+;T11: define void @f() personality void ()* @g {
+;T11: entry:
+;T11: ret void
+;T11: cleanup1:
+;T11: %cp1 = cleanuppad within none []
+;T11: unreachable
+;T11: cleanup2:
+;T11: %cp2 = cleanuppad within %cp1 []
+;T11: unreachable
+;T11: switch:
+;T11: %cs = catchswitch within none [label %catch] unwind label %cleanup2
+;T11: ; CHECK11: A single unwind edge may only enter one EH pad
+;T11: ; CHECK11-NEXT: %cs = catchswitch within none [label %catch] unwind label %cleanup2
+;T11: catch:
+;T11: catchpad within %cs [i32 1]
+;T11: unreachable
+;T11: }
+
+;T12: define void @f() personality void ()* @g {
+;T12: entry:
+;T12: ret void
+;T12: cleanup:
+;T12: %cp = cleanuppad within none []
+;T12: cleanupret from %cp unwind label %switch
+;T12: ; CHECK12: A cleanupret must exit its cleanup
+;T12: ; CHECK12-NEXT: cleanupret from %cp unwind label %switch
+;T12: switch:
+;T12: %cs = catchswitch within %cp [label %catch] unwind to caller
+;T12: catch:
+;T12: catchpad within %cs [i32 1]
+;T12: unreachable
+;T12: }
+
+;T13: define void @f() personality void ()* @g {
+;T13: entry:
+;T13: ret void
+;T13: switch:
+;T13: %cs = catchswitch within none [label %catch] unwind label %switch
+;T13: ; CHECK13: EH pad cannot handle exceptions raised within it
+;T13: ; CHECK13-NEXT: %cs = catchswitch within none [label %catch] unwind label %switch
+;T13: catch:
+;T13: catchpad within %cs [i32 0]
+;T13: unreachable
+;T13: }
+
+;T14: define void @f() personality void ()* @g {
+;T14: entry:
+;T14: ret void
+;T14: cleanup:
+;T14: %cp = cleanuppad within none []
+;T14: unreachable
+;T14: left:
+;T14: cleanupret from %cp unwind label %switch
+;T14: right:
+;T14: cleanupret from %cp unwind to caller
+;T14: ; CHECK14: Unwind edges out of a funclet pad must have the same unwind dest
+;T14: ; CHECK14-NEXT: %cp = cleanuppad within none []
+;T14: ; CHECK14-NEXT: cleanupret from %cp unwind label %switch
+;T14: ; CHECK14-NEXT: cleanupret from %cp unwind to caller
+;T14: switch:
+;T14: %cs = catchswitch within none [label %catch] unwind to caller
+;T14: catch:
+;T14: catchpad within %cs [i32 1]
+;T14: unreachable
+;T14: }
+
+;T15: define void @f() personality void ()* @g {
+;T15: entry:
+;T15: ret void
+;T15: switch:
+;T15: %cs = catchswitch within none [label %catch] unwind to caller
+;T15: catch:
+;T15: %catch.pad = catchpad within %cs [i32 1]
+;T15: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T15: to label %unreachable unwind label %target1
+;T15: unreachable:
+;T15: unreachable
+;T15: target1:
+;T15: cleanuppad within none []
+;T15: unreachable
+;T15: target2:
+;T15: cleanuppad within none []
+;T15: unreachable
+;T15: nested.1:
+;T15: %nested.pad.1 = cleanuppad within %catch.pad []
+;T15: unreachable
+;T15: nested.2:
+;T15: %nested.pad.2 = cleanuppad within %nested.pad.1 []
+;T15: cleanupret from %nested.pad.2 unwind label %target2
+;T15: ; CHECK15: Unwind edges out of a funclet pad must have the same unwind dest
+;T15: ; CHECK15-NEXT: %catch.pad = catchpad within %cs [i32 1]
+;T15: ; CHECK15-NEXT: cleanupret from %nested.pad.2 unwind label %target2
+;T15: ; CHECK15-NEXT: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T15: ; CHECK15-NEXT: to label %unreachable unwind label %target1
+;T15: }
+
+;T16: define void @f() personality void ()* @g {
+;T16: entry:
+;T16: ret void
+;T16: switch:
+;T16: %cs = catchswitch within none [label %catch] unwind to caller
+;T16: catch:
+;T16: %catch.pad = catchpad within %cs [i32 1]
+;T16: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T16: to label %unreachable unwind label %target1
+;T16: ; CHECK16: Unwind edges out of a catch must have the same unwind dest as the parent catchswitch
+;T16: ; CHECK16-NEXT: %catch.pad = catchpad within %cs [i32 1]
+;T16: ; CHECK16-NEXT: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T16: ; CHECK16-NEXT: to label %unreachable unwind label %target1
+;T16: ; CHECK16-NEXT: %cs = catchswitch within none [label %catch] unwind to caller
+;T16: unreachable:
+;T16: unreachable
+;T16: target1:
+;T16: cleanuppad within none []
+;T16: unreachable
+;T16: }
+
+;T17: define void @f() personality void ()* @g {
+;T17: entry:
+;T17: ret void
+;T17: switch:
+;T17: %cs = catchswitch within none [label %catch] unwind label %target1
+;T17: catch:
+;T17: %catch.pad = catchpad within %cs [i32 1]
+;T17: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T17: to label %unreachable unwind label %target2
+;T17: ; CHECK17: Unwind edges out of a catch must have the same unwind dest as the parent catchswitch
+;T17: ; CHECK17-NEXT: %catch.pad = catchpad within %cs [i32 1]
+;T17: ; CHECK17-NEXT: invoke void @g() [ "funclet"(token %catch.pad) ]
+;T17: ; CHECK17-NEXT: to label %unreachable unwind label %target2
+;T17: ; CHECK17-NEXT: %cs = catchswitch within none [label %catch] unwind label %target1
+;T17: unreachable:
+;T17: unreachable
+;T17: target1:
+;T17: cleanuppad within none []
+;T17: unreachable
+;T17: target2:
+;T17: cleanuppad within none []
+;T17: unreachable
+;T17: }
+
+;T18: define void @f() personality void ()* @g {
+;T18: entry:
+;T18: invoke void @g()
+;T18: to label %invoke.cont unwind label %left
+;T18: invoke.cont:
+;T18: invoke void @g()
+;T18: to label %unreachable unwind label %right
+;T18: left:
+;T18: %cp.left = cleanuppad within none []
+;T18: invoke void @g() [ "funclet"(token %cp.left) ]
+;T18: to label %unreachable unwind label %right
+;T18: right:
+;T18: %cp.right = cleanuppad within none []
+;T18: invoke void @g() [ "funclet"(token %cp.right) ]
+;T18: to label %unreachable unwind label %left
+;T18: ; CHECK18: EH pads can't handle each other's exceptions
+;T18: ; CHECK18-NEXT: %cp.left = cleanuppad within none []
+;T18: ; CHECK18-NEXT: invoke void @g() [ "funclet"(token %cp.left) ]
+;T18: ; CHECK18-NEXT: to label %unreachable unwind label %right
+;T18: ; CHECK18-NEXT: %cp.right = cleanuppad within none []
+;T18: ; CHECK18-NEXT: invoke void @g() [ "funclet"(token %cp.right) ]
+;T18: ; CHECK18-NEXT: to label %unreachable unwind label %left
+;T18: unreachable:
+;T18: unreachable
+;T18: }
+
+;T19: define void @f() personality void ()* @g {
+;T19: entry:
+;T19: ret void
+;T19: red:
+;T19: %redpad = cleanuppad within none []
+;T19: unreachable
+;T19: red.inner:
+;T19: %innerpad = cleanuppad within %redpad []
+;T19: invoke void @g() [ "funclet"(token %innerpad) ]
+;T19: to label %unreachable unwind label %green
+;T19: green:
+;T19: %greenswitch = catchswitch within none [label %catch] unwind label %blue
+;T19: catch:
+;T19: catchpad within %greenswitch [i32 42]
+;T19: unreachable
+;T19: blue:
+;T19: %bluepad = cleanuppad within none []
+;T19: cleanupret from %bluepad unwind label %red
+;T19: ; CHECK19: EH pads can't handle each other's exceptions
+;T19: ; CHECK19-NEXT: %redpad = cleanuppad within none []
+;T19: ; CHECK19-NEXT: invoke void @g() [ "funclet"(token %innerpad) ]
+;T19: ; CHECK19-NEXT: to label %unreachable unwind label %green
+;T19: ; CHECK19-NEXT: %greenswitch = catchswitch within none [label %catch] unwind label %blue
+;T19: ; CHECK19-NEXT: %bluepad = cleanuppad within none []
+;T19: ; CHECK19-NEXT: cleanupret from %bluepad unwind label %red
+;T19: unreachable:
+;T19: unreachable
+;T19: }
+
+;T20: define void @f() personality void ()* @g {
+;T20: entry:
+;T20: ret void
+;T20: switch:
+;T20: %cs = catchswitch within none [label %catch] unwind label %catch
+;T20: ; CHECK20: Catchswitch cannot unwind to one of its catchpads
+;T20: ; CHECK20-NEXT: %cs = catchswitch within none [label %catch] unwind label %catch
+;T20: ; CHECK20-NEXT: %cp = catchpad within %cs [i32 4]
+;T20: catch:
+;T20: %cp = catchpad within %cs [i32 4]
+;T20: unreachable
+;T20: }
+
+;T21: define void @f() personality void ()* @g {
+;T21: entry:
+;T21: ret void
+;T21: switch:
+;T21: %cs = catchswitch within none [label %catch1] unwind label %catch2
+;T21: ; CHECK21: Catchswitch cannot unwind to one of its catchpads
+;T21: ; CHECK21-NEXT: %cs = catchswitch within none [label %catch1] unwind label %catch2
+;T21: ; CHECK21-NEXT: %cp2 = catchpad within %cs [i32 2]
+;T21: catch1:
+;T21: %cp1 = catchpad within %cs [i32 1]
+;T21: unreachable
+;T21: catch2:
+;T21: %cp2 = catchpad within %cs [i32 2]
+;T21: unreachable
+;T21: }
diff --git a/test/lit.cfg b/test/lit.cfg
index 36b4c7044083..e06c10f64212 100644
--- a/test/lit.cfg
+++ b/test/lit.cfg
@@ -194,6 +194,7 @@ config.substitutions.append( ('%llvmshlibdir', config.llvm_shlib_dir) )
config.substitutions.append( ('%shlibext', config.llvm_shlib_ext) )
config.substitutions.append( ('%exeext', config.llvm_exe_ext) )
config.substitutions.append( ('%python', config.python_executable) )
+config.substitutions.append( ('%host_cc', config.host_cc) )
# OCaml substitutions.
# Support tests for both native and bytecode builds.
@@ -276,6 +277,7 @@ for pattern in [r"\bbugpoint\b(?!-)",
r"\bllvm-split\b",
r"\bllvm-tblgen\b",
r"\bllvm-c-test\b",
+ NOJUNK + r"\bllvm-symbolizer\b",
NOJUNK + r"\bopt\b",
r"\bFileCheck\b",
r"\bobj2yaml\b",
@@ -459,10 +461,6 @@ if platform.system() in ['Windows'] and re.match(r'.*-win32$', config.target_tri
if not re.match(r'^x86_64.*-(mingw32|windows-gnu|win32)', config.target_triple):
config.available_features.add('debug_frame')
-# Check if we are embedding timestamps.
-if config.enable_timestamps == '1':
- config.available_features.add('timestamps')
-
# Check if we should use gmalloc.
use_gmalloc_str = lit_config.params.get('use_gmalloc', None)
if use_gmalloc_str is not None:
diff --git a/test/lit.site.cfg.in b/test/lit.site.cfg.in
index ae5814f02f41..13f5372ef7e3 100644
--- a/test/lit.site.cfg.in
+++ b/test/lit.site.cfg.in
@@ -36,7 +36,6 @@ config.llvm_use_sanitizer = "@LLVM_USE_SANITIZER@"
config.have_zlib = "@HAVE_LIBZ@"
config.have_dia_sdk = @HAVE_DIA_SDK@
config.enable_ffi = "@LLVM_ENABLE_FFI@"
-config.enable_timestamps = "@ENABLE_TIMESTAMPS@"
config.test_examples = "@ENABLE_EXAMPLES@"
# Support substitution of the tools_dir with user parameters. This is
diff --git a/test/tools/llvm-lto/error.ll b/test/tools/llvm-lto/error.ll
new file mode 100644
index 000000000000..43043d0cdc0b
--- /dev/null
+++ b/test/tools/llvm-lto/error.ll
@@ -0,0 +1,2 @@
+; RUN: not llvm-lto foobar 2>&1 | FileCheck %s
+; CHECK: llvm-lto: error loading file 'foobar': {{N|n}}o such file or directory
diff --git a/test/tools/llvm-objdump/Inputs/malformed-macho.bin b/test/tools/llvm-objdump/Inputs/malformed-macho.bin
new file mode 100644
index 000000000000..b8a3f153a896
--- /dev/null
+++ b/test/tools/llvm-objdump/Inputs/malformed-macho.bin
Binary files differ
diff --git a/test/tools/llvm-objdump/X86/macho-private-header.test b/test/tools/llvm-objdump/X86/macho-private-header.test
new file mode 100644
index 000000000000..d90072542a2e
--- /dev/null
+++ b/test/tools/llvm-objdump/X86/macho-private-header.test
@@ -0,0 +1,6 @@
+// RUN: llvm-objdump -private-header %p/Inputs/hello.obj.macho-x86_64 | FileCheck %s
+
+CHECK: Mach header
+CHECK: magic cputype cpusubtype caps filetype ncmds sizeofcmds flags
+CHECK: MH_MAGIC_64 X86_64 ALL 0x00 OBJECT 3 496 SUBSECTIONS_VIA_SYMBOLS
+CHECK-NOT: Load command
diff --git a/test/tools/llvm-objdump/malformed-archives.test b/test/tools/llvm-objdump/malformed-archives.test
index e0f165d37ed7..c477022e2e11 100644
--- a/test/tools/llvm-objdump/malformed-archives.test
+++ b/test/tools/llvm-objdump/malformed-archives.test
@@ -1,7 +1,7 @@
// These tests check that llvm-objdump will not crash on malformed Archive
// files, so the exact check lines are not all that important; the bug fixes
// that keep llvm-objdump robust are what matter.
-# RUN: llvm-objdump -macho -archive-headers \
+# RUN: not llvm-objdump -macho -archive-headers \
# RUN: %p/Inputs/libbogus1.a \
# RUN: 2>&1 | FileCheck -check-prefix=bogus1 %s
diff --git a/test/tools/llvm-objdump/malformed-macho.test b/test/tools/llvm-objdump/malformed-macho.test
new file mode 100644
index 000000000000..e96945d91125
--- /dev/null
+++ b/test/tools/llvm-objdump/malformed-macho.test
@@ -0,0 +1,2 @@
+RUN: not llvm-objdump -macho -s %p/Inputs/malformed-macho.bin 2>&1 | FileCheck %s -check-prefix=MALFORMED
+MALFORMED: '{{.*}}': The file was not recognized as a valid object file
diff --git a/test/tools/llvm-profdata/value-prof.proftext b/test/tools/llvm-profdata/value-prof.proftext
index ca2b1f822097..a8f6e8641c67 100644
--- a/test/tools/llvm-profdata/value-prof.proftext
+++ b/test/tools/llvm-profdata/value-prof.proftext
@@ -1,4 +1,4 @@
-# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=IC
+# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=ICTXT
# RUN: llvm-profdata show -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefix=ICTEXT
# RUN: llvm-profdata merge -o %t.profdata %s
# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefix=IC
@@ -45,10 +45,16 @@ foo2:1000
1
foo2:20000
+#ICTXT: Indirect Call Site Count: 3
+#ICTXT-NEXT: Indirect Target Results:
+#ICTXT-NEXT: [ 1, foo, 100 ]
+#ICTXT-NEXT: [ 1, foo2, 1000 ]
+#ICTXT-NEXT: [ 2, foo2, 20000 ]
+
#IC: Indirect Call Site Count: 3
#IC-NEXT: Indirect Target Results:
-#IC-NEXT: [ 1, foo, 100 ]
#IC-NEXT: [ 1, foo2, 1000 ]
+#IC-NEXT: [ 1, foo, 100 ]
#IC-NEXT: [ 2, foo2, 20000 ]
#ICTEXT: foo:100
diff --git a/test/tools/llvm-readobj/codeview-linetables.test b/test/tools/llvm-readobj/codeview-linetables.test
index 880b6d52f095..a1c6ab66b4ca 100644
--- a/test/tools/llvm-readobj/codeview-linetables.test
+++ b/test/tools/llvm-readobj/codeview-linetables.test
@@ -103,41 +103,85 @@ MFUN32-NEXT: Type: 0xF1
MFUN32-NEXT: PayloadSize: 0x8
MFUN32: ]
MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: LinkageName: _x
-MFUN32-NEXT: Flags: 0x0
-MFUN32-NEXT: CodeSize: 0xA
-MFUN32-NEXT: FilenameSegment [
-MFUN32-NEXT: Filename: d:\source.c
-MFUN32-NEXT: +0x0: 3
-MFUN32-NEXT: +0x3: 4
-MFUN32-NEXT: +0x8: 5
-MFUN32-NEXT: ]
-MFUN32-NEXT: ]
-MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: LinkageName: _y
-MFUN32-NEXT: Flags: 0x0
-MFUN32-NEXT: CodeSize: 0xA
-MFUN32-NEXT: FilenameSegment [
-MFUN32-NEXT: Filename: d:\source.c
-MFUN32-NEXT: +0x0: 7
-MFUN32-NEXT: +0x3: 8
-MFUN32-NEXT: +0x8: 9
-MFUN32-NEXT: ]
-MFUN32-NEXT: ]
-MFUN32-NEXT: FunctionLineTable [
-MFUN32-NEXT: LinkageName: _f
-MFUN32-NEXT: Flags: 0x0
-MFUN32-NEXT: CodeSize: 0x14
-MFUN32-NEXT: FilenameSegment [
-MFUN32-NEXT: Filename: d:\source.c
-MFUN32-NEXT: +0x0: 11
-MFUN32-NEXT: +0x3: 12
-MFUN32-NEXT: +0x8: 13
-MFUN32-NEXT: +0xD: 14
-MFUN32-NEXT: +0x12: 15
-MFUN32-NEXT: ]
-MFUN32-NEXT: ]
-MFUN32-NEXT: ]
+MFUN32-NEXT:   LinkageName: _x
+MFUN32-NEXT:   Flags: 0x0
+MFUN32-NEXT:   CodeSize: 0xA
+MFUN32-NEXT:   FilenameSegment [
+MFUN32-NEXT:     Filename: d:\source.c
+MFUN32-NEXT:     +0x0 [
+MFUN32-NEXT:       LineNumberStart: 3
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x3 [
+MFUN32-NEXT:       LineNumberStart: 4
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x8 [
+MFUN32-NEXT:       LineNumberStart: 5
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:   ]
+MFUN32-NEXT: ]
+MFUN32-NEXT: FunctionLineTable [
+MFUN32-NEXT:   LinkageName: _y
+MFUN32-NEXT:   Flags: 0x0
+MFUN32-NEXT:   CodeSize: 0xA
+MFUN32-NEXT:   FilenameSegment [
+MFUN32-NEXT:     Filename: d:\source.c
+MFUN32-NEXT:     +0x0 [
+MFUN32-NEXT:       LineNumberStart: 7
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x3 [
+MFUN32-NEXT:       LineNumberStart: 8
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x8 [
+MFUN32-NEXT:       LineNumberStart: 9
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:   ]
+MFUN32-NEXT: ]
+MFUN32-NEXT: FunctionLineTable [
+MFUN32-NEXT:   LinkageName: _f
+MFUN32-NEXT:   Flags: 0x0
+MFUN32-NEXT:   CodeSize: 0x14
+MFUN32-NEXT:   FilenameSegment [
+MFUN32-NEXT:     Filename: d:\source.c
+MFUN32-NEXT:     +0x0 [
+MFUN32-NEXT:       LineNumberStart: 11
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x3 [
+MFUN32-NEXT:       LineNumberStart: 12
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x8 [
+MFUN32-NEXT:       LineNumberStart: 13
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0xD [
+MFUN32-NEXT:       LineNumberStart: 14
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:     +0x12 [
+MFUN32-NEXT:       LineNumberStart: 15
+MFUN32-NEXT:       LineNumberEndDelta: 0
+MFUN32-NEXT:       IsStatement: Yes
+MFUN32-NEXT:     ]
+MFUN32-NEXT:   ]
+MFUN32-NEXT: ]
+MFUN32-NEXT: ]
MFUN64: CodeViewDebugInfo [
MFUN64-NEXT: Magic: 0x4
@@ -208,9 +252,21 @@ MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0xE
MFUN64-NEXT: FilenameSegment [
MFUN64-NEXT: Filename: d:\source.c
-MFUN64-NEXT: +0x0: 3
-MFUN64-NEXT: +0x4: 4
-MFUN64-NEXT: +0x9: 5
+MFUN64-NEXT: +0x0 [
+MFUN64-NEXT: LineNumberStart: 3
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x4 [
+MFUN64-NEXT: LineNumberStart: 4
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x9 [
+MFUN64-NEXT: LineNumberStart: 5
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: FunctionLineTable [
@@ -219,9 +275,21 @@ MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0xE
MFUN64-NEXT: FilenameSegment [
MFUN64-NEXT: Filename: d:\source.c
-MFUN64-NEXT: +0x0: 7
-MFUN64-NEXT: +0x4: 8
-MFUN64-NEXT: +0x9: 9
+MFUN64-NEXT: +0x0 [
+MFUN64-NEXT: LineNumberStart: 7
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x4 [
+MFUN64-NEXT: LineNumberStart: 8
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x9 [
+MFUN64-NEXT: LineNumberStart: 9
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: FunctionLineTable [
@@ -230,11 +298,31 @@ MFUN64-NEXT: Flags: 0x0
MFUN64-NEXT: CodeSize: 0x18
MFUN64-NEXT: FilenameSegment [
MFUN64-NEXT: Filename: d:\source.c
-MFUN64-NEXT: +0x0: 11
-MFUN64-NEXT: +0x4: 12
-MFUN64-NEXT: +0x9: 13
-MFUN64-NEXT: +0xE: 14
-MFUN64-NEXT: +0x13: 15
+MFUN64-NEXT: +0x0 [
+MFUN64-NEXT: LineNumberStart: 11
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x4 [
+MFUN64-NEXT: LineNumberStart: 12
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x9 [
+MFUN64-NEXT: LineNumberStart: 13
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0xE [
+MFUN64-NEXT: LineNumberStart: 14
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
+MFUN64-NEXT: +0x13 [
+MFUN64-NEXT: LineNumberStart: 15
+MFUN64-NEXT: LineNumberEndDelta: 0
+MFUN64-NEXT: IsStatement: Yes
+MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: ]
MFUN64-NEXT: ]
@@ -306,20 +394,40 @@ MFILE32-NEXT: Flags: 0x0
MFILE32-NEXT: CodeSize: 0x14
MFILE32-NEXT: FilenameSegment [
MFILE32-NEXT: Filename: d:\input.c
-MFILE32-NEXT: +0x0: 3
+MFILE32-NEXT: +0x0 [
+MFILE32-NEXT: LineNumberStart: 3
+MFILE32-NEXT: LineNumberEndDelta: 0
+MFILE32-NEXT: IsStatement: Yes
+MFILE32-NEXT: ]
MFILE32-NEXT: ]
MFILE32-NEXT: FilenameSegment [
MFILE32-NEXT: Filename: d:\one.c
-MFILE32-NEXT: +0x3: 1
+MFILE32-NEXT: +0x3 [
+MFILE32-NEXT: LineNumberStart: 1
+MFILE32-NEXT: LineNumberEndDelta: 0
+MFILE32-NEXT: IsStatement: Yes
+MFILE32-NEXT: ]
MFILE32-NEXT: ]
MFILE32-NEXT: FilenameSegment [
MFILE32-NEXT: Filename: d:\two.c
-MFILE32-NEXT: +0x8: 2
+MFILE32-NEXT: +0x8 [
+MFILE32-NEXT: LineNumberStart: 2
+MFILE32-NEXT: LineNumberEndDelta: 0
+MFILE32-NEXT: IsStatement: Yes
+MFILE32-NEXT: ]
MFILE32-NEXT: ]
MFILE32-NEXT: FilenameSegment [
MFILE32-NEXT: Filename: d:\one.c
-MFILE32-NEXT: +0xD: 7
-MFILE32-NEXT: +0x12: 8
+MFILE32-NEXT: +0xD [
+MFILE32-NEXT: LineNumberStart: 7
+MFILE32-NEXT: LineNumberEndDelta: 0
+MFILE32-NEXT: IsStatement: Yes
+MFILE32-NEXT: ]
+MFILE32-NEXT: +0x12 [
+MFILE32-NEXT: LineNumberStart: 8
+MFILE32-NEXT: LineNumberEndDelta: 0
+MFILE32-NEXT: IsStatement: Yes
+MFILE32-NEXT: ]
MFILE32-NEXT: ]
MFILE32-NEXT: ]
MFILE32-NEXT: ]
@@ -357,28 +465,47 @@ MFILE64-NEXT: Subsection [
MFILE64-NEXT: Type: 0xF1
MFILE64-NEXT: PayloadSize: 0x8
MFILE64: ]
-MFILE64-NEXT: FunctionLineTable [
-MFILE64-NEXT: LinkageName: f
-MFILE64-NEXT: Flags: 0x0
-MFILE64-NEXT: CodeSize: 0x18
-MFILE64-NEXT: FilenameSegment [
-MFILE64-NEXT: Filename: d:\input.c
-MFILE64-NEXT: +0x0: 3
-MFILE64-NEXT: ]
-MFILE64-NEXT: FilenameSegment [
-MFILE64-NEXT: Filename: d:\one.c
-MFILE64-NEXT: +0x4: 1
-MFILE64-NEXT: ]
-MFILE64-NEXT: FilenameSegment [
-MFILE64-NEXT: Filename: d:\two.c
-MFILE64-NEXT: +0x9: 2
-MFILE64-NEXT: ]
-MFILE64-NEXT: FilenameSegment [
-MFILE64-NEXT: Filename: d:\one.c
-MFILE64-NEXT: +0xE: 7
-MFILE64-NEXT: +0x13: 8
-MFILE64-NEXT: ]
-MFILE64-NEXT: ]
+MFILE64-NEXT: FunctionLineTable [
+MFILE64-NEXT: LinkageName: f
+MFILE64-NEXT: Flags: 0x0
+MFILE64-NEXT: CodeSize: 0x18
+MFILE64-NEXT: FilenameSegment [
+MFILE64-NEXT: Filename: d:\input.c
+MFILE64-NEXT: +0x0 [
+MFILE64-NEXT: LineNumberStart: 3
+MFILE64-NEXT: LineNumberEndDelta: 0
+MFILE64-NEXT: IsStatement: Yes
+MFILE64-NEXT: ]
+MFILE64-NEXT: ]
+MFILE64-NEXT: FilenameSegment [
+MFILE64-NEXT: Filename: d:\one.c
+MFILE64-NEXT: +0x4 [
+MFILE64-NEXT: LineNumberStart: 1
+MFILE64-NEXT: LineNumberEndDelta: 0
+MFILE64-NEXT: IsStatement: Yes
+MFILE64-NEXT: ]
+MFILE64-NEXT: ]
+MFILE64-NEXT: FilenameSegment [
+MFILE64-NEXT: Filename: d:\two.c
+MFILE64-NEXT: +0x9 [
+MFILE64-NEXT: LineNumberStart: 2
+MFILE64-NEXT: LineNumberEndDelta: 0
+MFILE64-NEXT: IsStatement: Yes
+MFILE64-NEXT: ]
+MFILE64-NEXT: ]
+MFILE64-NEXT: FilenameSegment [
+MFILE64-NEXT: Filename: d:\one.c
+MFILE64-NEXT: +0xE [
+MFILE64-NEXT: LineNumberStart: 7
+MFILE64-NEXT: LineNumberEndDelta: 0
+MFILE64-NEXT: IsStatement: Yes
+MFILE64-NEXT: ]
+MFILE64-NEXT: +0x13 [
+MFILE64-NEXT: LineNumberStart: 8
+MFILE64-NEXT: LineNumberEndDelta: 0
+MFILE64-NEXT: IsStatement: Yes
+MFILE64-NEXT: ]
+MFILE64-NEXT: ]
MFILE64-NEXT: ]
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -411,9 +538,21 @@ MCOMDAT-NEXT: Flags: 0x0
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: FilenameSegment [
MCOMDAT-NEXT: Filename: c:\src\test.cc
-MCOMDAT-NEXT: +0x0: 2
-MCOMDAT-NEXT: +0x3: 3
-MCOMDAT-NEXT: +0x5: 4
+MCOMDAT-NEXT: +0x0 [
+MCOMDAT-NEXT: LineNumberStart: 2
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
+MCOMDAT-NEXT: +0x3 [
+MCOMDAT-NEXT: LineNumberStart: 3
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
+MCOMDAT-NEXT: +0x5 [
+MCOMDAT-NEXT: LineNumberStart: 4
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
MCOMDAT-NEXT: ]
MCOMDAT-NEXT: ]
MCOMDAT: ProcStart {
@@ -427,8 +566,20 @@ MCOMDAT-NEXT: Flags: 0x0
MCOMDAT-NEXT: CodeSize: 0x7
MCOMDAT-NEXT: FilenameSegment [
MCOMDAT-NEXT: Filename: c:\src\test.cc
-MCOMDAT-NEXT: +0x0: 7
-MCOMDAT-NEXT: +0x3: 8
-MCOMDAT-NEXT: +0x5: 9
+MCOMDAT-NEXT: +0x0 [
+MCOMDAT-NEXT: LineNumberStart: 7
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
+MCOMDAT-NEXT: +0x3 [
+MCOMDAT-NEXT: LineNumberStart: 8
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
+MCOMDAT-NEXT: +0x5 [
+MCOMDAT-NEXT: LineNumberStart: 9
+MCOMDAT-NEXT: LineNumberEndDelta: 0
+MCOMDAT-NEXT: IsStatement: Yes
+MCOMDAT-NEXT: ]
MCOMDAT-NEXT: ]
MCOMDAT-NEXT: ]
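The expanded check lines above come from llvm-readobj now decoding each CodeView line-table entry into its component fields instead of printing a bare offset/line pair. Below is a minimal sketch of that decoding, assuming the standard CodeView packing of the 32-bit flags word (24-bit start line, 7-bit end-line delta, 1-bit statement flag); the struct and function names are illustrative, not LLVM's actual LineInfo API.

#include <cstdint>
#include <cstdio>

// Illustrative decoder, assuming the standard CodeView line-entry packing;
// the field names match the llvm-readobj dump above.
struct LineEntry {
  uint32_t LineNumberStart;    // bits 0-23 of the flags word
  uint32_t LineNumberEndDelta; // bits 24-30
  bool IsStatement;            // bit 31
};

static LineEntry decodeLineFlags(uint32_t Flags) {
  LineEntry E;
  E.LineNumberStart = Flags & 0x00FFFFFF;
  E.LineNumberEndDelta = (Flags >> 24) & 0x7F;
  E.IsStatement = (Flags >> 31) != 0;
  return E;
}

int main() {
  // 0x80000003: start line 3, zero end delta, statement bit set.
  LineEntry E = decodeLineFlags(0x80000003);
  std::printf("LineNumberStart: %u\nLineNumberEndDelta: %u\nIsStatement: %s\n",
              E.LineNumberStart, E.LineNumberEndDelta,
              E.IsStatement ? "Yes" : "No");
}

With that layout, the "IsStatement: Yes" lines in the checks simply mean the high bit of each entry is set.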
diff --git a/test/tools/llvm-symbolizer/Inputs/addr.inp b/test/tools/llvm-symbolizer/Inputs/addr.inp
index 4de096479dae..b5e146b114e2 100644
--- a/test/tools/llvm-symbolizer/Inputs/addr.inp
+++ b/test/tools/llvm-symbolizer/Inputs/addr.inp
@@ -1 +1,3 @@
+some text
0x40054d
+some text2
diff --git a/test/tools/llvm-symbolizer/print_context.c b/test/tools/llvm-symbolizer/print_context.c
new file mode 100644
index 000000000000..f1860e919881
--- /dev/null
+++ b/test/tools/llvm-symbolizer/print_context.c
@@ -0,0 +1,22 @@
+// REQUIRES: x86_64-linux
+// RUN: %host_cc -O0 -g %s -o %t 2>&1
+// RUN: %t 2>&1 | llvm-symbolizer -print-source-context-lines=5 -obj=%t | FileCheck %s --check-prefix=CHECK
+
+#include <stdio.h>
+
+int inc(int a) {
+ return a + 1;
+}
+
+int main() {
+ printf("%p\n", inc);
+ return 0;
+}
+
+// CHECK: inc
+// CHECK: print_context.c:7
+// CHECK: 5 : #include
+// CHECK: 6 :
+// CHECK: 7 >: int inc
+// CHECK: 8 : return
+// CHECK: 9 : }
diff --git a/test/tools/llvm-symbolizer/sym.test b/test/tools/llvm-symbolizer/sym.test
index 01a6692222e7..27f06901ff6c 100644
--- a/test/tools/llvm-symbolizer/sym.test
+++ b/test/tools/llvm-symbolizer/sym.test
@@ -20,11 +20,15 @@
RUN: llvm-symbolizer -print-address -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck %s
RUN: llvm-symbolizer -inlining -print-address -pretty-print -obj=%p/Inputs/addr.exe < %p/Inputs/addr.inp | FileCheck --check-prefix="PRETTY" %s
+#CHECK: some text
#CHECK: 0x40054d
#CHECK: main
#CHECK: {{[/\]+}}tmp{{[/\]+}}x.c:14:0
+#CHECK: some text2
#
+#PRETTY: some text
#PRETTY: {{[0x]+}}40054d: inctwo at {{[/\]+}}tmp{{[/\]+}}x.c:3:3
#PRETTY: (inlined by) inc at {{[/\]+}}tmp{{[/\]+}}x.c:7:0
#PRETTY: (inlined by) main at {{[/\]+}}tmp{{[/\]+}}x.c:14:0
+#PRETTY: some text2
diff --git a/tools/lli/CMakeLists.txt b/tools/lli/CMakeLists.txt
index 4af05969af16..e317db6b713c 100644
--- a/tools/lli/CMakeLists.txt
+++ b/tools/lli/CMakeLists.txt
@@ -38,8 +38,5 @@ endif( LLVM_USE_INTEL_JITEVENTS )
add_llvm_tool(lli
lli.cpp
OrcLazyJIT.cpp
- RemoteMemoryManager.cpp
- RemoteTarget.cpp
- RemoteTargetExternal.cpp
)
export_executable_symbols(lli)
diff --git a/tools/lli/ChildTarget/CMakeLists.txt b/tools/lli/ChildTarget/CMakeLists.txt
index 9f88b2cde3f7..0f851d593444 100644
--- a/tools/lli/ChildTarget/CMakeLists.txt
+++ b/tools/lli/ChildTarget/CMakeLists.txt
@@ -1,8 +1,11 @@
-set(LLVM_LINK_COMPONENTS support)
+set(LLVM_LINK_COMPONENTS
+ OrcJIT
+ RuntimeDyld
+ Support
+ )
add_llvm_executable(lli-child-target
ChildTarget.cpp
- ../RemoteTarget.cpp
)
set_target_properties(lli-child-target PROPERTIES FOLDER "Misc")
diff --git a/tools/lli/ChildTarget/ChildTarget.cpp b/tools/lli/ChildTarget/ChildTarget.cpp
index 6c537d47df3d..0b75e20f83e9 100644
--- a/tools/lli/ChildTarget/ChildTarget.cpp
+++ b/tools/lli/ChildTarget/ChildTarget.cpp
@@ -1,244 +1,69 @@
-#include "llvm/Config/config.h"
-#include "../RPCChannel.h"
-#include "../RemoteTarget.h"
-#include "../RemoteTargetMessage.h"
-#include "llvm/Support/Memory.h"
-#include <assert.h>
-#include <map>
-#include <stdint.h>
-#include <string>
-#include <vector>
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DynamicLibrary.h"
+#include "llvm/Support/Process.h"
+#include <sstream>
-using namespace llvm;
+#include "../RemoteJITUtils.h"
-class LLIChildTarget {
-public:
- void initialize();
- LLIMessageType waitForIncomingMessage();
- void handleMessage(LLIMessageType messageType);
- RemoteTarget *RT;
- RPCChannel RPC;
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::sys;
-private:
- // Incoming message handlers
- void handleAllocateSpace();
- void handleLoadSection(bool IsCode);
- void handleExecute();
+#ifdef __x86_64__
+typedef OrcX86_64 HostOrcArch;
+#else
+typedef OrcGenericArchitecture HostOrcArch;
+#endif
- // Outgoing message handlers
- void sendChildActive();
- void sendAllocationResult(uint64_t Addr);
- void sendLoadStatus(uint32_t Status);
- void sendExecutionComplete(int Result);
+int main(int argc, char *argv[]) {
- // OS-specific functions
- void initializeConnection();
- int WriteBytes(const void *Data, size_t Size) {
- return RPC.WriteBytes(Data, Size) ? Size : -1;
+ if (argc != 3) {
+ errs() << "Usage: " << argv[0] << " <input fd> <output fd>\n";
+ return 1;
}
- int ReadBytes(void *Data, size_t Size) {
- return RPC.ReadBytes(Data, Size) ? Size : -1;
- }
-
- // Communication handles (OS-specific)
- void *ConnectionData;
-};
-
-int main() {
- LLIChildTarget ThisChild;
- ThisChild.RT = new RemoteTarget();
- ThisChild.initialize();
- LLIMessageType MsgType;
- do {
- MsgType = ThisChild.waitForIncomingMessage();
- ThisChild.handleMessage(MsgType);
- } while (MsgType != LLI_Terminate &&
- MsgType != LLI_Error);
- delete ThisChild.RT;
- return 0;
-}
-// Public methods
-void LLIChildTarget::initialize() {
- RPC.createClient();
- sendChildActive();
-}
+ int InFD;
+ int OutFD;
+ {
+ std::istringstream InFDStream(argv[1]), OutFDStream(argv[2]);
+ InFDStream >> InFD;
+ OutFDStream >> OutFD;
+ }
-LLIMessageType LLIChildTarget::waitForIncomingMessage() {
- int32_t MsgType = -1;
- if (ReadBytes(&MsgType, 4) > 0)
- return (LLIMessageType)MsgType;
- return LLI_Error;
-}
+ if (sys::DynamicLibrary::LoadLibraryPermanently(nullptr)) {
+ errs() << "Error loading program symbols.\n";
+ return 1;
+ }
-void LLIChildTarget::handleMessage(LLIMessageType messageType) {
- switch (messageType) {
- case LLI_AllocateSpace:
- handleAllocateSpace();
- break;
- case LLI_LoadCodeSection:
- handleLoadSection(true);
- break;
- case LLI_LoadDataSection:
- handleLoadSection(false);
- break;
- case LLI_Execute:
- handleExecute();
- break;
- case LLI_Terminate:
- RT->stop();
- break;
+ auto SymbolLookup = [](const std::string &Name) {
+ return RTDyldMemoryManager::getSymbolAddressInProcess(Name);
+ };
+
+ FDRPCChannel Channel(InFD, OutFD);
+ typedef remote::OrcRemoteTargetServer<FDRPCChannel, HostOrcArch> JITServer;
+ JITServer Server(Channel, SymbolLookup);
+
+ while (1) {
+ JITServer::JITProcId Id = JITServer::InvalidId;
+ if (auto EC = Server.getNextProcId(Id)) {
+ errs() << "Error: " << EC.message() << "\n";
+ return 1;
+ }
+ switch (Id) {
+ case JITServer::TerminateSessionId:
+ return 0;
default:
- // FIXME: Handle error!
- break;
+ if (auto EC = Server.handleKnownProcedure(Id)) {
+ errs() << "Error: " << EC.message() << "\n";
+ return 1;
+ }
+ }
}
-}
-
-// Incoming message handlers
-void LLIChildTarget::handleAllocateSpace() {
- // Read and verify the message data size.
- uint32_t DataSize = 0;
- int rc = ReadBytes(&DataSize, 4);
- (void)rc;
- assert(rc == 4);
- assert(DataSize == 8);
-
- // Read the message arguments.
- uint32_t Alignment = 0;
- uint32_t AllocSize = 0;
- rc = ReadBytes(&Alignment, 4);
- assert(rc == 4);
- rc = ReadBytes(&AllocSize, 4);
- assert(rc == 4);
-
- // Allocate the memory.
- uint64_t Addr;
- RT->allocateSpace(AllocSize, Alignment, Addr);
-
- // Send AllocationResult message.
- sendAllocationResult(Addr);
-}
-
-void LLIChildTarget::handleLoadSection(bool IsCode) {
- // Read the message data size.
- uint32_t DataSize = 0;
- int rc = ReadBytes(&DataSize, 4);
- (void)rc;
- assert(rc == 4);
-
- // Read the target load address.
- uint64_t Addr = 0;
- rc = ReadBytes(&Addr, 8);
- assert(rc == 8);
- size_t BufferSize = DataSize - 8;
-
- if (!RT->isAllocatedMemory(Addr, BufferSize))
- return sendLoadStatus(LLI_Status_NotAllocated);
-
- // Read section data into previously allocated buffer
- rc = ReadBytes((void*)Addr, BufferSize);
- if (rc != (int)(BufferSize))
- return sendLoadStatus(LLI_Status_IncompleteMsg);
-
- // If IsCode, mark memory executable
- if (IsCode)
- sys::Memory::InvalidateInstructionCache((void *)Addr, BufferSize);
-
- // Send MarkLoadComplete message.
- sendLoadStatus(LLI_Status_Success);
-}
-
-void LLIChildTarget::handleExecute() {
- // Read the message data size.
- uint32_t DataSize = 0;
- int rc = ReadBytes(&DataSize, 4);
- (void)rc;
- assert(rc == 4);
- assert(DataSize == 8);
-
- // Read the target address.
- uint64_t Addr = 0;
- rc = ReadBytes(&Addr, 8);
- assert(rc == 8);
-
- // Call function
- int32_t Result = -1;
- RT->executeCode(Addr, Result);
-
- // Send ExecutionResult message.
- sendExecutionComplete(Result);
-}
-
-// Outgoing message handlers
-void LLIChildTarget::sendChildActive() {
- // Write the message type.
- uint32_t MsgType = (uint32_t)LLI_ChildActive;
- int rc = WriteBytes(&MsgType, 4);
- (void)rc;
- assert(rc == 4);
-
- // Write the data size.
- uint32_t DataSize = 0;
- rc = WriteBytes(&DataSize, 4);
- assert(rc == 4);
-}
-
-void LLIChildTarget::sendAllocationResult(uint64_t Addr) {
- // Write the message type.
- uint32_t MsgType = (uint32_t)LLI_AllocationResult;
- int rc = WriteBytes(&MsgType, 4);
- (void)rc;
- assert(rc == 4);
-
- // Write the data size.
- uint32_t DataSize = 8;
- rc = WriteBytes(&DataSize, 4);
- assert(rc == 4);
-
- // Write the allocated address.
- rc = WriteBytes(&Addr, 8);
- assert(rc == 8);
-}
-
-void LLIChildTarget::sendLoadStatus(uint32_t Status) {
- // Write the message type.
- uint32_t MsgType = (uint32_t)LLI_LoadResult;
- int rc = WriteBytes(&MsgType, 4);
- (void)rc;
- assert(rc == 4);
-
- // Write the data size.
- uint32_t DataSize = 4;
- rc = WriteBytes(&DataSize, 4);
- assert(rc == 4);
-
- // Write the result.
- rc = WriteBytes(&Status, 4);
- assert(rc == 4);
-}
-
-void LLIChildTarget::sendExecutionComplete(int Result) {
- // Write the message type.
- uint32_t MsgType = (uint32_t)LLI_ExecutionResult;
- int rc = WriteBytes(&MsgType, 4);
- (void)rc;
- assert(rc == 4);
+ close(InFD);
+ close(OutFD);
- // Write the data size.
- uint32_t DataSize = 4;
- rc = WriteBytes(&DataSize, 4);
- assert(rc == 4);
-
- // Write the result.
- rc = WriteBytes(&Result, 4);
- assert(rc == 4);
+ return 0;
}
-
-#ifdef LLVM_ON_UNIX
-#include "../Unix/RPCChannel.inc"
-#endif
-
-#ifdef LLVM_ON_WIN32
-#include "../Windows/RPCChannel.inc"
-#endif
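ChildTarget.cpp now expects its two RPC file descriptors as decimal strings in argv[1] and argv[2]. The matching parent-side setup lives in launchRemote() in lli.cpp, which is not part of this hunk; what follows is only a rough POSIX-only sketch of such a launcher, with the names and error handling assumed rather than taken from lli.

#include <string>
#include <unistd.h>

// Hypothetical parent-side launcher (not lli's launchRemote): create two
// pipes, pass the child's ends as decimal argv strings, and keep the other
// ends for the host's RPC channel.
static int launchChild(const char *ChildPath, int &HostInFD, int &HostOutFD) {
  int HostToChild[2], ChildToHost[2];
  if (pipe(HostToChild) || pipe(ChildToHost))
    return -1;
  pid_t Pid = fork();
  if (Pid < 0)
    return -1;
  if (Pid == 0) {
    // Child: read commands on HostToChild[0], write replies on ChildToHost[1].
    close(HostToChild[1]);
    close(ChildToHost[0]);
    std::string InFD = std::to_string(HostToChild[0]);
    std::string OutFD = std::to_string(ChildToHost[1]);
    execl(ChildPath, ChildPath, InFD.c_str(), OutFD.c_str(), (char *)nullptr);
    _exit(1); // exec failed
  }
  // Parent: write into the child's input, read from the child's output.
  close(HostToChild[0]);
  close(ChildToHost[1]);
  HostOutFD = HostToChild[1];
  HostInFD = ChildToHost[0];
  return 0;
}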
diff --git a/tools/lli/ChildTarget/Makefile b/tools/lli/ChildTarget/Makefile
index 6f4ddefcd59d..0767ba1d724c 100644
--- a/tools/lli/ChildTarget/Makefile
+++ b/tools/lli/ChildTarget/Makefile
@@ -12,8 +12,8 @@ TOOLNAME := lli-child-target
include $(LEVEL)/Makefile.config
-LINK_COMPONENTS := support
+LINK_COMPONENTS := support OrcJIT
-SOURCES := ChildTarget.cpp ../RemoteTarget.cpp
+SOURCES := ChildTarget.cpp
include $(LLVM_SRC_ROOT)/Makefile.rules
diff --git a/tools/lli/OrcLazyJIT.cpp b/tools/lli/OrcLazyJIT.cpp
index 4235145ee7a5..7f483f742b80 100644
--- a/tools/lli/OrcLazyJIT.cpp
+++ b/tools/lli/OrcLazyJIT.cpp
@@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "OrcLazyJIT.h"
-#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cstdio>
diff --git a/tools/lli/OrcLazyJIT.h b/tools/lli/OrcLazyJIT.h
index bb4da33ea9b6..2a5b31d1bfb8 100644
--- a/tools/lli/OrcLazyJIT.h
+++ b/tools/lli/OrcLazyJIT.h
@@ -105,7 +105,9 @@ public:
// Add the module to the JIT.
std::vector<std::unique_ptr<Module>> S;
S.push_back(std::move(M));
- auto H = CODLayer.addModuleSet(std::move(S), nullptr, std::move(Resolver));
+ auto H = CODLayer.addModuleSet(std::move(S),
+ llvm::make_unique<SectionMemoryManager>(),
+ std::move(Resolver));
// Run the static constructors, and save the static destructor runner for
// execution when the JIT is torn down.
diff --git a/tools/lli/RPCChannel.h b/tools/lli/RPCChannel.h
deleted file mode 100644
index ebd3c65640bc..000000000000
--- a/tools/lli/RPCChannel.h
+++ /dev/null
@@ -1,49 +0,0 @@
-//===---------- RPCChannel.h - LLVM out-of-process JIT execution ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the RemoteTargetExternal class which executes JITed code in a
-// separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_RPCCHANNEL_H
-#define LLVM_TOOLS_LLI_RPCCHANNEL_H
-
-#include <stdlib.h>
-#include <string>
-
-namespace llvm {
-
-class RPCChannel {
-public:
- std::string ChildName;
-
- RPCChannel() {}
- ~RPCChannel();
-
- /// Start the remote process.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- bool createServer();
-
- bool createClient();
-
-  // This will get filled in as a pointer to an OS-specific structure.
- void *ConnectionData;
-
- bool WriteBytes(const void *Data, size_t Size);
- bool ReadBytes(void *Data, size_t Size);
-
- void Wait();
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/tools/lli/RemoteJITUtils.h b/tools/lli/RemoteJITUtils.h
new file mode 100644
index 000000000000..a3f3fa0fd3b4
--- /dev/null
+++ b/tools/lli/RemoteJITUtils.h
@@ -0,0 +1,131 @@
+//===-- RemoteJITUtils.h - Utilities for remote-JITing with LLI -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Utilities for remote-JITing with LLI.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+#define LLVM_TOOLS_LLI_REMOTEJITUTILS_H
+
+#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
+#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
+
+#if !defined(_MSC_VER) && !defined(__MINGW32__)
+#include <unistd.h>
+#else
+#include <io.h>
+#endif
+
+/// RPC channel that reads from and writes to file descriptors.
+class FDRPCChannel : public llvm::orc::remote::RPCChannel {
+public:
+ FDRPCChannel(int InFD, int OutFD) : InFD(InFD), OutFD(OutFD) {}
+
+ std::error_code readBytes(char *Dst, unsigned Size) override {
+ assert(Dst && "Attempt to read into null.");
+ ssize_t ReadResult = ::read(InFD, Dst, Size);
+ if (ReadResult != (ssize_t)Size)
+ return std::error_code(errno, std::generic_category());
+ return std::error_code();
+ }
+
+ std::error_code appendBytes(const char *Src, unsigned Size) override {
+ assert(Src && "Attempt to append from null.");
+ ssize_t WriteResult = ::write(OutFD, Src, Size);
+ if (WriteResult != (ssize_t)Size)
+      return std::error_code(errno, std::generic_category());
+ return std::error_code();
+ }
+
+ std::error_code send() override { return std::error_code(); }
+
+private:
+ int InFD, OutFD;
+};
+
+// Launch the remote process (see lli.cpp) and return a channel to it.
+std::unique_ptr<FDRPCChannel> launchRemote();
+
+namespace llvm {
+
+// ForwardingMemoryManager - Adapter to connect MCJIT to Orc's remote
+// memory manager.
+class ForwardingMemoryManager : public llvm::RTDyldMemoryManager {
+public:
+ void setMemMgr(std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr) {
+ this->MemMgr = std::move(MemMgr);
+ }
+
+ void setResolver(std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver) {
+ this->Resolver = std::move(Resolver);
+ }
+
+ uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID,
+ StringRef SectionName) override {
+ return MemMgr->allocateCodeSection(Size, Alignment, SectionID, SectionName);
+ }
+
+ uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) override {
+ return MemMgr->allocateDataSection(Size, Alignment, SectionID, SectionName,
+ IsReadOnly);
+ }
+
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t RODataSize, uint32_t RODataAlign,
+ uintptr_t RWDataSize,
+ uint32_t RWDataAlign) override {
+ MemMgr->reserveAllocationSpace(CodeSize, CodeAlign, RODataSize, RODataAlign,
+ RWDataSize, RWDataAlign);
+ }
+
+ bool needsToReserveAllocationSpace() override {
+ return MemMgr->needsToReserveAllocationSpace();
+ }
+
+ void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {
+ MemMgr->registerEHFrames(Addr, LoadAddr, Size);
+ }
+
+ void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
+ size_t Size) override {
+ MemMgr->deregisterEHFrames(Addr, LoadAddr, Size);
+ }
+
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+ return MemMgr->finalizeMemory(ErrMsg);
+ }
+
+ void notifyObjectLoaded(RuntimeDyld &RTDyld,
+ const object::ObjectFile &Obj) override {
+ MemMgr->notifyObjectLoaded(RTDyld, Obj);
+ }
+
+ // Don't hide the sibling notifyObjectLoaded from RTDyldMemoryManager.
+ using RTDyldMemoryManager::notifyObjectLoaded;
+
+ RuntimeDyld::SymbolInfo findSymbol(const std::string &Name) override {
+ return Resolver->findSymbol(Name);
+ }
+
+ RuntimeDyld::SymbolInfo
+ findSymbolInLogicalDylib(const std::string &Name) override {
+ return Resolver->findSymbolInLogicalDylib(Name);
+ }
+
+private:
+ std::unique_ptr<RuntimeDyld::MemoryManager> MemMgr;
+ std::unique_ptr<RuntimeDyld::SymbolResolver> Resolver;
+};
+}
+
+#endif
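For a sense of how the new FDRPCChannel behaves on its own, here is a hedged loopback sketch: one process writes and reads through a single pipe just to exercise readBytes/appendBytes. It assumes compilation inside the lli source tree so RemoteJITUtils.h resolves; real use pairs the host process with lli-child-target instead. Note that send() is a no-op in the implementation above, so appendBytes writes straight to the descriptor.

#include "RemoteJITUtils.h"
#include <cstring>
#include <unistd.h>

int main() {
  int FDs[2];
  if (pipe(FDs)) // FDs[0] is the read end, FDs[1] the write end
    return 1;
  FDRPCChannel Channel(FDs[0], FDs[1]);
  char Msg[] = "ping";
  if (Channel.appendBytes(Msg, sizeof(Msg)) || Channel.send())
    return 1;
  char Buf[sizeof(Msg)] = {};
  if (Channel.readBytes(Buf, sizeof(Buf)))
    return 1;
  return std::strcmp(Buf, "ping") != 0; // 0 on success
}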
diff --git a/tools/lli/RemoteMemoryManager.cpp b/tools/lli/RemoteMemoryManager.cpp
deleted file mode 100644
index 0a16210d2ea5..000000000000
--- a/tools/lli/RemoteMemoryManager.cpp
+++ /dev/null
@@ -1,174 +0,0 @@
-//===---- RemoteMemoryManager.cpp - Recording memory manager --------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This memory manager allocates local storage and keeps a record of each
-// allocation. Iterators are provided for all data and code allocations.
-//
-//===----------------------------------------------------------------------===//
-
-#include "RemoteMemoryManager.h"
-#include "llvm/ExecutionEngine/ExecutionEngine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-
-using namespace llvm;
-
-#define DEBUG_TYPE "lli"
-
-RemoteMemoryManager::~RemoteMemoryManager() {
- for (SmallVector<Allocation, 2>::iterator
- I = AllocatedSections.begin(), E = AllocatedSections.end();
- I != E; ++I)
- sys::Memory::releaseMappedMemory(I->MB);
-}
-
-uint8_t *RemoteMemoryManager::
-allocateCodeSection(uintptr_t Size, unsigned Alignment, unsigned SectionID,
- StringRef SectionName) {
- // The recording memory manager is just a local copy of the remote target.
- // The alignment requirement is just stored here for later use. Regular
- // heap storage is sufficient here, but we're using mapped memory to work
- // around a bug in MCJIT.
- sys::MemoryBlock Block = allocateSection(Size);
- // AllocatedSections will own this memory.
- AllocatedSections.push_back( Allocation(Block, Alignment, true) );
- // UnmappedSections has the same information but does not own the memory.
- UnmappedSections.push_back( Allocation(Block, Alignment, true) );
- return (uint8_t*)Block.base();
-}
-
-uint8_t *RemoteMemoryManager::
-allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) {
- // The recording memory manager is just a local copy of the remote target.
- // The alignment requirement is just stored here for later use. Regular
- // heap storage is sufficient here, but we're using mapped memory to work
- // around a bug in MCJIT.
- sys::MemoryBlock Block = allocateSection(Size);
- // AllocatedSections will own this memory.
- AllocatedSections.push_back( Allocation(Block, Alignment, false) );
- // UnmappedSections has the same information but does not own the memory.
- UnmappedSections.push_back( Allocation(Block, Alignment, false) );
- return (uint8_t*)Block.base();
-}
-
-sys::MemoryBlock RemoteMemoryManager::allocateSection(uintptr_t Size) {
- std::error_code ec;
- sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(Size,
- &Near,
- sys::Memory::MF_READ |
- sys::Memory::MF_WRITE,
- ec);
- assert(!ec && MB.base());
-
- // FIXME: This is part of a work around to keep sections near one another
- // when MCJIT performs relocations after code emission but before
- // the generated code is moved to the remote target.
- // Save this address as the basis for our next request
- Near = MB;
- return MB;
-}
-
-void RemoteMemoryManager::notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &Obj) {
- // The client should have called setRemoteTarget() before triggering any
- // code generation.
- assert(Target);
- if (!Target)
- return;
-
- // FIXME: Make this function thread safe.
-
- // Lay out our sections in order, with all the code sections first, then
- // all the data sections.
- uint64_t CurOffset = 0;
- unsigned MaxAlign = Target->getPageAlignment();
- SmallVector<std::pair<Allocation, uint64_t>, 16> Offsets;
- unsigned NumSections = UnmappedSections.size();
- // We're going to go through the list twice to separate code and data, but
- // it's a very small list, so that's OK.
- for (size_t i = 0, e = NumSections; i != e; ++i) {
- Allocation &Section = UnmappedSections[i];
- if (Section.IsCode) {
- unsigned Size = Section.MB.size();
- unsigned Align = Section.Alignment;
- DEBUG(dbgs() << "code region: size " << Size
- << ", alignment " << Align << "\n");
- // Align the current offset up to whatever is needed for the next
- // section.
- CurOffset = (CurOffset + Align - 1) / Align * Align;
- // Save off the address of the new section and allocate its space.
- Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset));
- CurOffset += Size;
- }
- }
- // Adjust to keep code and data aligned on separate pages.
- CurOffset = (CurOffset + MaxAlign - 1) / MaxAlign * MaxAlign;
- for (size_t i = 0, e = NumSections; i != e; ++i) {
- Allocation &Section = UnmappedSections[i];
- if (!Section.IsCode) {
- unsigned Size = Section.MB.size();
- unsigned Align = Section.Alignment;
- DEBUG(dbgs() << "data region: size " << Size
- << ", alignment " << Align << "\n");
- // Align the current offset up to whatever is needed for the next
- // section.
- CurOffset = (CurOffset + Align - 1) / Align * Align;
- // Save off the address of the new section and allocate its space.
- Offsets.push_back(std::pair<Allocation,uint64_t>(Section, CurOffset));
- CurOffset += Size;
- }
- }
-
- // Allocate space in the remote target.
- uint64_t RemoteAddr;
- if (!Target->allocateSpace(CurOffset, MaxAlign, RemoteAddr))
- report_fatal_error(Target->getErrorMsg());
-
- // Map the section addresses so relocations will get updated in the local
- // copies of the sections.
- for (unsigned i = 0, e = Offsets.size(); i != e; ++i) {
- uint64_t Addr = RemoteAddr + Offsets[i].second;
- EE->mapSectionAddress(const_cast<void*>(Offsets[i].first.MB.base()), Addr);
-
- DEBUG(dbgs() << " Mapping local: " << Offsets[i].first.MB.base()
- << " to remote: 0x" << format("%llx", Addr) << "\n");
-
- MappedSections[Addr] = Offsets[i].first;
- }
-
- UnmappedSections.clear();
-}
-
-bool RemoteMemoryManager::finalizeMemory(std::string *ErrMsg) {
- // FIXME: Make this function thread safe.
- for (DenseMap<uint64_t, Allocation>::iterator
- I = MappedSections.begin(), E = MappedSections.end();
- I != E; ++I) {
- uint64_t RemoteAddr = I->first;
- const Allocation &Section = I->second;
- if (Section.IsCode) {
- if (!Target->loadCode(RemoteAddr, Section.MB.base(), Section.MB.size()))
- report_fatal_error(Target->getErrorMsg());
- DEBUG(dbgs() << " loading code: " << Section.MB.base()
- << " to remote: 0x" << format("%llx", RemoteAddr) << "\n");
- } else {
- if (!Target->loadData(RemoteAddr, Section.MB.base(), Section.MB.size()))
- report_fatal_error(Target->getErrorMsg());
- DEBUG(dbgs() << " loading data: " << Section.MB.base()
- << " to remote: 0x" << format("%llx", RemoteAddr) << "\n");
- }
- }
-
- MappedSections.clear();
-
- return false;
-}
diff --git a/tools/lli/RemoteMemoryManager.h b/tools/lli/RemoteMemoryManager.h
deleted file mode 100644
index 5733fa53f3e4..000000000000
--- a/tools/lli/RemoteMemoryManager.h
+++ /dev/null
@@ -1,101 +0,0 @@
-//===- RemoteMemoryManager.h - LLI MCJIT recording memory manager ------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This memory manager allocates local storage and keeps a record of each
-// allocation. Iterators are provided for all data and code allocations.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H
-#define LLVM_TOOLS_LLI_REMOTEMEMORYMANAGER_H
-
-#include "RemoteTarget.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ExecutionEngine/RTDyldMemoryManager.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/Memory.h"
-#include <utility>
-
-namespace llvm {
-
-class RemoteMemoryManager : public RTDyldMemoryManager {
-public:
- // Notice that this structure takes ownership of the memory allocated.
- struct Allocation {
- Allocation() {}
- Allocation(sys::MemoryBlock mb, unsigned a, bool code)
- : MB(mb), Alignment(a), IsCode(code) {}
-
- sys::MemoryBlock MB;
- unsigned Alignment;
- bool IsCode;
- };
-
-private:
- // This vector contains Allocation objects for all sections which we have
- // allocated. This vector effectively owns the memory associated with the
- // allocations.
- SmallVector<Allocation, 2> AllocatedSections;
-
- // This vector contains pointers to Allocation objects for any sections we
- // have allocated locally but have not yet remapped for the remote target.
- // When we receive notification of a completed module load, we will map
- // these sections into the remote target.
- SmallVector<Allocation, 2> UnmappedSections;
-
- // This map tracks the sections we have remapped for the remote target
- // but have not yet copied to the target.
- DenseMap<uint64_t, Allocation> MappedSections;
-
- // FIXME: This is part of a work around to keep sections near one another
- // when MCJIT performs relocations after code emission but before
- // the generated code is moved to the remote target.
- sys::MemoryBlock Near;
- sys::MemoryBlock allocateSection(uintptr_t Size);
-
- RemoteTarget *Target;
-
-public:
- RemoteMemoryManager() : Target(nullptr) {}
- ~RemoteMemoryManager() override;
-
- uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID,
- StringRef SectionName) override;
-
- uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
- unsigned SectionID, StringRef SectionName,
- bool IsReadOnly) override;
-
-  // For now, remote symbol resolution is not supported in lli. The MCJIT
- // interface does support this, but clients must provide their own
- // mechanism for finding remote symbol addresses. MCJIT will resolve
- // symbols from Modules it contains.
- uint64_t getSymbolAddress(const std::string &Name) override { return 0; }
-
- void notifyObjectLoaded(ExecutionEngine *EE,
- const object::ObjectFile &Obj) override;
-
- bool finalizeMemory(std::string *ErrMsg) override;
-
- // For now, remote EH frame registration isn't supported. Remote symbol
- // resolution is a prerequisite to supporting remote EH frame registration.
- void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {}
- void deregisterEHFrames(uint8_t *Addr, uint64_t LoadAddr,
- size_t Size) override {}
-
- // This is a non-interface function used by lli
- void setRemoteTarget(RemoteTarget *T) { Target = T; }
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/tools/lli/RemoteTarget.cpp b/tools/lli/RemoteTarget.cpp
deleted file mode 100644
index 95e1511eaaf5..000000000000
--- a/tools/lli/RemoteTarget.cpp
+++ /dev/null
@@ -1,71 +0,0 @@
-//===- RemoteTarget.cpp - LLVM Remote process JIT execution -----*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implementation of the RemoteTarget class which executes JITed code in a
-// separate address range from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#include "RemoteTarget.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Memory.h"
-#include <stdlib.h>
-#include <string>
-
-using namespace llvm;
-
-////////////////////////////////////////////////////////////////////////////////
-// Simulated remote execution
-//
-// This implementation will simply move generated code and data to a new memory
-// location in the current executable and let it run from there.
-////////////////////////////////////////////////////////////////////////////////
-
-bool RemoteTarget::allocateSpace(size_t Size, unsigned Alignment,
- uint64_t &Address) {
- sys::MemoryBlock *Prev = Allocations.size() ? &Allocations.back() : nullptr;
- sys::MemoryBlock Mem = sys::Memory::AllocateRWX(Size, Prev, &ErrorMsg);
- if (Mem.base() == nullptr)
- return false;
- if ((uintptr_t)Mem.base() % Alignment) {
- ErrorMsg = "unable to allocate sufficiently aligned memory";
- return false;
- }
- Address = reinterpret_cast<uint64_t>(Mem.base());
- Allocations.push_back(Mem);
- return true;
-}
-
-bool RemoteTarget::loadData(uint64_t Address, const void *Data, size_t Size) {
- memcpy ((void*)Address, Data, Size);
- return true;
-}
-
-bool RemoteTarget::loadCode(uint64_t Address, const void *Data, size_t Size) {
- memcpy ((void*)Address, Data, Size);
- sys::MemoryBlock Mem((void*)Address, Size);
- sys::Memory::setExecutable(Mem, &ErrorMsg);
- return true;
-}
-
-bool RemoteTarget::executeCode(uint64_t Address, int &RetVal) {
- int (*fn)() = (int(*)())Address;
- RetVal = fn();
- return true;
-}
-
-bool RemoteTarget::create() {
- return true;
-}
-
-void RemoteTarget::stop() {
- for (unsigned i = 0, e = Allocations.size(); i != e; ++i)
- sys::Memory::ReleaseRWX(Allocations[i]);
-}
diff --git a/tools/lli/RemoteTarget.h b/tools/lli/RemoteTarget.h
deleted file mode 100644
index ee758a2747a4..000000000000
--- a/tools/lli/RemoteTarget.h
+++ /dev/null
@@ -1,122 +0,0 @@
-//===- RemoteTarget.h - LLVM Remote process JIT execution ----------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the RemoteTarget class which executes JITed code in a
-// separate address range from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_REMOTETARGET_H
-#define LLVM_TOOLS_LLI_REMOTETARGET_H
-
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Memory.h"
-#include <stdlib.h>
-#include <string>
-
-namespace llvm {
-
-class RemoteTarget {
- bool IsRunning;
-
- typedef SmallVector<sys::MemoryBlock, 16> AllocMapType;
- AllocMapType Allocations;
-
-protected:
- std::string ErrorMsg;
-
-public:
- StringRef getErrorMsg() const { return ErrorMsg; }
-
- /// Allocate space in the remote target address space.
- ///
- /// @param Size Amount of space, in bytes, to allocate.
- /// @param Alignment Required minimum alignment for allocated space.
- /// @param[out] Address Remote address of the allocated memory.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- virtual bool allocateSpace(size_t Size,
- unsigned Alignment,
- uint64_t &Address);
-
- bool isAllocatedMemory(uint64_t Address, uint32_t Size) {
- uint64_t AddressEnd = Address + Size;
- for (AllocMapType::const_iterator I = Allocations.begin(),
- E = Allocations.end();
- I != E; ++I) {
- if (Address >= (uint64_t)I->base() &&
- AddressEnd <= (uint64_t)I->base() + I->size())
- return true;
- }
- return false;
- }
-
- /// Load data into the target address space.
- ///
- /// @param Address Destination address in the target process.
- /// @param Data Source address in the host process.
- /// @param Size Number of bytes to copy.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- virtual bool loadData(uint64_t Address,
- const void *Data,
- size_t Size);
-
- /// Load code into the target address space and prepare it for execution.
- ///
- /// @param Address Destination address in the target process.
- /// @param Data Source address in the host process.
- /// @param Size Number of bytes to copy.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- virtual bool loadCode(uint64_t Address,
- const void *Data,
- size_t Size);
-
- /// Execute code in the target process. The called function is required
- /// to be of signature int "(*)(void)".
- ///
- /// @param Address Address of the loaded function in the target
- /// process.
- /// @param[out] RetVal The integer return value of the called function.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- virtual bool executeCode(uint64_t Address,
- int &RetVal);
-
- /// Minimum alignment for memory permissions. Used to separate code and
- /// data regions to make sure data doesn't get marked as code or vice
- /// versa.
- ///
- /// @returns Page alignment return value. Default of 4k.
- virtual unsigned getPageAlignment() { return 4096; }
-
- /// Start the remote process.
- virtual bool create();
-
- /// Terminate the remote process.
- virtual void stop();
-
- RemoteTarget() : IsRunning(false), ErrorMsg("") {}
- virtual ~RemoteTarget() { if (IsRunning) stop(); }
-private:
- // Main processing function for the remote target process. Command messages
- // are received on file descriptor CmdFD and responses come back on OutFD.
- static void doRemoteTargeting(int CmdFD, int OutFD);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/tools/lli/RemoteTargetExternal.cpp b/tools/lli/RemoteTargetExternal.cpp
deleted file mode 100644
index fe46248822c5..000000000000
--- a/tools/lli/RemoteTargetExternal.cpp
+++ /dev/null
@@ -1,327 +0,0 @@
-//===---- RemoteTargetExternal.cpp - LLVM out-of-process JIT execution ----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implementation of the RemoteTargetExternal class which executes JITed code
-// in a separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Config/config.h"
-#include "RemoteTarget.h"
-#include "RemoteTargetExternal.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/Memory.h"
-#include "llvm/Support/Program.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
-
-using namespace llvm;
-
-#define DEBUG_TYPE "lli"
-
-bool RemoteTargetExternal::allocateSpace(size_t Size, unsigned Alignment,
- uint64_t &Address) {
- DEBUG(dbgs() << "Message [allocate space] size: " << Size <<
- ", align: " << Alignment << "\n");
- if (!SendAllocateSpace(Alignment, Size)) {
- ErrorMsg += ", (RemoteTargetExternal::allocateSpace)";
- return false;
- }
- if (!Receive(LLI_AllocationResult, Address)) {
- ErrorMsg += ", (RemoteTargetExternal::allocateSpace)";
- return false;
- }
- if (Address == 0) {
- ErrorMsg += "failed allocation, (RemoteTargetExternal::allocateSpace)";
- return false;
- }
- DEBUG(dbgs() << "Message [allocate space] addr: 0x" <<
- format("%llx", Address) << "\n");
- return true;
-}
-
-bool RemoteTargetExternal::loadData(uint64_t Address, const void *Data, size_t Size) {
- DEBUG(dbgs() << "Message [load data] addr: 0x" << format("%llx", Address) <<
- ", size: " << Size << "\n");
- if (!SendLoadSection(Address, Data, (uint32_t)Size, false)) {
- ErrorMsg += ", (RemoteTargetExternal::loadData)";
- return false;
- }
- int Status = LLI_Status_Success;
- if (!Receive(LLI_LoadResult, Status)) {
- ErrorMsg += ", (RemoteTargetExternal::loadData)";
- return false;
- }
- if (Status == LLI_Status_IncompleteMsg) {
- ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)";
- return false;
- }
- if (Status == LLI_Status_NotAllocated) {
- ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)";
- return false;
- }
- DEBUG(dbgs() << "Message [load data] complete\n");
- return true;
-}
-
-bool RemoteTargetExternal::loadCode(uint64_t Address, const void *Data, size_t Size) {
- DEBUG(dbgs() << "Message [load code] addr: 0x" << format("%llx", Address) <<
- ", size: " << Size << "\n");
- if (!SendLoadSection(Address, Data, (uint32_t)Size, true)) {
- ErrorMsg += ", (RemoteTargetExternal::loadCode)";
- return false;
- }
- int Status = LLI_Status_Success;
- if (!Receive(LLI_LoadResult, Status)) {
- ErrorMsg += ", (RemoteTargetExternal::loadCode)";
- return false;
- }
- if (Status == LLI_Status_IncompleteMsg) {
- ErrorMsg += "incomplete load data, (RemoteTargetExternal::loadData)";
- return false;
- }
- if (Status == LLI_Status_NotAllocated) {
- ErrorMsg += "data memory not allocated, (RemoteTargetExternal::loadData)";
- return false;
- }
- DEBUG(dbgs() << "Message [load code] complete\n");
- return true;
-}
-
-bool RemoteTargetExternal::executeCode(uint64_t Address, int32_t &RetVal) {
-  DEBUG(dbgs() << "Message [execute code] addr: " << Address << "\n");
- if (!SendExecute(Address)) {
- ErrorMsg += ", (RemoteTargetExternal::executeCode)";
- return false;
- }
- if (!Receive(LLI_ExecutionResult, RetVal)) {
- ErrorMsg += ", (RemoteTargetExternal::executeCode)";
- return false;
- }
- DEBUG(dbgs() << "Message [exectue code] return: " << RetVal << "\n");
- return true;
-}
-
-void RemoteTargetExternal::stop() {
- SendTerminate();
- RPC.Wait();
-}
-
-bool RemoteTargetExternal::SendAllocateSpace(uint32_t Alignment, uint32_t Size) {
- if (!SendHeader(LLI_AllocateSpace)) {
- ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)";
- return false;
- }
-
- AppendWrite((const void *)&Alignment, 4);
- AppendWrite((const void *)&Size, 4);
-
- if (!SendPayload()) {
- ErrorMsg += ", (RemoteTargetExternal::SendAllocateSpace)";
- return false;
- }
- return true;
-}
-
-bool RemoteTargetExternal::SendLoadSection(uint64_t Addr,
- const void *Data,
- uint32_t Size,
- bool IsCode) {
- LLIMessageType MsgType = IsCode ? LLI_LoadCodeSection : LLI_LoadDataSection;
- if (!SendHeader(MsgType)) {
- ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)";
- return false;
- }
-
- AppendWrite((const void *)&Addr, 8);
- AppendWrite(Data, Size);
-
- if (!SendPayload()) {
- ErrorMsg += ", (RemoteTargetExternal::SendLoadSection)";
- return false;
- }
- return true;
-}
-
-bool RemoteTargetExternal::SendExecute(uint64_t Addr) {
- if (!SendHeader(LLI_Execute)) {
- ErrorMsg += ", (RemoteTargetExternal::SendExecute)";
- return false;
- }
-
- AppendWrite((const void *)&Addr, 8);
-
- if (!SendPayload()) {
- ErrorMsg += ", (RemoteTargetExternal::SendExecute)";
- return false;
- }
- return true;
-}
-
-bool RemoteTargetExternal::SendTerminate() {
- return SendHeader(LLI_Terminate);
- // No data or data size is sent with Terminate
-}
-
-bool RemoteTargetExternal::Receive(LLIMessageType Msg) {
- if (!ReceiveHeader(Msg))
- return false;
- int Unused;
- AppendRead(&Unused, 0);
- if (!ReceivePayload())
- return false;
- ReceiveData.clear();
- Sizes.clear();
- return true;
-}
-
-bool RemoteTargetExternal::Receive(LLIMessageType Msg, int32_t &Data) {
- if (!ReceiveHeader(Msg))
- return false;
- AppendRead(&Data, 4);
- if (!ReceivePayload())
- return false;
- ReceiveData.clear();
- Sizes.clear();
- return true;
-}
-
-bool RemoteTargetExternal::Receive(LLIMessageType Msg, uint64_t &Data) {
- if (!ReceiveHeader(Msg))
- return false;
- AppendRead(&Data, 8);
- if (!ReceivePayload())
- return false;
- ReceiveData.clear();
- Sizes.clear();
- return true;
-}
-
-bool RemoteTargetExternal::ReceiveHeader(LLIMessageType ExpectedMsgType) {
- assert(ReceiveData.empty() && Sizes.empty() &&
- "Payload vectors must be empty before receiving a header");
-
- // Message header, with type to follow
- uint32_t MsgType;
- if (!ReadBytes(&MsgType, 4)) {
- ErrorMsg += ", (RemoteTargetExternal::ReceiveHeader)";
- return false;
- }
- if (MsgType != (uint32_t)ExpectedMsgType) {
- ErrorMsg = "received unexpected message type";
- ErrorMsg += ". Expecting: ";
- ErrorMsg += std::to_string(ExpectedMsgType);
- ErrorMsg += ", Got: ";
- ErrorMsg += std::to_string(MsgType);
- return false;
- }
- return true;
-}
-
-bool RemoteTargetExternal::ReceivePayload() {
- assert(!ReceiveData.empty() &&
- "Payload vector must not be empty when receiving");
- assert(ReceiveData.size() == Sizes.size() &&
- "Unexpected mismatch between data and size");
-
- uint32_t TotalSize = 0;
- for (int I=0, E=Sizes.size(); I < E; I++)
- TotalSize += Sizes[I];
-
- // Payload size header
- uint32_t DataSize;
- if (!ReadBytes(&DataSize, 4)) {
- ErrorMsg += ", invalid data size";
- return false;
- }
- if (DataSize != TotalSize) {
- ErrorMsg = "unexpected data size";
- ErrorMsg += ". Expecting: ";
- ErrorMsg += std::to_string(TotalSize);
- ErrorMsg += ", Got: ";
- ErrorMsg += std::to_string(DataSize);
- return false;
- }
- if (DataSize == 0)
- return true;
-
- // Payload itself
- for (int I=0, E=Sizes.size(); I < E; I++) {
- if (!ReadBytes(ReceiveData[I], Sizes[I])) {
- ErrorMsg = "unexpected data while reading message";
- return false;
- }
- }
-
- return true;
-}
-
-bool RemoteTargetExternal::SendHeader(LLIMessageType MsgType) {
- assert(SendData.empty() && Sizes.empty() &&
- "Payload vectors must be empty before sending a header");
-
- // Message header, with type to follow
- if (!WriteBytes(&MsgType, 4)) {
- ErrorMsg += ", (RemoteTargetExternal::SendHeader)";
- return false;
- }
- return true;
-}
-
-bool RemoteTargetExternal::SendPayload() {
- assert(!SendData.empty() && !Sizes.empty() &&
- "Payload vectors must not be empty when sending");
- assert(SendData.size() == Sizes.size() &&
- "Unexpected mismatch between data and size");
-
- uint32_t TotalSize = 0;
- for (int I=0, E=Sizes.size(); I < E; I++)
- TotalSize += Sizes[I];
-
- // Payload size header
- if (!WriteBytes(&TotalSize, 4)) {
- ErrorMsg += ", invalid data size";
- return false;
- }
- if (TotalSize == 0)
- return true;
-
- // Payload itself
- for (int I=0, E=Sizes.size(); I < E; I++) {
- if (!WriteBytes(SendData[I], Sizes[I])) {
- ErrorMsg = "unexpected data while writing message";
- return false;
- }
- }
-
- SendData.clear();
- Sizes.clear();
- return true;
-}
-
-void RemoteTargetExternal::AppendWrite(const void *Data, uint32_t Size) {
- SendData.push_back(Data);
- Sizes.push_back(Size);
-}
-
-void RemoteTargetExternal::AppendRead(void *Data, uint32_t Size) {
- ReceiveData.push_back(Data);
- Sizes.push_back(Size);
-}
-
-#ifdef LLVM_ON_UNIX
-#include "Unix/RPCChannel.inc"
-#endif
-
-#ifdef LLVM_ON_WIN32
-#include "Windows/RPCChannel.inc"
-#endif
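The send path deleted above always framed a message the same way: a four-byte message type, a four-byte total payload size, then the payload chunks in order. A minimal standalone sketch of that framing, assuming a WriteFn callback standing in for RPCChannel::WriteBytes (the helper name and callback are hypothetical; error handling is elided):

#include "llvm/ADT/ArrayRef.h"
#include <cstdint>
#include <utility>

// Frame one message as { type, total-size, payload... }, mirroring what
// SendHeader + AppendWrite + SendPayload did in the code deleted above.
static bool sendFramed(bool (*WriteFn)(const void *, size_t), uint32_t MsgType,
                       llvm::ArrayRef<std::pair<const void *, uint32_t>> Chunks) {
  uint32_t TotalSize = 0;
  for (const auto &C : Chunks)
    TotalSize += C.second;            // payload size is the sum of all chunks
  if (!WriteFn(&MsgType, 4) || !WriteFn(&TotalSize, 4))
    return false;                     // header: type word, then size word
  for (const auto &C : Chunks)
    if (!WriteFn(C.first, C.second))  // payload chunks, in append order
      return false;
  return true;
}

The receiving side (ReceiveHeader/ReceivePayload above) reads the same three fields back in the same order, which is why any mismatch misaligns the pipe.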
diff --git a/tools/lli/RemoteTargetExternal.h b/tools/lli/RemoteTargetExternal.h
deleted file mode 100644
index afe8570ff49f..000000000000
--- a/tools/lli/RemoteTargetExternal.h
+++ /dev/null
@@ -1,143 +0,0 @@
-//===----- RemoteTargetExternal.h - LLVM out-of-process JIT execution -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the RemoteTargetExternal class which executes JITed code in a
-// separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H
-#define LLVM_TOOLS_LLI_REMOTETARGETEXTERNAL_H
-
-#include "RPCChannel.h"
-#include "RemoteTarget.h"
-#include "RemoteTargetMessage.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringRef.h"
-#include "llvm/Config/config.h"
-#include "llvm/Support/DataTypes.h"
-#include "llvm/Support/Memory.h"
-#include <stdlib.h>
-#include <string>
-
-namespace llvm {
-
-class RemoteTargetExternal : public RemoteTarget {
- RPCChannel RPC;
-
- bool WriteBytes(const void *Data, size_t Size) {
- return RPC.WriteBytes(Data, Size);
- }
-
- bool ReadBytes(void *Data, size_t Size) { return RPC.ReadBytes(Data, Size); }
-
-public:
- /// Allocate space in the remote target address space.
- ///
- /// @param Size Amount of space, in bytes, to allocate.
- /// @param Alignment Required minimum alignment for allocated space.
- /// @param[out] Address Remote address of the allocated memory.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- bool allocateSpace(size_t Size, unsigned Alignment,
- uint64_t &Address) override;
-
- /// Load data into the target address space.
- ///
- /// @param Address Destination address in the target process.
- /// @param Data Source address in the host process.
- /// @param Size Number of bytes to copy.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- bool loadData(uint64_t Address, const void *Data, size_t Size) override;
-
- /// Load code into the target address space and prepare it for execution.
- ///
- /// @param Address Destination address in the target process.
- /// @param Data Source address in the host process.
- /// @param Size Number of bytes to copy.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- bool loadCode(uint64_t Address, const void *Data, size_t Size) override;
-
- /// Execute code in the target process. The called function is required
- /// to be of signature "int (*)(void)".
- ///
- /// @param Address Address of the loaded function in the target
- /// process.
- /// @param[out] RetVal The integer return value of the called function.
- ///
- /// @returns True on success. On failure, ErrorMsg is updated with
- /// descriptive text of the encountered error.
- bool executeCode(uint64_t Address, int &RetVal) override;
-
- /// Minimum alignment for memory permissions. Used to separate code and
- /// data regions to make sure data doesn't get marked as code or vice
- /// versa.
- ///
- /// @returns Page alignment return value. Default of 4k.
- unsigned getPageAlignment() override { return 4096; }
-
- bool create() override {
- RPC.ChildName = ChildName;
- if (!RPC.createServer())
- return true;
-
- // We must get Ack from the client (blocking read)
- if (!Receive(LLI_ChildActive)) {
- ErrorMsg += ", (RPCChannel::create) - Stopping process!";
- stop();
- return false;
- }
-
- return true;
- }
-
- /// Terminate the remote process.
- void stop() override;
-
- RemoteTargetExternal(std::string &Name) : RemoteTarget(), ChildName(Name) {}
- ~RemoteTargetExternal() override {}
-
-private:
- std::string ChildName;
-
- bool SendAllocateSpace(uint32_t Alignment, uint32_t Size);
- bool SendLoadSection(uint64_t Addr,
- const void *Data,
- uint32_t Size,
- bool IsCode);
- bool SendExecute(uint64_t Addr);
- bool SendTerminate();
-
- // High-level wrappers for receiving data
- bool Receive(LLIMessageType Msg);
- bool Receive(LLIMessageType Msg, int32_t &Data);
- bool Receive(LLIMessageType Msg, uint64_t &Data);
-
- // Lower level target-independent read/write to deal with errors
- bool ReceiveHeader(LLIMessageType Msg);
- bool ReceivePayload();
- bool SendHeader(LLIMessageType Msg);
- bool SendPayload();
-
- // Functions to append/retrieve data from the payload
- SmallVector<const void *, 2> SendData;
- SmallVector<void *, 1> ReceiveData; // Future proof
- SmallVector<int, 2> Sizes;
- void AppendWrite(const void *Data, uint32_t Size);
- void AppendRead(void *Data, uint32_t Size);
-};
-
-} // end namespace llvm
-
-#endif
diff --git a/tools/lli/RemoteTargetMessage.h b/tools/lli/RemoteTargetMessage.h
deleted file mode 100644
index c210e4b3d6b8..000000000000
--- a/tools/lli/RemoteTargetMessage.h
+++ /dev/null
@@ -1,85 +0,0 @@
-//===---- RemoteTargetMessage.h - LLI out-of-process message protocol -----===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Definition of the LLIMessageType enum which is used for communication with a
-// child process for remote execution.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H
-#define LLVM_TOOLS_LLI_REMOTETARGETMESSAGE_H
-
-namespace llvm {
-
-// LLI messages from parent-to-child or vice versa follow an exceedingly simple
-// protocol: the first four bytes represent the message type, the next four
-// bytes represent the size of the data for the command, and the following
-// bytes represent the actual data.
-//
-// The protocol is not intended to be robust, secure or fault-tolerant. It is
-// only here for testing purposes and is therefore intended to be the simplest
-// implementation that will work. It is assumed that the parent and child
-// share characteristics like endianness.
-//
-// Quick description of the protocol:
-//
-// { Header + Payload Size + Payload }
-//
-// The protocol message consists of a header, the payload size (which can be
-// zero), and the payload itself. The payload can contain any number of items,
-// and the size has to be the sum of them all. Each end is responsible for
-// reading/writing the correct number of items with the correct sizes.
-//
-// The current four known exchanges are:
-//
-// * Allocate Space:
-// Parent: { LLI_AllocateSpace, 8, Alignment, Size }
-// Child: { LLI_AllocationResult, 8, Address }
-//
-// * Load Data:
-// Parent: { LLI_LoadDataSection, 8+Size, Address, Data }
-// Child: { LLI_LoadComplete, 4, StatusCode }
-//
-// * Load Code:
-// Parent: { LLI_LoadCodeSection, 8+Size, Address, Code }
-// Child: { LLI_LoadComplete, 4, StatusCode }
-//
-// * Execute Code:
-// Parent: { LLI_Execute, 8, Address }
-// Child: { LLI_ExecutionResult, 4, Result }
-//
-// It is the responsibility of either side to check for correct headers,
-// sizes and payloads, since any inconsistency would misalign the pipe, and
-// result in data corruption.
-
-enum LLIMessageType {
- LLI_Error = -1,
- LLI_ChildActive = 0, // Data = not used
- LLI_AllocateSpace, // Data = struct { uint32_t Align, uint32_t Size }
- LLI_AllocationResult, // Data = uint64_t Address (child memory space)
-
- LLI_LoadCodeSection, // Data = uint64_t Address, void * SectionData
- LLI_LoadDataSection, // Data = uint64_t Address, void * SectionData
- LLI_LoadResult, // Data = uint32_t LLIMessageStatus
-
- LLI_Execute, // Data = uint64_t Address
- LLI_ExecutionResult, // Data = uint32_t Result
-
- LLI_Terminate // Data = not used
-};
-
-enum LLIMessageStatus {
- LLI_Status_Success = 0, // Operation succeeded
- LLI_Status_NotAllocated, // Address+Size not allocated in child space
- LLI_Status_IncompleteMsg // Size received doesn't match request
-};
-
-} // end namespace llvm
-
-#endif
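As a concrete illustration of the byte layout described above, the Allocate Space exchange could be performed by hand roughly as follows (a sketch only: InputPipe/OutputPipe stand for the pipe file descriptors, the Alignment/Size values are made up, and the host is assumed little-endian, in line with the protocol's shared-endianness assumption):

#include <cstdint>
#include <unistd.h>

static uint64_t exampleAllocateExchange(int InputPipe, int OutputPipe) {
  // Parent -> child: { LLI_AllocateSpace, 8, Alignment, Size }
  uint32_t Request[4] = {LLI_AllocateSpace, /*payload size*/ 8,
                         /*Alignment*/ 16, /*Size*/ 4096};
  write(OutputPipe, Request, sizeof(Request));

  // Child -> parent: { LLI_AllocationResult, 8, Address }
  uint32_t Header[2]; // message type, then payload size (always 8 here)
  read(InputPipe, Header, sizeof(Header));
  uint64_t Address = 0; // remote address of the new allocation
  read(InputPipe, &Address, sizeof(Address));
  return Address;
}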
diff --git a/tools/lli/Unix/RPCChannel.inc b/tools/lli/Unix/RPCChannel.inc
deleted file mode 100644
index 6a9ae14b599f..000000000000
--- a/tools/lli/Unix/RPCChannel.inc
+++ /dev/null
@@ -1,122 +0,0 @@
-//=- RPCChannel.inc - LLVM out-of-process JIT execution for Unix --=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implementation of the Unix-specific parts of the RPCChannel class
-// which executes JITed code in a separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Support/Errno.h"
-#include "llvm/Support/raw_ostream.h"
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/wait.h>
-#include <unistd.h>
-
-namespace {
-
-struct ConnectionData_t {
- int InputPipe;
- int OutputPipe;
-
- ConnectionData_t(int in, int out) : InputPipe(in), OutputPipe(out) {}
-};
-
-} // namespace
-
-namespace llvm {
-
-bool RPCChannel::createServer() {
- int PipeFD[2][2];
- pid_t ChildPID;
-
- // Create two pipes.
- if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0)
- perror("Error creating pipe: ");
-
- ChildPID = fork();
-
- if (ChildPID == 0) {
- // In the child...
-
- // Close the parent ends of the pipes
- close(PipeFD[0][1]);
- close(PipeFD[1][0]);
-
- // Use our pipes as stdin and stdout
- if (PipeFD[0][0] != STDIN_FILENO) {
- dup2(PipeFD[0][0], STDIN_FILENO);
- close(PipeFD[0][0]);
- }
- if (PipeFD[1][1] != STDOUT_FILENO) {
- dup2(PipeFD[1][1], STDOUT_FILENO);
- close(PipeFD[1][1]);
- }
-
- // Execute the child process.
- char *args[1] = { nullptr };
- int rc = execv(ChildName.c_str(), args);
- if (rc != 0)
- perror("Error executing child process: ");
- } else {
- // In the parent...
-
- // Close the child ends of the pipes
- close(PipeFD[0][0]);
- close(PipeFD[1][1]);
-
- // Store the parent ends of the pipes
- ConnectionData = (void *)new ConnectionData_t(PipeFD[1][0], PipeFD[0][1]);
- return true;
- }
- return false;
-}
-
-bool RPCChannel::createClient() {
- // Store the parent ends of the pipes
- ConnectionData = (void *)new ConnectionData_t(STDIN_FILENO, STDOUT_FILENO);
- return true;
-}
-
-void RPCChannel::Wait() { wait(nullptr); }
-
-static bool CheckError(int rc, size_t Size, const char *Desc) {
- if (rc < 0) {
- llvm::errs() << "IO Error: " << Desc << ": " << sys::StrError() << '\n';
- return false;
- } else if ((size_t)rc != Size) {
- std::string ErrorMsg;
- char Number[10] = { 0 };
- ErrorMsg += "Expecting ";
- sprintf(Number, "%d", (uint32_t)Size);
- ErrorMsg += Number;
- ErrorMsg += " bytes, Got ";
- sprintf(Number, "%d", rc);
- ErrorMsg += Number;
- llvm::errs() << "RPC Error: " << Desc << ": " << ErrorMsg << '\n';
- return false;
- }
- return true;
-}
-
-bool RPCChannel::WriteBytes(const void *Data, size_t Size) {
- int rc = write(((ConnectionData_t *)ConnectionData)->OutputPipe, Data, Size);
- return CheckError(rc, Size, "WriteBytes");
-}
-
-bool RPCChannel::ReadBytes(void *Data, size_t Size) {
- int rc = read(((ConnectionData_t *)ConnectionData)->InputPipe, Data, Size);
- return CheckError(rc, Size, "ReadBytes");
-}
-
-RPCChannel::~RPCChannel() {
- delete static_cast<ConnectionData_t *>(ConnectionData);
-}
-
-} // namespace llvm
diff --git a/tools/lli/Windows/RPCChannel.inc b/tools/lli/Windows/RPCChannel.inc
deleted file mode 100644
index 82f2acbf14af..000000000000
--- a/tools/lli/Windows/RPCChannel.inc
+++ /dev/null
@@ -1,29 +0,0 @@
-//=- RPCChannel.inc - LLVM out-of-process JIT execution for Windows --=//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Implementation of the Windows-specific parts of the RPCChannel class
-// which executes JITed code in a separate process from where it was built.
-//
-//===----------------------------------------------------------------------===//
-
-namespace llvm {
-
-bool RPCChannel::createServer() { return false; }
-
-bool RPCChannel::createClient() { return false; }
-
-bool RPCChannel::WriteBytes(const void *Data, size_t Size) { return false; }
-
-bool RPCChannel::ReadBytes(void *Data, size_t Size) { return false; }
-
-void RPCChannel::Wait() {}
-
-RPCChannel::~RPCChannel() {}
-
-} // namespace llvm
diff --git a/tools/lli/lli.cpp b/tools/lli/lli.cpp
index 9f714060c17a..67e7cbd7686a 100644
--- a/tools/lli/lli.cpp
+++ b/tools/lli/lli.cpp
@@ -13,11 +13,9 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/IR/LLVMContext.h"
#include "OrcLazyJIT.h"
-#include "RemoteMemoryManager.h"
-#include "RemoteTarget.h"
-#include "RemoteTargetExternal.h"
+#include "RemoteJITUtils.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/ReaderWriter.h"
#include "llvm/CodeGen/LinkAllCodegenComponents.h"
@@ -28,6 +26,7 @@
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/ExecutionEngine/OrcMCJITReplacement.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
+#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetClient.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
@@ -449,7 +448,7 @@ int main(int argc, char **argv, char * const *envp) {
RTDyldMemoryManager *RTDyldMM = nullptr;
if (!ForceInterpreter) {
if (RemoteMCJIT)
- RTDyldMM = new RemoteMemoryManager();
+ RTDyldMM = new ForwardingMemoryManager();
else
RTDyldMM = new SectionMemoryManager();
@@ -582,6 +581,25 @@ int main(int argc, char **argv, char * const *envp) {
int Result;
+ // Sanity check the use of -remote-mcjit: LLI currently only supports the
+ // remote JIT on Unix platforms.
+ if (RemoteMCJIT) {
+#ifndef LLVM_ON_UNIX
+ errs() << "Warning: host does not support external remote targets.\n"
+ << " Defaulting to local execution execution\n";
+ return -1;
+#else
+ if (ChildExecPath.empty()) {
+ errs() << "-remote-mcjit requires -mcjit-remote-process.\n";
+ exit(1);
+ } else if (!sys::fs::can_execute(ChildExecPath)) {
+ errs() << "Unable to find usable child executable: '" << ChildExecPath
+ << "'\n";
+ return -1;
+ }
+#endif
+ }
+
if (!RemoteMCJIT) {
// If the program doesn't explicitly call exit, we will need the Exit
// function later on to make an explicit call, so get the function now.
@@ -629,66 +647,123 @@ int main(int argc, char **argv, char * const *envp) {
// Remote target MCJIT doesn't (yet) support static constructors. No reason
// it couldn't. This is a limitation of the LLI implementation, not the
// MCJIT itself. FIXME.
- //
- RemoteMemoryManager *MM = static_cast<RemoteMemoryManager*>(RTDyldMM);
- // Everything is prepared now, so lay out our program for the target
- // address space, assign the section addresses to resolve any relocations,
- // and send it to the target.
-
- std::unique_ptr<RemoteTarget> Target;
- if (!ChildExecPath.empty()) { // Remote execution on a child process
-#ifndef LLVM_ON_UNIX
- // FIXME: Remove this pointless fallback mode which causes tests to "pass"
- // on platforms where they should XFAIL.
- errs() << "Warning: host does not support external remote targets.\n"
- << " Defaulting to simulated remote execution\n";
- Target.reset(new RemoteTarget);
-#else
- if (!sys::fs::can_execute(ChildExecPath)) {
- errs() << "Unable to find usable child executable: '" << ChildExecPath
- << "'\n";
- return -1;
- }
- Target.reset(new RemoteTargetExternal(ChildExecPath));
-#endif
- } else {
- // No child process name provided, use simulated remote execution.
- Target.reset(new RemoteTarget);
+
+ // Launch the remote process and get a channel to it.
+ std::unique_ptr<FDRPCChannel> C = launchRemote();
+ if (!C) {
+ errs() << "Failed to launch remote JIT.\n";
+ exit(1);
}
- // Give the memory manager a pointer to our remote target interface object.
- MM->setRemoteTarget(Target.get());
+ // Create a remote target client running over the channel.
+ typedef orc::remote::OrcRemoteTargetClient<orc::remote::RPCChannel> MyRemote;
+ ErrorOr<MyRemote> R = MyRemote::Create(*C);
+ if (!R) {
+ errs() << "Could not create remote: " << R.getError().message() << "\n";
+ exit(1);
+ }
- // Create the remote target.
- if (!Target->create()) {
- errs() << "ERROR: " << Target->getErrorMsg() << "\n";
- return EXIT_FAILURE;
+ // Create a remote memory manager.
+ std::unique_ptr<MyRemote::RCMemoryManager> RemoteMM;
+ if (auto EC = R->createRemoteMemoryManager(RemoteMM)) {
+ errs() << "Could not create remote memory manager: " << EC.message() << "\n";
+ exit(1);
}
- // Since we're executing in a (at least simulated) remote address space,
- // we can't use the ExecutionEngine::runFunctionAsMain(). We have to
- // grab the function address directly here and tell the remote target
- // to execute the function.
- //
- // Our memory manager will map generated code into the remote address
- // space as it is loaded and copy the bits over during the finalizeMemory
- // operation.
- //
+ // Forward MCJIT's memory manager calls to the remote memory manager.
+ static_cast<ForwardingMemoryManager*>(RTDyldMM)->setMemMgr(
+ std::move(RemoteMM));
+
+ // Forward MCJIT's symbol resolution calls to the remote.
+ static_cast<ForwardingMemoryManager*>(RTDyldMM)->setResolver(
+ orc::createLambdaResolver(
+ [&](const std::string &Name) {
+ orc::TargetAddress Addr = 0;
+ if (auto EC = R->getSymbolAddress(Addr, Name)) {
+ errs() << "Failure during symbol lookup: " << EC.message() << "\n";
+ exit(1);
+ }
+ return RuntimeDyld::SymbolInfo(Addr, JITSymbolFlags::Exported);
+ },
+ [](const std::string &Name) { return nullptr; }
+ ));
+
+ // Grab the target address of the JIT'd main function on the remote and call
+ // it.
// FIXME: argv and envp handling.
- uint64_t Entry = EE->getFunctionAddress(EntryFn->getName().str());
-
+ orc::TargetAddress Entry = EE->getFunctionAddress(EntryFn->getName().str());
+ EE->finalizeObject();
DEBUG(dbgs() << "Executing '" << EntryFn->getName() << "' at 0x"
<< format("%llx", Entry) << "\n");
-
- if (!Target->executeCode(Entry, Result))
- errs() << "ERROR: " << Target->getErrorMsg() << "\n";
+ if (auto EC = R->callIntVoid(Result, Entry))
+ errs() << "ERROR: " << EC.message() << "\n";
// Like static constructors, the remote target MCJIT support doesn't handle
// this yet. It could. FIXME.
- // Stop the remote target
- Target->stop();
+ // Delete the EE - we need to tear it down *before* we terminate the session
+ // with the remote, otherwise it'll crash when it tries to release resources
+ // on a remote that has already been disconnected.
+ delete EE;
+ EE = nullptr;
+
+ // Signal the remote target that we're done JITing.
+ R->terminateSession();
}
return Result;
}
+
+std::unique_ptr<FDRPCChannel> launchRemote() {
+#ifndef LLVM_ON_UNIX
+ llvm_unreachable("launchRemote not supported on non-Unix platforms");
+#else
+ int PipeFD[2][2];
+ pid_t ChildPID;
+
+ // Create two pipes.
+ if (pipe(PipeFD[0]) != 0 || pipe(PipeFD[1]) != 0)
+ perror("Error creating pipe: ");
+
+ ChildPID = fork();
+
+ if (ChildPID == 0) {
+ // In the child...
+
+ // Close the parent ends of the pipes
+ close(PipeFD[0][1]);
+ close(PipeFD[1][0]);
+
+
+ // Execute the child process.
+ std::unique_ptr<char[]> ChildPath, ChildIn, ChildOut;
+ {
+ ChildPath.reset(new char[ChildExecPath.size() + 1]);
+ std::copy(ChildExecPath.begin(), ChildExecPath.end(), &ChildPath[0]);
+ ChildPath[ChildExecPath.size()] = '\0';
+ std::string ChildInStr = std::to_string(PipeFD[0][0]);
+ ChildIn.reset(new char[ChildInStr.size() + 1]);
+ std::copy(ChildInStr.begin(), ChildInStr.end(), &ChildIn[0]);
+ ChildIn[ChildInStr.size()] = '\0';
+ std::string ChildOutStr = std::to_string(PipeFD[1][1]);
+ ChildOut.reset(new char[ChildOutStr.size() + 1]);
+ std::copy(ChildOutStr.begin(), ChildOutStr.end(), &ChildOut[0]);
+ ChildOut[ChildOutStr.size()] = '\0';
+ }
+
+ char * const args[] = { &ChildPath[0], &ChildIn[0], &ChildOut[0], nullptr };
+ int rc = execv(ChildExecPath.c_str(), args);
+ if (rc != 0)
+ perror("Error executing child process: ");
+ llvm_unreachable("Error executing child process");
+ }
+ // else we're the parent...
+
+ // Close the child ends of the pipes
+ close(PipeFD[0][0]);
+ close(PipeFD[1][1]);
+
+ // Return an RPC channel connected to our end of the pipes.
+ return llvm::make_unique<FDRPCChannel>(PipeFD[1][0], PipeFD[0][1]);
+#endif
+}
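The child executable named by -mcjit-remote-process is expected to read the two pipe file descriptor numbers that launchRemote passes as argv[1] and argv[2], wrap them in a channel, and service RPC requests over it. A minimal sketch of such a child's entry point (illustrative only: FDRPCChannel matches the client-side name above; the serving loop is only outlined):

#include <cstdlib>

int main(int argc, char *argv[]) {
  if (argc != 3)
    return 1;
  // argv[1]/argv[2] carry the pipe FDs handed down by launchRemote.
  int InFD = atoi(argv[1]);  // the child's read end
  int OutFD = atoi(argv[2]); // the child's write end
  FDRPCChannel Channel(InFD, OutFD);
  // ... hand Channel to an orc::remote::OrcRemoteTargetServer and loop
  // handling requests until the client calls terminateSession().
  return 0;
}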
diff --git a/tools/llvm-lto/llvm-lto.cpp b/tools/llvm-lto/llvm-lto.cpp
index 4bc692279b9e..232051130cb2 100644
--- a/tools/llvm-lto/llvm-lto.cpp
+++ b/tools/llvm-lto/llvm-lto.cpp
@@ -289,6 +289,7 @@ int main(int argc, char **argv) {
CurrentActivity = "loading file '" + InputFilenames[i] + "'";
ErrorOr<std::unique_ptr<LTOModule>> ModuleOrErr =
LTOModule::createFromFile(Context, InputFilenames[i].c_str(), Options);
+ error(ModuleOrErr, "error " + CurrentActivity);
std::unique_ptr<LTOModule> &Module = *ModuleOrErr;
CurrentActivity = "";
diff --git a/tools/llvm-objdump/COFFDump.cpp b/tools/llvm-objdump/COFFDump.cpp
index f286351614ac..5d21b3320e74 100644
--- a/tools/llvm-objdump/COFFDump.cpp
+++ b/tools/llvm-objdump/COFFDump.cpp
@@ -358,13 +358,30 @@ static void printExportTable(const COFFObjectFile *Obj) {
uint32_t RVA;
if (I->getExportRVA(RVA))
return;
- outs() << format(" % 4d %# 8x", Ordinal, RVA);
+ bool IsForwarder;
+ if (I->isForwarder(IsForwarder))
+ return;
+
+ if (IsForwarder) {
+ // Export table entries can be used to re-export symbols that
+ // this COFF file imports from other DLLs. This is rare;
+ // in most cases IsForwarder is false.
+ outs() << format(" % 4d ", Ordinal);
+ } else {
+ outs() << format(" % 4d %# 8x", Ordinal, RVA);
+ }
StringRef Name;
if (I->getSymbolName(Name))
continue;
if (!Name.empty())
outs() << " " << Name;
+ if (IsForwarder) {
+ StringRef S;
+ if (I->getForwardTo(S))
+ return;
+ outs() << " (forwarded to " << S << ")";
+ }
outs() << "\n";
}
}
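For example, an export whose forwarder string is NTDLL.RtlAllocateHeap (kernel32's HeapAlloc is famously forwarded this way) would now print along the lines of (ordinal and spacing illustrative):

   42          HeapAlloc (forwarded to NTDLL.RtlAllocateHeap)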
diff --git a/tools/llvm-objdump/MachODump.cpp b/tools/llvm-objdump/MachODump.cpp
index a2f3bc8e8a7c..258c0b520a3d 100644
--- a/tools/llvm-objdump/MachODump.cpp
+++ b/tools/llvm-objdump/MachODump.cpp
@@ -1196,7 +1196,11 @@ static void ProcessMachO(StringRef Filename, MachOObjectFile *MachOOF,
PrintSymbolTable(MachOOF);
if (UnwindInfo)
printMachOUnwindInfo(MachOOF);
- if (PrivateHeaders)
+ if (PrivateHeaders) {
+ printMachOFileHeader(MachOOF);
+ printMachOLoadCommands(MachOOF);
+ }
+ if (FirstPrivateHeader)
printMachOFileHeader(MachOOF);
if (ObjcMetaData)
printObjcMetaData(MachOOF, !NonVerbose);
@@ -1477,10 +1481,8 @@ void llvm::ParseInputMachO(StringRef Filename) {
// Attempt to open the binary.
ErrorOr<OwningBinary<Binary>> BinaryOrErr = createBinary(Filename);
- if (std::error_code EC = BinaryOrErr.getError()) {
- errs() << "llvm-objdump: '" << Filename << "': " << EC.message() << ".\n";
- return;
- }
+ if (std::error_code EC = BinaryOrErr.getError())
+ report_error(Filename, EC);
Binary &Bin = *BinaryOrErr.get().getBinary();
if (Archive *A = dyn_cast<Archive>(&Bin)) {
@@ -1649,8 +1651,9 @@ void llvm::ParseInputMachO(StringRef Filename) {
} else
errs() << "llvm-objdump: '" << Filename << "': "
<< "Object is not a Mach-O file type.\n";
- } else
- report_error(Filename, object_error::invalid_file_type);
+ return;
+ }
+ llvm_unreachable("Input object can't be invalid at this point");
}
typedef std::pair<uint64_t, const char *> BindInfoEntry;
@@ -8646,31 +8649,40 @@ static void PrintLoadCommands(const MachOObjectFile *Obj, uint32_t filetype,
}
}
-static void getAndPrintMachHeader(const MachOObjectFile *Obj,
- uint32_t &filetype, uint32_t &cputype,
- bool verbose) {
+static void PrintMachHeader(const MachOObjectFile *Obj, bool verbose) {
if (Obj->is64Bit()) {
MachO::mach_header_64 H_64;
H_64 = Obj->getHeader64();
PrintMachHeader(H_64.magic, H_64.cputype, H_64.cpusubtype, H_64.filetype,
H_64.ncmds, H_64.sizeofcmds, H_64.flags, verbose);
- filetype = H_64.filetype;
- cputype = H_64.cputype;
} else {
MachO::mach_header H;
H = Obj->getHeader();
PrintMachHeader(H.magic, H.cputype, H.cpusubtype, H.filetype, H.ncmds,
H.sizeofcmds, H.flags, verbose);
- filetype = H.filetype;
- cputype = H.cputype;
}
}
void llvm::printMachOFileHeader(const object::ObjectFile *Obj) {
const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
+ PrintMachHeader(file, !NonVerbose);
+}
+
+void llvm::printMachOLoadCommands(const object::ObjectFile *Obj) {
+ const MachOObjectFile *file = dyn_cast<const MachOObjectFile>(Obj);
uint32_t filetype = 0;
uint32_t cputype = 0;
- getAndPrintMachHeader(file, filetype, cputype, !NonVerbose);
+ if (file->is64Bit()) {
+ MachO::mach_header_64 H_64;
+ H_64 = file->getHeader64();
+ filetype = H_64.filetype;
+ cputype = H_64.cputype;
+ } else {
+ MachO::mach_header H;
+ H = file->getHeader();
+ filetype = H.filetype;
+ cputype = H.cputype;
+ }
PrintLoadCommands(file, filetype, cputype, !NonVerbose);
}
diff --git a/tools/llvm-objdump/llvm-objdump.cpp b/tools/llvm-objdump/llvm-objdump.cpp
index 22167c788901..d5ae5de4b5a3 100644
--- a/tools/llvm-objdump/llvm-objdump.cpp
+++ b/tools/llvm-objdump/llvm-objdump.cpp
@@ -165,6 +165,11 @@ cl::opt<bool>
llvm::PrivateHeaders("private-headers",
cl::desc("Display format specific file headers"));
+cl::opt<bool>
+llvm::FirstPrivateHeader("private-header",
+ cl::desc("Display only the first format specific file "
+ "header"));
+
static cl::alias
PrivateHeadersShort("p", cl::desc("Alias for --private-headers"),
cl::aliasopt(PrivateHeaders));
@@ -482,6 +487,23 @@ static std::error_code getRelocationValueString(const ELFObjectFile<ELFT> *Obj,
case ELF::EM_MIPS:
res = Target;
break;
+ case ELF::EM_WEBASSEMBLY:
+ switch (type) {
+ case ELF::R_WEBASSEMBLY_DATA: {
+ std::string fmtbuf;
+ raw_string_ostream fmt(fmtbuf);
+ fmt << Target << (addend < 0 ? "" : "+") << addend;
+ fmt.flush();
+ Result.append(fmtbuf.begin(), fmtbuf.end());
+ break;
+ }
+ case ELF::R_WEBASSEMBLY_FUNCTION:
+ res = Target;
+ break;
+ default:
+ res = "Unknown";
+ }
+ break;
default:
res = "Unknown";
}
@@ -1478,7 +1500,19 @@ static void printFaultMaps(const ObjectFile *Obj) {
outs() << FMP;
}
-static void printPrivateFileHeader(const ObjectFile *o) {
+static void printPrivateFileHeaders(const ObjectFile *o) {
+ if (o->isELF())
+ printELFFileHeader(o);
+ else if (o->isCOFF())
+ printCOFFFileHeader(o);
+ else if (o->isMachO()) {
+ printMachOFileHeader(o);
+ printMachOLoadCommands(o);
+ } else
+ report_fatal_error("Invalid/Unsupported object file format");
+}
+
+static void printFirstPrivateFileHeader(const ObjectFile *o) {
if (o->isELF())
printELFFileHeader(o);
else if (o->isCOFF())
@@ -1510,7 +1544,9 @@ static void DumpObject(const ObjectFile *o) {
if (UnwindInfo)
PrintUnwindInfo(o);
if (PrivateHeaders)
- printPrivateFileHeader(o);
+ printPrivateFileHeaders(o);
+ if (FirstPrivateHeader)
+ printFirstPrivateFileHeader(o);
if (ExportsTrie)
printExportsTrie(o);
if (Rebase)
@@ -1601,6 +1637,7 @@ int main(int argc, char **argv) {
&& !SymbolTable
&& !UnwindInfo
&& !PrivateHeaders
+ && !FirstPrivateHeader
&& !ExportsTrie
&& !Rebase
&& !Bind
diff --git a/tools/llvm-objdump/llvm-objdump.h b/tools/llvm-objdump/llvm-objdump.h
index 6e8ad6bda156..60cabbc98e2f 100644
--- a/tools/llvm-objdump/llvm-objdump.h
+++ b/tools/llvm-objdump/llvm-objdump.h
@@ -31,6 +31,7 @@ extern cl::opt<bool> Disassemble;
extern cl::opt<bool> DisassembleAll;
extern cl::opt<bool> NoShowRawInsn;
extern cl::opt<bool> PrivateHeaders;
+extern cl::opt<bool> FirstPrivateHeader;
extern cl::opt<bool> ExportsTrie;
extern cl::opt<bool> Rebase;
extern cl::opt<bool> Bind;
@@ -70,6 +71,7 @@ void printELFFileHeader(const object::ObjectFile *o);
void printCOFFFileHeader(const object::ObjectFile *o);
void printCOFFSymbolTable(const object::COFFObjectFile *o);
void printMachOFileHeader(const object::ObjectFile *o);
+void printMachOLoadCommands(const object::ObjectFile *o);
void printExportsTrie(const object::ObjectFile *o);
void printRebaseTable(const object::ObjectFile *o);
void printBindTable(const object::ObjectFile *o);
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 516d1cf8057b..d44da0d57466 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -603,12 +603,14 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) {
// in the line table. The filename string is accessed using double
// indirection to the string table subsection using the index subsection.
uint32_t OffsetInIndex = DE.getU32(&Offset),
- SegmentLength = DE.getU32(&Offset),
+ NumLines = DE.getU32(&Offset),
FullSegmentSize = DE.getU32(&Offset);
+ uint32_t ColumnOffset = Offset + 8 * NumLines;
+ DataExtractor ColumnDE(DE.getData(), true, 4);
+
if (FullSegmentSize !=
- 12 + 8 * SegmentLength +
- (HasColumnInformation ? 4 * SegmentLength : 0)) {
+ 12 + 8 * NumLines + (HasColumnInformation ? 4 * NumLines : 0)) {
error(object_error::parse_failed);
return;
}
@@ -635,29 +637,41 @@ void COFFDumper::printCodeViewSection(const SectionRef &Section) {
StringRef Filename(CVStringTable.data() + FilenameOffset);
ListScope S(W, "FilenameSegment");
W.printString("Filename", Filename);
- for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset);
- ++J) {
+ for (unsigned LineIdx = 0;
+ LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) {
// Then go the (PC, LineNumber) pairs. The line number is stored in the
// least significant 31 bits of the respective word in the table.
- uint32_t PC = DE.getU32(&Offset),
- LineNumber = DE.getU32(&Offset) & 0x7fffffff;
+ uint32_t PC = DE.getU32(&Offset), LineData = DE.getU32(&Offset);
if (PC >= FunctionSize) {
error(object_error::parse_failed);
return;
}
char Buffer[32];
format("+0x%X", PC).snprint(Buffer, 32);
- W.printNumber(Buffer, LineNumber);
- }
- if (HasColumnInformation) {
- for (unsigned J = 0; J != SegmentLength && DE.isValidOffset(Offset);
- ++J) {
- uint16_t ColStart = DE.getU16(&Offset);
+ ListScope PCScope(W, Buffer);
+ uint32_t LineNumberStart = LineData & COFF::CVL_MaxLineNumber;
+ uint32_t LineNumberEndDelta =
+ (LineData >> COFF::CVL_LineNumberStartBits) &
+ COFF::CVL_LineNumberEndDeltaMask;
+ bool IsStatement = LineData & COFF::CVL_IsStatement;
+ W.printNumber("LineNumberStart", LineNumberStart);
+ W.printNumber("LineNumberEndDelta", LineNumberEndDelta);
+ W.printBoolean("IsStatement", IsStatement);
+ if (HasColumnInformation &&
+ ColumnDE.isValidOffsetForDataOfSize(ColumnOffset, 4)) {
+ uint16_t ColStart = ColumnDE.getU16(&ColumnOffset);
W.printNumber("ColStart", ColStart);
- uint16_t ColEnd = DE.getU16(&Offset);
+ uint16_t ColEnd = ColumnDE.getU16(&ColumnOffset);
W.printNumber("ColEnd", ColEnd);
}
}
+ // Skip over the column data.
+ if (HasColumnInformation) {
+ for (unsigned LineIdx = 0;
+ LineIdx != NumLines && DE.isValidOffset(Offset); ++LineIdx) {
+ DE.getU32(&Offset);
+ }
+ }
}
}
}
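The subsection layout this parser walks can be summarized as a pseudo-struct (for illustration only; the fields are raw little-endian words in the object file, and the trailing column array is present only when HasColumnInformation is set):

// One FilenameSegment record in a CodeView line-table subsection:
//   uint32_t OffsetInIndex;   // double-indirect reference to the filename
//   uint32_t NumLines;        // count of (PC, LineData) pairs that follow
//   uint32_t FullSegmentSize; // 12 + 8*NumLines (+ 4*NumLines with columns)
//   struct { uint32_t PC; uint32_t LineData; } Lines[NumLines];
//   struct { uint16_t ColStart, ColEnd; } Columns[NumLines]; // optional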
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 02397f382848..be84f3c0f1bb 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -708,7 +708,8 @@ static const EnumEntry<unsigned> ElfMachineType[] = {
LLVM_READOBJ_ENUM_ENT(ELF, EM_VIDEOCORE5 ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_78KOR ),
LLVM_READOBJ_ENUM_ENT(ELF, EM_56800EX ),
- LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU )
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_AMDGPU ),
+ LLVM_READOBJ_ENUM_ENT(ELF, EM_WEBASSEMBLY ),
};
static const EnumEntry<unsigned> ElfSymbolBindings[] = {
diff --git a/tools/llvm-symbolizer/llvm-symbolizer.cpp b/tools/llvm-symbolizer/llvm-symbolizer.cpp
index e45660c84c77..950349377bf7 100644
--- a/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -82,6 +82,10 @@ static cl::opt<bool>
ClPrettyPrint("pretty-print", cl::init(false),
cl::desc("Make the output more human friendly"));
+static cl::opt<int> ClPrintSourceContextLines(
+ "print-source-context-lines", cl::init(0),
+ cl::desc("Print N number of source file context"));
+
static bool error(std::error_code ec) {
if (!ec)
return false;
@@ -89,18 +93,14 @@ static bool error(std::error_code ec) {
return true;
}
-static bool parseCommand(bool &IsData, std::string &ModuleName,
- uint64_t &ModuleOffset) {
+static bool parseCommand(StringRef InputString, bool &IsData,
+ std::string &ModuleName, uint64_t &ModuleOffset) {
const char *kDataCmd = "DATA ";
const char *kCodeCmd = "CODE ";
- const int kMaxInputStringLength = 1024;
- const char kDelimiters[] = " \n";
- char InputString[kMaxInputStringLength];
- if (!fgets(InputString, sizeof(InputString), stdin))
- return false;
+ const char kDelimiters[] = " \n\r";
IsData = false;
ModuleName = "";
- char *pos = InputString;
+ const char *pos = InputString.data();
if (strncmp(pos, kDataCmd, strlen(kDataCmd)) == 0) {
IsData = true;
pos += strlen(kDataCmd);
@@ -117,7 +117,7 @@ static bool parseCommand(bool &IsData, std::string &ModuleName,
if (*pos == '"' || *pos == '\'') {
char quote = *pos;
pos++;
- char *end = strchr(pos, quote);
+ const char *end = strchr(pos, quote);
if (!end)
return false;
ModuleName = std::string(pos, end - pos);
@@ -158,13 +158,25 @@ int main(int argc, char **argv) {
}
LLVMSymbolizer Symbolizer(Opts);
- bool IsData = false;
- std::string ModuleName;
- uint64_t ModuleOffset;
DIPrinter Printer(outs(), ClPrintFunctions != FunctionNameKind::None,
- ClPrettyPrint);
+ ClPrettyPrint, ClPrintSourceContextLines);
+
+ const int kMaxInputStringLength = 1024;
+ char InputString[kMaxInputStringLength];
+
+ while (true) {
+ if (!fgets(InputString, sizeof(InputString), stdin))
+ break;
+
+ bool IsData = false;
+ std::string ModuleName;
+ uint64_t ModuleOffset = 0;
+ if (!parseCommand(StringRef(InputString), IsData, ModuleName,
+ ModuleOffset)) {
+ outs() << InputString;
+ continue;
+ }
- while (parseCommand(IsData, ModuleName, ModuleOffset)) {
if (ClPrintAddress) {
outs() << "0x";
outs().write_hex(ModuleOffset);
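With this restructuring each stdin line is parsed independently, and a line that cannot be parsed is echoed back instead of terminating the loop. Accepted lines keep the same shape as before; for example (paths and offsets illustrative):

CODE /tmp/a.out 0x4004f0
DATA "/tmp/dir with spaces/a.out" 0x601028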
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index cb878c61b85f..bce1bf93a338 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -25,7 +25,9 @@ set(ADTSources
MapVectorTest.cpp
OptionalTest.cpp
PackedVectorTest.cpp
+ PointerEmbeddedIntTest.cpp
PointerIntPairTest.cpp
+ PointerSumTypeTest.cpp
PointerUnionTest.cpp
PostOrderIteratorTest.cpp
RangeAdapterTest.cpp
diff --git a/unittests/ADT/PointerEmbeddedIntTest.cpp b/unittests/ADT/PointerEmbeddedIntTest.cpp
new file mode 100644
index 000000000000..b10365a2f618
--- /dev/null
+++ b/unittests/ADT/PointerEmbeddedIntTest.cpp
@@ -0,0 +1,46 @@
+//===- llvm/unittest/ADT/PointerEmbeddedIntTest.cpp -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/PointerEmbeddedInt.h"
+using namespace llvm;
+
+namespace {
+
+TEST(PointerEmbeddedIntTest, Basic) {
+ PointerEmbeddedInt<int, CHAR_BIT> I = 42, J = 43;
+
+ EXPECT_EQ(42, I);
+ EXPECT_EQ(43, I + 1);
+ EXPECT_EQ(sizeof(uintptr_t) * CHAR_BIT - CHAR_BIT,
+ PointerLikeTypeTraits<decltype(I)>::NumLowBitsAvailable);
+
+ EXPECT_FALSE(I == J);
+ EXPECT_TRUE(I != J);
+ EXPECT_TRUE(I < J);
+ EXPECT_FALSE(I > J);
+ EXPECT_TRUE(I <= J);
+ EXPECT_FALSE(I >= J);
+
+ EXPECT_FALSE(I == 43);
+ EXPECT_TRUE(I != 43);
+ EXPECT_TRUE(I < 43);
+ EXPECT_FALSE(I > 43);
+ EXPECT_TRUE(I <= 43);
+ EXPECT_FALSE(I >= 43);
+
+ EXPECT_FALSE(42 == J);
+ EXPECT_TRUE(42 != J);
+ EXPECT_TRUE(42 < J);
+ EXPECT_FALSE(42 > J);
+ EXPECT_TRUE(42 <= J);
+ EXPECT_FALSE(42 >= J);
+}
+
+} // end anonymous namespace
diff --git a/unittests/ADT/PointerIntPairTest.cpp b/unittests/ADT/PointerIntPairTest.cpp
index e27a5823a51e..13680c78b9bb 100644
--- a/unittests/ADT/PointerIntPairTest.cpp
+++ b/unittests/ADT/PointerIntPairTest.cpp
@@ -35,6 +35,33 @@ TEST(PointerIntPairTest, GetSet) {
Pair.setPointerAndInt(&s, 3U);
EXPECT_EQ(&s, Pair.getPointer());
EXPECT_EQ(3U, Pair.getInt());
+
+ // Make sure that we can perform all of our operations on enum classes.
+ //
+ // The concern is that enum classes are only explicitly convertible to
+ // integers. This means that if PointerIntPair assumed implicit
+ // convertibility, a compilation error would result. This group of tests
+ // exercises the enum class code to make sure that we do not run into such
+ // issues in the future.
+ enum class E : unsigned {
+ Case1,
+ Case2,
+ Case3,
+ };
+ PointerIntPair<S *, 2, E> Pair2(&s, E::Case1);
+ EXPECT_EQ(&s, Pair2.getPointer());
+ EXPECT_EQ(E::Case1, Pair2.getInt());
+
+ Pair2.setInt(E::Case2);
+ EXPECT_EQ(&s, Pair2.getPointer());
+ EXPECT_EQ(E::Case2, Pair2.getInt());
+
+ Pair2.setPointer(nullptr);
+ EXPECT_EQ(nullptr, Pair2.getPointer());
+ EXPECT_EQ(E::Case2, Pair2.getInt());
+
+ Pair2.setPointerAndInt(&s, E::Case3);
+ EXPECT_EQ(&s, Pair2.getPointer());
+ EXPECT_EQ(E::Case3, Pair2.getInt());
}
TEST(PointerIntPairTest, DefaultInitialize) {
diff --git a/unittests/ADT/PointerSumTypeTest.cpp b/unittests/ADT/PointerSumTypeTest.cpp
new file mode 100644
index 000000000000..75c88f7fee9f
--- /dev/null
+++ b/unittests/ADT/PointerSumTypeTest.cpp
@@ -0,0 +1,113 @@
+//===- llvm/unittest/ADT/PointerSumTypeTest.cpp ---------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "gtest/gtest.h"
+#include "llvm/ADT/PointerSumType.h"
+using namespace llvm;
+
+namespace {
+
+struct PointerSumTypeTest : public testing::Test {
+ enum Kinds { Float, Int1, Int2 };
+ float f;
+ int i1, i2;
+
+ typedef PointerSumType<Kinds, PointerSumTypeMember<Float, float *>,
+ PointerSumTypeMember<Int1, int *>,
+ PointerSumTypeMember<Int2, int *>>
+ SumType;
+ SumType a, b, c, n;
+
+ PointerSumTypeTest()
+ : f(3.14f), i1(42), i2(-1), a(SumType::create<Float>(&f)),
+ b(SumType::create<Int1>(&i1)), c(SumType::create<Int2>(&i2)), n() {}
+};
+
+TEST_F(PointerSumTypeTest, NullTest) {
+ EXPECT_TRUE(a);
+ EXPECT_TRUE(b);
+ EXPECT_TRUE(c);
+ EXPECT_FALSE(n);
+}
+
+TEST_F(PointerSumTypeTest, GetTag) {
+ EXPECT_EQ(Float, a.getTag());
+ EXPECT_EQ(Int1, b.getTag());
+ EXPECT_EQ(Int2, c.getTag());
+ EXPECT_EQ((Kinds)0, n.getTag());
+}
+
+TEST_F(PointerSumTypeTest, Is) {
+ EXPECT_TRUE(a.is<Float>());
+ EXPECT_FALSE(a.is<Int1>());
+ EXPECT_FALSE(a.is<Int2>());
+ EXPECT_FALSE(b.is<Float>());
+ EXPECT_TRUE(b.is<Int1>());
+ EXPECT_FALSE(b.is<Int2>());
+ EXPECT_FALSE(c.is<Float>());
+ EXPECT_FALSE(c.is<Int1>());
+ EXPECT_TRUE(c.is<Int2>());
+}
+
+TEST_F(PointerSumTypeTest, Get) {
+ EXPECT_EQ(&f, a.get<Float>());
+ EXPECT_EQ(nullptr, a.get<Int1>());
+ EXPECT_EQ(nullptr, a.get<Int2>());
+ EXPECT_EQ(nullptr, b.get<Float>());
+ EXPECT_EQ(&i1, b.get<Int1>());
+ EXPECT_EQ(nullptr, b.get<Int2>());
+ EXPECT_EQ(nullptr, c.get<Float>());
+ EXPECT_EQ(nullptr, c.get<Int1>());
+ EXPECT_EQ(&i2, c.get<Int2>());
+
+ // Note that we can use .get even on a null sum type. It just always produces
+ // a null pointer, even if one of the discriminants is null.
+ EXPECT_EQ(nullptr, n.get<Float>());
+ EXPECT_EQ(nullptr, n.get<Int1>());
+ EXPECT_EQ(nullptr, n.get<Int2>());
+}
+
+TEST_F(PointerSumTypeTest, Cast) {
+ EXPECT_EQ(&f, a.cast<Float>());
+ EXPECT_EQ(&i1, b.cast<Int1>());
+ EXPECT_EQ(&i2, c.cast<Int2>());
+}
+
+TEST_F(PointerSumTypeTest, Assignment) {
+ b = SumType::create<Int2>(&i2);
+ EXPECT_EQ(nullptr, b.get<Float>());
+ EXPECT_EQ(nullptr, b.get<Int1>());
+ EXPECT_EQ(&i2, b.get<Int2>());
+
+ b = SumType::create<Int2>(&i1);
+ EXPECT_EQ(nullptr, b.get<Float>());
+ EXPECT_EQ(nullptr, b.get<Int1>());
+ EXPECT_EQ(&i1, b.get<Int2>());
+
+ float Local = 1.616f;
+ b = SumType::create<Float>(&Local);
+ EXPECT_EQ(&Local, b.get<Float>());
+ EXPECT_EQ(nullptr, b.get<Int1>());
+ EXPECT_EQ(nullptr, b.get<Int2>());
+
+ n = SumType::create<Int1>(&i2);
+ EXPECT_TRUE(n);
+ EXPECT_EQ(nullptr, n.get<Float>());
+ EXPECT_EQ(&i2, n.get<Int1>());
+ EXPECT_EQ(nullptr, n.get<Int2>());
+
+ n = SumType::create<Float>(nullptr);
+ EXPECT_FALSE(n);
+ EXPECT_EQ(nullptr, n.get<Float>());
+ EXPECT_EQ(nullptr, n.get<Int1>());
+ EXPECT_EQ(nullptr, n.get<Int2>());
+}
+
+
+} // end anonymous namespace
diff --git a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
index 7704e5a2f79a..c8c244d22ed1 100644
--- a/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
+++ b/unittests/ExecutionEngine/MCJIT/MCJITCAPITest.cpp
@@ -88,8 +88,9 @@ public:
bool needsToReserveAllocationSpace() override { return true; }
- void reserveAllocationSpace(uintptr_t CodeSize, uintptr_t DataSizeRO,
- uintptr_t DataSizeRW) override {
+ void reserveAllocationSpace(uintptr_t CodeSize, uint32_t CodeAlign,
+ uintptr_t DataSizeRO, uint32_t RODataAlign,
+ uintptr_t DataSizeRW, uint32_t RWDataAlign) override {
ReservedCodeSize = CodeSize;
ReservedDataSizeRO = DataSizeRO;
ReservedDataSizeRW = DataSizeRW;
diff --git a/unittests/ExecutionEngine/Orc/CMakeLists.txt b/unittests/ExecutionEngine/Orc/CMakeLists.txt
index 74cc5b57015c..41fef24556b1 100644
--- a/unittests/ExecutionEngine/Orc/CMakeLists.txt
+++ b/unittests/ExecutionEngine/Orc/CMakeLists.txt
@@ -18,4 +18,5 @@ add_llvm_unittest(OrcJITTests
ObjectTransformLayerTest.cpp
OrcCAPITest.cpp
OrcTestCommon.cpp
+ RPCUtilsTest.cpp
)
diff --git a/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp b/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp
index a37177c5d1db..59ee01f36010 100644
--- a/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp
+++ b/unittests/ExecutionEngine/Orc/ObjectLinkingLayerTest.cpp
@@ -7,6 +7,7 @@
//
//===----------------------------------------------------------------------===//
+#include "OrcTestCommon.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/SectionMemoryManager.h"
#include "llvm/ExecutionEngine/Orc/CompileUtils.h"
@@ -21,6 +22,19 @@ using namespace llvm::orc;
namespace {
+class ObjectLinkingLayerExecutionTest : public testing::Test,
+ public OrcExecutionTest {
+};
+
+class SectionMemoryManagerWrapper : public SectionMemoryManager {
+public:
+ int FinalizationCount = 0;
+ bool finalizeMemory(std::string *ErrMsg = nullptr) override {
+ ++FinalizationCount;
+ return SectionMemoryManager::finalizeMemory(ErrMsg);
+ }
+};
+
TEST(ObjectLinkingLayerTest, TestSetProcessAllSections) {
class SectionMemoryManagerWrapper : public SectionMemoryManager {
@@ -91,4 +105,72 @@ TEST(ObjectLinkingLayerTest, TestSetProcessAllSections) {
}
}
+
+TEST_F(ObjectLinkingLayerExecutionTest, NoDuplicateFinalization) {
+
+ if (!TM)
+ return;
+
+ ObjectLinkingLayer<> ObjLayer;
+ SimpleCompiler Compile(*TM);
+
+ // Create a pair of modules that will trigger recursive finalization:
+ // Module 1:
+ // int bar() { return 42; }
+ // Module 2:
+ // int bar();
+ // int foo() { return bar(); }
+
+ ModuleBuilder MB1(getGlobalContext(), "", "dummy");
+ {
+ MB1.getModule()->setDataLayout(TM->createDataLayout());
+ Function *BarImpl = MB1.createFunctionDecl<int32_t(void)>("bar");
+ BasicBlock *BarEntry = BasicBlock::Create(getGlobalContext(), "entry",
+ BarImpl);
+ IRBuilder<> Builder(BarEntry);
+ IntegerType *Int32Ty = IntegerType::get(getGlobalContext(), 32);
+ Value *FortyTwo = ConstantInt::getSigned(Int32Ty, 42);
+ Builder.CreateRet(FortyTwo);
+ }
+
+ auto Obj1 = Compile(*MB1.getModule());
+ std::vector<object::ObjectFile*> Obj1Set;
+ Obj1Set.push_back(Obj1.getBinary());
+
+ ModuleBuilder MB2(getGlobalContext(), "", "dummy");
+ {
+ MB2.getModule()->setDataLayout(TM->createDataLayout());
+ Function *BarDecl = MB2.createFunctionDecl<int32_t(void)>("bar");
+ Function *FooImpl = MB2.createFunctionDecl<int32_t(void)>("foo");
+ BasicBlock *FooEntry = BasicBlock::Create(getGlobalContext(), "entry",
+ FooImpl);
+ IRBuilder<> Builder(FooEntry);
+ Builder.CreateRet(Builder.CreateCall(BarDecl));
+ }
+ auto Obj2 = Compile(*MB2.getModule());
+ std::vector<object::ObjectFile*> Obj2Set;
+ Obj2Set.push_back(Obj2.getBinary());
+
+ auto Resolver =
+ createLambdaResolver(
+ [&](const std::string &Name) {
+ if (auto Sym = ObjLayer.findSymbol(Name, true))
+ return RuntimeDyld::SymbolInfo(Sym.getAddress(), Sym.getFlags());
+ return RuntimeDyld::SymbolInfo(nullptr);
+ },
+ [](const std::string &Name) {
+ return RuntimeDyld::SymbolInfo(nullptr);
+ });
+
+ SectionMemoryManagerWrapper SMMW;
+ ObjLayer.addObjectSet(std::move(Obj1Set), &SMMW, &*Resolver);
+ auto H = ObjLayer.addObjectSet(std::move(Obj2Set), &SMMW, &*Resolver);
+ ObjLayer.emitAndFinalize(H);
+
+ // Finalization of module 2 should trigger finalization of module 1.
+ // Verify that finalize on SMMW is only called once.
+ EXPECT_EQ(SMMW.FinalizationCount, 1)
+ << "Extra call to finalize";
+}
+
}
diff --git a/unittests/ExecutionEngine/Orc/OrcTestCommon.cpp b/unittests/ExecutionEngine/Orc/OrcTestCommon.cpp
index 1b5485d3b33b..17d1e9c9276e 100644
--- a/unittests/ExecutionEngine/Orc/OrcTestCommon.cpp
+++ b/unittests/ExecutionEngine/Orc/OrcTestCommon.cpp
@@ -19,7 +19,7 @@ bool OrcExecutionTest::NativeTargetInitialized = false;
ModuleBuilder::ModuleBuilder(LLVMContext &Context, StringRef Triple,
StringRef Name)
- : M(new Module(Name, Context)),
- Builder(Context) {
- M->setTargetTriple(Triple);
+ : M(new Module(Name, Context)) {
+ if (Triple != "")
+ M->setTargetTriple(Triple);
}
diff --git a/unittests/ExecutionEngine/Orc/OrcTestCommon.h b/unittests/ExecutionEngine/Orc/OrcTestCommon.h
index 15d9f54a37fc..f480e0789ae5 100644
--- a/unittests/ExecutionEngine/Orc/OrcTestCommon.h
+++ b/unittests/ExecutionEngine/Orc/OrcTestCommon.h
@@ -20,6 +20,7 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/TypeBuilder.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/Orc/JITSymbol.h"
#include "llvm/Support/TargetSelect.h"
@@ -74,7 +75,6 @@ public:
private:
std::unique_ptr<Module> M;
- IRBuilder<> Builder;
};
// Dummy struct type.
diff --git a/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp b/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp
new file mode 100644
index 000000000000..8215144a514d
--- /dev/null
+++ b/unittests/ExecutionEngine/Orc/RPCUtilsTest.cpp
@@ -0,0 +1,147 @@
+//===----------- RPCUtilsTest.cpp - Unit tests the Orc RPC utils ----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ExecutionEngine/Orc/RPCChannel.h"
+#include "llvm/ExecutionEngine/Orc/RPCUtils.h"
+#include "gtest/gtest.h"
+
+#include <queue>
+
+using namespace llvm;
+using namespace llvm::orc;
+using namespace llvm::orc::remote;
+
+class QueueChannel : public RPCChannel {
+public:
+ QueueChannel(std::queue<char> &Queue) : Queue(Queue) {}
+
+ std::error_code readBytes(char *Dst, unsigned Size) override {
+ while (Size--) {
+ *Dst++ = Queue.front();
+ Queue.pop();
+ }
+ return std::error_code();
+ }
+
+ std::error_code appendBytes(const char *Src, unsigned Size) override {
+ while (Size--)
+ Queue.push(*Src++);
+ return std::error_code();
+ }
+
+ std::error_code send() override { return std::error_code(); }
+
+private:
+ std::queue<char> &Queue;
+};
+
+class DummyRPC : public testing::Test,
+ public RPC<QueueChannel> {
+public:
+ typedef Procedure<1, bool> Proc1;
+ typedef Procedure<2, int8_t,
+ uint8_t,
+ int16_t,
+ uint16_t,
+ int32_t,
+ uint32_t,
+ int64_t,
+ uint64_t,
+ bool,
+ std::string,
+ std::vector<int>> AllTheTypes;
+};
+
+
+TEST_F(DummyRPC, TestBasic) {
+ std::queue<char> Queue;
+ QueueChannel C(Queue);
+
+ {
+ // Make a call to Proc1.
+ auto EC = call<Proc1>(C, true);
+ EXPECT_FALSE(EC) << "Simple call over queue failed";
+ }
+
+ {
+ // Expect a call to Proc1.
+ auto EC = expect<Proc1>(C,
+ [&](bool &B) {
+ EXPECT_EQ(B, true)
+ << "Bool serialization broken";
+ return std::error_code();
+ });
+ EXPECT_FALSE(EC) << "Simple expect over queue failed";
+ }
+}
+
+TEST_F(DummyRPC, TestSerialization) {
+ std::queue<char> Queue;
+ QueueChannel C(Queue);
+
+ {
+ // Make a call to Proc1.
+ std::vector<int> v({42, 7});
+ auto EC = call<AllTheTypes>(C,
+ -101,
+ 250,
+ -10000,
+ 10000,
+ -1000000000,
+ 1000000000,
+ -10000000000,
+ 10000000000,
+ true,
+ "foo",
+ v);
+ EXPECT_FALSE(EC) << "Big (serialization test) call over queue failed";
+ }
+
+ {
+ // Expect a call to Proc1.
+ auto EC = expect<AllTheTypes>(C,
+ [&](int8_t &s8,
+ uint8_t &u8,
+ int16_t &s16,
+ uint16_t &u16,
+ int32_t &s32,
+ uint32_t &u32,
+ int64_t &s64,
+ uint64_t &u64,
+ bool &b,
+ std::string &s,
+ std::vector<int> &v) {
+
+ EXPECT_EQ(s8, -101)
+ << "int8_t serialization broken";
+ EXPECT_EQ(u8, 250)
+ << "uint8_t serialization broken";
+ EXPECT_EQ(s16, -10000)
+ << "int16_t serialization broken";
+ EXPECT_EQ(u16, 10000)
+ << "uint16_t serialization broken";
+ EXPECT_EQ(s32, -1000000000)
+ << "int32_t serialization broken";
+ EXPECT_EQ(u32, 1000000000ULL)
+ << "uint32_t serialization broken";
+ EXPECT_EQ(s64, -10000000000)
+ << "int64_t serialization broken";
+ EXPECT_EQ(u64, 10000000000ULL)
+ << "uint64_t serialization broken";
+ EXPECT_EQ(b, true)
+ << "bool serialization broken";
+ EXPECT_EQ(s, "foo")
+ << "std::string serialization broken";
+ EXPECT_EQ(v, std::vector<int>({42, 7}))
+ << "std::vector serialization broken";
+ return std::error_code();
+ });
+ EXPECT_FALSE(EC) << "Big (serialization test) call over queue failed";
+ }
+}
diff --git a/unittests/IR/AsmWriterTest.cpp b/unittests/IR/AsmWriterTest.cpp
new file mode 100644
index 000000000000..c7e7bb5c9f0f
--- /dev/null
+++ b/unittests/IR/AsmWriterTest.cpp
@@ -0,0 +1,37 @@
+//===- llvm/unittest/IR/AsmWriter.cpp - AsmWriter tests -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Module.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+TEST(AsmWriterTest, DebugPrintDetachedInstruction) {
+
+ // PR24852: Ensure that an instruction can be printed even when it
+ // has metadata attached but no parent.
+ LLVMContext Ctx;
+ auto Ty = Type::getInt32Ty(Ctx);
+ auto Undef = UndefValue::get(Ty);
+ std::unique_ptr<BinaryOperator> Add(BinaryOperator::CreateAdd(Undef, Undef));
+ Add->setMetadata(
+ "", MDNode::get(Ctx, {ConstantAsMetadata::get(ConstantInt::get(Ty, 1))}));
+ std::string S;
+ raw_string_ostream OS(S);
+ Add->print(OS);
+ std::size_t r = OS.str().find("<badref> = add i32 undef, undef, !<empty");
+ EXPECT_TRUE(r != std::string::npos);
+}
+
+}
diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt
index e5a0fc981075..5aad8edc9134 100644
--- a/unittests/IR/CMakeLists.txt
+++ b/unittests/IR/CMakeLists.txt
@@ -6,6 +6,7 @@ set(LLVM_LINK_COMPONENTS
)
set(IRSources
+ AsmWriterTest.cpp
AttributesTest.cpp
ConstantRangeTest.cpp
ConstantsTest.cpp
diff --git a/unittests/IR/IRBuilderTest.cpp b/unittests/IR/IRBuilderTest.cpp
index 82565ccaebcf..bd0eae0399a4 100644
--- a/unittests/IR/IRBuilderTest.cpp
+++ b/unittests/IR/IRBuilderTest.cpp
@@ -142,13 +142,13 @@ TEST_F(IRBuilderTest, FastMathFlags) {
EXPECT_FALSE(FAdd->hasNoNaNs());
FastMathFlags FMF;
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
F = Builder.CreateFAdd(F, F);
EXPECT_FALSE(Builder.getFastMathFlags().any());
FMF.setUnsafeAlgebra();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
F = Builder.CreateFAdd(F, F);
EXPECT_TRUE(Builder.getFastMathFlags().any());
@@ -179,7 +179,7 @@ TEST_F(IRBuilderTest, FastMathFlags) {
FMF.clear();
FMF.setAllowReciprocal();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
F = Builder.CreateFDiv(F, F);
EXPECT_TRUE(Builder.getFastMathFlags().any());
@@ -197,7 +197,7 @@ TEST_F(IRBuilderTest, FastMathFlags) {
FMF.clear();
FMF.setAllowReciprocal();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
FC = Builder.CreateFCmpOEQ(F, F);
EXPECT_TRUE(Builder.getFastMathFlags().any());
@@ -224,7 +224,7 @@ TEST_F(IRBuilderTest, FastMathFlags) {
FMF.clear();
FMF.setNoNaNs();
- Builder.SetFastMathFlags(FMF);
+ Builder.setFastMathFlags(FMF);
FCall = Builder.CreateCall(Callee, None);
EXPECT_TRUE(Builder.getFastMathFlags().any());
@@ -309,14 +309,14 @@ TEST_F(IRBuilderTest, RAIIHelpersTest) {
MDNode *FPMathA = MDB.createFPMath(0.01f);
MDNode *FPMathB = MDB.createFPMath(0.1f);
- Builder.SetDefaultFPMathTag(FPMathA);
+ Builder.setDefaultFPMathTag(FPMathA);
{
IRBuilder<>::FastMathFlagGuard Guard(Builder);
FastMathFlags FMF;
FMF.setAllowReciprocal();
- Builder.SetFastMathFlags(FMF);
- Builder.SetDefaultFPMathTag(FPMathB);
+ Builder.setFastMathFlags(FMF);
+ Builder.setDefaultFPMathTag(FPMathB);
EXPECT_TRUE(Builder.getFastMathFlags().allowReciprocal());
EXPECT_EQ(FPMathB, Builder.getDefaultFPMathTag());
}
diff --git a/unittests/ProfileData/InstrProfTest.cpp b/unittests/ProfileData/InstrProfTest.cpp
index 1ccc3ca5d695..51f52f2a0771 100644
--- a/unittests/ProfileData/InstrProfTest.cpp
+++ b/unittests/ProfileData/InstrProfTest.cpp
@@ -159,11 +159,57 @@ TEST_F(InstrProfTest, get_icall_data_read_write) {
std::unique_ptr<InstrProfValueData[]> VD =
R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
- // Now sort the target acording to frequency.
- std::sort(&VD[0], &VD[3],
- [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
- return VD1.Count > VD2.Count;
- });
+
+ ASSERT_EQ(3U, VD[0].Count);
+ ASSERT_EQ(2U, VD[1].Count);
+ ASSERT_EQ(1U, VD[2].Count);
+
+ ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
+ ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
+ ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
+}
+
+TEST_F(InstrProfTest, get_icall_data_read_write_with_weight) {
+ InstrProfRecord Record1("caller", 0x1234, {1, 2});
+ InstrProfRecord Record2("callee1", 0x1235, {3, 4});
+ InstrProfRecord Record3("callee2", 0x1235, {3, 4});
+ InstrProfRecord Record4("callee3", 0x1235, {3, 4});
+
+ // 4 value sites.
+ Record1.reserveSites(IPVK_IndirectCallTarget, 4);
+ InstrProfValueData VD0[] = {{(uint64_t) "callee1", 1},
+ {(uint64_t) "callee2", 2},
+ {(uint64_t) "callee3", 3}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 0, VD0, 3, nullptr);
+ // No value profile data at the second site.
+ Record1.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+ InstrProfValueData VD2[] = {{(uint64_t) "callee1", 1},
+ {(uint64_t) "callee2", 2}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 2, VD2, 2, nullptr);
+ InstrProfValueData VD3[] = {{(uint64_t) "callee1", 1}};
+ Record1.addValueData(IPVK_IndirectCallTarget, 3, VD3, 1, nullptr);
+
+ Writer.addRecord(std::move(Record1), 10);
+ Writer.addRecord(std::move(Record2));
+ Writer.addRecord(std::move(Record3));
+ Writer.addRecord(std::move(Record4));
+ auto Profile = Writer.writeBuffer();
+ readProfile(std::move(Profile));
+
+ ErrorOr<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
+ ASSERT_TRUE(NoError(R.getError()));
+ ASSERT_EQ(4U, R.get().getNumValueSites(IPVK_IndirectCallTarget));
+ ASSERT_EQ(3U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
+ ASSERT_EQ(0U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 1));
+ ASSERT_EQ(2U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 2));
+ ASSERT_EQ(1U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 3));
+
+ std::unique_ptr<InstrProfValueData[]> VD =
+ R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
+ ASSERT_EQ(30U, VD[0].Count);
+ ASSERT_EQ(20U, VD[1].Count);
+ ASSERT_EQ(10U, VD[2].Count);
+
ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
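A note on the weighted variant above: the weight of 10 passed to Writer.addRecord scales the counts recorded in Record1 before merging, so the site-0 value counts {3, 2, 1} become 10 x {3, 2, 1} = {30, 20, 10}, which is what the three ASSERT_EQ checks on VD[0..2].Count verify.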
@@ -213,11 +259,6 @@ TEST_F(InstrProfTest, get_icall_data_read_write_big_endian) {
std::unique_ptr<InstrProfValueData[]> VD =
R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
- // Now sort the target acording to frequency.
- std::sort(&VD[0], &VD[3],
- [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
- return VD1.Count > VD2.Count;
- });
ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee3"));
ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee2"));
ASSERT_EQ(StringRef((const char *)VD[2].Value, 7), StringRef("callee1"));
@@ -249,12 +290,11 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
{uint64_t(callee4), 4}};
Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 4, nullptr);
- // No valeu profile data at the second site.
+ // No value profile data at the second site.
Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
- InstrProfValueData VD2[] = {{uint64_t(callee1), 1},
- {uint64_t(callee2), 2},
- {uint64_t(callee3), 3}};
+ InstrProfValueData VD2[] = {
+ {uint64_t(callee1), 1}, {uint64_t(callee2), 2}, {uint64_t(callee3), 3}};
Record11.addValueData(IPVK_IndirectCallTarget, 2, VD2, 3, nullptr);
InstrProfValueData VD3[] = {{uint64_t(callee1), 1}};
@@ -267,16 +307,14 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
// A different record for the same caller.
Record12.reserveSites(IPVK_IndirectCallTarget, 5);
- InstrProfValueData VD02[] = {{uint64_t(callee2), 5},
- {uint64_t(callee3), 3}};
+ InstrProfValueData VD02[] = {{uint64_t(callee2), 5}, {uint64_t(callee3), 3}};
Record12.addValueData(IPVK_IndirectCallTarget, 0, VD02, 2, nullptr);
- // No valeu profile data at the second site.
+ // No value profile data at the second site.
Record12.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
- InstrProfValueData VD22[] = {{uint64_t(callee2), 1},
- {uint64_t(callee3), 3},
- {uint64_t(callee4), 4}};
+ InstrProfValueData VD22[] = {
+ {uint64_t(callee2), 1}, {uint64_t(callee3), 3}, {uint64_t(callee4), 4}};
Record12.addValueData(IPVK_IndirectCallTarget, 2, VD22, 3, nullptr);
Record12.addValueData(IPVK_IndirectCallTarget, 3, nullptr, 0, nullptr);
@@ -309,11 +347,6 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
std::unique_ptr<InstrProfValueData[]> VD =
R.get().getValueForSite(IPVK_IndirectCallTarget, 0);
- // Now sort the target acording to frequency.
- std::sort(&VD[0], &VD[4],
- [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
- return VD1.Count > VD2.Count;
- });
ASSERT_EQ(StringRef((const char *)VD[0].Value, 7), StringRef("callee2"));
ASSERT_EQ(7U, VD[0].Count);
ASSERT_EQ(StringRef((const char *)VD[1].Value, 7), StringRef("callee3"));
@@ -325,10 +358,6 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
std::unique_ptr<InstrProfValueData[]> VD_2(
R.get().getValueForSite(IPVK_IndirectCallTarget, 2));
- std::sort(&VD_2[0], &VD_2[4],
- [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
- return VD1.Count > VD2.Count;
- });
ASSERT_EQ(StringRef((const char *)VD_2[0].Value, 7), StringRef("callee3"));
ASSERT_EQ(6U, VD_2[0].Count);
ASSERT_EQ(StringRef((const char *)VD_2[1].Value, 7), StringRef("callee4"));
@@ -345,10 +374,6 @@ TEST_F(InstrProfTest, get_icall_data_merge1) {
std::unique_ptr<InstrProfValueData[]> VD_4(
R.get().getValueForSite(IPVK_IndirectCallTarget, 4));
- std::sort(&VD_4[0], &VD_4[3],
- [](const InstrProfValueData &VD1, const InstrProfValueData &VD2) {
- return VD1.Count > VD2.Count;
- });
ASSERT_EQ(StringRef((const char *)VD_4[0].Value, 7), StringRef("callee3"));
ASSERT_EQ(6U, VD_4[0].Count);
ASSERT_EQ(StringRef((const char *)VD_4[1].Value, 7), StringRef("callee2"));
@@ -408,6 +433,55 @@ TEST_F(InstrProfTest, get_icall_data_merge1_saturation) {
ASSERT_EQ(Max, VD[0].Count);
}
+// Test that when there are too many values for a given site,
+// the merged results are properly truncated instead of being
+// emitted in full.
+TEST_F(InstrProfTest, get_icall_data_merge_site_trunc) {
+ static const char caller[] = "caller";
+
+ InstrProfRecord Record11(caller, 0x1234, {1, 2});
+ InstrProfRecord Record12(caller, 0x1234, {1, 2});
+
+ // 2 value sites.
+ Record11.reserveSites(IPVK_IndirectCallTarget, 2);
+ InstrProfValueData VD0[255];
+ for (int I = 0; I < 255; I++) {
+ VD0[I].Value = 2 * I;
+ VD0[I].Count = 2 * I + 1000;
+ }
+
+ Record11.addValueData(IPVK_IndirectCallTarget, 0, VD0, 255, nullptr);
+ Record11.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+
+ Record12.reserveSites(IPVK_IndirectCallTarget, 2);
+ InstrProfValueData VD1[255];
+ for (int I = 0; I < 255; I++) {
+ VD1[I].Value = 2 * I + 1;
+ VD1[I].Count = 2 * I + 1001;
+ }
+
+ Record12.addValueData(IPVK_IndirectCallTarget, 0, VD1, 255, nullptr);
+ Record12.addValueData(IPVK_IndirectCallTarget, 1, nullptr, 0, nullptr);
+
+ Writer.addRecord(std::move(Record11));
+ // Merge profile data.
+ Writer.addRecord(std::move(Record12));
+
+ auto Profile = Writer.writeBuffer();
+ readProfile(std::move(Profile));
+
+ ErrorOr<InstrProfRecord> R = Reader->getInstrProfRecord("caller", 0x1234);
+ ASSERT_TRUE(NoError(R.getError()));
+ std::unique_ptr<InstrProfValueData[]> VD(
+ R.get().getValueForSite(IPVK_IndirectCallTarget, 0));
+ ASSERT_EQ(2U, R.get().getNumValueSites(IPVK_IndirectCallTarget));
+ ASSERT_EQ(255U, R.get().getNumValueDataForSite(IPVK_IndirectCallTarget, 0));
+ for (unsigned I = 0; I < 255; I++) {
+ ASSERT_EQ(VD[I].Value, 509 - I);
+ ASSERT_EQ(VD[I].Count, 1509 - I);
+ }
+}
+
// Synthesize runtime value profile data.
ValueProfNode Site1Values[5] = {{{uint64_t("callee1"), 400}, &Site1Values[1]},
{{uint64_t("callee2"), 1000}, &Site1Values[2]},
diff --git a/unittests/Support/MathExtrasTest.cpp b/unittests/Support/MathExtrasTest.cpp
index 945d8322b259..97309f8d31f5 100644
--- a/unittests/Support/MathExtrasTest.cpp
+++ b/unittests/Support/MathExtrasTest.cpp
@@ -304,4 +304,58 @@ TEST(MathExtras, SaturatingMultiply) {
SaturatingMultiplyTestHelper<uint64_t>();
}
+template<typename T>
+void SaturatingMultiplyAddTestHelper()
+{
+ const T Max = std::numeric_limits<T>::max();
+ bool ResultOverflowed;
+
+ // Test basic multiply-add.
+ EXPECT_EQ(T(16), SaturatingMultiplyAdd(T(2), T(3), T(10)));
+ EXPECT_EQ(T(16), SaturatingMultiplyAdd(T(2), T(3), T(10), &ResultOverflowed));
+ EXPECT_FALSE(ResultOverflowed);
+
+  // Test multiply overflows, add doesn't overflow.
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(Max, Max, T(0), &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+  // Test multiply doesn't overflow, add overflows.
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(T(1), T(1), Max, &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+  // Test multiply-add with Max as operand.
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(T(1), T(1), Max, &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(T(1), Max, T(1), &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(Max, Max, T(1), &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+ EXPECT_EQ(Max, SaturatingMultiplyAdd(Max, Max, Max, &ResultOverflowed));
+ EXPECT_TRUE(ResultOverflowed);
+
+  // Test multiply-add with 0 as operand.
+ EXPECT_EQ(T(1), SaturatingMultiplyAdd(T(1), T(1), T(0), &ResultOverflowed));
+ EXPECT_FALSE(ResultOverflowed);
+
+ EXPECT_EQ(T(1), SaturatingMultiplyAdd(T(1), T(0), T(1), &ResultOverflowed));
+ EXPECT_FALSE(ResultOverflowed);
+
+ EXPECT_EQ(T(1), SaturatingMultiplyAdd(T(0), T(0), T(1), &ResultOverflowed));
+ EXPECT_FALSE(ResultOverflowed);
+
+ EXPECT_EQ(T(0), SaturatingMultiplyAdd(T(0), T(0), T(0), &ResultOverflowed));
+ EXPECT_FALSE(ResultOverflowed);
+
+}
+
+TEST(MathExtras, SaturatingMultiplyAdd) {
+ SaturatingMultiplyAddTestHelper<uint8_t>();
+ SaturatingMultiplyAddTestHelper<uint16_t>();
+ SaturatingMultiplyAddTestHelper<uint32_t>();
+ SaturatingMultiplyAddTestHelper<uint64_t>();
+}
+
}
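The helper above pins down the contract of SaturatingMultiplyAdd without showing an implementation. A plausible standalone version consistent with these expectations might look like the sketch below; the names SatAdd, SatMul, and SatMulAdd are invented for this illustration, and the real implementation lives in include/llvm/Support/MathExtras.h and may be structured differently.

    #include <cstdint>
    #include <limits>

    // Saturating add: clamp to max() on unsigned wraparound.
    template <typename T> T SatAdd(T X, T Y, bool *Overflowed = nullptr) {
      T Z = X + Y;
      bool Ov = Z < X; // wraparound iff the sum came out smaller
      if (Overflowed)
        *Overflowed = Ov;
      return Ov ? std::numeric_limits<T>::max() : Z;
    }

    // Saturating multiply: detect overflow by dividing the product back.
    template <typename T> T SatMul(T X, T Y, bool *Overflowed = nullptr) {
      if (X == 0 || Y == 0) {
        if (Overflowed)
          *Overflowed = false;
        return 0;
      }
      T Z = X * Y;
      bool Ov = (Z / Y != X);
      if (Overflowed)
        *Overflowed = Ov;
      return Ov ? std::numeric_limits<T>::max() : Z;
    }

    // Multiply-add saturates, and reports overflow, if either step
    // overflows, matching the EXPECT_TRUE(ResultOverflowed) cases above.
    template <typename T>
    T SatMulAdd(T X, T Y, T A, bool *Overflowed = nullptr) {
      bool MulOv = false, AddOv = false;
      T R = SatAdd(SatMul(X, Y, &MulOv), A, &AddOv);
      if (Overflowed)
        *Overflowed = MulOv || AddOv;
      return R;
    }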
diff --git a/utils/KillTheDoctor/KillTheDoctor.cpp b/utils/KillTheDoctor/KillTheDoctor.cpp
index fae3b1a8b9d8..6c2242aafdfe 100644
--- a/utils/KillTheDoctor/KillTheDoctor.cpp
+++ b/utils/KillTheDoctor/KillTheDoctor.cpp
@@ -328,11 +328,9 @@ int main(int argc, char **argv) {
if (TraceExecution)
errs() << ToolName << ": Found Program: " << ProgramToRun << '\n';
- for (std::vector<std::string>::iterator i = Argv.begin(),
- e = Argv.end();
- i != e; ++i) {
+ for (const std::string &Arg : Argv) {
CommandLine.push_back(' ');
- CommandLine.append(*i);
+ CommandLine.append(Arg);
}
if (TraceExecution)
diff --git a/utils/TableGen/AsmWriterEmitter.cpp b/utils/TableGen/AsmWriterEmitter.cpp
index cc74f9ebcece..cf7cbd962865 100644
--- a/utils/TableGen/AsmWriterEmitter.cpp
+++ b/utils/TableGen/AsmWriterEmitter.cpp
@@ -65,7 +65,7 @@ private:
static void PrintCases(std::vector<std::pair<std::string,
AsmWriterOperand> > &OpsToPrint, raw_ostream &O) {
- O << " case " << OpsToPrint.back().first << ": ";
+ O << " case " << OpsToPrint.back().first << ":";
AsmWriterOperand TheOp = OpsToPrint.back().second;
OpsToPrint.pop_back();
@@ -73,13 +73,13 @@ static void PrintCases(std::vector<std::pair<std::string,
// emit a case label for them.
for (unsigned i = OpsToPrint.size(); i != 0; --i)
if (OpsToPrint[i-1].second == TheOp) {
- O << "\n case " << OpsToPrint[i-1].first << ": ";
+ O << "\n case " << OpsToPrint[i-1].first << ":";
OpsToPrint.erase(OpsToPrint.begin()+i-1);
}
// Finally, emit the code.
- O << TheOp.getCode();
- O << "break;\n";
+ O << "\n " << TheOp.getCode();
+ O << "\n break;\n";
}
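The PrintCases change above only affects the layout of the generated printer. With a hypothetical opcode and operand, the emitted switch body goes from one packed line to one statement per line:

    // Before: label, code, and break crammed onto a single line.
    case X86::FOO: O << "foo"; break;

    // After: each on its own line, which is easier to read and to diff.
    case X86::FOO:
      O << "foo";
      break;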
@@ -109,9 +109,9 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
O << " case " << FirstInst.CGI->Namespace << "::"
<< FirstInst.CGI->TheDef->getName() << ":\n";
- for (unsigned i = 0, e = SimilarInsts.size(); i != e; ++i)
- O << " case " << SimilarInsts[i].CGI->Namespace << "::"
- << SimilarInsts[i].CGI->TheDef->getName() << ":\n";
+ for (const AsmWriterInst &AWI : SimilarInsts)
+ O << " case " << AWI.CGI->Namespace << "::"
+ << AWI.CGI->TheDef->getName() << ":\n";
for (unsigned i = 0, e = FirstInst.Operands.size(); i != e; ++i) {
if (i != DifferingOperand) {
// If the operand is the same for all instructions, just print it.
@@ -120,13 +120,13 @@ static void EmitInstructions(std::vector<AsmWriterInst> &Insts,
// If this is the operand that varies between all of the instructions,
// emit a switch for just this operand now.
O << " switch (MI->getOpcode()) {\n";
+ O << " default: llvm_unreachable(\"Unexpected opcode.\");\n";
std::vector<std::pair<std::string, AsmWriterOperand> > OpsToPrint;
OpsToPrint.push_back(std::make_pair(FirstInst.CGI->Namespace + "::" +
FirstInst.CGI->TheDef->getName(),
FirstInst.Operands[i]));
- for (unsigned si = 0, e = SimilarInsts.size(); si != e; ++si) {
- AsmWriterInst &AWI = SimilarInsts[si];
+ for (const AsmWriterInst &AWI : SimilarInsts) {
OpsToPrint.push_back(std::make_pair(AWI.CGI->Namespace+"::"+
AWI.CGI->TheDef->getName(),
AWI.Operands[i]));
@@ -159,11 +159,10 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
if (!Inst)
continue; // PHI, INLINEASM, CFI_INSTRUCTION, etc.
- std::string Command;
if (Inst->Operands.empty())
continue; // Instruction already done.
- Command = " " + Inst->Operands[0].getCode() + "\n";
+ std::string Command = " " + Inst->Operands[0].getCode() + "\n";
// Check to see if we already have 'Command' in UniqueOperandCommands.
// If not, add it.
@@ -178,7 +177,7 @@ FindUniqueOperandCommands(std::vector<std::string> &UniqueOperandCommands,
}
if (!FoundIt) {
InstIdxs[i] = UniqueOperandCommands.size();
- UniqueOperandCommands.push_back(Command);
+ UniqueOperandCommands.push_back(std::move(Command));
InstrsForCase.push_back(Inst->CGI->TheDef->getName());
// This command matches one operand so far.
@@ -293,14 +292,13 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
/// OpcodeInfo - This encodes the index of the string to use for the first
/// chunk of the output as well as indices used for operand printing.
- /// To reduce the number of unhandled cases, we expand the size from 32-bit
- /// to 32+16 = 48-bit.
std::vector<uint64_t> OpcodeInfo;
+ const unsigned OpcodeInfoBits = 64;
// Add all strings to the string table upfront so it can generate an optimized
// representation.
- for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) {
- AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)];
+ for (const CodeGenInstruction *Inst : *NumberedInstructions) {
+ AsmWriterInst *AWI = CGIAWIMap[Inst];
if (AWI &&
AWI->Operands[0].OperandType ==
AsmWriterOperand::isLiteralTextOperand &&
@@ -314,8 +312,8 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
StringTable.layout();
unsigned MaxStringIdx = 0;
- for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) {
- AsmWriterInst *AWI = CGIAWIMap[NumberedInstructions->at(i)];
+ for (const CodeGenInstruction *Inst : *NumberedInstructions) {
+ AsmWriterInst *AWI = CGIAWIMap[Inst];
unsigned Idx;
if (!AWI) {
// Something not handled by the asmwriter printer.
@@ -344,7 +342,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
// To reduce code size, we compactify common instructions into a few bits
// in the opcode-indexed table.
- unsigned BitsLeft = 64-AsmStrBits;
+ unsigned BitsLeft = OpcodeInfoBits-AsmStrBits;
std::vector<std::vector<std::string>> TableDrivenOperandPrinters;
@@ -372,7 +370,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
// Otherwise, we can include this in the initial lookup table. Add it in.
for (unsigned i = 0, e = InstIdxs.size(); i != e; ++i)
if (InstIdxs[i] != ~0U) {
- OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (64-BitsLeft);
+ OpcodeInfo[i] |= (uint64_t)InstIdxs[i] << (OpcodeInfoBits-BitsLeft);
}
BitsLeft -= NumBits;
@@ -392,56 +390,55 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
TableDrivenOperandPrinters.push_back(std::move(UniqueOperandCommands));
}
-
- // We always emit at least one 32-bit table. A second table is emitted if
- // more bits are needed.
- O<<" static const uint32_t OpInfo[] = {\n";
- for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) {
- O << " " << (OpcodeInfo[i] & 0xffffffff) << "U,\t// "
- << NumberedInstructions->at(i)->TheDef->getName() << "\n";
- }
- // Add a dummy entry so the array init doesn't end with a comma.
- O << " 0U\n";
+ // Emit the string table itself.
+ O << " static const char AsmStrs[] = {\n";
+ StringTable.emit(O, printChar);
O << " };\n\n";
- if (BitsLeft < 32) {
- // Add a second OpInfo table only when it is necessary.
- // Adjust the type of the second table based on the number of bits needed.
- O << " static const uint"
- << ((BitsLeft < 16) ? "32" : (BitsLeft < 24) ? "16" : "8")
- << "_t OpInfo2[] = {\n";
+ // Emit the lookup tables in pieces to minimize wasted bytes.
+ unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8;
+ unsigned Table = 0, Shift = 0;
+ SmallString<128> BitsString;
+ raw_svector_ostream BitsOS(BitsString);
+  // If more than 32 bits are needed in total, we need a 64-bit type.
+ BitsOS << " uint" << ((BitsLeft < (OpcodeInfoBits - 32)) ? 64 : 32)
+ << "_t Bits = 0;\n";
+ while (BytesNeeded != 0) {
+    // Figure out how big this table section needs to be, capped at 4 bytes.
+ unsigned TableSize = std::min(1 << Log2_32(BytesNeeded), 4);
+ BytesNeeded -= TableSize;
+    TableSize *= 8; // Convert to bits.
+ uint64_t Mask = (1ULL << TableSize) - 1;
+ O << " static const uint" << TableSize << "_t OpInfo" << Table
+ << "[] = {\n";
for (unsigned i = 0, e = NumberedInstructions->size(); i != e; ++i) {
- O << " " << (OpcodeInfo[i] >> 32) << "U,\t// "
+ O << " " << ((OpcodeInfo[i] >> Shift) & Mask) << "U,\t// "
<< NumberedInstructions->at(i)->TheDef->getName() << "\n";
}
- // Add a dummy entry so the array init doesn't end with a comma.
- O << " 0U\n";
O << " };\n\n";
+    // Append to the string that combines the individual table lookups.
+ BitsOS << " Bits |= ";
+    // If more than 32 bits are needed in total, we need a 64-bit type.
+ if (BitsLeft < (OpcodeInfoBits - 32))
+ BitsOS << "(uint64_t)";
+ BitsOS << "OpInfo" << Table << "[MI->getOpcode()] << " << Shift << ";\n";
+ // Prepare the shift for the next iteration and increment the table count.
+ Shift += TableSize;
+ ++Table;
}
- // Emit the string itself.
- O << " static const char AsmStrs[] = {\n";
- StringTable.emit(O, printChar);
- O << " };\n\n";
-
+ // Emit the initial tab character.
O << " O << \"\\t\";\n\n";
O << " // Emit the opcode for the instruction.\n";
- if (BitsLeft < 32) {
- // If we have two tables then we need to perform two lookups and combine
- // the results into a single 64-bit value.
- O << " uint64_t Bits1 = OpInfo[MI->getOpcode()];\n"
- << " uint64_t Bits2 = OpInfo2[MI->getOpcode()];\n"
- << " uint64_t Bits = (Bits2 << 32) | Bits1;\n";
- } else {
- // If only one table is used we just need to perform a single lookup.
- O << " uint32_t Bits = OpInfo[MI->getOpcode()];\n";
- }
+ O << BitsString;
+
+ // Emit the starting string.
O << " assert(Bits != 0 && \"Cannot print this instruction.\");\n"
<< " O << AsmStrs+(Bits & " << (1 << AsmStrBits)-1 << ")-1;\n\n";
// Output the table driven operand information.
- BitsLeft = 64-AsmStrBits;
+ BitsLeft = OpcodeInfoBits-AsmStrBits;
for (unsigned i = 0, e = TableDrivenOperandPrinters.size(); i != e; ++i) {
std::vector<std::string> &Commands = TableDrivenOperandPrinters[i];
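To make the byte-minimizing table emission above concrete, here is a worked example with assumed numbers: OpcodeInfoBits = 64 and BitsLeft = 24, i.e. a 40-bit payload per opcode. FloorLog2 below mirrors llvm::Log2_32 so the snippet is self-contained.

    #include <algorithm>
    #include <cstdio>

    // floor(log2(V)), standing in for llvm::Log2_32 in this illustration.
    static unsigned FloorLog2(unsigned V) {
      unsigned R = 0;
      while (V >>= 1)
        ++R;
      return R;
    }

    int main() {
      const unsigned OpcodeInfoBits = 64;
      unsigned BitsLeft = 24; // assumed leftover after packing commands
      unsigned BytesNeeded = ((OpcodeInfoBits - BitsLeft) + 7) / 8; // 5
      unsigned Table = 0;
      while (BytesNeeded != 0) {
        // Largest power-of-two chunk, capped at 4 bytes, as in the generator.
        unsigned TableSize = std::min(1u << FloorLog2(BytesNeeded), 4u);
        BytesNeeded -= TableSize;
        std::printf("OpInfo%u: uint%u_t\n", Table++, TableSize * 8);
      }
      // Prints "OpInfo0: uint32_t" then "OpInfo1: uint8_t": the 40 payload
      // bits cost 5 bytes per opcode instead of a full uint64_t.
      return 0;
    }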
@@ -457,7 +454,7 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
if (Commands.size() == 2) {
// Emit two possibilities with if/else.
O << " if ((Bits >> "
- << (64-BitsLeft) << ") & "
+ << (OpcodeInfoBits-BitsLeft) << ") & "
<< ((1 << NumBits)-1) << ") {\n"
<< Commands[1]
<< " } else {\n"
@@ -468,14 +465,14 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
O << Commands[0] << "\n\n";
} else {
O << " switch ((Bits >> "
- << (64-BitsLeft) << ") & "
+ << (OpcodeInfoBits-BitsLeft) << ") & "
<< ((1 << NumBits)-1) << ") {\n"
<< " default: llvm_unreachable(\"Invalid command number.\");\n";
// Print out all the cases.
- for (unsigned i = 0, e = Commands.size(); i != e; ++i) {
- O << " case " << i << ":\n";
- O << Commands[i];
+ for (unsigned j = 0, e = Commands.size(); j != e; ++j) {
+ O << " case " << j << ":\n";
+ O << Commands[j];
O << " break;\n";
}
O << " }\n\n";
@@ -484,14 +481,11 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
}
// Okay, delete instructions with no operand info left.
- for (unsigned i = 0, e = Instructions.size(); i != e; ++i) {
- // Entire instruction has been emitted?
- AsmWriterInst &Inst = Instructions[i];
- if (Inst.Operands.empty()) {
- Instructions.erase(Instructions.begin()+i);
- --i; --e;
- }
- }
+ auto I = std::remove_if(Instructions.begin(), Instructions.end(),
+ [](AsmWriterInst &Inst) {
+ return Inst.Operands.empty();
+ });
+ Instructions.erase(I, Instructions.end());
// Because this is a vector, we want to emit from the end. Reverse all of the
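The std::remove_if replacement above is the standard erase-remove idiom; a minimal standalone illustration of the same pattern:

    #include <algorithm>
    #include <vector>

    int main() {
      std::vector<int> V = {1, 0, 2, 0, 3};
      // remove_if shifts the kept elements forward and returns the new
      // logical end; erase then drops the leftover tail in one pass.
      V.erase(std::remove_if(V.begin(), V.end(),
                             [](int X) { return X == 0; }),
              V.end());
      // V now holds {1, 2, 3}.
      return 0;
    }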
@@ -499,18 +493,18 @@ void AsmWriterEmitter::EmitPrintInstruction(raw_ostream &O) {
std::reverse(Instructions.begin(), Instructions.end());
- // Now that we've emitted all of the operand info that fit into 32 bits, emit
+ // Now that we've emitted all of the operand info that fit into 64 bits, emit
// information for those instructions that are left. This is a less dense
- // encoding, but we expect the main 32-bit table to handle the majority of
+ // encoding, but we expect the main 64-bit table to handle the majority of
// instructions.
if (!Instructions.empty()) {
// Find the opcode # of inline asm.
O << " switch (MI->getOpcode()) {\n";
+ O << " default: llvm_unreachable(\"Unexpected opcode.\");\n";
while (!Instructions.empty())
EmitInstructions(Instructions, O);
O << " }\n";
- O << " return;\n";
}
O << "}\n";
@@ -603,16 +597,16 @@ void AsmWriterEmitter::EmitGetRegisterName(raw_ostream &O) {
<< "\n";
if (hasAltNames) {
- for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i)
- emitRegisterNameString(O, AltNameIndices[i]->getName(), Registers);
+ for (const Record *R : AltNameIndices)
+ emitRegisterNameString(O, R->getName(), Registers);
} else
emitRegisterNameString(O, "", Registers);
if (hasAltNames) {
O << " switch(AltIdx) {\n"
<< " default: llvm_unreachable(\"Invalid register alt name index!\");\n";
- for (unsigned i = 0, e = AltNameIndices.size(); i < e; ++i) {
- std::string AltName(AltNameIndices[i]->getName());
+ for (const Record *R : AltNameIndices) {
+ std::string AltName(R->getName());
std::string Prefix = !Namespace.empty() ? Namespace + "::" : "";
O << " case " << Prefix << AltName << ":\n"
<< " assert(*(AsmStrs" << AltName << "+RegAsmOffset"
@@ -799,9 +793,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
typedef std::set<std::pair<CodeGenInstAlias, int>, AliasPriorityComparator>
AliasWithPriority;
std::map<std::string, AliasWithPriority> AliasMap;
- for (std::vector<Record*>::iterator
- I = AllInstAliases.begin(), E = AllInstAliases.end(); I != E; ++I) {
- const Record *R = *I;
+ for (Record *R : AllInstAliases) {
int Priority = R->getValueAsInt("EmitPriority");
if (Priority < 1)
continue; // Aliases with priority 0 are never emitted.
@@ -809,7 +801,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
const DagInit *DI = R->getValueAsDag("ResultInst");
const DefInit *Op = cast<DefInit>(DI->getOperator());
AliasMap[getQualifiedName(Op->getDef())].insert(
- std::make_pair(CodeGenInstAlias(*I, Variant, Target), Priority));
+ std::make_pair(CodeGenInstAlias(R, Variant, Target), Priority));
}
// A map of which conditions need to be met for each instruction operand
@@ -977,9 +969,7 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
CasesO.indent(2) << "case " << Entry.first << ":\n";
- for (std::vector<IAPrinter*>::iterator
- II = UniqueIAPs.begin(), IE = UniqueIAPs.end(); II != IE; ++II) {
- IAPrinter *IAP = *II;
+ for (IAPrinter *IAP : UniqueIAPs) {
CasesO.indent(4);
IAP->print(CasesO);
CasesO << '\n';
@@ -1098,10 +1088,11 @@ void AsmWriterEmitter::EmitPrintAliasInstruction(raw_ostream &O) {
AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
Record *AsmWriter = Target.getAsmWriter();
+ unsigned Variant = AsmWriter->getValueAsInt("Variant");
+ unsigned PassSubtarget = AsmWriter->getValueAsInt("PassSubtarget");
for (const CodeGenInstruction *I : Target.instructions())
if (!I->AsmString.empty() && I->TheDef->getName() != "PHI")
- Instructions.emplace_back(*I, AsmWriter->getValueAsInt("Variant"),
- AsmWriter->getValueAsInt("PassSubtarget"));
+ Instructions.emplace_back(*I, Variant, PassSubtarget);
// Get the instruction numbering.
NumberedInstructions = &Target.getInstructionsByEnumValue();
@@ -1109,8 +1100,8 @@ AsmWriterEmitter::AsmWriterEmitter(RecordKeeper &R) : Records(R), Target(R) {
// Compute the CodeGenInstruction -> AsmWriterInst mapping. Note that not
// all machine instructions are necessarily being printed, so there may be
// target instructions not in this map.
- for (unsigned i = 0, e = Instructions.size(); i != e; ++i)
- CGIAWIMap.insert(std::make_pair(Instructions[i].CGI, &Instructions[i]));
+ for (AsmWriterInst &AWI : Instructions)
+ CGIAWIMap.insert(std::make_pair(AWI.CGI, &AWI));
}
void AsmWriterEmitter::run(raw_ostream &O) {
diff --git a/utils/TableGen/AsmWriterInst.cpp b/utils/TableGen/AsmWriterInst.cpp
index 954188754009..5b09765a2756 100644
--- a/utils/TableGen/AsmWriterInst.cpp
+++ b/utils/TableGen/AsmWriterInst.cpp
@@ -29,8 +29,8 @@ static bool isIdentChar(char C) {
std::string AsmWriterOperand::getCode() const {
if (OperandType == isLiteralTextOperand) {
if (Str.size() == 1)
- return "O << '" + Str + "'; ";
- return "O << \"" + Str + "\"; ";
+ return "O << '" + Str + "';";
+ return "O << \"" + Str + "\";";
}
if (OperandType == isLiteralStatementOperand)
@@ -44,7 +44,7 @@ std::string AsmWriterOperand::getCode() const {
Result += ", O";
if (!MiModifier.empty())
Result += ", \"" + MiModifier + '"';
- return Result + "); ";
+ return Result + ");";
}
/// ParseAsmString - Parse the specified Instruction's AsmString into this
diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py
index a6e8d52c0754..40a577168692 100644
--- a/utils/lit/lit/util.py
+++ b/utils/lit/lit/util.py
@@ -267,7 +267,14 @@ def killProcessAndChildren(pid):
import psutil
try:
psutilProc = psutil.Process(pid)
- for child in psutilProc.children(recursive=True):
+ # Handle the different psutil API versions
+ try:
+ # psutil >= 2.x
+ children_iterator = psutilProc.children(recursive=True)
+ except AttributeError:
+ # psutil 1.x
+ children_iterator = psutilProc.get_children(recursive=True)
+ for child in children_iterator:
try:
child.kill()
except psutil.NoSuchProcess: