-rw-r--r--  include/llvm/ADT/SmallVector.h | 8
-rw-r--r--  include/llvm/Analysis/OrderedBasicBlock.h | 1
-rw-r--r--  include/llvm/Analysis/RegionPass.h | 5
-rw-r--r--  include/llvm/Analysis/ScalarEvolution.h | 2
-rw-r--r--  include/llvm/CodeGen/MachineRegionInfo.h | 130
-rw-r--r--  include/llvm/CodeGen/MachineRegisterInfo.h | 98
-rw-r--r--  include/llvm/CodeGen/MachineScheduler.h | 11
-rw-r--r--  include/llvm/CodeGen/PBQP/CostAllocator.h | 61
-rw-r--r--  include/llvm/CodeGen/PBQP/Graph.h | 99
-rw-r--r--  include/llvm/CodeGen/PBQP/Math.h | 22
-rw-r--r--  include/llvm/CodeGen/PBQP/ReductionRules.h | 36
-rw-r--r--  include/llvm/CodeGen/PBQP/Solution.h | 2
-rw-r--r--  include/llvm/CodeGen/PBQPRAConstraint.h | 22
-rw-r--r--  include/llvm/CodeGen/Passes.h | 3
-rw-r--r--  include/llvm/CodeGen/RegAllocPBQP.h | 41
-rw-r--r--  include/llvm/CodeGen/RegisterScavenging.h | 4
-rw-r--r--  include/llvm/CodeGen/ScheduleDAGInstrs.h | 66
-rw-r--r--  include/llvm/CodeGen/SelectionDAG.h | 108
-rw-r--r--  include/llvm/CodeGen/SelectionDAGNodes.h | 50
-rw-r--r--  include/llvm/CodeGen/SlotIndexes.h | 28
-rw-r--r--  include/llvm/CodeGen/StackMaps.h | 31
-rw-r--r--  include/llvm/CodeGen/TargetSchedule.h | 3
-rw-r--r--  include/llvm/CodeGen/WinEHFuncInfo.h | 36
-rw-r--r--  include/llvm/DebugInfo/CodeView/CodeView.h | 8
-rw-r--r--  include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h | 1
-rw-r--r--  include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h | 4
-rw-r--r--  include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h | 17
-rw-r--r--  include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h | 21
-rw-r--r--  include/llvm/DebugInfo/CodeView/SymbolDeserializer.h | 16
-rw-r--r--  include/llvm/DebugInfo/CodeView/SymbolDumper.h | 5
-rw-r--r--  include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h | 9
-rw-r--r--  include/llvm/DebugInfo/CodeView/SymbolSerializer.h | 7
-rw-r--r--  include/llvm/DebugInfo/MSF/MappedBlockStream.h | 39
-rw-r--r--  include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h | 11
-rw-r--r--  include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h | 12
-rw-r--r--  include/llvm/DebugInfo/PDB/Native/PDBStringTable.h | 2
-rw-r--r--  include/llvm/DebugInfo/PDB/Native/TpiStream.h | 5
-rw-r--r--  include/llvm/IR/DIBuilder.h | 4
-rw-r--r--  include/llvm/IR/DebugLoc.h | 6
-rw-r--r--  include/llvm/IR/ModuleSummaryIndex.h | 44
-rw-r--r--  include/llvm/IR/ModuleSummaryIndexYAML.h | 23
-rw-r--r--  include/llvm/IR/Statepoint.h | 16
-rw-r--r--  include/llvm/InitializePasses.h | 12
-rw-r--r--  include/llvm/LTO/Config.h | 3
-rw-r--r--  include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h | 32
-rw-r--r--  include/llvm/ObjectYAML/CodeViewYAMLSymbols.h | 4
-rw-r--r--  include/llvm/TableGen/Record.h | 8
-rw-r--r--  include/llvm/Transforms/IPO/FunctionImport.h | 12
-rw-r--r--  include/llvm/Transforms/Instrumentation.h | 1
-rw-r--r--  include/llvm/Transforms/Utils/Cloning.h | 4
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 7
-rw-r--r--  lib/Analysis/IndirectCallPromotionAnalysis.cpp | 2
-rw-r--r--  lib/Analysis/InlineCost.cpp | 132
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 69
-rw-r--r--  lib/Analysis/ModuleSummaryAnalysis.cpp | 12
-rw-r--r--  lib/Analysis/OrderedBasicBlock.cpp | 2
-rw-r--r--  lib/Analysis/RegionPass.cpp | 16
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 6
-rw-r--r--  lib/Bitcode/Writer/BitcodeWriter.cpp | 113
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp | 4
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 1
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 4
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 3
-rw-r--r--  lib/CodeGen/LiveRegUnits.cpp | 56
-rw-r--r--  lib/CodeGen/MachineRegionInfo.cpp | 46
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 43
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 100
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 61
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 40
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 126
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 80
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 121
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 46
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 36
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 2
-rw-r--r--  lib/DebugInfo/CodeView/CodeViewRecordIO.cpp | 14
-rw-r--r--  lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp | 6
-rw-r--r--  lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp | 32
-rw-r--r--  lib/DebugInfo/CodeView/SymbolDumper.cpp | 4
-rw-r--r--  lib/DebugInfo/CodeView/SymbolRecordMapping.cpp | 1
-rw-r--r--  lib/DebugInfo/CodeView/SymbolSerializer.cpp | 8
-rw-r--r--  lib/DebugInfo/MSF/MappedBlockStream.cpp | 63
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp | 54
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStream.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp | 28
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFile.cpp | 9
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp | 8
-rw-r--r--  lib/DebugInfo/PDB/Native/PDBStringTable.cpp | 4
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStream.cpp | 6
-rw-r--r--  lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp | 8
-rw-r--r--  lib/IR/DIBuilder.cpp | 33
-rw-r--r--  lib/IR/DebugLoc.cpp | 81
-rw-r--r--  lib/IR/OptBisect.cpp | 15
-rw-r--r--  lib/LTO/LTO.cpp | 43
-rw-r--r--  lib/LTO/LTOBackend.cpp | 7
-rw-r--r--  lib/LTO/ThinLTOCodeGenerator.cpp | 16
-rw-r--r--  lib/MC/WasmObjectWriter.cpp | 906
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLDebugSections.cpp | 407
-rw-r--r--  lib/ObjectYAML/CodeViewYAMLSymbols.cpp | 12
-rw-r--r--  lib/Passes/PassBuilder.cpp | 6
-rw-r--r--  lib/Support/Triple.cpp | 6
-rw-r--r--  lib/Target/AArch64/AArch64PBQPRegAlloc.h | 7
-rw-r--r--  lib/Target/AArch64/AArch64SchedFalkorDetails.td | 41
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.h | 2
-rw-r--r--  lib/Target/AMDGPU/AMDGPU.td | 7
-rw-r--r--  lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp | 10
-rw-r--r--  lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 5
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 1
-rw-r--r--  lib/Target/AMDGPU/AMDGPUSubtarget.h | 14
-rw-r--r--  lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 3
-rw-r--r--  lib/Target/AMDGPU/SIFoldOperands.cpp | 21
-rw-r--r--  lib/Target/AMDGPU/SIISelLowering.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 3
-rw-r--r--  lib/Target/AMDGPU/SIInsertWaits.cpp | 2
-rw-r--r--  lib/Target/AMDGPU/SMInstructions.td | 1
-rw-r--r--  lib/Target/AMDGPU/VOP3Instructions.td | 3
-rw-r--r--  lib/Target/ARM/ARM.td | 21
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 83
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 18
-rw-r--r--  lib/Target/ARM/ARMCallLowering.cpp | 14
-rw-r--r--  lib/Target/ARM/ARMSchedule.td | 4
-rw-r--r--  lib/Target/ARM/ARMScheduleA57.td | 1471
-rw-r--r--  lib/Target/ARM/ARMScheduleA57WriteRes.td | 323
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 5
-rw-r--r--  lib/Target/Hexagon/HexagonInstrInfo.cpp | 4
-rw-r--r--  lib/Target/Mips/MicroMipsSizeReduction.cpp | 57
-rw-r--r--  lib/Target/WebAssembly/known_gcc_test_failures.txt | 3
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 135
-rw-r--r--  lib/Transforms/Coroutines/CoroSplit.cpp | 2
-rw-r--r--  lib/Transforms/Coroutines/Coroutines.cpp | 12
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 4
-rw-r--r--  lib/Transforms/IPO/FunctionImport.cpp | 107
-rw-r--r--  lib/Transforms/IPO/LowerTypeTests.cpp | 3
-rw-r--r--  lib/Transforms/IPO/PartialInlining.cpp | 66
-rw-r--r--  lib/Transforms/IPO/PassManagerBuilder.cpp | 2
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 11
-rw-r--r--  lib/Transforms/Instrumentation/MemorySanitizer.cpp | 1
-rw-r--r--  lib/Transforms/Instrumentation/SanitizerCoverage.cpp | 181
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 1
-rw-r--r--  lib/Transforms/Scalar/LowerExpectIntrinsic.cpp | 162
-rw-r--r--  lib/Transforms/Scalar/RewriteStatepointsForGC.cpp | 10
-rw-r--r--  lib/Transforms/Scalar/SROA.cpp | 7
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 71
-rw-r--r--  lib/Transforms/Vectorize/LoopVectorize.cpp | 2
-rw-r--r--  lib/Transforms/Vectorize/SLPVectorizer.cpp | 151
-rw-r--r--  runtimes/CMakeLists.txt | 2
-rw-r--r--  test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir | 9
-rw-r--r--  test/CodeGen/AMDGPU/basic-branch.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/branch-condition-and.ll | 3
-rw-r--r--  test/CodeGen/AMDGPU/branch-relaxation.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/commute-compares.ll | 8
-rw-r--r--  test/CodeGen/AMDGPU/control-flow-fastregalloc.ll | 11
-rw-r--r--  test/CodeGen/AMDGPU/indirect-addressing-si.ll | 9
-rw-r--r--  test/CodeGen/AMDGPU/infinite-loop.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll | 10
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll | 1
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.image.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll | 9
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll | 4
-rw-r--r--  test/CodeGen/AMDGPU/multi-divergent-exit-region.ll | 1
-rw-r--r--  test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll | 15
-rw-r--r--  test/CodeGen/AMDGPU/ret_jump.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll | 3
-rw-r--r--  test/CodeGen/AMDGPU/smrd-vccz-bug.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/spill-m0.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/sub.i16.ll | 2
-rw-r--r--  test/CodeGen/AMDGPU/valu-i1.ll | 8
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll | 62
-rw-r--r--  test/CodeGen/ARM/GlobalISel/arm-unsupported.ll | 13
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-alu.ll | 81
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-basic.ll | 53
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll | 37
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-ldm.ll | 28
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll | 36
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-stm.ll | 29
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-vfma.ll | 77
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll | 50
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-vldm.ll | 30
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll | 43
-rw-r--r--  test/CodeGen/ARM/cortex-a57-misched-vstm.ll | 23
-rw-r--r--  test/CodeGen/ARM/global-merge-external.ll | 1
-rw-r--r--  test/CodeGen/Hexagon/newify-crash.ll | 44
-rw-r--r--  test/CodeGen/MIR/Generic/runPass.mir | 1
-rw-r--r--  test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll | 40
-rw-r--r--  test/CodeGen/PowerPC/scavenging.mir | 149
-rw-r--r--  test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir | 267
-rw-r--r--  test/CodeGen/X86/and-sink.ll | 133
-rw-r--r--  test/CodeGen/X86/avx512-cvt.ll | 408
-rw-r--r--  test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll | 32
-rw-r--r--  test/CodeGen/X86/clear_upper_vector_element_bits.ll | 44
-rw-r--r--  test/CodeGen/X86/scavenger.mir | 54
-rw-r--r--  test/CodeGen/X86/select.ll | 31
-rw-r--r--  test/CodeGen/X86/shrink-compare.ll | 129
-rw-r--r--  test/CodeGen/X86/sse3.ll | 18
-rw-r--r--  test/CodeGen/X86/stack-folding-fp-avx1.ll | 14
-rw-r--r--  test/CodeGen/X86/statepoint-allocas.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-call-lowering.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-far-call.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-forward.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-gctransition-call-lowering.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-invoke.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-live-in.ll | 124
-rw-r--r--  test/CodeGen/X86/statepoint-stack-usage.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-stackmap-format.ll | 4
-rw-r--r--  test/CodeGen/X86/statepoint-uniqueing.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-vector-bad-spill.ll | 2
-rw-r--r--  test/CodeGen/X86/statepoint-vector.ll | 2
-rw-r--r--  test/CodeGen/X86/vector-unsigned-cmp.ll | 519
-rw-r--r--  test/CodeGen/X86/wide-fma-contraction.ll | 54
-rw-r--r--  test/CodeGen/X86/xor-icmp.ll | 78
-rw-r--r--  test/DebugInfo/MIR/AArch64/clobber-sp.mir | 181
-rw-r--r--  test/DebugInfo/MIR/AArch64/lit.local.cfg | 3
-rw-r--r--  test/DebugInfo/PDB/Inputs/simple-line-info.yaml | 71
-rw-r--r--  test/DebugInfo/PDB/pdbdump-write.test | 6
-rw-r--r--  test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test | 13
-rw-r--r--  test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll | 13
-rw-r--r--  test/MC/WebAssembly/external-data.ll | 21
-rw-r--r--  test/ThinLTO/X86/deadstrip.ll | 16
-rw-r--r--  test/ThinLTO/X86/newpm-basic.ll | 2
-rw-r--r--  test/Transforms/CodeExtractor/cost.ll | 64
-rw-r--r--  test/Transforms/CodeExtractor/cost_meta.ll | 41
-rw-r--r--  test/Transforms/Coroutines/coro-split-02.ll | 5
-rw-r--r--  test/Transforms/Inline/AArch64/switch.ll | 4
-rw-r--r--  test/Transforms/InstCombine/not.ll | 49
-rw-r--r--  test/Transforms/InstSimplify/compare.ll | 16
-rw-r--r--  test/Transforms/LowerExpectIntrinsic/phi_merge.ll | 356
-rw-r--r--  test/Transforms/LowerExpectIntrinsic/phi_or.ll | 103
-rw-r--r--  test/Transforms/LowerExpectIntrinsic/phi_tern.ll | 56
-rw-r--r--  test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml | 3
-rw-r--r--  test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml | 7
-rw-r--r--  test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml | 3
-rw-r--r--  test/Transforms/LowerTypeTests/export-dead.ll | 14
-rw-r--r--  test/Transforms/LowerTypeTests/export-nothing.ll | 1
-rw-r--r--  test/Transforms/LowerTypeTests/import-unsat.ll | 5
-rw-r--r--  test/Transforms/SROA/address-spaces.ll | 18
-rw-r--r--  test/Transforms/Util/PredicateInfo/condprop.ll | 1
-rw-r--r--  test/Transforms/Util/PredicateInfo/condprop2.ll | 474
-rw-r--r--  test/Transforms/Util/PredicateInfo/testandor.ll | 1
-rw-r--r--  test/Transforms/Util/PredicateInfo/testandor2.ll | 214
-rw-r--r--  test/Transforms/WholeProgramDevirt/Inputs/export.yaml | 3
-rw-r--r--  test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml | 3
-rw-r--r--  test/Transforms/WholeProgramDevirt/export-nothing.ll | 1
-rw-r--r--  test/Transforms/WholeProgramDevirt/export-single-impl.ll | 1
-rw-r--r--  test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll | 3
-rw-r--r--  test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll | 3
-rw-r--r--  test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll | 2
-rw-r--r--  test/Transforms/WholeProgramDevirt/import-indir.ll | 4
-rw-r--r--  test/tools/llvm-lto2/X86/pipeline.ll | 2
-rw-r--r--  tools/llc/llc.cpp | 3
-rw-r--r--  tools/llvm-config/llvm-config.cpp | 2
-rw-r--r--  tools/llvm-lto2/llvm-lto2.cpp | 8
-rw-r--r--  tools/llvm-pdbdump/LLVMOutputStyle.cpp | 14
-rw-r--r--  tools/llvm-pdbdump/PdbYaml.cpp | 4
-rw-r--r--  tools/llvm-pdbdump/PdbYaml.h | 5
-rw-r--r--  tools/llvm-pdbdump/YAMLOutputStyle.cpp | 143
-rw-r--r--  tools/llvm-pdbdump/YAMLOutputStyle.h | 3
-rw-r--r--  tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp | 2
-rw-r--r--  tools/llvm-pdbdump/llvm-pdbdump.cpp | 68
-rw-r--r--  tools/llvm-readobj/COFFDumper.cpp | 3
-rw-r--r--  unittests/ADT/SmallVectorTest.cpp | 10
-rw-r--r--  unittests/Analysis/CMakeLists.txt | 5
-rw-r--r--  unittests/Analysis/OrderedBasicBlockTest.cpp | 58
-rw-r--r--  unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp | 105
-rw-r--r--  unittests/Transforms/Utils/Cloning.cpp | 2
-rw-r--r--  utils/TableGen/X86FoldTablesEmitter.cpp | 5
-rw-r--r--  utils/lit/lit/util.py | 14
277 files changed, 9435 insertions(+), 2819 deletions(-)
diff --git a/include/llvm/ADT/SmallVector.h b/include/llvm/ADT/SmallVector.h
index bd24eab93b50..35c255002001 100644
--- a/include/llvm/ADT/SmallVector.h
+++ b/include/llvm/ADT/SmallVector.h
@@ -415,6 +415,9 @@ public:
append(IL.begin(), IL.end());
}
+ // FIXME: Consider assigning over existing elements, rather than clearing &
+ // re-initializing them - for all assign(...) variants.
+
void assign(size_type NumElts, const T &Elt) {
clear();
if (this->capacity() < NumElts)
@@ -423,6 +426,11 @@ public:
std::uninitialized_fill(this->begin(), this->end(), Elt);
}
+ template <typename in_iter> void assign(in_iter in_start, in_iter in_end) {
+ clear();
+ append(in_start, in_end);
+ }
+
void assign(std::initializer_list<T> IL) {
clear();
append(IL);
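Note: the new iterator-range assign() clears the vector and then appends the range, matching the other assign(...) variants. A minimal usage sketch (names here are illustrative, not from the patch):

    #include "llvm/ADT/SmallVector.h"
    #include <vector>

    void copyInto(llvm::SmallVectorImpl<int> &SV) {
      std::vector<int> Src = {1, 2, 3};
      SV.assign(Src.begin(), Src.end()); // clears SV, then appends [begin, end)
    }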
diff --git a/include/llvm/Analysis/OrderedBasicBlock.h b/include/llvm/Analysis/OrderedBasicBlock.h
index 5aa813eb4832..2e716af1f60d 100644
--- a/include/llvm/Analysis/OrderedBasicBlock.h
+++ b/include/llvm/Analysis/OrderedBasicBlock.h
@@ -58,6 +58,7 @@ public:
/// comes before \p B in \p BB. This is a simplification that considers
/// cached instruction positions and ignores other basic blocks, being
/// only relevant to compare relative instructions positions inside \p BB.
+ /// Returns false for A == B.
bool dominates(const Instruction *A, const Instruction *B);
};
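Note: the added sentence pins down that the query is strict. A sketch of the documented behavior (BB and I are placeholders):

    void check(const llvm::BasicBlock *BB, const llvm::Instruction *I) {
      llvm::OrderedBasicBlock OBB(BB);
      bool Strict = OBB.dominates(I, I); // false: an instruction does not
      (void)Strict;                      // dominate itself in this query
    }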
diff --git a/include/llvm/Analysis/RegionPass.h b/include/llvm/Analysis/RegionPass.h
index b5f38139abf2..515b362e5407 100644
--- a/include/llvm/Analysis/RegionPass.h
+++ b/include/llvm/Analysis/RegionPass.h
@@ -78,6 +78,11 @@ public:
return PMT_RegionPassManager;
}
//@}
+
+protected:
+ /// Optional passes call this function to check whether the pass should be
+ /// skipped. This is the case when optimization bisect is over the limit.
+ bool skipRegion(Region &R) const;
};
/// @brief The pass manager to schedule RegionPasses.
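Note: a sketch of how an optional region pass would consume the new hook, following the skipFunction()/skipBasicBlock() pattern used by other pass kinds (MyRegionPass is hypothetical):

    bool MyRegionPass::runOnRegion(Region *R, RGPassManager &RGM) {
      if (skipRegion(*R)) // honor opt-bisect limits
        return false;     // region left unmodified
      // ... transform the region ...
      return true;
    }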
diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
index 1d715b590ab7..8ee9712b93d8 100644
--- a/include/llvm/Analysis/ScalarEvolution.h
+++ b/include/llvm/Analysis/ScalarEvolution.h
@@ -636,7 +636,7 @@ private:
/// @}
public:
- BackedgeTakenInfo() : MaxAndComplete(nullptr, 0) {}
+ BackedgeTakenInfo() : MaxAndComplete(nullptr, 0), MaxOrZero(false) {}
BackedgeTakenInfo(BackedgeTakenInfo &&) = default;
BackedgeTakenInfo &operator=(BackedgeTakenInfo &&) = default;
diff --git a/include/llvm/CodeGen/MachineRegionInfo.h b/include/llvm/CodeGen/MachineRegionInfo.h
index 21f847c7e5ba..8394b58d0a16 100644
--- a/include/llvm/CodeGen/MachineRegionInfo.h
+++ b/include/llvm/CodeGen/MachineRegionInfo.h
@@ -10,83 +10,77 @@
#ifndef LLVM_CODEGEN_MACHINEREGIONINFO_H
#define LLVM_CODEGEN_MACHINEREGIONINFO_H
+#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/RegionInfo.h"
#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominanceFrontier.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
-
+#include <cassert>
namespace llvm {
-class MachineDominatorTree;
struct MachinePostDominatorTree;
class MachineRegion;
class MachineRegionNode;
class MachineRegionInfo;
-template<>
-struct RegionTraits<MachineFunction> {
- typedef MachineFunction FuncT;
- typedef MachineBasicBlock BlockT;
- typedef MachineRegion RegionT;
- typedef MachineRegionNode RegionNodeT;
- typedef MachineRegionInfo RegionInfoT;
- typedef MachineDominatorTree DomTreeT;
- typedef MachineDomTreeNode DomTreeNodeT;
- typedef MachinePostDominatorTree PostDomTreeT;
- typedef MachineDominanceFrontier DomFrontierT;
- typedef MachineInstr InstT;
- typedef MachineLoop LoopT;
- typedef MachineLoopInfo LoopInfoT;
+template <> struct RegionTraits<MachineFunction> {
+ using FuncT = MachineFunction;
+ using BlockT = MachineBasicBlock;
+ using RegionT = MachineRegion;
+ using RegionNodeT = MachineRegionNode;
+ using RegionInfoT = MachineRegionInfo;
+ using DomTreeT = MachineDominatorTree;
+ using DomTreeNodeT = MachineDomTreeNode;
+ using PostDomTreeT = MachinePostDominatorTree;
+ using DomFrontierT = MachineDominanceFrontier;
+ using InstT = MachineInstr;
+ using LoopT = MachineLoop;
+ using LoopInfoT = MachineLoopInfo;
static unsigned getNumSuccessors(MachineBasicBlock *BB) {
return BB->succ_size();
}
};
-
class MachineRegionNode : public RegionNodeBase<RegionTraits<MachineFunction>> {
public:
- inline MachineRegionNode(MachineRegion *Parent,
- MachineBasicBlock *Entry,
+ inline MachineRegionNode(MachineRegion *Parent, MachineBasicBlock *Entry,
bool isSubRegion = false)
- : RegionNodeBase<RegionTraits<MachineFunction>>(Parent, Entry, isSubRegion) {
-
- }
+ : RegionNodeBase<RegionTraits<MachineFunction>>(Parent, Entry,
+ isSubRegion) {}
bool operator==(const MachineRegion &RN) const {
- return this == reinterpret_cast<const MachineRegionNode*>(&RN);
+ return this == reinterpret_cast<const MachineRegionNode *>(&RN);
}
};
class MachineRegion : public RegionBase<RegionTraits<MachineFunction>> {
public:
MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
- MachineRegionInfo* RI,
- MachineDominatorTree *DT, MachineRegion *Parent = nullptr);
+ MachineRegionInfo *RI, MachineDominatorTree *DT,
+ MachineRegion *Parent = nullptr);
~MachineRegion();
bool operator==(const MachineRegionNode &RN) const {
- return &RN == reinterpret_cast<const MachineRegionNode*>(this);
+ return &RN == reinterpret_cast<const MachineRegionNode *>(this);
}
};
class MachineRegionInfo : public RegionInfoBase<RegionTraits<MachineFunction>> {
public:
explicit MachineRegionInfo();
-
~MachineRegionInfo() override;
// updateStatistics - Update statistic about created regions.
void updateStatistics(MachineRegion *R) final;
- void recalculate(MachineFunction &F,
- MachineDominatorTree *DT,
- MachinePostDominatorTree *PDT,
- MachineDominanceFrontier *DF);
+ void recalculate(MachineFunction &F, MachineDominatorTree *DT,
+ MachinePostDominatorTree *PDT, MachineDominanceFrontier *DF);
};
class MachineRegionInfoPass : public MachineFunctionPass {
@@ -94,17 +88,13 @@ class MachineRegionInfoPass : public MachineFunctionPass {
public:
static char ID;
- explicit MachineRegionInfoPass();
+ explicit MachineRegionInfoPass();
~MachineRegionInfoPass() override;
- MachineRegionInfo &getRegionInfo() {
- return RI;
- }
+ MachineRegionInfo &getRegionInfo() { return RI; }
- const MachineRegionInfo &getRegionInfo() const {
- return RI;
- }
+ const MachineRegionInfo &getRegionInfo() const { return RI; }
/// @name MachineFunctionPass interface
//@{
@@ -117,66 +107,76 @@ public:
//@}
};
-
template <>
template <>
-inline MachineBasicBlock* RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineBasicBlock>() const {
+inline MachineBasicBlock *
+RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineBasicBlock>()
+ const {
assert(!isSubRegion() && "This is not a MachineBasicBlock RegionNode!");
return getEntry();
}
-template<>
-template<>
-inline MachineRegion* RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineRegion>() const {
+template <>
+template <>
+inline MachineRegion *
+RegionNodeBase<RegionTraits<MachineFunction>>::getNodeAs<MachineRegion>()
+ const {
assert(isSubRegion() && "This is not a subregion RegionNode!");
- auto Unconst = const_cast<RegionNodeBase<RegionTraits<MachineFunction>>*>(this);
- return reinterpret_cast<MachineRegion*>(Unconst);
+ auto Unconst =
+ const_cast<RegionNodeBase<RegionTraits<MachineFunction>> *>(this);
+ return reinterpret_cast<MachineRegion *>(Unconst);
}
-
RegionNodeGraphTraits(MachineRegionNode, MachineBasicBlock, MachineRegion);
-RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock, MachineRegion);
+RegionNodeGraphTraits(const MachineRegionNode, MachineBasicBlock,
+ MachineRegion);
RegionGraphTraits(MachineRegion, MachineRegionNode);
RegionGraphTraits(const MachineRegion, const MachineRegionNode);
-template <> struct GraphTraits<MachineRegionInfo*>
- : public GraphTraits<FlatIt<MachineRegionNode*> > {
- typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
- GraphTraits<FlatIt<NodeRef>>>
- nodes_iterator;
+template <>
+struct GraphTraits<MachineRegionInfo *>
+ : public GraphTraits<FlatIt<MachineRegionNode *>> {
+ using nodes_iterator = df_iterator<NodeRef, df_iterator_default_set<NodeRef>,
+ false, GraphTraits<FlatIt<NodeRef>>>;
static NodeRef getEntryNode(MachineRegionInfo *RI) {
- return GraphTraits<FlatIt<MachineRegion*> >::getEntryNode(RI->getTopLevelRegion());
+ return GraphTraits<FlatIt<MachineRegion *>>::getEntryNode(
+ RI->getTopLevelRegion());
}
- static nodes_iterator nodes_begin(MachineRegionInfo* RI) {
+
+ static nodes_iterator nodes_begin(MachineRegionInfo *RI) {
return nodes_iterator::begin(getEntryNode(RI));
}
+
static nodes_iterator nodes_end(MachineRegionInfo *RI) {
return nodes_iterator::end(getEntryNode(RI));
}
};
-template <> struct GraphTraits<MachineRegionInfoPass*>
- : public GraphTraits<MachineRegionInfo *> {
- typedef df_iterator<NodeRef, df_iterator_default_set<NodeRef>, false,
- GraphTraits<FlatIt<NodeRef>>>
- nodes_iterator;
+template <>
+struct GraphTraits<MachineRegionInfoPass *>
+ : public GraphTraits<MachineRegionInfo *> {
+ using nodes_iterator = df_iterator<NodeRef, df_iterator_default_set<NodeRef>,
+ false, GraphTraits<FlatIt<NodeRef>>>;
static NodeRef getEntryNode(MachineRegionInfoPass *RI) {
- return GraphTraits<MachineRegionInfo*>::getEntryNode(&RI->getRegionInfo());
+ return GraphTraits<MachineRegionInfo *>::getEntryNode(&RI->getRegionInfo());
}
- static nodes_iterator nodes_begin(MachineRegionInfoPass* RI) {
- return GraphTraits<MachineRegionInfo*>::nodes_begin(&RI->getRegionInfo());
+
+ static nodes_iterator nodes_begin(MachineRegionInfoPass *RI) {
+ return GraphTraits<MachineRegionInfo *>::nodes_begin(&RI->getRegionInfo());
}
+
static nodes_iterator nodes_end(MachineRegionInfoPass *RI) {
- return GraphTraits<MachineRegionInfo*>::nodes_end(&RI->getRegionInfo());
+ return GraphTraits<MachineRegionInfo *>::nodes_end(&RI->getRegionInfo());
}
};
extern template class RegionBase<RegionTraits<MachineFunction>>;
extern template class RegionNodeBase<RegionTraits<MachineFunction>>;
extern template class RegionInfoBase<RegionTraits<MachineFunction>>;
-}
-#endif
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEREGIONINFO_H
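Note: most of this file's churn is the mechanical typedef-to-using conversion. The two spellings are equivalent, but only the alias form generalizes to templates (illustrative only):

    typedef MachineFunction FuncT;   // old spelling
    using FuncT = MachineFunction;   // new spelling, same meaning
    template <typename T>
    using Vec = SmallVector<T, 4>;   // alias template: no typedef equivalent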
diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h
index 1026654da3d7..c027783aae55 100644
--- a/include/llvm/CodeGen/MachineRegisterInfo.h
+++ b/include/llvm/CodeGen/MachineRegisterInfo.h
@@ -14,11 +14,13 @@
#ifndef LLVM_CODEGEN_MACHINEREGISTERINFO_H
#define LLVM_CODEGEN_MACHINEREGISTERINFO_H
+#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/PointerUnion.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/LowLevelType.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -41,8 +43,8 @@ namespace llvm {
class PSetIterator;
/// Convenient type to represent either a register class or a register bank.
-typedef PointerUnion<const TargetRegisterClass *, const RegisterBank *>
- RegClassOrRegBank;
+using RegClassOrRegBank =
+ PointerUnion<const TargetRegisterClass *, const RegisterBank *>;
/// MachineRegisterInfo - Keep track of information for virtual and physical
/// registers, including vreg register classes, use/def chains for registers,
@@ -125,7 +127,7 @@ private:
/// started.
BitVector ReservedRegs;
- typedef DenseMap<unsigned, LLT> VRegToTypeMap;
+ using VRegToTypeMap = DenseMap<unsigned, LLT>;
/// Map generic virtual registers to their actual size.
mutable std::unique_ptr<VRegToTypeMap> VRegToType;
@@ -266,8 +268,8 @@ public:
/// reg_iterator/reg_begin/reg_end - Walk all defs and uses of the specified
/// register.
- typedef defusechain_iterator<true,true,false,true,false,false>
- reg_iterator;
+ using reg_iterator =
+ defusechain_iterator<true, true, false, true, false, false>;
reg_iterator reg_begin(unsigned RegNo) const {
return reg_iterator(getRegUseDefListHead(RegNo));
}
@@ -279,8 +281,8 @@ public:
/// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses
/// of the specified register, stepping by MachineInstr.
- typedef defusechain_instr_iterator<true,true,false,false,true,false>
- reg_instr_iterator;
+ using reg_instr_iterator =
+ defusechain_instr_iterator<true, true, false, false, true, false>;
reg_instr_iterator reg_instr_begin(unsigned RegNo) const {
return reg_instr_iterator(getRegUseDefListHead(RegNo));
}
@@ -295,8 +297,8 @@ public:
/// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses
/// of the specified register, stepping by bundle.
- typedef defusechain_instr_iterator<true,true,false,false,false,true>
- reg_bundle_iterator;
+ using reg_bundle_iterator =
+ defusechain_instr_iterator<true, true, false, false, false, true>;
reg_bundle_iterator reg_bundle_begin(unsigned RegNo) const {
return reg_bundle_iterator(getRegUseDefListHead(RegNo));
}
@@ -314,8 +316,8 @@ public:
/// reg_nodbg_iterator/reg_nodbg_begin/reg_nodbg_end - Walk all defs and uses
/// of the specified register, skipping those marked as Debug.
- typedef defusechain_iterator<true,true,true,true,false,false>
- reg_nodbg_iterator;
+ using reg_nodbg_iterator =
+ defusechain_iterator<true, true, true, true, false, false>;
reg_nodbg_iterator reg_nodbg_begin(unsigned RegNo) const {
return reg_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -331,8 +333,8 @@ public:
/// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk
/// all defs and uses of the specified register, stepping by MachineInstr,
/// skipping those marked as Debug.
- typedef defusechain_instr_iterator<true,true,true,false,true,false>
- reg_instr_nodbg_iterator;
+ using reg_instr_nodbg_iterator =
+ defusechain_instr_iterator<true, true, true, false, true, false>;
reg_instr_nodbg_iterator reg_instr_nodbg_begin(unsigned RegNo) const {
return reg_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -348,8 +350,8 @@ public:
/// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk
/// all defs and uses of the specified register, stepping by bundle,
/// skipping those marked as Debug.
- typedef defusechain_instr_iterator<true,true,true,false,false,true>
- reg_bundle_nodbg_iterator;
+ using reg_bundle_nodbg_iterator =
+ defusechain_instr_iterator<true, true, true, false, false, true>;
reg_bundle_nodbg_iterator reg_bundle_nodbg_begin(unsigned RegNo) const {
return reg_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -369,8 +371,8 @@ public:
}
/// def_iterator/def_begin/def_end - Walk all defs of the specified register.
- typedef defusechain_iterator<false,true,false,true,false,false>
- def_iterator;
+ using def_iterator =
+ defusechain_iterator<false, true, false, true, false, false>;
def_iterator def_begin(unsigned RegNo) const {
return def_iterator(getRegUseDefListHead(RegNo));
}
@@ -382,8 +384,8 @@ public:
/// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the
/// specified register, stepping by MachineInst.
- typedef defusechain_instr_iterator<false,true,false,false,true,false>
- def_instr_iterator;
+ using def_instr_iterator =
+ defusechain_instr_iterator<false, true, false, false, true, false>;
def_instr_iterator def_instr_begin(unsigned RegNo) const {
return def_instr_iterator(getRegUseDefListHead(RegNo));
}
@@ -398,8 +400,8 @@ public:
/// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the
/// specified register, stepping by bundle.
- typedef defusechain_instr_iterator<false,true,false,false,false,true>
- def_bundle_iterator;
+ using def_bundle_iterator =
+ defusechain_instr_iterator<false, true, false, false, false, true>;
def_bundle_iterator def_bundle_begin(unsigned RegNo) const {
return def_bundle_iterator(getRegUseDefListHead(RegNo));
}
@@ -425,8 +427,8 @@ public:
}
/// use_iterator/use_begin/use_end - Walk all uses of the specified register.
- typedef defusechain_iterator<true,false,false,true,false,false>
- use_iterator;
+ using use_iterator =
+ defusechain_iterator<true, false, false, true, false, false>;
use_iterator use_begin(unsigned RegNo) const {
return use_iterator(getRegUseDefListHead(RegNo));
}
@@ -438,8 +440,8 @@ public:
/// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the
/// specified register, stepping by MachineInstr.
- typedef defusechain_instr_iterator<true,false,false,false,true,false>
- use_instr_iterator;
+ using use_instr_iterator =
+ defusechain_instr_iterator<true, false, false, false, true, false>;
use_instr_iterator use_instr_begin(unsigned RegNo) const {
return use_instr_iterator(getRegUseDefListHead(RegNo));
}
@@ -454,8 +456,8 @@ public:
/// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the
/// specified register, stepping by bundle.
- typedef defusechain_instr_iterator<true,false,false,false,false,true>
- use_bundle_iterator;
+ using use_bundle_iterator =
+ defusechain_instr_iterator<true, false, false, false, false, true>;
use_bundle_iterator use_bundle_begin(unsigned RegNo) const {
return use_bundle_iterator(getRegUseDefListHead(RegNo));
}
@@ -482,8 +484,8 @@ public:
/// use_nodbg_iterator/use_nodbg_begin/use_nodbg_end - Walk all uses of the
/// specified register, skipping those marked as Debug.
- typedef defusechain_iterator<true,false,true,true,false,false>
- use_nodbg_iterator;
+ using use_nodbg_iterator =
+ defusechain_iterator<true, false, true, true, false, false>;
use_nodbg_iterator use_nodbg_begin(unsigned RegNo) const {
return use_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -499,8 +501,8 @@ public:
/// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk
/// all uses of the specified register, stepping by MachineInstr, skipping
/// those marked as Debug.
- typedef defusechain_instr_iterator<true,false,true,false,true,false>
- use_instr_nodbg_iterator;
+ using use_instr_nodbg_iterator =
+ defusechain_instr_iterator<true, false, true, false, true, false>;
use_instr_nodbg_iterator use_instr_nodbg_begin(unsigned RegNo) const {
return use_instr_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -516,8 +518,8 @@ public:
/// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk
/// all uses of the specified register, stepping by bundle, skipping
/// those marked as Debug.
- typedef defusechain_instr_iterator<true,false,true,false,false,true>
- use_bundle_nodbg_iterator;
+ using use_bundle_nodbg_iterator =
+ defusechain_instr_iterator<true, false, true, false, false, true>;
use_bundle_nodbg_iterator use_bundle_nodbg_begin(unsigned RegNo) const {
return use_bundle_nodbg_iterator(getRegUseDefListHead(RegNo));
}
@@ -593,7 +595,6 @@ public:
/// Return the register class of the specified virtual register.
/// This shouldn't be used directly unless \p Reg has a register class.
/// \see getRegClassOrNull when this might happen.
- ///
const TargetRegisterClass *getRegClass(unsigned Reg) const {
assert(VRegInfo[Reg].first.is<const TargetRegisterClass *>() &&
"Register class not set, wrong accessor");
@@ -620,7 +621,6 @@ public:
/// a register bank or has been assigned a register class.
/// \note It is possible to get the register bank from the register class via
/// RegisterBankInfo::getRegBankFromRegClass.
- ///
const RegisterBank *getRegBankOrNull(unsigned Reg) const {
const RegClassOrRegBank &Val = VRegInfo[Reg].first;
return Val.dyn_cast<const RegisterBank *>();
@@ -629,17 +629,14 @@ public:
/// Return the register bank or register class of \p Reg.
/// \note Before the register bank gets assigned (i.e., before the
/// RegBankSelect pass) \p Reg may not have either.
- ///
const RegClassOrRegBank &getRegClassOrRegBank(unsigned Reg) const {
return VRegInfo[Reg].first;
}
/// setRegClass - Set the register class of the specified virtual register.
- ///
void setRegClass(unsigned Reg, const TargetRegisterClass *RC);
/// Set the register bank to \p RegBank for \p Reg.
- ///
void setRegBank(unsigned Reg, const RegisterBank &RegBank);
void setRegClassOrRegBank(unsigned Reg,
@@ -653,7 +650,6 @@ public:
/// new register class, or NULL if no such class exists.
/// This should only be used when the constraint is known to be trivial, like
/// GR32 -> GR32_NOSP. Beware of increasing register pressure.
- ///
const TargetRegisterClass *constrainRegClass(unsigned Reg,
const TargetRegisterClass *RC,
unsigned MinNumRegs = 0);
@@ -665,12 +661,10 @@ public:
/// This method can be used after constraints have been removed from a
/// virtual register, for example after removing instructions or splitting
/// the live range.
- ///
bool recomputeRegClass(unsigned Reg);
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
- ///
unsigned createVirtualRegister(const TargetRegisterClass *RegClass);
/// Accessor for VRegToType. This accessor should only be used
@@ -704,7 +698,6 @@ public:
unsigned createIncompleteVirtualRegister();
/// getNumVirtRegs - Return the number of virtual registers created.
- ///
unsigned getNumVirtRegs() const { return VRegInfo.size(); }
/// clearVirtRegs - Remove all virtual registers (after physreg assignment).
@@ -810,7 +803,6 @@ public:
///
/// Reserved registers may belong to an allocatable register class, but the
/// target has explicitly requested that they are not used.
- ///
bool isReserved(unsigned PhysReg) const {
return getReservedRegs().test(PhysReg);
}
@@ -838,8 +830,8 @@ public:
// Iteration support for the live-ins set. It's kept in sorted order
// by register number.
- typedef std::vector<std::pair<unsigned,unsigned>>::const_iterator
- livein_iterator;
+ using livein_iterator =
+ std::vector<std::pair<unsigned,unsigned>>::const_iterator;
livein_iterator livein_begin() const { return LiveIns.begin(); }
livein_iterator livein_end() const { return LiveIns.end(); }
bool livein_empty() const { return LiveIns.empty(); }
@@ -910,10 +902,10 @@ public:
}
public:
- typedef std::iterator<std::forward_iterator_tag,
- MachineInstr, ptrdiff_t>::reference reference;
- typedef std::iterator<std::forward_iterator_tag,
- MachineInstr, ptrdiff_t>::pointer pointer;
+ using reference = std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::reference;
+ using pointer = std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::pointer;
defusechain_iterator() = default;
@@ -1016,10 +1008,10 @@ public:
}
public:
- typedef std::iterator<std::forward_iterator_tag,
- MachineInstr, ptrdiff_t>::reference reference;
- typedef std::iterator<std::forward_iterator_tag,
- MachineInstr, ptrdiff_t>::pointer pointer;
+ using reference = std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::reference;
+ using pointer = std::iterator<std::forward_iterator_tag,
+ MachineInstr, ptrdiff_t>::pointer;
defusechain_instr_iterator() = default;
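Note: a sketch of the renamed iterator types in use, walking the non-debug uses of a register (MRI and Reg are assumed to be in scope):

    void visitUses(llvm::MachineRegisterInfo &MRI, unsigned Reg) {
      for (llvm::MachineRegisterInfo::use_nodbg_iterator
               I = MRI.use_nodbg_begin(Reg),
               E = MRI.use_nodbg_end();
           I != E; ++I) {
        llvm::MachineOperand &MO = *I; // one visit per use operand
        (void)MO;
      }
    }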
diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h
index 6b2a16e1d36e..3b02ec400aba 100644
--- a/include/llvm/CodeGen/MachineScheduler.h
+++ b/include/llvm/CodeGen/MachineScheduler.h
@@ -104,10 +104,15 @@ extern cl::opt<bool> ForceBottomUp;
class LiveIntervals;
class MachineDominatorTree;
+class MachineFunction;
+class MachineInstr;
class MachineLoopInfo;
class RegisterClassInfo;
class SchedDFSResult;
class ScheduleHazardRecognizer;
+class TargetInstrInfo;
+class TargetPassConfig;
+class TargetRegisterInfo;
/// MachineSchedContext provides enough context from the MachineScheduler pass
/// for the target to instantiate a scheduler.
@@ -129,10 +134,10 @@ struct MachineSchedContext {
/// schedulers.
class MachineSchedRegistry : public MachinePassRegistryNode {
public:
- typedef ScheduleDAGInstrs *(*ScheduleDAGCtor)(MachineSchedContext *);
+ using ScheduleDAGCtor = ScheduleDAGInstrs *(*)(MachineSchedContext *);
// RegisterPassParser requires a (misnamed) FunctionPassCtor type.
- typedef ScheduleDAGCtor FunctionPassCtor;
+ using FunctionPassCtor = ScheduleDAGCtor;
static MachinePassRegistry Registry;
@@ -527,7 +532,7 @@ public:
unsigned size() const { return Queue.size(); }
- typedef std::vector<SUnit*>::iterator iterator;
+ using iterator = std::vector<SUnit*>::iterator;
iterator begin() { return Queue.begin(); }
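Note: MachineSchedRegistry is typically consumed by registering a ScheduleDAGCtor; a hedged sketch of the common in-tree pattern (createMyScheduler and the "my-sched" name are hypothetical):

    static llvm::ScheduleDAGInstrs *
    createMyScheduler(llvm::MachineSchedContext *C) {
      return new llvm::ScheduleDAGMILive(
          C, llvm::make_unique<llvm::GenericScheduler>(C));
    }
    static llvm::MachineSchedRegistry
        MySchedRegistry("my-sched", "Illustrative scheduler", createMyScheduler);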
diff --git a/include/llvm/CodeGen/PBQP/CostAllocator.h b/include/llvm/CodeGen/PBQP/CostAllocator.h
index 02d39fe383f1..bde451ae1fcc 100644
--- a/include/llvm/CodeGen/PBQP/CostAllocator.h
+++ b/include/llvm/CodeGen/PBQP/CostAllocator.h
@@ -1,4 +1,4 @@
-//===---------- CostAllocator.h - PBQP Cost Allocator -----------*- C++ -*-===//
+//===- CostAllocator.h - PBQP Cost Allocator --------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -19,26 +19,28 @@
#define LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
#include "llvm/ADT/DenseSet.h"
+#include <algorithm>
+#include <cstdint>
#include <memory>
-#include <type_traits>
namespace llvm {
namespace PBQP {
-template <typename ValueT>
-class ValuePool {
+template <typename ValueT> class ValuePool {
public:
- typedef std::shared_ptr<const ValueT> PoolRef;
+ using PoolRef = std::shared_ptr<const ValueT>;
private:
-
class PoolEntry : public std::enable_shared_from_this<PoolEntry> {
public:
template <typename ValueKeyT>
PoolEntry(ValuePool &Pool, ValueKeyT Value)
: Pool(Pool), Value(std::move(Value)) {}
+
~PoolEntry() { Pool.removeEntry(this); }
- const ValueT& getValue() const { return Value; }
+
+ const ValueT &getValue() const { return Value; }
+
private:
ValuePool &Pool;
ValueT Value;
@@ -46,10 +48,10 @@ private:
class PoolEntryDSInfo {
public:
- static inline PoolEntry* getEmptyKey() { return nullptr; }
+ static inline PoolEntry *getEmptyKey() { return nullptr; }
- static inline PoolEntry* getTombstoneKey() {
- return reinterpret_cast<PoolEntry*>(static_cast<uintptr_t>(1));
+ static inline PoolEntry *getTombstoneKey() {
+ return reinterpret_cast<PoolEntry *>(static_cast<uintptr_t>(1));
}
template <typename ValueKeyT>
@@ -66,8 +68,7 @@ private:
}
template <typename ValueKeyT1, typename ValueKeyT2>
- static
- bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) {
+ static bool isEqual(const ValueKeyT1 &C1, const ValueKeyT2 &C2) {
return C1 == C2;
}
@@ -83,10 +84,9 @@ private:
return P1 == P2;
return isEqual(P1->getValue(), P2);
}
-
};
- typedef DenseSet<PoolEntry*, PoolEntryDSInfo> EntrySetT;
+ using EntrySetT = DenseSet<PoolEntry *, PoolEntryDSInfo>;
EntrySetT EntrySet;
@@ -105,28 +105,31 @@ public:
}
};
-template <typename VectorT, typename MatrixT>
-class PoolCostAllocator {
+template <typename VectorT, typename MatrixT> class PoolCostAllocator {
private:
- typedef ValuePool<VectorT> VectorCostPool;
- typedef ValuePool<MatrixT> MatrixCostPool;
+ using VectorCostPool = ValuePool<VectorT>;
+ using MatrixCostPool = ValuePool<MatrixT>;
+
public:
- typedef VectorT Vector;
- typedef MatrixT Matrix;
- typedef typename VectorCostPool::PoolRef VectorPtr;
- typedef typename MatrixCostPool::PoolRef MatrixPtr;
+ using Vector = VectorT;
+ using Matrix = MatrixT;
+ using VectorPtr = typename VectorCostPool::PoolRef;
+ using MatrixPtr = typename MatrixCostPool::PoolRef;
+
+ template <typename VectorKeyT> VectorPtr getVector(VectorKeyT v) {
+ return VectorPool.getValue(std::move(v));
+ }
- template <typename VectorKeyT>
- VectorPtr getVector(VectorKeyT v) { return VectorPool.getValue(std::move(v)); }
+ template <typename MatrixKeyT> MatrixPtr getMatrix(MatrixKeyT m) {
+ return MatrixPool.getValue(std::move(m));
+ }
- template <typename MatrixKeyT>
- MatrixPtr getMatrix(MatrixKeyT m) { return MatrixPool.getValue(std::move(m)); }
private:
VectorCostPool VectorPool;
MatrixCostPool MatrixPool;
};
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_PBQP_COSTALLOCATOR_H
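Note: a small sketch of the pooling behavior. getValue() returns a reference-counted handle, and equal values share a single entry (this assumes Vector's equality comparison, which the pool's DenseSet relies on):

    void pooling() {
      using VecPool = llvm::PBQP::ValuePool<llvm::PBQP::Vector>;
      VecPool Pool;
      VecPool::PoolRef A = Pool.getValue(llvm::PBQP::Vector(4));
      VecPool::PoolRef B = Pool.getValue(llvm::PBQP::Vector(4));
      // A and B refer to the same pooled entry; ~PoolEntry calls
      // removeEntry() on the owning pool when the last PoolRef dies.
    }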
diff --git a/include/llvm/CodeGen/PBQP/Graph.h b/include/llvm/CodeGen/PBQP/Graph.h
index 83487e6a808a..e94878ced10d 100644
--- a/include/llvm/CodeGen/PBQP/Graph.h
+++ b/include/llvm/CodeGen/PBQP/Graph.h
@@ -1,4 +1,4 @@
-//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
+//===- Graph.h - PBQP Graph -------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,16 +11,14 @@
//
//===----------------------------------------------------------------------===//
-
#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
#define LLVM_CODEGEN_PBQP_GRAPH_H
#include "llvm/ADT/STLExtras.h"
-#include "llvm/Support/Debug.h"
#include <algorithm>
#include <cassert>
+#include <iterator>
#include <limits>
-#include <utility>
#include <vector>
namespace llvm {
@@ -28,8 +26,8 @@ namespace PBQP {
class GraphBase {
public:
- typedef unsigned NodeId;
- typedef unsigned EdgeId;
+ using NodeId = unsigned;
+ using EdgeId = unsigned;
/// @brief Returns a value representing an invalid (non-existent) node.
static NodeId invalidNodeId() {
@@ -48,32 +46,32 @@ namespace PBQP {
template <typename SolverT>
class Graph : public GraphBase {
private:
- typedef typename SolverT::CostAllocator CostAllocator;
+ using CostAllocator = typename SolverT::CostAllocator;
+
public:
- typedef typename SolverT::RawVector RawVector;
- typedef typename SolverT::RawMatrix RawMatrix;
- typedef typename SolverT::Vector Vector;
- typedef typename SolverT::Matrix Matrix;
- typedef typename CostAllocator::VectorPtr VectorPtr;
- typedef typename CostAllocator::MatrixPtr MatrixPtr;
- typedef typename SolverT::NodeMetadata NodeMetadata;
- typedef typename SolverT::EdgeMetadata EdgeMetadata;
- typedef typename SolverT::GraphMetadata GraphMetadata;
+ using RawVector = typename SolverT::RawVector;
+ using RawMatrix = typename SolverT::RawMatrix;
+ using Vector = typename SolverT::Vector;
+ using Matrix = typename SolverT::Matrix;
+ using VectorPtr = typename CostAllocator::VectorPtr;
+ using MatrixPtr = typename CostAllocator::MatrixPtr;
+ using NodeMetadata = typename SolverT::NodeMetadata;
+ using EdgeMetadata = typename SolverT::EdgeMetadata;
+ using GraphMetadata = typename SolverT::GraphMetadata;
private:
-
class NodeEntry {
public:
- typedef std::vector<EdgeId> AdjEdgeList;
- typedef AdjEdgeList::size_type AdjEdgeIdx;
- typedef AdjEdgeList::const_iterator AdjEdgeItr;
+ using AdjEdgeList = std::vector<EdgeId>;
+ using AdjEdgeIdx = AdjEdgeList::size_type;
+ using AdjEdgeItr = AdjEdgeList::const_iterator;
+
+ NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {}
static AdjEdgeIdx getInvalidAdjEdgeIdx() {
return std::numeric_limits<AdjEdgeIdx>::max();
}
- NodeEntry(VectorPtr Costs) : Costs(std::move(Costs)) {}
-
AdjEdgeIdx addAdjEdgeId(EdgeId EId) {
AdjEdgeIdx Idx = AdjEdgeIds.size();
AdjEdgeIds.push_back(EId);
@@ -96,6 +94,7 @@ namespace PBQP {
VectorPtr Costs;
NodeMetadata Metadata;
+
private:
AdjEdgeList AdjEdgeIds;
};
@@ -150,8 +149,10 @@ namespace PBQP {
NodeId getN1Id() const { return NIds[0]; }
NodeId getN2Id() const { return NIds[1]; }
+
MatrixPtr Costs;
EdgeMetadata Metadata;
+
private:
NodeId NIds[2];
typename NodeEntry::AdjEdgeIdx ThisEdgeAdjIdxs[2];
@@ -161,18 +162,20 @@ namespace PBQP {
GraphMetadata Metadata;
CostAllocator CostAlloc;
- SolverT *Solver;
+ SolverT *Solver = nullptr;
- typedef std::vector<NodeEntry> NodeVector;
- typedef std::vector<NodeId> FreeNodeVector;
+ using NodeVector = std::vector<NodeEntry>;
+ using FreeNodeVector = std::vector<NodeId>;
NodeVector Nodes;
FreeNodeVector FreeNodeIds;
- typedef std::vector<EdgeEntry> EdgeVector;
- typedef std::vector<EdgeId> FreeEdgeVector;
+ using EdgeVector = std::vector<EdgeEntry>;
+ using FreeEdgeVector = std::vector<EdgeId>;
EdgeVector Edges;
FreeEdgeVector FreeEdgeIds;
+ Graph(const Graph &Other) {}
+
// ----- INTERNAL METHODS -----
NodeEntry &getNode(NodeId NId) {
@@ -220,20 +223,18 @@ namespace PBQP {
return EId;
}
- Graph(const Graph &Other) {}
void operator=(const Graph &Other) {}
public:
-
- typedef typename NodeEntry::AdjEdgeItr AdjEdgeItr;
+ using AdjEdgeItr = typename NodeEntry::AdjEdgeItr;
class NodeItr {
public:
- typedef std::forward_iterator_tag iterator_category;
- typedef NodeId value_type;
- typedef int difference_type;
- typedef NodeId* pointer;
- typedef NodeId& reference;
+ using iterator_category = std::forward_iterator_tag;
+ using value_type = NodeId;
+ using difference_type = int;
+ using pointer = NodeId *;
+ using reference = NodeId &;
NodeItr(NodeId CurNId, const Graph &G)
: CurNId(CurNId), EndNId(G.Nodes.size()), FreeNodeIds(G.FreeNodeIds) {
@@ -283,53 +284,65 @@ namespace PBQP {
class NodeIdSet {
public:
- NodeIdSet(const Graph &G) : G(G) { }
+ NodeIdSet(const Graph &G) : G(G) {}
+
NodeItr begin() const { return NodeItr(0, G); }
NodeItr end() const { return NodeItr(G.Nodes.size(), G); }
+
bool empty() const { return G.Nodes.empty(); }
+
typename NodeVector::size_type size() const {
return G.Nodes.size() - G.FreeNodeIds.size();
}
+
private:
const Graph& G;
};
class EdgeIdSet {
public:
- EdgeIdSet(const Graph &G) : G(G) { }
+ EdgeIdSet(const Graph &G) : G(G) {}
+
EdgeItr begin() const { return EdgeItr(0, G); }
EdgeItr end() const { return EdgeItr(G.Edges.size(), G); }
+
bool empty() const { return G.Edges.empty(); }
+
typename NodeVector::size_type size() const {
return G.Edges.size() - G.FreeEdgeIds.size();
}
+
private:
const Graph& G;
};
class AdjEdgeIdSet {
public:
- AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) { }
+ AdjEdgeIdSet(const NodeEntry &NE) : NE(NE) {}
+
typename NodeEntry::AdjEdgeItr begin() const {
return NE.getAdjEdgeIds().begin();
}
+
typename NodeEntry::AdjEdgeItr end() const {
return NE.getAdjEdgeIds().end();
}
+
bool empty() const { return NE.getAdjEdgeIds().empty(); }
+
typename NodeEntry::AdjEdgeList::size_type size() const {
return NE.getAdjEdgeIds().size();
}
+
private:
const NodeEntry &NE;
};
/// @brief Construct an empty PBQP graph.
- Graph() : Solver(nullptr) {}
+ Graph() = default;
/// @brief Construct an empty PBQP graph with the given graph metadata.
- Graph(GraphMetadata Metadata)
- : Metadata(std::move(Metadata)), Solver(nullptr) {}
+ Graph(GraphMetadata Metadata) : Metadata(std::move(Metadata)) {}
/// @brief Get a reference to the graph metadata.
GraphMetadata& getMetadata() { return Metadata; }
@@ -656,7 +669,7 @@ namespace PBQP {
}
};
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
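Note: an illustrative traversal using the renamed id-set types (a sketch; SolverT stands in for any concrete solver such as RegAllocSolverImpl):

    template <typename SolverT>
    void visitAll(llvm::PBQP::Graph<SolverT> &G) {
      for (auto NId : G.nodeIds())         // backed by NodeIdSet
        for (auto EId : G.adjEdgeIds(NId)) // backed by AdjEdgeIdSet
          (void)EId;                       // each incident edge, per endpoint
    }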
diff --git a/include/llvm/CodeGen/PBQP/Math.h b/include/llvm/CodeGen/PBQP/Math.h
index 278787550a43..ba405e816d10 100644
--- a/include/llvm/CodeGen/PBQP/Math.h
+++ b/include/llvm/CodeGen/PBQP/Math.h
@@ -1,4 +1,4 @@
-//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
+//===- Math.h - PBQP Vector and Matrix classes ------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,20 +11,22 @@
#define LLVM_CODEGEN_PBQP_MATH_H
#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
#include <algorithm>
#include <cassert>
#include <functional>
+#include <memory>
namespace llvm {
namespace PBQP {
-typedef float PBQPNum;
+using PBQPNum = float;
/// \brief PBQP Vector class.
class Vector {
friend hash_code hash_value(const Vector &);
-public:
+public:
/// \brief Construct a PBQP vector of the given size.
explicit Vector(unsigned Length)
: Length(Length), Data(llvm::make_unique<PBQPNum []>(Length)) {}
@@ -120,8 +122,8 @@ OStream& operator<<(OStream &OS, const Vector &V) {
class Matrix {
private:
friend hash_code hash_value(const Matrix &);
-public:
+public:
/// \brief Construct a PBQP Matrix with the given dimensions.
Matrix(unsigned Rows, unsigned Cols) :
Rows(Rows), Cols(Cols), Data(llvm::make_unique<PBQPNum []>(Rows * Cols)) {
@@ -253,9 +255,11 @@ OStream& operator<<(OStream &OS, const Matrix &M) {
template <typename Metadata>
class MDVector : public Vector {
public:
- MDVector(const Vector &v) : Vector(v), md(*this) { }
+ MDVector(const Vector &v) : Vector(v), md(*this) {}
MDVector(Vector &&v) : Vector(std::move(v)), md(*this) { }
+
const Metadata& getMetadata() const { return md; }
+
private:
Metadata md;
};
@@ -268,9 +272,11 @@ inline hash_code hash_value(const MDVector<Metadata> &V) {
template <typename Metadata>
class MDMatrix : public Matrix {
public:
- MDMatrix(const Matrix &m) : Matrix(m), md(*this) { }
+ MDMatrix(const Matrix &m) : Matrix(m), md(*this) {}
MDMatrix(Matrix &&m) : Matrix(std::move(m)), md(*this) { }
+
const Metadata& getMetadata() const { return md; }
+
private:
Metadata md;
};
@@ -280,7 +286,7 @@ inline hash_code hash_value(const MDMatrix<Metadata> &M) {
return hash_value(static_cast<const Matrix&>(M));
}
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
#endif // LLVM_CODEGEN_PBQP_MATH_H
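Note: a small usage sketch of the PBQP math types (values invented for illustration):

    void costs() {
      llvm::PBQP::Vector V(3);    // length-3 cost vector (PBQPNum == float)
      V[0] = 1.0f;
      llvm::PBQP::Matrix M(3, 2); // 3x2 edge-cost matrix
      M[0][1] = 2.5f;             // operator[] returns a row pointer
    }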
diff --git a/include/llvm/CodeGen/PBQP/ReductionRules.h b/include/llvm/CodeGen/PBQP/ReductionRules.h
index d4a544bfe721..8aeb51936760 100644
--- a/include/llvm/CodeGen/PBQP/ReductionRules.h
+++ b/include/llvm/CodeGen/PBQP/ReductionRules.h
@@ -1,4 +1,4 @@
-//===----------- ReductionRules.h - Reduction Rules -------------*- C++ -*-===//
+//===- ReductionRules.h - Reduction Rules -----------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,6 +17,8 @@
#include "Graph.h"
#include "Math.h"
#include "Solution.h"
+#include <cassert>
+#include <limits>
namespace llvm {
namespace PBQP {
@@ -27,11 +29,11 @@ namespace PBQP {
/// neighbor. Notify the problem domain.
template <typename GraphT>
void applyR1(GraphT &G, typename GraphT::NodeId NId) {
- typedef typename GraphT::NodeId NodeId;
- typedef typename GraphT::EdgeId EdgeId;
- typedef typename GraphT::Vector Vector;
- typedef typename GraphT::Matrix Matrix;
- typedef typename GraphT::RawVector RawVector;
+ using NodeId = typename GraphT::NodeId;
+ using EdgeId = typename GraphT::EdgeId;
+ using Vector = typename GraphT::Vector;
+ using Matrix = typename GraphT::Matrix;
+ using RawVector = typename GraphT::RawVector;
assert(G.getNodeDegree(NId) == 1 &&
"R1 applied to node with degree != 1.");
@@ -71,11 +73,11 @@ namespace PBQP {
template <typename GraphT>
void applyR2(GraphT &G, typename GraphT::NodeId NId) {
- typedef typename GraphT::NodeId NodeId;
- typedef typename GraphT::EdgeId EdgeId;
- typedef typename GraphT::Vector Vector;
- typedef typename GraphT::Matrix Matrix;
- typedef typename GraphT::RawMatrix RawMatrix;
+ using NodeId = typename GraphT::NodeId;
+ using EdgeId = typename GraphT::EdgeId;
+ using Vector = typename GraphT::Vector;
+ using Matrix = typename GraphT::Matrix;
+ using RawMatrix = typename GraphT::RawMatrix;
assert(G.getNodeDegree(NId) == 2 &&
"R2 applied to node with degree != 2.");
@@ -177,9 +179,9 @@ namespace PBQP {
// state.
template <typename GraphT, typename StackT>
Solution backpropagate(GraphT& G, StackT stack) {
- typedef GraphBase::NodeId NodeId;
- typedef typename GraphT::Matrix Matrix;
- typedef typename GraphT::RawVector RawVector;
+ using NodeId = GraphBase::NodeId;
+ using Matrix = typename GraphT::Matrix;
+ using RawVector = typename GraphT::RawVector;
Solution s;
@@ -215,7 +217,7 @@ namespace PBQP {
return s;
}
-} // namespace PBQP
-} // namespace llvm
+} // end namespace PBQP
+} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_PBQP_REDUCTIONRULES_H
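Note: as a worked illustration of applyR1 (numbers invented for this example): let degree-1 node N have cost vector (0, 2) and let its sole edge to neighbor M carry the matrix [[3, 1], [0, 4]], with N's selections indexing rows. R1 adds min_i(N[i] + E[i][j]) to M's vector for each selection j of M: column 0 contributes min(0+3, 2+0) = 2 and column 1 contributes min(0+1, 2+4) = 1. N is then disconnected, and backpropagate() later recovers N's minimizing row once M's selection is known.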
diff --git a/include/llvm/CodeGen/PBQP/Solution.h b/include/llvm/CodeGen/PBQP/Solution.h
index d96b5eac4520..8d5d2374679d 100644
--- a/include/llvm/CodeGen/PBQP/Solution.h
+++ b/include/llvm/CodeGen/PBQP/Solution.h
@@ -26,7 +26,7 @@ namespace PBQP {
/// To get the selection for each node in the problem use the getSelection method.
class Solution {
private:
- typedef std::map<GraphBase::NodeId, unsigned> SelectionsMap;
+ using SelectionsMap = std::map<GraphBase::NodeId, unsigned>;
SelectionsMap selections;
unsigned r0Reductions = 0;
diff --git a/include/llvm/CodeGen/PBQPRAConstraint.h b/include/llvm/CodeGen/PBQPRAConstraint.h
index 833b9bad613f..269b7a7b3a35 100644
--- a/include/llvm/CodeGen/PBQPRAConstraint.h
+++ b/include/llvm/CodeGen/PBQPRAConstraint.h
@@ -1,4 +1,4 @@
-//===-- RegAllocPBQP.h ------------------------------------------*- C++ -*-===//
+//===- RegAllocPBQP.h -------------------------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -16,23 +16,22 @@
#ifndef LLVM_CODEGEN_PBQPRACONSTRAINT_H
#define LLVM_CODEGEN_PBQPRACONSTRAINT_H
+#include <algorithm>
#include <memory>
#include <vector>
namespace llvm {
+
namespace PBQP {
namespace RegAlloc {
+
// Forward declare PBQP graph class.
class PBQPRAGraph;
-}
-}
-class LiveIntervals;
-class MachineBlockFrequencyInfo;
-class MachineFunction;
-class TargetRegisterInfo;
+} // end namespace RegAlloc
+} // end namespace PBQP
-typedef PBQP::RegAlloc::PBQPRAGraph PBQPRAGraph;
+using PBQPRAGraph = PBQP::RegAlloc::PBQPRAGraph;
/// @brief Abstract base for classes implementing PBQP register allocation
/// constraints (e.g. Spill-costs, interference, coalescing).
@@ -40,6 +39,7 @@ class PBQPRAConstraint {
public:
virtual ~PBQPRAConstraint() = 0;
virtual void apply(PBQPRAGraph &G) = 0;
+
private:
virtual void anchor();
};
@@ -59,11 +59,13 @@ public:
if (C)
Constraints.push_back(std::move(C));
}
+
private:
std::vector<std::unique_ptr<PBQPRAConstraint>> Constraints;
+
void anchor() override;
};
-}
+} // end namespace llvm
-#endif /* LLVM_CODEGEN_PBQPRACONSTRAINT_H */
+#endif // LLVM_CODEGEN_PBQPRACONSTRAINT_H
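Note: a hedged sketch of implementing and registering a constraint against the interface above (MyConstraint is hypothetical; the AArch64 PBQP constraints touched by this patch are the in-tree example):

    class MyConstraint : public llvm::PBQPRAConstraint {
    public:
      void apply(llvm::PBQPRAGraph &G) override {
        // adjust node/edge costs in G before the solver runs
      }
    };

    void addTo(llvm::PBQPRAConstraintList &L) {
      L.addConstraint(llvm::make_unique<MyConstraint>());
    }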
diff --git a/include/llvm/CodeGen/Passes.h b/include/llvm/CodeGen/Passes.h
index 3bcfc1c4254b..96cfce5b84df 100644
--- a/include/llvm/CodeGen/Passes.h
+++ b/include/llvm/CodeGen/Passes.h
@@ -140,6 +140,9 @@ namespace llvm {
/// Greedy register allocator.
extern char &RAGreedyID;
+ /// Basic register allocator.
+ extern char &RABasicID;
+
/// VirtRegRewriter pass. Rewrite virtual registers to physical registers as
/// assigned in VirtRegMap.
extern char &VirtRegRewriterID;
diff --git a/include/llvm/CodeGen/RegAllocPBQP.h b/include/llvm/CodeGen/RegAllocPBQP.h
index 8872a5dc54a1..5b342863eb50 100644
--- a/include/llvm/CodeGen/RegAllocPBQP.h
+++ b/include/llvm/CodeGen/RegAllocPBQP.h
@@ -130,10 +130,10 @@ inline hash_code hash_value(const AllowedRegVector &OptRegs) {
/// \brief Holds graph-level metadata relevant to PBQP RA problems.
class GraphMetadata {
private:
- typedef ValuePool<AllowedRegVector> AllowedRegVecPool;
+ using AllowedRegVecPool = ValuePool<AllowedRegVector>;
public:
- typedef AllowedRegVecPool::PoolRef AllowedRegVecRef;
+ using AllowedRegVecRef = AllowedRegVecPool::PoolRef;
GraphMetadata(MachineFunction &MF,
LiveIntervals &LIS,
@@ -167,17 +167,17 @@ private:
/// \brief Holds solver state and other metadata relevant to each PBQP RA node.
class NodeMetadata {
public:
- typedef RegAlloc::AllowedRegVector AllowedRegVector;
+ using AllowedRegVector = RegAlloc::AllowedRegVector;
// The node's reduction state. The order in this enum is important,
// as it is assumed nodes can only progress up (i.e. towards being
// optimally reducible) when reducing the graph.
- typedef enum {
+ using ReductionState = enum {
Unprocessed,
NotProvablyAllocatable,
ConservativelyAllocatable,
OptimallyReducible
- } ReductionState;
+ };
NodeMetadata() = default;
@@ -267,23 +267,23 @@ private:
class RegAllocSolverImpl {
private:
- typedef MDMatrix<MatrixMetadata> RAMatrix;
+ using RAMatrix = MDMatrix<MatrixMetadata>;
public:
- typedef PBQP::Vector RawVector;
- typedef PBQP::Matrix RawMatrix;
- typedef PBQP::Vector Vector;
- typedef RAMatrix Matrix;
- typedef PBQP::PoolCostAllocator<Vector, Matrix> CostAllocator;
+ using RawVector = PBQP::Vector;
+ using RawMatrix = PBQP::Matrix;
+ using Vector = PBQP::Vector;
+ using Matrix = RAMatrix;
+ using CostAllocator = PBQP::PoolCostAllocator<Vector, Matrix>;
- typedef GraphBase::NodeId NodeId;
- typedef GraphBase::EdgeId EdgeId;
+ using NodeId = GraphBase::NodeId;
+ using EdgeId = GraphBase::EdgeId;
- typedef RegAlloc::NodeMetadata NodeMetadata;
- struct EdgeMetadata { };
- typedef RegAlloc::GraphMetadata GraphMetadata;
+ using NodeMetadata = RegAlloc::NodeMetadata;
+ struct EdgeMetadata {};
+ using GraphMetadata = RegAlloc::GraphMetadata;
- typedef PBQP::Graph<RegAllocSolverImpl> Graph;
+ using Graph = PBQP::Graph<RegAllocSolverImpl>;
RegAllocSolverImpl(Graph &G) : G(G) {}
@@ -426,7 +426,7 @@ private:
std::vector<GraphBase::NodeId> reduce() {
assert(!G.empty() && "Cannot reduce empty graph.");
- typedef GraphBase::NodeId NodeId;
+ using NodeId = GraphBase::NodeId;
std::vector<NodeId> NodeStack;
// Consume worklists.
@@ -459,7 +459,6 @@ private:
ConservativelyAllocatableNodes.erase(NItr);
NodeStack.push_back(NId);
G.disconnectAllNeighborsFromNode(NId);
-
} else if (!NotProvablyAllocatableNodes.empty()) {
NodeSet::iterator NItr =
std::min_element(NotProvablyAllocatableNodes.begin(),
@@ -493,7 +492,7 @@ private:
};
Graph& G;
- typedef std::set<NodeId> NodeSet;
+ using NodeSet = std::set<NodeId>;
NodeSet OptimallyReducibleNodes;
NodeSet ConservativelyAllocatableNodes;
NodeSet NotProvablyAllocatableNodes;
@@ -501,7 +500,7 @@ private:
class PBQPRAGraph : public PBQP::Graph<RegAllocSolverImpl> {
private:
- typedef PBQP::Graph<RegAllocSolverImpl> BaseT;
+ using BaseT = PBQP::Graph<RegAllocSolverImpl>;
public:
PBQPRAGraph(GraphMetadata Metadata) : BaseT(std::move(Metadata)) {}
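
One non-obvious conversion in this hunk is using ReductionState = enum { ... };. Defining a type inside an alias-declaration is legal C++ (the restriction applies only to alias templates) and behaves exactly like the old typedef'd unnamed enum. A minimal sketch with illustrative names:

typedef enum { OldA, OldB } OldState;   // classic C-style spelling
using NewState = enum { NewA, NewB };   // alias-declaration spelling

int main() {
  // In both cases the enumerators land in the enclosing scope.
  OldState O = OldA;
  NewState N = NewA;
  return (O == OldA && N == NewA) ? 0 : 1;
}
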
diff --git a/include/llvm/CodeGen/RegisterScavenging.h b/include/llvm/CodeGen/RegisterScavenging.h
index 1f939e72e139..ad1efe18c72d 100644
--- a/include/llvm/CodeGen/RegisterScavenging.h
+++ b/include/llvm/CodeGen/RegisterScavenging.h
@@ -204,6 +204,10 @@ private:
void setLiveInsUsed(const MachineBasicBlock &MBB);
};
+/// Replaces all frame index virtual registers with physical registers. Uses
+/// the register scavenger to find an appropriate physical register.
+void scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS);
+
} // end namespace llvm
#endif // LLVM_CODEGEN_REGISTERSCAVENGING_H
diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h
index f5f5bfd45e79..d62bb9bf0b75 100644
--- a/include/llvm/CodeGen/ScheduleDAGInstrs.h
+++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h
@@ -1,4 +1,4 @@
-//==- ScheduleDAGInstrs.h - MachineInstr Scheduling --------------*- C++ -*-==//
+//===- ScheduleDAGInstrs.h - MachineInstr Scheduling ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,22 +15,38 @@
#ifndef LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
#define LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
-#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseMultiSet.h"
#include "llvm/ADT/SparseSet.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/Support/Compiler.h"
+#include "llvm/MC/LaneBitmask.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include <cassert>
+#include <cstdint>
#include <list>
+#include <utility>
+#include <vector>
namespace llvm {
+
+ class LiveIntervals;
class MachineFrameInfo;
+ class MachineFunction;
+ class MachineInstr;
class MachineLoopInfo;
- class MachineDominatorTree;
- class RegPressureTracker;
+ class MachineOperand;
+ struct MCSchedClassDesc;
class PressureDiffs;
+ class PseudoSourceValue;
+ class RegPressureTracker;
+ class UndefValue;
+ class Value;
/// An individual mapping from virtual register number to SUnit.
struct VReg2SUnit {
@@ -70,31 +86,34 @@ namespace llvm {
/// Use a SparseMultiSet to track physical registers. Storage is only
/// allocated once for the pass. It can be cleared in constant time and reused
/// without any frees.
- typedef SparseMultiSet<PhysRegSUOper, llvm::identity<unsigned>, uint16_t>
- Reg2SUnitsMap;
+ using Reg2SUnitsMap =
+ SparseMultiSet<PhysRegSUOper, identity<unsigned>, uint16_t>;
/// Use SparseSet as a SparseMap by relying on the fact that it never
/// compares ValueT's, only unsigned keys. This allows the set to be cleared
/// between scheduling regions in constant time as long as ValueT does not
/// require a destructor.
- typedef SparseSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMap;
+ using VReg2SUnitMap = SparseSet<VReg2SUnit, VirtReg2IndexFunctor>;
/// Track local uses of virtual registers. These uses are gathered by the DAG
/// builder and may be consulted by the scheduler to avoid iterating an entire
/// vreg use list.
- typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
+ using VReg2SUnitMultiMap = SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor>;
+
- typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
- VReg2SUnitOperIdxMultiMap;
+ using VReg2SUnitOperIdxMultiMap =
+ SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>;
- typedef PointerUnion<const Value *, const PseudoSourceValue *> ValueType;
+ using ValueType = PointerUnion<const Value *, const PseudoSourceValue *>;
struct UnderlyingObject : PointerIntPair<ValueType, 1, bool> {
UnderlyingObject(ValueType V, bool MayAlias)
: PointerIntPair<ValueType, 1, bool>(V, MayAlias) {}
+
ValueType getValue() const { return getPointer(); }
bool mayAlias() const { return getInt(); }
};
- typedef SmallVector<UnderlyingObject, 4> UnderlyingObjectsVector;
+
+ using UnderlyingObjectsVector = SmallVector<UnderlyingObject, 4>;
/// A ScheduleDAG for scheduling lists of MachineInstr.
class ScheduleDAGInstrs : public ScheduleDAG {
@@ -114,10 +133,10 @@ namespace llvm {
/// reordering. A specialized scheduler can override
/// TargetInstrInfo::isSchedulingBoundary then enable this flag to indicate
/// it has taken responsibility for scheduling the terminator correctly.
- bool CanHandleTerminators;
+ bool CanHandleTerminators = false;
/// Whether lane masks should get tracked.
- bool TrackLaneMasks;
+ bool TrackLaneMasks = false;
// State specific to the current scheduling region.
// ------------------------------------------------
@@ -155,12 +174,12 @@ namespace llvm {
/// Tracks the last instructions in this region using each virtual register.
VReg2SUnitOperIdxMultiMap CurrentVRegUses;
- AliasAnalysis *AAForDep;
+ AliasAnalysis *AAForDep = nullptr;
/// Remember a generic side-effecting instruction as we proceed.
/// No other SU ever gets scheduled around it (except in the special
/// case of a huge region that gets reduced).
- SUnit *BarrierChain;
+ SUnit *BarrierChain = nullptr;
public:
/// A list of SUnits, used in Value2SUsMap, during DAG construction.
@@ -168,7 +187,7 @@ namespace llvm {
/// implementation of this data structure, such as a singly linked list
/// with a memory pool (SmallVector was tried but slow and SparseSet is not
/// applicable).
- typedef std::list<SUnit *> SUList;
+ using SUList = std::list<SUnit *>;
protected:
/// \brief A map from ValueType to SUList, used during DAG construction, as
@@ -216,13 +235,13 @@ namespace llvm {
/// For an unanalyzable memory access, this Value is used in maps.
UndefValue *UnknownValue;
- typedef std::vector<std::pair<MachineInstr *, MachineInstr *>>
- DbgValueVector;
+ using DbgValueVector =
+ std::vector<std::pair<MachineInstr *, MachineInstr *>>;
/// Remember instruction that precedes DBG_VALUE.
/// These are generated by buildSchedGraph but persist so they can be
/// referenced when emitting the final schedule.
DbgValueVector DbgValues;
- MachineInstr *FirstDbgValue;
+ MachineInstr *FirstDbgValue = nullptr;
/// Set of live physical registers for updating kill flags.
LivePhysRegs LiveRegs;
@@ -232,7 +251,7 @@ namespace llvm {
const MachineLoopInfo *mli,
bool RemoveKillFlags = false);
- ~ScheduleDAGInstrs() override {}
+ ~ScheduleDAGInstrs() override = default;
/// Gets the machine model for instruction scheduling.
const TargetSchedModel *getSchedModel() const { return &SchedModel; }
@@ -354,6 +373,7 @@ namespace llvm {
return nullptr;
return I->second;
}
+
} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_SCHEDULEDAGINSTRS_H
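
The UnderlyingObject struct above derives from PointerIntPair to fold a may-alias flag into the low bits of a pointer. A toy re-creation of the idea in plain C++ (illustrative types; LLVM's real PointerIntPair is considerably more general):

#include <cassert>
#include <cstdint>

struct PtrBoolPair {
  uintptr_t Bits = 0;

  PtrBoolPair(int *P, bool Flag)
      : Bits(reinterpret_cast<uintptr_t>(P) | uintptr_t(Flag)) {
    // An aligned pointer's low bit is guaranteed zero, so the flag can
    // live there instead of occupying a second word.
    assert((reinterpret_cast<uintptr_t>(P) & 1) == 0 && "unaligned pointer");
  }

  int *getPointer() const {
    return reinterpret_cast<int *>(Bits & ~uintptr_t(1));
  }
  bool getInt() const { return Bits & 1; }
};

int main() {
  static int X = 7;
  PtrBoolPair P(&X, /*Flag=*/true);
  return (*P.getPointer() == 7 && P.getInt()) ? 0 : 1;
}
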
diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h
index 493122b15704..4b1a375abd57 100644
--- a/include/llvm/CodeGen/SelectionDAG.h
+++ b/include/llvm/CodeGen/SelectionDAG.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ---------*- C++ -*-===//
+//===- llvm/CodeGen/SelectionDAG.h - InstSelection DAG ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -15,35 +15,72 @@
#ifndef LLVM_CODEGEN_SELECTIONDAG_H
#define LLVM_CODEGEN_SELECTIONDAG_H
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/ilist.h"
+#include "llvm/ADT/iterator.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
-#include "llvm/ADT/ilist.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/Support/Allocator.h"
#include "llvm/Support/ArrayRecycler.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/RecyclingAllocator.h"
-#include "llvm/Target/TargetMachine.h"
+#include <algorithm>
#include <cassert>
+#include <cstdint>
+#include <functional>
#include <map>
#include <string>
+#include <tuple>
+#include <utility>
#include <vector>
namespace llvm {
+class BlockAddress;
+class Constant;
+class ConstantFP;
+class ConstantInt;
+class DataLayout;
+struct fltSemantics;
+class GlobalValue;
struct KnownBits;
+class LLVMContext;
+class MachineBasicBlock;
class MachineConstantPoolValue;
-class MachineFunction;
-class MDNode;
+class MCSymbol;
class OptimizationRemarkEmitter;
class SDDbgValue;
-class TargetLowering;
+class SelectionDAG;
class SelectionDAGTargetInfo;
+class TargetLowering;
+class TargetMachine;
+class TargetSubtargetInfo;
+class Value;
class SDVTListNode : public FoldingSetNode {
friend struct FoldingSetTrait<SDVTListNode>;
+
/// A reference to an Interned FoldingSetNodeID for this node.
/// The Allocator in SelectionDAG holds the data.
/// SDVTList contains all types which are frequently accessed in SelectionDAG.
@@ -55,11 +92,13 @@ class SDVTListNode : public FoldingSetNode {
/// The hash value for SDVTList is fixed, so cache it to avoid
/// hash calculation.
unsigned HashValue;
+
public:
SDVTListNode(const FoldingSetNodeIDRef ID, const EVT *VT, unsigned int Num) :
FastID(ID), VTs(VT), NumVTs(Num) {
HashValue = ID.ComputeHash();
}
+
SDVTList getSDVTList() {
SDVTList result = {VTs, NumVTs};
return result;
@@ -72,12 +111,14 @@ template<> struct FoldingSetTrait<SDVTListNode> : DefaultFoldingSetTrait<SDVTLis
static void Profile(const SDVTListNode &X, FoldingSetNodeID& ID) {
ID = X.FastID;
}
+
static bool Equals(const SDVTListNode &X, const FoldingSetNodeID &ID,
unsigned IDHash, FoldingSetNodeID &TempID) {
if (X.HashValue != IDHash)
return false;
return ID == X.FastID;
}
+
static unsigned ComputeHash(const SDVTListNode &X, FoldingSetNodeID &TempID) {
return X.HashValue;
}
@@ -104,13 +145,13 @@ class SDDbgInfo {
BumpPtrAllocator Alloc;
SmallVector<SDDbgValue*, 32> DbgValues;
SmallVector<SDDbgValue*, 32> ByvalParmDbgValues;
- typedef DenseMap<const SDNode*, SmallVector<SDDbgValue*, 2> > DbgValMapType;
+ using DbgValMapType = DenseMap<const SDNode *, SmallVector<SDDbgValue *, 2>>;
DbgValMapType DbgValMap;
- void operator=(const SDDbgInfo&) = delete;
- SDDbgInfo(const SDDbgInfo&) = delete;
public:
- SDDbgInfo() {}
+ SDDbgInfo() = default;
+ SDDbgInfo(const SDDbgInfo &) = delete;
+ SDDbgInfo &operator=(const SDDbgInfo &) = delete;
void add(SDDbgValue *V, const SDNode *Node, bool isParameter) {
if (isParameter) {
@@ -144,14 +185,14 @@ public:
return ArrayRef<SDDbgValue*>();
}
- typedef SmallVectorImpl<SDDbgValue*>::iterator DbgIterator;
+ using DbgIterator = SmallVectorImpl<SDDbgValue*>::iterator;
+
DbgIterator DbgBegin() { return DbgValues.begin(); }
DbgIterator DbgEnd() { return DbgValues.end(); }
DbgIterator ByvalParmDbgBegin() { return ByvalParmDbgValues.begin(); }
DbgIterator ByvalParmDbgEnd() { return ByvalParmDbgValues.end(); }
};
-class SelectionDAG;
void checkForCycles(const SelectionDAG *DAG, bool force = false);
/// This is used to represent a portion of an LLVM function in a low-level
@@ -167,8 +208,8 @@ void checkForCycles(const SelectionDAG *DAG, bool force = false);
///
class SelectionDAG {
const TargetMachine &TM;
- const SelectionDAGTargetInfo *TSI;
- const TargetLowering *TLI;
+ const SelectionDAGTargetInfo *TSI = nullptr;
+ const TargetLowering *TLI = nullptr;
MachineFunction *MF;
LLVMContext *Context;
CodeGenOpt::Level OptLevel;
@@ -188,9 +229,9 @@ class SelectionDAG {
/// The AllocatorType for allocating SDNodes. We use
/// pool allocation with recycling.
- typedef RecyclingAllocator<BumpPtrAllocator, SDNode, sizeof(LargestSDNode),
- alignof(MostAlignedSDNode)>
- NodeAllocatorType;
+ using NodeAllocatorType = RecyclingAllocator<BumpPtrAllocator, SDNode,
+ sizeof(LargestSDNode),
+ alignof(MostAlignedSDNode)>;
/// Pool allocation for nodes.
NodeAllocatorType NodeAllocator;
@@ -243,9 +284,11 @@ public:
struct DAGNodeDeletedListener : public DAGUpdateListener {
std::function<void(SDNode *, SDNode *)> Callback;
+
DAGNodeDeletedListener(SelectionDAG &DAG,
std::function<void(SDNode *, SDNode *)> Callback)
: DAGUpdateListener(DAG), Callback(std::move(Callback)) {}
+
void NodeDeleted(SDNode *N, SDNode *E) override { Callback(N, E); }
};
@@ -254,7 +297,7 @@ public:
/// have legal types. This is important after type legalization since
/// any illegally typed nodes generated after this point will not experience
/// type legalization.
- bool NewNodesMustHaveLegalTypes;
+ bool NewNodesMustHaveLegalTypes = false;
private:
/// DAGUpdateListener is a friend so it can manipulate the listener stack.
@@ -262,7 +305,7 @@ private:
/// Linked list of registered DAGUpdateListener instances.
/// This stack is maintained by DAGUpdateListener RAII.
- DAGUpdateListener *UpdateListeners;
+ DAGUpdateListener *UpdateListeners = nullptr;
/// Implementation of setSubgraphColor.
/// Return whether we had to truncate the search.
@@ -316,11 +359,10 @@ private:
Node->OperandList = nullptr;
}
- void operator=(const SelectionDAG&) = delete;
- SelectionDAG(const SelectionDAG&) = delete;
-
public:
- explicit SelectionDAG(const TargetMachine &TM, llvm::CodeGenOpt::Level);
+ explicit SelectionDAG(const TargetMachine &TM, CodeGenOpt::Level);
+ SelectionDAG(const SelectionDAG &) = delete;
+ SelectionDAG &operator=(const SelectionDAG &) = delete;
~SelectionDAG();
/// Prepare this SelectionDAG to process code in the given MachineFunction.
@@ -364,12 +406,16 @@ public:
/// Convenience for setting subgraph color attribute.
void setSubgraphColor(SDNode *N, const char *Color);
- typedef ilist<SDNode>::const_iterator allnodes_const_iterator;
+ using allnodes_const_iterator = ilist<SDNode>::const_iterator;
+
allnodes_const_iterator allnodes_begin() const { return AllNodes.begin(); }
allnodes_const_iterator allnodes_end() const { return AllNodes.end(); }
- typedef ilist<SDNode>::iterator allnodes_iterator;
+
+ using allnodes_iterator = ilist<SDNode>::iterator;
+
allnodes_iterator allnodes_begin() { return AllNodes.begin(); }
allnodes_iterator allnodes_end() { return AllNodes.end(); }
+
ilist<SDNode>::size_type allnodes_size() const {
return AllNodes.size();
}
@@ -475,7 +521,6 @@ public:
//===--------------------------------------------------------------------===//
// Node creation methods.
- //
/// \brief Create a ConstantSDNode wrapping a constant value.
/// If VT is a vector type, the constant is splatted into a BUILD_VECTOR.
@@ -1251,9 +1296,11 @@ public:
SDDbgInfo::DbgIterator DbgBegin() { return DbgInfo->DbgBegin(); }
SDDbgInfo::DbgIterator DbgEnd() { return DbgInfo->DbgEnd(); }
+
SDDbgInfo::DbgIterator ByvalParmDbgBegin() {
return DbgInfo->ByvalParmDbgBegin();
}
+
SDDbgInfo::DbgIterator ByvalParmDbgEnd() {
return DbgInfo->ByvalParmDbgEnd();
}
@@ -1479,10 +1526,12 @@ private:
};
template <> struct GraphTraits<SelectionDAG*> : public GraphTraits<SDNode*> {
- typedef pointer_iterator<SelectionDAG::allnodes_iterator> nodes_iterator;
+ using nodes_iterator = pointer_iterator<SelectionDAG::allnodes_iterator>;
+
static nodes_iterator nodes_begin(SelectionDAG *G) {
return nodes_iterator(G->allnodes_begin());
}
+
static nodes_iterator nodes_end(SelectionDAG *G) {
return nodes_iterator(G->allnodes_end());
}
@@ -1493,7 +1542,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
ArrayRef<SDValue> Ops,
const SDLoc &dl, EVT MemVT,
MachineMemOperand *MMO) {
-
/// Compose node ID and try to find an existing node.
FoldingSetNodeID ID;
unsigned Opcode =
@@ -1524,6 +1572,6 @@ SDValue SelectionDAG::getTargetMemSDNode(SDVTList VTs,
return SDValue(N, 0);
}
} // end namespace llvm
-#endif
+#endif // LLVM_CODEGEN_SELECTIONDAG_H
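
DAGNodeDeletedListener above wraps a std::function and chains itself onto the DAG's listener stack (the UpdateListeners pointer shown in the same hunk). A self-contained sketch of that RAII-registration pattern, with toy types rather than LLVM's:

#include <cstdio>
#include <functional>

struct Node { int Id; };

struct Graph {
  struct Listener {
    Graph &G;
    std::function<void(Node *, Node *)> Callback;
    Listener *Next;

    Listener(Graph &G, std::function<void(Node *, Node *)> CB)
        : G(G), Callback(std::move(CB)), Next(G.Listeners) {
      G.Listeners = this; // push onto the graph's listener stack
    }
    ~Listener() { G.Listeners = Next; } // RAII unregistration
  };

  Listener *Listeners = nullptr;

  void deleteNode(Node *N, Node *ReplacedBy) {
    for (Listener *L = Listeners; L; L = L->Next)
      L->Callback(N, ReplacedBy);
  }
};

int main() {
  Graph G;
  Node A{1}, B{2};
  Graph::Listener L(G, [](Node *N, Node *E) {
    std::printf("node %d deleted, uses moved to node %d\n", N->Id, E->Id);
  });
  G.deleteNode(&A, &B);
  return 0;
}
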
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h
index 973c5aac5281..3a4feb322092 100644
--- a/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -37,6 +37,7 @@
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Metadata.h"
#include "llvm/Support/AlignOf.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
@@ -53,14 +54,18 @@
namespace llvm {
-class SelectionDAG;
+class APInt;
+class Constant;
+template <typename T> struct DenseMapInfo;
class GlobalValue;
class MachineBasicBlock;
class MachineConstantPoolValue;
+class MCSymbol;
+class raw_ostream;
class SDNode;
+class SelectionDAG;
+class Type;
class Value;
-class MCSymbol;
-template <typename T> struct DenseMapInfo;
void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr,
bool force = false);
@@ -229,13 +234,15 @@ template <> struct isPodLike<SDValue> { static const bool value = true; };
/// Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDValue> {
- typedef SDNode* SimpleType;
+ using SimpleType = SDNode *;
+
static SimpleType getSimplifiedValue(SDValue &Val) {
return Val.getNode();
}
};
template<> struct simplify_type<const SDValue> {
- typedef /*const*/ SDNode* SimpleType;
+ using SimpleType = /*const*/ SDNode *;
+
static SimpleType getSimplifiedValue(const SDValue &Val) {
return Val.getNode();
}
@@ -330,7 +337,8 @@ private:
/// simplify_type specializations - Allow casting operators to work directly on
/// SDValues as if they were SDNode*'s.
template<> struct simplify_type<SDUse> {
- typedef SDNode* SimpleType;
+ using SimpleType = SDNode *;
+
static SimpleType getSimplifiedValue(SDUse &Val) {
return Val.getNode();
}
@@ -695,10 +703,10 @@ public:
explicit use_iterator(SDUse *op) : Op(op) {}
public:
- typedef std::iterator<std::forward_iterator_tag,
- SDUse, ptrdiff_t>::reference reference;
- typedef std::iterator<std::forward_iterator_tag,
- SDUse, ptrdiff_t>::pointer pointer;
+ using reference = std::iterator<std::forward_iterator_tag,
+ SDUse, ptrdiff_t>::reference;
+ using pointer = std::iterator<std::forward_iterator_tag,
+ SDUse, ptrdiff_t>::pointer;
use_iterator() = default;
use_iterator(const use_iterator &I) : Op(I.Op) {}
@@ -824,7 +832,7 @@ public:
return OperandList[Num];
}
- typedef SDUse* op_iterator;
+ using op_iterator = SDUse *;
op_iterator op_begin() const { return OperandList; }
op_iterator op_end() const { return OperandList+NumOperands; }
@@ -896,7 +904,8 @@ public:
return getValueType(ResNo).getSizeInBits();
}
- typedef const EVT* value_iterator;
+ using value_iterator = const EVT *;
+
value_iterator value_begin() const { return ValueList; }
value_iterator value_end() const { return ValueList+NumValues; }
@@ -1822,8 +1831,7 @@ class BlockAddressSDNode : public SDNode {
BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
int64_t o, unsigned char Flags)
: SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
- BA(ba), Offset(o), TargetFlags(Flags) {
- }
+ BA(ba), Offset(o), TargetFlags(Flags) {}
public:
const BlockAddress *getBlockAddress() const { return BA; }
@@ -2154,7 +2162,7 @@ public:
/// instruction selection proper phase.
class MachineSDNode : public SDNode {
public:
- typedef MachineMemOperand **mmo_iterator;
+ using mmo_iterator = MachineMemOperand **;
private:
friend class SelectionDAG;
@@ -2226,8 +2234,8 @@ public:
};
template <> struct GraphTraits<SDNode*> {
- typedef SDNode *NodeRef;
- typedef SDNodeIterator ChildIteratorType;
+ using NodeRef = SDNode *;
+ using ChildIteratorType = SDNodeIterator;
static NodeRef getEntryNode(SDNode *N) { return N; }
@@ -2244,12 +2252,12 @@ template <> struct GraphTraits<SDNode*> {
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
-typedef AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
- BlockAddressSDNode, GlobalAddressSDNode>
- LargestSDNode;
+using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
+ BlockAddressSDNode,
+ GlobalAddressSDNode>;
/// The SDNode class with the greatest alignment requirement.
-typedef GlobalAddressSDNode MostAlignedSDNode;
+using MostAlignedSDNode = GlobalAddressSDNode;
namespace ISD {
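
The LargestSDNode alias above exists so the node pool can allocate one fixed-size slot that fits any SDNode subclass. A sketch of the same idea using the standard-library analogue of AlignedCharArrayUnion (illustrative node types, not from the patch):

#include <new>
#include <type_traits>

struct SmallNode { int A; };
struct BigNode { double B[4]; };

// Raw storage big enough and aligned enough for the largest / most aligned
// of the listed types.
using Storage = std::aligned_union_t<0, SmallNode, BigNode>;

static_assert(sizeof(Storage) >= sizeof(BigNode), "fits the largest node");
static_assert(alignof(Storage) >= alignof(BigNode), "meets max alignment");

int main() {
  Storage Buf;
  // Placement-new any covered type into the shared buffer.
  auto *N = new (&Buf) BigNode{};
  N->~BigNode();
  return 0;
}
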
diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h
index 14fc3a499a08..a275b2721b44 100644
--- a/include/llvm/CodeGen/SlotIndexes.h
+++ b/include/llvm/CodeGen/SlotIndexes.h
@@ -20,17 +20,26 @@
#define LLVM_CODEGEN_SLOTINDEXES_H
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/ilist.h"
#include "llvm/ADT/IntervalMap.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/ilist.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
+#include "llvm/Pass.h"
#include "llvm/Support/Allocator.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <utility>
namespace llvm {
+class raw_ostream;
+
/// This class represents an entry in the slot index list held in the
/// SlotIndexes pass. It should not be used directly. See the
/// SlotIndex & SlotIndexes classes for the public interface to this
@@ -40,7 +49,6 @@ namespace llvm {
unsigned index;
public:
-
IndexListEntry(MachineInstr *mi, unsigned index) : mi(mi), index(index) {}
MachineInstr* getInstr() const { return mi; }
@@ -301,7 +309,7 @@ namespace llvm {
return os;
}
- typedef std::pair<SlotIndex, MachineBasicBlock*> IdxMBBPair;
+ using IdxMBBPair = std::pair<SlotIndex, MachineBasicBlock *>;
inline bool operator<(SlotIndex V, const IdxMBBPair &IM) {
return V < IM.first;
@@ -325,7 +333,7 @@ namespace llvm {
// IndexListEntry allocator.
BumpPtrAllocator ileAllocator;
- typedef ilist<IndexListEntry> IndexList;
+ using IndexList = ilist<IndexListEntry>;
IndexList indexList;
#ifdef EXPENSIVE_CHECKS
@@ -334,7 +342,7 @@ namespace llvm {
MachineFunction *mf;
- typedef DenseMap<const MachineInstr*, SlotIndex> Mi2IndexMap;
+ using Mi2IndexMap = DenseMap<const MachineInstr *, SlotIndex>;
Mi2IndexMap mi2iMap;
/// MBBRanges - Map MBB number to (start, stop) indexes.
@@ -436,7 +444,7 @@ namespace llvm {
const MachineBasicBlock *MBB = MI.getParent();
assert(MBB && "MI must be inserted inside a basic block");
MachineBasicBlock::const_iterator I = MI, B = MBB->begin();
- for (;;) {
+ while (true) {
if (I == B)
return getMBBStartIdx(MBB);
--I;
@@ -453,7 +461,7 @@ namespace llvm {
const MachineBasicBlock *MBB = MI.getParent();
assert(MBB && "MI must be inserted inside a basic block");
MachineBasicBlock::const_iterator I = MI, E = MBB->end();
- for (;;) {
+ while (true) {
++I;
if (I == E)
return getMBBEndIdx(MBB);
@@ -497,21 +505,25 @@ namespace llvm {
/// Iterator over the idx2MBBMap (sorted pairs of a basic block's starting
/// slot index and the basic block itself).
- typedef SmallVectorImpl<IdxMBBPair>::const_iterator MBBIndexIterator;
+ using MBBIndexIterator = SmallVectorImpl<IdxMBBPair>::const_iterator;
+
/// Move iterator to the next IdxMBBPair where the SlotIndex is greater or
/// equal to \p To.
MBBIndexIterator advanceMBBIndex(MBBIndexIterator I, SlotIndex To) const {
return std::lower_bound(I, idx2MBBMap.end(), To);
}
+
/// Get an iterator pointing to the IdxMBBPair with the biggest SlotIndex
/// that is greater or equal to \p Idx.
MBBIndexIterator findMBBIndex(SlotIndex Idx) const {
return advanceMBBIndex(idx2MBBMap.begin(), Idx);
}
+
/// Returns an iterator for the begin of the idx2MBBMap.
MBBIndexIterator MBBIndexBegin() const {
return idx2MBBMap.begin();
}
+
/// Return an iterator for the end of the idx2MBBMap.
MBBIndexIterator MBBIndexEnd() const {
return idx2MBBMap.end();
diff --git a/include/llvm/CodeGen/StackMaps.h b/include/llvm/CodeGen/StackMaps.h
index a18936feea7b..8263946ed928 100644
--- a/include/llvm/CodeGen/StackMaps.h
+++ b/include/llvm/CodeGen/StackMaps.h
@@ -145,21 +145,27 @@ public:
///
/// Statepoint operands take the form:
/// <id>, <num patch bytes >, <num call arguments>, <call target>,
-/// [call arguments], <StackMaps::ConstantOp>, <calling convention>,
+/// [call arguments...],
+/// <StackMaps::ConstantOp>, <calling convention>,
/// <StackMaps::ConstantOp>, <statepoint flags>,
-/// <StackMaps::ConstantOp>, <num other args>, [other args],
-/// [gc values]
+/// <StackMaps::ConstantOp>, <num deopt args>, [deopt args...],
+/// <gc base/derived pairs...> <gc allocas...>
+/// Note that the last two sets of arguments are not currently
+/// length-prefixed.
class StatepointOpers {
-private:
+ // TODO: we should change the STATEPOINT representation so that CC and
+ // Flags are part of the meta operands, with args, deopt operands, and
+ // gc operands all prefixed by their length and a type code. This would be
+ // much more consistent.
+public:
// These values are absolute offsets into the operands of the statepoint
// instruction.
enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd };
// These values are relative offsets from the start of the statepoint meta
// arguments (i.e. the end of the call arguments).
- enum { CCOffset = 1, FlagsOffset = 3, NumVMSArgsOffset = 5 };
+ enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 };
-public:
explicit StatepointOpers(const MachineInstr *MI) : MI(MI) {}
/// Get starting index of non call related arguments
@@ -220,7 +226,7 @@ public:
// OpTypes are used to encode information about the following logical
// operand (which may consist of several MachineOperands) for the
// OpParser.
- typedef enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp } OpType;
+ using OpType = enum { DirectMemRefOp, IndirectMemRefOp, ConstantOp };
StackMaps(AsmPrinter &AP);
@@ -248,9 +254,10 @@ public:
private:
static const char *WSMP;
- typedef SmallVector<Location, 8> LocationVec;
- typedef SmallVector<LiveOutReg, 8> LiveOutVec;
- typedef MapVector<uint64_t, uint64_t> ConstantPool;
+
+ using LocationVec = SmallVector<Location, 8>;
+ using LiveOutVec = SmallVector<LiveOutReg, 8>;
+ using ConstantPool = MapVector<uint64_t, uint64_t>;
struct FunctionInfo {
uint64_t StackSize = 0;
@@ -273,8 +280,8 @@ private:
LiveOuts(std::move(LiveOuts)) {}
};
- typedef MapVector<const MCSymbol *, FunctionInfo> FnInfoMap;
- typedef std::vector<CallsiteInfo> CallsiteInfoList;
+ using FnInfoMap = MapVector<const MCSymbol *, FunctionInfo>;
+ using CallsiteInfoList = std::vector<CallsiteInfo>;
AsmPrinter &AP;
CallsiteInfoList CSInfos;
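
The two enums in StatepointOpers encode the operand layout documented above: four fixed header slots, then the call arguments, then the meta arguments. A hedged sketch of the resulting index arithmetic (the helpers are hypothetical, not LLVM's implementation):

#include <cstdint>

enum { IDPos, NBytesPos, NCallArgsPos, CallTargetPos, MetaEnd };
enum { CCOffset = 1, FlagsOffset = 3, NumDeoptOperandsOffset = 5 };

// Index of the first meta operand, given the call-argument count stored
// at NCallArgsPos.
constexpr uint32_t metaStart(uint32_t NumCallArgs) {
  return MetaEnd + NumCallArgs;
}

// The calling convention sits one slot past the ConstantOp tag that opens
// the meta arguments.
constexpr uint32_t ccIdx(uint32_t NumCallArgs) {
  return metaStart(NumCallArgs) + CCOffset;
}

static_assert(metaStart(2) == 6, "4 header slots + 2 call args");
static_assert(ccIdx(2) == 7, "CC follows the ConstantOp tag");

int main() { return 0; }
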
diff --git a/include/llvm/CodeGen/TargetSchedule.h b/include/llvm/CodeGen/TargetSchedule.h
index 1992412120aa..4365fca74bf1 100644
--- a/include/llvm/CodeGen/TargetSchedule.h
+++ b/include/llvm/CodeGen/TargetSchedule.h
@@ -55,6 +55,9 @@ public:
/// Return the MCSchedClassDesc for this instruction.
const MCSchedClassDesc *resolveSchedClass(const MachineInstr *MI) const;
+ /// \brief TargetSubtargetInfo getter.
+ const TargetSubtargetInfo *getSubtargetInfo() const { return STI; }
+
/// \brief TargetInstrInfo getter.
const TargetInstrInfo *getInstrInfo() const { return TII; }
diff --git a/include/llvm/CodeGen/WinEHFuncInfo.h b/include/llvm/CodeGen/WinEHFuncInfo.h
index dd730495a5f6..8043024626a0 100644
--- a/include/llvm/CodeGen/WinEHFuncInfo.h
+++ b/include/llvm/CodeGen/WinEHFuncInfo.h
@@ -1,4 +1,4 @@
-//===-- llvm/CodeGen/WinEHFuncInfo.h ----------------------------*- C++ -*-===//
+//===- llvm/CodeGen/WinEHFuncInfo.h -----------------------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -17,28 +17,26 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/TinyPtrVector.h"
-#include "llvm/IR/Instructions.h"
+#include <cstdint>
+#include <limits>
+#include <utility>
namespace llvm {
+
class AllocaInst;
class BasicBlock;
-class CatchReturnInst;
-class Constant;
+class FuncletPadInst;
class Function;
class GlobalVariable;
+class Instruction;
class InvokeInst;
-class IntrinsicInst;
-class LandingPadInst;
-class MCExpr;
-class MCSymbol;
class MachineBasicBlock;
-class Value;
+class MCSymbol;
// The following structs represent the .xdata tables for various
// Windows-related EH personalities.
-typedef PointerUnion<const BasicBlock *, MachineBasicBlock *> MBBOrBasicBlock;
+using MBBOrBasicBlock = PointerUnion<const BasicBlock *, MachineBasicBlock *>;
struct CxxUnwindMapEntry {
int ToState;
@@ -99,18 +97,18 @@ struct WinEHFuncInfo {
SmallVector<WinEHTryBlockMapEntry, 4> TryBlockMap;
SmallVector<SEHUnwindMapEntry, 4> SEHUnwindMap;
SmallVector<ClrEHUnwindMapEntry, 4> ClrEHUnwindMap;
- int UnwindHelpFrameIdx = INT_MAX;
- int PSPSymFrameIdx = INT_MAX;
+ int UnwindHelpFrameIdx = std::numeric_limits<int>::max();
+ int PSPSymFrameIdx = std::numeric_limits<int>::max();
int getLastStateNumber() const { return CxxUnwindMap.size() - 1; }
void addIPToStateRange(const InvokeInst *II, MCSymbol *InvokeBegin,
MCSymbol *InvokeEnd);
- int EHRegNodeFrameIndex = INT_MAX;
- int EHRegNodeEndOffset = INT_MAX;
- int EHGuardFrameIndex = INT_MAX;
- int SEHSetFrameOffset = INT_MAX;
+ int EHRegNodeFrameIndex = std::numeric_limits<int>::max();
+ int EHRegNodeEndOffset = std::numeric_limits<int>::max();
+ int EHGuardFrameIndex = std::numeric_limits<int>::max();
+ int SEHSetFrameOffset = std::numeric_limits<int>::max();
WinEHFuncInfo();
};
@@ -125,5 +123,7 @@ void calculateSEHStateNumbers(const Function *ParentFn,
WinEHFuncInfo &FuncInfo);
void calculateClrEHStateNumbers(const Function *Fn, WinEHFuncInfo &FuncInfo);
-}
+
+} // end namespace llvm
+
#endif // LLVM_CODEGEN_WINEHFUNCINFO_H
diff --git a/include/llvm/DebugInfo/CodeView/CodeView.h b/include/llvm/DebugInfo/CodeView/CodeView.h
index 4e8c8feb7a12..9890263ae2d2 100644
--- a/include/llvm/DebugInfo/CodeView/CodeView.h
+++ b/include/llvm/DebugInfo/CodeView/CodeView.h
@@ -574,6 +574,14 @@ struct FrameData {
IsFunctionStart = 1 << 2,
};
};
+
+enum class CodeViewContainer { ObjectFile, Pdb };
+
+inline uint32_t alignOf(CodeViewContainer Container) {
+ if (Container == CodeViewContainer::ObjectFile)
+ return 1;
+ return 4;
+}
}
}
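
The new alignOf(CodeViewContainer) above feeds the padToAlignment machinery added in the next file: symbol records inside a PDB are padded to 4 bytes, while object-file records are packed. A sketch of the padding arithmetic (the rounding helper here is hypothetical):

#include <cstdint>

enum class CodeViewContainer { ObjectFile, Pdb };

inline uint32_t alignOf(CodeViewContainer C) {
  return C == CodeViewContainer::ObjectFile ? 1 : 4;
}

// Round Offset up to the container's record alignment (a power of two).
inline uint32_t padTo(uint32_t Offset, CodeViewContainer C) {
  uint32_t A = alignOf(C);
  return (Offset + A - 1) & ~(A - 1);
}

int main() {
  return (padTo(6, CodeViewContainer::Pdb) == 8 &&
          padTo(6, CodeViewContainer::ObjectFile) == 6)
             ? 0
             : 1;
}
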
diff --git a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
index b3976826a316..db944c7057f7 100644
--- a/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
+++ b/include/llvm/DebugInfo/CodeView/CodeViewRecordIO.h
@@ -136,6 +136,7 @@ public:
Error mapByteVectorTail(ArrayRef<uint8_t> &Bytes);
Error mapByteVectorTail(std::vector<uint8_t> &Bytes);
+ Error padToAlignment(uint32_t Align);
Error skipPadding();
private:
diff --git a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
index e7036033d2d9..c958a95ee6de 100644
--- a/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h
@@ -60,8 +60,8 @@ public:
Error initialize(BinaryStreamReader Reader);
Error initialize(BinaryStreamRef Stream);
- Iterator begin() { return Checksums.begin(); }
- Iterator end() { return Checksums.end(); }
+ Iterator begin() const { return Checksums.begin(); }
+ Iterator end() const { return Checksums.end(); }
const FileChecksumArray &getArray() const { return Checksums; }
diff --git a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
index e2cfc3c99233..60440700c265 100644
--- a/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
+++ b/include/llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h
@@ -74,8 +74,13 @@ private:
class DebugInlineeLinesSubsection final : public DebugSubsection {
public:
+ struct Entry {
+ std::vector<support::ulittle32_t> ExtraFiles;
+ InlineeSourceLineHeader Header;
+ };
+
DebugInlineeLinesSubsection(DebugChecksumsSubsection &Checksums,
- bool HasExtraFiles);
+ bool HasExtraFiles = false);
static bool classof(const DebugSubsection *S) {
return S->kind() == DebugSubsectionKind::InlineeLines;
@@ -87,16 +92,18 @@ public:
void addInlineSite(TypeIndex FuncId, StringRef FileName, uint32_t SourceLine);
void addExtraFile(StringRef FileName);
+ bool hasExtraFiles() const { return HasExtraFiles; }
+ void setHasExtraFiles(bool Has) { HasExtraFiles = Has; }
+
+ std::vector<Entry>::const_iterator begin() const { return Entries.begin(); }
+ std::vector<Entry>::const_iterator end() const { return Entries.end(); }
+
private:
DebugChecksumsSubsection &Checksums;
bool HasExtraFiles = false;
uint32_t ExtraFileCount = 0;
- struct Entry {
- std::vector<support::ulittle32_t> ExtraFiles;
- InlineeSourceLineHeader Header;
- };
std::vector<Entry> Entries;
};
}
diff --git a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
index b2e1131e5968..847259c5ceac 100644
--- a/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
+++ b/include/llvm/DebugInfo/CodeView/DebugSubsectionRecord.h
@@ -31,28 +31,32 @@ struct DebugSubsectionHeader {
class DebugSubsectionRecord {
public:
DebugSubsectionRecord();
- DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data);
+ DebugSubsectionRecord(DebugSubsectionKind Kind, BinaryStreamRef Data,
+ CodeViewContainer Container);
- static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info);
+ static Error initialize(BinaryStreamRef Stream, DebugSubsectionRecord &Info,
+ CodeViewContainer Container);
uint32_t getRecordLength() const;
DebugSubsectionKind kind() const;
BinaryStreamRef getRecordData() const;
private:
+ CodeViewContainer Container;
DebugSubsectionKind Kind;
BinaryStreamRef Data;
};
class DebugSubsectionRecordBuilder {
public:
- DebugSubsectionRecordBuilder(DebugSubsectionKind Kind, DebugSubsection &Frag);
+ DebugSubsectionRecordBuilder(std::unique_ptr<DebugSubsection> Subsection,
+ CodeViewContainer Container);
uint32_t calculateSerializedLength();
Error commit(BinaryStreamWriter &Writer);
private:
- DebugSubsectionKind Kind;
- DebugSubsection &Frag;
+ std::unique_ptr<DebugSubsection> Subsection;
+ CodeViewContainer Container;
};
} // namespace codeview
@@ -62,7 +66,12 @@ template <> struct VarStreamArrayExtractor<codeview::DebugSubsectionRecord> {
static Error extract(BinaryStreamRef Stream, uint32_t &Length,
codeview::DebugSubsectionRecord &Info) {
- if (auto EC = codeview::DebugSubsectionRecord::initialize(Stream, Info))
+ // FIXME: We need to pass the container type through to this function, but
+ // VarStreamArray doesn't easily support stateful contexts. In practice
+ // this isn't super important since the subsection header describes its
+ // length and we can just skip it. It's more important when writing.
+ if (auto EC = codeview::DebugSubsectionRecord::initialize(
+ Stream, Info, codeview::CodeViewContainer::Pdb))
return EC;
Length = Info.getRecordLength();
return Error::success();
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
index 428ff153d5d1..7080b0480757 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDeserializer.h
@@ -24,9 +24,9 @@ namespace codeview {
class SymbolVisitorDelegate;
class SymbolDeserializer : public SymbolVisitorCallbacks {
struct MappingInfo {
- explicit MappingInfo(ArrayRef<uint8_t> RecordData)
+ MappingInfo(ArrayRef<uint8_t> RecordData, CodeViewContainer Container)
: Stream(RecordData, llvm::support::little), Reader(Stream),
- Mapping(Reader) {}
+ Mapping(Reader, Container) {}
BinaryByteStream Stream;
BinaryStreamReader Reader;
@@ -35,7 +35,9 @@ class SymbolDeserializer : public SymbolVisitorCallbacks {
public:
template <typename T> static Error deserializeAs(CVSymbol Symbol, T &Record) {
- SymbolDeserializer S(nullptr);
+ // If we're just deserializing one record, then don't worry about alignment
+ // as there's nothing that comes after.
+ SymbolDeserializer S(nullptr, CodeViewContainer::ObjectFile);
if (auto EC = S.visitSymbolBegin(Symbol))
return EC;
if (auto EC = S.visitKnownRecord(Symbol, Record))
@@ -45,12 +47,13 @@ public:
return Error::success();
}
- explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate)
- : Delegate(Delegate) {}
+ explicit SymbolDeserializer(SymbolVisitorDelegate *Delegate,
+ CodeViewContainer Container)
+ : Delegate(Delegate), Container(Container) {}
Error visitSymbolBegin(CVSymbol &Record) override {
assert(!Mapping && "Already in a symbol mapping!");
- Mapping = llvm::make_unique<MappingInfo>(Record.content());
+ Mapping = llvm::make_unique<MappingInfo>(Record.content(), Container);
return Mapping->Mapping.visitSymbolBegin(Record);
}
Error visitSymbolEnd(CVSymbol &Record) override {
@@ -78,6 +81,7 @@ private:
}
SymbolVisitorDelegate *Delegate;
+ CodeViewContainer Container;
std::unique_ptr<MappingInfo> Mapping;
};
}
diff --git a/include/llvm/DebugInfo/CodeView/SymbolDumper.h b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
index e91065dcf87e..293daa851bdd 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolDumper.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolDumper.h
@@ -26,9 +26,11 @@ class TypeCollection;
class CVSymbolDumper {
public:
CVSymbolDumper(ScopedPrinter &W, TypeCollection &Types,
+ CodeViewContainer Container,
std::unique_ptr<SymbolDumpDelegate> ObjDelegate,
bool PrintRecordBytes)
- : W(W), Types(Types), ObjDelegate(std::move(ObjDelegate)),
+ : W(W), Types(Types), Container(Container),
+ ObjDelegate(std::move(ObjDelegate)),
PrintRecordBytes(PrintRecordBytes) {}
/// Dumps one type record. Returns false if there was a type parsing error,
@@ -44,6 +46,7 @@ public:
private:
ScopedPrinter &W;
TypeCollection &Types;
+ CodeViewContainer Container;
std::unique_ptr<SymbolDumpDelegate> ObjDelegate;
bool PrintRecordBytes;
diff --git a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
index 5d072a3b2723..391e8f127665 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolRecordMapping.h
@@ -20,8 +20,12 @@ class BinaryStreamWriter;
namespace codeview {
class SymbolRecordMapping : public SymbolVisitorCallbacks {
public:
- explicit SymbolRecordMapping(BinaryStreamReader &Reader) : IO(Reader) {}
- explicit SymbolRecordMapping(BinaryStreamWriter &Writer) : IO(Writer) {}
+ explicit SymbolRecordMapping(BinaryStreamReader &Reader,
+ CodeViewContainer Container)
+ : IO(Reader), Container(Container) {}
+ explicit SymbolRecordMapping(BinaryStreamWriter &Writer,
+ CodeViewContainer Container)
+ : IO(Writer), Container(Container) {}
Error visitSymbolBegin(CVSymbol &Record) override;
Error visitSymbolEnd(CVSymbol &Record) override;
@@ -35,6 +39,7 @@ private:
Optional<SymbolKind> Kind;
CodeViewRecordIO IO;
+ CodeViewContainer Container;
};
}
}
diff --git a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
index a8fe1a3ae1d0..42adbdb4e20f 100644
--- a/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
+++ b/include/llvm/DebugInfo/CodeView/SymbolSerializer.h
@@ -46,17 +46,18 @@ class SymbolSerializer : public SymbolVisitorCallbacks {
public:
template <typename SymType>
- static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage) {
+ static CVSymbol writeOneSymbol(SymType &Sym, BumpPtrAllocator &Storage,
+ CodeViewContainer Container) {
CVSymbol Result;
Result.Type = static_cast<SymbolKind>(Sym.Kind);
- SymbolSerializer Serializer(Storage);
+ SymbolSerializer Serializer(Storage, Container);
consumeError(Serializer.visitSymbolBegin(Result));
consumeError(Serializer.visitKnownRecord(Result, Sym));
consumeError(Serializer.visitSymbolEnd(Result));
return Result;
}
- explicit SymbolSerializer(BumpPtrAllocator &Storage);
+ SymbolSerializer(BumpPtrAllocator &Storage, CodeViewContainer Container);
virtual Error visitSymbolBegin(CVSymbol &Record) override;
virtual Error visitSymbolEnd(CVSymbol &Record) override;
diff --git a/include/llvm/DebugInfo/MSF/MappedBlockStream.h b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
index d68f5f70c83e..36dce393fc66 100644
--- a/include/llvm/DebugInfo/MSF/MappedBlockStream.h
+++ b/include/llvm/DebugInfo/MSF/MappedBlockStream.h
@@ -44,17 +44,19 @@ class MappedBlockStream : public BinaryStream {
public:
static std::unique_ptr<MappedBlockStream>
createStream(uint32_t BlockSize, const MSFStreamLayout &Layout,
- BinaryStreamRef MsfData);
+ BinaryStreamRef MsfData, BumpPtrAllocator &Allocator);
static std::unique_ptr<MappedBlockStream>
createIndexedStream(const MSFLayout &Layout, BinaryStreamRef MsfData,
- uint32_t StreamIndex);
+ uint32_t StreamIndex, BumpPtrAllocator &Allocator);
static std::unique_ptr<MappedBlockStream>
- createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData);
+ createFpmStream(const MSFLayout &Layout, BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator);
static std::unique_ptr<MappedBlockStream>
- createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData);
+ createDirectoryStream(const MSFLayout &Layout, BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator);
llvm::support::endianness getEndian() const override {
return llvm::support::little;
@@ -67,9 +69,7 @@ public:
uint32_t getLength() override;
- uint32_t getNumBytesCopied() const;
-
- llvm::BumpPtrAllocator &getAllocator() { return Pool; }
+ llvm::BumpPtrAllocator &getAllocator() { return Allocator; }
void invalidateCache();
@@ -79,7 +79,7 @@ public:
protected:
MappedBlockStream(uint32_t BlockSize, const MSFStreamLayout &StreamLayout,
- BinaryStreamRef MsfData);
+ BinaryStreamRef MsfData, BumpPtrAllocator &Allocator);
private:
const MSFStreamLayout &getStreamLayout() const { return StreamLayout; }
@@ -94,7 +94,15 @@ private:
BinaryStreamRef MsfData;
typedef MutableArrayRef<uint8_t> CacheEntry;
- llvm::BumpPtrAllocator Pool;
+
+ // We just store the allocator by reference. We use it to allocate
+ // contiguous memory for things like arrays or strings that cross a block
+ // boundary, and that memory is expected to outlive the stream. For example,
+ // someone could create a stream, read some records, then close the stream,
+ // and we would like outstanding references to those records to remain valid
+ // since the entire file is mapped anyway. Because of that, the user must
+ // supply the allocator from which memory for records that span block
+ // boundaries is allocated.
+ BumpPtrAllocator &Allocator;
DenseMap<uint32_t, std::vector<CacheEntry>> CacheMap;
};
@@ -102,18 +110,20 @@ class WritableMappedBlockStream : public WritableBinaryStream {
public:
static std::unique_ptr<WritableMappedBlockStream>
createStream(uint32_t BlockSize, const MSFStreamLayout &Layout,
- WritableBinaryStreamRef MsfData);
+ WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator);
static std::unique_ptr<WritableMappedBlockStream>
createIndexedStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData,
- uint32_t StreamIndex);
+ uint32_t StreamIndex, BumpPtrAllocator &Allocator);
static std::unique_ptr<WritableMappedBlockStream>
createDirectoryStream(const MSFLayout &Layout,
- WritableBinaryStreamRef MsfData);
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator);
static std::unique_ptr<WritableMappedBlockStream>
- createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData);
+ createFpmStream(const MSFLayout &Layout, WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator);
llvm::support::endianness getEndian() const override {
return llvm::support::little;
@@ -139,7 +149,8 @@ public:
protected:
WritableMappedBlockStream(uint32_t BlockSize,
const MSFStreamLayout &StreamLayout,
- WritableBinaryStreamRef MsfData);
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator);
private:
MappedBlockStream ReadInterface;
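
The comment above explains why MappedBlockStream now borrows its allocator instead of owning it: bytes copied for records that span block boundaries must outlive the stream. A self-contained sketch of that ownership shape with toy types:

#include <cstddef>
#include <cstring>
#include <vector>

struct Arena {
  std::vector<char *> Chunks;
  char *allocate(std::size_t N) {
    Chunks.push_back(new char[N]);
    return Chunks.back();
  }
  ~Arena() {
    for (char *C : Chunks)
      delete[] C;
  }
};

struct BlockStream {
  Arena &A; // borrowed, not owned; must outlive this stream
  explicit BlockStream(Arena &A) : A(A) {}

  // A read that crosses a block boundary copies into the arena, so the
  // returned pointer stays valid after the stream is destroyed.
  const char *readContiguous(const char *Src, std::size_t N) {
    char *Buf = A.allocate(N);
    std::memcpy(Buf, Src, N);
    return Buf;
  }
};

int main() {
  Arena A;
  const char *P;
  {
    BlockStream S(A);
    P = S.readContiguous("split record", 13);
  } // stream gone; P is still valid because the arena owns the bytes
  return P[0] == 's' ? 0 : 1;
}
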
diff --git a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
index e5858d0f45e3..2ff166b24e68 100644
--- a/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
+++ b/include/llvm/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.h
@@ -49,11 +49,8 @@ public:
void setObjFileName(StringRef Name);
void addSymbol(codeview::CVSymbol Symbol);
- void addC13Fragment(std::unique_ptr<codeview::DebugLinesSubsection> Lines);
- void addC13Fragment(
- std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees);
- void setC13FileChecksums(
- std::unique_ptr<codeview::DebugChecksumsSubsection> Checksums);
+ void
+ addDebugSubsection(std::unique_ptr<codeview::DebugSubsection> Subsection);
uint16_t getStreamIndex() const;
StringRef getModuleName() const { return ModuleName; }
@@ -83,10 +80,6 @@ private:
std::vector<std::string> SourceFiles;
std::vector<codeview::CVSymbol> Symbols;
- std::unique_ptr<codeview::DebugChecksumsSubsection> ChecksumInfo;
- std::vector<std::unique_ptr<codeview::DebugLinesSubsection>> LineInfo;
- std::vector<std::unique_ptr<codeview::DebugInlineeLinesSubsection>> Inlinees;
-
std::vector<std::unique_ptr<codeview::DebugSubsectionRecordBuilder>>
C13Builders;
diff --git a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
index 822ce3ce13d3..a8121978d882 100644
--- a/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/ModuleDebugStream.h
@@ -12,6 +12,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/DebugInfo/CodeView/CVRecord.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
#include "llvm/DebugInfo/MSF/MappedBlockStream.h"
@@ -25,7 +26,7 @@ class PDBFile;
class DbiModuleDescriptor;
class ModuleDebugStreamRef {
- typedef codeview::DebugSubsectionArray::Iterator LinesAndChecksumsIterator;
+ typedef codeview::DebugSubsectionArray::Iterator DebugSubsectionIterator;
public:
ModuleDebugStreamRef(const DbiModuleDescriptor &Module,
@@ -39,12 +40,15 @@ public:
iterator_range<codeview::CVSymbolArray::Iterator>
symbols(bool *HadError) const;
- llvm::iterator_range<LinesAndChecksumsIterator> linesAndChecksums() const;
+ llvm::iterator_range<DebugSubsectionIterator> subsections() const;
- bool hasLineInfo() const;
+ bool hasDebugSubsections() const;
Error commit();
+ Expected<codeview::DebugChecksumsSubsectionRef>
+ findChecksumsSubsection() const;
+
private:
const DbiModuleDescriptor &Mod;
@@ -57,7 +61,7 @@ private:
BinaryStreamRef C13LinesSubstream;
BinaryStreamRef GlobalRefsSubstream;
- codeview::DebugSubsectionArray LinesAndChecksums;
+ codeview::DebugSubsectionArray Subsections;
};
}
}
diff --git a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
index 6aeb0a5479cb..28a14d7356d2 100644
--- a/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
+++ b/include/llvm/DebugInfo/PDB/Native/PDBStringTable.h
@@ -45,6 +45,8 @@ public:
FixedStreamArray<support::ulittle32_t> name_ids() const;
+ codeview::DebugStringTableSubsectionRef getStringTable() const;
+
private:
Error readHeader(BinaryStreamReader &Reader);
Error readStrings(BinaryStreamReader &Reader);
diff --git a/include/llvm/DebugInfo/PDB/Native/TpiStream.h b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
index 17fba9991c2e..0ee697696ca5 100644
--- a/include/llvm/DebugInfo/PDB/Native/TpiStream.h
+++ b/include/llvm/DebugInfo/PDB/Native/TpiStream.h
@@ -34,8 +34,7 @@ class TpiStream {
friend class TpiStreamBuilder;
public:
- TpiStream(const PDBFile &File,
- std::unique_ptr<msf::MappedBlockStream> Stream);
+ TpiStream(PDBFile &File, std::unique_ptr<msf::MappedBlockStream> Stream);
~TpiStream();
Error reload();
@@ -61,7 +60,7 @@ public:
Error commit();
private:
- const PDBFile &Pdb;
+ PDBFile &Pdb;
std::unique_ptr<msf::MappedBlockStream> Stream;
std::unique_ptr<codeview::LazyRandomTypeCollection> Types;
diff --git a/include/llvm/IR/DIBuilder.h b/include/llvm/IR/DIBuilder.h
index 4afb5d9d63b2..8e6bb4baccaf 100644
--- a/include/llvm/IR/DIBuilder.h
+++ b/include/llvm/IR/DIBuilder.h
@@ -86,6 +86,10 @@ namespace llvm {
/// Construct any deferred debug info descriptors.
void finalize();
+ /// Finalize a specific subprogram - no new variables may be added to this
+ /// subprogram afterwards.
+ void finalizeSubprogram(DISubprogram *SP);
+
/// A CompileUnit provides an anchor for all debugging
/// information generated during this instance of compilation.
/// \param Lang Source programming language, eg. dwarf::DW_LANG_C99
diff --git a/include/llvm/IR/DebugLoc.h b/include/llvm/IR/DebugLoc.h
index aa74f361cda2..eef1212abc4b 100644
--- a/include/llvm/IR/DebugLoc.h
+++ b/include/llvm/IR/DebugLoc.h
@@ -90,12 +90,6 @@ namespace llvm {
DenseMap<const MDNode *, MDNode *> &Cache,
bool ReplaceLast = false);
- /// Reparent all debug locations referenced by \c I that belong to \c OrigSP
- /// to become (possibly indirect) children of \c NewSP.
- static void reparentDebugInfo(Instruction &I, DISubprogram *OrigSP,
- DISubprogram *NewSP,
- DenseMap<const MDNode *, MDNode *> &Cache);
-
unsigned getLine() const;
unsigned getCol() const;
MDNode *getScope() const;
diff --git a/include/llvm/IR/ModuleSummaryIndex.h b/include/llvm/IR/ModuleSummaryIndex.h
index c46c609609e2..757ddf6cf46b 100644
--- a/include/llvm/IR/ModuleSummaryIndex.h
+++ b/include/llvm/IR/ModuleSummaryIndex.h
@@ -134,16 +134,18 @@ public:
/// be renamed or references something that can't be renamed).
unsigned NotEligibleToImport : 1;
- /// Indicate that the global value must be considered a live root for
- /// index-based liveness analysis. Used for special LLVM values such as
- /// llvm.global_ctors that the linker does not know about.
- unsigned LiveRoot : 1;
+ /// In per-module summary, indicate that the global value must be considered
+ /// a live root for index-based liveness analysis. Used for special LLVM
+ /// values such as llvm.global_ctors that the linker does not know about.
+ ///
+ /// In combined summary, indicate that the global value is live.
+ unsigned Live : 1;
/// Convenience Constructors
explicit GVFlags(GlobalValue::LinkageTypes Linkage,
- bool NotEligibleToImport, bool LiveRoot)
+ bool NotEligibleToImport, bool Live)
: Linkage(Linkage), NotEligibleToImport(NotEligibleToImport),
- LiveRoot(LiveRoot) {}
+ Live(Live) {}
};
private:
@@ -172,6 +174,8 @@ private:
/// are listed in the derived FunctionSummary object.
std::vector<ValueInfo> RefEdgeList;
+ bool isLive() const { return Flags.Live; }
+
protected:
GlobalValueSummary(SummaryKind K, GVFlags Flags, std::vector<ValueInfo> Refs)
: Kind(K), Flags(Flags), RefEdgeList(std::move(Refs)) {}
@@ -213,19 +217,17 @@ public:
/// Return true if this global value can't be imported.
bool notEligibleToImport() const { return Flags.NotEligibleToImport; }
- /// Return true if this global value must be considered a root for live
- /// value analysis on the index.
- bool liveRoot() const { return Flags.LiveRoot; }
-
- /// Flag that this global value must be considered a root for live
- /// value analysis on the index.
- void setLiveRoot() { Flags.LiveRoot = true; }
+ void setLive(bool Live) { Flags.Live = Live; }
/// Flag that this global value cannot be imported.
void setNotEligibleToImport() { Flags.NotEligibleToImport = true; }
/// Return the list of values referenced by this global value definition.
ArrayRef<ValueInfo> refs() const { return RefEdgeList; }
+
+ friend class ModuleSummaryIndex;
+ friend void computeDeadSymbols(class ModuleSummaryIndex &,
+ const DenseSet<GlobalValue::GUID> &);
};
/// \brief Alias summary information.
@@ -535,6 +537,11 @@ private:
/// GUIDs, it will be mapped to 0.
std::map<GlobalValue::GUID, GlobalValue::GUID> OidGuidMap;
+ /// Indicates that summary-based GlobalValue GC has run, and values with
+ /// GVFlags::Live==false are really dead. Otherwise, all values must be
+ /// considered live.
+ bool WithGlobalValueDeadStripping = false;
+
// YAML I/O support.
friend yaml::MappingTraits<ModuleSummaryIndex>;
@@ -550,6 +557,17 @@ public:
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
size_t size() const { return GlobalValueMap.size(); }
+ bool withGlobalValueDeadStripping() const {
+ return WithGlobalValueDeadStripping;
+ }
+ void setWithGlobalValueDeadStripping() {
+ WithGlobalValueDeadStripping = true;
+ }
+
+ bool isGlobalValueLive(const GlobalValueSummary *GVS) const {
+ return !WithGlobalValueDeadStripping || GVS->isLive();
+ }
+
/// Return a ValueInfo for GUID if it exists, otherwise return ValueInfo().
ValueInfo getValueInfo(GlobalValue::GUID GUID) const {
auto I = GlobalValueMap.find(GUID);
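
isGlobalValueLive above is deliberately conservative: the Live bit only means anything once the summary-based dead-stripping pass has run and set the index-wide flag. A small sketch of the two-phase semantics (toy types mirroring the code shown):

struct Summary { bool Live; };

struct Index {
  bool WithGlobalValueDeadStripping = false;
  bool isGlobalValueLive(const Summary &S) const {
    return !WithGlobalValueDeadStripping || S.Live;
  }
};

int main() {
  Index I;
  Summary Dead{false};
  bool Before = I.isGlobalValueLive(Dead); // true: GC has not run yet
  I.WithGlobalValueDeadStripping = true;
  bool After = I.isGlobalValueLive(Dead);  // false: the flag is now real
  return (Before && !After) ? 0 : 1;
}
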
diff --git a/include/llvm/IR/ModuleSummaryIndexYAML.h b/include/llvm/IR/ModuleSummaryIndexYAML.h
index 78fdb602027d..891d84c2dbca 100644
--- a/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -128,6 +128,8 @@ template <> struct MappingTraits<TypeIdSummary> {
};
struct FunctionSummaryYaml {
+ unsigned Linkage;
+ bool NotEligibleToImport, Live;
std::vector<uint64_t> TypeTests;
std::vector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
TypeCheckedLoadVCalls;
@@ -168,6 +170,9 @@ namespace yaml {
template <> struct MappingTraits<FunctionSummaryYaml> {
static void mapping(IO &io, FunctionSummaryYaml& summary) {
+ io.mapOptional("Linkage", summary.Linkage);
+ io.mapOptional("NotEligibleToImport", summary.NotEligibleToImport);
+ io.mapOptional("Live", summary.Live);
io.mapOptional("TypeTests", summary.TypeTests);
io.mapOptional("TypeTestAssumeVCalls", summary.TypeTestAssumeVCalls);
io.mapOptional("TypeCheckedLoadVCalls", summary.TypeCheckedLoadVCalls);
@@ -199,12 +204,12 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
}
auto &Elem = V[KeyInt];
for (auto &FSum : FSums) {
- GlobalValueSummary::GVFlags GVFlags(GlobalValue::ExternalLinkage, false,
- false);
Elem.SummaryList.push_back(llvm::make_unique<FunctionSummary>(
- GVFlags, 0, ArrayRef<ValueInfo>{},
- ArrayRef<FunctionSummary::EdgeTy>{}, std::move(FSum.TypeTests),
- std::move(FSum.TypeTestAssumeVCalls),
+ GlobalValueSummary::GVFlags(
+ static_cast<GlobalValue::LinkageTypes>(FSum.Linkage),
+ FSum.NotEligibleToImport, FSum.Live),
+ 0, ArrayRef<ValueInfo>{}, ArrayRef<FunctionSummary::EdgeTy>{},
+ std::move(FSum.TypeTests), std::move(FSum.TypeTestAssumeVCalls),
std::move(FSum.TypeCheckedLoadVCalls),
std::move(FSum.TypeTestAssumeConstVCalls),
std::move(FSum.TypeCheckedLoadConstVCalls)));
@@ -216,8 +221,10 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
for (auto &Sum : P.second.SummaryList) {
if (auto *FSum = dyn_cast<FunctionSummary>(Sum.get()))
FSums.push_back(FunctionSummaryYaml{
- FSum->type_tests(), FSum->type_test_assume_vcalls(),
- FSum->type_checked_load_vcalls(),
+ FSum->flags().Linkage,
+ static_cast<bool>(FSum->flags().NotEligibleToImport),
+ static_cast<bool>(FSum->flags().Live), FSum->type_tests(),
+ FSum->type_test_assume_vcalls(), FSum->type_checked_load_vcalls(),
FSum->type_test_assume_const_vcalls(),
FSum->type_checked_load_const_vcalls()});
}
@@ -231,6 +238,8 @@ template <> struct MappingTraits<ModuleSummaryIndex> {
static void mapping(IO &io, ModuleSummaryIndex& index) {
io.mapOptional("GlobalValueMap", index.GlobalValueMap);
io.mapOptional("TypeIdMap", index.TypeIdMap);
+ io.mapOptional("WithGlobalValueDeadStripping",
+ index.WithGlobalValueDeadStripping);
}
};
diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h
index f01607614a0c..a5f0130f79f4 100644
--- a/include/llvm/IR/Statepoint.h
+++ b/include/llvm/IR/Statepoint.h
@@ -228,24 +228,24 @@ public:
return cast<ConstantInt>(NumVMSArgs)->getZExtValue();
}
- typename CallSiteTy::arg_iterator vm_state_begin() const {
+ typename CallSiteTy::arg_iterator deopt_begin() const {
auto I = gc_transition_args_end() + 1;
assert((getCallSite().arg_end() - I) >= 0);
return I;
}
- typename CallSiteTy::arg_iterator vm_state_end() const {
- auto I = vm_state_begin() + getNumTotalVMSArgs();
+ typename CallSiteTy::arg_iterator deopt_end() const {
+ auto I = deopt_begin() + getNumTotalVMSArgs();
assert((getCallSite().arg_end() - I) >= 0);
return I;
}
/// range adapter for vm state arguments
- iterator_range<arg_iterator> vm_state_args() const {
- return make_range(vm_state_begin(), vm_state_end());
+ iterator_range<arg_iterator> deopt_operands() const {
+ return make_range(deopt_begin(), deopt_end());
}
typename CallSiteTy::arg_iterator gc_args_begin() const {
- return vm_state_end();
+ return deopt_end();
}
typename CallSiteTy::arg_iterator gc_args_end() const {
return getCallSite().arg_end();
@@ -289,8 +289,8 @@ public:
(void)arg_end();
(void)gc_transition_args_begin();
(void)gc_transition_args_end();
- (void)vm_state_begin();
- (void)vm_state_end();
+ (void)deopt_begin();
+ (void)deopt_end();
(void)gc_args_begin();
(void)gc_args_end();
}
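For reference, a hedged usage sketch of the renamed accessors; the helper below is illustrative, not part of the patch, and is templated so it works for either statepoint wrapper type declared in this header.

    #include "llvm/IR/Statepoint.h"

    // Illustrative helper: walks the renamed deopt ("vm state") operand range.
    template <typename StatepointTy>
    static unsigned countDeoptOperands(const StatepointTy &SP) {
      unsigned N = 0;
      for (auto I = SP.deopt_begin(), E = SP.deopt_end(); I != E; ++I)
        ++N; // each iteration visits one deopt operand (was vm_state_*)
      return N;
    }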
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h
index 5b9796d4fba6..abb0aa3e3caf 100644
--- a/include/llvm/InitializePasses.h
+++ b/include/llvm/InitializePasses.h
@@ -86,7 +86,6 @@ void initializeCFGOnlyPrinterLegacyPassPass(PassRegistry&);
void initializeCFGOnlyViewerLegacyPassPass(PassRegistry&);
void initializeCFGPrinterLegacyPassPass(PassRegistry&);
void initializeCFGSimplifyPassPass(PassRegistry&);
-void initializeLateCFGSimplifyPassPass(PassRegistry&);
void initializeCFGViewerLegacyPassPass(PassRegistry&);
void initializeCFLAndersAAWrapperPassPass(PassRegistry&);
void initializeCFLSteensAAWrapperPassPass(PassRegistry&);
@@ -144,8 +143,8 @@ void initializeGCMachineCodeAnalysisPass(PassRegistry&);
void initializeGCModuleInfoPass(PassRegistry&);
void initializeGCOVProfilerLegacyPassPass(PassRegistry&);
void initializeGVNHoistLegacyPassPass(PassRegistry&);
-void initializeGVNSinkLegacyPassPass(PassRegistry&);
void initializeGVNLegacyPassPass(PassRegistry&);
+void initializeGVNSinkLegacyPassPass(PassRegistry&);
void initializeGlobalDCELegacyPassPass(PassRegistry&);
void initializeGlobalMergePass(PassRegistry&);
void initializeGlobalOptLegacyPassPass(PassRegistry&);
@@ -175,13 +174,14 @@ void initializeIntervalPartitionPass(PassRegistry&);
void initializeJumpThreadingPass(PassRegistry&);
void initializeLCSSAVerificationPassPass(PassRegistry&);
void initializeLCSSAWrapperPassPass(PassRegistry&);
+void initializeLateCFGSimplifyPassPass(PassRegistry&);
void initializeLazyBlockFrequencyInfoPassPass(PassRegistry&);
void initializeLazyBranchProbabilityInfoPassPass(PassRegistry&);
void initializeLazyMachineBlockFrequencyInfoPassPass(PassRegistry&);
+void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLazyValueInfoWrapperPassPass(PassRegistry&);
void initializeLegacyLICMPassPass(PassRegistry&);
void initializeLegacyLoopSinkPassPass(PassRegistry&);
-void initializeLazyValueInfoPrinterPass(PassRegistry&);
void initializeLegalizerPass(PassRegistry&);
void initializeLibCallsShrinkWrapLegacyPassPass(PassRegistry&);
void initializeLintPass(PassRegistry&);
@@ -195,8 +195,8 @@ void initializeLiveVariablesPass(PassRegistry&);
void initializeLoadCombinePass(PassRegistry&);
void initializeLoadStoreVectorizerPass(PassRegistry&);
void initializeLoaderPassPass(PassRegistry&);
-void initializeLocalizerPass(PassRegistry&);
void initializeLocalStackSlotPassPass(PassRegistry&);
+void initializeLocalizerPass(PassRegistry&);
void initializeLoopAccessLegacyAnalysisPass(PassRegistry&);
void initializeLoopDataPrefetchLegacyPassPass(PassRegistry&);
void initializeLoopDeletionLegacyPassPass(PassRegistry&);
@@ -304,6 +304,7 @@ void initializeProcessImplicitDefsPass(PassRegistry&);
void initializeProfileSummaryInfoWrapperPassPass(PassRegistry&);
void initializePromoteLegacyPassPass(PassRegistry&);
void initializePruneEHPass(PassRegistry&);
+void initializeRABasicPass(PassRegistry&);
void initializeRAGreedyPass(PassRegistry&);
void initializeReassociateLegacyPassPass(PassRegistry&);
void initializeRegBankSelectPass(PassRegistry&);
@@ -327,8 +328,9 @@ void initializeSafeStackLegacyPassPass(PassRegistry&);
void initializeSampleProfileLoaderLegacyPassPass(PassRegistry&);
void initializeSanitizerCoverageModulePass(PassRegistry&);
void initializeScalarEvolutionWrapperPassPass(PassRegistry&);
-void initializeScalarizerPass(PassRegistry&);
void initializeScalarizeMaskedMemIntrinPass(PassRegistry&);
+void initializeScalarizerPass(PassRegistry&);
+void initializeScavengerTestPass(PassRegistry&);
void initializeScopedNoAliasAAWrapperPassPass(PassRegistry&);
void initializeSeparateConstOffsetFromGEPPass(PassRegistry&);
void initializeShadowStackGCLoweringPass(PassRegistry&);
diff --git a/include/llvm/LTO/Config.h b/include/llvm/LTO/Config.h
index 5ba8492db8f5..73106f77ca55 100644
--- a/include/llvm/LTO/Config.h
+++ b/include/llvm/LTO/Config.h
@@ -46,6 +46,9 @@ struct Config {
unsigned OptLevel = 2;
bool DisableVerify = false;
+ /// Use the new pass manager
+ bool UseNewPM = false;
+
/// Disable entirely the optimizer, including importing for ThinLTO
bool CodeGenOnly = false;
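A short usage sketch for the new flag; the field name comes from the hunk above, while the surrounding LTO setup is elided and assumed.

    #include "llvm/LTO/Config.h"

    llvm::lto::Config makeConfig() {
      llvm::lto::Config Conf;
      Conf.OptLevel = 2;    // existing default, shown for context
      Conf.UseNewPM = true; // opt this link into the new pass manager
      return Conf;
    }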
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
index 6ddae2e2b41c..a6d4d404415f 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLDebugSections.h
@@ -17,12 +17,20 @@
#include "llvm/DebugInfo/CodeView/CodeView.h"
#include "llvm/DebugInfo/CodeView/DebugSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionRecord.h"
#include "llvm/ObjectYAML/YAML.h"
namespace llvm {
+
+namespace codeview {
+class DebugStringTableSubsection;
+class DebugStringTableSubsectionRef;
+class DebugChecksumsSubsectionRef;
+}
namespace CodeViewYAML {
+
namespace detail {
-struct C13FragmentBase;
+struct YAMLSubsectionBase;
}
struct SourceLineEntry {
@@ -74,18 +82,24 @@ struct InlineeInfo {
std::vector<InlineeSite> Sites;
};
-struct SourceFileInfo {
- std::vector<SourceFileChecksumEntry> FileChecksums;
- std::vector<SourceLineInfo> LineFragments;
- std::vector<InlineeInfo> Inlinees;
-};
+struct YAMLDebugSubsection {
+ static Expected<YAMLDebugSubsection>
+ fromCodeViewSubection(const codeview::DebugStringTableSubsectionRef &Strings,
+ const codeview::DebugChecksumsSubsectionRef &Checksums,
+ const codeview::DebugSubsectionRecord &SS);
-struct C13DebugSection {
- std::vector<detail::C13FragmentBase> Fragments;
+ std::shared_ptr<detail::YAMLSubsectionBase> Subsection;
};
+
+Expected<std::vector<std::unique_ptr<codeview::DebugSubsection>>>
+convertSubsectionList(ArrayRef<YAMLDebugSubsection> Subsections,
+ codeview::DebugStringTableSubsection &Strings);
+
} // namespace CodeViewYAML
} // namespace llvm
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceFileInfo)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::YAMLDebugSubsection)
+
+LLVM_YAML_IS_SEQUENCE_VECTOR(CodeViewYAML::YAMLDebugSubsection)
#endif
diff --git a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
index ee4e2ac9d404..9b411e8b074f 100644
--- a/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
+++ b/include/llvm/ObjectYAML/CodeViewYAMLSymbols.h
@@ -28,7 +28,9 @@ struct SymbolRecordBase;
struct SymbolRecord {
std::shared_ptr<detail::SymbolRecordBase> Symbol;
- codeview::CVSymbol toCodeViewSymbol(BumpPtrAllocator &Allocator) const;
+ codeview::CVSymbol
+ toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ codeview::CodeViewContainer Container) const;
static Expected<SymbolRecord> fromCodeViewSymbol(codeview::CVSymbol Symbol);
};
diff --git a/include/llvm/TableGen/Record.h b/include/llvm/TableGen/Record.h
index 3c181f0e511b..5c3bf88fbbfa 100644
--- a/include/llvm/TableGen/Record.h
+++ b/include/llvm/TableGen/Record.h
@@ -1361,10 +1361,6 @@ public:
return false;
}
- bool isTemplateArg(StringRef Name) const {
- return isTemplateArg(StringInit::get(Name));
- }
-
const RecordVal *getValue(const Init *Name) const {
for (const RecordVal &Val : Values)
if (Val.Name == Name) return &Val;
@@ -1388,10 +1384,6 @@ public:
TemplateArgs.push_back(Name);
}
- void addTemplateArg(StringRef Name) {
- addTemplateArg(StringInit::get(Name));
- }
-
void addValue(const RecordVal &RV) {
assert(getValue(RV.getNameInit()) == nullptr && "Value already added!");
Values.push_back(RV);
diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h
index d66b6edc7a4f..de35cdf052e1 100644
--- a/include/llvm/Transforms/IPO/FunctionImport.h
+++ b/include/llvm/Transforms/IPO/FunctionImport.h
@@ -81,15 +81,11 @@ public:
/// \p ExportLists contains for each Module the set of globals (GUID) that will
/// be imported by another module, or referenced by such a function. I.e. this
/// is the set of globals that need to be promoted/renamed appropriately.
-///
-/// \p DeadSymbols (optional) contains a list of GUID that are deemed "dead" and
-/// will be ignored for the purpose of importing.
void ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr);
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists);
/// Compute all the imports for the given module using the Index.
///
@@ -102,9 +98,9 @@ void ComputeCrossModuleImportForModule(
/// Compute all the symbols that are "dead": i.e. those that can't be reached
/// in the graph from any of the given symbols listed in
/// \p GUIDPreservedSymbols.
-DenseSet<GlobalValue::GUID>
-computeDeadSymbols(const ModuleSummaryIndex &Index,
- const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
+void computeDeadSymbols(
+ ModuleSummaryIndex &Index,
+ const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols);
/// Compute the set of summaries needed for a ThinLTO backend compilation of
/// \p ModulePath.
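A hedged sketch of the new calling convention: computeDeadSymbols now mutates the index in place (setting the per-summary Live bits and, per the ModuleSummaryIndex hunk earlier in this patch, the index-wide dead-stripping flag) instead of returning a set of dead GUIDs for callers to thread through the import computation.

    #include "llvm/ADT/DenseSet.h"
    #include "llvm/IR/ModuleSummaryIndex.h"
    #include "llvm/Transforms/IPO/FunctionImport.h"

    void stripDead(llvm::ModuleSummaryIndex &Index,
                   const llvm::DenseSet<llvm::GlobalValue::GUID> &Preserved) {
      // No returned DenseSet to pass into ComputeCrossModuleImport anymore.
      llvm::computeDeadSymbols(Index, Preserved);
    }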
diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h
index 023d7af7f729..b6c6c091631d 100644
--- a/include/llvm/Transforms/Instrumentation.h
+++ b/include/llvm/Transforms/Instrumentation.h
@@ -177,6 +177,7 @@ struct SanitizerCoverageOptions {
bool Use8bitCounters = false;
bool TracePC = false;
bool TracePCGuard = false;
+ bool Inline8bitCounters = false;
bool NoPrune = false;
SanitizerCoverageOptions() = default;
diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h
index 91c9d255302f..2a8b89d86282 100644
--- a/include/llvm/Transforms/Utils/Cloning.h
+++ b/include/llvm/Transforms/Utils/Cloning.h
@@ -36,6 +36,7 @@ class BasicBlock;
class BlockFrequencyInfo;
class CallInst;
class CallGraph;
+class DebugInfoFinder;
class DominatorTree;
class Function;
class Instruction;
@@ -110,7 +111,8 @@ struct ClonedCodeInfo {
///
BasicBlock *CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix = "", Function *F = nullptr,
- ClonedCodeInfo *CodeInfo = nullptr);
+ ClonedCodeInfo *CodeInfo = nullptr,
+ DebugInfoFinder *DIFinder = nullptr);
/// CloneFunction - Return a copy of the specified function and add it to that
/// function's module. Also, any references specified in the VMap are changed
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 6a1af87450c9..a906770dbb34 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -1170,7 +1170,9 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
const DataLayout &DL,
const TargetLibraryInfo *TLI) {
// fold: icmp (inttoptr x), null -> icmp x, 0
+ // fold: icmp null, (inttoptr x) -> icmp 0, x
// fold: icmp (ptrtoint x), 0 -> icmp x, null
+ // fold: icmp 0, (ptrtoint x) -> icmp null, x
// fold: icmp (inttoptr x), (inttoptr y) -> icmp trunc/zext x, trunc/zext y
// fold: icmp (ptrtoint x), (ptrtoint y) -> icmp x, y
//
@@ -1240,6 +1242,11 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
Predicate == ICmpInst::ICMP_EQ ? Instruction::And : Instruction::Or;
return ConstantFoldBinaryOpOperands(OpC, LHS, RHS, DL);
}
+ } else if (isa<ConstantExpr>(Ops1)) {
+ // If RHS is a constant expression, but the left side isn't, swap the
+ // operands and try again.
+ Predicate = ICmpInst::getSwappedPredicate((ICmpInst::Predicate)Predicate);
+ return ConstantFoldCompareInstOperands(Predicate, Ops1, Ops0, DL, TLI);
}
return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
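The new else-if canonicalizes by commuting the comparison so the existing LHS-side folds apply: for example, icmp slt 0, (ptrtoint x) becomes icmp sgt (ptrtoint x), 0. A small sketch of the predicate swap it relies on (standalone, not part of the patch):

    #include "llvm/IR/Instructions.h"

    // The swapped predicate preserves the comparison's meaning when the
    // operands trade places (SLT <-> SGT, ULE <-> UGE; EQ/NE are unchanged).
    llvm::CmpInst::Predicate commute(llvm::CmpInst::Predicate P) {
      return llvm::ICmpInst::getSwappedPredicate(P);
    }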
diff --git a/lib/Analysis/IndirectCallPromotionAnalysis.cpp b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
index 3da33ac71421..ed233d201537 100644
--- a/lib/Analysis/IndirectCallPromotionAnalysis.cpp
+++ b/lib/Analysis/IndirectCallPromotionAnalysis.cpp
@@ -43,7 +43,7 @@ static cl::opt<unsigned>
// The percent threshold for the direct-call target (this call site vs the
// total call count) for it to be considered as the promotion target.
static cl::opt<unsigned>
- ICPPercentThreshold("icp-percent-threshold", cl::init(33), cl::Hidden,
+ ICPPercentThreshold("icp-percent-threshold", cl::init(30), cl::Hidden,
cl::ZeroOrMore,
cl::desc("The percentage threshold for the promotion"));
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 4702569126c6..77c87928728a 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -54,11 +54,6 @@ static cl::opt<int>
cl::init(45),
cl::desc("Threshold for inlining cold callsites"));
-static cl::opt<bool>
- EnableGenericSwitchCost("inline-generic-switch-cost", cl::Hidden,
- cl::init(false),
- cl::desc("Enable generic switch cost model"));
-
// We introduce this threshold to help performance of instrumentation based
// PGO before we actually hook up inliner with analysis passes such as BPI and
// BFI.
@@ -1015,83 +1010,68 @@ bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) {
if (isa<ConstantInt>(V))
return true;
- if (EnableGenericSwitchCost) {
- // Assume the most general case where the swith is lowered into
- // either a jump table, bit test, or a balanced binary tree consisting of
- // case clusters without merging adjacent clusters with the same
- // destination. We do not consider the switches that are lowered with a mix
- // of jump table/bit test/binary search tree. The cost of the switch is
- // proportional to the size of the tree or the size of jump table range.
-
- // Exit early for a large switch, assuming one case needs at least one
- // instruction.
- // FIXME: This is not true for a bit test, but ignore such case for now to
- // save compile-time.
- int64_t CostLowerBound =
- std::min((int64_t)INT_MAX,
- (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
-
- if (CostLowerBound > Threshold) {
- Cost = CostLowerBound;
- return false;
- }
+  // Assume the most general case where the switch is lowered into
+ // either a jump table, bit test, or a balanced binary tree consisting of
+ // case clusters without merging adjacent clusters with the same
+ // destination. We do not consider the switches that are lowered with a mix
+ // of jump table/bit test/binary search tree. The cost of the switch is
+ // proportional to the size of the tree or the size of jump table range.
+ //
+ // NB: We convert large switches which are just used to initialize large phi
+ // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
+ // inlining those. It will prevent inlining in cases where the optimization
+ // does not (yet) fire.
- unsigned JumpTableSize = 0;
- unsigned NumCaseCluster =
- TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+ // Exit early for a large switch, assuming one case needs at least one
+ // instruction.
+ // FIXME: This is not true for a bit test, but ignore such case for now to
+ // save compile-time.
+ int64_t CostLowerBound =
+ std::min((int64_t)INT_MAX,
+ (int64_t)SI.getNumCases() * InlineConstants::InstrCost + Cost);
- // If suitable for a jump table, consider the cost for the table size and
- // branch to destination.
- if (JumpTableSize) {
- int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
- 4 * InlineConstants::InstrCost;
- Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
- return false;
- }
+ if (CostLowerBound > Threshold) {
+ Cost = CostLowerBound;
+ return false;
+ }
- // Considering forming a binary search, we should find the number of nodes
- // which is same as the number of comparisons when lowered. For a given
- // number of clusters, n, we can define a recursive function, f(n), to find
- // the number of nodes in the tree. The recursion is :
- // f(n) = 1 + f(n/2) + f (n - n/2), when n > 3,
- // and f(n) = n, when n <= 3.
- // This will lead a binary tree where the leaf should be either f(2) or f(3)
- // when n > 3. So, the number of comparisons from leaves should be n, while
- // the number of non-leaf should be :
- // 2^(log2(n) - 1) - 1
- // = 2^log2(n) * 2^-1 - 1
- // = n / 2 - 1.
- // Considering comparisons from leaf and non-leaf nodes, we can estimate the
- // number of comparisons in a simple closed form :
- // n + n / 2 - 1 = n * 3 / 2 - 1
- if (NumCaseCluster <= 3) {
- // Suppose a comparison includes one compare and one conditional branch.
- Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
- return false;
- }
- int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
- uint64_t SwitchCost =
- ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
- Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
+ unsigned JumpTableSize = 0;
+ unsigned NumCaseCluster =
+ TTI.getEstimatedNumberOfCaseClusters(SI, JumpTableSize);
+
+ // If suitable for a jump table, consider the cost for the table size and
+ // branch to destination.
+ if (JumpTableSize) {
+ int64_t JTCost = (int64_t)JumpTableSize * InlineConstants::InstrCost +
+ 4 * InlineConstants::InstrCost;
+ Cost = std::min((int64_t)INT_MAX, JTCost + Cost);
return false;
}
- // Use a simple switch cost model where we accumulate a cost proportional to
- // the number of distinct successor blocks. This fan-out in the CFG cannot
- // be represented for free even if we can represent the core switch as a
- // jumptable that takes a single instruction.
- ///
- // NB: We convert large switches which are just used to initialize large phi
- // nodes to lookup tables instead in simplify-cfg, so this shouldn't prevent
- // inlining those. It will prevent inlining in cases where the optimization
- // does not (yet) fire.
- SmallPtrSet<BasicBlock *, 8> SuccessorBlocks;
- SuccessorBlocks.insert(SI.getDefaultDest());
- for (auto Case : SI.cases())
- SuccessorBlocks.insert(Case.getCaseSuccessor());
- // Add cost corresponding to the number of distinct destinations. The first
- // we model as free because of fallthrough.
- Cost += (SuccessorBlocks.size() - 1) * InlineConstants::InstrCost;
+ // Considering forming a binary search, we should find the number of nodes
+ // which is same as the number of comparisons when lowered. For a given
+ // number of clusters, n, we can define a recursive function, f(n), to find
+ // the number of nodes in the tree. The recursion is :
+  //   f(n) = 1 + f(n/2) + f(n - n/2), when n > 3,
+  //   and f(n) = n, when n <= 3.
+  // This will lead to a binary tree where each leaf should be either f(2) or
+  // f(3) when n > 3. So, the number of comparisons from leaves should be n,
+  // while the number of non-leaf nodes should be:
+ // 2^(log2(n) - 1) - 1
+ // = 2^log2(n) * 2^-1 - 1
+ // = n / 2 - 1.
+ // Considering comparisons from leaf and non-leaf nodes, we can estimate the
+ // number of comparisons in a simple closed form :
+ // n + n / 2 - 1 = n * 3 / 2 - 1
+ if (NumCaseCluster <= 3) {
+ // Suppose a comparison includes one compare and one conditional branch.
+ Cost += NumCaseCluster * 2 * InlineConstants::InstrCost;
+ return false;
+ }
+ int64_t ExpectedNumberOfCompare = 3 * (uint64_t)NumCaseCluster / 2 - 1;
+ uint64_t SwitchCost =
+ ExpectedNumberOfCompare * 2 * InlineConstants::InstrCost;
+ Cost = std::min((uint64_t)INT_MAX, SwitchCost + Cost);
return false;
}
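A worked instance of the closed form above, assuming InlineConstants::InstrCost is 5 (its value at the time of this patch): for n = 8 case clusters the model predicts 8 + 8/2 - 1 = 11 comparisons, hence a switch cost of 110.

    #include <cstdint>
    #include <iostream>

    int main() {
      const int64_t InstrCost = 5;        // assumed InlineConstants::InstrCost
      const uint64_t NumCaseCluster = 8;  // n > 3, so the closed form applies
      int64_t ExpectedNumberOfCompare = 3 * NumCaseCluster / 2 - 1; // 11
      // one compare plus one conditional branch per comparison
      int64_t SwitchCost = ExpectedNumberOfCompare * 2 * InstrCost; // 110
      std::cout << SwitchCost << "\n";
      return 0;
    }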
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
index a2b9015a8a1d..6a9ae6440ace 100644
--- a/lib/Analysis/LazyValueInfo.cpp
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -662,13 +662,13 @@ namespace {
bool solveBlockValuePHINode(LVILatticeVal &BBLV, PHINode *PN, BasicBlock *BB);
bool solveBlockValueSelect(LVILatticeVal &BBLV, SelectInst *S,
BasicBlock *BB);
- bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueBinaryOp(LVILatticeVal &BBLV, BinaryOperator *BBI,
BasicBlock *BB);
- bool solveBlockValueCast(LVILatticeVal &BBLV, Instruction *BBI,
+ bool solveBlockValueCast(LVILatticeVal &BBLV, CastInst *CI,
BasicBlock *BB);
void intersectAssumeOrGuardBlockValueConstantRange(Value *Val,
LVILatticeVal &BBLV,
- Instruction *BBI);
+ Instruction *BBI);
void solve();
@@ -849,12 +849,12 @@ bool LazyValueInfoImpl::solveBlockValueImpl(LVILatticeVal &Res,
return true;
}
if (BBI->getType()->isIntegerTy()) {
- if (isa<CastInst>(BBI))
- return solveBlockValueCast(Res, BBI, BB);
-
+ if (auto *CI = dyn_cast<CastInst>(BBI))
+ return solveBlockValueCast(Res, CI, BB);
+
BinaryOperator *BO = dyn_cast<BinaryOperator>(BBI);
if (BO && isa<ConstantInt>(BO->getOperand(1)))
- return solveBlockValueBinaryOp(Res, BBI, BB);
+ return solveBlockValueBinaryOp(Res, BO, BB);
}
DEBUG(dbgs() << " compute BB '" << BB->getName()
@@ -1168,9 +1168,9 @@ bool LazyValueInfoImpl::solveBlockValueSelect(LVILatticeVal &BBLV,
}
bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
- Instruction *BBI,
- BasicBlock *BB) {
- if (!BBI->getOperand(0)->getType()->isSized()) {
+ CastInst *CI,
+ BasicBlock *BB) {
+ if (!CI->getOperand(0)->getType()->isSized()) {
// Without knowing how wide the input is, we can't analyze it in any useful
// way.
BBLV = LVILatticeVal::getOverdefined();
@@ -1180,7 +1180,7 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Filter out casts we don't know how to reason about before attempting to
// recurse on our operand. This can cut a long search short if we know we're
// not going to be able to get any useful information anyways.
- switch (BBI->getOpcode()) {
+ switch (CI->getOpcode()) {
case Instruction::Trunc:
case Instruction::SExt:
case Instruction::ZExt:
@@ -1197,44 +1197,43 @@ bool LazyValueInfoImpl::solveBlockValueCast(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, we still apply the
// transfer rule on the full set since we may be able to locally infer
// interesting facts.
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(CI->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, CI->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(CI->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(CI->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(CI->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(CI->getOperand(0), LHSVal,
+ CI);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- const unsigned ResultBitWidth =
- cast<IntegerType>(BBI->getType())->getBitWidth();
+ const unsigned ResultBitWidth = CI->getType()->getIntegerBitWidth();
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- auto CastOp = (Instruction::CastOps) BBI->getOpcode();
- BBLV = LVILatticeVal::getRange(LHSRange.castOp(CastOp, ResultBitWidth));
+ BBLV = LVILatticeVal::getRange(LHSRange.castOp(CI->getOpcode(),
+ ResultBitWidth));
return true;
}
bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
- Instruction *BBI,
+ BinaryOperator *BO,
BasicBlock *BB) {
- assert(BBI->getOperand(0)->getType()->isSized() &&
+ assert(BO->getOperand(0)->getType()->isSized() &&
"all operands to binary operators are sized");
// Filter out operators we don't know how to reason about before attempting to
// recurse on our operand(s). This can cut a long search short if we know
- // we're not going to be able to get any useful information anways.
- switch (BBI->getOpcode()) {
+ // we're not going to be able to get any useful information anyways.
+ switch (BO->getOpcode()) {
case Instruction::Add:
case Instruction::Sub:
case Instruction::Mul:
@@ -1256,29 +1255,29 @@ bool LazyValueInfoImpl::solveBlockValueBinaryOp(LVILatticeVal &BBLV,
// Figure out the range of the LHS. If that fails, use a conservative range,
// but apply the transfer rule anyways. This lets us pick up facts from
// expressions like "and i32 (call i32 @foo()), 32"
- if (!hasBlockValue(BBI->getOperand(0), BB))
- if (pushBlockValue(std::make_pair(BB, BBI->getOperand(0))))
+ if (!hasBlockValue(BO->getOperand(0), BB))
+ if (pushBlockValue(std::make_pair(BB, BO->getOperand(0))))
// More work to do before applying this transfer rule.
return false;
const unsigned OperandBitWidth =
- DL.getTypeSizeInBits(BBI->getOperand(0)->getType());
+ DL.getTypeSizeInBits(BO->getOperand(0)->getType());
ConstantRange LHSRange = ConstantRange(OperandBitWidth);
- if (hasBlockValue(BBI->getOperand(0), BB)) {
- LVILatticeVal LHSVal = getBlockValue(BBI->getOperand(0), BB);
- intersectAssumeOrGuardBlockValueConstantRange(BBI->getOperand(0), LHSVal,
- BBI);
+ if (hasBlockValue(BO->getOperand(0), BB)) {
+ LVILatticeVal LHSVal = getBlockValue(BO->getOperand(0), BB);
+ intersectAssumeOrGuardBlockValueConstantRange(BO->getOperand(0), LHSVal,
+ BO);
if (LHSVal.isConstantRange())
LHSRange = LHSVal.getConstantRange();
}
- ConstantInt *RHS = cast<ConstantInt>(BBI->getOperand(1));
+ ConstantInt *RHS = cast<ConstantInt>(BO->getOperand(1));
ConstantRange RHSRange = ConstantRange(RHS->getValue());
// NOTE: We're currently limited by the set of operations that ConstantRange
// can evaluate symbolically. Enhancing that set will allow us to analyze
// more definitions.
- auto BinOp = (Instruction::BinaryOps) BBI->getOpcode();
+ Instruction::BinaryOps BinOp = BO->getOpcode();
BBLV = LVILatticeVal::getRange(LHSRange.binaryOp(BinOp, RHSRange));
return true;
}
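To make the transfer rule concrete, a small sketch of the ConstantRange::castOp call the cast path now reaches (the values are illustrative): zero-extending the i8 range [100, 200) to 32 bits yields the i32 range [100, 200).

    #include "llvm/IR/ConstantRange.h"
    #include "llvm/IR/InstrTypes.h"

    llvm::ConstantRange zextExample() {
      llvm::APInt Lo(8, 100), Hi(8, 200);
      llvm::ConstantRange LHSRange(Lo, Hi);          // i8 values [100, 200)
      return LHSRange.castOp(llvm::Instruction::ZExt,
                             /*ResultBitWidth=*/32); // i32 [100, 200)
    }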
diff --git a/lib/Analysis/ModuleSummaryAnalysis.cpp b/lib/Analysis/ModuleSummaryAnalysis.cpp
index 26706f5509ba..3253f27c010d 100644
--- a/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -275,7 +275,7 @@ computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
// FIXME: refactor this to use the same code that inliner is using.
F.isVarArg();
GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto FuncSummary = llvm::make_unique<FunctionSummary>(
Flags, NumInsts, RefEdges.takeVector(), CallGraphEdges.takeVector(),
TypeTests.takeVector(), TypeTestAssumeVCalls.takeVector(),
@@ -295,7 +295,7 @@ computeVariableSummary(ModuleSummaryIndex &Index, const GlobalVariable &V,
findRefEdges(Index, &V, RefEdges, Visited);
bool NonRenamableLocal = isNonRenamableLocal(V);
GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto GVarSummary =
llvm::make_unique<GlobalVarSummary>(Flags, RefEdges.takeVector());
if (NonRenamableLocal)
@@ -308,7 +308,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
DenseSet<GlobalValue::GUID> &CantBePromoted) {
bool NonRenamableLocal = isNonRenamableLocal(A);
GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
- /* LiveRoot = */ false);
+ /* Live = */ false);
auto AS = llvm::make_unique<AliasSummary>(Flags, ArrayRef<ValueInfo>{});
auto *Aliasee = A.getBaseObject();
auto *AliaseeSummary = Index.getGlobalValueSummary(*Aliasee);
@@ -323,7 +323,7 @@ computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
for (auto &Summary : VI.getSummaryList())
- Summary->setLiveRoot();
+ Summary->setLive(true);
}
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
@@ -423,8 +423,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
return;
assert(GV->isDeclaration() && "Def in module asm already has definition");
GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
- /* NotEligibleToImport */ true,
- /* LiveRoot */ true);
+ /* NotEligibleToImport = */ true,
+ /* Live = */ true);
CantBePromoted.insert(GlobalValue::getGUID(Name));
// Create the appropriate summary type.
if (isa<Function>(GV)) {
diff --git a/lib/Analysis/OrderedBasicBlock.cpp b/lib/Analysis/OrderedBasicBlock.cpp
index 0f0016f22cc0..a04c0aef04be 100644
--- a/lib/Analysis/OrderedBasicBlock.cpp
+++ b/lib/Analysis/OrderedBasicBlock.cpp
@@ -55,7 +55,7 @@ bool OrderedBasicBlock::comesBefore(const Instruction *A,
assert(II != IE && "Instruction not found?");
assert((Inst == A || Inst == B) && "Should find A or B");
LastInstFound = II;
- return Inst == A;
+ return Inst != B;
}
/// \brief Find out whether \p A dominates \p B, meaning whether \p A
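The one-token fix above changes the A == B case: the scan finds the shared instruction first, and `Inst != B` now yields false, making comesBefore irreflexive (the old `Inst == A` answered true). A hypothetical standalone model of the scan, not the LLVM class itself:

    #include <vector>

    // Hypothetical model: instructions as ints, a basic block as a vector.
    bool comesBefore(const std::vector<int> &Block, int A, int B) {
      for (int Inst : Block)
        if (Inst == A || Inst == B)
          return Inst != B; // old `Inst == A` returned true when A == B
      return false; // not found; the real code asserts instead
    }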
diff --git a/lib/Analysis/RegionPass.cpp b/lib/Analysis/RegionPass.cpp
index 82107cb18025..b38e6225c840 100644
--- a/lib/Analysis/RegionPass.cpp
+++ b/lib/Analysis/RegionPass.cpp
@@ -15,6 +15,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/RegionPass.h"
#include "llvm/Analysis/RegionIterator.h"
+#include "llvm/IR/OptBisect.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/Timer.h"
#include "llvm/Support/raw_ostream.h"
@@ -280,3 +281,18 @@ Pass *RegionPass::createPrinterPass(raw_ostream &O,
const std::string &Banner) const {
return new PrintRegionPass(Banner, O);
}
+
+bool RegionPass::skipRegion(Region &R) const {
+ Function &F = *R.getEntry()->getParent();
+ if (!F.getContext().getOptBisect().shouldRunPass(this, R))
+ return true;
+
+ if (F.hasFnAttribute(Attribute::OptimizeNone)) {
+ // Report this only once per function.
+ if (R.getEntry() == &F.getEntryBlock())
+ DEBUG(dbgs() << "Skipping pass '" << getPassName()
+ << "' on function " << F.getName() << "\n");
+ return true;
+ }
+ return false;
+}
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index 686c94687669..fffa9045b2fd 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -865,11 +865,11 @@ static GlobalValueSummary::GVFlags getDecodedGVSummaryFlags(uint64_t RawFlags,
auto Linkage = GlobalValue::LinkageTypes(RawFlags & 0xF); // 4 bits
RawFlags = RawFlags >> 4;
bool NotEligibleToImport = (RawFlags & 0x1) || Version < 3;
- // The LiveRoot flag wasn't introduced until version 3. For dead stripping
+ // The Live flag wasn't introduced until version 3. For dead stripping
// to work correctly on earlier versions, we must conservatively treat all
// values as live.
- bool LiveRoot = (RawFlags & 0x2) || Version < 3;
- return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, LiveRoot);
+ bool Live = (RawFlags & 0x2) || Version < 3;
+ return GlobalValueSummary::GVFlags(Linkage, NotEligibleToImport, Live);
}
static GlobalValue::VisibilityTypes getDecodedVisibility(unsigned Val) {
diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp
index a402b4ddd462..9043b8c12d25 100644
--- a/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -351,7 +351,8 @@ public:
/// Calls the callback for each value GUID and summary to be written to
/// bitcode. This hides the details of whether they are being pulled from the
/// entire index or just those in a provided ModuleToSummariesForIndex map.
- void forEachSummary(std::function<void(GVInfo)> Callback) {
+ template<typename Functor>
+ void forEachSummary(Functor Callback) {
if (ModuleToSummariesForIndex) {
for (auto &M : *ModuleToSummariesForIndex)
for (auto &Summary : M.second)
@@ -363,6 +364,29 @@ public:
}
}
+ /// Calls the callback for each entry in the modulePaths StringMap that
+ /// should be written to the module path string table. This hides the details
+ /// of whether they are being pulled from the entire index or just those in a
+ /// provided ModuleToSummariesForIndex map.
+ template <typename Functor> void forEachModule(Functor Callback) {
+ if (ModuleToSummariesForIndex) {
+ for (const auto &M : *ModuleToSummariesForIndex) {
+ const auto &MPI = Index.modulePaths().find(M.first);
+ if (MPI == Index.modulePaths().end()) {
+        // This should only happen if the bitcode file was empty, in which
+        // case we shouldn't be importing (the ModuleToSummariesForIndex
+        // would only include the module we are writing the index for).
+ assert(ModuleToSummariesForIndex->size() == 1);
+ continue;
+ }
+ Callback(*MPI);
+ }
+ } else {
+ for (const auto &MPSE : Index.modulePaths())
+ Callback(MPSE);
+ }
+ }
+
/// Main entry point for writing a combined index to bitcode.
void write();
@@ -370,14 +394,6 @@ private:
void writeModStrings();
void writeCombinedGlobalValueSummary();
- /// Indicates whether the provided \p ModulePath should be written into
- /// the module string table, e.g. if full index written or if it is in
- /// the provided subset.
- bool doIncludeModule(StringRef ModulePath) {
- return !ModuleToSummariesForIndex ||
- ModuleToSummariesForIndex->count(ModulePath);
- }
-
Optional<unsigned> getValueId(GlobalValue::GUID ValGUID) {
auto VMI = GUIDToValueIdMap.find(ValGUID);
if (VMI == GUIDToValueIdMap.end())
@@ -864,7 +880,7 @@ static uint64_t getEncodedGVSummaryFlags(GlobalValueSummary::GVFlags Flags) {
uint64_t RawFlags = 0;
RawFlags |= Flags.NotEligibleToImport; // bool
- RawFlags |= (Flags.LiveRoot << 1);
+ RawFlags |= (Flags.Live << 1);
// Linkage doesn't need to be remapped at that time for the summary. Any future
// change to the getEncodedLinkage() function will need to be taken into
// account here as well.
@@ -968,19 +984,18 @@ void ModuleBitcodeWriter::writeValueSymbolTableForwardDecl() {
enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };
/// Determine the encoding to use for the given string name and length.
-static StringEncoding getStringEncoding(const char *Str, unsigned StrLen) {
+static StringEncoding getStringEncoding(StringRef Str) {
bool isChar6 = true;
- for (const char *C = Str, *E = C + StrLen; C != E; ++C) {
+ for (char C : Str) {
if (isChar6)
- isChar6 = BitCodeAbbrevOp::isChar6(*C);
- if ((unsigned char)*C & 128)
+ isChar6 = BitCodeAbbrevOp::isChar6(C);
+ if ((unsigned char)C & 128)
// don't bother scanning the rest.
return SE_Fixed8;
}
if (isChar6)
return SE_Char6;
- else
- return SE_Fixed7;
+ return SE_Fixed7;
}
/// Emit top-level description of module, including target triple, inline asm,
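A standalone restatement of the simplified helper (an assumption: it mirrors the StringRef-based version above, with the Char6 alphabet of BitCodeAbbrevOp::isChar6 inlined): Fixed8 as soon as any byte has the high bit set, Char6 if every byte is in [a-zA-Z0-9._], otherwise Fixed7.

    #include <string>

    enum StringEncoding { SE_Char6, SE_Fixed7, SE_Fixed8 };

    static bool isChar6(char C) { // the BitCodeAbbrevOp::isChar6 alphabet
      return (C >= 'a' && C <= 'z') || (C >= 'A' && C <= 'Z') ||
             (C >= '0' && C <= '9') || C == '.' || C == '_';
    }

    StringEncoding getStringEncoding(const std::string &Str) {
      bool Char6 = true;
      for (char C : Str) {
        if (Char6)
          Char6 = isChar6(C);
        if (static_cast<unsigned char>(C) & 128)
          return SE_Fixed8; // high bit set: no need to scan further
      }
      return Char6 ? SE_Char6 : SE_Fixed7;
    }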
@@ -1073,8 +1088,7 @@ void ModuleBitcodeWriter::writeModuleInfo() {
SmallVector<unsigned, 64> Vals;
// Emit the module's source file name.
{
- StringEncoding Bits = getStringEncoding(M.getSourceFileName().data(),
- M.getSourceFileName().size());
+ StringEncoding Bits = getStringEncoding(M.getSourceFileName());
BitCodeAbbrevOp AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8);
if (Bits == SE_Char6)
AbbrevOpToUse = BitCodeAbbrevOp(BitCodeAbbrevOp::Char6);
@@ -2790,8 +2804,7 @@ void ModuleBitcodeWriter::writeFunctionLevelValueSymbolTable(
for (const ValueName &Name : VST) {
// Figure out the encoding to use for the name.
- StringEncoding Bits =
- getStringEncoding(Name.getKeyData(), Name.getKeyLength());
+ StringEncoding Bits = getStringEncoding(Name.getKey());
unsigned AbbrevToUse = VST_ENTRY_8_ABBREV;
NameVals.push_back(VE.getValueID(Name.getValue()));
@@ -3149,41 +3162,33 @@ void IndexBitcodeWriter::writeModStrings() {
unsigned AbbrevHash = Stream.EmitAbbrev(std::move(Abbv));
SmallVector<unsigned, 64> Vals;
- for (const auto &MPSE : Index.modulePaths()) {
- if (!doIncludeModule(MPSE.getKey()))
- continue;
- StringEncoding Bits =
- getStringEncoding(MPSE.getKey().data(), MPSE.getKey().size());
- unsigned AbbrevToUse = Abbrev8Bit;
- if (Bits == SE_Char6)
- AbbrevToUse = Abbrev6Bit;
- else if (Bits == SE_Fixed7)
- AbbrevToUse = Abbrev7Bit;
-
- Vals.push_back(MPSE.getValue().first);
-
- for (const auto P : MPSE.getKey())
- Vals.push_back((unsigned char)P);
-
- // Emit the finished record.
- Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
-
- Vals.clear();
- // Emit an optional hash for the module now
- auto &Hash = MPSE.getValue().second;
- bool AllZero = true; // Detect if the hash is empty, and do not generate it
- for (auto Val : Hash) {
- if (Val)
- AllZero = false;
- Vals.push_back(Val);
- }
- if (!AllZero) {
- // Emit the hash record.
- Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
- }
+ forEachModule(
+ [&](const StringMapEntry<std::pair<uint64_t, ModuleHash>> &MPSE) {
+ StringRef Key = MPSE.getKey();
+ const auto &Value = MPSE.getValue();
+ StringEncoding Bits = getStringEncoding(Key);
+ unsigned AbbrevToUse = Abbrev8Bit;
+ if (Bits == SE_Char6)
+ AbbrevToUse = Abbrev6Bit;
+ else if (Bits == SE_Fixed7)
+ AbbrevToUse = Abbrev7Bit;
+
+ Vals.push_back(Value.first);
+ Vals.append(Key.begin(), Key.end());
+
+ // Emit the finished record.
+ Stream.EmitRecord(bitc::MST_CODE_ENTRY, Vals, AbbrevToUse);
+
+ // Emit an optional hash for the module now
+ const auto &Hash = Value.second;
+ if (llvm::any_of(Hash, [](uint32_t H) { return H; })) {
+ Vals.assign(Hash.begin(), Hash.end());
+ // Emit the hash record.
+ Stream.EmitRecord(bitc::MST_CODE_HASH, Vals, AbbrevHash);
+ }
- Vals.clear();
- }
+ Vals.clear();
+ });
Stream.ExitBlock();
}
diff --git a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
index 20e1467b30c3..c2ad9db81cfd 100644
--- a/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgValueHistoryCalculator.cpp
@@ -194,6 +194,10 @@ void llvm::calculateDbgValueHistory(const MachineFunction *MF,
// some variables.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
if (TRI->isVirtualRegister(MO.getReg()))
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 4d30c6574b12..256a0c95d365 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -77,6 +77,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializePostRASchedulerPass(Registry);
initializePreISelIntrinsicLoweringLegacyPassPass(Registry);
initializeProcessImplicitDefsPass(Registry);
+ initializeRABasicPass(Registry);
initializeRAGreedyPass(Registry);
initializeRegisterCoalescerPass(Registry);
initializeRenameIndependentSubregsPass(Registry);
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 23812a2a2344..3603f9b7ed93 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -556,6 +556,10 @@ bool GlobalMerge::doInitialization(Module &M) {
if (GV.isDeclaration() || GV.isThreadLocal() || GV.hasSection())
continue;
+ // It's not safe to merge globals that may be preempted
+ if (TM && !TM->shouldAssumeDSOLocal(M, &GV))
+ continue;
+
if (!(MergeExternalGlobals && GV.hasExternalLinkage()) &&
!GV.hasInternalLinkage())
continue;
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 0dc1079b2ad4..cde6ccd29dfd 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -198,13 +198,12 @@ void LivePhysRegs::addLiveOutsNoPristines(const MachineBasicBlock &MBB) {
}
void LivePhysRegs::addLiveOuts(const MachineBasicBlock &MBB) {
+ const MachineFunction &MF = *MBB.getParent();
if (!MBB.succ_empty()) {
- const MachineFunction &MF = *MBB.getParent();
addPristines(*this, MF);
addLiveOutsNoPristines(MBB);
} else if (MBB.isReturnBlock()) {
// For the return block: Add all callee saved registers.
- const MachineFunction &MF = *MBB.getParent();
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addCalleeSavedRegs(*this, MF);
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index dff555f49565..3746b74e0528 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -12,11 +12,13 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveRegUnits.h"
+
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -81,46 +83,50 @@ void LiveRegUnits::accumulateBackward(const MachineInstr &MI) {
}
/// Add live-in registers of basic block \p MBB to \p LiveUnits.
-static void addLiveIns(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
+static void addBlockLiveIns(LiveRegUnits &LiveUnits,
+ const MachineBasicBlock &MBB) {
for (const auto &LI : MBB.liveins())
LiveUnits.addRegMasked(LI.PhysReg, LI.LaneMask);
}
-static void addLiveOuts(LiveRegUnits &LiveUnits, const MachineBasicBlock &MBB) {
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addLiveIns(LiveUnits, *Succ);
+/// Adds all callee saved registers to \p LiveUnits.
+static void addCalleeSavedRegs(LiveRegUnits &LiveUnits,
+ const MachineFunction &MF) {
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (const MCPhysReg *CSR = MRI.getCalleeSavedRegs(); CSR && *CSR; ++CSR)
+ LiveUnits.addReg(*CSR);
}
-/// Add pristine registers to the given \p LiveUnits. This function removes
-/// actually saved callee save registers when \p InPrologueEpilogue is false.
-static void removeSavedRegs(LiveRegUnits &LiveUnits, const MachineFunction &MF,
- const MachineFrameInfo &MFI,
- const TargetRegisterInfo &TRI) {
+/// Adds pristine registers to the given \p LiveUnits. Pristine registers are
+/// callee saved registers that are unused in the function.
+static void addPristines(LiveRegUnits &LiveUnits, const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (!MFI.isCalleeSavedInfoValid())
+ return;
+  // Add all callee saved regs, then remove the ones the function actually
+  // saves and restores.
+  addCalleeSavedRegs(LiveUnits, MF);
+  // Registers in the CalleeSavedInfo are saved and restored, hence not
+  // pristine; whatever remains after removing them is pristine.
for (const CalleeSavedInfo &Info : MFI.getCalleeSavedInfo())
LiveUnits.removeReg(Info.getReg());
}
void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
- addReg(*I);
- if (!MBB.isReturnBlock())
- removeSavedRegs(*this, MF, MFI, *TRI);
+ if (!MBB.succ_empty()) {
+ addPristines(*this, MF);
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+ } else if (MBB.isReturnBlock()) {
+ // For the return block: Add all callee saved registers.
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ if (MFI.isCalleeSavedInfoValid())
+ addCalleeSavedRegs(*this, MF);
}
- ::addLiveOuts(*this, MBB);
}
void LiveRegUnits::addLiveIns(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo &MFI = MF.getFrameInfo();
- if (MFI.isCalleeSavedInfoValid()) {
- for (const MCPhysReg *I = TRI->getCalleeSavedRegs(&MF); *I; ++I)
- addReg(*I);
- if (&MBB != &MF.front())
- removeSavedRegs(*this, MF, MFI, *TRI);
- }
- ::addLiveIns(*this, MBB);
+ addPristines(*this, MF);
+ addBlockLiveIns(*this, MBB);
}
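A hypothetical model (plain std::set instead of register-unit bit vectors) of the pristine-register computation addPristines performs above: start from every callee-saved register and drop the ones the function actually saves and restores.

    #include <set>

    std::set<int> pristines(const std::set<int> &CalleeSaved,
                            const std::set<int> &SavedInFunction) {
      std::set<int> P = CalleeSaved;   // all CSRs (addCalleeSavedRegs)
      for (int R : SavedInFunction)
        P.erase(R);                    // saved+restored regs are not pristine
      return P;
    }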
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 71ad4e6aa7f5..2402ffdbbcb1 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,7 +1,19 @@
-#include "llvm/CodeGen/MachineRegionInfo.h"
+//===- lib/CodeGen/MachineRegionInfo.cpp ----------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/RegionInfoImpl.h"
#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/MachineRegionInfo.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "machine-region-info"
@@ -11,36 +23,29 @@ STATISTIC(numMachineRegions, "The # of machine regions");
STATISTIC(numMachineSimpleRegions, "The # of simple machine regions");
namespace llvm {
+
template class RegionBase<RegionTraits<MachineFunction>>;
template class RegionNodeBase<RegionTraits<MachineFunction>>;
template class RegionInfoBase<RegionTraits<MachineFunction>>;
-}
+
+} // end namespace llvm
//===----------------------------------------------------------------------===//
// MachineRegion implementation
-//
MachineRegion::MachineRegion(MachineBasicBlock *Entry, MachineBasicBlock *Exit,
MachineRegionInfo* RI,
MachineDominatorTree *DT, MachineRegion *Parent) :
- RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {
+ RegionBase<RegionTraits<MachineFunction>>(Entry, Exit, RI, DT, Parent) {}
-}
-
-MachineRegion::~MachineRegion() { }
+MachineRegion::~MachineRegion() = default;
//===----------------------------------------------------------------------===//
// MachineRegionInfo implementation
-//
-
-MachineRegionInfo::MachineRegionInfo() :
- RegionInfoBase<RegionTraits<MachineFunction>>() {
-
-}
-MachineRegionInfo::~MachineRegionInfo() {
+MachineRegionInfo::MachineRegionInfo() = default;
-}
+MachineRegionInfo::~MachineRegionInfo() = default;
void MachineRegionInfo::updateStatistics(MachineRegion *R) {
++numMachineRegions;
@@ -73,9 +78,7 @@ MachineRegionInfoPass::MachineRegionInfoPass() : MachineFunctionPass(ID) {
initializeMachineRegionInfoPassPass(*PassRegistry::getPassRegistry());
}
-MachineRegionInfoPass::~MachineRegionInfoPass() {
-
-}
+MachineRegionInfoPass::~MachineRegionInfoPass() = default;
bool MachineRegionInfoPass::runOnMachineFunction(MachineFunction &F) {
releaseMemory();
@@ -137,8 +140,9 @@ INITIALIZE_PASS_END(MachineRegionInfoPass, DEBUG_TYPE,
// the link time optimization.
namespace llvm {
- FunctionPass *createMachineRegionInfoPass() {
- return new MachineRegionInfoPass();
- }
+
+FunctionPass *createMachineRegionInfoPass() {
+ return new MachineRegionInfoPass();
}
+} // end namespace llvm
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 265f93c363ca..f6dbf667cf02 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -36,6 +36,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
@@ -909,17 +910,43 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
}
- // Generic loads and stores must have a single MachineMemOperand
- // describing that access.
- if ((MI->getOpcode() == TargetOpcode::G_LOAD ||
- MI->getOpcode() == TargetOpcode::G_STORE) &&
- !MI->hasOneMemOperand())
- report("Generic instruction accessing memory must have one mem operand",
- MI);
-
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+  switch (MI->getOpcode()) {
+ default:
+ break;
+ case TargetOpcode::G_LOAD:
+ case TargetOpcode::G_STORE:
+ // Generic loads and stores must have a single MachineMemOperand
+ // describing that access.
+ if (!MI->hasOneMemOperand())
+ report("Generic instruction accessing memory must have one mem operand",
+ MI);
+ break;
+  case TargetOpcode::STATEPOINT: {
+    if (!MI->getOperand(StatepointOpers::IDPos).isImm() ||
+        !MI->getOperand(StatepointOpers::NBytesPos).isImm() ||
+        !MI->getOperand(StatepointOpers::NCallArgsPos).isImm())
+      report("meta operands to STATEPOINT not constant!", MI);
+
+    auto VerifyStackMapConstant = [&](unsigned Offset) {
+      if (!MI->getOperand(Offset).isImm() ||
+          MI->getOperand(Offset).getImm() != StackMaps::ConstantOp ||
+          !MI->getOperand(Offset + 1).isImm())
+        report("stack map constant to STATEPOINT not well formed!", MI);
+    };
+    const unsigned VarStart = StatepointOpers(MI).getVarIdx();
+    VerifyStackMapConstant(VarStart + StatepointOpers::CCOffset);
+    VerifyStackMapConstant(VarStart + StatepointOpers::FlagsOffset);
+    VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
+
+    // TODO: verify we have properly encoded deopt arguments
+    break;
+  }
+  }
}
void
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index a9813e534c5f..e9f8d43fe643 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -54,8 +54,6 @@ static void doSpillCalleeSavedRegs(MachineFunction &MF, RegScavenger *RS,
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks);
-static void doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS);
-
namespace {
class PEI : public MachineFunctionPass {
public:
@@ -84,7 +82,7 @@ private:
const MBBVector &SaveBlocks,
const MBBVector &RestoreBlocks)>
SpillCalleeSavedRegisters;
- std::function<void(MachineFunction &MF, RegScavenger *RS)>
+ std::function<void(MachineFunction &MF, RegScavenger &RS)>
ScavengeFrameVirtualRegs;
bool UsesCalleeSaves = false;
@@ -142,7 +140,6 @@ MachineFunctionPass *llvm::createPrologEpilogInserterPass() {
return new PEI();
}
-STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
STATISTIC(NumBytesStackSpace,
"Number of bytes used for stack in all functions");
@@ -168,10 +165,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
SpillCalleeSavedRegisters = [](MachineFunction &, RegScavenger *,
unsigned &, unsigned &, const MBBVector &,
const MBBVector &) {};
- ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger *) {};
+ ScavengeFrameVirtualRegs = [](MachineFunction &, RegScavenger &) {};
} else {
SpillCalleeSavedRegisters = doSpillCalleeSavedRegs;
- ScavengeFrameVirtualRegs = doScavengeFrameVirtualRegs;
+ ScavengeFrameVirtualRegs = scavengeFrameVirtualRegs;
UsesCalleeSaves = true;
}
}
@@ -222,7 +219,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging) {
- ScavengeFrameVirtualRegs(Fn, RS);
+ ScavengeFrameVirtualRegs(Fn, *RS);
// Clear any vregs created by virtual scavenging.
Fn.getRegInfo().clearVirtRegs();
@@ -1153,92 +1150,3 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn,
RS->forward(MI);
}
}
-
-/// doScavengeFrameVirtualRegs - Replace all frame index virtual registers
-/// with physical registers. Use the register scavenger to find an
-/// appropriate register to use.
-///
-/// FIXME: Iterating over the instruction stream is unnecessary. We can simply
-/// iterate over the vreg use list, which at this point only contains machine
-/// operands for which eliminateFrameIndex need a new scratch reg.
-static void
-doScavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger *RS) {
- // Run through the instructions and find any virtual registers.
- MachineRegisterInfo &MRI = MF.getRegInfo();
- for (MachineBasicBlock &MBB : MF) {
- RS->enterBasicBlock(MBB);
-
- int SPAdj = 0;
-
- // The instruction stream may change in the loop, so check MBB.end()
- // directly.
- for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
- // We might end up here again with a NULL iterator if we scavenged a
- // register for which we inserted spill code for definition by what was
- // originally the first instruction in MBB.
- if (I == MachineBasicBlock::iterator(nullptr))
- I = MBB.begin();
-
- const MachineInstr &MI = *I;
- MachineBasicBlock::iterator J = std::next(I);
- MachineBasicBlock::iterator P =
- I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
- : std::prev(I);
-
- // RS should process this instruction before we might scavenge at this
- // location. This is because we might be replacing a virtual register
- // defined by this instruction, and if so, registers killed by this
- // instruction are available, and defined registers are not.
- RS->forward(I);
-
- for (const MachineOperand &MO : MI.operands()) {
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
-
- // When we first encounter a new virtual register, it
- // must be a definition.
- assert(MO.isDef() && "frame index virtual missing def!");
- // Scavenge a new scratch register
- const TargetRegisterClass *RC = MRI.getRegClass(Reg);
- unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj);
-
- ++NumScavengedRegs;
-
- // Replace this reference to the virtual register with the
- // scratch register.
- assert(ScratchReg && "Missing scratch register!");
- MRI.replaceRegWith(Reg, ScratchReg);
-
- // Because this instruction was processed by the RS before this
- // register was allocated, make sure that the RS now records the
- // register as being used.
- RS->setRegUsed(ScratchReg);
- }
-
- // If the scavenger needed to use one of its spill slots, the
- // spill code will have been inserted in between I and J. This is a
- // problem because we need the spill code before I: Move I to just
- // prior to J.
- if (I != std::prev(J)) {
- MBB.splice(J, &MBB, I);
-
- // Before we move I, we need to prepare the RS to visit I again.
- // Specifically, RS will assert if it sees uses of registers that
- // it believes are undefined. Because we have already processed
- // register kills in I, when it visits I again, it will believe that
- // those registers are undefined. To avoid this situation, unprocess
- // the instruction I.
- assert(RS->getCurrentPosition() == I &&
- "The register scavenger has an unexpected position");
- I = P;
- RS->unprocess(P);
- } else
- ++I;
- }
- }
-
- MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
-}
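A note on the call-site change earlier in this file: ScavengeFrameVirtualRegs now takes the scavenger by reference rather than by pointer, so a null scavenger is unrepresentable at the call site. A minimal standalone sketch of that convention follows; all names here are hypothetical, not LLVM's.

```cpp
#include <cassert>

struct Scavenger { int scavenge() { return 42; } };

// Required collaborator: the callee cannot receive null, so no check needed.
int useRequired(Scavenger &RS) { return RS.scavenge(); }

// Optional collaborator: the pointer form documents that null is allowed.
int useOptional(Scavenger *RS) { return RS ? RS->scavenge() : 0; }

int main() {
  Scavenger S;
  assert(useRequired(S) == 42);
  assert(useOptional(nullptr) == 0);
}
```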
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index a87fed3a687e..24be7ea98d82 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -58,8 +58,9 @@ namespace {
/// whenever a register is unavailable. This is not practical in production but
/// provides a useful baseline both for measuring other allocators and comparing
/// the speed of the basic algorithm against other styles of allocators.
-class RABasic : public MachineFunctionPass, public RegAllocBase
-{
+class RABasic : public MachineFunctionPass,
+ public RegAllocBase,
+ private LiveRangeEdit::Delegate {
// context
MachineFunction *MF;
@@ -72,6 +73,9 @@ class RABasic : public MachineFunctionPass, public RegAllocBase
// selectOrSplit().
BitVector UsableRegs;
+ bool LRE_CanEraseVirtReg(unsigned) override;
+ void LRE_WillShrinkVirtReg(unsigned) override;
+
public:
RABasic();
@@ -121,17 +125,46 @@ char RABasic::ID = 0;
} // end anonymous namespace
+char &llvm::RABasicID = RABasic::ID;
+
+INITIALIZE_PASS_BEGIN(RABasic, "regallocbasic", "Basic Register Allocator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_PASS_DEPENDENCY(MachineScheduler)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
+INITIALIZE_PASS_END(RABasic, "regallocbasic", "Basic Register Allocator", false,
+ false)
+
+bool RABasic::LRE_CanEraseVirtReg(unsigned VirtReg) {
+ if (VRM->hasPhys(VirtReg)) {
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ aboutToRemoveInterval(LI);
+ return true;
+ }
+ // Unassigned virtreg is probably in the priority queue.
+ // RegAllocBase will erase it after dequeueing.
+ return false;
+}
+
+void RABasic::LRE_WillShrinkVirtReg(unsigned VirtReg) {
+ if (!VRM->hasPhys(VirtReg))
+ return;
+
+ // Register is assigned, put it back on the queue for reassignment.
+ LiveInterval &LI = LIS->getInterval(VirtReg);
+ Matrix->unassign(LI);
+ enqueue(&LI);
+}
+
RABasic::RABasic(): MachineFunctionPass(ID) {
- initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
- initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
- initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
- initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
- initializeMachineSchedulerPass(*PassRegistry::getPassRegistry());
- initializeLiveStacksPass(*PassRegistry::getPassRegistry());
- initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
- initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
- initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
- initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry());
}
void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -200,7 +233,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
Matrix->unassign(Spill);
// Spill the extracted interval.
- LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&Spill, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
}
return true;
@@ -259,7 +292,7 @@ unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
if (!VirtReg.isSpillable())
return ~0u;
- LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, nullptr, &DeadRemats);
+ LiveRangeEdit LRE(&VirtReg, SplitVRegs, *MF, *LIS, VRM, this, &DeadRemats);
spiller().spill(LRE);
// The live virtual register requesting allocation was spilled, so tell
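The hunks above wire RABasic into LiveRangeEdit's delegate callbacks by passing `this` where `nullptr` used to go, so erasing or shrinking a virtual register can consult the allocator's own bookkeeping first. Below is a self-contained sketch of that delegate pattern, with invented names standing in for LiveRangeEdit and RABasic.

```cpp
#include <iostream>

struct Delegate {
  virtual ~Delegate() = default;
  virtual bool canEraseVirtReg(unsigned VReg) = 0;
};

struct RangeEditor {
  Delegate *D;  // may be null, mirroring the nullptr the patch replaces
  explicit RangeEditor(Delegate *Dlg) : D(Dlg) {}
  void eraseVirtReg(unsigned VReg) {
    // Ask the observer before mutating shared allocator state.
    if (!D || D->canEraseVirtReg(VReg))
      std::cout << "erased vreg " << VReg << "\n";
  }
};

struct Allocator : private Delegate {
  bool canEraseVirtReg(unsigned VReg) override {
    return VReg != 0;  // the allocator can veto or update its own records
  }
  void run() {
    RangeEditor LRE(this);  // was effectively RangeEditor LRE(nullptr)
    LRE.eraseVirtReg(5);
  }
};

int main() { Allocator().run(); }
```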
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 3b5964eef55e..b2dfef91add5 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -49,9 +49,11 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Math.h"
#include "llvm/CodeGen/PBQP/Solution.h"
#include "llvm/CodeGen/PBQPRAConstraint.h"
#include "llvm/CodeGen/RegAllocPBQP.h"
@@ -139,13 +141,13 @@ public:
}
private:
- typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
- typedef std::vector<const LiveInterval*> Node2LIMap;
- typedef std::vector<unsigned> AllowedSet;
- typedef std::vector<AllowedSet> AllowedSetMap;
- typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
- typedef std::set<unsigned> RegSet;
+ using LI2NodeMap = std::map<const LiveInterval *, unsigned>;
+ using Node2LIMap = std::vector<const LiveInterval *>;
+ using AllowedSet = std::vector<unsigned>;
+ using AllowedSetMap = std::vector<AllowedSet>;
+ using RegPair = std::pair<unsigned, unsigned>;
+ using CoalesceMap = std::map<RegPair, PBQP::PBQPNum>;
+ using RegSet = std::set<unsigned>;
char *customPassID;
@@ -212,12 +214,12 @@ public:
/// @brief Add interference edges between overlapping vregs.
class Interference : public PBQPRAConstraint {
private:
- typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
- typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IKey;
- typedef DenseMap<IKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
- typedef DenseSet<IKey> DisjointAllowedRegsCache;
- typedef std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId> IEdgeKey;
- typedef DenseSet<IEdgeKey> IEdgeCache;
+ using AllowedRegVecPtr = const PBQP::RegAlloc::AllowedRegVector *;
+ using IKey = std::pair<AllowedRegVecPtr, AllowedRegVecPtr>;
+ using IMatrixCache = DenseMap<IKey, PBQPRAGraph::MatrixPtr>;
+ using DisjointAllowedRegsCache = DenseSet<IKey>;
+ using IEdgeKey = std::pair<PBQP::GraphBase::NodeId, PBQP::GraphBase::NodeId>;
+ using IEdgeCache = DenseSet<IEdgeKey>;
bool haveDisjointAllowedRegs(const PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
PBQPRAGraph::NodeId MId,
@@ -252,8 +254,8 @@ private:
// for the fast interference graph construction algorithm. The last is there
// to save us from looking up node ids via the VRegToNode map in the graph
// metadata.
- typedef std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>
- IntervalInfo;
+ using IntervalInfo =
+ std::tuple<LiveInterval*, size_t, PBQP::GraphBase::NodeId>;
static SlotIndex getStartPoint(const IntervalInfo &I) {
return std::get<0>(I)->segments[std::get<1>(I)].start;
@@ -320,9 +322,10 @@ public:
// Cache known disjoint allowed registers pairs
DisjointAllowedRegsCache D;
- typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
- typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
- decltype(&lowestStartPoint)> IntervalQueue;
+ using IntervalSet = std::set<IntervalInfo, decltype(&lowestEndPoint)>;
+ using IntervalQueue =
+ std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
+ decltype(&lowestStartPoint)>;
IntervalSet Active(lowestEndPoint);
IntervalQueue Inactive(lowestStartPoint);
@@ -658,7 +661,6 @@ void RegAllocPBQP::spillVReg(unsigned VReg,
SmallVectorImpl<unsigned> &NewIntervals,
MachineFunction &MF, LiveIntervals &LIS,
VirtRegMap &VRM, Spiller &VRegSpiller) {
-
VRegsToAlloc.erase(VReg);
LiveRangeEdit LRE(&LIS.getInterval(VReg), NewIntervals, MF, LIS, &VRM,
nullptr, &DeadRemats);
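The typedef-to-using conversions above are mechanical, but the alias form also scales to templates, which `typedef` cannot express at all. A standalone illustration, with types invented for the example:

```cpp
#include <map>
#include <vector>

// Equivalent forms; the alias reads left-to-right like an assignment.
typedef std::map<int, std::vector<int>> AdjListT;
using AdjList = std::map<int, std::vector<int>>;

// Only 'using' can be templated: an alias template has no typedef spelling.
template <typename T> using Matrix = std::vector<std::vector<T>>;

int main() {
  AdjList G;
  G[0].push_back(1);
  Matrix<double> M(2, std::vector<double>(2, 0.0));
  return static_cast<int>(M.size()) - 2;  // 0
}
```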
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 0635e5c0a63c..1aed58c36e17 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -15,18 +15,23 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/RegisterScavenging.h"
+
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/PassSupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
@@ -39,6 +44,8 @@ using namespace llvm;
#define DEBUG_TYPE "reg-scavenging"
+STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
+
void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
LiveUnits.addRegMasked(Reg, LaneMask);
}
@@ -469,3 +476,120 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
+
+void llvm::scavengeFrameVirtualRegs(MachineFunction &MF, RegScavenger &RS) {
+ // FIXME: Iterating over the instruction stream is unnecessary. We can simply
+ // iterate over the vreg use list, which at this point only contains machine
+ // operands for which eliminateFrameIndex needs a new scratch reg.
+
+ // Run through the instructions and find any virtual registers.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineBasicBlock &MBB : MF) {
+ RS.enterBasicBlock(MBB);
+
+ int SPAdj = 0;
+
+ // The instruction stream may change in the loop, so check MBB.end()
+ // directly.
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ) {
+ // We might end up here again with a NULL iterator if we scavenged a
+ // register for which we inserted spill code for a definition by what was
+ // originally the first instruction in MBB.
+ if (I == MachineBasicBlock::iterator(nullptr))
+ I = MBB.begin();
+
+ const MachineInstr &MI = *I;
+ MachineBasicBlock::iterator J = std::next(I);
+ MachineBasicBlock::iterator P =
+ I == MBB.begin() ? MachineBasicBlock::iterator(nullptr)
+ : std::prev(I);
+
+ // RS should process this instruction before we might scavenge at this
+ // location. This is because we might be replacing a virtual register
+ // defined by this instruction, and if so, registers killed by this
+ // instruction are available, and defined registers are not.
+ RS.forward(I);
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ // When we first encounter a new virtual register, it
+ // must be a definition.
+ assert(MO.isDef() && "frame index virtual missing def!");
+ // Scavenge a new scratch register
+ const TargetRegisterClass *RC = MRI.getRegClass(Reg);
+ unsigned ScratchReg = RS.scavengeRegister(RC, J, SPAdj);
+
+ ++NumScavengedRegs;
+
+ // Replace this reference to the virtual register with the
+ // scratch register.
+ assert(ScratchReg && "Missing scratch register!");
+ MRI.replaceRegWith(Reg, ScratchReg);
+
+ // Because this instruction was processed by the RS before this
+ // register was allocated, make sure that the RS now records the
+ // register as being used.
+ RS.setRegUsed(ScratchReg);
+ }
+
+ // If the scavenger needed to use one of its spill slots, the
+ // spill code will have been inserted in between I and J. This is a
+ // problem because we need the spill code before I: Move I to just
+ // prior to J.
+ if (I != std::prev(J)) {
+ MBB.splice(J, &MBB, I);
+
+ // Before we move I, we need to prepare the RS to visit I again.
+ // Specifically, RS will assert if it sees uses of registers that
+ // it believes are undefined. Because we have already processed
+ // register kills in I, when it visits I again, it will believe that
+ // those registers are undefined. To avoid this situation, unprocess
+ // the instruction I.
+ assert(RS.getCurrentPosition() == I &&
+ "The register scavenger has an unexpected position");
+ I = P;
+ RS.unprocess(P);
+ } else
+ ++I;
+ }
+ }
+
+ MRI.clearVirtRegs();
+ MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
+}
+
+namespace {
+/// This class runs register scavenging independently of the
+/// PrologEpilogInserter. This is used for testing.
+class ScavengerTest : public MachineFunctionPass {
+public:
+ static char ID;
+ ScavengerTest() : MachineFunctionPass(ID) {}
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ const TargetFrameLowering &TFL = *STI.getFrameLowering();
+
+ RegScavenger RS;
+ // Let's hope that calling those outside of PrologEpilogInserter works
+ // well enough to initialize the scavenger with some emergency spill slots
+ // for the target.
+ BitVector SavedRegs;
+ TFL.determineCalleeSaves(MF, SavedRegs, &RS);
+ TFL.processFunctionBeforeFrameFinalized(MF, &RS);
+
+ // Let's scavenge the current function
+ scavengeFrameVirtualRegs(MF, RS);
+ return true;
+ }
+};
+char ScavengerTest::ID;
+
+} // end anonymous namespace
+
+INITIALIZE_PASS(ScavengerTest, "scavenger-test",
+ "Scavenge virtual registers inside basic blocks", false, false)
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 8035ea80364b..3fdbd2459361 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -12,32 +12,54 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/ADT/IntEqClasses.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/SparseSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/RegisterPressure.h"
+#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/CodeGen/ScheduleDAGInstrs.h"
#include "llvm/CodeGen/ScheduleDFS.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/Type.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/MC/LaneBitmask.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
+#include <algorithm>
+#include <cassert>
+#include <iterator>
+#include <string>
+#include <utility>
+#include <vector>
using namespace llvm;
@@ -90,11 +112,9 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo *mli,
bool RemoveKillFlags)
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
- RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
- TrackLaneMasks(false), AAForDep(nullptr), BarrierChain(nullptr),
+ RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction()->getContext()))),
- FirstDbgValue(nullptr) {
+ Type::getVoidTy(mf.getFunction()->getContext()))) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -126,7 +146,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
return V;
}
assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
- } while (1);
+ } while (true);
}
/// This is a wrapper around GetUnderlyingObjects and adds support for basic
@@ -563,7 +583,7 @@ void ScheduleDAGInstrs::initSUnits() {
// which is contained within a basic block.
SUnits.reserve(NumRegionInstrs);
- for (MachineInstr &MI : llvm::make_range(RegionBegin, RegionEnd)) {
+ for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
if (MI.isDebugValue())
continue;
@@ -606,13 +626,13 @@ void ScheduleDAGInstrs::initSUnits() {
class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
/// Current total number of SUs in map.
- unsigned NumNodes;
+ unsigned NumNodes = 0;
/// 1 for loads, 0 for stores. (see comment in SUList)
unsigned TrueMemOrderLatency;
public:
- Value2SUsMap(unsigned lat = 0) : NumNodes(0), TrueMemOrderLatency(lat) {}
+ Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
/// To keep NumNodes up to date, insert() is used instead of
/// this operator w/ push_back().
@@ -630,7 +650,7 @@ public:
void inline clearList(ValueType V) {
iterator Itr = find(V);
if (Itr != end()) {
- assert (NumNodes >= Itr->second.size());
+ assert(NumNodes >= Itr->second.size());
NumNodes -= Itr->second.size();
Itr->second.clear();
@@ -646,7 +666,7 @@ public:
unsigned inline size() const { return NumNodes; }
/// Counts the number of SUs in this map after a reduction.
- void reComputeSize(void) {
+ void reComputeSize() {
NumNodes = 0;
for (auto &I : *this)
NumNodes += I.second.size();
@@ -676,7 +696,7 @@ void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
}
void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
for (auto &I : map) {
SUList &sus = I.second;
@@ -687,7 +707,7 @@ void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
}
void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
- assert (BarrierChain != nullptr);
+ assert(BarrierChain != nullptr);
// Go through all lists of SUs.
for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
@@ -1028,7 +1048,7 @@ void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
// The N last elements in NodeNums will be removed, and the SU with
// the lowest NodeNum of them will become the new BarrierChain to
// let the not yet seen SUs have a dependency to the removed SUs.
- assert (N <= NodeNums.size());
+ assert(N <= NodeNums.size());
SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
if (BarrierChain) {
// The aliasing and non-aliasing maps reduce independently of each
@@ -1156,6 +1176,7 @@ std::string ScheduleDAGInstrs::getDAGName() const {
//===----------------------------------------------------------------------===//
namespace llvm {
+
/// Internal state used to compute SchedDFSResult.
class SchedDFSImpl {
SchedDFSResult &R;
@@ -1163,16 +1184,16 @@ class SchedDFSImpl {
/// Join DAG nodes into equivalence classes by their subtree.
IntEqClasses SubtreeClasses;
/// List PredSU, SuccSU pairs that represent data edges between subtrees.
- std::vector<std::pair<const SUnit*, const SUnit*> > ConnectionPairs;
+ std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
struct RootData {
unsigned NodeID;
unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
- unsigned SubInstrCount; ///< Instr count in this tree only, not children.
+ unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
+ /// children.
RootData(unsigned id): NodeID(id),
- ParentNodeID(SchedDFSResult::InvalidSubtreeID),
- SubInstrCount(0) {}
+ ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
unsigned getSparseSetIndex() const { return NodeID; }
};
@@ -1340,12 +1361,15 @@ protected:
} while (FromTree != SchedDFSResult::InvalidSubtreeID);
}
};
+
} // end namespace llvm
namespace {
+
/// Manage the stack used by a reverse depth-first search over the DAG.
class SchedDAGReverseDFS {
- std::vector<std::pair<const SUnit*, SUnit::const_pred_iterator> > DFSStack;
+ std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
+
public:
bool isComplete() const { return DFSStack.empty(); }
@@ -1367,7 +1391,8 @@ public:
return getCurr()->Preds.end();
}
};
-} // anonymous
+
+} // end anonymous namespace
static bool hasDataSucc(const SUnit *SU) {
for (const SDep &SuccDep : SU->Succs) {
@@ -1392,7 +1417,7 @@ void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
SchedDAGReverseDFS DFS;
Impl.visitPreorder(&SU);
DFS.follow(&SU);
- for (;;) {
+ while (true) {
// Traverse the leftmost path as far as possible.
while (DFS.getPred() != DFS.getPredEnd()) {
const SDep &PredDep = *DFS.getPred();
@@ -1457,4 +1482,5 @@ raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
}
} // end namespace llvm
+
#endif
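Several hunks above (Value2SUsMap, RootData, the ScheduleDAGInstrs constructor) move defaults out of constructor init lists into in-class member initializers. A minimal standalone example of the idiom, with a made-up stand-in type:

```cpp
#include <cassert>

struct Value2SUsMapSketch {            // hypothetical stand-in type
  unsigned NumNodes = 0;               // default shared by all constructors
  unsigned TrueMemOrderLatency;
  explicit Value2SUsMapSketch(unsigned Lat = 0) : TrueMemOrderLatency(Lat) {}
};

int main() {
  Value2SUsMapSketch Loads(1), Stores;
  assert(Loads.NumNodes == 0 && Stores.NumNodes == 0);
  assert(Loads.TrueMemOrderLatency == 1 && Stores.TrueMemOrderLatency == 0);
}
```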
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 5f167f8de1cf..9355dbe77f94 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -225,6 +225,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
return TranslateLegalizeResults(Op, Lowered);
}
+ LLVM_FALLTHROUGH;
case TargetLowering::Expand:
Changed = true;
return LegalizeOp(ExpandLoad(Op));
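The added LLVM_FALLTHROUGH marks the drop from the Custom case into Expand as deliberate; where C++17 is available it expands to the [[fallthrough]] attribute. A self-contained illustration using the standard attribute directly (toy function, not the legalizer):

```cpp
int classify(int Kind) {
  int Cost = 0;
  switch (Kind) {
  case 1:
    Cost += 10;          // extra work unique to Kind 1
    [[fallthrough]];     // deliberate: Kind 1 also pays Kind 2's cost
  case 2:
    Cost += 1;
    break;
  default:
    break;
  }
  return Cost;
}

int main() { return (classify(1) == 11 && classify(2) == 1) ? 0 : 1; }
```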
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 177898e1e950..80a03ea4eea0 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,4 +1,4 @@
-//===-- SelectionDAG.cpp - Implement the SelectionDAG data structures -----===//
+//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,29 +11,46 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
#include "SDNodeDbgValue.h"
+#include "llvm/ADT/APFloat.h"
+#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
-#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/None.h"
#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/CodeGen/RuntimeLibcalls.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
-#include "llvm/IR/CallingConv.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
-#include "llvm/IR/GlobalAlias.h"
-#include "llvm/IR/GlobalVariable.h"
-#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/GlobalValue.h"
+#include "llvm/IR/Metadata.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CodeGen.h"
+#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
@@ -41,16 +58,20 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
-#include <cmath>
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+#include <limits>
+#include <set>
+#include <string>
#include <utility>
+#include <vector>
using namespace llvm;
@@ -269,7 +290,6 @@ ISD::CondCode ISD::getSetCCInverse(ISD::CondCode Op, bool isInteger) {
return ISD::CondCode(Operation);
}
-
/// For an integer comparison, return 1 if the comparison is a signed operation
/// and 2 if the result is an unsigned comparison. Return zero if the operation
/// does not depend on the sign of the input (setne and seteq).
@@ -338,7 +358,6 @@ ISD::CondCode ISD::getSetCCAndOperation(ISD::CondCode Op1, ISD::CondCode Op2,
//===----------------------------------------------------------------------===//
/// AddNodeIDOpcode - Add the node opcode to the NodeID data.
-///
static void AddNodeIDOpcode(FoldingSetNodeID &ID, unsigned OpC) {
ID.AddInteger(OpC);
}
@@ -350,7 +369,6 @@ static void AddNodeIDValueTypes(FoldingSetNodeID &ID, SDVTList VTList) {
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDValue> Ops) {
for (auto& Op : Ops) {
@@ -360,7 +378,6 @@ static void AddNodeIDOperands(FoldingSetNodeID &ID,
}
/// AddNodeIDOperands - Various routines for adding operands to the NodeID data.
-///
static void AddNodeIDOperands(FoldingSetNodeID &ID,
ArrayRef<SDUse> Ops) {
for (auto& Op : Ops) {
@@ -392,10 +409,9 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
break;
}
case ISD::TargetConstantFP:
- case ISD::ConstantFP: {
+ case ISD::ConstantFP:
ID.AddPointer(cast<ConstantFPSDNode>(N)->getConstantFPValue());
break;
- }
case ISD::TargetGlobalAddress:
case ISD::GlobalAddress:
case ISD::TargetGlobalTLSAddress:
@@ -770,7 +786,6 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
/// maps and modified in place. Add it back to the CSE maps, unless an identical
/// node already exists, in which case transfer all its users to the existing
/// node. This transfer can potentially trigger recursive merging.
-///
void
SelectionDAG::AddModifiedNodeToCSEMaps(SDNode *N) {
// For node types that aren't CSE'd, just act as if no identical node
@@ -835,7 +850,6 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
return Node;
}
-
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
/// were replaced with those specified. If this node is never memoized,
/// return null, otherwise return a pointer to the slot it would take. If a
@@ -864,10 +878,9 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
// EntryNode could meaningfully have debug info if we can find it...
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL)
- : TM(tm), TSI(nullptr), TLI(nullptr), OptLevel(OL),
+ : TM(tm), OptLevel(OL),
EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)),
- Root(getEntryNode()), NewNodesMustHaveLegalTypes(false),
- UpdateListeners(nullptr) {
+ Root(getEntryNode()) {
InsertNode(&EntryNode);
DbgInfo = new SDDbgInfo();
}
@@ -1038,7 +1051,6 @@ SDValue SelectionDAG::getZeroExtendVectorInReg(SDValue Op, const SDLoc &DL,
}
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
-///
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
SDValue NegOne =
@@ -1317,7 +1329,6 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
return SDValue(N, 0);
}
-
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
@@ -1451,7 +1462,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1,
// Validate that all indices in Mask are within the range of the elements
// input to the shuffle.
int NElts = Mask.size();
- assert(all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
+ assert(llvm::all_of(Mask, [&](int M) { return M < (NElts * 2); }) &&
"Index out of range");
// Copy the mask so we can do any needed cleanup.
@@ -2918,7 +2929,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
else
DemandedRHS.setBit((unsigned)M % NumElts);
}
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
if (!!DemandedLHS)
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
if (!!DemandedRHS) {
@@ -3122,7 +3133,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
unsigned EltIdx = CEltNo->getZExtValue();
// If we demand the inserted element then get its sign bits.
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
if (DemandedElts[EltIdx]) {
// TODO - handle implicit truncation of inserted elements.
if (InVal.getScalarValueSizeInBits() != VTBits)
@@ -3188,7 +3199,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
case ISD::CONCAT_VECTORS:
// Determine the minimum number of sign bits across all demanded
// elts of the input vectors. Early out if the result is already 1.
- Tmp = UINT_MAX;
+ Tmp = std::numeric_limits<unsigned>::max();
EVT SubVectorVT = Op.getOperand(0).getValueType();
unsigned NumSubVectorElts = SubVectorVT.getVectorNumElements();
unsigned NumSubVectors = Op.getNumOperands();
@@ -3327,7 +3338,7 @@ bool SelectionDAG::haveNoCommonBitsSet(SDValue A, SDValue B) const {
static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
- llvm::SelectionDAG &DAG) {
+ SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
assert(llvm::all_of(Ops,
[Ops](SDValue Op) {
@@ -3836,8 +3847,9 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
return true;
return ISD::isBuildVectorOfConstantSDNodes(Divisor.getNode()) &&
- any_of(Divisor->op_values(),
- [](SDValue V) { return V.isUndef() || isNullConstant(V); });
+ llvm::any_of(Divisor->op_values(),
+ [](SDValue V) { return V.isUndef() ||
+ isNullConstant(V); });
// TODO: Handle signed overflow.
}
// TODO: Handle oversized shifts.
@@ -3948,8 +3960,8 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
// All operands must be vector types with the same number of elements as
// the result type and must be either UNDEF or a build vector of constant
// or UNDEF scalars.
- if (!all_of(Ops, IsConstantBuildVectorOrUndef) ||
- !all_of(Ops, IsScalarOrSameVectorSize))
+ if (!llvm::all_of(Ops, IsConstantBuildVectorOrUndef) ||
+ !llvm::all_of(Ops, IsScalarOrSameVectorSize))
return SDValue();
// If we are comparing vectors, then the result needs to be a i1 boolean
@@ -5550,7 +5562,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
Opcode == ISD::PREFETCH ||
Opcode == ISD::LIFETIME_START ||
Opcode == ISD::LIFETIME_END ||
- (Opcode <= INT_MAX &&
+ ((int)Opcode <= std::numeric_limits<int>::max() &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
@@ -5884,7 +5896,6 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue Mask, SDValue Src0,
EVT MemVT, MachineMemOperand *MMO,
ISD::LoadExtType ExtTy, bool isExpanding) {
-
SDVTList VTs = getVTList(VT, MVT::Other);
SDValue Ops[] = { Chain, Ptr, Mask, Src0 };
FoldingSetNodeID ID;
@@ -6038,13 +6049,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
switch (Opcode) {
default: break;
- case ISD::CONCAT_VECTORS: {
+ case ISD::CONCAT_VECTORS:
// Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
- }
- case ISD::SELECT_CC: {
+ case ISD::SELECT_CC:
assert(NumOps == 5 && "SELECT_CC takes 5 operands!");
assert(Ops[0].getValueType() == Ops[1].getValueType() &&
"LHS and RHS of condition must have same type!");
@@ -6053,14 +6063,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(Ops[2].getValueType() == VT &&
"select_cc node must be of same type as true and false value!");
break;
- }
- case ISD::BR_CC: {
+ case ISD::BR_CC:
assert(NumOps == 5 && "BR_CC takes 5 operands!");
assert(Ops[2].getValueType() == Ops[3].getValueType() &&
"LHS/RHS of comparison should match types!");
break;
}
- }
// Memoize nodes.
SDNode *N;
@@ -6599,7 +6607,6 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
return Res;
}
-
/// getMachineNode - These are used for target selectors to create a new node
/// with specified return type(s), MachineInstr opcode, and operands.
///
@@ -6812,7 +6819,7 @@ public:
: SelectionDAG::DAGUpdateListener(d), UI(ui), UE(ue) {}
};
-}
+} // end anonymous namespace
/// ReplaceAllUsesWith - Modify anything using 'From' to use 'To' instead.
/// This can cause recursive merging of nodes in the DAG.
@@ -6858,7 +6865,6 @@ void SelectionDAG::ReplaceAllUsesWith(SDValue FromN, SDValue To) {
AddModifiedNodeToCSEMaps(User);
}
-
// If we just RAUW'd the root, take note.
if (FromN == getRoot())
setRoot(To);
@@ -7028,6 +7034,7 @@ void SelectionDAG::ReplaceAllUsesOfValueWith(SDValue From, SDValue To){
}
namespace {
+
/// UseMemo - This class is used by SelectionDAG::ReplaceAllUsesOfValuesWith
/// to record information about a use.
struct UseMemo {
@@ -7040,7 +7047,8 @@ namespace {
bool operator<(const UseMemo &L, const UseMemo &R) {
return (intptr_t)L.User < (intptr_t)R.User;
}
-}
+
+} // end anonymous namespace
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
@@ -7106,7 +7114,6 @@ void SelectionDAG::ReplaceAllUsesOfValuesWith(const SDValue *From,
/// based on their topological order. It returns the maximum id and a vector
/// of the SDNodes* in assigned order by reference.
unsigned SelectionDAG::AssignTopologicalOrder() {
-
unsigned DAGSize = 0;
// SortedPos tracks the progress of the algorithm. Nodes before it are
@@ -7333,6 +7340,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
}
namespace {
+
struct EVTArray {
std::vector<EVT> VTs;
@@ -7342,11 +7350,12 @@ namespace {
VTs.push_back(MVT((MVT::SimpleValueType)i));
}
};
-}
-static ManagedStatic<std::set<EVT, EVT::compareRawBits> > EVTs;
+} // end anonymous namespace
+
+static ManagedStatic<std::set<EVT, EVT::compareRawBits>> EVTs;
static ManagedStatic<EVTArray> SimpleVTArray;
-static ManagedStatic<sys::SmartMutex<true> > VTMutex;
+static ManagedStatic<sys::SmartMutex<true>> VTMutex;
/// getValueTypeList - Return a pointer to the specified value type.
///
@@ -7380,7 +7389,6 @@ bool SDNode::hasNUsesOfValue(unsigned NUses, unsigned Value) const {
return NUses == 0;
}
-
/// hasAnyUseOfValue - Return true if there are any use of the indicated
/// value. This method ignores uses of other values defined by this operation.
bool SDNode::hasAnyUseOfValue(unsigned Value) const {
@@ -7393,9 +7401,7 @@ bool SDNode::hasAnyUseOfValue(unsigned Value) const {
return false;
}
-
/// isOnlyUserOf - Return true if this node is the only use of N.
-///
bool SDNode::isOnlyUserOf(const SDNode *N) const {
bool Seen = false;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
@@ -7425,7 +7431,6 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
}
/// isOperand - Return true if this node is an operand of N.
-///
bool SDValue::isOperandOf(const SDNode *N) const {
for (const SDValue &Op : N->op_values())
if (*this == Op)
@@ -7475,7 +7480,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
}
// Next, try a deep search: check whether every operand of the TokenFactor
// reaches Dest.
- return all_of((*this)->ops(), [=](SDValue Op) {
+ return llvm::all_of((*this)->ops(), [=](SDValue Op) {
return Op.reachesChainWithoutSideEffects(Dest, Depth - 1);
});
}
@@ -7627,7 +7632,6 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
return false;
}
-
/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
/// it cannot be inferred.
unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
@@ -7718,7 +7722,6 @@ unsigned GlobalAddressSDNode::getAddressSpace() const {
return getGlobal()->getType()->getAddressSpace();
}
-
Type *ConstantPoolSDNode::getType() const {
if (isMachineConstantPoolEntry())
return Val.MachineCPVal->getType();
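The UINT_MAX and INT_MAX replacements above swap <climits> macros for std::numeric_limits, which is typed and usable in templates. A standalone example of the difference:

```cpp
#include <cassert>
#include <limits>

template <typename T> bool isSentinel(T V) {
  return V == std::numeric_limits<T>::max();  // works for any unsigned T
}

int main() {
  unsigned Tmp = std::numeric_limits<unsigned>::max();  // was UINT_MAX
  assert(isSentinel(Tmp));
  assert(!isSentinel(0u));
}
```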
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 687b882c5e4d..b5ccd64ee76c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -2022,7 +2022,7 @@ static SDNode *findGlueUse(SDNode *N) {
}
/// findNonImmUse - Return true if "Use" is a non-immediate use of "Def".
-/// This function recursively traverses up the operand chain, ignoring
+/// This function iteratively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
SDNode *Root, SmallPtrSetImpl<SDNode*> &Visited,
@@ -2035,30 +2035,36 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
// happen because we scan down to newly selected nodes in the case of glue
// uses.
- if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
- return false;
+ std::vector<SDNode *> WorkList;
+ WorkList.push_back(Use);
- // Don't revisit nodes if we already scanned it and didn't fail, we know we
- // won't fail if we scan it again.
- if (!Visited.insert(Use).second)
- return false;
+ while (!WorkList.empty()) {
+ Use = WorkList.back();
+ WorkList.pop_back();
+ if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
+ continue;
- for (const SDValue &Op : Use->op_values()) {
- // Ignore chain uses, they are validated by HandleMergeInputChains.
- if (Op.getValueType() == MVT::Other && IgnoreChains)
+ // Don't revisit nodes if we already scanned it and didn't fail, we know we
+ // won't fail if we scan it again.
+ if (!Visited.insert(Use).second)
continue;
- SDNode *N = Op.getNode();
- if (N == Def) {
- if (Use == ImmedUse || Use == Root)
- continue; // We are not looking for immediate use.
- assert(N != Root);
- return true;
- }
+ for (const SDValue &Op : Use->op_values()) {
+ // Ignore chain uses, they are validated by HandleMergeInputChains.
+ if (Op.getValueType() == MVT::Other && IgnoreChains)
+ continue;
- // Traverse up the operand chain.
- if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
- return true;
+ SDNode *N = Op.getNode();
+ if (N == Def) {
+ if (Use == ImmedUse || Use == Root)
+ continue; // We are not looking for immediate use.
+ assert(N != Root);
+ return true;
+ }
+
+ // Traverse up the operand chain.
+ WorkList.push_back(N);
+ }
}
return false;
}
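findNonImmUse above is converted from recursion to an explicit worklist, bounding stack depth on deep DAGs while keeping the visited-set early-out. A self-contained sketch of the same transformation on a toy node graph:

```cpp
#include <cassert>
#include <set>
#include <vector>

struct Node { std::vector<Node *> Ops; };

// Returns true if Def is reachable from Use through operand edges.
bool reaches(Node *Use, Node *Def) {
  std::set<Node *> Visited;
  std::vector<Node *> WorkList{Use};
  while (!WorkList.empty()) {
    Node *N = WorkList.back();
    WorkList.pop_back();
    if (!Visited.insert(N).second)
      continue;                  // already scanned and didn't fail
    for (Node *Op : N->Ops) {
      if (Op == Def)
        return true;
      WorkList.push_back(Op);    // "recurse" by deferring to the list
    }
  }
  return false;
}

int main() {
  Node D, B{{&D}}, A{{&B}};
  assert(reaches(&A, &D) && !reaches(&D, &A));
}
```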
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 1c66649cae01..eed667dbe7e0 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -818,7 +818,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
SI.GCTransitionArgs =
ArrayRef<const Use>(ISP.gc_args_begin(), ISP.gc_args_end());
SI.ID = ISP.getID();
- SI.DeoptState = ArrayRef<const Use>(ISP.vm_state_begin(), ISP.vm_state_end());
+ SI.DeoptState = ArrayRef<const Use>(ISP.deopt_begin(), ISP.deopt_end());
SI.StatepointFlags = ISP.getFlags();
SI.NumPatchBytes = ISP.getNumPatchBytes();
SI.EHPadBB = EHPadBB;
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 0dffffee9976..adb2b188265b 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1493,8 +1493,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- // Ensure that the constant occurs on the RHS, and fold constant
- // comparisons.
+ // Ensure that the constant occurs on the RHS and fold constant comparisons.
ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
if (isa<ConstantSDNode>(N0.getNode()) &&
(DCI.isBeforeLegalizeOps() ||
@@ -1638,14 +1637,13 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, TopSetCC.getOperand(0),
TopSetCC.getOperand(1),
InvCond);
-
}
}
}
- // If the LHS is '(and load, const)', the RHS is 0,
- // the test is for equality or unsigned, and all 1 bits of the const are
- // in the same partial word, see if we can shorten the load.
+ // If the LHS is '(and load, const)', the RHS is 0, the test is for
+ // equality or unsigned, and all 1 bits of the const are in the same
+ // partial word, see if we can shorten the load.
if (DCI.isBeforeLegalize() &&
!ISD::isSignedIntSetCC(Cond) &&
N0.getOpcode() == ISD::AND && C1 == 0 &&
@@ -1669,10 +1667,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
if ((newMask & Mask) == Mask) {
- if (!DAG.getDataLayout().isLittleEndian())
- bestOffset = (origWidth/width - offset - 1) * (width/8);
- else
+ if (DAG.getDataLayout().isLittleEndian())
bestOffset = (uint64_t)offset * (width/8);
+ else
+ bestOffset = (origWidth/width - offset - 1) * (width/8);
bestMask = Mask.lshr(offset * (width/8) * 8);
bestWidth = width;
break;
@@ -1713,10 +1711,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
switch (Cond) {
case ISD::SETUGT:
case ISD::SETUGE:
- case ISD::SETEQ: return DAG.getConstant(0, dl, VT);
+ case ISD::SETEQ:
+ return DAG.getConstant(0, dl, VT);
case ISD::SETULT:
case ISD::SETULE:
- case ISD::SETNE: return DAG.getConstant(1, dl, VT);
+ case ISD::SETNE:
+ return DAG.getConstant(1, dl, VT);
case ISD::SETGT:
case ISD::SETGE:
// True if the sign bit of C1 is set.
@@ -1816,9 +1816,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
BitWidth-1))) {
// Okay, get the un-inverted input value.
SDValue Val;
- if (N0.getOpcode() == ISD::XOR)
+ if (N0.getOpcode() == ISD::XOR) {
Val = N0.getOperand(0);
- else {
+ } else {
assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
@@ -1883,7 +1883,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Canonicalize GE/LE comparisons to use GT/LT comparisons.
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
- if (C1 == MinVal) return DAG.getConstant(1, dl, VT); // X >= MIN --> true
+ // X >= MIN --> true
+ if (C1 == MinVal)
+ return DAG.getConstant(1, dl, VT);
+
// X >= C0 --> X > (C0 - 1)
APInt C = C1 - 1;
ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
@@ -1898,7 +1901,10 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
- if (C1 == MaxVal) return DAG.getConstant(1, dl, VT); // X <= MAX --> true
+ // X <= MAX --> true
+ if (C1 == MaxVal)
+ return DAG.getConstant(1, dl, VT);
+
// X <= C0 --> X < (C0 + 1)
APInt C = C1 + 1;
ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
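The reformatted hunks above implement the canonicalizations X >= C0 --> X > (C0 - 1) and X <= C0 --> X < (C0 + 1), with the MinVal/MaxVal early-outs ensuring the adjusted constant cannot wrap. A standalone exhaustive check of the unsigned identity on a small range:

```cpp
#include <cassert>

int main() {
  // C == 0 corresponds to the C1 == MinVal early-out returning "true".
  for (unsigned C = 1; C < 8; ++C)
    for (unsigned X = 0; X < 8; ++X)
      assert((X >= C) == (X > C - 1));
}
```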
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 900c0318b179..c43a5e18ad23 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1456,6 +1456,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeWidenVector: {
// Try to widen the vector.
@@ -1473,6 +1474,7 @@ void TargetLoweringBase::computeRegisterProperties(
}
if (IsLegalWiderType)
break;
+ LLVM_FALLTHROUGH;
}
case TypeSplitVector:
case TypeScalarizeVector: {
diff --git a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
index 282e3103adc9..711144fc2faa 100644
--- a/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
+++ b/lib/DebugInfo/CodeView/CodeViewRecordIO.cpp
@@ -27,6 +27,14 @@ Error CodeViewRecordIO::beginRecord(Optional<uint32_t> MaxLength) {
Error CodeViewRecordIO::endRecord() {
assert(!Limits.empty() && "Not in a record!");
Limits.pop_back();
+ // We would like to assert that we actually read / wrote all the bytes that we
+ // expected to for this record, but unfortunately we can't do this. Some
+ // producers such as MASM over-allocate for certain types of records and
+ // commit the extraneous data, so when reading we can't be sure every byte
+ // will have been read. And when writing we over-allocate temporarily since
+ // we don't know how big the record is until we're finished writing it, so
+ // even though we don't commit the extraneous data, we still can't guarantee
+ // we're at the end of the allocated data.
return Error::success();
}
@@ -49,6 +57,12 @@ uint32_t CodeViewRecordIO::maxFieldLength() const {
return *Min;
}
+Error CodeViewRecordIO::padToAlignment(uint32_t Align) {
+ if (isReading())
+ return Reader->padToAlignment(Align);
+ return Writer->padToAlignment(Align);
+}
+
Error CodeViewRecordIO::skipPadding() {
assert(!isWriting() && "Cannot skip padding while writing!");
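The new CodeViewRecordIO::padToAlignment simply forwards to whichever of the reader or writer is active. For reference, the usual arithmetic behind such a helper (a sketch, not the BinaryStreamReader/Writer implementation) is:

```cpp
#include <cassert>
#include <cstdint>

// How many pad bytes advance Offset to the next multiple of Align,
// where Align is a power of two.
uint32_t paddingBytes(uint32_t Offset, uint32_t Align) {
  uint32_t Aligned = (Offset + Align - 1) & ~(Align - 1);
  return Aligned - Offset;
}

int main() {
  assert(paddingBytes(13, 4) == 3);  // 13 -> 16
  assert(paddingBytes(16, 4) == 0);  // already aligned
}
```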
diff --git a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
index b8741eb0b675..2e72242181b0 100644
--- a/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
+++ b/lib/DebugInfo/CodeView/DebugStringTableSubsection.cpp
@@ -72,7 +72,7 @@ Error DebugStringTableSubsection::commit(BinaryStreamWriter &Writer) const {
uint32_t DebugStringTableSubsection::size() const { return Strings.size(); }
uint32_t DebugStringTableSubsection::getStringId(StringRef S) const {
- auto P = Strings.find(S);
- assert(P != Strings.end());
- return P->second;
+ auto Iter = Strings.find(S);
+ assert(Iter != Strings.end());
+ return Iter->second;
}
diff --git a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
index 511f36d0020a..cfd1c5d3ab0c 100644
--- a/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
+++ b/lib/DebugInfo/CodeView/DebugSubsectionRecord.cpp
@@ -16,14 +16,17 @@ using namespace llvm;
using namespace llvm::codeview;
DebugSubsectionRecord::DebugSubsectionRecord()
- : Kind(DebugSubsectionKind::None) {}
+ : Container(CodeViewContainer::ObjectFile),
+ Kind(DebugSubsectionKind::None) {}
DebugSubsectionRecord::DebugSubsectionRecord(DebugSubsectionKind Kind,
- BinaryStreamRef Data)
- : Kind(Kind), Data(Data) {}
+ BinaryStreamRef Data,
+ CodeViewContainer Container)
+ : Container(Container), Kind(Kind), Data(Data) {}
Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
- DebugSubsectionRecord &Info) {
+ DebugSubsectionRecord &Info,
+ CodeViewContainer Container) {
const DebugSubsectionHeader *Header;
BinaryStreamReader Reader(Stream);
if (auto EC = Reader.readObject(Header))
@@ -41,13 +44,14 @@ Error DebugSubsectionRecord::initialize(BinaryStreamRef Stream,
}
if (auto EC = Reader.readStreamRef(Info.Data, Header->Length))
return EC;
+ Info.Container = Container;
Info.Kind = Kind;
return Error::success();
}
uint32_t DebugSubsectionRecord::getRecordLength() const {
uint32_t Result = sizeof(DebugSubsectionHeader) + Data.getLength();
- assert(Result % 4 == 0);
+ assert(Result % alignOf(Container) == 0);
return Result;
}
@@ -56,25 +60,29 @@ DebugSubsectionKind DebugSubsectionRecord::kind() const { return Kind; }
BinaryStreamRef DebugSubsectionRecord::getRecordData() const { return Data; }
DebugSubsectionRecordBuilder::DebugSubsectionRecordBuilder(
- DebugSubsectionKind Kind, DebugSubsection &Frag)
- : Kind(Kind), Frag(Frag) {}
+ std::unique_ptr<DebugSubsection> Subsection, CodeViewContainer Container)
+ : Subsection(std::move(Subsection)), Container(Container) {}
uint32_t DebugSubsectionRecordBuilder::calculateSerializedLength() {
- uint32_t Size = sizeof(DebugSubsectionHeader) +
- alignTo(Frag.calculateSerializedSize(), 4);
+ uint32_t Size =
+ sizeof(DebugSubsectionHeader) +
+ alignTo(Subsection->calculateSerializedSize(), alignOf(Container));
return Size;
}
Error DebugSubsectionRecordBuilder::commit(BinaryStreamWriter &Writer) {
+ assert(Writer.getOffset() % alignOf(Container) == 0 &&
+ "Debug Subsection not properly aligned");
+
DebugSubsectionHeader Header;
- Header.Kind = uint32_t(Kind);
+ Header.Kind = uint32_t(Subsection->kind());
Header.Length = calculateSerializedLength() - sizeof(DebugSubsectionHeader);
if (auto EC = Writer.writeObject(Header))
return EC;
- if (auto EC = Frag.commit(Writer))
+ if (auto EC = Subsection->commit(Writer))
return EC;
- if (auto EC = Writer.padToAlignment(4))
+ if (auto EC = Writer.padToAlignment(alignOf(Container)))
return EC;
return Error::success();
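The DebugSubsectionRecord changes thread a CodeViewContainer through so that alignment becomes container-dependent: per the asserts above, PDB subsections are 4-byte aligned while object-file records are packed. A minimal sketch of an alignOf helper with that behavior (assumed from the patch, not copied from LLVM):

```cpp
#include <cassert>
#include <cstdint>

enum class CodeViewContainer { ObjectFile, Pdb };

uint32_t alignOf(CodeViewContainer C) {
  // Subsections in a PDB are padded to 4 bytes; object files pack records.
  return C == CodeViewContainer::Pdb ? 4 : 1;
}

int main() {
  assert(alignOf(CodeViewContainer::Pdb) == 4);
  assert(alignOf(CodeViewContainer::ObjectFile) == 1);
}
```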
diff --git a/lib/DebugInfo/CodeView/SymbolDumper.cpp b/lib/DebugInfo/CodeView/SymbolDumper.cpp
index 3d49a7198d1a..66045933ce9b 100644
--- a/lib/DebugInfo/CodeView/SymbolDumper.cpp
+++ b/lib/DebugInfo/CodeView/SymbolDumper.cpp
@@ -668,7 +668,7 @@ Error CVSymbolDumperImpl::visitUnknownSymbol(CVSymbol &CVR) {
Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
SymbolVisitorCallbackPipeline Pipeline;
- SymbolDeserializer Deserializer(ObjDelegate.get());
+ SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
@@ -679,7 +679,7 @@ Error CVSymbolDumper::dump(CVRecord<SymbolKind> &Record) {
Error CVSymbolDumper::dump(const CVSymbolArray &Symbols) {
SymbolVisitorCallbackPipeline Pipeline;
- SymbolDeserializer Deserializer(ObjDelegate.get());
+ SymbolDeserializer Deserializer(ObjDelegate.get(), Container);
CVSymbolDumperImpl Dumper(Types, ObjDelegate.get(), W, PrintRecordBytes);
Pipeline.addCallbackToPipeline(Deserializer);
diff --git a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
index bb1731465495..ea46841a70f6 100644
--- a/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
+++ b/lib/DebugInfo/CodeView/SymbolRecordMapping.cpp
@@ -40,6 +40,7 @@ Error SymbolRecordMapping::visitSymbolBegin(CVSymbol &Record) {
}
Error SymbolRecordMapping::visitSymbolEnd(CVSymbol &Record) {
+ error(IO.padToAlignment(alignOf(Container)));
error(IO.endRecord());
return Error::success();
}
diff --git a/lib/DebugInfo/CodeView/SymbolSerializer.cpp b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
index 251cc431f52b..9f2d619d1a1c 100644
--- a/lib/DebugInfo/CodeView/SymbolSerializer.cpp
+++ b/lib/DebugInfo/CodeView/SymbolSerializer.cpp
@@ -12,9 +12,11 @@
using namespace llvm;
using namespace llvm::codeview;
-SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator)
- : Storage(Allocator), RecordBuffer(MaxRecordLength), Stream(RecordBuffer, llvm::support::little),
- Writer(Stream), Mapping(Writer) { }
+SymbolSerializer::SymbolSerializer(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container)
+ : Storage(Allocator), RecordBuffer(MaxRecordLength),
+ Stream(RecordBuffer, llvm::support::little), Writer(Stream),
+ Mapping(Writer, Container) {}
Error SymbolSerializer::visitSymbolBegin(CVSymbol &Record) {
assert(!CurrentSymbol.hasValue() && "Already in a symbol mapping!");
diff --git a/lib/DebugInfo/MSF/MappedBlockStream.cpp b/lib/DebugInfo/MSF/MappedBlockStream.cpp
index dfdeb8414212..faf2442bc94b 100644
--- a/lib/DebugInfo/MSF/MappedBlockStream.cpp
+++ b/lib/DebugInfo/MSF/MappedBlockStream.cpp
@@ -47,42 +47,46 @@ static Interval intersect(const Interval &I1, const Interval &I2) {
MappedBlockStream::MappedBlockStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
- BinaryStreamRef MsfData)
- : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData) {}
-
-std::unique_ptr<MappedBlockStream>
-MappedBlockStream::createStream(uint32_t BlockSize,
- const MSFStreamLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator)
+ : BlockSize(BlockSize), StreamLayout(Layout), MsfData(MsfData),
+ Allocator(Allocator) {}
+
+std::unique_ptr<MappedBlockStream> MappedBlockStream::createStream(
+ uint32_t BlockSize, const MSFStreamLayout &Layout, BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- BlockSize, Layout, MsfData);
+ BlockSize, Layout, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream> MappedBlockStream::createIndexedStream(
- const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex) {
+ const MSFLayout &Layout, BinaryStreamRef MsfData, uint32_t StreamIndex,
+ BumpPtrAllocator &Allocator) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
return llvm::make_unique<MappedBlockStreamImpl<MappedBlockStream>>(
- Layout.SB->BlockSize, SL, MsfData);
+ Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createDirectoryStream(const MSFLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<MappedBlockStream>
MappedBlockStream::createFpmStream(const MSFLayout &Layout,
- BinaryStreamRef MsfData) {
+ BinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
@@ -148,7 +152,7 @@ Error MappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
// into it, and return an ArrayRef to that. Do not touch existing pool
// allocations, as existing clients may be holding a pointer which must
// not be invalidated.
- uint8_t *WriteBuffer = static_cast<uint8_t *>(Pool.Allocate(Size, 8));
+ uint8_t *WriteBuffer = static_cast<uint8_t *>(Allocator.Allocate(Size, 8));
if (auto EC = readBytes(Offset, MutableArrayRef<uint8_t>(WriteBuffer, Size)))
return EC;
@@ -269,10 +273,6 @@ Error MappedBlockStream::readBytes(uint32_t Offset,
return Error::success();
}
-uint32_t MappedBlockStream::getNumBytesCopied() const {
- return static_cast<uint32_t>(Pool.getBytesAllocated());
-}
-
void MappedBlockStream::invalidateCache() { CacheMap.shrink_and_clear(); }
void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
@@ -313,43 +313,48 @@ void MappedBlockStream::fixCacheAfterWrite(uint32_t Offset,
WritableMappedBlockStream::WritableMappedBlockStream(
uint32_t BlockSize, const MSFStreamLayout &Layout,
- WritableBinaryStreamRef MsfData)
- : ReadInterface(BlockSize, Layout, MsfData), WriteInterface(MsfData) {}
+ WritableBinaryStreamRef MsfData, BumpPtrAllocator &Allocator)
+ : ReadInterface(BlockSize, Layout, MsfData, Allocator),
+ WriteInterface(MsfData) {}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createStream(uint32_t BlockSize,
const MSFStreamLayout &Layout,
- WritableBinaryStreamRef MsfData) {
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
return llvm::make_unique<MappedBlockStreamImpl<WritableMappedBlockStream>>(
- BlockSize, Layout, MsfData);
+ BlockSize, Layout, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createIndexedStream(const MSFLayout &Layout,
WritableBinaryStreamRef MsfData,
- uint32_t StreamIndex) {
+ uint32_t StreamIndex,
+ BumpPtrAllocator &Allocator) {
assert(StreamIndex < Layout.StreamMap.size() && "Invalid stream index");
MSFStreamLayout SL;
SL.Blocks = Layout.StreamMap[StreamIndex];
SL.Length = Layout.StreamSizes[StreamIndex];
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createDirectoryStream(
- const MSFLayout &Layout, WritableBinaryStreamRef MsfData) {
+ const MSFLayout &Layout, WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
SL.Blocks = Layout.DirectoryBlocks;
SL.Length = Layout.SB->NumDirectoryBytes;
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
std::unique_ptr<WritableMappedBlockStream>
WritableMappedBlockStream::createFpmStream(const MSFLayout &Layout,
- WritableBinaryStreamRef MsfData) {
+ WritableBinaryStreamRef MsfData,
+ BumpPtrAllocator &Allocator) {
MSFStreamLayout SL;
initializeFpmStreamLayout(Layout, SL);
- return createStream(Layout.SB->BlockSize, SL, MsfData);
+ return createStream(Layout.SB->BlockSize, SL, MsfData, Allocator);
}
Error WritableMappedBlockStream::readBytes(uint32_t Offset, uint32_t Size,
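The hunks above replace MappedBlockStream's private allocation pool with a BumpPtrAllocator supplied by the caller (in practice the owning PDBFile), so buffers handed out by readBytes stay valid for the lifetime of the file rather than the lifetime of one stream. A minimal standalone sketch of that ownership pattern, using stand-in types rather than the LLVM classes:

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <memory>
#include <vector>

// Stand-in for llvm::BumpPtrAllocator: every allocation lives until the
// allocator itself is destroyed.
class BumpAllocator {
  std::vector<std::unique_ptr<uint8_t[]>> Slabs;
public:
  uint8_t *allocate(size_t Size) {
    Slabs.push_back(std::make_unique<uint8_t[]>(Size));
    return Slabs.back().get();
  }
};

class Stream {
  BumpAllocator &Alloc; // borrowed; the file that owns it outlives every Stream
public:
  explicit Stream(BumpAllocator &A) : Alloc(A) {}
  // Copy Size bytes into allocator-owned memory. Callers may keep the returned
  // pointer after this Stream is destroyed, which is the guarantee the patch
  // needs when many short-lived streams are created over one file.
  uint8_t *read(const uint8_t *Src, size_t Size) {
    uint8_t *Buf = Alloc.allocate(Size);
    std::copy(Src, Src + Size, Buf);
    return Buf;
  }
};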
diff --git a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
index b28ec2ff33ac..22c2ef31bd71 100644
--- a/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiModuleDescriptorBuilder.cpp
@@ -66,7 +66,11 @@ void DbiModuleDescriptorBuilder::setObjFileName(StringRef Name) {
void DbiModuleDescriptorBuilder::addSymbol(CVSymbol Symbol) {
Symbols.push_back(Symbol);
- SymbolByteSize += Symbol.data().size();
+ // Symbols written to a PDB file are required to be 4-byte aligned. The same
+ // is not true of symbols in object files.
+ assert(Symbol.length() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid Symbol alignment!");
+ SymbolByteSize += Symbol.length();
}
void DbiModuleDescriptorBuilder::addSourceFile(StringRef Path) {
@@ -140,7 +144,7 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (Layout.ModDiStream != kInvalidStreamIndex) {
auto NS = WritableMappedBlockStream::createIndexedStream(
- MsfLayout, MsfBuffer, Layout.ModDiStream);
+ MsfLayout, MsfBuffer, Layout.ModDiStream, MSF.getAllocator());
WritableBinaryStreamRef Ref(*NS);
BinaryStreamWriter SymbolWriter(Ref);
// Write the symbols.
@@ -153,7 +157,8 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
if (auto EC = SymbolWriter.writeStreamRef(RecordsRef))
return EC;
// TODO: Write C11 Line data
-
+ assert(SymbolWriter.getOffset() % alignOf(CodeViewContainer::Pdb) == 0 &&
+ "Invalid debug section alignment!");
for (const auto &Builder : C13Builders) {
assert(Builder && "Empty C13 Fragment Builder!");
if (auto EC = Builder->commit(SymbolWriter))
@@ -169,42 +174,9 @@ Error DbiModuleDescriptorBuilder::commit(BinaryStreamWriter &ModiWriter,
return Error::success();
}
-void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<DebugLinesSubsection> Lines) {
- DebugLinesSubsection &Frag = *Lines;
-
- // File Checksums have to come first, so push an empty entry on if this
- // is the first.
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- this->LineInfo.push_back(std::move(Lines));
- C13Builders.push_back(
- llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
-}
-
-void DbiModuleDescriptorBuilder::addC13Fragment(
- std::unique_ptr<codeview::DebugInlineeLinesSubsection> Inlinees) {
- DebugInlineeLinesSubsection &Frag = *Inlinees;
-
- // File Checksums have to come first, so push an empty entry on if this
- // is the first.
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- this->Inlinees.push_back(std::move(Inlinees));
- C13Builders.push_back(
- llvm::make_unique<DebugSubsectionRecordBuilder>(Frag.kind(), Frag));
-}
-
-void DbiModuleDescriptorBuilder::setC13FileChecksums(
- std::unique_ptr<DebugChecksumsSubsection> Checksums) {
- assert(!ChecksumInfo && "Can't have more than one checksum info!");
-
- if (C13Builders.empty())
- C13Builders.push_back(nullptr);
-
- ChecksumInfo = std::move(Checksums);
- C13Builders[0] = llvm::make_unique<DebugSubsectionRecordBuilder>(
- ChecksumInfo->kind(), *ChecksumInfo);
+void DbiModuleDescriptorBuilder::addDebugSubsection(
+ std::unique_ptr<DebugSubsection> Subsection) {
+ assert(Subsection);
+ C13Builders.push_back(llvm::make_unique<DebugSubsectionRecordBuilder>(
+ std::move(Subsection), CodeViewContainer::Pdb));
}
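The new assertions in addSymbol and commit encode the invariant that records destined for a PDB are padded to 4 bytes, while CodeView embedded in object files is unpadded. A hedged sketch of that rule; alignOf and CodeViewContainer mirror the names used in the patch, while alignedLength is a hypothetical helper:

#include <cstdint>

enum class CodeViewContainer { ObjectFile, Pdb };

// Records in object files are unaligned; records in a PDB are 4-byte aligned.
inline uint32_t alignOf(CodeViewContainer C) {
  return C == CodeViewContainer::ObjectFile ? 1 : 4;
}

// Round a record length up to its container's alignment, e.g. 10 -> 12 for a
// PDB, 10 -> 10 for an object file.
inline uint32_t alignedLength(uint32_t Len, CodeViewContainer C) {
  uint32_t A = alignOf(C);
  return (Len + A - 1) & ~(A - 1);
}

With that helper, the assertion in addSymbol amounts to checking that Symbol.length() == alignedLength(Symbol.length(), CodeViewContainer::Pdb).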
diff --git a/lib/DebugInfo/PDB/Native/DbiStream.cpp b/lib/DebugInfo/PDB/Native/DbiStream.cpp
index 2f4fb6cc295d..320b11dc5cab 100644
--- a/lib/DebugInfo/PDB/Native/DbiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStream.cpp
@@ -252,7 +252,7 @@ Error DbiStream::initializeSectionHeadersData() {
return make_error<RawError>(raw_error_code::no_stream);
auto SHS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator());
size_t StreamLen = SHS->getLength();
if (StreamLen % sizeof(object::coff_section))
@@ -284,7 +284,7 @@ Error DbiStream::initializeFpoRecords() {
return make_error<RawError>(raw_error_code::no_stream);
auto FS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), StreamNum, Pdb.getAllocator());
size_t StreamLen = FS->getLength();
if (StreamLen % sizeof(object::FpoData))
diff --git a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
index 23c7456d7772..55c20fdb9af6 100644
--- a/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/DbiStreamBuilder.cpp
@@ -357,8 +357,8 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = finalize())
return EC;
- auto DbiS = WritableMappedBlockStream::createIndexedStream(Layout, MsfBuffer,
- StreamDBI);
+ auto DbiS = WritableMappedBlockStream::createIndexedStream(
+ Layout, MsfBuffer, StreamDBI, Allocator);
BinaryStreamWriter Writer(*DbiS);
if (auto EC = Writer.writeObject(*Header))
@@ -396,7 +396,7 @@ Error DbiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (Stream.StreamNumber == kInvalidStreamIndex)
continue;
auto WritableStream = WritableMappedBlockStream::createIndexedStream(
- Layout, MsfBuffer, Stream.StreamNumber);
+ Layout, MsfBuffer, Stream.StreamNumber, Allocator);
BinaryStreamWriter DbgStreamWriter(*WritableStream);
if (auto EC = DbgStreamWriter.writeArray(Stream.Data))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
index f019d410328a..707128f7efd4 100644
--- a/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/InfoStreamBuilder.cpp
@@ -50,8 +50,8 @@ Error InfoStreamBuilder::finalizeMsfLayout() {
Error InfoStreamBuilder::commit(const msf::MSFLayout &Layout,
WritableBinaryStreamRef Buffer) const {
- auto InfoS =
- WritableMappedBlockStream::createIndexedStream(Layout, Buffer, StreamPDB);
+ auto InfoS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, StreamPDB, Msf.getAllocator());
BinaryStreamWriter Writer(*InfoS);
InfoStreamHeader H;
diff --git a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
index d7a203746a0d..c4ff30011a17 100644
--- a/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
+++ b/lib/DebugInfo/PDB/Native/ModuleDebugStream.cpp
@@ -55,9 +55,9 @@ Error ModuleDebugStreamRef::reload() {
if (auto EC = Reader.readStreamRef(C13LinesSubstream, C13Size))
return EC;
- BinaryStreamReader LineReader(C13LinesSubstream);
- if (auto EC =
- LineReader.readArray(LinesAndChecksums, LineReader.bytesRemaining()))
+ BinaryStreamReader SubsectionsReader(C13LinesSubstream);
+ if (auto EC = SubsectionsReader.readArray(Subsections,
+ SubsectionsReader.bytesRemaining()))
return EC;
uint32_t GlobalRefsSize;
@@ -77,13 +77,27 @@ ModuleDebugStreamRef::symbols(bool *HadError) const {
return make_range(SymbolsSubstream.begin(HadError), SymbolsSubstream.end());
}
-llvm::iterator_range<ModuleDebugStreamRef::LinesAndChecksumsIterator>
-ModuleDebugStreamRef::linesAndChecksums() const {
- return make_range(LinesAndChecksums.begin(), LinesAndChecksums.end());
+llvm::iterator_range<ModuleDebugStreamRef::DebugSubsectionIterator>
+ModuleDebugStreamRef::subsections() const {
+ return make_range(Subsections.begin(), Subsections.end());
}
-bool ModuleDebugStreamRef::hasLineInfo() const {
+bool ModuleDebugStreamRef::hasDebugSubsections() const {
return C13LinesSubstream.getLength() > 0;
}
Error ModuleDebugStreamRef::commit() { return Error::success(); }
+
+Expected<codeview::DebugChecksumsSubsectionRef>
+ModuleDebugStreamRef::findChecksumsSubsection() const {
+ for (const auto &SS : subsections()) {
+ if (SS.kind() != DebugSubsectionKind::FileChecksums)
+ continue;
+
+ codeview::DebugChecksumsSubsectionRef Result;
+ if (auto EC = Result.initialize(SS.getRecordData()))
+ return std::move(EC);
+ return Result;
+ }
+ return make_error<RawError>(raw_error_code::no_entry);
+}
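findChecksumsSubsection walks the subsection list in stream order and initializes a ref over the first FileChecksums payload it finds, returning no_entry if the module has none. The same scan, reduced to a standalone sketch with stand-in types:

#include <cstdint>
#include <optional>
#include <vector>

enum class SubsectionKind { Lines, FileChecksums, InlineeLines };

struct Subsection {
  SubsectionKind Kind;
  std::vector<uint8_t> Data; // raw record payload
};

// Return the first FileChecksums payload, or nothing if the module has none.
std::optional<std::vector<uint8_t>>
findChecksums(const std::vector<Subsection> &Subsections) {
  for (const auto &SS : Subsections)
    if (SS.Kind == SubsectionKind::FileChecksums)
      return SS.Data;
  return std::nullopt;
}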
diff --git a/lib/DebugInfo/PDB/Native/PDBFile.cpp b/lib/DebugInfo/PDB/Native/PDBFile.cpp
index 859295d2c7d3..1254e23c73eb 100644
--- a/lib/DebugInfo/PDB/Native/PDBFile.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFile.cpp
@@ -146,7 +146,8 @@ Error PDBFile::parseFileHeaders() {
// at getBlockSize() intervals, so we have to be compatible.
// See the function fpmPn() for more information:
// https://github.com/Microsoft/microsoft-pdb/blob/master/PDB/msf/msf.cpp#L489
- auto FpmStream = MappedBlockStream::createFpmStream(ContainerLayout, *Buffer);
+ auto FpmStream =
+ MappedBlockStream::createFpmStream(ContainerLayout, *Buffer, Allocator);
BinaryStreamReader FpmReader(*FpmStream);
ArrayRef<uint8_t> FpmBytes;
if (auto EC = FpmReader.readBytes(FpmBytes,
@@ -184,7 +185,8 @@ Error PDBFile::parseStreamData() {
// is exactly what we are attempting to parse. By specifying a custom
// subclass of IPDBStreamData which only accesses the fields that have already
// been parsed, we can avoid this and reuse MappedBlockStream.
- auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer);
+ auto DS = MappedBlockStream::createDirectoryStream(ContainerLayout, *Buffer,
+ Allocator);
BinaryStreamReader Reader(*DS);
if (auto EC = Reader.readInteger(NumStreams))
return EC;
@@ -407,5 +409,6 @@ PDBFile::safelyCreateIndexedStream(const MSFLayout &Layout,
uint32_t StreamIndex) const {
if (StreamIndex >= getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
- return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex);
+ return MappedBlockStream::createIndexedStream(Layout, MsfData, StreamIndex,
+ Allocator);
}
diff --git a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
index c6568029ec55..2c6465e6fb2a 100644
--- a/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBFileBuilder.cpp
@@ -140,8 +140,8 @@ Error PDBFileBuilder::commit(StringRef Filename) {
if (auto EC = Writer.writeArray(Layout.DirectoryBlocks))
return EC;
- auto DirStream =
- WritableMappedBlockStream::createDirectoryStream(Layout, Buffer);
+ auto DirStream = WritableMappedBlockStream::createDirectoryStream(
+ Layout, Buffer, Allocator);
BinaryStreamWriter DW(*DirStream);
if (auto EC = DW.writeInteger<uint32_t>(Layout.StreamSizes.size()))
return EC;
@@ -158,8 +158,8 @@ Error PDBFileBuilder::commit(StringRef Filename) {
if (!ExpectedSN)
return ExpectedSN.takeError();
- auto NS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
- *ExpectedSN);
+ auto NS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, *ExpectedSN, Allocator);
BinaryStreamWriter NSWriter(*NS);
if (auto EC = Strings.commit(NSWriter))
return EC;
diff --git a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
index e84573fe07b8..6013c342cf02 100644
--- a/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
+++ b/lib/DebugInfo/PDB/Native/PDBStringTable.cpp
@@ -56,6 +56,10 @@ Error PDBStringTable::readStrings(BinaryStreamReader &Reader) {
return Error::success();
}
+codeview::DebugStringTableSubsectionRef PDBStringTable::getStringTable() const {
+ return Strings;
+}
+
Error PDBStringTable::readHashTable(BinaryStreamReader &Reader) {
const support::ulittle32_t *HashCount;
if (auto EC = Reader.readObject(HashCount))
diff --git a/lib/DebugInfo/PDB/Native/TpiStream.cpp b/lib/DebugInfo/PDB/Native/TpiStream.cpp
index 623afb371b50..67c803d3124e 100644
--- a/lib/DebugInfo/PDB/Native/TpiStream.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStream.cpp
@@ -32,8 +32,7 @@ using namespace llvm::support;
using namespace llvm::msf;
using namespace llvm::pdb;
-TpiStream::TpiStream(const PDBFile &File,
- std::unique_ptr<MappedBlockStream> Stream)
+TpiStream::TpiStream(PDBFile &File, std::unique_ptr<MappedBlockStream> Stream)
: Pdb(File), Stream(std::move(Stream)) {}
TpiStream::~TpiStream() = default;
@@ -77,7 +76,8 @@ Error TpiStream::reload() {
"Invalid TPI hash stream index.");
auto HS = MappedBlockStream::createIndexedStream(
- Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex);
+ Pdb.getMsfLayout(), Pdb.getMsfBuffer(), Header->HashStreamIndex,
+ Pdb.getAllocator());
BinaryStreamReader HSR(*HS);
// There should be a hash value for every type record, or no hashes at all.
diff --git a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
index 20456cc97823..9e943c7f114d 100644
--- a/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
+++ b/lib/DebugInfo/PDB/Native/TpiStreamBuilder.cpp
@@ -147,8 +147,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
if (auto EC = finalize())
return EC;
- auto InfoS =
- WritableMappedBlockStream::createIndexedStream(Layout, Buffer, Idx);
+ auto InfoS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
+ Idx, Allocator);
BinaryStreamWriter Writer(*InfoS);
if (auto EC = Writer.writeObject(*Header))
@@ -159,8 +159,8 @@ Error TpiStreamBuilder::commit(const msf::MSFLayout &Layout,
return EC;
if (HashStreamIndex != kInvalidStreamIndex) {
- auto HVS = WritableMappedBlockStream::createIndexedStream(Layout, Buffer,
- HashStreamIndex);
+ auto HVS = WritableMappedBlockStream::createIndexedStream(
+ Layout, Buffer, HashStreamIndex, Allocator);
BinaryStreamWriter HW(*HVS);
if (HashValueStream) {
if (auto EC = HW.writeStreamRef(*HashValueStream))
diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp
index 7e6f9a7804b9..7754ac03b43d 100644
--- a/lib/IR/DIBuilder.cpp
+++ b/lib/IR/DIBuilder.cpp
@@ -39,6 +39,21 @@ void DIBuilder::trackIfUnresolved(MDNode *N) {
UnresolvedNodes.emplace_back(N);
}
+void DIBuilder::finalizeSubprogram(DISubprogram *SP) {
+ MDTuple *Temp = SP->getVariables().get();
+ if (!Temp || !Temp->isTemporary())
+ return;
+
+ SmallVector<Metadata *, 4> Variables;
+
+ auto PV = PreservedVariables.find(SP);
+ if (PV != PreservedVariables.end())
+ Variables.append(PV->second.begin(), PV->second.end());
+
+ DINodeArray AV = getOrCreateArray(Variables);
+ TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
+}
+
void DIBuilder::finalize() {
if (!CUNode) {
assert(!AllowUnresolvedNodes &&
@@ -62,25 +77,11 @@ void DIBuilder::finalize() {
CUNode->replaceRetainedTypes(MDTuple::get(VMContext, RetainValues));
DISubprogramArray SPs = MDTuple::get(VMContext, AllSubprograms);
- auto resolveVariables = [&](DISubprogram *SP) {
- MDTuple *Temp = SP->getVariables().get();
- if (!Temp)
- return;
-
- SmallVector<Metadata *, 4> Variables;
-
- auto PV = PreservedVariables.find(SP);
- if (PV != PreservedVariables.end())
- Variables.append(PV->second.begin(), PV->second.end());
-
- DINodeArray AV = getOrCreateArray(Variables);
- TempMDTuple(Temp)->replaceAllUsesWith(AV.get());
- };
for (auto *SP : SPs)
- resolveVariables(SP);
+ finalizeSubprogram(SP);
for (auto *N : RetainValues)
if (auto *SP = dyn_cast<DISubprogram>(N))
- resolveVariables(SP);
+ finalizeSubprogram(SP);
if (!AllGVs.empty())
CUNode->replaceGlobalVariables(MDTuple::get(VMContext, AllGVs));
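Hoisting the lambda into finalizeSubprogram lets a single subprogram be finalized on demand, and the added isTemporary() check makes repeated calls harmless: the forwarding tuple is only replaced once. A toy sketch of that placeholder-then-replace flow, using stand-in types instead of MDTuple and RAUW:

#include <string>
#include <vector>

struct VariablesNode {
  bool Temporary = true;              // starts as a forwarding placeholder
  std::vector<std::string> Variables;
};

// Safe to call more than once; only the first call performs the replacement,
// mirroring TempMDTuple::replaceAllUsesWith on the temporary node.
void finalizeSubprogram(VariablesNode &N,
                        const std::vector<std::string> &Preserved) {
  if (!N.Temporary)
    return;
  N.Variables = Preserved;
  N.Temporary = false;
}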
diff --git a/lib/IR/DebugLoc.cpp b/lib/IR/DebugLoc.cpp
index b7e3f0c6779e..0485fece7c42 100644
--- a/lib/IR/DebugLoc.cpp
+++ b/lib/IR/DebugLoc.cpp
@@ -99,87 +99,6 @@ DebugLoc DebugLoc::appendInlinedAt(DebugLoc DL, DILocation *InlinedAt,
return Last;
}
-/// Reparent \c Scope from \c OrigSP to \c NewSP.
-static DIScope *reparentScope(LLVMContext &Ctx, DIScope *Scope,
- DISubprogram *OrigSP, DISubprogram *NewSP,
- DenseMap<const MDNode *, MDNode *> &Cache) {
- SmallVector<DIScope *, 3> ScopeChain;
- DIScope *Last = NewSP;
- DIScope *CurScope = Scope;
- do {
- if (auto *SP = dyn_cast<DISubprogram>(CurScope)) {
- // Don't rewrite this scope chain if it doesn't lead to the replaced SP.
- if (SP != OrigSP)
- return Scope;
- Cache.insert({OrigSP, NewSP});
- break;
- }
- if (auto *Found = Cache[CurScope]) {
- Last = cast<DIScope>(Found);
- break;
- }
- ScopeChain.push_back(CurScope);
- } while ((CurScope = CurScope->getScope().resolve()));
-
- // Starting from the top, rebuild the nodes to point to the new inlined-at
- // location (then rebuilding the rest of the chain behind it) and update the
- // map of already-constructed inlined-at nodes.
- for (const DIScope *MD : reverse(ScopeChain)) {
- if (auto *LB = dyn_cast<DILexicalBlock>(MD))
- Cache[MD] = Last = DILexicalBlock::getDistinct(
- Ctx, Last, LB->getFile(), LB->getLine(), LB->getColumn());
- else if (auto *LB = dyn_cast<DILexicalBlockFile>(MD))
- Cache[MD] = Last = DILexicalBlockFile::getDistinct(
- Ctx, Last, LB->getFile(), LB->getDiscriminator());
- else
- llvm_unreachable("illegal parent scope");
- }
- return Last;
-}
-
-void DebugLoc::reparentDebugInfo(Instruction &I, DISubprogram *OrigSP,
- DISubprogram *NewSP,
- DenseMap<const MDNode *, MDNode *> &Cache) {
- auto DL = I.getDebugLoc();
- if (!OrigSP || !NewSP || OrigSP == NewSP || !DL)
- return;
-
- // Reparent the debug location.
- auto &Ctx = I.getContext();
- DILocation *InlinedAt = DL->getInlinedAt();
- if (InlinedAt) {
- while (auto *IA = InlinedAt->getInlinedAt())
- InlinedAt = IA;
- auto NewScope =
- reparentScope(Ctx, InlinedAt->getScope(), OrigSP, NewSP, Cache);
- InlinedAt =
- DebugLoc::get(InlinedAt->getLine(), InlinedAt->getColumn(), NewScope);
- }
- I.setDebugLoc(
- DebugLoc::get(DL.getLine(), DL.getCol(),
- reparentScope(Ctx, DL->getScope(), OrigSP, NewSP, Cache),
- DebugLoc::appendInlinedAt(DL, InlinedAt, Ctx, Cache,
- ReplaceLastInlinedAt)));
-
- // Fix up debug variables to point to NewSP.
- auto reparentVar = [&](DILocalVariable *Var) {
- return DILocalVariable::get(
- Ctx,
- cast<DILocalScope>(
- reparentScope(Ctx, Var->getScope(), OrigSP, NewSP, Cache)),
- Var->getName(), Var->getFile(), Var->getLine(), Var->getType(),
- Var->getArg(), Var->getFlags(), Var->getAlignInBits());
- };
- if (auto *DbgValue = dyn_cast<DbgValueInst>(&I)) {
- auto *Var = DbgValue->getVariable();
- I.setOperand(2, MetadataAsValue::get(Ctx, reparentVar(Var)));
- } else if (auto *DbgDeclare = dyn_cast<DbgDeclareInst>(&I)) {
- auto *Var = DbgDeclare->getVariable();
- I.setOperand(1, MetadataAsValue::get(Ctx, reparentVar(Var)));
- }
-}
-
-
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
LLVM_DUMP_METHOD void DebugLoc::dump() const {
if (!Loc)
diff --git a/lib/IR/OptBisect.cpp b/lib/IR/OptBisect.cpp
index b670c817569a..a03a6fb62237 100644
--- a/lib/IR/OptBisect.cpp
+++ b/lib/IR/OptBisect.cpp
@@ -16,6 +16,7 @@
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/OptBisect.h"
#include "llvm/Pass.h"
@@ -53,13 +54,20 @@ static std::string getDescription(const BasicBlock &BB) {
}
static std::string getDescription(const Loop &L) {
- // FIXME: I'd like to be able to provide a better description here, but
- // calling L->getHeader() would introduce a new dependency on the
- // LLVMCore library.
+ // FIXME: Move into LoopInfo so we can get a better description
+ // (and avoid a circular dependency between IR and Analysis).
return "loop";
}
+static std::string getDescription(const Region &R) {
+ // FIXME: Move into RegionInfo so we can get a better description
+ // (and avoid a circular dependency between IR and Analysis).
+ return "region";
+}
+
static std::string getDescription(const CallGraphSCC &SCC) {
+ // FIXME: Move into CallGraphSCCPass to avoid circular dependency between
+ // IR and Analysis.
std::string Desc = "SCC (";
bool First = true;
for (CallGraphNode *CGN : SCC) {
@@ -83,6 +91,7 @@ template bool OptBisect::shouldRunPass(const Pass *, const Function &);
template bool OptBisect::shouldRunPass(const Pass *, const BasicBlock &);
template bool OptBisect::shouldRunPass(const Pass *, const Loop &);
template bool OptBisect::shouldRunPass(const Pass *, const CallGraphSCC &);
+template bool OptBisect::shouldRunPass(const Pass *, const Region &);
template <class UnitT>
bool OptBisect::shouldRunPass(const Pass *P, const UnitT &U) {
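shouldRunPass stays a single template; supporting regions only requires a getDescription(const Region &) overload plus an explicit instantiation, keeping the template body in OptBisect.cpp. A standalone sketch of the pattern (the printed message is illustrative, not the exact OptBisect output):

#include <iostream>
#include <string>

struct Loop {};
struct Region {};

static std::string getDescription(const Loop &) { return "loop"; }
static std::string getDescription(const Region &) { return "region"; }

template <class UnitT> bool shouldRunPass(int Count, const UnitT &U) {
  std::cout << "BISECT: running pass (" << Count << ") on "
            << getDescription(U) << '\n';
  return true;
}

// Explicit instantiations, as in OptBisect.cpp, so the template definition can
// live in one translation unit.
template bool shouldRunPass(int, const Loop &);
template bool shouldRunPass(int, const Region &);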
diff --git a/lib/LTO/LTO.cpp b/lib/LTO/LTO.cpp
index 9efc095f9fcf..92145aaf667a 100644
--- a/lib/LTO/LTO.cpp
+++ b/lib/LTO/LTO.cpp
@@ -122,6 +122,7 @@ static void computeCacheKey(
AddUnsigned(Conf.CGOptLevel);
AddUnsigned(Conf.CGFileType);
AddUnsigned(Conf.OptLevel);
+ AddUnsigned(Conf.UseNewPM);
AddString(Conf.OptPipeline);
AddString(Conf.AAPipeline);
AddString(Conf.OverrideTriple);
@@ -621,6 +622,19 @@ unsigned LTO::getMaxTasks() const {
}
Error LTO::run(AddStreamFn AddStream, NativeObjectCache Cache) {
+ // Compute "dead" symbols; we don't want to import/export these!
+ DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
+ for (auto &Res : GlobalResolutions) {
+ if (Res.second.VisibleOutsideThinLTO &&
+ // IRName will be defined if we have seen the prevailing copy of
+ // this value. If not, no need to preserve any ThinLTO copies.
+ !Res.second.IRName.empty())
+ GUIDPreservedSymbols.insert(GlobalValue::getGUID(
+ GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
+ }
+
+ computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
+
// Save the status of having a regularLTO combined module, as
// this is needed for generating the ThinLTO Task ID, and
// the CombinedModule will be moved at the end of runRegularLTO.
@@ -930,6 +944,17 @@ ThinBackend lto::createWriteIndexesThinBackend(std::string OldPrefix,
};
}
+static bool IsLiveByGUID(const ModuleSummaryIndex &Index,
+ GlobalValue::GUID GUID) {
+ auto VI = Index.getValueInfo(GUID);
+ if (!VI)
+ return false;
+ for (auto &I : VI.getSummaryList())
+ if (Index.isGlobalValueLive(I.get()))
+ return true;
+ return false;
+}
+
Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
bool HasRegularLTO) {
if (ThinLTO.ModuleMap.empty())
@@ -962,22 +987,8 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
if (Conf.OptLevel > 0) {
- // Compute "dead" symbols, we don't want to import/export these!
- DenseSet<GlobalValue::GUID> GUIDPreservedSymbols;
- for (auto &Res : GlobalResolutions) {
- if (Res.second.VisibleOutsideThinLTO &&
- // IRName will be defined if we have seen the prevailing copy of
- // this value. If not, no need to preserve any ThinLTO copies.
- !Res.second.IRName.empty())
- GUIDPreservedSymbols.insert(GlobalValue::getGUID(
- GlobalValue::dropLLVMManglingEscape(Res.second.IRName)));
- }
-
- auto DeadSymbols =
- computeDeadSymbols(ThinLTO.CombinedIndex, GUIDPreservedSymbols);
-
ComputeCrossModuleImport(ThinLTO.CombinedIndex, ModuleToDefinedGVSummaries,
- ImportLists, ExportLists, &DeadSymbols);
+ ImportLists, ExportLists);
std::set<GlobalValue::GUID> ExportedGUIDs;
for (auto &Res : GlobalResolutions) {
@@ -992,7 +1003,7 @@ Error LTO::runThinLTO(AddStreamFn AddStream, NativeObjectCache Cache,
auto GUID = GlobalValue::getGUID(
GlobalValue::dropLLVMManglingEscape(Res.second.IRName));
// Mark exported unless index-based analysis determined it to be dead.
- if (!DeadSymbols.count(GUID))
+ if (IsLiveByGUID(ThinLTO.CombinedIndex, GUID))
ExportedGUIDs.insert(GUID);
}
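IsLiveByGUID translates the old set-membership test into a query against the liveness flags that computeDeadSymbols now records in the combined index: a GUID is exported only if at least one of its summaries is live. An equivalent standalone sketch over a toy index:

#include <cstdint>
#include <unordered_map>
#include <vector>

using GUID = uint64_t;
struct GlobalValueSummary { bool Live = false; };

struct ToyIndex {
  std::unordered_map<GUID, std::vector<GlobalValueSummary>> Summaries;
  bool isGlobalValueLive(const GlobalValueSummary &S) const { return S.Live; }
};

static bool IsLiveByGUID(const ToyIndex &Index, GUID G) {
  auto It = Index.Summaries.find(G);
  if (It == Index.Summaries.end())
    return false;                    // no value info: nothing to export
  for (const auto &S : It->second)
    if (Index.isGlobalValueLive(S))
      return true;                   // one live copy suffices
  return false;
}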
diff --git a/lib/LTO/LTOBackend.cpp b/lib/LTO/LTOBackend.cpp
index f9c41f5c9744..3f72e446cdf2 100644
--- a/lib/LTO/LTOBackend.cpp
+++ b/lib/LTO/LTOBackend.cpp
@@ -42,11 +42,6 @@
using namespace llvm;
using namespace lto;
-static cl::opt<bool>
- LTOUseNewPM("lto-use-new-pm",
- cl::desc("Run LTO passes using the new pass manager"),
- cl::init(false), cl::Hidden);
-
LLVM_ATTRIBUTE_NORETURN static void reportOpenError(StringRef Path, Twine Msg) {
errs() << "failed to open " << Path << ": " << Msg << '\n';
errs().flush();
@@ -266,7 +261,7 @@ bool opt(Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
if (!Conf.OptPipeline.empty())
runNewPMCustomPasses(Mod, TM, Conf.OptPipeline, Conf.AAPipeline,
Conf.DisableVerify);
- else if (LTOUseNewPM)
+ else if (Conf.UseNewPM)
runNewPMPasses(Mod, TM, Conf.OptLevel, IsThinLTO);
else
runOldPMPasses(Conf, Mod, TM, IsThinLTO, ExportSummary, ImportSummary);
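With the hidden -lto-use-new-pm flag removed, pass-manager selection becomes a per-link decision carried by lto::Config, and it participates in the cache key (see computeCacheKey above). A hedged client-side fragment, using only the fields this patch touches:

lto::Config Conf;
Conf.OptLevel = 2;     // hashed into the cache key
Conf.UseNewPM = true;  // replaces the old cl::opt; also hashed
// Hand Conf to lto::LTO as usual; opt() routes through runNewPMPasses()
// when UseNewPM is set and no custom OptPipeline is given.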
diff --git a/lib/LTO/ThinLTOCodeGenerator.cpp b/lib/LTO/ThinLTOCodeGenerator.cpp
index ca3fc60f9501..6b221a347c17 100644
--- a/lib/LTO/ThinLTOCodeGenerator.cpp
+++ b/lib/LTO/ThinLTOCodeGenerator.cpp
@@ -628,13 +628,13 @@ void ThinLTOCodeGenerator::promote(Module &TheModule,
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
// Resolve LinkOnce/Weak symbols.
StringMap<std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>> ResolvedODR;
@@ -673,13 +673,13 @@ void ThinLTOCodeGenerator::crossModuleImport(Module &TheModule,
PreservedSymbols, Triple(TheModule.getTargetTriple()));
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
auto &ImportList = ImportLists[TheModule.getModuleIdentifier()];
crossImportIntoModule(TheModule, Index, ModuleMap, ImportList);
@@ -750,13 +750,13 @@ void ThinLTOCodeGenerator::internalize(Module &TheModule,
Index.collectDefinedGVSummariesPerModule(ModuleToDefinedGVSummaries);
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
+ computeDeadSymbols(Index, GUIDPreservedSymbols);
// Generate import/export list
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
auto &ExportList = ExportLists[ModuleIdentifier];
// Be friendly and don't nuke totally the module when the client didn't
@@ -902,14 +902,14 @@ void ThinLTOCodeGenerator::run() {
computeGUIDPreservedSymbols(PreservedSymbols, TMBuilder.TheTriple);
// Compute "dead" symbols, we don't want to import/export these!
- auto DeadSymbols = computeDeadSymbols(*Index, GUIDPreservedSymbols);
+ computeDeadSymbols(*Index, GUIDPreservedSymbols);
// Collect the import/export lists for all modules from the call-graph in the
// combined index.
StringMap<FunctionImporter::ImportMapTy> ImportLists(ModuleCount);
StringMap<FunctionImporter::ExportSetTy> ExportLists(ModuleCount);
ComputeCrossModuleImport(*Index, ModuleToDefinedGVSummaries, ImportLists,
- ExportLists, &DeadSymbols);
+ ExportLists);
// We use a std::map here to be able to have a defined ordering when
// producing a hash for the cache entry.
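All four ThinLTO entry points now follow the same shape: computeDeadSymbols marks liveness inside the index itself, so ComputeCrossModuleImport no longer takes a dead-symbol set. The before/after call pattern, condensed:

// Before this patch:
//   auto DeadSymbols = computeDeadSymbols(Index, GUIDPreservedSymbols);
//   ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
//                            ImportLists, ExportLists, &DeadSymbols);
// After:
computeDeadSymbols(Index, GUIDPreservedSymbols);    // sets Live flags in Index
ComputeCrossModuleImport(Index, ModuleToDefinedGVSummaries,
                         ImportLists, ExportLists); // consults Index liveness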
diff --git a/lib/MC/WasmObjectWriter.cpp b/lib/MC/WasmObjectWriter.cpp
index 8c3df36cfb48..9b2031f05043 100644
--- a/lib/MC/WasmObjectWriter.cpp
+++ b/lib/MC/WasmObjectWriter.cpp
@@ -40,6 +40,7 @@ using namespace llvm;
#define DEBUG_TYPE "reloc-info"
namespace {
+
// For patching purposes, we need to remember where each section starts, both
// for patching up the section size field, and for patching up references to
// locations within the section.
@@ -50,6 +51,82 @@ struct SectionBookkeeping {
uint64_t ContentsOffset;
};
+// The signature of a wasm function, in a struct capable of being used as a
+// DenseMap key.
+struct WasmFunctionType {
+ // Support empty and tombstone instances, needed by DenseMap.
+ enum { Plain, Empty, Tombstone } State;
+
+ // The return types of the function.
+ SmallVector<wasm::ValType, 1> Returns;
+
+ // The parameter types of the function.
+ SmallVector<wasm::ValType, 4> Params;
+
+ WasmFunctionType() : State(Plain) {}
+
+ bool operator==(const WasmFunctionType &Other) const {
+ return State == Other.State && Returns == Other.Returns &&
+ Params == Other.Params;
+ }
+};
+
+// Traits for using WasmFunctionType in a DenseMap.
+struct WasmFunctionTypeDenseMapInfo {
+ static WasmFunctionType getEmptyKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Empty;
+ return FuncTy;
+ }
+ static WasmFunctionType getTombstoneKey() {
+ WasmFunctionType FuncTy;
+ FuncTy.State = WasmFunctionType::Tombstone;
+ return FuncTy;
+ }
+ static unsigned getHashValue(const WasmFunctionType &FuncTy) {
+ uintptr_t Value = FuncTy.State;
+ for (wasm::ValType Ret : FuncTy.Returns)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
+ for (wasm::ValType Param : FuncTy.Params)
+ Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
+ return Value;
+ }
+ static bool isEqual(const WasmFunctionType &LHS,
+ const WasmFunctionType &RHS) {
+ return LHS == RHS;
+ }
+};
+
+// A wasm import to be written into the import section.
+struct WasmImport {
+ StringRef ModuleName;
+ StringRef FieldName;
+ unsigned Kind;
+ int32_t Type;
+};
+
+// A wasm function to be written into the function section.
+struct WasmFunction {
+ int32_t Type;
+ const MCSymbolWasm *Sym;
+};
+
+// A wasm export to be written into the export section.
+struct WasmExport {
+ StringRef FieldName;
+ unsigned Kind;
+ uint32_t Index;
+};
+
+// A wasm global to be written into the global section.
+struct WasmGlobal {
+ wasm::ValType Type;
+ bool IsMutable;
+ bool HasImport;
+ uint64_t InitialValue;
+ uint32_t ImportIndex;
+};
+
class WasmObjectWriter : public MCObjectWriter {
/// Helper struct for containing some precomputed information on symbols.
struct WasmSymbolData {
@@ -91,18 +168,10 @@ public:
: MCObjectWriter(OS, /*IsLittleEndian=*/true), TargetObjectWriter(MOTW) {}
private:
- void reset() override {
- MCObjectWriter::reset();
- }
-
~WasmObjectWriter() override;
void writeHeader(const MCAssembler &Asm);
- void writeValueType(wasm::ValType Ty) {
- encodeSLEB128(int32_t(Ty), getStream());
- }
-
void recordRelocation(MCAssembler &Asm, const MCAsmLayout &Layout,
const MCFragment *Fragment, const MCFixup &Fixup,
MCValue Target, bool &IsPCRel,
@@ -112,7 +181,37 @@ private:
const MCAsmLayout &Layout) override;
void writeObject(MCAssembler &Asm, const MCAsmLayout &Layout) override;
+
+ void writeValueType(wasm::ValType Ty) {
+ encodeSLEB128(int32_t(Ty), getStream());
+ }
+
+ void writeTypeSection(const SmallVector<WasmFunctionType, 4> &FunctionTypes);
+ void writeImportSection(const SmallVector<WasmImport, 4> &Imports);
+ void writeFunctionSection(const SmallVector<WasmFunction, 4> &Functions);
+ void writeTableSection(const SmallVector<uint32_t, 4> &TableElems);
+ void writeMemorySection(const SmallVector<char, 0> &DataBytes);
+ void writeGlobalSection(const SmallVector<WasmGlobal, 4> &Globals);
+ void writeExportSection(const SmallVector<WasmExport, 4> &Exports);
+ void writeElemSection(const SmallVector<uint32_t, 4> &TableElems);
+ void writeCodeSection(const MCAssembler &Asm, const MCAsmLayout &Layout,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ const SmallVector<WasmFunction, 4> &Functions);
+ uint64_t
+ writeDataSection(const SmallVector<char, 0> &DataBytes,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
+ void writeNameSection(const SmallVector<WasmFunction, 4> &Functions,
+ const SmallVector<WasmImport, 4> &Imports,
+ uint32_t NumFuncImports);
+ void writeCodeRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices);
+ void writeDataRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ uint64_t DataSectionHeaderSize);
+ void writeLinkingMetaDataSection(bool HasStackPointer,
+ uint32_t StackPointerGlobal);
};
+
} // end anonymous namespace
WasmObjectWriter::~WasmObjectWriter() {}
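Moving WasmFunctionType and its traits above the class lets the new write* methods name them. The traits follow LLVM's DenseMapInfo contract: reserve two sentinel keys that can never equal a real value, and keep hashing consistent with equality. A standalone analogue; note the additive hash is order-insensitive, exactly as in the patch, which only costs extra collisions since correctness rests on isEqual:

#include <cstdint>
#include <vector>

struct Key {
  enum State { Plain, Empty, Tombstone } St = Plain;
  std::vector<int32_t> Params;
  bool operator==(const Key &O) const {
    return St == O.St && Params == O.Params;
  }
};

struct KeyInfo {
  static Key getEmptyKey()     { Key K; K.St = Key::Empty; return K; }
  static Key getTombstoneKey() { Key K; K.St = Key::Tombstone; return K; }
  static unsigned getHashValue(const Key &K) {
    unsigned V = K.St;               // sentinels hash apart from Plain keys
    for (int32_t P : K.Params)
      V += static_cast<unsigned>(P); // summed: permutations collide, harmlessly
    return V;
  }
  static bool isEqual(const Key &L, const Key &R) { return L == R; }
};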
@@ -278,86 +377,6 @@ void WasmObjectWriter::recordRelocation(MCAssembler &Asm,
DataRelocations.push_back(Rec);
}
-namespace {
-
-// The signature of a wasm function, in a struct capable of being used as a
-// DenseMap key.
-struct WasmFunctionType {
- // Support empty and tombstone instances, needed by DenseMap.
- enum { Plain, Empty, Tombstone } State;
-
- // The return types of the function.
- SmallVector<wasm::ValType, 1> Returns;
-
- // The parameter types of the function.
- SmallVector<wasm::ValType, 4> Params;
-
- WasmFunctionType() : State(Plain) {}
-
- bool operator==(const WasmFunctionType &Other) const {
- return State == Other.State && Returns == Other.Returns &&
- Params == Other.Params;
- }
-};
-
-// Traits for using WasmFunctionType in a DenseMap.
-struct WasmFunctionTypeDenseMapInfo {
- static WasmFunctionType getEmptyKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Empty;
- return FuncTy;
- }
- static WasmFunctionType getTombstoneKey() {
- WasmFunctionType FuncTy;
- FuncTy.State = WasmFunctionType::Tombstone;
- return FuncTy;
- }
- static unsigned getHashValue(const WasmFunctionType &FuncTy) {
- uintptr_t Value = FuncTy.State;
- for (wasm::ValType Ret : FuncTy.Returns)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Ret));
- for (wasm::ValType Param : FuncTy.Params)
- Value += DenseMapInfo<int32_t>::getHashValue(int32_t(Param));
- return Value;
- }
- static bool isEqual(const WasmFunctionType &LHS,
- const WasmFunctionType &RHS) {
- return LHS == RHS;
- }
-};
-
-// A wasm import to be written into the import section.
-struct WasmImport {
- StringRef ModuleName;
- StringRef FieldName;
- unsigned Kind;
- int32_t Type;
-};
-
-// A wasm function to be written into the function section.
-struct WasmFunction {
- int32_t Type;
- const MCSymbolWasm *Sym;
-};
-
-// A wasm export to be written into the export section.
-struct WasmExport {
- StringRef FieldName;
- unsigned Kind;
- uint32_t Index;
-};
-
-// A wasm global to be written into the global section.
-struct WasmGlobal {
- wasm::ValType Type;
- bool IsMutable;
- bool HasImport;
- uint64_t InitialValue;
- uint32_t ImportIndex;
-};
-
-} // end anonymous namespace
-
// Write X as an (unsigned) LEB value at offset Offset in Stream, padded
// to allow patching.
static void
@@ -529,6 +548,367 @@ static void WriteTypeRelocations(
}
}
+void WasmObjectWriter::writeTypeSection(
+ const SmallVector<WasmFunctionType, 4> &FunctionTypes) {
+ if (FunctionTypes.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_TYPE);
+
+ encodeULEB128(FunctionTypes.size(), getStream());
+
+ for (const WasmFunctionType &FuncTy : FunctionTypes) {
+ encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
+ encodeULEB128(FuncTy.Params.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Params)
+ writeValueType(Ty);
+ encodeULEB128(FuncTy.Returns.size(), getStream());
+ for (wasm::ValType Ty : FuncTy.Returns)
+ writeValueType(Ty);
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeImportSection(
+ const SmallVector<WasmImport, 4> &Imports) {
+ if (Imports.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_IMPORT);
+
+ encodeULEB128(Imports.size(), getStream());
+ for (const WasmImport &Import : Imports) {
+ StringRef ModuleName = Import.ModuleName;
+ encodeULEB128(ModuleName.size(), getStream());
+ writeBytes(ModuleName);
+
+ StringRef FieldName = Import.FieldName;
+ encodeULEB128(FieldName.size(), getStream());
+ writeBytes(FieldName);
+
+ encodeULEB128(Import.Kind, getStream());
+
+ switch (Import.Kind) {
+ case wasm::WASM_EXTERNAL_FUNCTION:
+ encodeULEB128(Import.Type, getStream());
+ break;
+ case wasm::WASM_EXTERNAL_GLOBAL:
+ encodeSLEB128(int32_t(Import.Type), getStream());
+ encodeULEB128(0, getStream()); // mutability
+ break;
+ default:
+ llvm_unreachable("unsupported import kind");
+ }
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeFunctionSection(
+ const SmallVector<WasmFunction, 4> &Functions) {
+ if (Functions.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_FUNCTION);
+
+ encodeULEB128(Functions.size(), getStream());
+ for (const WasmFunction &Func : Functions)
+ encodeULEB128(Func.Type, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeTableSection(
+ const SmallVector<uint32_t, 4> &TableElems) {
+ // For now, always emit the table section, since indirect calls are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no indirect calls.
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_TABLE);
+
+ // The number of tables, fixed to 1 for now.
+ encodeULEB128(1, getStream());
+
+ encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(TableElems.size(), getStream()); // initial
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeMemorySection(
+ const SmallVector<char, 0> &DataBytes) {
+ // For now, always emit the memory section, since loads and stores are not
+ // valid without it. In the future, we could perhaps be more clever and omit
+ // it if there are no loads or stores.
+ SectionBookkeeping Section;
+ uint32_t NumPages =
+ (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
+
+ startSection(Section, wasm::WASM_SEC_MEMORY);
+ encodeULEB128(1, getStream()); // number of memory spaces
+
+ encodeULEB128(0, getStream()); // flags
+ encodeULEB128(NumPages, getStream()); // initial
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeGlobalSection(
+ const SmallVector<WasmGlobal, 4> &Globals) {
+ if (Globals.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_GLOBAL);
+
+ encodeULEB128(Globals.size(), getStream());
+ for (const WasmGlobal &Global : Globals) {
+ writeValueType(Global.Type);
+ write8(Global.IsMutable);
+
+ if (Global.HasImport) {
+ assert(Global.InitialValue == 0);
+ write8(wasm::WASM_OPCODE_GET_GLOBAL);
+ encodeULEB128(Global.ImportIndex, getStream());
+ } else {
+ assert(Global.ImportIndex == 0);
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(Global.InitialValue, getStream()); // offset
+ }
+ write8(wasm::WASM_OPCODE_END);
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeExportSection(
+ const SmallVector<WasmExport, 4> &Exports) {
+ if (Exports.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_EXPORT);
+
+ encodeULEB128(Exports.size(), getStream());
+ for (const WasmExport &Export : Exports) {
+ encodeULEB128(Export.FieldName.size(), getStream());
+ writeBytes(Export.FieldName);
+
+ encodeSLEB128(Export.Kind, getStream());
+
+ encodeULEB128(Export.Index, getStream());
+ }
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeElemSection(
+ const SmallVector<uint32_t, 4> &TableElems) {
+ if (TableElems.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_ELEM);
+
+ encodeULEB128(1, getStream()); // number of "segments"
+ encodeULEB128(0, getStream()); // the table index
+
+ // init expr for starting offset
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream());
+ write8(wasm::WASM_OPCODE_END);
+
+ encodeULEB128(TableElems.size(), getStream());
+ for (uint32_t Elem : TableElems)
+ encodeULEB128(Elem, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeCodeSection(
+ const MCAssembler &Asm, const MCAsmLayout &Layout,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ const SmallVector<WasmFunction, 4> &Functions) {
+ if (Functions.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CODE);
+
+ encodeULEB128(Functions.size(), getStream());
+
+ for (const WasmFunction &Func : Functions) {
+ MCSectionWasm &FuncSection =
+ static_cast<MCSectionWasm &>(Func.Sym->getSection());
+
+ if (Func.Sym->isVariable())
+ report_fatal_error("weak symbols not supported yet");
+
+ if (Func.Sym->getOffset() != 0)
+ report_fatal_error("function sections must contain one function each");
+
+ if (!Func.Sym->getSize())
+ report_fatal_error("function symbols must have a size set with .size");
+
+ int64_t Size = 0;
+ if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
+ encodeULEB128(Size, getStream());
+
+ FuncSection.setSectionOffset(getStream().tell() -
+ Section.ContentsOffset);
+
+ Asm.writeSectionData(&FuncSection, Layout);
+ }
+
+ // Apply the type index fixups for call_indirect etc. instructions.
+ for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
+ uint32_t Type = TypeIndexFixupTypes[i];
+ unsigned Padding = PaddingFor5ByteULEB128(Type);
+
+ const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
+ assert(Fixup.Addend == 0);
+ assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
+ uint64_t Offset = Fixup.Offset +
+ Fixup.FixupSection->getSectionOffset();
+
+ uint8_t Buffer[16];
+ unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
+ assert(SizeLen == 5);
+ getStream().pwrite((char *)Buffer, SizeLen,
+ Section.ContentsOffset + Offset);
+ }
+
+ // Apply fixups.
+ ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset);
+
+ endSection(Section);
+}
+
+uint64_t WasmObjectWriter::writeDataSection(
+ const SmallVector<char, 0> &DataBytes,
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+ if (DataBytes.empty())
+ return 0;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_DATA);
+
+ encodeULEB128(1, getStream()); // count
+ encodeULEB128(0, getStream()); // memory index
+ write8(wasm::WASM_OPCODE_I32_CONST);
+ encodeSLEB128(0, getStream()); // offset
+ write8(wasm::WASM_OPCODE_END);
+ encodeULEB128(DataBytes.size(), getStream()); // size
+ uint32_t HeaderSize = getStream().tell() - Section.ContentsOffset;
+ writeBytes(DataBytes); // data
+
+ // Apply fixups.
+ ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
+ Section.ContentsOffset + HeaderSize);
+
+ endSection(Section);
+ return HeaderSize;
+}
+
+void WasmObjectWriter::writeNameSection(
+ const SmallVector<WasmFunction, 4> &Functions,
+ const SmallVector<WasmImport, 4> &Imports,
+ uint32_t NumFuncImports) {
+ uint32_t TotalFunctions = NumFuncImports + Functions.size();
+ if (TotalFunctions == 0)
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
+ SectionBookkeeping SubSection;
+ startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
+
+ encodeULEB128(TotalFunctions, getStream());
+ uint32_t Index = 0;
+ for (const WasmImport &Import : Imports) {
+ if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Import.FieldName.size(), getStream());
+ writeBytes(Import.FieldName);
+ ++Index;
+ }
+ }
+ for (const WasmFunction &Func : Functions) {
+ encodeULEB128(Index, getStream());
+ encodeULEB128(Func.Sym->getName().size(), getStream());
+ writeBytes(Func.Sym->getName());
+ ++Index;
+ }
+
+ endSection(SubSection);
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeCodeRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices) {
+ // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // for descriptions of the reloc sections.
+
+ if (CodeRelocations.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
+
+ encodeULEB128(wasm::WASM_SEC_CODE, getStream());
+ encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
+
+ WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
+ WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeDataRelocSection(
+ DenseMap<const MCSymbolWasm *, uint32_t> &SymbolIndices,
+ uint64_t DataSectionHeaderSize) {
+ // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
+ // for descriptions of the reloc sections.
+
+ if (DataRelocations.empty())
+ return;
+
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
+
+ encodeULEB128(wasm::WASM_SEC_DATA, getStream());
+ encodeULEB128(DataRelocations.size(), getStream());
+
+ WriteRelocations(DataRelocations, getStream(), SymbolIndices,
+ DataSectionHeaderSize);
+
+ endSection(Section);
+}
+
+void WasmObjectWriter::writeLinkingMetaDataSection(
+ bool HasStackPointer, uint32_t StackPointerGlobal) {
+ if (!HasStackPointer)
+ return;
+ SectionBookkeeping Section;
+ startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
+
+ encodeULEB128(1, getStream()); // count
+
+ encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
+ encodeULEB128(StackPointerGlobal, getStream()); // id
+
+ endSection(Section);
+}
+
void WasmObjectWriter::writeObject(MCAssembler &Asm,
const MCAsmLayout &Layout) {
MCContext &Ctx = Asm.getContext();
@@ -730,16 +1110,21 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
if (IsAddressTaken.count(&WS))
TableElems.push_back(Index);
} else {
- if (WS.getOffset() != 0)
- report_fatal_error("data sections must contain one variable each");
- if (!WS.getSize())
- report_fatal_error("data symbols must have a size set with .size");
-
- int64_t Size = 0;
- if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
+ if (WS.isTemporary() && !WS.getSize())
+ continue;
if (WS.isDefined(false)) {
+ if (WS.getOffset() != 0)
+ report_fatal_error("data sections must contain one variable each: " +
+ WS.getName());
+ if (!WS.getSize())
+ report_fatal_error("data symbols must have a size set with .size: " +
+ WS.getName());
+
+ int64_t Size = 0;
+ if (!WS.getSize()->evaluateAsAbsolute(Size, Layout))
+ report_fatal_error(".size expression must be evaluatable");
+
MCSectionWasm &DataSection =
static_cast<MCSectionWasm &>(WS.getSection());
@@ -827,322 +1212,23 @@ void WasmObjectWriter::writeObject(MCAssembler &Asm,
// Write out the Wasm header.
writeHeader(Asm);
- SectionBookkeeping Section;
-
- // === Type Section =========================================================
- if (!FunctionTypes.empty()) {
- startSection(Section, wasm::WASM_SEC_TYPE);
-
- encodeULEB128(FunctionTypes.size(), getStream());
-
- for (WasmFunctionType &FuncTy : FunctionTypes) {
- encodeSLEB128(wasm::WASM_TYPE_FUNC, getStream());
- encodeULEB128(FuncTy.Params.size(), getStream());
- for (wasm::ValType Ty : FuncTy.Params)
- writeValueType(Ty);
- encodeULEB128(FuncTy.Returns.size(), getStream());
- for (wasm::ValType Ty : FuncTy.Returns)
- writeValueType(Ty);
- }
-
- endSection(Section);
- }
-
- // === Import Section ========================================================
- if (!Imports.empty()) {
- startSection(Section, wasm::WASM_SEC_IMPORT);
-
- encodeULEB128(Imports.size(), getStream());
- for (const WasmImport &Import : Imports) {
- StringRef ModuleName = Import.ModuleName;
- encodeULEB128(ModuleName.size(), getStream());
- writeBytes(ModuleName);
-
- StringRef FieldName = Import.FieldName;
- encodeULEB128(FieldName.size(), getStream());
- writeBytes(FieldName);
-
- encodeULEB128(Import.Kind, getStream());
-
- switch (Import.Kind) {
- case wasm::WASM_EXTERNAL_FUNCTION:
- encodeULEB128(Import.Type, getStream());
- break;
- case wasm::WASM_EXTERNAL_GLOBAL:
- encodeSLEB128(int32_t(Import.Type), getStream());
- encodeULEB128(0, getStream()); // mutability
- break;
- default:
- llvm_unreachable("unsupported import kind");
- }
- }
-
- endSection(Section);
- }
-
- // === Function Section ======================================================
- if (!Functions.empty()) {
- startSection(Section, wasm::WASM_SEC_FUNCTION);
-
- encodeULEB128(Functions.size(), getStream());
- for (const WasmFunction &Func : Functions)
- encodeULEB128(Func.Type, getStream());
-
- endSection(Section);
- }
-
- // === Table Section =========================================================
- // For now, always emit the table section, since indirect calls are not
- // valid without it. In the future, we could perhaps be more clever and omit
- // it if there are no indirect calls.
- startSection(Section, wasm::WASM_SEC_TABLE);
-
- // The number of tables, fixed to 1 for now.
- encodeULEB128(1, getStream());
-
- encodeSLEB128(wasm::WASM_TYPE_ANYFUNC, getStream());
-
- encodeULEB128(0, getStream()); // flags
- encodeULEB128(TableElems.size(), getStream()); // initial
-
- endSection(Section);
-
- // === Memory Section ========================================================
- // For now, always emit the memory section, since loads and stores are not
- // valid without it. In the future, we could perhaps be more clever and omit
- // it if there are no loads or stores.
- uint32_t NumPages =
- (DataBytes.size() + wasm::WasmPageSize - 1) / wasm::WasmPageSize;
-
- startSection(Section, wasm::WASM_SEC_MEMORY);
- encodeULEB128(1, getStream()); // number of memory spaces
-
- encodeULEB128(0, getStream()); // flags
- encodeULEB128(NumPages, getStream()); // initial
-
- endSection(Section);
-
- // === Global Section ========================================================
- if (!Globals.empty()) {
- startSection(Section, wasm::WASM_SEC_GLOBAL);
-
- encodeULEB128(Globals.size(), getStream());
- for (const WasmGlobal &Global : Globals) {
- writeValueType(Global.Type);
- write8(Global.IsMutable);
-
- if (Global.HasImport) {
- assert(Global.InitialValue == 0);
- write8(wasm::WASM_OPCODE_GET_GLOBAL);
- encodeULEB128(Global.ImportIndex, getStream());
- } else {
- assert(Global.ImportIndex == 0);
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(Global.InitialValue, getStream()); // offset
- }
- write8(wasm::WASM_OPCODE_END);
- }
-
- endSection(Section);
- }
-
- // === Export Section ========================================================
- if (!Exports.empty()) {
- startSection(Section, wasm::WASM_SEC_EXPORT);
-
- encodeULEB128(Exports.size(), getStream());
- for (const WasmExport &Export : Exports) {
- encodeULEB128(Export.FieldName.size(), getStream());
- writeBytes(Export.FieldName);
-
- encodeSLEB128(Export.Kind, getStream());
-
- encodeULEB128(Export.Index, getStream());
- }
-
- endSection(Section);
- }
-
-#if 0 // TODO: Start Section
- if (HaveStartFunction) {
- // === Start Section =========================================================
- startSection(Section, wasm::WASM_SEC_START);
-
- encodeSLEB128(StartFunction, getStream());
-
- endSection(Section);
- }
-#endif
-
- // === Elem Section ==========================================================
- if (!TableElems.empty()) {
- startSection(Section, wasm::WASM_SEC_ELEM);
-
- encodeULEB128(1, getStream()); // number of "segments"
- encodeULEB128(0, getStream()); // the table index
-
- // init expr for starting offset
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(0, getStream());
- write8(wasm::WASM_OPCODE_END);
-
- encodeULEB128(TableElems.size(), getStream());
- for (uint32_t Elem : TableElems)
- encodeULEB128(Elem, getStream());
-
- endSection(Section);
- }
-
- // === Code Section ==========================================================
- if (!Functions.empty()) {
- startSection(Section, wasm::WASM_SEC_CODE);
-
- encodeULEB128(Functions.size(), getStream());
-
- for (const WasmFunction &Func : Functions) {
- MCSectionWasm &FuncSection =
- static_cast<MCSectionWasm &>(Func.Sym->getSection());
-
- if (Func.Sym->isVariable())
- report_fatal_error("weak symbols not supported yet");
-
- if (Func.Sym->getOffset() != 0)
- report_fatal_error("function sections must contain one function each");
-
- if (!Func.Sym->getSize())
- report_fatal_error("function symbols must have a size set with .size");
-
- int64_t Size = 0;
- if (!Func.Sym->getSize()->evaluateAsAbsolute(Size, Layout))
- report_fatal_error(".size expression must be evaluatable");
-
- encodeULEB128(Size, getStream());
-
- FuncSection.setSectionOffset(getStream().tell() -
- Section.ContentsOffset);
-
- Asm.writeSectionData(&FuncSection, Layout);
- }
-
- // Apply the type index fixups for call_indirect etc. instructions.
- for (size_t i = 0, e = TypeIndexFixups.size(); i < e; ++i) {
- uint32_t Type = TypeIndexFixupTypes[i];
- unsigned Padding = PaddingFor5ByteULEB128(Type);
-
- const WasmRelocationEntry &Fixup = TypeIndexFixups[i];
- assert(Fixup.Addend == 0);
- assert(Fixup.Type == wasm::R_WEBASSEMBLY_TYPE_INDEX_LEB);
- uint64_t Offset = Fixup.Offset +
- Fixup.FixupSection->getSectionOffset();
-
- uint8_t Buffer[16];
- unsigned SizeLen = encodeULEB128(Type, Buffer, Padding);
- assert(SizeLen == 5);
- getStream().pwrite((char *)Buffer, SizeLen,
- Section.ContentsOffset + Offset);
- }
-
- // Apply fixups.
- ApplyRelocations(CodeRelocations, getStream(), SymbolIndices,
- Section.ContentsOffset);
-
- endSection(Section);
- }
-
- // === Data Section ==========================================================
- uint32_t DataSectionHeaderSize = 0;
- if (!DataBytes.empty()) {
- startSection(Section, wasm::WASM_SEC_DATA);
-
- encodeULEB128(1, getStream()); // count
- encodeULEB128(0, getStream()); // memory index
- write8(wasm::WASM_OPCODE_I32_CONST);
- encodeSLEB128(0, getStream()); // offset
- write8(wasm::WASM_OPCODE_END);
- encodeULEB128(DataBytes.size(), getStream()); // size
- DataSectionHeaderSize = getStream().tell() - Section.ContentsOffset;
- writeBytes(DataBytes); // data
-
- // Apply fixups.
- ApplyRelocations(DataRelocations, getStream(), SymbolIndices,
- Section.ContentsOffset + DataSectionHeaderSize);
-
- endSection(Section);
- }
-
- // === Name Section ==========================================================
- uint32_t TotalFunctions = NumFuncImports + Functions.size();
- if (TotalFunctions != 0) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "name");
- SectionBookkeeping SubSection;
- startSection(SubSection, wasm::WASM_NAMES_FUNCTION);
-
- encodeULEB128(TotalFunctions, getStream());
- uint32_t Index = 0;
- for (const WasmImport &Import : Imports) {
- if (Import.Kind == wasm::WASM_EXTERNAL_FUNCTION) {
- encodeULEB128(Index, getStream());
- encodeULEB128(Import.FieldName.size(), getStream());
- writeBytes(Import.FieldName);
- ++Index;
- }
- }
- for (const WasmFunction &Func : Functions) {
- encodeULEB128(Index, getStream());
- encodeULEB128(Func.Sym->getName().size(), getStream());
- writeBytes(Func.Sym->getName());
- ++Index;
- }
-
- endSection(SubSection);
- endSection(Section);
- }
-
- // See: https://github.com/WebAssembly/tool-conventions/blob/master/Linking.md
- // for descriptions of the reloc sections.
-
- // === Code Reloc Section ====================================================
- if (!CodeRelocations.empty()) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.CODE");
-
- encodeULEB128(wasm::WASM_SEC_CODE, getStream());
-
- encodeULEB128(CodeRelocations.size() + TypeIndexFixups.size(), getStream());
-
- WriteRelocations(CodeRelocations, getStream(), SymbolIndices, 0);
- WriteTypeRelocations(TypeIndexFixups, TypeIndexFixupTypes, getStream());
-
- endSection(Section);
- }
-
- // === Data Reloc Section ====================================================
- if (!DataRelocations.empty()) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "reloc.DATA");
-
- encodeULEB128(wasm::WASM_SEC_DATA, getStream());
-
- encodeULEB128(DataRelocations.size(), getStream());
-
- WriteRelocations(DataRelocations, getStream(), SymbolIndices,
- DataSectionHeaderSize);
-
- endSection(Section);
- }
-
- // === Linking Metadata Section ==============================================
- if (HasStackPointer) {
- startSection(Section, wasm::WASM_SEC_CUSTOM, "linking");
-
- encodeULEB128(1, getStream()); // count
-
- encodeULEB128(wasm::WASM_STACK_POINTER, getStream()); // type
- encodeULEB128(StackPointerGlobal, getStream()); // id
-
- endSection(Section);
- }
+ writeTypeSection(FunctionTypes);
+ writeImportSection(Imports);
+ writeFunctionSection(Functions);
+ writeTableSection(TableElems);
+ writeMemorySection(DataBytes);
+ writeGlobalSection(Globals);
+ writeExportSection(Exports);
+ // TODO: Start Section
+ writeElemSection(TableElems);
+ writeCodeSection(Asm, Layout, SymbolIndices, Functions);
+ uint64_t DataSectionHeaderSize = writeDataSection(DataBytes, SymbolIndices);
+ writeNameSection(Functions, Imports, NumFuncImports);
+ writeCodeRelocSection(SymbolIndices);
+ writeDataRelocSection(SymbolIndices, DataSectionHeaderSize);
+ writeLinkingMetaDataSection(HasStackPointer, StackPointerGlobal);
// TODO: Translate the .comment section to the output.
-
// TODO: Translate debug sections to the output.
}
diff --git a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
index f652ff57f30d..21d29835624e 100644
--- a/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLDebugSections.cpp
@@ -17,6 +17,11 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/DebugInfo/CodeView/CodeViewError.h"
+#include "llvm/DebugInfo/CodeView/DebugChecksumsSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugLinesSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
+#include "llvm/DebugInfo/CodeView/DebugSubsectionVisitor.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
#include "llvm/DebugInfo/CodeView/SymbolRecord.h"
@@ -36,16 +41,80 @@ LLVM_YAML_IS_SEQUENCE_VECTOR(InlineeInfo)
LLVM_YAML_IS_SEQUENCE_VECTOR(StringRef)
LLVM_YAML_DECLARE_SCALAR_TRAITS(HexFormattedString, false)
+LLVM_YAML_DECLARE_ENUM_TRAITS(DebugSubsectionKind)
LLVM_YAML_DECLARE_ENUM_TRAITS(FileChecksumKind)
LLVM_YAML_DECLARE_BITSET_TRAITS(LineFlags)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceColumnEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceFileChecksumEntry)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineInfo)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::SourceLineBlock)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeInfo)
-LLVM_YAML_DECLARE_MAPPING_TRAITS(CodeViewYAML::InlineeSite)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceColumnEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceFileChecksumEntry)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(SourceLineBlock)
+LLVM_YAML_DECLARE_MAPPING_TRAITS(InlineeSite)
+
+namespace llvm {
+namespace CodeViewYAML {
+namespace detail {
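+// Each subsection kind gets a polymorphic YAML wrapper that knows how to
+// map itself to and from YAML and how to convert itself back into the
+// corresponding binary DebugSubsection.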
+struct YAMLSubsectionBase {
+ explicit YAMLSubsectionBase(DebugSubsectionKind Kind) : Kind(Kind) {}
+ DebugSubsectionKind Kind;
+ virtual ~YAMLSubsectionBase() {}
+
+ virtual void map(IO &IO) = 0;
+ virtual std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const = 0;
+};
+}
+}
+}
+
+namespace {
+struct YAMLChecksumsSubsection : public YAMLSubsectionBase {
+ YAMLChecksumsSubsection()
+ : YAMLSubsectionBase(DebugSubsectionKind::FileChecksums) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLChecksumsSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &FC);
+
+ std::vector<SourceFileChecksumEntry> Checksums;
+};
+
+struct YAMLLinesSubsection : public YAMLSubsectionBase {
+ YAMLLinesSubsection() : YAMLSubsectionBase(DebugSubsectionKind::Lines) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLLinesSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugLinesSubsectionRef &Lines);
+
+ SourceLineInfo Lines;
+};
+
+struct YAMLInlineeLinesSubsection : public YAMLSubsectionBase {
+ YAMLInlineeLinesSubsection()
+ : YAMLSubsectionBase(DebugSubsectionKind::InlineeLines) {}
+
+ void map(IO &IO) override;
+ std::unique_ptr<DebugSubsection>
+ toCodeViewSubsection(DebugStringTableSubsection *Strings,
+ DebugChecksumsSubsection *Checksums) const override;
+ static Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
+ fromCodeViewSubsection(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugInlineeLinesSubsectionRef &Lines);
+
+ InlineeInfo InlineeLines;
+};
+}
void ScalarBitSetTraits<LineFlags>::bitset(IO &io, LineFlags &Flags) {
io.bitSetCase(Flags, "HasColumnInfo", LF_HaveColumns);
@@ -99,21 +168,6 @@ void MappingTraits<SourceFileChecksumEntry>::mapping(
IO.mapRequired("Checksum", Obj.ChecksumBytes);
}
-void MappingTraits<SourceLineInfo>::mapping(IO &IO, SourceLineInfo &Obj) {
- IO.mapRequired("CodeSize", Obj.CodeSize);
-
- IO.mapRequired("Flags", Obj.Flags);
- IO.mapRequired("RelocOffset", Obj.RelocOffset);
- IO.mapRequired("RelocSegment", Obj.RelocSegment);
- IO.mapRequired("Blocks", Obj.Blocks);
-}
-
-void MappingTraits<SourceFileInfo>::mapping(IO &IO, SourceFileInfo &Obj) {
- IO.mapOptional("Checksums", Obj.FileChecksums);
- IO.mapOptional("Lines", Obj.LineFragments);
- IO.mapOptional("InlineeLines", Obj.Inlinees);
-}
-
void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) {
IO.mapRequired("FileName", Obj.FileName);
IO.mapRequired("LineNum", Obj.SourceLineNum);
@@ -121,7 +175,310 @@ void MappingTraits<InlineeSite>::mapping(IO &IO, InlineeSite &Obj) {
IO.mapOptional("ExtraFiles", Obj.ExtraFiles);
}
-void MappingTraits<InlineeInfo>::mapping(IO &IO, InlineeInfo &Obj) {
- IO.mapRequired("HasExtraFiles", Obj.HasExtraFiles);
- IO.mapRequired("Sites", Obj.Sites);
+void YAMLChecksumsSubsection::map(IO &IO) {
+ IO.mapTag("!FileChecksums", true);
+ IO.mapRequired("Checksums", Checksums);
+}
+
+void YAMLLinesSubsection::map(IO &IO) {
+ IO.mapTag("!Lines", true);
+ IO.mapRequired("CodeSize", Lines.CodeSize);
+
+ IO.mapRequired("Flags", Lines.Flags);
+ IO.mapRequired("RelocOffset", Lines.RelocOffset);
+ IO.mapRequired("RelocSegment", Lines.RelocSegment);
+ IO.mapRequired("Blocks", Lines.Blocks);
+}
+
+void YAMLInlineeLinesSubsection::map(IO &IO) {
+ IO.mapTag("!InlineeLines", true);
+ IO.mapRequired("HasExtraFiles", InlineeLines.HasExtraFiles);
+ IO.mapRequired("Sites", InlineeLines.Sites);
+}
+
+void MappingTraits<YAMLDebugSubsection>::mapping(
+ IO &IO, YAMLDebugSubsection &Subsection) {
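+  // On input, the YAML tag selects which concrete wrapper to instantiate;
+  // on output, the existing wrapper writes its own tag from map().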
+ if (!IO.outputting()) {
+ if (IO.mapTag("!FileChecksums")) {
+ auto SS = std::make_shared<YAMLChecksumsSubsection>();
+ Subsection.Subsection = SS;
+ } else if (IO.mapTag("!Lines")) {
+ Subsection.Subsection = std::make_shared<YAMLLinesSubsection>();
+ } else if (IO.mapTag("!InlineeLines")) {
+ Subsection.Subsection = std::make_shared<YAMLInlineeLinesSubsection>();
+ } else {
+ llvm_unreachable("Unexpected subsection tag!");
+ }
+ }
+ Subsection.Subsection->map(IO);
+}
+
+static Expected<const YAMLChecksumsSubsection &>
+findChecksums(ArrayRef<YAMLDebugSubsection> Subsections) {
+ for (const auto &SS : Subsections) {
+ if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums) {
+ return static_cast<const YAMLChecksumsSubsection &>(*SS.Subsection);
+ }
+ }
+ return make_error<CodeViewError>(cv_error_code::no_records);
+}
+
+std::unique_ptr<DebugSubsection> YAMLChecksumsSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseStrings && !UseChecksums);
+ auto Result = llvm::make_unique<DebugChecksumsSubsection>(*UseStrings);
+ for (const auto &CS : Checksums) {
+ Result->addChecksum(CS.FileName, CS.Kind, CS.ChecksumBytes.Bytes);
+ }
+ return std::move(Result);
+}
+
+std::unique_ptr<DebugSubsection> YAMLLinesSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseStrings && UseChecksums);
+ auto Result =
+ llvm::make_unique<DebugLinesSubsection>(*UseChecksums, *UseStrings);
+ Result->setCodeSize(Lines.CodeSize);
+ Result->setRelocationAddress(Lines.RelocSegment, Lines.RelocOffset);
+ Result->setFlags(Lines.Flags);
+ for (const auto &LC : Lines.Blocks) {
+ Result->createBlock(LC.FileName);
+ if (Result->hasColumnInfo()) {
+ for (const auto &Item : zip(LC.Lines, LC.Columns)) {
+ auto &L = std::get<0>(Item);
+ auto &C = std::get<1>(Item);
+ uint32_t LE = L.LineStart + L.EndDelta;
+ Result->addLineAndColumnInfo(L.Offset,
+ LineInfo(L.LineStart, LE, L.IsStatement),
+ C.StartColumn, C.EndColumn);
+ }
+ } else {
+ for (const auto &L : LC.Lines) {
+ uint32_t LE = L.LineStart + L.EndDelta;
+ Result->addLineInfo(L.Offset, LineInfo(L.LineStart, LE, L.IsStatement));
+ }
+ }
+ }
+ return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+std::unique_ptr<DebugSubsection>
+YAMLInlineeLinesSubsection::toCodeViewSubsection(
+ DebugStringTableSubsection *UseStrings,
+ DebugChecksumsSubsection *UseChecksums) const {
+ assert(UseChecksums);
+ auto Result = llvm::make_unique<DebugInlineeLinesSubsection>(
+ *UseChecksums, InlineeLines.HasExtraFiles);
+
+ for (const auto &Site : InlineeLines.Sites) {
+ Result->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName,
+ Site.SourceLineNum);
+ if (!InlineeLines.HasExtraFiles)
+ continue;
+
+ for (auto EF : Site.ExtraFiles) {
+ Result->addExtraFile(EF);
+ }
+ }
+ return llvm::cast<DebugSubsection>(std::move(Result));
+}
+
+static Expected<SourceFileChecksumEntry>
+convertOneChecksum(const DebugStringTableSubsectionRef &Strings,
+ const FileChecksumEntry &CS) {
+ auto ExpectedString = Strings.getString(CS.FileNameOffset);
+ if (!ExpectedString)
+ return ExpectedString.takeError();
+
+ SourceFileChecksumEntry Result;
+ Result.ChecksumBytes.Bytes = CS.Checksum;
+ Result.Kind = CS.Kind;
+ Result.FileName = *ExpectedString;
+ return Result;
+}
+
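+// Map a file id (an index into the checksums subsection) back to the file
+// name it references in the string table.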
+static Expected<StringRef>
+getFileName(const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums, uint32_t FileID) {
+ auto Iter = Checksums.getArray().at(FileID);
+ if (Iter == Checksums.getArray().end())
+ return make_error<CodeViewError>(cv_error_code::no_records);
+ uint32_t Offset = Iter->FileNameOffset;
+ return Strings.getString(Offset);
+}
+
+Expected<std::shared_ptr<YAMLChecksumsSubsection>>
+YAMLChecksumsSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &FC) {
+ auto Result = std::make_shared<YAMLChecksumsSubsection>();
+
+ for (const auto &CS : FC) {
+ auto ConvertedCS = convertOneChecksum(Strings, CS);
+ if (!ConvertedCS)
+ return ConvertedCS.takeError();
+ Result->Checksums.push_back(*ConvertedCS);
+ }
+ return Result;
+}
+
+Expected<std::shared_ptr<YAMLLinesSubsection>>
+YAMLLinesSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugLinesSubsectionRef &Lines) {
+ auto Result = std::make_shared<YAMLLinesSubsection>();
+ Result->Lines.CodeSize = Lines.header()->CodeSize;
+ Result->Lines.RelocOffset = Lines.header()->RelocOffset;
+ Result->Lines.RelocSegment = Lines.header()->RelocSegment;
+ Result->Lines.Flags = static_cast<LineFlags>(uint16_t(Lines.header()->Flags));
+ for (const auto &L : Lines) {
+ SourceLineBlock Block;
+ auto EF = getFileName(Strings, Checksums, L.NameIndex);
+ if (!EF)
+ return EF.takeError();
+ Block.FileName = *EF;
+ if (Lines.hasColumnInfo()) {
+ for (const auto &C : L.Columns) {
+ SourceColumnEntry SCE;
+ SCE.EndColumn = C.EndColumn;
+ SCE.StartColumn = C.StartColumn;
+ Block.Columns.push_back(SCE);
+ }
+ }
+ for (const auto &LN : L.LineNumbers) {
+ SourceLineEntry SLE;
+ LineInfo LI(LN.Flags);
+ SLE.Offset = LN.Offset;
+ SLE.LineStart = LI.getStartLine();
+ SLE.EndDelta = LI.getLineDelta();
+ SLE.IsStatement = LI.isStatement();
+ Block.Lines.push_back(SLE);
+ }
+ Result->Lines.Blocks.push_back(Block);
+ }
+ return Result;
+}
+
+Expected<std::shared_ptr<YAMLInlineeLinesSubsection>>
+YAMLInlineeLinesSubsection::fromCodeViewSubsection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugInlineeLinesSubsectionRef &Lines) {
+ auto Result = std::make_shared<YAMLInlineeLinesSubsection>();
+
+ Result->InlineeLines.HasExtraFiles = Lines.hasExtraFiles();
+ for (const auto &IL : Lines) {
+ InlineeSite Site;
+ auto ExpF = getFileName(Strings, Checksums, IL.Header->FileID);
+ if (!ExpF)
+ return ExpF.takeError();
+ Site.FileName = *ExpF;
+ Site.Inlinee = IL.Header->Inlinee.getIndex();
+ Site.SourceLineNum = IL.Header->SourceLineNum;
+ if (Lines.hasExtraFiles()) {
+ for (const auto EF : IL.ExtraFiles) {
+ auto ExpF2 = getFileName(Strings, Checksums, EF);
+ if (!ExpF2)
+ return ExpF2.takeError();
+ Site.ExtraFiles.push_back(*ExpF2);
+ }
+ }
+ Result->InlineeLines.Sites.push_back(Site);
+ }
+ return Result;
+}
+
+Expected<std::vector<std::unique_ptr<DebugSubsection>>>
+llvm::CodeViewYAML::convertSubsectionList(
+ ArrayRef<YAMLDebugSubsection> Subsections,
+ DebugStringTableSubsection &Strings) {
+ std::vector<std::unique_ptr<DebugSubsection>> Result;
+ if (Subsections.empty())
+ return std::move(Result);
+
+ auto Checksums = findChecksums(Subsections);
+ if (!Checksums)
+ return Checksums.takeError();
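+  // Convert the checksums subsection first; the other subsection kinds hold
+  // file ids that refer into it.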
+ auto ChecksumsBase = Checksums->toCodeViewSubsection(&Strings, nullptr);
+ DebugChecksumsSubsection &CS =
+ llvm::cast<DebugChecksumsSubsection>(*ChecksumsBase);
+ for (const auto &SS : Subsections) {
+    // We've already converted the checksums subsection; don't do it twice.
+ std::unique_ptr<DebugSubsection> CVS;
+ if (SS.Subsection->Kind == DebugSubsectionKind::FileChecksums)
+ CVS = std::move(ChecksumsBase);
+ else
+ CVS = SS.Subsection->toCodeViewSubsection(&Strings, &CS);
+ Result.push_back(std::move(CVS));
+ }
+ return std::move(Result);
+}
+
+namespace {
+struct SubsectionConversionVisitor : public DebugSubsectionVisitor {
+ explicit SubsectionConversionVisitor(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums)
+ : Strings(Strings), Checksums(Checksums) {}
+
+ Error visitUnknown(DebugUnknownSubsectionRef &Unknown) override;
+ Error visitLines(DebugLinesSubsectionRef &Lines) override;
+ Error visitFileChecksums(DebugChecksumsSubsectionRef &Checksums) override;
+ Error visitInlineeLines(DebugInlineeLinesSubsectionRef &Inlinees) override;
+
+ YAMLDebugSubsection Subsection;
+
+private:
+ const DebugStringTableSubsectionRef &Strings;
+ const DebugChecksumsSubsectionRef &Checksums;
+};
+
+Error SubsectionConversionVisitor::visitUnknown(
+ DebugUnknownSubsectionRef &Unknown) {
+ return make_error<CodeViewError>(cv_error_code::operation_unsupported);
+}
+
+Error SubsectionConversionVisitor::visitLines(DebugLinesSubsectionRef &Lines) {
+ auto Result =
+ YAMLLinesSubsection::fromCodeViewSubsection(Strings, Checksums, Lines);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitFileChecksums(
+ DebugChecksumsSubsectionRef &Checksums) {
+ auto Result =
+ YAMLChecksumsSubsection::fromCodeViewSubsection(Strings, Checksums);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+
+Error SubsectionConversionVisitor::visitInlineeLines(
+ DebugInlineeLinesSubsectionRef &Inlinees) {
+ auto Result = YAMLInlineeLinesSubsection::fromCodeViewSubsection(
+ Strings, Checksums, Inlinees);
+ if (!Result)
+ return Result.takeError();
+ Subsection.Subsection = *Result;
+ return Error::success();
+}
+}
+
+Expected<YAMLDebugSubsection> YAMLDebugSubsection::fromCodeViewSubection(
+ const DebugStringTableSubsectionRef &Strings,
+ const DebugChecksumsSubsectionRef &Checksums,
+ const DebugSubsectionRecord &SS) {
+ SubsectionConversionVisitor V(Strings, Checksums);
+ if (auto EC = visitDebugSubsection(SS, V))
+ return std::move(EC);
+
+ return V.Subsection;
}
diff --git a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
index 6e8bb5c7372c..bd97af3a9323 100644
--- a/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
+++ b/lib/ObjectYAML/CodeViewYAMLSymbols.cpp
@@ -148,7 +148,8 @@ struct SymbolRecordBase {
virtual ~SymbolRecordBase() {}
virtual void map(yaml::IO &io) = 0;
virtual codeview::CVSymbol
- toCodeViewSymbol(BumpPtrAllocator &Allocator) const = 0;
+ toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container) const = 0;
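+  // The container kind is threaded through because symbol record padding
+  // differs between object files and PDBs.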
virtual Error fromCodeViewSymbol(codeview::CVSymbol Type) = 0;
};
@@ -159,8 +160,9 @@ template <typename T> struct SymbolRecordImpl : public SymbolRecordBase {
void map(yaml::IO &io) override;
codeview::CVSymbol
- toCodeViewSymbol(BumpPtrAllocator &Allocator) const override {
- return SymbolSerializer::writeOneSymbol(Symbol, Allocator);
+ toCodeViewSymbol(BumpPtrAllocator &Allocator,
+ CodeViewContainer Container) const override {
+ return SymbolSerializer::writeOneSymbol(Symbol, Allocator, Container);
}
Error fromCodeViewSymbol(codeview::CVSymbol CVS) override {
return SymbolDeserializer::deserializeAs<T>(CVS, Symbol);
@@ -429,8 +431,8 @@ template <> void SymbolRecordImpl<ThreadLocalDataSym>::map(IO &IO) {
}
CVSymbol CodeViewYAML::SymbolRecord::toCodeViewSymbol(
- BumpPtrAllocator &Allocator) const {
- return Symbol->toCodeViewSymbol(Allocator);
+ BumpPtrAllocator &Allocator, CodeViewContainer Container) const {
+ return Symbol->toCodeViewSymbol(Allocator, Container);
}
namespace llvm {
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index eb81e58b9b0e..17c60348633c 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -310,6 +310,10 @@ PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
// Catch trivial redundancies
FPM.addPass(EarlyCSEPass());
+ // Hoisting of scalars and load expressions.
+ if (EnableGVNHoist)
+ FPM.addPass(GVNHoistPass());
+
// Speculative execution if the target has divergent branches; otherwise nop.
FPM.addPass(SpeculativeExecutionPass());
@@ -473,8 +477,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
EarlyFPM.addPass(SROA());
EarlyFPM.addPass(EarlyCSEPass());
EarlyFPM.addPass(LowerExpectIntrinsicPass());
- if (EnableGVNHoist)
- EarlyFPM.addPass(GVNHoistPass());
MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
  // Interprocedural constant propagation now that basic cleanup has occurred
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 318e21da999d..f7b7ad89e959 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -649,12 +649,10 @@ static Triple::ObjectFormatType getDefaultFormat(const Triple &T) {
case Triple::tce:
case Triple::tcele:
case Triple::thumbeb:
- case Triple::xcore:
- return Triple::ELF;
-
case Triple::wasm32:
case Triple::wasm64:
- return Triple::Wasm;
+ case Triple::xcore:
+ return Triple::ELF;
case Triple::ppc:
case Triple::ppc64:
diff --git a/lib/Target/AArch64/AArch64PBQPRegAlloc.h b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
index 4f656f94ea12..b99c1d1d6b3e 100644
--- a/lib/Target/AArch64/AArch64PBQPRegAlloc.h
+++ b/lib/Target/AArch64/AArch64PBQPRegAlloc.h
@@ -1,4 +1,4 @@
-//===-- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints -------===//
+//==- AArch64PBQPRegAlloc.h - AArch64 specific PBQP constraints --*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
@@ -15,6 +15,8 @@
namespace llvm {
+class TargetRegisterInfo;
+
/// Add the accumulator chaining constraint to a PBQP graph
class A57ChainingConstraint : public PBQPRAConstraint {
public:
@@ -33,6 +35,7 @@ private:
// Add constraints between existing chains
void addInterChainConstraint(PBQPRAGraph &G, unsigned Rd, unsigned Ra);
};
-}
+
+} // end namespace llvm
#endif // LLVM_LIB_TARGET_AARCH64_AARCH64PBQPREGALOC_H
diff --git a/lib/Target/AArch64/AArch64SchedFalkorDetails.td b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
index d098cf7a5a37..7402bcf1346c 100644
--- a/lib/Target/AArch64/AArch64SchedFalkorDetails.td
+++ b/lib/Target/AArch64/AArch64SchedFalkorDetails.td
@@ -56,12 +56,14 @@ def FalkorWr_1Z_0cyc : SchedWriteRes<[FalkorUnitZ]> { let Latency = 0; }
def FalkorWr_1ZB_0cyc : SchedWriteRes<[FalkorUnitZB]> { let Latency = 0; }
def FalkorWr_1LD_3cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 3; }
def FalkorWr_1LD_4cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 4; }
+def FalkorWr_1XYZ_0cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 0; }
def FalkorWr_1XYZ_1cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 1; }
def FalkorWr_1XYZ_2cyc : SchedWriteRes<[FalkorUnitXYZ]> { let Latency = 2; }
def FalkorWr_1XYZB_0cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 0; }
def FalkorWr_1XYZB_1cyc : SchedWriteRes<[FalkorUnitXYZB]>{ let Latency = 1; }
def FalkorWr_1none_0cyc : SchedWriteRes<[]> { let Latency = 0; }
+def FalkorWr_1VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 0; }
def FalkorWr_1VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 1; }
def FalkorWr_1VXVY_2cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 2; }
def FalkorWr_1VXVY_3cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 3; }
@@ -76,6 +78,7 @@ def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
+def FalkorWr_1GTOV_0cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 0; }
def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
@@ -83,6 +86,10 @@ def FalkorWr_1VTOG_1cyc : SchedWriteRes<[FalkorUnitVTOG]>{ let Latency = 1; }
//===----------------------------------------------------------------------===//
// Define 2 micro-op types
+def FalkorWr_2VXVY_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
+ let Latency = 0;
+ let NumMicroOps = 2;
+}
def FalkorWr_2VXVY_1cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitVXVY]> {
let Latency = 1;
let NumMicroOps = 2;
@@ -476,17 +483,19 @@ def FalkorReadFMA64 : SchedReadAdvance<2, [FalkorWr_FMUL64_1VXVY_6cyc, FalkorWr
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast/ASRFast
// -----------------------------------------------------------------------------
def FalkorImmZPred : SchedPredicate<[{MI->getOperand(1).getImm() == 0}]>;
-def FalkorFMOVZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
+def FalkorOp1ZrReg : SchedPredicate<[{MI->getOperand(1).getReg() == AArch64::WZR ||
MI->getOperand(1).getReg() == AArch64::XZR}]>;
def FalkorShiftExtFastPred : SchedPredicate<[{TII->isFalkorShiftExtFast(*MI)}]>;
def FalkorWr_FMOV : SchedWriteVariant<[
- SchedVar<FalkorFMOVZrReg, [FalkorWr_1none_0cyc]>,
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1none_0cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
def FalkorWr_MOVZ : SchedWriteVariant<[
SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
- SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
+ SchedVar<NoSchedPred, [FalkorWr_1XYZB_0cyc]>]>; // imm fwd
+
def FalkorWr_ADDSUBsx : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1XYZ_1cyc]>,
@@ -500,6 +509,10 @@ def FalkorWr_LDRSro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1LD_4cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;
+def FalkorWr_ORRi : SchedWriteVariant<[
+ SchedVar<FalkorOp1ZrReg, [FalkorWr_1XYZ_0cyc]>, // imm fwd
+ SchedVar<NoSchedPred, [FalkorWr_1XYZ_1cyc]>]>;
+
def FalkorWr_PRFMro : SchedWriteVariant<[
SchedVar<FalkorShiftExtFastPred, [FalkorWr_1ST_3cyc]>,
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
@@ -810,7 +823,8 @@ def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^BIC(S)?(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EON(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^EOR(W|X)r(i|r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORN(W|X)r(r|s)$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(i|r|s)$")>;
+def : InstRW<[FalkorWr_ORRi], (instregex "^ORR(W|X)ri$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^ORR(W|X)r(r|s)$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SBC(S)?(W|X)r$")>;
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^SUB(S)?(W|X)r(r|i)$")>;
def : InstRW<[FalkorWr_ADDSUBsx], (instregex "^ADD(S)?(W|X)r(s|x|x64)$")>;
@@ -825,7 +839,7 @@ def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
@@ -849,7 +863,7 @@ def : InstRW<[FalkorWr_1GTOV_1VXVY_2cyc],
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v16i8$")>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>; // imm fwd
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
@@ -1036,13 +1050,13 @@ def : InstRW<[FalkorWr_FMUL64_1VXVY_6cyc, ReadDefault, ReadDefault, FalkorReadFM
// FP Miscellaneous Instructions
// -----------------------------------------------------------------------------
def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(WS|XD|XDHigh)r$")>;
-def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(S|D)i$")>;
+def : InstRW<[FalkorWr_1GTOV_0cyc], (instregex "^FMOV(S|D)i$")>; // imm fwd
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)S(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(d|s)$")>;
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(SW|DX|DXHigh)r$")>;
-def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>;
+def : InstRW<[FalkorWr_1VXVY_0cyc], (instregex "^FMOV(Sr|Dr|v.*_ns)$")>; // imm fwd
// FIXME: We are currently generating movi v0.2d, #0 for these, which is worse than fmov wzr/xzr
-def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs FMOVD0, FMOVS0)>;
+def : InstRW<[FalkorWr_2VXVY_0cyc], (instrs FMOVD0, FMOVS0)>; // imm fwd
def : InstRW<[FalkorWr_1GTOV_4cyc], (instregex "^(S|U)CVTF(S|U)(W|X)(D|S)ri$")>;
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)CVTF(v1i32|v2i32|v1i64|v2f32|d|s)(_shift)?")>;
@@ -1107,11 +1121,12 @@ def : InstRW<[FalkorWr_VMUL32_2VXVY_4cyc, FalkorReadVMA],
// Move and Shift Instructions
// -----------------------------------------------------------------------------
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
-def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
-def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
+def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV)(W|X)r$")>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instregex "^MOVK(W|X)i$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^ADRP?$")>; // imm fwd
+def : InstRW<[FalkorWr_1XYZB_0cyc], (instregex "^MOVN(W|X)i$")>; // imm fwd
def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
-def : InstRW<[FalkorWr_1XYZ_1cyc], (instrs MOVi32imm, MOVi64imm)>;
+def : InstRW<[FalkorWr_1XYZ_0cyc], (instrs MOVi32imm, MOVi64imm)>; // imm fwd (approximation)
def : InstRW<[WriteSequence<[FalkorWr_1XYZ_1cyc, FalkorWr_1XYZ_1cyc]>],
(instrs MOVaddr, MOVaddrBA, MOVaddrCP, MOVaddrEXT, MOVaddrJT, MOVaddrTLS)>;
def : InstRW<[WriteSequence<[FalkorWr_1LD_3cyc, FalkorWr_1XYZ_1cyc]>],
diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h
index 78ff3bbe3d1a..55d18c3f3646 100644
--- a/lib/Target/AMDGPU/AMDGPU.h
+++ b/lib/Target/AMDGPU/AMDGPU.h
@@ -55,6 +55,8 @@ FunctionPass *createAMDGPUMachineCFGStructurizerPass();
void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
extern char &AMDGPUMachineCFGStructurizerID;
+void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
+
ModulePass *createAMDGPUAnnotateKernelFeaturesPass();
void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
extern char &AMDGPUAnnotateKernelFeaturesID;
diff --git a/lib/Target/AMDGPU/AMDGPU.td b/lib/Target/AMDGPU/AMDGPU.td
index e7ebb37a9d62..b50e8d1d659e 100644
--- a/lib/Target/AMDGPU/AMDGPU.td
+++ b/lib/Target/AMDGPU/AMDGPU.td
@@ -365,6 +365,13 @@ def FeatureFlatForGlobal : SubtargetFeature<"flat-for-global",
"Force to generate flat instruction for global"
>;
+def FeatureAutoWaitcntBeforeBarrier : SubtargetFeature <
+ "auto-waitcnt-before-barrier",
+ "AutoWaitcntBeforeBarrier",
+ "true",
+ "Hardware automatically inserts waitcnt before barrier"
+>;
+
// Dummy feature used to disable assembler instructions.
def FeatureDisable : SubtargetFeature<"",
"FeatureDisable","true",
diff --git a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
index 1d03714874e2..8084d368c80f 100644
--- a/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
+++ b/lib/Target/AMDGPU/AMDGPUAlwaysInlinePass.cpp
@@ -22,18 +22,22 @@ using namespace llvm;
namespace {
class AMDGPUAlwaysInline : public ModulePass {
- static char ID;
-
bool GlobalOpt;
public:
- AMDGPUAlwaysInline(bool GlobalOpt) : ModulePass(ID), GlobalOpt(GlobalOpt) { }
+ static char ID;
+
+ AMDGPUAlwaysInline(bool GlobalOpt = false) :
+ ModulePass(ID), GlobalOpt(GlobalOpt) { }
bool runOnModule(Module &M) override;
StringRef getPassName() const override { return "AMDGPU Always Inline Pass"; }
};
} // End anonymous namespace
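+// Register the pass: initializeAMDGPUAlwaysInlinePass is called from
+// LLVMInitializeAMDGPUTarget, and registry construction is why GlobalOpt
+// gained a default value above.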
+INITIALIZE_PASS(AMDGPUAlwaysInline, "amdgpu-always-inline",
+ "AMDGPU Inline All Functions", false, false)
+
char AMDGPUAlwaysInline::ID = 0;
bool AMDGPUAlwaysInline::runOnModule(Module &M) {
diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
index 57905be18813..267f4807a788 100644
--- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
+++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp
@@ -28,11 +28,16 @@ using namespace llvm;
AMDGPULegalizerInfo::AMDGPULegalizerInfo() {
using namespace TargetOpcode;
+  const LLT S1 = LLT::scalar(1);
const LLT S32 = LLT::scalar(32);
const LLT S64 = LLT::scalar(64);
const LLT P1 = LLT::pointer(1, 64);
const LLT P2 = LLT::pointer(2, 64);
+ // FIXME: i1 operands to intrinsics should always be legal, but other i1
+ // values may not be legal. We need to figure out how to distinguish
+ // between these two scenarios.
+ setAction({G_CONSTANT, S1}, Legal);
setAction({G_CONSTANT, S32}, Legal);
setAction({G_CONSTANT, S64}, Legal);
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
index 6e301b4ad527..8d157e2f98f2 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.cpp
@@ -91,6 +91,7 @@ AMDGPUSubtarget::AMDGPUSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
FPExceptions(false),
DX10Clamp(false),
FlatForGlobal(false),
+ AutoWaitcntBeforeBarrier(false),
UnalignedScratchAccess(false),
UnalignedBufferAccess(false),
diff --git a/lib/Target/AMDGPU/AMDGPUSubtarget.h b/lib/Target/AMDGPU/AMDGPUSubtarget.h
index 0582ce95693a..ed9cbb994fad 100644
--- a/lib/Target/AMDGPU/AMDGPUSubtarget.h
+++ b/lib/Target/AMDGPU/AMDGPUSubtarget.h
@@ -110,6 +110,7 @@ protected:
bool FPExceptions;
bool DX10Clamp;
bool FlatForGlobal;
+ bool AutoWaitcntBeforeBarrier;
bool UnalignedScratchAccess;
bool UnalignedBufferAccess;
bool HasApertureRegs;
@@ -195,7 +196,8 @@ public:
}
bool isOpenCLEnv() const {
- return TargetTriple.getEnvironment() == Triple::OpenCL;
+ return TargetTriple.getEnvironment() == Triple::OpenCL ||
+ TargetTriple.getEnvironmentName() == "amdgizcl";
}
Generation getGeneration() const {
@@ -363,6 +365,10 @@ public:
return FlatForGlobal;
}
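+
+  /// \returns true if the hardware automatically inserts a waitcnt before
+  /// barriers, making an explicit waitcnt unnecessary.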
+ bool hasAutoWaitcntBeforeBarrier() const {
+ return AutoWaitcntBeforeBarrier;
+ }
+
bool hasUnalignedBufferAccess() const {
return UnalignedBufferAccess;
}
@@ -727,12 +733,6 @@ public:
/// Return the maximum number of waves per SIMD for kernels using \p VGPRs VGPRs
unsigned getOccupancyWithNumVGPRs(unsigned VGPRs) const;
- /// \returns True if waitcnt instruction is needed before barrier instruction,
- /// false otherwise.
- bool needWaitcntBeforeBarrier() const {
- return true;
- }
-
/// \returns true if the flat_scratch register should be initialized with the
/// pointer to the wave's scratch memory rather than a size and offset.
bool flatScratchIsPointer() const {
diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
index 596f02ae4a64..404598ff4738 100644
--- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
+++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -116,7 +116,7 @@ static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
static cl::opt<bool> EnableSIInsertWaitcntsPass(
"enable-si-insert-waitcnts",
cl::desc("Use new waitcnt insertion pass"),
- cl::init(false));
+ cl::init(true));
// Option to run late CFG structurizer
static cl::opt<bool> LateCFGStructurize(
@@ -139,6 +139,7 @@ extern "C" void LLVMInitializeAMDGPUTarget() {
initializeSIShrinkInstructionsPass(*PR);
initializeSIFixControlFlowLiveIntervalsPass(*PR);
initializeSILoadStoreOptimizerPass(*PR);
+ initializeAMDGPUAlwaysInlinePass(*PR);
initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
initializeAMDGPUAnnotateUniformValuesPass(*PR);
initializeAMDGPULowerIntrinsicsPass(*PR);
diff --git a/lib/Target/AMDGPU/SIFoldOperands.cpp b/lib/Target/AMDGPU/SIFoldOperands.cpp
index f13629a3185f..dfac068d1f69 100644
--- a/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -35,9 +35,12 @@ struct FoldCandidate {
};
unsigned char UseOpNo;
MachineOperand::MachineOperandType Kind;
+ bool Commuted;
- FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp) :
- UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()) {
+ FoldCandidate(MachineInstr *MI, unsigned OpNo, MachineOperand *FoldOp,
+ bool Commuted_ = false) :
+ UseMI(MI), OpToFold(nullptr), UseOpNo(OpNo), Kind(FoldOp->getType()),
+ Commuted(Commuted_) {
if (FoldOp->isImm()) {
ImmToFold = FoldOp->getImm();
} else if (FoldOp->isFI()) {
@@ -59,6 +62,10 @@ struct FoldCandidate {
bool isReg() const {
return Kind == MachineOperand::MO_Register;
}
+
+ bool isCommuted() const {
+ return Commuted;
+ }
};
class SIFoldOperands : public MachineFunctionPass {
@@ -237,8 +244,13 @@ static bool tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
!TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1))
return false;
- if (!TII->isOperandLegal(*MI, OpNo, OpToFold))
+ if (!TII->isOperandLegal(*MI, OpNo, OpToFold)) {
+ TII->commuteInstruction(*MI, false, CommuteIdx0, CommuteIdx1);
return false;
+ }
+
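+  // Remember that the instruction was commuted so the commute can be undone
+  // later if the fold ultimately fails (see foldInstOperand).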
+ FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold, true));
+ return true;
}
FoldList.push_back(FoldCandidate(MI, OpNo, OpToFold));
@@ -699,6 +711,9 @@ void SIFoldOperands::foldInstOperand(MachineInstr &MI,
DEBUG(dbgs() << "Folded source from " << MI << " into OpNo " <<
static_cast<int>(Fold.UseOpNo) << " of " << *Fold.UseMI << '\n');
tryFoldInst(TII, Fold.UseMI);
+ } else if (Fold.isCommuted()) {
+    // Restore the instruction's original operand order if the fold failed.
+ TII->commuteInstruction(*Fold.UseMI, false);
}
}
}
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 76c2644867aa..b48b23911105 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3571,7 +3571,7 @@ SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
}
if (AS == AMDGPUASI.CONSTANT_ADDRESS || AS == AMDGPUASI.GLOBAL_ADDRESS) {
if (Subtarget->getScalarizeGlobalBehavior() && isMemOpUniform(Load) &&
- isMemOpHasNoClobberedMemOperand(Load))
+ !Load->isVolatile() && isMemOpHasNoClobberedMemOperand(Load))
return SDValue();
// Non-uniform loads will be selected to MUBUF instructions, so they
// have the same legalization requirements as global and private
diff --git a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
index e22166d03e9a..c10badba88f3 100644
--- a/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaitcnts.cpp
@@ -1009,7 +1009,8 @@ MachineInstr *SIInsertWaitcnts::generateSWaitCntInstBefore(
// occurs before the instruction. Doing it here prevents any additional
// S_WAITCNTs from being emitted if the instruction was marked as
// requiring a WAITCNT beforehand.
- if (MI.getOpcode() == AMDGPU::S_BARRIER && ST->needWaitcntBeforeBarrier()) {
+ if (MI.getOpcode() == AMDGPU::S_BARRIER &&
+ !ST->hasAutoWaitcntBeforeBarrier()) {
EmitSwaitcnt |=
ScoreBrackets->updateByWait(VM_CNT, ScoreBrackets->getScoreUB(VM_CNT));
EmitSwaitcnt |= ScoreBrackets->updateByWait(
diff --git a/lib/Target/AMDGPU/SIInsertWaits.cpp b/lib/Target/AMDGPU/SIInsertWaits.cpp
index 9f32ecfa52ff..bc86515d8b1f 100644
--- a/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -630,7 +630,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
// but we also want to wait for any other outstanding transfers before
// signalling other hardware blocks
if ((I->getOpcode() == AMDGPU::S_BARRIER &&
- ST->needWaitcntBeforeBarrier()) ||
+ !ST->hasAutoWaitcntBeforeBarrier()) ||
I->getOpcode() == AMDGPU::S_SENDMSG ||
I->getOpcode() == AMDGPU::S_SENDMSGHALT)
Required = LastIssued;
diff --git a/lib/Target/AMDGPU/SMInstructions.td b/lib/Target/AMDGPU/SMInstructions.td
index 5b840a14dbc3..73dd8b7daa4e 100644
--- a/lib/Target/AMDGPU/SMInstructions.td
+++ b/lib/Target/AMDGPU/SMInstructions.td
@@ -229,6 +229,7 @@ def smrd_load : PatFrag <(ops node:$ptr), (load node:$ptr), [{
((Ld->getAddressSpace() == AMDGPUASI.CONSTANT_ADDRESS &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N)) ||
(Subtarget->getScalarizeGlobalBehavior() && Ld->getAddressSpace() == AMDGPUASI.GLOBAL_ADDRESS &&
+ !Ld->isVolatile() &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpUniform(N) &&
static_cast<const SITargetLowering *>(getTargetLowering())->isMemOpHasNoClobberedMemOperand(N)));
}]>;
diff --git a/lib/Target/AMDGPU/VOP3Instructions.td b/lib/Target/AMDGPU/VOP3Instructions.td
index 001fc960b228..77fc9551cff9 100644
--- a/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/lib/Target/AMDGPU/VOP3Instructions.td
@@ -245,9 +245,10 @@ def V_MAD_I64_I32 : VOP3Inst <"v_mad_i64_i32", VOP3b_I64_I1_I32_I32_I64>;
let SubtargetPredicate = Has16BitInsts in {
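+// v_div_fixup_f16 is not commutable, hence it is defined outside the
+// "let isCommutable = 1" block below.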
+def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
+
let isCommutable = 1 in {
-def V_DIV_FIXUP_F16 : VOP3Inst <"v_div_fixup_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, AMDGPUdiv_fixup>;
def V_FMA_F16 : VOP3Inst <"v_fma_f16", VOP3_Profile<VOP_F16_F16_F16_F16>, fma>;
def V_INTERP_P1LL_F16 : VOP3Inst <"v_interp_p1ll_f16", VOP3_Profile<VOP_F32_F32_F16>>;
def V_INTERP_P1LV_F16 : VOP3Inst <"v_interp_p1lv_f16", VOP3_Profile<VOP_F32_F32_F16_F16>>;
diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td
index 46fd1f70ee99..ca68f5d42c32 100644
--- a/lib/Target/ARM/ARM.td
+++ b/lib/Target/ARM/ARM.td
@@ -205,6 +205,13 @@ def FeatureAvoidPartialCPSR : SubtargetFeature<"avoid-partial-cpsr",
"AvoidCPSRPartialUpdate", "true",
"Avoid CPSR partial update for OOO execution">;
+/// Disable +1 predication cost for instructions updating CPSR.
+/// Enabled for Cortex-A57.
+def FeatureCheapPredicableCPSR : SubtargetFeature<"cheap-predicable-cpsr",
+ "CheapPredicableCPSRDef",
+ "true",
+ "Disable +1 predication cost for instructions updating CPSR">;
+
def FeatureAvoidMOVsShOp : SubtargetFeature<"avoid-movs-shop",
"AvoidMOVsShifterOperand", "true",
"Avoid movs instructions with shifter operand">;
@@ -788,12 +795,14 @@ def : ProcNoItin<"cortex-a53", [ARMv8a, ProcA53,
FeatureCRC,
FeatureFPAO]>;
-def : ProcNoItin<"cortex-a57", [ARMv8a, ProcA57,
- FeatureHWDivThumb,
- FeatureHWDivARM,
- FeatureCrypto,
- FeatureCRC,
- FeatureFPAO]>;
+def : ProcessorModel<"cortex-a57", CortexA57Model, [ARMv8a, ProcA57,
+ FeatureHWDivThumb,
+ FeatureHWDivARM,
+ FeatureCrypto,
+ FeatureCRC,
+ FeatureFPAO,
+ FeatureAvoidPartialCPSR,
+ FeatureCheapPredicableCPSR]>;
def : ProcNoItin<"cortex-a72", [ARMv8a, ProcA72,
FeatureHWDivThumb,
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 5c9d589e2625..f8b65573f9cd 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -558,13 +558,68 @@ bool ARMBaseInstrInfo::DefinesPredicate(
return Found;
}
-static bool isCPSRDefined(const MachineInstr *MI) {
- for (const auto &MO : MI->operands())
+bool ARMBaseInstrInfo::isCPSRDefined(const MachineInstr &MI) {
+ for (const auto &MO : MI.operands())
if (MO.isReg() && MO.getReg() == ARM::CPSR && MO.isDef() && !MO.isDead())
return true;
return false;
}
+bool ARMBaseInstrInfo::isAddrMode3OpImm(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ return Offset.getReg() != 0;
+}
+
+// On Cortex-A57, a load with a negative register offset requires an
+// additional cycle and an extra I unit.
+bool ARMBaseInstrInfo::isAddrMode3OpMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Offset = MI.getOperand(Op + 1);
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ assert(Opc.isImm());
+ assert(Offset.isReg());
+ int64_t OpcImm = Opc.getImm();
+
+ bool isSub = ARM_AM::getAM3Op(OpcImm) == ARM_AM::sub;
+ return (isSub && Offset.getReg() != 0);
+}
+
+bool ARMBaseInstrInfo::isLdstScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
+// Load, scaled register offset, not plus LSL2
+bool ARMBaseInstrInfo::isLdstScaledRegNotPlusLsl2(const MachineInstr &MI,
+ unsigned Op) const {
+ const MachineOperand &Opc = MI.getOperand(Op + 2);
+ unsigned OffImm = Opc.getImm();
+
+ bool isAdd = ARM_AM::getAM2Op(OffImm) == ARM_AM::add;
+ unsigned Amt = ARM_AM::getAM2Offset(OffImm);
+ ARM_AM::ShiftOpc ShiftOpc = ARM_AM::getAM2ShiftOpc(OffImm);
+ if (ShiftOpc == ARM_AM::no_shift) return false; // not scaled
+ bool SimpleScaled = (isAdd && ShiftOpc == ARM_AM::lsl && Amt == 2);
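+  // "Plus LSL #2" is the only scaled form the A57 model treats as optimal.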
+ return !SimpleScaled;
+}
+
+// Minus reg for ldstso addr mode
+bool ARMBaseInstrInfo::isLdstSoMinusReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2Op(OffImm) == ARM_AM::sub;
+}
+
+// Load, scaled register offset
+bool ARMBaseInstrInfo::isAm2ScaledReg(const MachineInstr &MI,
+ unsigned Op) const {
+ unsigned OffImm = MI.getOperand(Op + 2).getImm();
+ return ARM_AM::getAM2ShiftOpc(OffImm) != ARM_AM::no_shift;
+}
+
static bool isEligibleForITBlock(const MachineInstr *MI) {
switch (MI->getOpcode()) {
default: return true;
@@ -590,7 +645,7 @@ static bool isEligibleForITBlock(const MachineInstr *MI) {
case ARM::tSUBi3: // SUB (immediate) T1
case ARM::tSUBi8: // SUB (immediate) T2
case ARM::tSUBrr: // SUB (register) T1
- return !isCPSRDefined(MI);
+ return !ARMBaseInstrInfo::isCPSRDefined(*MI);
}
}
@@ -3349,6 +3404,22 @@ ARMBaseInstrInfo::getVLDMDefCycle(const InstrItineraryData *ItinData,
return DefCycle;
}
+bool ARMBaseInstrInfo::isLDMBaseRegInList(const MachineInstr &MI) const {
+ unsigned BaseReg = MI.getOperand(0).getReg();
+ for (unsigned i = 1, sz = MI.getNumOperands(); i < sz; ++i) {
+ const auto &Op = MI.getOperand(i);
+ if (Op.isReg() && Op.getReg() == BaseReg)
+ return true;
+ }
+ return false;
+}
+unsigned
+ARMBaseInstrInfo::getLDMVariableDefsSize(const MachineInstr &MI) const {
+ // ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops
+ // (outs GPR:$wb), (ins GPR:$Rn, pred:$p (2xOp), reglist:$regs, variable_ops)
+ return MI.getNumOperands() + 1 - MI.getDesc().getNumOperands();
+}
+
int
ARMBaseInstrInfo::getLDMDefCycle(const InstrItineraryData *ItinData,
const MCInstrDesc &DefMCID,
@@ -4119,7 +4190,8 @@ unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr &MI) const {
const MCInstrDesc &MCID = MI.getDesc();
- if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) {
+ if (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef())) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
return 1;
@@ -4148,7 +4220,8 @@ unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
}
const MCInstrDesc &MCID = MI.getDesc();
- if (PredCost && (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR))) {
+ if (PredCost && (MCID.isCall() || (MCID.hasImplicitDefOfPhysReg(ARM::CPSR) &&
+ !Subtarget.cheapPredicableCPSRDef()))) {
// When predicated, CPSR is an additional source operand for CPSR updating
// instructions, this apparently increases their latencies.
*PredCost = 1;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index dd7fe871345a..c52e572786d4 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -159,6 +159,24 @@ public:
bool isPredicable(const MachineInstr &MI) const override;
+ // CPSR defined in instruction
+ static bool isCPSRDefined(const MachineInstr &MI);
+ bool isAddrMode3OpImm(const MachineInstr &MI, unsigned Op) const;
+ bool isAddrMode3OpMinusReg(const MachineInstr &MI, unsigned Op) const;
+
+ // Load, scaled register offset
+ bool isLdstScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load, scaled register offset, not plus LSL2
+ bool isLdstScaledRegNotPlusLsl2(const MachineInstr &MI, unsigned Op) const;
+ // Minus reg for ldstso addr mode
+ bool isLdstSoMinusReg(const MachineInstr &MI, unsigned Op) const;
+ // Scaled register offset in address mode 2
+ bool isAm2ScaledReg(const MachineInstr &MI, unsigned Op) const;
+ // Load multiple, base reg in list
+ bool isLDMBaseRegInList(const MachineInstr &MI) const;
+ // get LDM variable defs size
+ unsigned getLDMVariableDefsSize(const MachineInstr &MI) const;
+
/// GetInstSize - Returns the size of the specified MachineInstr.
///
unsigned getInstSizeInBytes(const MachineInstr &MI) const override;
diff --git a/lib/Target/ARM/ARMCallLowering.cpp b/lib/Target/ARM/ARMCallLowering.cpp
index 31a2f499a9a7..a33d025d114e 100644
--- a/lib/Target/ARM/ARMCallLowering.cpp
+++ b/lib/Target/ARM/ARMCallLowering.cpp
@@ -34,7 +34,7 @@ ARMCallLowering::ARMCallLowering(const ARMTargetLowering &TLI)
static bool isSupportedType(const DataLayout &DL, const ARMTargetLowering &TLI,
Type *T) {
- if (T->isArrayTy())
+ if (T->isArrayTy() || T->isStructTy())
return true;
EVT VT = TLI.getValueType(DL, T, true);
@@ -167,8 +167,11 @@ void ARMCallLowering::splitToValueTypes(
if (SplitVTs.size() == 1) {
// Even if there is no splitting to do, we still want to replace the
// original type (e.g. pointer type -> integer).
- SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx),
- OrigArg.Flags, OrigArg.IsFixed);
+ auto Flags = OrigArg.Flags;
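+    // Record the ABI alignment of the original type so argument lowering
+    // can lay out stack arguments correctly.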
+ unsigned OriginalAlignment = DL.getABITypeAlignment(OrigArg.Ty);
+ Flags.setOrigAlign(OriginalAlignment);
+ SplitArgs.emplace_back(OrigArg.Reg, SplitVTs[0].getTypeForEVT(Ctx), Flags,
+ OrigArg.IsFixed);
return;
}
@@ -177,6 +180,10 @@ void ARMCallLowering::splitToValueTypes(
EVT SplitVT = SplitVTs[i];
Type *SplitTy = SplitVT.getTypeForEVT(Ctx);
auto Flags = OrigArg.Flags;
+
+ unsigned OriginalAlignment = DL.getABITypeAlignment(SplitTy);
+ Flags.setOrigAlign(OriginalAlignment);
+
bool NeedsConsecutiveRegisters =
TLI.functionArgumentNeedsConsecutiveRegisters(
SplitTy, F->getCallingConv(), F->isVarArg());
@@ -185,6 +192,7 @@ void ARMCallLowering::splitToValueTypes(
if (i == e - 1)
Flags.setInConsecutiveRegsLast();
}
+
SplitArgs.push_back(
ArgInfo{MRI.createGenericVirtualRegister(getLLTForType(*SplitTy, DL)),
SplitTy, Flags, OrigArg.IsFixed});
diff --git a/lib/Target/ARM/ARMSchedule.td b/lib/Target/ARM/ARMSchedule.td
index ec5b97cba8cd..1c7902520f2d 100644
--- a/lib/Target/ARM/ARMSchedule.td
+++ b/lib/Target/ARM/ARMSchedule.td
@@ -147,6 +147,9 @@ def : PredicateProlog<[{
const ARMBaseInstrInfo *TII =
static_cast<const ARMBaseInstrInfo*>(SchedModel->getInstrInfo());
(void)TII;
+ const ARMSubtarget *STI =
+ static_cast<const ARMSubtarget*>(SchedModel->getSubtargetInfo());
+ (void)STI;
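+  // Expose the subtarget to scheduling predicates; the (void) casts suppress
+  // unused-variable warnings in models that do not reference TII or STI.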
}]>;
def IsPredicatedPred : SchedPredicate<[{TII->isPredicated(*MI)}]>;
@@ -420,3 +423,4 @@ include "ARMScheduleA8.td"
include "ARMScheduleA9.td"
include "ARMScheduleSwift.td"
include "ARMScheduleR52.td"
+include "ARMScheduleA57.td"
diff --git a/lib/Target/ARM/ARMScheduleA57.td b/lib/Target/ARM/ARMScheduleA57.td
new file mode 100644
index 000000000000..525079d12d51
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA57.td
@@ -0,0 +1,1471 @@
+//=- ARMScheduleA57.td - ARM Cortex-A57 Scheduling Defs -----*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the machine model for ARM Cortex-A57 to support
+// instruction scheduling and other instruction cost heuristics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// The Cortex-A57 is a traditional superscalar microprocessor with a
+// conservative 3-wide in-order stage for decode and dispatch. Combined with the
+// much wider out-of-order issue stage, this produced a need to carefully
+// schedule micro-ops so that all three decoded each cycle are successfully
+// issued as the reservation station(s) simply don't stay occupied for long.
+// Therefore, IssueWidth is set to the narrower of the two at three, while still
+// modeling the machine as out-of-order.
+
+def IsCPSRDefinedPred : SchedPredicate<[{TII->isCPSRDefined(*MI)}]>;
+def IsCPSRDefinedAndPredicatedPred :
+ SchedPredicate<[{TII->isCPSRDefined(*MI) && TII->isPredicated(*MI)}]>;
+
+// Cortex A57 rev. r1p0 or later (false = r0px)
+def IsR1P0AndLaterPred : SchedPredicate<[{false}]>;
+
+// If Addrmode3 contains register offset (not immediate)
+def IsLdrAm3RegOffPred :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3RegOffPredX2 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 2)}]>;
+def IsLdrAm3RegOffPredX3 :
+ SchedPredicate<[{!TII->isAddrMode3OpImm(*MI, 3)}]>;
+
+// If Addrmode3 contains "minus register"
+def IsLdrAm3NegRegOffPred :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 1)}]>;
+// The same predicate with operand offset 2 and 3:
+def IsLdrAm3NegRegOffPredX2 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 2)}]>;
+def IsLdrAm3NegRegOffPredX3 :
+ SchedPredicate<[{TII->isAddrMode3OpMinusReg(*MI, 3)}]>;
+
+// Load, scaled register offset, not plus LSL2
+def IsLdstsoScaledNotOptimalPredX0 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 0)}]>;
+def IsLdstsoScaledNotOptimalPred :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 1)}]>;
+def IsLdstsoScaledNotOptimalPredX2 :
+ SchedPredicate<[{TII->isLdstScaledRegNotPlusLsl2(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdstsoScaledPred :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 1)}]>;
+def IsLdstsoScaledPredX2 :
+ SchedPredicate<[{TII->isLdstScaledReg(*MI, 2)}]>;
+
+def IsLdstsoMinusRegPredX0 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 0)}]>;
+def IsLdstsoMinusRegPred :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 1)}]>;
+def IsLdstsoMinusRegPredX2 :
+ SchedPredicate<[{TII->isLdstSoMinusReg(*MI, 2)}]>;
+
+// Load, scaled register offset
+def IsLdrAm2ScaledPred :
+ SchedPredicate<[{TII->isAm2ScaledReg(*MI, 1)}]>;
+
+// LDM, base reg in list
+def IsLdmBaseRegInList :
+ SchedPredicate<[{TII->isLDMBaseRegInList(*MI)}]>;
+
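+// Grouping helper: carries a list of SchedWriteRes, used below when defining
+// load/store-multiple (LDM/STM) write variants.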
+class A57WriteLMOpsListType<list<SchedWriteRes> writes> {
+ list <SchedWriteRes> Writes = writes;
+ SchedMachineModel SchedModel = ?;
+}
+
+// *** Common description and scheduling model parameters taken from AArch64 ***
+// (AArch64SchedA57.td)
+def CortexA57Model : SchedMachineModel {
+ let IssueWidth = 3; // 3-way decode and dispatch
+ let MicroOpBufferSize = 128; // 128 micro-op re-order buffer
+ let LoadLatency = 4; // Optimistic load latency
+ let MispredictPenalty = 16; // Fetch + Decode/Rename/Dispatch + Branch
+
+ // Enable partial & runtime unrolling.
+ let LoopMicroOpBufferSize = 16;
+ let CompleteModel = 1;
+}
+
+//===----------------------------------------------------------------------===//
+// Define each kind of processor resource and number available on Cortex-A57.
+// Cortex-A57 has 8 pipelines, each with its own 8-entry queue where
+// micro-ops wait for their operands and then issue out-of-order.
+
+def A57UnitB : ProcResource<1>; // Type B micro-ops
+def A57UnitI : ProcResource<2>; // Type I micro-ops
+def A57UnitM : ProcResource<1>; // Type M micro-ops
+def A57UnitL : ProcResource<1>; // Type L micro-ops
+def A57UnitS : ProcResource<1>; // Type S micro-ops
+
+def A57UnitX : ProcResource<1>; // Type X micro-ops (F1)
+def A57UnitW : ProcResource<1>; // Type W micro-ops (F0)
+
+let SchedModel = CortexA57Model in {
+ def A57UnitV : ProcResGroup<[A57UnitX, A57UnitW]>; // Type V micro-ops
+}
+
+let SchedModel = CortexA57Model in {
+
+//===----------------------------------------------------------------------===//
+// Define customized scheduler read/write types specific to the Cortex-A57.
+
+include "ARMScheduleA57WriteRes.td"
+
+// To have "CompleteModel = 1", support of pseudos and special instructions
+def : InstRW<[WriteNoop], (instregex "(t)?BKPT$", "(t2)?CDP(2)?$",
+ "(t2)?CLREX$", "CONSTPOOL_ENTRY$", "COPY_STRUCT_BYVAL_I32$",
+ "(t2)?CPS[123]p$", "(t2)?DBG$", "(t2)?DMB$", "(t2)?DSB$", "ERET$",
+ "(t2|t)?HINT$", "(t)?HLT$", "(t2)?HVC$", "(t2)?ISB$", "ITasm$",
+ "(t2)?RFE(DA|DB|IA|IB)", "(t)?SETEND", "(t2)?SETPAN", "(t2)?SMC", "SPACE",
+ "(t2)?SRS(DA|DB|IA|IB)", "SWP(B)?", "t?TRAP", "UDF$", "t2DCPS", "t2SG",
+ "t2TT", "tCPS", "CMP_SWAP", "t?SVC", "t2IT", "CompilerBarrier")>;
+
+def : InstRW<[WriteNoop], (instregex "VMRS", "VMSR", "FMSTAT")>;
+
+// Specific memory instrs
+def : InstRW<[WriteNoop, WriteNoop], (instregex "(t2)?LDA", "(t2)?LDC", "(t2)?STC",
+ "(t2)?STL", "(t2)?LDREX", "(t2)?STREX", "MEMCPY")>;
+
+// coprocessor moves
+def : InstRW<[WriteNoop, WriteNoop], (instregex
+ "(t2)?MCR(2|R|R2)?$", "(t2)?MRC(2)?$",
+ "(t2)?MRRC(2)?$", "(t2)?MRS(banked|sys|_AR|_M|sys_AR)?$",
+ "(t2)?MSR(banked|i|_AR|_M)?$")>;
+
+// Deprecated instructions
+def : InstRW<[WriteNoop], (instregex "FLDM", "FSTM")>;
+
+// Pseudos
+def : InstRW<[WriteNoop], (instregex "(t2)?ABS$",
+ "(t)?ADJCALLSTACKDOWN$", "(t)?ADJCALLSTACKUP$", "(t2|t)?Int_eh_sjlj",
+ "tLDRpci_pic", "t2SUBS_PC_LR",
+ "JUMPTABLE", "tInt_WIN_eh_sjlj_longjmp",
+ "VLD(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VLD(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(1|2)LN(d|q)(WB_fixed_|WB_register_)?Asm",
+ "VST(3|4)(DUP|LN)?(d|q)(WB_fixed_|WB_register_)?Asm",
+ "WIN__CHKSTK", "WIN__DBZCHK")>;
+
+// Miscellaneous
+// -----------------------------------------------------------------------------
+
+def : InstRW<[A57Write_1cyc_1I], (instrs COPY)>;
+
+// --- 3.2 Branch Instructions ---
+// B, BX, BL, BLX (imm, reg != LR, reg == LR), CBZ, CBNZ
+
+def : InstRW<[A57Write_1cyc_1B], (instregex "(t2|t)?B$", "t?BX", "(t2|t)?Bcc$",
+ "t?TAILJMP(d|r)", "TCRETURN(d|r)i", "tBfar", "tCBN?Z")>;
+def : InstRW<[A57Write_1cyc_1B_1I],
+ (instregex "t?BL$", "BL_pred$", "t?BLXi", "t?TPsoft")>;
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BLX", "tBLX(NS)?r")>;
+// Pseudos
+def : InstRW<[A57Write_2cyc_1B_1I], (instregex "BCCi64", "BCCZi64")>;
+def : InstRW<[A57Write_3cyc_1B_1I], (instregex "BR_JTadd", "t?BR_JTr",
+ "t2BR_JT", "t2BXJ", "(t2)?TB(B|H)(_JT)?$", "tBRIND")>;
+def : InstRW<[A57Write_6cyc_1B_1L], (instregex "BR_JTm")>;
+
+// --- 3.3 Arithmetic and Logical Instructions ---
+// ADD{S}, ADC{S}, ADR, AND{S}, BIC{S}, CMN, CMP, EOR{S}, ORN{S}, ORR{S},
+// RSB{S}, RSC{S}, SUB{S}, SBC{S}, TEQ, TST
+
+def : InstRW<[A57Write_1cyc_1I], (instregex "tADDframe")>;
+
+// shift by register, conditional or unconditional
+// TODO: according to the doc, conditional uses I0/I1, unconditional uses M.
+// Why would the more complex instruction use the simpler pipeline?
+// This may be an error in the doc.
+def A57WriteALUsi : SchedWriteVariant<[
+ // lsl #2, lsl #1, or lsr #1.
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57WriteALUSsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def A57ReadALUsr : SchedReadVariant<[
+ SchedVar<IsPredicatedPred, [ReadDefault]>,
+ SchedVar<NoSchedPred, [ReadDefault]>
+]>;
+def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsr, A57WriteALUsr>;
+def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
+def : SchedAlias<ReadALUsr, A57ReadALUsr>;
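+// Note on variant resolution (a reading of the TableGen semantics, for
+// illustration): a SchedWriteVariant selects the first SchedVar whose
+// predicate matches, so e.g. A57WriteALUsr resolves to A57Write_2cyc_1I for a
+// predicated shift-by-register ALU op and to A57Write_2cyc_1M otherwise.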
+
+def A57WriteCMPsr : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : SchedAlias<WriteCMP, A57Write_1cyc_1I>;
+def : SchedAlias<WriteCMPsi, A57Write_2cyc_1M>;
+def : SchedAlias<WriteCMPsr, A57WriteCMPsr>;
+
+// --- 3.4 Move and Shift Instructions ---
+// Move, basic
+// MOV{S}, MOVW, MVN{S}
+def : InstRW<[A57Write_1cyc_1I], (instregex "MOV(r|i|i16|r_TC)",
+ "(t2)?MVN(CC)?(r|i)", "BMOVPCB_CALL", "BMOVPCRX_CALL",
+ "MOVCC(r|i|i16|i32imm)", "tMOV", "tMVN")>;
+
+// Move, shift by immed, setflags/no setflags
+// (ASR, LSL, LSR, ROR, RRX)=MOVsi, MVN
+// setflags = isCPSRDefined
+def A57WriteMOVsi : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsi], (instregex "MOV(CC)?si", "MVNsi",
+ "ASRi", "(t2|t)ASRri", "LSRi", "(t2|t)LSRri", "LSLi", "(t2|t)LSLri", "RORi",
+ "(t2|t)RORri", "(t2)?RRX", "t2MOV", "tROR")>;
+
+// shift by register, conditional or unconditional, setflags/no setflags
+def A57WriteMOVsr : SchedWriteVariant<[
+ SchedVar<IsCPSRDefinedAndPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<IsCPSRDefinedPred, [A57Write_2cyc_1M]>,
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteMOVsr], (instregex "MOV(CC)?sr", "MVNsr", "t2MVNs",
+ "ASRr", "(t2|t)ASRrr", "LSRr", "(t2|t)LSRrr", "LSLr", "(t2|t)?LSLrr", "RORr",
+ "(t2|t)RORrr")>;
+
+// Move, top
+// MOVT - A57Write_2cyc_1M for r0px, A57Write_1cyc_1I for r1p0 and later
+def A57WriteMOVT : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_1cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
+]>;
+def : InstRW<[A57WriteMOVT], (instregex "MOVTi16")>;
+
+def A57WriteI2pc :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_1cyc_1I]>;
+def A57WriteI2ld :
+ WriteSequence<[A57Write_1cyc_1I, A57Write_1cyc_1I, A57Write_4cyc_1L]>;
+def : InstRW< [A57WriteI2pc], (instregex "MOV_ga_pcrel")>;
+def : InstRW< [A57WriteI2ld], (instregex "MOV_ga_pcrel_ldr")>;
+
+// +2cyc for branch forms
+def : InstRW<[A57Write_3cyc_1I], (instregex "MOVPC(LR|RX)")>;
+
+// --- 3.5 Divide and Multiply Instructions ---
+// Divide: SDIV, UDIV
+// latency from documentation: 4 - 20, the maximum is taken
+def : SchedAlias<WriteDIV, A57Write_20cyc_1M>;
+// Multiply: tMUL is not bound to common WriteRes types
+def : InstRW<[A57Write_3cyc_1M], (instregex "tMUL")>;
+def : SchedAlias<WriteMUL16, A57Write_3cyc_1M>;
+def : SchedAlias<WriteMUL32, A57Write_3cyc_1M>;
+def : ReadAdvance<ReadMUL, 0>;
+
+// Multiply accumulate: MLA, MLS, SMLABB, SMLABT, SMLATB, SMLATT, SMLAWB,
+// SMLAWT, SMLAD{X}, SMLSD{X}, SMMLA{R}, SMMLS{R}
+// Multiply-accumulate pipelines support late forwarding of accumulate
+// operands from similar μops, allowing a typical sequence of
+// multiply-accumulate μops to issue once per cycle (sched advance = 2).
+def A57WriteMLA : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57WriteMLAL : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57ReadMLA : SchedReadAdvance<2, [A57WriteMLA, A57WriteMLAL]>;
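+// Worked example (our reading, for illustration): with Latency = 3 and a
+// read advance of 2 on the accumulate operand, a dependent MLA->MLA chain
+// sees an effective accumulator latency of 3 - 2 = 1 cycle, matching the
+// once-per-cycle issue rate described above.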
+
+def : SchedAlias<WriteMAC16, A57WriteMLA>;
+def : SchedAlias<WriteMAC32, A57WriteMLA>;
+def : SchedAlias<ReadMAC, A57ReadMLA>;
+
+def : SchedAlias<WriteMAC64Lo, A57WriteMLAL>;
+def : SchedAlias<WriteMAC64Hi, A57WriteMLAL>;
+
+// Multiply long: SMULL, UMULL
+def : SchedAlias<WriteMUL64Lo, A57Write_4cyc_1M>;
+def : SchedAlias<WriteMUL64Hi, A57Write_4cyc_1M>;
+
+// --- 3.6 Saturating and Parallel Arithmetic Instructions ---
+// Parallel arith
+// SADD16, SADD8, SSUB16, SSUB8, UADD16, UADD8, USUB16, USUB8
+// Conditional GE-setting instructions require three extra μops
+// and two additional cycles to conditionally update the GE field.
+def A57WriteParArith : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_4cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1I_1M]>
+]>;
+def : InstRW< [A57WriteParArith], (instregex
+ "(t2)?SADD(16|8)", "(t2)?SSUB(16|8)",
+ "(t2)?UADD(16|8)", "(t2)?USUB(16|8)")>;
+
+// Parallel arith with exchange: SASX, SSAX, UASX, USAX
+def A57WriteParArithExch : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_5cyc_1I_1M]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1I_1M]>
+]>;
+def : InstRW<[A57WriteParArithExch],
+ (instregex "(t2)?SASX", "(t2)?SSAX", "(t2)?UASX", "(t2)?USAX")>;
+
+// Parallel halving arith
+// SHADD16, SHADD8, SHSUB16, SHSUB8, UHADD16, UHADD8, UHSUB16, UHSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex
+ "(t2)?SHADD(16|8)", "(t2)?SHSUB(16|8)",
+ "(t2)?UHADD(16|8)", "(t2)?UHSUB(16|8)")>;
+
+// Parallel halving arith with exchange
+// SHASX, SHSAX, UHASX, UHSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?SHASX", "(t2)?SHSAX",
+ "(t2)?UHASX", "(t2)?UHSAX")>;
+
+// Parallel saturating arith
+// QADD16, QADD8, QSUB16, QSUB8, UQADD16, UQADD8, UQSUB16, UQSUB8
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD(16|8)", "QSUB(16|8)",
+ "UQADD(16|8)", "UQSUB(16|8)", "t2(U?)QADD", "t2(U?)QSUB")>;
+
+// Parallel saturating arith with exchange
+// QASX, QSAX, UQASX, UQSAX
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QASX", "(t2)?QSAX",
+ "(t2)?UQASX", "(t2)?UQSAX")>;
+
+// Saturate: SSAT, SSAT16, USAT, USAT16
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SSAT(16)?", "(t2)?USAT(16)?")>;
+
+// Saturating arith: QADD, QSUB
+def : InstRW<[A57Write_2cyc_1M], (instregex "QADD$", "QSUB$")>;
+
+// Saturating doubling arith: QDADD, QDSUB
+def : InstRW<[A57Write_3cyc_1I_1M], (instregex "(t2)?QDADD", "(t2)?QDSUB")>;
+
+// --- 3.7 Miscellaneous Data-Processing Instructions ---
+// Bit field extract: SBFX, UBFX
+def : InstRW<[A57Write_1cyc_1I], (instregex "(t2)?SBFX", "(t2)?UBFX")>;
+
+// Bit field insert/clear: BFI, BFC
+def : InstRW<[A57Write_2cyc_1M], (instregex "(t2)?BFI", "(t2)?BFC")>;
+
+// Select bytes, conditional/unconditional
+def A57WriteSEL : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1I]>
+]>;
+def : InstRW<[A57WriteSEL], (instregex "(t2)?SEL")>;
+
+// Sign/zero extend, normal: SXTB, SXTH, UXTB, UXTH
+def : InstRW<[A57Write_1cyc_1I],
+ (instregex "(t2|t)?SXT(B|H)$", "(t2|t)?UXT(B|H)$")>;
+
+// Sign/zero extend and add, normal: SXTAB, SXTAH, UXTAB, UXTAH
+def : InstRW<[A57Write_2cyc_1M],
+ (instregex "(t2)?SXTA(B|H)$", "(t2)?UXTA(B|H)$")>;
+
+// Sign/zero extend and add, parallel: SXTAB16, UXTAB16
+def : InstRW<[A57Write_4cyc_1M], (instregex "(t2)?SXTAB16", "(t2)?UXTAB16")>;
+
+// Sum of absolute differences: USAD8, USADA8
+def : InstRW<[A57Write_3cyc_1M], (instregex "(t2)?USAD8", "(t2)?USADA8")>;
+
+// --- 3.8 Load Instructions ---
+
+// Load, immed offset
+// LDR and LDRB have LDRi12 and LDRBi12 forms for immediate
+def : InstRW<[A57Write_4cyc_1L], (instregex "LDRi12", "LDRBi12",
+ "LDRcp", "(t2|t)?LDRConstPool", "LDRLIT_ga_(pcrel|abs)",
+ "PICLDR", "tLDR")>;
+
+def : InstRW<[A57Write_4cyc_1L],
+ (instregex "t2LDRS?(B|H)?(pcrel|T|i8|i12|pci|pci_pic|s)?$")>;
+
+// For "Load, register offset, minus" we need +1cyc, +1I
+def A57WriteLdrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAm3], (instregex "LDR(H|SH|SB)$")>;
+def A57WriteLdrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAm3X2, A57WriteLdrAm3X2], (instregex "LDRD$")>;
+def : InstRW<[A57Write_4cyc_1L, A57Write_4cyc_1L], (instregex "t2LDRDi8")>;
+
+def A57WriteLdrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSO], (instregex "LDRrs", "LDRBrs")>;
+
+def A57WrBackOne : SchedWriteRes<[]> {
+ let Latency = 1;
+ let NumMicroOps = 0;
+}
+def A57WrBackTwo : SchedWriteRes<[]> {
+ let Latency = 2;
+ let NumMicroOps = 0;
+}
+def A57WrBackThree : SchedWriteRes<[]> {
+ let Latency = 3;
+ let NumMicroOps = 0;
+}
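+// These writes model only the base-register update of pre/post-indexed
+// forms: they carry the writeback latency but consume no issue slot
+// (NumMicroOps = 0). In the InstRW lists below they are paired with a normal
+// write, e.g. [A57Write_4cyc_1L_1I, A57WrBackOne] gives 4 cyc for the loaded
+// value and 1 cyc for the updated base.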
+
+// --- LDR pre-indexed ---
+// Load, immed pre-indexed (4 cyc for load result, 1 cyc for Base update)
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR_PRE_IMM",
+ "LDRB_PRE_IMM", "t2LDRB_PRE")>;
+
+// Load, register pre-indexed (4 cyc for load result, 2 cyc for Base update)
+// (5 cyc load result for not-lsl2 scaled)
+def A57WriteLdrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def : InstRW<[A57WriteLdrAmLDSTSOPre, A57WrBackTwo],
+ (instregex "LDR_PRE_REG", "LDRB_PRE_REG")>;
+
+def A57WriteLdrAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L, A57WriteLdrAm3PreWrBack],
+ (instregex "LDR(H|SH|SB)_PRE")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_PRE")>;
+
+// LDRD pre-indexed: 5(2) cyc for reg, 4(1) cyc for imm.
+def A57WriteLdrDAm3Pre : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrDAm3PreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteLdrDAm3Pre, A57WriteLdrDAm3Pre, A57WriteLdrDAm3PreWrBack],
+ (instregex "LDRD_PRE")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_PRE")>;
+
+// --- LDR post-indexed ---
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackOne], (instregex "LDR(T?)_POST_IMM",
+ "LDRB(T?)_POST_IMM", "LDR(SB|H|SH)Ti", "t2LDRB_POST")>;
+
+def A57WriteLdrAm3PostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPred, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57WriteLdrAm3PostWrBack],
+ (instregex "LDR(H|SH|SB)_POST")>;
+def : InstRW<[A57Write_4cyc_1L, A57WrBackOne],
+ (instregex "t2LDR(H|SH|SB)?_POST")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR_POST_REG",
+ "LDRB_POST_REG", "LDR(B?)T_POST$")>;
+
+def A57WriteLdrTRegPost : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57Write_4cyc_1I_1L_1M]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L_1I]>
+]>;
+def A57WriteLdrTRegPostWrBack : SchedWriteVariant<[
+ SchedVar<IsLdrAm2ScaledPred, [A57WrBackThree]>,
+ SchedVar<NoSchedPred, [A57WrBackTwo]>
+]>;
+// 4(3) "I0/I1,L,M" for scaled register, otherwise 4(2) "I0/I1,L"
+def : InstRW<[A57WriteLdrTRegPost, A57WriteLdrTRegPostWrBack],
+ (instregex "LDRT_POST_REG", "LDRBT_POST_REG")>;
+
+def : InstRW<[A57Write_4cyc_1L_1I, A57WrBackTwo], (instregex "LDR(SB|H|SH)Tr")>;
+
+def A57WriteLdrAm3PostWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3RegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+// LDRD post-indexed: 4(2) cyc for reg, 4(1) cyc for imm.
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57WriteLdrAm3PostWrBackX3], (instregex "LDRD_POST")>;
+def : InstRW<[A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I, A57WrBackOne],
+ (instregex "t2LDRD_POST")>;
+
+// --- Preload instructions ---
+// Preload, immed offset
+def : InstRW<[A57Write_4cyc_1L], (instregex "(t2)?PLDi12", "(t2)?PLDWi12",
+ "t2PLDW?(i8|pci|s)", "(t2)?PLI")>;
+
+// Preload, register offset:
+// 5cyc "I0/I1, L" for minus reg, or for scaled other than plus lsl2;
+// otherwise 4cyc "L"
+def A57WritePLD : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<IsLdstsoMinusRegPredX0, [A57Write_5cyc_1I_1L]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1L]>
+]>;
+def : InstRW<[A57WritePLD], (instregex "PLDrs", "PLDWrs")>;
+
+// --- Load multiple instructions ---
+foreach NumAddr = 1-8 in {
+ def A57LMAddrPred#NumAddr :
+ SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == "#NumAddr>;
+}
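+// For illustration, the NumAddr = 2 iteration of the loop above expands
+// (via '#' string concatenation) to:
+// def A57LMAddrPred2 :
+//   SchedPredicate<"(TII->getLDMVariableDefsSize(*MI)+1)/2 == 2">;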
+
+def A57LDMOpsListNoregin : A57WriteLMOpsListType<
+ [A57Write_3cyc_1L, A57Write_3cyc_1L,
+ A57Write_4cyc_1L, A57Write_4cyc_1L,
+ A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L]>;
+def A57WriteLDMnoreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListNoregin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListNoregin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListNoregin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListNoregin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListNoregin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListNoregin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListNoregin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListNoregin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListNoregin.Writes[0-15]>
+]> { let Variadic=1; }
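+// Illustrative reading of the variadic variant above: an LDM loading three
+// registers matches A57LMAddrPred2 ((3+1)/2 == 2) and selects the slice
+// Writes[0-3] (3, 3, 4, 4 cyc); its three defs then take the first three of
+// those writes in order.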
+
+def A57LDMOpsListRegin : A57WriteLMOpsListType<
+ [A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I]>;
+def A57WriteLDMreginlist : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsListRegin.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsListRegin.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsListRegin.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsListRegin.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsListRegin.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsListRegin.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsListRegin.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsListRegin.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57LDMOpsListRegin.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57LDMOpsList_Upd : A57WriteLMOpsListType<
+ [A57WrBackOne,
+ A57Write_3cyc_1L_1I, A57Write_3cyc_1L_1I,
+ A57Write_4cyc_1L_1I, A57Write_4cyc_1L_1I,
+ A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I]>;
+def A57WriteLDM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57LDMOpsList_Upd.Writes[0-2]>,
+ SchedVar<A57LMAddrPred2, A57LDMOpsList_Upd.Writes[0-4]>,
+ SchedVar<A57LMAddrPred3, A57LDMOpsList_Upd.Writes[0-6]>,
+ SchedVar<A57LMAddrPred4, A57LDMOpsList_Upd.Writes[0-8]>,
+ SchedVar<A57LMAddrPred5, A57LDMOpsList_Upd.Writes[0-10]>,
+ SchedVar<A57LMAddrPred6, A57LDMOpsList_Upd.Writes[0-12]>,
+ SchedVar<A57LMAddrPred7, A57LDMOpsList_Upd.Writes[0-14]>,
+ SchedVar<A57LMAddrPred8, A57LDMOpsList_Upd.Writes[0-16]>,
+ SchedVar<NoSchedPred, A57LDMOpsList_Upd.Writes[0-16]>
+]> { let Variadic=1; }
+
+def A57WriteLDM : SchedWriteVariant<[
+ SchedVar<IsLdmBaseRegInList, [A57WriteLDMreginlist]>,
+ SchedVar<NoSchedPred, [A57WriteLDMnoreginlist]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteLDM], (instregex "(t|t2|sys)?LDM(IA|DA|DB|IB)$")>;
+
+// TODO: no writeback latency defined in documentation (implemented as 1 cyc)
+def : InstRW<[A57WriteLDM_Upd],
+ (instregex "(t|t2|sys)?LDM(IA_UPD|DA_UPD|DB_UPD|IB_UPD|IA_RET)", "tPOP")>;
+
+// --- 3.9 Store Instructions ---
+
+// Store, immed offset
+def : InstRW<[A57Write_1cyc_1S], (instregex "STRi12", "STRBi12", "PICSTR",
+ "t2STR(B?)(T|i12|i8|s)", "t2STRDi8", "t2STRH(i12|i8|s)", "tSTR")>;
+
+// Store, register offset
+// Minus reg, or scaled other than plus lsl2, needs 3cyc "I0/I1, S";
+// otherwise 1cyc S.
+def A57WriteStrAmLDSTSO : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSO], (instregex "STRrs", "STRBrs")>;
+
+// STRH,STRD: 3cyc "I0/I1, S" for minus reg, 1cyc S for imm or for plus reg.
+def A57WriteStrAm3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPred, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3], (instregex "STRH$")>;
+def A57WriteStrAm3X2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S]>
+]>;
+def : InstRW<[A57WriteStrAm3X2], (instregex "STRD$")>;
+
+// Store, immed pre-indexed (1cyc "S, I0/I1", 1cyc writeback)
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR_PRE_IMM",
+ "STRB_PRE_IMM", "STR(B)?(r|i)_preidx", "(t2)?STRH_(preidx|PRE)",
+ "t2STR(B?)_(PRE|preidx)", "t2STRD_PRE")>;
+
+// Store, register pre-indexed:
+// 1(1) "S, I0/I1" for plus reg
+// 3(2) "I0/I1, S" for minus reg
+// 1(2) "S, M" for scaled plus lsl2
+// 3(2) "I0/I1, S" for other scaled
+def A57WriteStrAmLDSTSOPre : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledNotOptimalPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<IsLdstsoScaledPredX2, [A57Write_1cyc_1S_1M]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAmLDSTSOPreWrBack : SchedWriteVariant<[
+ SchedVar<IsLdstsoScaledPredX2, [A57WrBackTwo]>,
+ SchedVar<IsLdstsoMinusRegPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAmLDSTSOPreWrBack, A57WriteStrAmLDSTSOPre],
+ (instregex "STR_PRE_REG", "STRB_PRE_REG")>;
+
+// pre-indexed STRH/STRD (STRH_PRE, STRD_PRE)
+// 1(1) "S, I0/I1" for imm or reg plus
+// 3(2) "I0/I1, S" for reg minus
+def A57WriteStrAm3PreX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX2 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX2, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX2, A57WriteStrAm3PreX2],
+ (instregex "STRH_PRE")>;
+
+def A57WriteStrAm3PreX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57Write_3cyc_1I_1S]>,
+ SchedVar<NoSchedPred, [A57Write_1cyc_1S_1I]>
+]>;
+def A57WriteStrAm3PreWrBackX3 : SchedWriteVariant<[
+ SchedVar<IsLdrAm3NegRegOffPredX3, [A57WrBackTwo]>,
+ SchedVar<NoSchedPred, [A57WrBackOne]>
+]>;
+def : InstRW<[A57WriteStrAm3PreWrBackX3, A57WriteStrAm3PreX3],
+ (instregex "STRD_PRE")>;
+
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I], (instregex "STR(T?)_POST_IMM",
+ "STRB(T?)_POST_IMM", "t2STR(B?)_POST")>;
+
+// 1(2) "S, M" for STR/STRB register post-indexed (both scaled or not)
+def : InstRW<[A57WrBackTwo, A57Write_1cyc_1S_1M], (instregex "STR(T?)_POST_REG",
+ "STRB(T?)_POST_REG", "STR(B?)T_POST$")>;
+
+// post-indexed STRH/STRD(STRH_POST, STRD_POST), STRHTi, STRHTr
+// 1(1) "S, I0/I1" both for reg or imm
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "(t2)?STR(H|D)_POST", "STRHT(i|r)", "t2STRHT")>;
+
+// --- Store multiple instructions ---
+// TODO: no writeback latency defined in documentation
+def A57WriteSTM : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+def A57WriteSTM_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteSTM], (instregex "(t2|sys|t)?STM(IA|DA|DB|IB)$")>;
+def : InstRW<[A57WrBackOne, A57WriteSTM_Upd],
+ (instregex "(t2|sys|t)?STM(IA_UPD|DA_UPD|DB_UPD|IB_UPD)", "tPUSH")>;
+
+// --- 3.10 FP Data Processing Instructions ---
+def : SchedAlias<WriteFPALU32, A57Write_5cyc_1V>;
+def : SchedAlias<WriteFPALU64, A57Write_5cyc_1V>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(S|D|H)")>;
+
+// fp compare - 3cyc F1 for unconditional, 6cyc "F0/F1, F1" for conditional
+def A57WriteVcmp : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57Write_6cyc_1V_1X]>,
+ SchedVar<NoSchedPred, [A57Write_3cyc_1X]>
+]>;
+def : InstRW<[A57WriteVcmp],
+ (instregex "VCMP(D|S|H|ZD|ZS|ZH)$", "VCMPE(D|S|H|ZD|ZS|ZH)")>;
+
+// fp convert
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(A|N|P|M)(SH|UH|SS|US|SD|UD)", "VCVT(BDH|THD|TDH)")>;
+
+def : SchedAlias<WriteFPCVT, A57Write_5cyc_1V>;
+
+// FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRINT(A|N|P|M|Z|R|X)(H|S|D)$")>;
+
+// FP divide, FP square root
+def : SchedAlias<WriteFPDIV32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPDIV64, A57Write_32cyc_1W>;
+def : SchedAlias<WriteFPSQRT32, A57Write_17cyc_1W>;
+def : SchedAlias<WriteFPSQRT64, A57Write_32cyc_1W>;
+
+// FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex "VMAX", "VMIN")>;
+
+// FP multiply-accumulate pipelines support late forwarding of the result
+// from FP multiply μops to the accumulate operands of an
+// FP multiply-accumulate μop. The latter can potentially be issued 1 cycle
+// after the FP multiply μop has been issued.
+// FP multiply, FZ
+def A57WriteVMUL : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+
+def : SchedAlias<WriteFPMUL32, A57WriteVMUL>;
+def : SchedAlias<WriteFPMUL64, A57WriteVMUL>;
+def : ReadAdvance<ReadFPMUL, 0>;
+
+// FP multiply accumulate, FZ: 9cyc "F0/F1" or 4 cyc for sequenced accumulate
+// VFMA, VFMS, VFNMA, VFNMS, VMLA, VMLS, VNMLA, VNMLS
+def A57WriteVFMA : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+
+// VFMA takes 9 cyc in the common case and 4 cyc in a VFMA->VFMA chain
+// (5 cyc read advance). VMUL takes 5 cyc in the common case and 1 cyc in a
+// VMUL->VFMA chain (4 cyc read advance). Currently there is no way to give
+// the VFMA accumulate operand different read advances for VFMA and VMUL
+// producers, so a single read advance of 5 is used. Zero latency (instead of
+// one) for VMUL->VFMA should not break anything.
+// The same applies to the ASIMD VMUL/VFMA instructions.
+// def A57ReadVFMA : SchedRead;
+// def : ReadAdvance<A57ReadVFMA, 5, [A57WriteVFMA]>;
+// def : ReadAdvance<A57ReadVFMA, 4, [A57WriteVMUL]>;
+def A57ReadVFMA5 : SchedReadAdvance<5, [A57WriteVFMA, A57WriteVMUL]>;
+
+def : SchedAlias<WriteFPMAC32, A57WriteVFMA>;
+def : SchedAlias<WriteFPMAC64, A57WriteVFMA>;
+def : SchedAlias<ReadFPMAC, A57ReadVFMA5>;
+
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VSEL")>;
+
+// --- 3.11 FP Miscellaneous Instructions ---
+// VMOV: 3cyc "F0/F1" for imm/reg
+def : InstRW<[A57Write_3cyc_1V], (instregex "FCONST(D|S|H)")>;
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOV(D|S|H)(cc)?$")>;
+
+// 5cyc L for FP transfer, vfp to core reg,
+// 5cyc L for FP transfer, core reg to vfp
+def : SchedAlias<WriteFPMOV, A57Write_5cyc_1L>;
+// VMOVRRS/VMOVRRD are declared in common code with one WriteFPMOV
+// (instead of two).
+def : InstRW<[A57Write_5cyc_1L, A57Write_5cyc_1L], (instregex "VMOV(RRS|RRD)")>;
+
+// 8cyc "L,F0/F1" for FP transfer, core reg to upper or lower half of vfp D-reg
+def : InstRW<[A57Write_8cyc_1L_1I], (instregex "VMOVDRR")>;
+
+// --- 3.12 FP Load Instructions ---
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDR(D|S|H)")>;
+
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLDMQIA$")>;
+
+// FP load multiple (VLDM)
+
+def A57VLDMOpsListUncond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_5cyc_1L,
+ A57Write_6cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_7cyc_1L,
+ A57Write_8cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_9cyc_1L,
+ A57Write_10cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_11cyc_1L,
+ A57Write_12cyc_1L, A57Write_12cyc_1L]>;
+def A57WriteVLDMuncond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L, A57Write_6cyc_1L,
+ A57Write_7cyc_1L, A57Write_8cyc_1L,
+ A57Write_9cyc_1L, A57Write_10cyc_1L,
+ A57Write_11cyc_1L, A57Write_12cyc_1L,
+ A57Write_13cyc_1L, A57Write_14cyc_1L,
+ A57Write_15cyc_1L, A57Write_16cyc_1L,
+ A57Write_17cyc_1L, A57Write_18cyc_1L,
+ A57Write_19cyc_1L, A57Write_20cyc_1L]>;
+def A57WriteVLDMcond : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WriteVLDM], (instregex "VLDM(DIA|SIA)$")>;
+
+def A57VLDMOpsListUncond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_5cyc_1L_1I,
+ A57Write_6cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_7cyc_1L_1I,
+ A57Write_8cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_9cyc_1L_1I,
+ A57Write_10cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_11cyc_1L_1I,
+ A57Write_12cyc_1L_1I, A57Write_12cyc_1L_1I]>;
+def A57WriteVLDMuncond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListUncond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListUncond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListUncond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListUncond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListUncond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListUncond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListUncond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListUncond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListUncond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57VLDMOpsListCond_Upd : A57WriteLMOpsListType<
+ [A57Write_5cyc_1L_1I, A57Write_6cyc_1L_1I,
+ A57Write_7cyc_1L_1I, A57Write_8cyc_1L_1I,
+ A57Write_9cyc_1L_1I, A57Write_10cyc_1L_1I,
+ A57Write_11cyc_1L_1I, A57Write_12cyc_1L_1I,
+ A57Write_13cyc_1L_1I, A57Write_14cyc_1L_1I,
+ A57Write_15cyc_1L_1I, A57Write_16cyc_1L_1I,
+ A57Write_17cyc_1L_1I, A57Write_18cyc_1L_1I,
+ A57Write_19cyc_1L_1I, A57Write_20cyc_1L_1I]>;
+def A57WriteVLDMcond_UPD : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, A57VLDMOpsListCond_Upd.Writes[0-1]>,
+ SchedVar<A57LMAddrPred2, A57VLDMOpsListCond_Upd.Writes[0-3]>,
+ SchedVar<A57LMAddrPred3, A57VLDMOpsListCond_Upd.Writes[0-5]>,
+ SchedVar<A57LMAddrPred4, A57VLDMOpsListCond_Upd.Writes[0-7]>,
+ SchedVar<A57LMAddrPred5, A57VLDMOpsListCond_Upd.Writes[0-9]>,
+ SchedVar<A57LMAddrPred6, A57VLDMOpsListCond_Upd.Writes[0-11]>,
+ SchedVar<A57LMAddrPred7, A57VLDMOpsListCond_Upd.Writes[0-13]>,
+ SchedVar<A57LMAddrPred8, A57VLDMOpsListCond_Upd.Writes[0-15]>,
+ SchedVar<NoSchedPred, A57VLDMOpsListCond_Upd.Writes[0-15]>
+]> { let Variadic=1; }
+
+def A57WriteVLDM_UPD : SchedWriteVariant<[
+ SchedVar<IsPredicatedPred, [A57WriteVLDMcond_UPD]>,
+ SchedVar<NoSchedPred, [A57WriteVLDMuncond_UPD]>
+]> { let Variadic=1; }
+
+def : InstRW<[A57WrBackOne, A57WriteVLDM_UPD],
+ (instregex "VLDM(DIA_UPD|DDB_UPD|SIA_UPD|SDB_UPD)")>;
+
+// --- 3.13 FP Store Instructions ---
+def : InstRW<[A57Write_1cyc_1S], (instregex "VSTR(D|S|H)")>;
+
+def : InstRW<[A57Write_2cyc_1S], (instregex "VSTMQIA$")>;
+
+def A57WriteVSTMs : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S]>
+]>;
+def A57WriteVSTMd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S]>,
+ SchedVar<NoSchedPred, [A57Write_4cyc_1S]>
+]>;
+def A57WriteVSTMs_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_1cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_3cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_5cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_7cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_8cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+def A57WriteVSTMd_Upd : SchedWriteVariant<[
+ SchedVar<A57LMAddrPred1, [A57Write_2cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred2, [A57Write_4cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred3, [A57Write_6cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred4, [A57Write_8cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred5, [A57Write_10cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred6, [A57Write_12cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred7, [A57Write_14cyc_1S_1I]>,
+ SchedVar<A57LMAddrPred8, [A57Write_16cyc_1S_1I]>,
+ SchedVar<NoSchedPred, [A57Write_2cyc_1S_1I]>
+]>;
+
+def : InstRW<[A57WriteVSTMs], (instregex "VSTMSIA$")>;
+def : InstRW<[A57WriteVSTMd], (instregex "VSTMDIA$")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMs_Upd],
+ (instregex "VSTM(SIA_UPD|SDB_UPD)")>;
+def : InstRW<[A57WrBackOne, A57WriteVSTMd_Upd],
+ (instregex "VSTM(DIA_UPD|DDB_UPD)")>;
+
+// --- 3.14 ASIMD Integer Instructions ---
+
+// ASIMD absolute diff, 3cyc F0/F1 for integer VABD
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABD(s|u)")>;
+
+// ASIMD absolute diff accum: 4(1) F1 for D-form, 5(2) F1 for Q-form
+def A57WriteVABAD : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAD : SchedReadAdvance<3, [A57WriteVABAD]>;
+def : InstRW<[A57WriteVABAD, A57ReadVABAD],
+ (instregex "VABA(s|u)(v8i8|v4i16|v2i32)")>;
+def A57WriteVABAQ : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57ReadVABAQ : SchedReadAdvance<3, [A57WriteVABAQ]>;
+def : InstRW<[A57WriteVABAQ, A57ReadVABAQ],
+ (instregex "VABA(s|u)(v16i8|v8i16|v4i32)")>;
+
+// ASIMD absolute diff accum long: 4(1) F1 for VABAL
+def A57WriteVABAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVABAL : SchedReadAdvance<3, [A57WriteVABAL]>;
+def : InstRW<[A57WriteVABAL, A57ReadVABAL], (instregex "VABAL(s|u)")>;
+
+// ASIMD absolute diff long: 3cyc F0/F1 for VABDL
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABDL(s|u)")>;
+
+// ASIMD arith, basic
+def : InstRW<[A57Write_3cyc_1V], (instregex "VADD", "VADDL", "VADDW",
+ "VNEG(s8d|s16d|s32d|s8q|s16q|s32q|d|q)",
+ "VPADDi", "VPADDL", "VSUB", "VSUBL", "VSUBW")>;
+
+// ASIMD arith, complex
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS", "VADDHN", "VHADD", "VHSUB",
+ "VQABS", "VQADD", "VQNEG", "VQSUB",
+ "VRADDHN", "VRHADD", "VRSUBHN", "VSUBHN")>;
+
+// ASIMD compare
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VCEQ", "VCGE", "VCGT", "VCLE", "VTST", "VCLT")>;
+
+// ASIMD logical
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "VAND", "VBIC", "VMVN", "VORR", "VORN", "VEOR")>;
+
+// ASIMD max/min
+def : InstRW<[A57Write_3cyc_1V],
+ (instregex "(VMAX|VMIN)(s|u)", "(VPMAX|VPMIN)(s8|s16|s32|u8|u16|u32)")>;
+
+// ASIMD multiply, D-form: 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+// Cortex-A57 r1p0 and later reduce the latency of ASIMD multiply
+// and multiply-with-accumulate instructions relative to r0pX.
+def A57WriteVMULD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULD_VecInt], (instregex
+ "VMUL(v8i8|v4i16|v2i32|pd)", "VMULsl(v4i16|v2i32)",
+ "VQDMULH(sl)?(v4i16|v2i32)", "VQRDMULH(sl)?(v4i16|v2i32)")>;
+
+// ASIMD multiply, Q-form: 6cyc F0 for r0px, 5cyc F0 for r1p0 and later
+def A57WriteVMULQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def : InstRW<[A57WriteVMULQ_VecInt], (instregex
+ "VMUL(v16i8|v8i16|v4i32|pq)", "VMULsl(v8i16|v4i32)",
+ "VQDMULH(sl)?(v8i16|v4i32)", "VQRDMULH(sl)?(v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate, D-form
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAD_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAD_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAD_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAD_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAD_VecInt, A57ReadVMLAD_VecInt],
+ (instregex "VMLA(sl)?(v8i8|v4i16|v2i32)", "VMLS(sl)?(v8i8|v4i16|v2i32)")>;
+
+// ASIMD multiply accumulate, Q-form
+// 6cyc F0 for r0px, 5cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAQ_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_5cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_6cyc_1W]>]>;
+def A57ReadVMLAQ_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAQ_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAQ_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAQ_VecInt, A57ReadVMLAQ_VecInt],
+ (instregex "VMLA(sl)?(v16i8|v8i16|v4i32)", "VMLS(sl)?(v16i8|v8i16|v4i32)")>;
+
+// ASIMD multiply accumulate long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 1cyc for accumulate sequence
+// (4 or 3 ReadAdvance)
+def A57WriteVMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<3, [A57WriteVMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<4, [A57WriteVMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVMLAL_VecInt, A57ReadVMLAL_VecInt],
+ (instregex "VMLAL(s|u)", "VMLSL(s|u)")>;
+
+// ASIMD multiply accumulate saturating long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later, 2cyc for accumulate sequence
+// (3 or 2 ReadAdvance)
+def A57WriteVQDMLAL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def A57ReadVQDMLAL_VecInt : SchedReadVariant<[
+ SchedVar<IsR1P0AndLaterPred, [SchedReadAdvance<2, [A57WriteVQDMLAL_VecInt]>]>,
+ SchedVar<NoSchedPred, [SchedReadAdvance<3, [A57WriteVQDMLAL_VecInt]>]>
+]>;
+def : InstRW<[A57WriteVQDMLAL_VecInt, A57ReadVQDMLAL_VecInt],
+ (instregex "VQDMLAL", "VQDMLSL")>;
+
+// ASIMD multiply long
+// 5cyc F0 for r0px, 4cyc F0 for r1p0 and later
+def A57WriteVMULL_VecInt : SchedWriteVariant<[
+ SchedVar<IsR1P0AndLaterPred, [A57Write_4cyc_1W]>,
+ SchedVar<NoSchedPred, [A57Write_5cyc_1W]>]>;
+def : InstRW<[A57WriteVMULL_VecInt],
+ (instregex "VMULL(s|u|p8|sls|slu)", "VQDMULL")>;
+
+// ASIMD pairwise add and accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVPADAL : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVPADAL : SchedReadAdvance<3, [A57WriteVPADAL]>;
+def : InstRW<[A57WriteVPADAL, A57ReadVPADAL], (instregex "VPADAL(s|u)")>;
+
+// ASIMD shift accumulate
+// 4cyc F1, 1cyc for accumulate sequence (3cyc ReadAdvance)
+def A57WriteVSRA : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57ReadVSRA : SchedReadAdvance<3, [A57WriteVSRA]>;
+def : InstRW<[A57WriteVSRA, A57ReadVSRA], (instregex "VSRA", "VRSRA")>;
+
+// ASIMD shift by immed, basic
+def : InstRW<[A57Write_3cyc_1X],
+ (instregex "VMOVL", "VSHLi", "VSHLL", "VSHR(s|u)", "VSHRN")>;
+
+// ASIMD shift by immed, complex
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHRN", "VQRSHRUN", "VQSHL(si|ui|su)", "VQSHRN", "VQSHRUN", "VRSHR(s|u)",
+ "VRSHRN")>;
+
+// ASIMD shift by immed and insert, basic, D-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSLI(v8i8|v4i16|v2i32|v1i64)", "VSRI(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by immed and insert, basic, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VSLI(v16i8|v8i16|v4i32|v2i64)", "VSRI(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, basic, D-form
+def : InstRW<[A57Write_3cyc_1X], (instregex
+ "VSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, basic, Q-form
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// ASIMD shift by register, complex, D-form
+// VQRSHL, VQSHL, VRSHL
+def : InstRW<[A57Write_4cyc_1X], (instregex
+ "VQRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)", "VQSHL(s|u)(v8i8|v4i16|v2i32|v1i64)",
+ "VRSHL(s|u)(v8i8|v4i16|v2i32|v1i64)")>;
+
+// ASIMD shift by register, complex, Q-form
+def : InstRW<[A57Write_5cyc_1X], (instregex
+ "VQRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)", "VQSHL(s|u)(v16i8|v8i16|v4i32|v2i64)",
+ "VRSHL(s|u)(v16i8|v8i16|v4i32|v2i64)")>;
+
+// --- 3.15 ASIMD Floating-Point Instructions ---
+// ASIMD FP absolute value
+def : InstRW<[A57Write_3cyc_1V], (instregex "VABS(fd|fq|hd|hq)")>;
+
+// ASIMD FP arith
+def : InstRW<[A57Write_5cyc_1V], (instregex "VABD(fd|fq|hd|hq)",
+ "VADD(fd|fq|hd|hq)", "VPADD(f|h)", "VSUB(fd|fq|hd|hq)")>;
+
+// ASIMD FP compare
+def : InstRW<[A57Write_5cyc_1V], (instregex "VAC(GE|GT|LE|LT)",
+ "VC(EQ|GE|GT|LE)(fd|fq|hd|hq)")>;
+
+// ASIMD FP convert, integer
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VCVT(f2sd|f2ud|s2fd|u2fd|f2sq|f2uq|s2fq|u2fq|f2xsd|f2xud|xs2fd|xu2fd)",
+ "VCVT(f2xsq|f2xuq|xs2fq|xu2fq)",
+ "VCVT(AN|MN|NN|PN)(SDf|SQf|UDf|UQf|SDh|SQh|UDh|UQh)")>;
+
+// ASIMD FP convert, half-precision: 8cyc F0/F1
+def : InstRW<[A57Write_8cyc_1V], (instregex
+ "VCVT(h2sd|h2ud|s2hd|u2hd|h2sq|h2uq|s2hq|u2hq|h2xsd|h2xud|xs2hd|xu2hd)",
+ "VCVT(h2xsq|h2xuq|xs2hq|xu2hq)",
+ "VCVT(f2h|h2f)")>;
+
+// ASIMD FP max/min
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "(VMAX|VMIN)(fd|fq|hd|hq)", "(VPMAX|VPMIN)(f|h)", "VMAXNM", "VMINNM")>;
+
+// ASIMD FP multiply
+def A57WriteVMUL_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def : InstRW<[A57WriteVMUL_VecFP], (instregex "VMUL(sl)?(fd|fq|hd|hq)")>;
+
+// ASIMD FP multiply accumulate: 9cyc F0/F1, 4cyc for accumulate sequence
+def A57WriteVMLA_VecFP : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57ReadVMLA_VecFP :
+ SchedReadAdvance<5, [A57WriteVMLA_VecFP, A57WriteVMUL_VecFP]>;
+def : InstRW<[A57WriteVMLA_VecFP, A57ReadVMLA_VecFP],
+ (instregex "(VMLA|VMLS)(sl)?(fd|fq|hd|hq)", "(VFMA|VFMS)(fd|fq|hd|hq)")>;
+
+// ASIMD FP negate
+def : InstRW<[A57Write_3cyc_1V], (instregex "VNEG(fd|f32q|hd|hq)")>;
+
+// ASIMD FP round to integral
+def : InstRW<[A57Write_5cyc_1V], (instregex
+ "VRINT(AN|MN|NN|PN|XN|ZN)(Df|Qf|Dh|Qh)")>;
+
+// --- 3.16 ASIMD Miscellaneous Instructions ---
+
+// ASIMD bitwise insert
+def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
+
+// ASIMD count
+def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;
+
+// ASIMD duplicate, core reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VDUP(8|16|32)(d|q)")>;
+
+// ASIMD duplicate, scalar: 3cyc "F0/F1"
+def : InstRW<[A57Write_3cyc_1V], (instregex "VDUPLN(8|16|32)(d|q)")>;
+
+// ASIMD extract
+def : InstRW<[A57Write_3cyc_1V], (instregex "VEXT(d|q)(8|16|32|64)")>;
+
+// ASIMD move, immed
+def : InstRW<[A57Write_3cyc_1V], (instregex
+ "VMOV(v8i8|v16i8|v4i16|v8i16|v2i32|v4i32|v1i64|v2i64|v2f32|v4f32)",
+ "VMOVQ0")>;
+
+// ASIMD move, narrowing
+def : InstRW<[A57Write_3cyc_1V], (instregex "VMOVN")>;
+
+// ASIMD move, saturating
+def : InstRW<[A57Write_4cyc_1X], (instregex "VQMOVN")>;
+
+// ASIMD reciprocal estimate
+def : InstRW<[A57Write_5cyc_1V], (instregex "VRECPE", "VRSQRTE")>;
+
+// ASIMD reciprocal step, FZ
+def : InstRW<[A57Write_9cyc_1V], (instregex "VRECPS", "VRSQRTS")>;
+
+// ASIMD reverse, swap, table lookup (1-2 reg)
+def : InstRW<[A57Write_3cyc_1V], (instregex "VREV", "VSWP", "VTB(L|X)(1|2)")>;
+
+// ASIMD table lookup (3-4 reg)
+def : InstRW<[A57Write_6cyc_1V], (instregex "VTBL(3|4)", "VTBX(3|4)")>;
+
+// ASIMD transfer, scalar to core reg: 6cyc "L, I0/I1"
+def : InstRW<[A57Write_6cyc_1L_1I], (instregex "VGETLN")>;
+
+// ASIMD transfer, core reg to scalar: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex "VSETLN")>;
+
+// ASIMD transpose
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V], (instregex "VTRN")>;
+
+// ASIMD unzip/zip, D-form
+def : InstRW<[A57Write_3cyc_1V, A57Write_3cyc_1V],
+ (instregex "VUZPd", "VZIPd")>;
+
+// ASIMD unzip/zip, Q-form
+def : InstRW<[A57Write_6cyc_1V, A57Write_6cyc_1V],
+ (instregex "VUZPq", "VZIPq")>;
+
+// --- 3.17 ASIMD Load Instructions ---
+
+// Overridden via InstRW for this processor.
+def : WriteRes<WriteVLD1, []>;
+def : WriteRes<WriteVLD2, []>;
+def : WriteRes<WriteVLD3, []>;
+def : WriteRes<WriteVLD4, []>;
+def : WriteRes<WriteVST1, []>;
+def : WriteRes<WriteVST2, []>;
+def : WriteRes<WriteVST3, []>;
+def : WriteRes<WriteVST4, []>;
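+// Our reading: the empty resource lists above only declare these common
+// SchedWrite types for this model; the real per-instruction timings come
+// from the InstRW overrides below.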
+
+// 1-2 reg: 5cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_5cyc_1L], (instregex "VLD1(d|q)(8|16|32|64)$")>;
+def : InstRW<[A57Write_5cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)wb")>;
+
+// 3-4 reg: 6cyc L, +I for writeback, 1 cyc wb latency
+def : InstRW<[A57Write_6cyc_1L],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)$", "VLD1d64(T|Q)Pseudo")>;
+
+def : InstRW<[A57Write_6cyc_1L_1I, A57WrBackOne],
+ (instregex "VLD1(d|q)(8|16|32|64)(T|Q)wb")>;
+
+// ASIMD load, 1 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)$", "VLD1(LN|DUP)(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne], (instregex
+ "VLD1(LN|DUP)(d|q)(8|16|32)(wb|_UPD)", "VLD1LNq(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 2 element, multiple, 2 reg: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V],
+ (instregex "VLD2(d|q)(8|16|32)$", "VLD2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2(d|q)(8|16|32)wb", "VLD2q(8|16|32)PseudoWB")>;
+
+// ASIMD load, 2 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V], (instregex "VLD2b(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2b(8|16|32)wb")>;
+
+// ASIMD load, 2 element, one lane and all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD2(DUP|LN)(d|q)(8|16|32|8x2|16x2|32x2)$",
+ "VLD2LN(d|q)(8|16|32)Pseudo$")>;
+// 2 results + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V, A57WrBackOne],
+ (instregex "VLD2LN(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb result
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD2DUPd(8|16|32|8x2|16x2|32x2)wb",
+ "VLD2LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, multiple, 3 reg: 9cyc "L, F0/F1"
+// 3 results
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)$")>;
+// 1 result
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo$")>;
+// 3 results + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)_UPD$")>;
+// 1 result + wb
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3LN(d|q)32$",
+ "VLD3LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 3 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V],
+ (instregex "VLD3LN(d|q)(8|16)$",
+ "VLD3LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 3 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V],
+ (instregex "VLD3DUP(d|q)(8|16|32)$",
+ "VLD3DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD3DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, multiple, 4 reg: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)$")>;
+def : InstRW<[A57Write_9cyc_1L_1V],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4(d|q)(8|16|32)(oddP|P)seudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 32: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4LN(d|q)32$",
+ "VLD4LN(d|q)32Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)32_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)32Pseudo_UPD")>;
+
+// ASIMD load, 4 element, one lane, size 8/16: 9cyc "L, F0/F1"
+def : InstRW<[A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V, A57Write_9cyc_1L_1V,
+ A57Write_9cyc_1L_1V],
+ (instregex "VLD4LN(d|q)(8|16)$",
+ "VLD4LN(d|q)(8|16)Pseudo$")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57Write_9cyc_1L_1V_1I, A57Write_9cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)_UPD")>;
+def : InstRW<[A57Write_9cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4LN(d|q)(8|16)Pseudo_UPD")>;
+
+// ASIMD load, 4 element, all lanes: 8cyc "L, F0/F1"
+def : InstRW<[A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V, A57Write_8cyc_1L_1V,
+ A57Write_8cyc_1L_1V],
+ (instregex "VLD4DUP(d|q)(8|16|32)$",
+ "VLD4DUP(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57Write_8cyc_1L_1V_1I, A57Write_8cyc_1L_1V_1I,
+ A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)_UPD")>;
+def : InstRW<[A57Write_8cyc_1L_1V_1I, A57WrBackOne],
+ (instregex "VLD4DUP(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.18 ASIMD Store Instructions ---
+
+// ASIMD store, 1 element, multiple, 1 reg: 1cyc S
+def : InstRW<[A57Write_1cyc_1S], (instregex "VST1d(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_1cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 2 reg: 2cyc S
+def : InstRW<[A57Write_2cyc_1S], (instregex "VST1q(8|16|32|64)$")>;
+def : InstRW<[A57WrBackOne, A57Write_2cyc_1S_1I],
+ (instregex "VST1q(8|16|32|64)wb")>;
+// ASIMD store, 1 element, multiple, 3 reg: 3cyc S
+def : InstRW<[A57Write_3cyc_1S],
+ (instregex "VST1d(8|16|32|64)T$", "VST1d64TPseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)Twb", "VST1d64TPseudoWB")>;
+// ASIMD store, 1 element, multiple, 4 reg: 4cyc S
+def : InstRW<[A57Write_4cyc_1S],
+ (instregex "VST1d(8|16|32|64)(Q|QPseudo)$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1I],
+ (instregex "VST1d(8|16|32|64)(Qwb|QPseudoWB)")>;
+// ASIMD store, 1 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST1LNd(8|16|32)$", "VST1LNq(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST1LNd(8|16|32)_UPD", "VST1LNq(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 2 element, multiple, 2 reg: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2(d|b)(8|16|32)$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2(b|d)(8|16|32)wb")>;
+// ASIMD store, 2 element, multiple, 4 reg: 4cyc "F0/F1, S"
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST2q(8|16|32)$", "VST2q(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST2q(8|16|32)wb", "VST2q(8|16|32)PseudoWB")>;
+// ASIMD store, 2 element, one lane: 3cyc "F0/F1, S"
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST2LN(d|q)(8|16|32)$", "VST2LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST2LN(d|q)(8|16|32)_UPD",
+ "VST2LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 3 element, multiple, 3 reg
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3(d|q)(8|16|32)$", "VST3(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3(d|q)(8|16|32)_UPD",
+ "VST3(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 3 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST3LN(d|q)(8|16|32)$", "VST3LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST3LN(d|q)(8|16|32)_UPD",
+ "VST3LN(d|q)(8|16|32)Pseudo_UPD")>;
+// ASIMD store, 4 element, multiple, 4 reg
+def : InstRW<[A57Write_4cyc_1S_1V],
+ (instregex "VST4(d|q)(8|16|32)$", "VST4(d|q)(8|16|32)(oddP|P)seudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_4cyc_1S_1V_1I],
+ (instregex "VST4(d|q)(8|16|32)_UPD",
+ "VST4(d|q)(8|16|32)(oddP|P)seudo_UPD$")>;
+// ASIMD store, 4 element, one lane
+def : InstRW<[A57Write_3cyc_1S_1V],
+ (instregex "VST4LN(d|q)(8|16|32)$", "VST4LN(d|q)(8|16|32)Pseudo$")>;
+def : InstRW<[A57WrBackOne, A57Write_3cyc_1S_1V_1I],
+ (instregex "VST4LN(d|q)(8|16|32)_UPD",
+ "VST4LN(d|q)(8|16|32)Pseudo_UPD")>;
+
+// --- 3.19 Cryptography Extensions ---
+// Crypto AES ops
+// AESD, AESE, AESIMC, AESMC: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^AES")>;
+// Crypto polynomial (64x64) multiply long (VMULL.P64): 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^VMULLp64")>;
+// Crypto SHA1 xor ops: 6cyc F0/F1
+def : InstRW<[A57Write_6cyc_2V], (instregex "^SHA1SU0")>;
+// Crypto SHA1 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA1(H|SU1)")>;
+// Crypto SHA1 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA1[CMP]")>;
+// Crypto SHA256 fast ops: 3cyc F0
+def : InstRW<[A57Write_3cyc_1W], (instregex "^SHA256SU0")>;
+// Crypto SHA256 slow ops: 6cyc F0
+def : InstRW<[A57Write_6cyc_2W], (instregex "^SHA256(H|H2|SU1)")>;
+
+// --- 3.20 CRC ---
+def : InstRW<[A57Write_3cyc_1W], (instregex "^(t2)?CRC32")>;
+
+// -----------------------------------------------------------------------------
+// Common definitions
+def : WriteRes<WriteNoop, []> { let Latency = 0; let NumMicroOps = 0; }
+def : SchedAlias<WriteALU, A57Write_1cyc_1I>;
+
+def : SchedAlias<WriteBr, A57Write_1cyc_1B>;
+def : SchedAlias<WriteBrL, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WriteBrTbl, A57Write_1cyc_1B_1I>;
+def : SchedAlias<WritePreLd, A57Write_4cyc_1L>;
+
+def : SchedAlias<WriteLd, A57Write_4cyc_1L>;
+def : SchedAlias<WriteST, A57Write_1cyc_1S>;
+def : ReadAdvance<ReadALU, 0>;
+
+} // SchedModel = CortexA57Model
+
diff --git a/lib/Target/ARM/ARMScheduleA57WriteRes.td b/lib/Target/ARM/ARMScheduleA57WriteRes.td
new file mode 100644
index 000000000000..670717dc7c13
--- /dev/null
+++ b/lib/Target/ARM/ARMScheduleA57WriteRes.td
@@ -0,0 +1,323 @@
+//=- ARMScheduleA57WriteRes.td - ARM Cortex-A57 Write Res ---*- tablegen -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Contains all of the Cortex-A57 specific SchedWriteRes types. The approach
+// below is to define a generic SchedWriteRes for every combination of
+// latency and microOps. The naming convention is to use a prefix, one field
+// for latency, and one or more microOp count/type designators.
+// Prefix: A57Write
+// Latency: #cyc
+// MicroOp Count/Types: #(B|I|M|L|S|X|W|V)
+//
+// e.g. A57Write_6cyc_1I_6S_4V means the total latency is 6 and there are
+// 11 micro-ops to be issued as follows: one to I pipe, six to S pipes and
+// four to V pipes.
+//
+//===----------------------------------------------------------------------===//
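+// Written out, the example above would correspond to a def of this shape
+// (shown for illustration of the convention only):
+// def A57Write_6cyc_1I_6S_4V : SchedWriteRes<[A57UnitI,
+//     A57UnitS, A57UnitS, A57UnitS, A57UnitS, A57UnitS, A57UnitS,
+//     A57UnitV, A57UnitV, A57UnitV, A57UnitV]> {
+//   let Latency = 6;
+//   let NumMicroOps = 11;
+// }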
+
+//===----------------------------------------------------------------------===//
+// Define Generic 1 micro-op types
+
+def A57Write_5cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 5; }
+def A57Write_5cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 5; }
+def A57Write_5cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 5; }
+def A57Write_10cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 10; }
+def A57Write_17cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 17;
+ let ResourceCycles = [17]; }
+def A57Write_18cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 18;
+ let ResourceCycles = [18]; }
+def A57Write_19cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 19;
+ let ResourceCycles = [19]; }
+def A57Write_20cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 20;
+ let ResourceCycles = [20]; }
+def A57Write_1cyc_1B : SchedWriteRes<[A57UnitB]> { let Latency = 1; }
+def A57Write_1cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 1; }
+def A57Write_2cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 2; }
+def A57Write_3cyc_1I : SchedWriteRes<[A57UnitI]> { let Latency = 3; }
+def A57Write_1cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 1; }
+def A57Write_2cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 2; }
+def A57Write_3cyc_1S : SchedWriteRes<[A57UnitS]> { let Latency = 3; }
+def A57Write_2cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 2; }
+def A57Write_32cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_32cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 32;
+ let ResourceCycles = [32]; }
+def A57Write_35cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 35;
+ let ResourceCycles = [35]; }
+def A57Write_3cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 3; }
+def A57Write_3cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 3; }
+def A57Write_3cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 3; }
+def A57Write_3cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 3; }
+
+// A57Write_3cyc_1L - A57Write_20cyc_1L
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L : SchedWriteRes<[A57UnitL]> {
+ let Latency = Lat;
+ }
+}
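+// e.g. the Lat = 4 iteration of the loop above produces:
+// def A57Write_4cyc_1L : SchedWriteRes<[A57UnitL]> { let Latency = 4; }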
+
+// A57Write_4cyc_1S - A57Write_16cyc_1S
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S : SchedWriteRes<[A57UnitS]> {
+ let Latency = Lat;
+ }
+}
+
+def A57Write_4cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 4; }
+def A57Write_4cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 4; }
+def A57Write_4cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 4; }
+def A57Write_5cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 5; }
+def A57Write_6cyc_1X : SchedWriteRes<[A57UnitX]> { let Latency = 6; }
+def A57Write_6cyc_1W : SchedWriteRes<[A57UnitW]> { let Latency = 6; }
+def A57Write_8cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 8; }
+def A57Write_9cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 9; }
+def A57Write_6cyc_1M : SchedWriteRes<[A57UnitM]> { let Latency = 6; }
+def A57Write_6cyc_1V : SchedWriteRes<[A57UnitV]> { let Latency = 6; }
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 2 micro-op types
+
+def A57Write_64cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 64;
+ let NumMicroOps = 2;
+ let ResourceCycles = [32, 32];
+}
+def A57Write_6cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_7cyc_1V_1X : SchedWriteRes<[A57UnitV,
+ A57UnitX]> {
+ let Latency = 7;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_9cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 9;
+ let NumMicroOps = 2;
+}
+def A57Write_8cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 8;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2L : SchedWriteRes<[A57UnitL, A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_2W : SchedWriteRes<[A57UnitW, A57UnitW]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1L : SchedWriteRes<[A57UnitI,
+ A57UnitL]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_5cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 5;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_1L_1V : SchedWriteRes<[A57UnitL,
+ A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_10cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1I : SchedWriteRes<[A57UnitS,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_1cyc_1S_1M : SchedWriteRes<[A57UnitS,
+ A57UnitM]> {
+ let Latency = 1;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1B_1I : SchedWriteRes<[A57UnitB,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_6cyc_1B_1L : SchedWriteRes<[A57UnitB,
+                                         A57UnitL]> {
+ let Latency = 6;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2S : SchedWriteRes<[A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_2cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 2;
+ let NumMicroOps = 2;
+}
+def A57Write_36cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 36;
+ let NumMicroOps = 2;
+ let ResourceCycles = [18, 18];
+}
+def A57Write_3cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1I_1M : SchedWriteRes<[A57UnitI,
+ A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+// A57Write_3cyc_1L_1I - A57Write_20cyc_1L_1I
+foreach Lat = 3-20 in {
+ def A57Write_#Lat#cyc_1L_1I : SchedWriteRes<[A57UnitL, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_3cyc_1I_1S : SchedWriteRes<[A57UnitI,
+ A57UnitS]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+def A57Write_4cyc_1S_1V : SchedWriteRes<[A57UnitS,
+ A57UnitV]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+def A57Write_3cyc_2V : SchedWriteRes<[A57UnitV, A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 2;
+}
+
+// A57Write_4cyc_1S_1I - A57Write_16cyc_1S_1I
+foreach Lat = 4-16 in {
+ def A57Write_#Lat#cyc_1S_1I : SchedWriteRes<[A57UnitS, A57UnitI]> {
+ let Latency = Lat; let NumMicroOps = 2;
+ }
+}
+
+def A57Write_4cyc_2X : SchedWriteRes<[A57UnitX, A57UnitX]> {
+ let Latency = 4;
+ let NumMicroOps = 2;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Define Generic 3 micro-op types
+
+def A57Write_10cyc_3V : SchedWriteRes<[A57UnitV, A57UnitV, A57UnitV]> {
+ let Latency = 10;
+ let NumMicroOps = 3;
+}
+def A57Write_2cyc_1I_2S : SchedWriteRes<[A57UnitI,
+ A57UnitS, A57UnitS]> {
+ let Latency = 2;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1I_1S_1V : SchedWriteRes<[A57UnitI,
+ A57UnitS,
+ A57UnitV]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_3cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 3;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1S_1V_1I : SchedWriteRes<[A57UnitS,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_4cyc_1I_1L_1M : SchedWriteRes<[A57UnitI, A57UnitL, A57UnitM]> {
+ let Latency = 4;
+ let NumMicroOps = 3;
+}
+def A57Write_8cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 8;
+ let NumMicroOps = 3;
+}
+def A57Write_9cyc_1L_1V_1I : SchedWriteRes<[A57UnitL,
+ A57UnitV,
+ A57UnitI]> {
+ let Latency = 9;
+ let NumMicroOps = 3;
+}
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index d2630685d91b..af682dd8321c 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -234,6 +234,10 @@ protected:
/// CPSR setting instruction.
bool AvoidCPSRPartialUpdate = false;
+ /// CheapPredicableCPSRDef - If true, disable +1 predication cost
+ /// for instructions updating CPSR. Enabled for Cortex-A57.
+ bool CheapPredicableCPSRDef = false;
+
/// AvoidMOVsShifterOperand - If true, codegen should avoid using flag setting
/// movs with shifter operand (i.e. asr, lsl, lsr).
bool AvoidMOVsShifterOperand = false;
@@ -543,6 +547,7 @@ public:
bool nonpipelinedVFP() const { return NonpipelinedVFP; }
bool prefers32BitThumb() const { return Pref32BitThumb; }
bool avoidCPSRPartialUpdate() const { return AvoidCPSRPartialUpdate; }
+ bool cheapPredicableCPSRDef() const { return CheapPredicableCPSRDef; }
bool avoidMOVsShifterOperand() const { return AvoidMOVsShifterOperand; }
bool hasRetAddrStack() const { return HasRetAddrStack; }
bool hasMPExtension() const { return HasMPExtension; }
diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp
index 0fef91ec4d3e..b76da727237c 100644
--- a/lib/Target/Hexagon/HexagonInstrInfo.cpp
+++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp
@@ -3419,9 +3419,7 @@ int HexagonInstrInfo::getDotNewPredOp(const MachineInstr &MI,
int NewOpcode = Hexagon::getPredNewOpcode(MI.getOpcode());
if (NewOpcode >= 0)
return NewOpcode;
-
- dbgs() << "Cannot convert to .new: " << getName(MI.getOpcode()) << '\n';
- llvm_unreachable(nullptr);
+ return 0;
}
int HexagonInstrInfo::getDotOldOp(const MachineInstr &MI) const {
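With the llvm_unreachable gone, getDotNewPredOp now signals "no .new variant" by returning 0. A minimal caller-side sketch of how that result is presumably consumed; HII, MI, and MBPI are assumed names, not taken from this patch:

  // Sketch only: treat a zero return as "keep the instruction as-is".
  int NewOp = HII->getDotNewPredOp(MI, MBPI);
  if (NewOp == 0)
    return false;               // no predicated-.new opcode for this MI
  MI.setDesc(HII->get(NewOp));  // otherwise switch MI to the .new form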
diff --git a/lib/Target/Mips/MicroMipsSizeReduction.cpp b/lib/Target/Mips/MicroMipsSizeReduction.cpp
index 4593fc92ca6f..35948e36ad91 100644
--- a/lib/Target/Mips/MicroMipsSizeReduction.cpp
+++ b/lib/Target/Mips/MicroMipsSizeReduction.cpp
@@ -135,6 +135,14 @@ private:
// returns true on success.
static bool ReduceXWtoXWSP(MachineInstr *MI, const ReduceEntry &Entry);
+  // Attempts to reduce LBU/LHU instructions into LBU16/LHU16,
+ // returns true on success.
+ static bool ReduceLXUtoLXU16(MachineInstr *MI, const ReduceEntry &Entry);
+
+  // Attempts to reduce SB/SH instructions into SB16/SH16,
+ // returns true on success.
+ static bool ReduceSXtoSX16(MachineInstr *MI, const ReduceEntry &Entry);
+
// Attempts to reduce arithmetic instructions, returns true on success
static bool ReduceArithmeticInstructions(MachineInstr *MI,
const ReduceEntry &Entry);
@@ -162,10 +170,26 @@ llvm::SmallVector<ReduceEntry, 16> MicroMipsSizeReduce::ReduceTable = {
{RT_OneInstr, OpCodes(Mips::ADDu_MM, Mips::ADDU16_MM),
ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
ImmField(0, 0, 0, -1)},
+ {RT_OneInstr, OpCodes(Mips::LBu, Mips::LBU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)},
+ {RT_OneInstr, OpCodes(Mips::LBu_MM, Mips::LBU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(0, -1, 15, 2)},
+ {RT_OneInstr, OpCodes(Mips::LHu, Mips::LHU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::LHu_MM, Mips::LHU16_MM), ReduceLXUtoLXU16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::LW, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
{RT_OneInstr, OpCodes(Mips::LW_MM, Mips::LWSP_MM), ReduceXWtoXWSP,
OpInfo(OT_OperandsAll), ImmField(2, 0, 32, 2)},
+ {RT_OneInstr, OpCodes(Mips::SB, Mips::SB16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SB_MM, Mips::SB16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(0, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SH, Mips::SH16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
+ {RT_OneInstr, OpCodes(Mips::SH_MM, Mips::SH16_MM), ReduceSXtoSX16,
+ OpInfo(OT_OperandsAll), ImmField(1, 0, 16, 2)},
{RT_OneInstr, OpCodes(Mips::SUBu, Mips::SUBU16_MM),
ReduceArithmeticInstructions, OpInfo(OT_OperandsAll),
ImmField(0, 0, 0, -1)},
@@ -193,6 +217,13 @@ static bool isMMThreeBitGPRegister(const MachineOperand &MO) {
return false;
}
+// Returns true if the machine operand MO is register $0, $17, or $2-$7.
+static bool isMMSourceRegister(const MachineOperand &MO) {
+ if (MO.isReg() && Mips::GPRMM16ZeroRegClass.contains(MO.getReg()))
+ return true;
+ return false;
+}
+
// Returns true if the operand Op is an immediate value
// and writes the immediate value into variable Imm
static bool GetImm(MachineInstr *MI, unsigned Op, int64_t &Imm) {
@@ -279,6 +310,32 @@ bool MicroMipsSizeReduce::ReduceArithmeticInstructions(
return ReplaceInstruction(MI, Entry);
}
+bool MicroMipsSizeReduce::ReduceLXUtoLXU16(MachineInstr *MI,
+ const ReduceEntry &Entry) {
+
+ if (!ImmInRange(MI, Entry))
+ return false;
+
+ if (!isMMThreeBitGPRegister(MI->getOperand(0)) ||
+ !isMMThreeBitGPRegister(MI->getOperand(1)))
+ return false;
+
+ return ReplaceInstruction(MI, Entry);
+}
+
+bool MicroMipsSizeReduce::ReduceSXtoSX16(MachineInstr *MI,
+ const ReduceEntry &Entry) {
+
+ if (!ImmInRange(MI, Entry))
+ return false;
+
+ if (!isMMSourceRegister(MI->getOperand(0)) ||
+ !isMMThreeBitGPRegister(MI->getOperand(1)))
+ return false;
+
+ return ReplaceInstruction(MI, Entry);
+}
+
bool MicroMipsSizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
bool Modified = false;
MachineBasicBlock::instr_iterator MII = MBB.instr_begin(),
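The ImmField(...) tuples in the new table entries read as (Shift, LBound, HBound, ImmFieldOperand). A self-contained sketch of the range check they imply, assuming ImmInRange requires the immediate to be Shift-aligned with its scaled value inside [LBound, HBound); this is inferred from the table, not the pass's actual code:

  #include <cstdint>

  // E.g. LWSP's ImmField(2, 0, 32, 2) would admit offsets 0, 4, ..., 124,
  // and LBU16's ImmField(0, -1, 15, 2) admits -1 through 14.
  static bool immInRangeSketch(int64_t Imm, int Shift, int64_t LBound,
                               int64_t HBound) {
    if (Imm & ((int64_t(1) << Shift) - 1))
      return false;             // immediate not aligned to 1 << Shift
    int64_t Scaled = Imm >> Shift;
    return Scaled >= LBound && Scaled < HBound;
  }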
diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt
index 54619589c341..35a67134775a 100644
--- a/lib/Target/WebAssembly/known_gcc_test_failures.txt
+++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt
@@ -88,6 +88,3 @@ pr45695.c wasm-o
pr49279.c wasm-o
pr49390.c wasm-o
pr52286.c wasm-o
-
-# fatal error: error in backend: data symbols must have a size set with .size
-921110-1.c wasm-o
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 0a41f35f9320..5303d7a406ad 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -4753,7 +4753,7 @@ static void scaleShuffleMask(int Scale, ArrayRef<int> Mask,
SmallVectorImpl<int> &ScaledMask) {
assert(0 < Scale && "Unexpected scaling factor");
int NumElts = Mask.size();
- ScaledMask.assign(NumElts * Scale, -1);
+ ScaledMask.assign(static_cast<size_t>(NumElts * Scale), -1);
for (int i = 0; i != NumElts; ++i) {
int M = Mask[i];
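scaleShuffleMask widens each mask entry into Scale consecutive entries, replicating negative sentinels. A standalone behavioral sketch with std::vector standing in for the LLVM containers (illustration, not the function's verbatim body):

  #include <vector>

  // Scale = 2, {1, -1} -> {2, 3, -1, -1}: element M expands to
  // M*Scale+0 .. M*Scale+Scale-1; sentinels (M < 0) are copied through.
  std::vector<int> scaleMaskSketch(int Scale, const std::vector<int> &Mask) {
    std::vector<int> Scaled(Mask.size() * Scale, -1);
    for (size_t i = 0; i != Mask.size(); ++i)
      for (int s = 0; s != Scale; ++s)
        Scaled[i * Scale + s] = Mask[i] < 0 ? Mask[i] : Mask[i] * Scale + s;
    return Scaled;
  }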
@@ -5848,17 +5848,39 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
return true;
}
case ISD::SCALAR_TO_VECTOR: {
- // Match against a scalar_to_vector of an extract from a similar vector.
+    // Match against a scalar_to_vector of an extract from a vector;
+    // for PEXTRW/PEXTRB we must handle the implicit zext of the scalar.
SDValue N0 = N.getOperand(0);
- if (N0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- N0.getOperand(0).getValueType() != VT ||
- !isa<ConstantSDNode>(N0.getOperand(1)) ||
- NumElts <= N0.getConstantOperandVal(1) ||
- !N->isOnlyUserOf(N0.getNode()))
+ SDValue SrcExtract;
+
+ if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ N0.getOperand(0).getValueType() == VT) {
+ SrcExtract = N0;
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRW &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i16) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v8i16);
+ } else if (N0.getOpcode() == ISD::AssertZext &&
+ N0.getOperand(0).getOpcode() == X86ISD::PEXTRB &&
+ cast<VTSDNode>(N0.getOperand(1))->getVT() == MVT::i8) {
+ SrcExtract = N0.getOperand(0);
+ assert(SrcExtract.getOperand(0).getValueType() == MVT::v16i8);
+ }
+
+ if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)) ||
+ NumElts <= SrcExtract.getConstantOperandVal(1))
return false;
- Ops.push_back(N0.getOperand(0));
- Mask.push_back(N0.getConstantOperandVal(1));
- Mask.append(NumElts - 1, SM_SentinelUndef);
+
+ SDValue SrcVec = SrcExtract.getOperand(0);
+ EVT SrcVT = SrcVec.getValueType();
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ unsigned NumZeros = (NumBitsPerElt / SrcVT.getScalarSizeInBits()) - 1;
+
+ Ops.push_back(SrcVec);
+ Mask.push_back(SrcExtract.getConstantOperandVal(1));
+ Mask.append(NumZeros, SM_SentinelZero);
+ Mask.append(NumSrcElts - Mask.size(), SM_SentinelUndef);
return true;
}
case X86ISD::PINSRB:
@@ -6542,12 +6564,12 @@ static Constant *getConstantVector(MVT VT, const APInt &SplatValue,
APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i);
Constant *Const;
if (VT.isFloatingPoint()) {
- assert((ScalarSize == 32 || ScalarSize == 64) &&
- "Unsupported floating point scalar size");
- if (ScalarSize == 32)
- Const = ConstantFP::get(Type::getFloatTy(C), Val.bitsToFloat());
- else
- Const = ConstantFP::get(Type::getDoubleTy(C), Val.bitsToDouble());
+ if (ScalarSize == 32) {
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val));
+ } else {
+ assert(ScalarSize == 64 && "Unsupported floating point scalar size");
+ Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val));
+ }
} else
Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val);
ConstantVec.push_back(Const);
@@ -6633,11 +6655,13 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp,
// AVX have support for 32 and 64 bit broadcast for floats only.
// No 64bit integer in 32bit subtarget.
MVT CVT = MVT::getFloatingPointVT(SplatBitSize);
- Constant *C = SplatBitSize == 32
- ? ConstantFP::get(Type::getFloatTy(*Ctx),
- SplatValue.bitsToFloat())
- : ConstantFP::get(Type::getDoubleTy(*Ctx),
- SplatValue.bitsToDouble());
+      // Lower the splat via APFloat directly, to avoid any host FP conversion.
+ Constant *C =
+ SplatBitSize == 32
+ ? ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEsingle(), SplatValue))
+ : ConstantFP::get(*Ctx,
+ APFloat(APFloat::IEEEdouble(), SplatValue));
SDValue CP = DAG.getConstantPool(C, PVT);
unsigned Repeat = VT.getSizeInBits() / SplatBitSize;
@@ -8003,7 +8027,7 @@ static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT,
ArrayRef<int> Mask,
SmallVectorImpl<int> &RepeatedMask) {
- int LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
+ auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits();
RepeatedMask.assign(LaneSize, -1);
int Size = Mask.size();
for (int i = 0; i < Size; ++i) {
@@ -16997,7 +17021,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
SDValue Op1 = Op.getOperand(1);
SDValue CC = Op.getOperand(2);
MVT VT = Op.getSimpleValueType();
- ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get();
+ ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
bool isFP = Op.getOperand(1).getSimpleValueType().isFloatingPoint();
SDLoc dl(Op);
@@ -17024,18 +17048,18 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// TODO: This can be avoided if Intel (and only Intel as of 2016) AVX is
// available.
SDValue Cmp;
- unsigned SSECC = translateX86FSETCC(SetCCOpcode, Op0, Op1);
+ unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1);
if (SSECC == 8) {
// LLVM predicate is SETUEQ or SETONE.
unsigned CC0, CC1;
unsigned CombineOpc;
- if (SetCCOpcode == ISD::SETUEQ) {
+ if (Cond == ISD::SETUEQ) {
CC0 = 3; // UNORD
CC1 = 0; // EQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FOR) :
static_cast<unsigned>(ISD::OR);
} else {
- assert(SetCCOpcode == ISD::SETONE);
+ assert(Cond == ISD::SETONE);
CC0 = 7; // ORD
CC1 = 4; // NEQ
CombineOpc = Opc == X86ISD::CMPP ? static_cast<unsigned>(X86ISD::FAND) :
@@ -17082,7 +17106,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
// 2. The original operand type has been promoted to a 256-bit vector.
//
// Note that condition 2. only applies for AVX targets.
- SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, SetCCOpcode);
+ SDValue NewOp = DAG.getSetCC(dl, VTOp0, Op0, Op1, Cond);
return DAG.getZExtOrTrunc(NewOp, dl, VT);
}
@@ -17122,7 +17146,7 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
VT == MVT::v4i32 || VT == MVT::v2i64) && Subtarget.hasXOP()) {
// Translate compare code to XOP PCOM compare mode.
unsigned CmpMode = 0;
- switch (SetCCOpcode) {
+ switch (Cond) {
default: llvm_unreachable("Unexpected SETCC condition");
case ISD::SETULT:
case ISD::SETLT: CmpMode = 0x00; break;
@@ -17137,60 +17161,49 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
// Are we comparing unsigned or signed integers?
- unsigned Opc = ISD::isUnsignedIntSetCC(SetCCOpcode)
- ? X86ISD::VPCOMU : X86ISD::VPCOM;
+ unsigned Opc =
+ ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;
return DAG.getNode(Opc, dl, VT, Op0, Op1,
DAG.getConstant(CmpMode, dl, MVT::i8));
}
- // We are handling one of the integer comparisons here. Since SSE only has
+ // We are handling one of the integer comparisons here. Since SSE only has
// GT and EQ comparisons for integer, swapping operands and multiple
// operations may be required for some comparisons.
- unsigned Opc;
- bool Swap = false, Invert = false, FlipSigns = false, MinMax = false;
- bool Subus = false;
-
- switch (SetCCOpcode) {
- default: llvm_unreachable("Unexpected SETCC condition");
- case ISD::SETNE: Invert = true; LLVM_FALLTHROUGH;
- case ISD::SETEQ: Opc = X86ISD::PCMPEQ; break;
- case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETGT: Opc = X86ISD::PCMPGT; break;
- case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETLE: Opc = X86ISD::PCMPGT;
- Invert = true; break;
- case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETUGT: Opc = X86ISD::PCMPGT;
- FlipSigns = true; break;
- case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH;
- case ISD::SETULE: Opc = X86ISD::PCMPGT;
- FlipSigns = true; Invert = true; break;
- }
+ unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
+ : X86ISD::PCMPGT;
+ bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
+ Cond == ISD::SETGE || Cond == ISD::SETUGE;
+ bool Invert = Cond == ISD::SETNE ||
+ (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));
+ bool FlipSigns = ISD::isUnsignedIntSetCC(Cond);
// Special case: Use min/max operations for SETULE/SETUGE
MVT VET = VT.getVectorElementType();
- bool hasMinMax =
- (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32))
- || (Subtarget.hasSSE2() && (VET == MVT::i8));
-
- if (hasMinMax) {
- switch (SetCCOpcode) {
+ bool HasMinMax =
+ (Subtarget.hasSSE41() && (VET >= MVT::i8 && VET <= MVT::i32)) ||
+ (Subtarget.hasSSE2() && (VET == MVT::i8));
+ bool MinMax = false;
+ if (HasMinMax) {
+ switch (Cond) {
default: break;
case ISD::SETULE: Opc = ISD::UMIN; MinMax = true; break;
case ISD::SETUGE: Opc = ISD::UMAX; MinMax = true; break;
}
- if (MinMax) { Swap = false; Invert = false; FlipSigns = false; }
+ if (MinMax)
+ Swap = Invert = FlipSigns = false;
}
- bool hasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
- if (!MinMax && hasSubus) {
+ bool HasSubus = Subtarget.hasSSE2() && (VET == MVT::i8 || VET == MVT::i16);
+ bool Subus = false;
+ if (!MinMax && HasSubus) {
// As another special case, use PSUBUS[BW] when it's profitable. E.g. for
// Op0 u<= Op1:
// t = psubus Op0, Op1
// pcmpeq t, <0..0>
- switch (SetCCOpcode) {
+ switch (Cond) {
default: break;
case ISD::SETULT: {
// If the comparison is against a constant we can turn this into a
diff --git a/lib/Transforms/Coroutines/CoroSplit.cpp b/lib/Transforms/Coroutines/CoroSplit.cpp
index 613b4a7f03e9..626a891f65c6 100644
--- a/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -228,7 +228,7 @@ static Function *createClone(Function &F, Twine Suffix, coro::Shape &Shape,
SmallVector<ReturnInst *, 4> Returns;
- CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/false, Returns);
+ CloneFunctionInto(NewF, &F, VMap, /*ModuleLevelChanges=*/true, Returns);
// Remove old returns.
for (ReturnInst *Return : Returns)
diff --git a/lib/Transforms/Coroutines/Coroutines.cpp b/lib/Transforms/Coroutines/Coroutines.cpp
index ea48043f9381..44e1f9b404ed 100644
--- a/lib/Transforms/Coroutines/Coroutines.cpp
+++ b/lib/Transforms/Coroutines/Coroutines.cpp
@@ -218,6 +218,8 @@ void coro::Shape::buildFrom(Function &F) {
size_t FinalSuspendIndex = 0;
clear(*this);
SmallVector<CoroFrameInst *, 8> CoroFrames;
+ SmallVector<CoroSaveInst *, 2> UnusedCoroSaves;
+
for (Instruction &I : instructions(F)) {
if (auto II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
@@ -229,6 +231,12 @@ void coro::Shape::buildFrom(Function &F) {
case Intrinsic::coro_frame:
CoroFrames.push_back(cast<CoroFrameInst>(II));
break;
+ case Intrinsic::coro_save:
+      // After optimizations, coro_suspends using this coro_save might have
+      // been removed; remember orphaned coro_saves so they can be removed later.
+ if (II->use_empty())
+ UnusedCoroSaves.push_back(cast<CoroSaveInst>(II));
+ break;
case Intrinsic::coro_suspend:
CoroSuspends.push_back(cast<CoroSuspendInst>(II));
if (CoroSuspends.back()->isFinal()) {
@@ -311,4 +319,8 @@ void coro::Shape::buildFrom(Function &F) {
if (HasFinalSuspend &&
FinalSuspendIndex != CoroSuspends.size() - 1)
std::swap(CoroSuspends[FinalSuspendIndex], CoroSuspends.back());
+
+ // Remove orphaned coro.saves.
+ for (CoroSaveInst *CoroSave : UnusedCoroSaves)
+ CoroSave->eraseFromParent();
}
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 28cc81c76d4f..5cc29a493798 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -1188,6 +1188,10 @@ static bool runImpl(CallGraphSCC &SCC, AARGetterT AARGetter) {
SCCNodes.insert(F);
}
+ // Skip it if the SCC only contains optnone functions.
+ if (SCCNodes.empty())
+ return Changed;
+
Changed |= addArgumentReturnedAttrs(SCCNodes);
Changed |= addReadAttrs(SCCNodes, AARGetter);
Changed |= addArgumentAttrs(SCCNodes);
diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp
index 231487923fad..6d34ab8b0d96 100644
--- a/lib/Transforms/IPO/FunctionImport.cpp
+++ b/lib/Transforms/IPO/FunctionImport.cpp
@@ -292,8 +292,7 @@ static void computeImportForFunction(
static void ComputeImportForModule(
const GVSummaryMapTy &DefinedGVSummaries, const ModuleSummaryIndex &Index,
FunctionImporter::ImportMapTy &ImportList,
- StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr,
- const DenseSet<GlobalValue::GUID> *DeadSymbols = nullptr) {
+ StringMap<FunctionImporter::ExportSetTy> *ExportLists = nullptr) {
   // Worklist contains the list of functions imported into this module, for which
// we will analyse the callees and may import further down the callgraph.
SmallVector<EdgeInfo, 128> Worklist;
@@ -301,7 +300,7 @@ static void ComputeImportForModule(
// Populate the worklist with the import for the functions in the current
// module
for (auto &GVSummary : DefinedGVSummaries) {
- if (DeadSymbols && DeadSymbols->count(GVSummary.first)) {
+ if (!Index.isGlobalValueLive(GVSummary.second)) {
DEBUG(dbgs() << "Ignores Dead GUID: " << GVSummary.first << "\n");
continue;
}
@@ -344,15 +343,14 @@ void llvm::ComputeCrossModuleImport(
const ModuleSummaryIndex &Index,
const StringMap<GVSummaryMapTy> &ModuleToDefinedGVSummaries,
StringMap<FunctionImporter::ImportMapTy> &ImportLists,
- StringMap<FunctionImporter::ExportSetTy> &ExportLists,
- const DenseSet<GlobalValue::GUID> *DeadSymbols) {
+ StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
// For each module that has function defined, compute the import/export lists.
for (auto &DefinedGVSummaries : ModuleToDefinedGVSummaries) {
auto &ImportList = ImportLists[DefinedGVSummaries.first()];
DEBUG(dbgs() << "Computing import for Module '"
<< DefinedGVSummaries.first() << "'\n");
ComputeImportForModule(DefinedGVSummaries.second, Index, ImportList,
- &ExportLists, DeadSymbols);
+ &ExportLists);
}
// When computing imports we added all GUIDs referenced by anything
@@ -414,82 +412,71 @@ void llvm::ComputeCrossModuleImportForModule(
#endif
}
-DenseSet<GlobalValue::GUID> llvm::computeDeadSymbols(
- const ModuleSummaryIndex &Index,
+void llvm::computeDeadSymbols(
+ ModuleSummaryIndex &Index,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ assert(!Index.withGlobalValueDeadStripping());
if (!ComputeDead)
- return DenseSet<GlobalValue::GUID>();
+ return;
if (GUIDPreservedSymbols.empty())
     // Don't do anything when nothing is live; this is friendly with tests.
- return DenseSet<GlobalValue::GUID>();
- DenseSet<ValueInfo> LiveSymbols;
+ return;
+ unsigned LiveSymbols = 0;
SmallVector<ValueInfo, 128> Worklist;
Worklist.reserve(GUIDPreservedSymbols.size() * 2);
for (auto GUID : GUIDPreservedSymbols) {
ValueInfo VI = Index.getValueInfo(GUID);
if (!VI)
continue;
- DEBUG(dbgs() << "Live root: " << VI.getGUID() << "\n");
- LiveSymbols.insert(VI);
- Worklist.push_back(VI);
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
}
+
// Add values flagged in the index as live roots to the worklist.
- for (const auto &Entry : Index) {
- bool IsLiveRoot = llvm::any_of(
- Entry.second.SummaryList,
- [&](const std::unique_ptr<llvm::GlobalValueSummary> &Summary) {
- return Summary->liveRoot();
- });
- if (!IsLiveRoot)
- continue;
- DEBUG(dbgs() << "Live root (summary): " << Entry.first << "\n");
- Worklist.push_back(ValueInfo(&Entry));
- }
+ for (const auto &Entry : Index)
+ for (auto &S : Entry.second.SummaryList)
+ if (S->isLive()) {
+ DEBUG(dbgs() << "Live root: " << Entry.first << "\n");
+ Worklist.push_back(ValueInfo(&Entry));
+ ++LiveSymbols;
+ break;
+ }
+
+ // Make value live and add it to the worklist if it was not live before.
+ // FIXME: we should only make the prevailing copy live here
+ auto visit = [&](ValueInfo VI) {
+ for (auto &S : VI.getSummaryList())
+ if (S->isLive())
+ return;
+ for (auto &S : VI.getSummaryList())
+ S->setLive(true);
+ ++LiveSymbols;
+ Worklist.push_back(VI);
+ };
while (!Worklist.empty()) {
auto VI = Worklist.pop_back_val();
-
- // FIXME: we should only make the prevailing copy live here
for (auto &Summary : VI.getSummaryList()) {
- for (auto Ref : Summary->refs()) {
- if (LiveSymbols.insert(Ref).second) {
- DEBUG(dbgs() << "Marking live (ref): " << Ref.getGUID() << "\n");
- Worklist.push_back(Ref);
- }
- }
- if (auto *FS = dyn_cast<FunctionSummary>(Summary.get())) {
- for (auto Call : FS->calls()) {
- if (LiveSymbols.insert(Call.first).second) {
- DEBUG(dbgs() << "Marking live (call): " << Call.first.getGUID()
- << "\n");
- Worklist.push_back(Call.first);
- }
- }
- }
+ for (auto Ref : Summary->refs())
+ visit(Ref);
+ if (auto *FS = dyn_cast<FunctionSummary>(Summary.get()))
+ for (auto Call : FS->calls())
+ visit(Call.first);
if (auto *AS = dyn_cast<AliasSummary>(Summary.get())) {
auto AliaseeGUID = AS->getAliasee().getOriginalName();
ValueInfo AliaseeVI = Index.getValueInfo(AliaseeGUID);
- if (AliaseeVI && LiveSymbols.insert(AliaseeVI).second) {
- DEBUG(dbgs() << "Marking live (alias): " << AliaseeGUID << "\n");
- Worklist.push_back(AliaseeVI);
- }
+ if (AliaseeVI)
+ visit(AliaseeVI);
}
}
}
- DenseSet<GlobalValue::GUID> DeadSymbols;
- DeadSymbols.reserve(
- std::min(Index.size(), Index.size() - LiveSymbols.size()));
- for (auto &Entry : Index) {
- if (!LiveSymbols.count(ValueInfo(&Entry))) {
- DEBUG(dbgs() << "Marking dead: " << Entry.first << "\n");
- DeadSymbols.insert(Entry.first);
- }
- }
- DEBUG(dbgs() << LiveSymbols.size() << " symbols Live, and "
- << DeadSymbols.size() << " symbols Dead \n");
- NumDeadSymbols += DeadSymbols.size();
- NumLiveSymbols += LiveSymbols.size();
- return DeadSymbols;
+ Index.setWithGlobalValueDeadStripping();
+
+ unsigned DeadSymbols = Index.size() - LiveSymbols;
+ DEBUG(dbgs() << LiveSymbols << " symbols Live, and " << DeadSymbols
+ << " symbols Dead \n");
+ NumDeadSymbols += DeadSymbols;
+ NumLiveSymbols += LiveSymbols;
}
/// Compute the set of summaries needed for a ThinLTO backend compilation of
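The rewritten computeDeadSymbols is a plain mark phase over the summary graph: seed the worklist with preserved roots and summary-flagged roots, propagate liveness through refs, calls, and aliasees, and count what gets marked. A generic standalone sketch of that shape; Node and its Live flag are invented for illustration:

  #include <functional>
  #include <vector>

  struct Node { bool Live = false; };

  // Everything reachable from the roots ends up flagged live; nodes never
  // visited stay dead, matching DeadSymbols = Index.size() - LiveSymbols.
  unsigned markLive(std::vector<Node *> Worklist,
                    const std::function<void(Node *, std::vector<Node *> &)>
                        &PushSuccessors) {
    unsigned Live = 0;
    while (!Worklist.empty()) {
      Node *N = Worklist.back();
      Worklist.pop_back();
      if (N->Live)
        continue;                  // mirrors visit()'s early return
      N->Live = true;
      ++Live;
      PushSuccessors(N, Worklist); // refs(), calls(), and the aliasee
    }
    return Live;
  }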
diff --git a/lib/Transforms/IPO/LowerTypeTests.cpp b/lib/Transforms/IPO/LowerTypeTests.cpp
index ca4ee92f971a..7bec50d9d25f 100644
--- a/lib/Transforms/IPO/LowerTypeTests.cpp
+++ b/lib/Transforms/IPO/LowerTypeTests.cpp
@@ -1442,9 +1442,8 @@ bool LowerTypeTestsModule::lower() {
for (auto &P : *ExportSummary) {
for (auto &S : P.second.SummaryList) {
auto *FS = dyn_cast<FunctionSummary>(S.get());
- if (!FS)
+ if (!FS || !ExportSummary->isGlobalValueLive(FS))
continue;
- // FIXME: Only add live functions.
for (GlobalValue::GUID G : FS->type_tests())
for (Metadata *MD : MetadataByGUID[G])
AddTypeIdUse(MD).IsExported = true;
diff --git a/lib/Transforms/IPO/PartialInlining.cpp b/lib/Transforms/IPO/PartialInlining.cpp
index bc0967448cdd..ea805efc66b7 100644
--- a/lib/Transforms/IPO/PartialInlining.cpp
+++ b/lib/Transforms/IPO/PartialInlining.cpp
@@ -68,6 +68,10 @@ static cl::opt<int>
cl::desc("Relative frequency of outline region to "
"the entry block"));
+static cl::opt<unsigned> ExtraOutliningPenalty(
+ "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
+ cl::desc("A debug option to add additional penalty to the computed one."));
+
namespace {
struct FunctionOutliningInfo {
@@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
SmallVector<BasicBlock *, 4> Entries;
// The return block that is not included in the outlined region.
BasicBlock *ReturnBlock;
- // The dominating block of the region ot be outlined.
+ // The dominating block of the region to be outlined.
BasicBlock *NonReturnBlock;
   // The set of blocks in Entries that are predecessors to ReturnBlock
SmallVector<BasicBlock *, 4> ReturnBlockPreds;
@@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
if (hasProfileData(F, OI))
return OutlineRegionRelFreq;
- // When profile data is not available, we need to be very
- // conservative in estimating the overall savings. We need to make sure
- // the outline region relative frequency is not below the threshold
- // specified by the option.
- OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
+ // When profile data is not available, we need to be conservative in
+ // estimating the overall savings. Static branch prediction can usually
+ // guess the branch direction right (taken/non-taken), but the guessed
+  // branch probability is usually not biased enough. When the outlined
+  // region is predicted to be likely, its probability needs to be made
+  // higher (more biased) so as not to under-estimate the cost of
+  // function outlining. On the other hand, if the outlined region is
+  // predicted to be less likely, the predicted probability is usually
+  // higher than the actual one. For instance, the actual probability of
+  // the less likely target may be only 5%, while the guessed probability
+  // can be 40%. In the latter case, there is no need for further
+  // adjustment.
+ // FIXME: add an option for this.
+ if (OutlineRegionRelFreq < BranchProbability(45, 100))
+ return OutlineRegionRelFreq;
+
+ OutlineRegionRelFreq = std::max(
+ OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
return OutlineRegionRelFreq;
}
@@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
+ switch (I->getOpcode()) {
+ case Instruction::BitCast:
+ case Instruction::PtrToInt:
+ case Instruction::IntToPtr:
+ case Instruction::Alloca:
+ continue;
+ case Instruction::GetElementPtr:
+ if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+ continue;
+ default:
+ break;
+ }
+
+ IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
+ if (IntrInst) {
+ if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+ IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+ continue;
+ }
+
if (CallInst *CI = dyn_cast<CallInst>(I)) {
InlineCost += getCallsiteCost(CallSite(CI), DL);
continue;
@@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
BasicBlock *OutliningCallBB) {
// First compute the cost of the outlined region 'OI' in the original
- // function 'F':
+ // function 'F'.
+ // FIXME: The code extractor (outliner) can now do code sinking/hoisting
+  // to reduce outlining cost. The hoisted/sunk code currently does not
+  // incur any runtime cost, so it is still OK to compare the outlined
+  // function cost with the outlined region in the original function.
+  // If this ever changes, we will need to introduce a new extractor API
+  // to pass the information.
int OutlinedRegionCost = 0;
for (BasicBlock &BB : *F) {
if (&BB != OI->ReturnBlock &&
@@ -542,8 +584,14 @@ std::tuple<int, int, int> PartialInlinerImpl::computeOutliningCosts(
assert(OutlinedFunctionCost >= OutlinedRegionCost &&
"Outlined function cost should be no less than the outlined region");
- int OutliningRuntimeOverhead =
- OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
+  // The code extractor introduces new root and exit stub blocks with
+ // additional unconditional branches. Those branches will be eliminated
+ // later with bb layout. The cost should be adjusted accordingly:
+ OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
+
+ int OutliningRuntimeOverhead = OutliningFuncCallCost +
+ (OutlinedFunctionCost - OutlinedRegionCost) +
+ ExtraOutliningPenalty;
return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
OutlinedRegionCost);
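A worked example of the adjusted overhead computation, with made-up costs and InlineConstants::InstrCost taken to be its usual value of 5:

  // Hypothetical numbers only, to show where the 2 * InstrCost credit goes.
  const int InstrCost = 5;           // stand-in for InlineConstants::InstrCost
  int OutliningFuncCallCost = 25;    // cost of the call to the outlined func
  int OutlinedFunctionCost = 60;     // cost of the outlined function body
  int OutlinedRegionCost = 40;       // cost of the region in the original
  int ExtraOutliningPenalty = 0;     // the new debug knob, defaults to 0
  OutlinedFunctionCost -= 2 * InstrCost;  // root/exit stub branches: 60 -> 50
  int OutliningRuntimeOverhead =
      OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost) +
      ExtraOutliningPenalty;         // 25 + (50 - 40) + 0 == 35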
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 9fd3a9021a27..16fba32e9805 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -157,7 +157,7 @@ static cl::opt<bool>
static cl::opt<bool> EnableGVNSink(
"enable-gvn-sink", cl::init(false), cl::Hidden,
- cl::desc("Enable the GVN sinking pass (default = on)"));
+ cl::desc("Enable the GVN sinking pass (default = off)"));
PassManagerBuilder::PassManagerBuilder() {
OptLevel = 2;
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 2c2b7317a1c0..c0798e164c39 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4508,13 +4508,16 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
Builder->CreateAnd(A, B),
Op1);
- // ~x < ~y --> y < x
- // ~x < cst --> ~cst < x
+ // ~X < ~Y --> Y < X
+ // ~X < C --> X > ~C
if (match(Op0, m_Not(m_Value(A)))) {
if (match(Op1, m_Not(m_Value(B))))
return new ICmpInst(I.getPredicate(), B, A);
- if (ConstantInt *RHSC = dyn_cast<ConstantInt>(Op1))
- return new ICmpInst(I.getPredicate(), ConstantExpr::getNot(RHSC), A);
+
+ const APInt *C;
+ if (match(Op1, m_APInt(C)))
+ return new ICmpInst(I.getSwappedPredicate(), A,
+ ConstantInt::get(Op1->getType(), ~(*C)));
}
Instruction *AddI = nullptr;
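The fold rests on bitwise-not reversing integer order (~x == -x - 1). A standalone brute-force check of both rewritten identities over all 8-bit signed values; a verification sketch, not part of the patch:

  #include <cassert>
  #include <cstdint>

  int main() {
    for (int x = -128; x < 128; ++x) {
      for (int y = -128; y < 128; ++y) {
        int8_t X = (int8_t)x, Y = (int8_t)y;
        // ~X < ~Y  -->  Y < X
        assert(((int8_t)~X < (int8_t)~Y) == (Y < X));
        // ~X < C  -->  X > ~C, with Y standing in for the constant C
        assert(((int8_t)~X < Y) == (X > (int8_t)~Y));
      }
    }
    return 0;
  }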
diff --git a/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
index ff753c20a94a..df4ee9969c02 100644
--- a/lib/Transforms/Instrumentation/MemorySanitizer.cpp
+++ b/lib/Transforms/Instrumentation/MemorySanitizer.cpp
@@ -2087,6 +2087,7 @@ struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
switch (I.getNumArgOperands()) {
case 3:
assert(isa<ConstantInt>(I.getArgOperand(2)) && "Invalid rounding mode");
+ LLVM_FALLTHROUGH;
case 2:
CopyOp = I.getArgOperand(0);
ConvertOp = I.getArgOperand(1);
diff --git a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
index 325b64cd8b43..8aa40d1759de 100644
--- a/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
+++ b/lib/Transforms/Instrumentation/SanitizerCoverage.cpp
@@ -57,6 +57,11 @@ static const char *const SanCovTracePCGuardName =
"__sanitizer_cov_trace_pc_guard";
static const char *const SanCovTracePCGuardInitName =
"__sanitizer_cov_trace_pc_guard_init";
+static const char *const SanCov8bitCountersInitName =
+ "__sanitizer_cov_8bit_counters_init";
+
+static const char *const SanCovGuardsSectionName = "sancov_guards";
+static const char *const SanCovCountersSectionName = "sancov_counters";
static cl::opt<int> ClCoverageLevel(
"sanitizer-coverage-level",
@@ -64,14 +69,18 @@ static cl::opt<int> ClCoverageLevel(
"3: all blocks and critical edges"),
cl::Hidden, cl::init(0));
-static cl::opt<bool> ClExperimentalTracePC("sanitizer-coverage-trace-pc",
- cl::desc("Experimental pc tracing"),
- cl::Hidden, cl::init(false));
+static cl::opt<bool> ClTracePC("sanitizer-coverage-trace-pc",
+ cl::desc("Experimental pc tracing"), cl::Hidden,
+ cl::init(false));
static cl::opt<bool> ClTracePCGuard("sanitizer-coverage-trace-pc-guard",
cl::desc("pc tracing with a guard"),
cl::Hidden, cl::init(false));
+static cl::opt<bool> ClInline8bitCounters("sanitizer-coverage-inline-8bit-counters",
+ cl::desc("increments 8-bit counter for every edge"),
+ cl::Hidden, cl::init(false));
+
static cl::opt<bool>
ClCMPTracing("sanitizer-coverage-trace-compares",
cl::desc("Tracing of CMP and similar instructions"),
@@ -123,9 +132,10 @@ SanitizerCoverageOptions OverrideFromCL(SanitizerCoverageOptions Options) {
Options.TraceCmp |= ClCMPTracing;
Options.TraceDiv |= ClDIVTracing;
Options.TraceGep |= ClGEPTracing;
- Options.TracePC |= ClExperimentalTracePC;
+ Options.TracePC |= ClTracePC;
Options.TracePCGuard |= ClTracePCGuard;
- if (!Options.TracePCGuard && !Options.TracePC)
+ Options.Inline8bitCounters |= ClInline8bitCounters;
+ if (!Options.TracePCGuard && !Options.TracePC && !Options.Inline8bitCounters)
Options.TracePCGuard = true; // TracePCGuard is default.
Options.NoPrune |= !ClPruneBlocks;
return Options;
@@ -159,11 +169,22 @@ private:
void InjectTraceForSwitch(Function &F,
ArrayRef<Instruction *> SwitchTraceTargets);
bool InjectCoverage(Function &F, ArrayRef<BasicBlock *> AllBlocks);
- void CreateFunctionGuardArray(size_t NumGuards, Function &F);
+ GlobalVariable *CreateFunctionLocalArrayInSection(size_t NumElements,
+ Function &F, Type *Ty,
+ const char *Section);
+ void CreateFunctionLocalArrays(size_t NumGuards, Function &F);
void InjectCoverageAtBlock(Function &F, BasicBlock &BB, size_t Idx);
- StringRef getSanCovTracePCGuardSection() const;
- StringRef getSanCovTracePCGuardSectionStart() const;
- StringRef getSanCovTracePCGuardSectionEnd() const;
+ void CreateInitCallForSection(Module &M, const char *InitFunctionName,
+ Type *Ty, const std::string &Section);
+
+ void SetNoSanitizeMetadata(Instruction *I) {
+ I->setMetadata(I->getModule()->getMDKindID("nosanitize"),
+ MDNode::get(*C, None));
+ }
+
+ std::string getSectionName(const std::string &Section) const;
+ std::string getSectionStart(const std::string &Section) const;
+ std::string getSectionEnd(const std::string &Section) const;
Function *SanCovTracePCIndir;
Function *SanCovTracePC, *SanCovTracePCGuard;
Function *SanCovTraceCmpFunction[4];
@@ -171,20 +192,48 @@ private:
Function *SanCovTraceGepFunction;
Function *SanCovTraceSwitchFunction;
InlineAsm *EmptyAsm;
- Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy;
+ Type *IntptrTy, *IntptrPtrTy, *Int64Ty, *Int64PtrTy, *Int32Ty, *Int32PtrTy,
+ *Int8Ty, *Int8PtrTy;
Module *CurModule;
Triple TargetTriple;
LLVMContext *C;
const DataLayout *DL;
GlobalVariable *FunctionGuardArray; // for trace-pc-guard.
- bool HasSancovGuardsSection;
+ GlobalVariable *Function8bitCounterArray; // for inline-8bit-counters.
SanitizerCoverageOptions Options;
};
} // namespace
+void SanitizerCoverageModule::CreateInitCallForSection(
+ Module &M, const char *InitFunctionName, Type *Ty,
+ const std::string &Section) {
+ IRBuilder<> IRB(M.getContext());
+ Function *CtorFunc;
+ GlobalVariable *SecStart =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage, nullptr,
+ getSectionStart(Section));
+ SecStart->setVisibility(GlobalValue::HiddenVisibility);
+ GlobalVariable *SecEnd =
+ new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
+ nullptr, getSectionEnd(Section));
+ SecEnd->setVisibility(GlobalValue::HiddenVisibility);
+
+ std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
+ M, SanCovModuleCtorName, InitFunctionName, {Ty, Ty},
+ {IRB.CreatePointerCast(SecStart, Ty), IRB.CreatePointerCast(SecEnd, Ty)});
+
+ if (TargetTriple.supportsCOMDAT()) {
+ // Use comdat to dedup CtorFunc.
+ CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
+ } else {
+ appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
+ }
+}
+
bool SanitizerCoverageModule::runOnModule(Module &M) {
if (Options.CoverageType == SanitizerCoverageOptions::SCK_None)
return false;
@@ -192,15 +241,18 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
DL = &M.getDataLayout();
CurModule = &M;
TargetTriple = Triple(M.getTargetTriple());
- HasSancovGuardsSection = false;
+ FunctionGuardArray = nullptr;
+ Function8bitCounterArray = nullptr;
IntptrTy = Type::getIntNTy(*C, DL->getPointerSizeInBits());
IntptrPtrTy = PointerType::getUnqual(IntptrTy);
Type *VoidTy = Type::getVoidTy(*C);
IRBuilder<> IRB(*C);
Int64PtrTy = PointerType::getUnqual(IRB.getInt64Ty());
Int32PtrTy = PointerType::getUnqual(IRB.getInt32Ty());
+ Int8PtrTy = PointerType::getUnqual(IRB.getInt8Ty());
Int64Ty = IRB.getInt64Ty();
Int32Ty = IRB.getInt32Ty();
+ Int8Ty = IRB.getInt8Ty();
SanCovTracePCIndir = checkSanitizerInterfaceFunction(
M.getOrInsertFunction(SanCovTracePCIndirName, VoidTy, IntptrTy));
@@ -243,34 +295,13 @@ bool SanitizerCoverageModule::runOnModule(Module &M) {
for (auto &F : M)
runOnFunction(F);
- // Create variable for module (compilation unit) name
- if (Options.TracePCGuard) {
- if (HasSancovGuardsSection) {
- Function *CtorFunc;
- GlobalVariable *SecStart = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionStart());
- SecStart->setVisibility(GlobalValue::HiddenVisibility);
- GlobalVariable *SecEnd = new GlobalVariable(
- M, Int32PtrTy, false, GlobalVariable::ExternalLinkage, nullptr,
- getSanCovTracePCGuardSectionEnd());
- SecEnd->setVisibility(GlobalValue::HiddenVisibility);
-
- std::tie(CtorFunc, std::ignore) = createSanitizerCtorAndInitFunctions(
- M, SanCovModuleCtorName, SanCovTracePCGuardInitName,
- {Int32PtrTy, Int32PtrTy},
- {IRB.CreatePointerCast(SecStart, Int32PtrTy),
- IRB.CreatePointerCast(SecEnd, Int32PtrTy)});
-
- if (TargetTriple.supportsCOMDAT()) {
- // Use comdat to dedup CtorFunc.
- CtorFunc->setComdat(M.getOrInsertComdat(SanCovModuleCtorName));
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority, CtorFunc);
- } else {
- appendToGlobalCtors(M, CtorFunc, SanCtorAndDtorPriority);
- }
- }
- }
+ if (FunctionGuardArray)
+ CreateInitCallForSection(M, SanCovTracePCGuardInitName, Int32PtrTy,
+ SanCovGuardsSectionName);
+ if (Function8bitCounterArray)
+ CreateInitCallForSection(M, SanCov8bitCountersInitName, Int8PtrTy,
+ SanCovCountersSectionName);
+
return true;
}
@@ -393,17 +424,26 @@ bool SanitizerCoverageModule::runOnFunction(Function &F) {
InjectTraceForGep(F, GepTraceTargets);
return true;
}
-void SanitizerCoverageModule::CreateFunctionGuardArray(size_t NumGuards,
- Function &F) {
- if (!Options.TracePCGuard) return;
- HasSancovGuardsSection = true;
- ArrayType *ArrayOfInt32Ty = ArrayType::get(Int32Ty, NumGuards);
- FunctionGuardArray = new GlobalVariable(
- *CurModule, ArrayOfInt32Ty, false, GlobalVariable::PrivateLinkage,
- Constant::getNullValue(ArrayOfInt32Ty), "__sancov_gen_");
+
+GlobalVariable *SanitizerCoverageModule::CreateFunctionLocalArrayInSection(
+ size_t NumElements, Function &F, Type *Ty, const char *Section) {
+ ArrayType *ArrayTy = ArrayType::get(Ty, NumElements);
+ auto Array = new GlobalVariable(
+ *CurModule, ArrayTy, false, GlobalVariable::PrivateLinkage,
+ Constant::getNullValue(ArrayTy), "__sancov_gen_");
if (auto Comdat = F.getComdat())
- FunctionGuardArray->setComdat(Comdat);
- FunctionGuardArray->setSection(getSanCovTracePCGuardSection());
+ Array->setComdat(Comdat);
+ Array->setSection(getSectionName(Section));
+ return Array;
+}
+void SanitizerCoverageModule::CreateFunctionLocalArrays(size_t NumGuards,
+ Function &F) {
+ if (Options.TracePCGuard)
+ FunctionGuardArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int32Ty, SanCovGuardsSectionName);
+ if (Options.Inline8bitCounters)
+ Function8bitCounterArray = CreateFunctionLocalArrayInSection(
+ NumGuards, F, Int8Ty, SanCovCountersSectionName);
}
bool SanitizerCoverageModule::InjectCoverage(Function &F,
@@ -413,11 +453,11 @@ bool SanitizerCoverageModule::InjectCoverage(Function &F,
case SanitizerCoverageOptions::SCK_None:
return false;
case SanitizerCoverageOptions::SCK_Function:
- CreateFunctionGuardArray(1, F);
+ CreateFunctionLocalArrays(1, F);
InjectCoverageAtBlock(F, F.getEntryBlock(), 0);
return true;
default: {
- CreateFunctionGuardArray(AllBlocks.size(), F);
+ CreateFunctionLocalArrays(AllBlocks.size(), F);
for (size_t i = 0, N = AllBlocks.size(); i < N; i++)
InjectCoverageAtBlock(F, *AllBlocks[i], i);
return true;
@@ -436,7 +476,7 @@ void SanitizerCoverageModule::InjectCoverageForIndirectCalls(
Function &F, ArrayRef<Instruction *> IndirCalls) {
if (IndirCalls.empty())
return;
- assert(Options.TracePC || Options.TracePCGuard);
+ assert(Options.TracePC || Options.TracePCGuard || Options.Inline8bitCounters);
for (auto I : IndirCalls) {
IRBuilder<> IRB(I);
CallSite CS(I);
@@ -564,8 +604,8 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
if (Options.TracePC) {
IRB.CreateCall(SanCovTracePC); // gets the PC using GET_CALLER_PC.
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
- } else {
- assert(Options.TracePCGuard);
+ }
+ if (Options.TracePCGuard) {
auto GuardPtr = IRB.CreateIntToPtr(
IRB.CreateAdd(IRB.CreatePointerCast(FunctionGuardArray, IntptrTy),
ConstantInt::get(IntptrTy, Idx * 4)),
@@ -573,26 +613,39 @@ void SanitizerCoverageModule::InjectCoverageAtBlock(Function &F, BasicBlock &BB,
IRB.CreateCall(SanCovTracePCGuard, GuardPtr);
IRB.CreateCall(EmptyAsm, {}); // Avoids callback merge.
}
+ if (Options.Inline8bitCounters) {
+ auto CounterPtr = IRB.CreateGEP(
+ Function8bitCounterArray,
+ {ConstantInt::get(IntptrTy, 0), ConstantInt::get(IntptrTy, Idx)});
+ auto Load = IRB.CreateLoad(CounterPtr);
+ auto Inc = IRB.CreateAdd(Load, ConstantInt::get(Int8Ty, 1));
+ auto Store = IRB.CreateStore(Inc, CounterPtr);
+ SetNoSanitizeMetadata(Load);
+ SetNoSanitizeMetadata(Store);
+ }
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSection() const {
+std::string
+SanitizerCoverageModule::getSectionName(const std::string &Section) const {
if (TargetTriple.getObjectFormat() == Triple::COFF)
return ".SCOV$M";
if (TargetTriple.isOSBinFormatMachO())
- return "__DATA,__sancov_guards";
- return "__sancov_guards";
+ return "__DATA,__" + Section;
+ return "__" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionStart() const {
+std::string
+SanitizerCoverageModule::getSectionStart(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$start$__DATA$__sancov_guards";
- return "__start___sancov_guards";
+ return "\1section$start$__DATA$__" + Section;
+ return "__start___" + Section;
}
-StringRef SanitizerCoverageModule::getSanCovTracePCGuardSectionEnd() const {
+std::string
+SanitizerCoverageModule::getSectionEnd(const std::string &Section) const {
if (TargetTriple.isOSBinFormatMachO())
- return "\1section$end$__DATA$__sancov_guards";
- return "__stop___sancov_guards";
+ return "\1section$end$__DATA$__" + Section;
+ return "__stop___" + Section;
}
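In source-level terms, the new inline-8bit-counters mode gives every instrumented block roughly the effect below; names are illustrative, and the real pass emits the load/increment/store as IR with nosanitize metadata in InjectCoverageAtBlock above:

  #include <cstdint>

  // One byte per basic block, placed in the sancov_counters section; the
  // generated module ctor hands the [start, stop) range to
  // __sanitizer_cov_8bit_counters_init at startup.
  void coverageHitSketch(uint8_t *FunctionCounters, unsigned BlockIdx) {
    FunctionCounters[BlockIdx]++;  // 8-bit counter, deliberately wraps
  }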
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 3953198fe605..9a7882211bac 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1823,6 +1823,7 @@ static PHINode *getLoopPhiForCounter(Value *IncV, Loop *L, DominatorTree *DT) {
// An IV counter must preserve its type.
if (IncI->getNumOperands() == 2)
break;
+ LLVM_FALLTHROUGH;
default:
return nullptr;
}
diff --git a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
index 930696b036c0..7d8da9b453f9 100644
--- a/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
+++ b/lib/Transforms/Scalar/LowerExpectIntrinsic.cpp
@@ -14,6 +14,7 @@
#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
@@ -83,6 +84,149 @@ static bool handleSwitchExpect(SwitchInst &SI) {
return true;
}
+/// Handler for PHINodes that define the value argument to an
+/// @llvm.expect call.
+///
+/// If an operand of the phi has a constant value that 'contradicts'
+/// the expected value of the phi def, then the corresponding incoming
+/// edge of the phi is unlikely to be taken. Using that information,
+/// the branch probability info for the originating branch can be inferred.
+static void handlePhiDef(CallInst *Expect) {
+ Value &Arg = *Expect->getArgOperand(0);
+ ConstantInt *ExpectedValue = cast<ConstantInt>(Expect->getArgOperand(1));
+ const APInt &ExpectedPhiValue = ExpectedValue->getValue();
+
+  // Walk backward up a list of instructions that
+ // have 'copy' semantics by 'stripping' the copies
+ // until a PHI node or an instruction of unknown kind
+ // is reached. Negation via xor is also handled.
+ //
+ // C = PHI(...);
+ // B = C;
+ // A = B;
+ // D = __builtin_expect(A, 0);
+ //
+ Value *V = &Arg;
+ SmallVector<Instruction *, 4> Operations;
+ while (!isa<PHINode>(V)) {
+ if (ZExtInst *ZExt = dyn_cast<ZExtInst>(V)) {
+ V = ZExt->getOperand(0);
+ Operations.push_back(ZExt);
+ continue;
+ }
+
+ if (SExtInst *SExt = dyn_cast<SExtInst>(V)) {
+ V = SExt->getOperand(0);
+ Operations.push_back(SExt);
+ continue;
+ }
+
+ BinaryOperator *BinOp = dyn_cast<BinaryOperator>(V);
+ if (!BinOp || BinOp->getOpcode() != Instruction::Xor)
+ return;
+
+ ConstantInt *CInt = dyn_cast<ConstantInt>(BinOp->getOperand(1));
+ if (!CInt)
+ return;
+
+ V = BinOp->getOperand(0);
+ Operations.push_back(BinOp);
+ }
+
+ // Executes the recorded operations on input 'Value'.
+ auto ApplyOperations = [&](const APInt &Value) {
+ APInt Result = Value;
+ for (auto Op : llvm::reverse(Operations)) {
+ switch (Op->getOpcode()) {
+ case Instruction::Xor:
+ Result ^= cast<ConstantInt>(Op->getOperand(1))->getValue();
+ break;
+ case Instruction::ZExt:
+ Result = Result.zext(Op->getType()->getIntegerBitWidth());
+ break;
+ case Instruction::SExt:
+ Result = Result.sext(Op->getType()->getIntegerBitWidth());
+ break;
+ default:
+ llvm_unreachable("Unexpected operation");
+ }
+ }
+ return Result;
+ };
+
+ auto *PhiDef = dyn_cast<PHINode>(V);
+
+  // Get the first dominating conditional branch of operand i's
+  // incoming block.
+ auto GetDomConditional = [&](unsigned i) -> BranchInst * {
+ BasicBlock *BB = PhiDef->getIncomingBlock(i);
+ BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (BI && BI->isConditional())
+ return BI;
+ BB = BB->getSinglePredecessor();
+ if (!BB)
+ return nullptr;
+ BI = dyn_cast<BranchInst>(BB->getTerminator());
+ if (!BI || BI->isUnconditional())
+ return nullptr;
+ return BI;
+ };
+
+  // Now walk through all Phi operands to find phi operands with values
+ // conflicting with the expected phi output value. Any such operand
+ // indicates the incoming edge to that operand is unlikely.
+ for (unsigned i = 0, e = PhiDef->getNumIncomingValues(); i != e; ++i) {
+
+ Value *PhiOpnd = PhiDef->getIncomingValue(i);
+ ConstantInt *CI = dyn_cast<ConstantInt>(PhiOpnd);
+ if (!CI)
+ continue;
+
+    // Not an interesting case -- we cannot infer anything useful when the
+    // operand value matches the expected phi output.
+ if (ExpectedPhiValue == ApplyOperations(CI->getValue()))
+ continue;
+
+ BranchInst *BI = GetDomConditional(i);
+ if (!BI)
+ continue;
+
+ MDBuilder MDB(PhiDef->getContext());
+
+ // There are two situations in which an operand of the PhiDef comes
+ // from a given successor of a branch instruction BI.
+ // 1) When the incoming block of the operand is the successor block;
+ // 2) When the incoming block is BI's enclosing block and the
+ // successor is the PhiDef's enclosing block.
+ //
+    // Returns true if the operand coming from OpndIncomingBB
+    // comes from the outgoing edge of BI that leads to the Succ block.
+ auto *OpndIncomingBB = PhiDef->getIncomingBlock(i);
+ auto IsOpndComingFromSuccessor = [&](BasicBlock *Succ) {
+ if (OpndIncomingBB == Succ)
+ // If this successor is the incoming block for this
+ // Phi operand, then this successor does lead to the Phi.
+ return true;
+ if (OpndIncomingBB == BI->getParent() && Succ == PhiDef->getParent())
+ // Otherwise, if the edge is directly from the branch
+ // to the Phi, this successor is the one feeding this
+ // Phi operand.
+ return true;
+ return false;
+ };
+
+ if (IsOpndComingFromSuccessor(BI->getSuccessor(1)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(LikelyBranchWeight, UnlikelyBranchWeight));
+ else if (IsOpndComingFromSuccessor(BI->getSuccessor(0)))
+ BI->setMetadata(
+ LLVMContext::MD_prof,
+ MDB.createBranchWeights(UnlikelyBranchWeight, LikelyBranchWeight));
+ }
+}
+
// Handle both BranchInst and SelectInst.
template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
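A C-level illustration of the pattern handlePhiDef targets; the source and the names a, compute, and rare_path are invented:

  extern int a;
  int compute();
  void rare_path();

  void example() {
    int v;
    if (a)
      v = 1;          // constant incoming value: contradicts the expected 0
    else
      v = compute();  // non-constant incoming value: nothing to infer here
    if (__builtin_expect(v, 0))
      rare_path();    // so the 'if (a)' branch gets unlikely/likely weights
  }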
@@ -99,25 +243,31 @@ template <class BrSelInst> static bool handleBrSelExpect(BrSelInst &BSI) {
ICmpInst *CmpI = dyn_cast<ICmpInst>(BSI.getCondition());
CmpInst::Predicate Predicate;
- uint64_t ValueComparedTo = 0;
+ ConstantInt *CmpConstOperand = nullptr;
if (!CmpI) {
CI = dyn_cast<CallInst>(BSI.getCondition());
Predicate = CmpInst::ICMP_NE;
- ValueComparedTo = 0;
} else {
Predicate = CmpI->getPredicate();
if (Predicate != CmpInst::ICMP_NE && Predicate != CmpInst::ICMP_EQ)
return false;
- ConstantInt *CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
+
+ CmpConstOperand = dyn_cast<ConstantInt>(CmpI->getOperand(1));
if (!CmpConstOperand)
return false;
- ValueComparedTo = CmpConstOperand->getZExtValue();
CI = dyn_cast<CallInst>(CmpI->getOperand(0));
}
if (!CI)
return false;
+ uint64_t ValueComparedTo = 0;
+ if (CmpConstOperand) {
+ if (CmpConstOperand->getBitWidth() > 64)
+ return false;
+ ValueComparedTo = CmpConstOperand->getZExtValue();
+ }
+
Function *Fn = CI->getCalledFunction();
if (!Fn || Fn->getIntrinsicID() != Intrinsic::expect)
return false;
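
The reordered logic above also adds a guard before getZExtValue(): that call asserts when the value has more than 64 active bits, so the pass now conservatively bails out on any compare constant wider than 64 bits (say an i128 operand). A minimal sketch of the same defensive pattern, with a hypothetical helper name:

  #include "llvm/IR/Constants.h"
  #include <cstdint>

  // Hypothetical helper mirroring the guard above: extract the value only
  // when the constant's type is at most 64 bits wide; wider types are
  // conservatively rejected instead of risking the assertion.
  static bool getIfFitsU64(const llvm::ConstantInt *CI, uint64_t &Out) {
    if (CI->getBitWidth() > 64)
      return false;
    Out = CI->getZExtValue();
    return true;
  }
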
@@ -181,6 +331,10 @@ static bool lowerExpectIntrinsic(Function &F) {
Function *Fn = CI->getCalledFunction();
if (Fn && Fn->getIntrinsicID() == Intrinsic::expect) {
+      // Before erasing the llvm.expect, walk backward to find the
+      // phi that defines llvm.expect's first argument, and infer the
+      // branch probability:
+ handlePhiDef(CI);
Value *Exp = CI->getArgOperand(0);
CI->replaceAllUsesWith(Exp);
CI->eraseFromParent();
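
For context, here is a hedged C++ illustration (not taken from the patch's tests) of the pattern handlePhiDef targets: the value handed to __builtin_expect is defined by a phi of per-edge constants, so the expectation can be back-propagated to the branch that selects the conflicting constant.

  // After IR lowering, `t` becomes a phi of 0 (fall-through edge) and
  // 1 (taken edge). Since __builtin_expect(t, 1) says t is expected to
  // be 1, the edge producing the conflicting constant 0 is marked
  // unlikely, which annotates the earlier `if (x > 0)` branch.
  int classify(int x) {
    int t = 0;
    if (x > 0)
      t = 1;
    if (__builtin_expect(t, 1))
      return 1;
    return 0;
  }
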
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index 77b2bd84f9b6..350b50ffcdd4 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -7,8 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// Rewrite an existing set of gc.statepoints such that they make potential
-// relocations performed by the garbage collector explicit in the IR.
+// Rewrite call/invoke instructions so as to make potential relocations
+// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//
@@ -2094,9 +2094,9 @@ static bool insertParsePoints(Function &F, DominatorTree &DT,
// live in the IR. We'll remove all of these when done.
SmallVector<CallInst *, 64> Holders;
- // Insert a dummy call with all of the arguments to the vm_state we'll need
- // for the actual safepoint insertion. This ensures reference arguments in
- // the deopt argument list are considered live through the safepoint (and
+ // Insert a dummy call with all of the deopt operands we'll need for the
+ // actual safepoint insertion as arguments. This ensures reference operands
+ // in the deopt argument list are considered live through the safepoint (and
// thus makes sure they get relocated.)
for (CallSite CS : ToUpdate) {
SmallVector<Value *, 64> DeoptValues;
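
The rewritten comment describes a liveness trick: a throwaway call that receives every deopt operand as an argument, keeping those operands live until the actual safepoints are in place. A hedged sketch of the shape of that trick (the holder's name and insertion point are hypothetical; the pass creates and erases its own holders):

  // A varargs declaration whose only job is to reference DeoptValues so
  // they are considered live across safepoint insertion.
  llvm::FunctionType *HolderTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/true);
  llvm::Function *Holder = llvm::Function::Create(
      HolderTy, llvm::GlobalValue::ExternalLinkage, "deopt.holder", &M);
  Holders.push_back(llvm::CallInst::Create(Holder, DeoptValues, "", InsertPt));
  // Once all parse points are rewritten, each holder call is erased again.
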
diff --git a/lib/Transforms/Scalar/SROA.cpp b/lib/Transforms/Scalar/SROA.cpp
index 6e113bccff94..fb1b5813fd79 100644
--- a/lib/Transforms/Scalar/SROA.cpp
+++ b/lib/Transforms/Scalar/SROA.cpp
@@ -3698,7 +3698,8 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
int Idx = 0, Size = Offsets.Splits.size();
for (;;) {
auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
- auto *PartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
+ auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
+ auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
// Either lookup a split load or create one.
LoadInst *PLoad;
@@ -3709,7 +3710,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
PLoad = IRB.CreateAlignedLoad(
getAdjustedPtr(IRB, DL, LoadBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, LoadBasePtr->getName() + "."),
+ LoadPartPtrTy, LoadBasePtr->getName() + "."),
getAdjustedAlignment(LI, PartOffset, DL), /*IsVolatile*/ false,
LI->getName());
}
@@ -3719,7 +3720,7 @@ bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
StoreInst *PStore = IRB.CreateAlignedStore(
PLoad, getAdjustedPtr(IRB, DL, StoreBasePtr,
APInt(DL.getPointerSizeInBits(), PartOffset),
- PartPtrTy, StoreBasePtr->getName() + "."),
+ StorePartPtrTy, StoreBasePtr->getName() + "."),
getAdjustedAlignment(SI, PartOffset, DL), /*IsVolatile*/ false);
// Now build a new slice for the alloca.
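
The split above matters when the presplit load and store sit in different address spaces: deriving both part-pointer types from SI meant the split load was given a pointer in the store's address space. Restated compactly, using the same names the hunk uses:

  // One part type, two pointer types: the load part must use LI's address
  // space and the store part SI's, instead of sharing a single PartPtrTy.
  llvm::Type *PartTy = llvm::Type::getIntNTy(Ty->getContext(), PartSize * 8);
  auto *LoadPartPtrTy = PartTy->getPointerTo(LI->getPointerAddressSpace());
  auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());
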
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 1ec3d0d49637..1c1a75c111e9 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -37,10 +37,10 @@
using namespace llvm;
/// See comments in Cloning.h.
-BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
- ValueToValueMapTy &VMap,
+BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap,
const Twine &NameSuffix, Function *F,
- ClonedCodeInfo *CodeInfo) {
+ ClonedCodeInfo *CodeInfo,
+ DebugInfoFinder *DIFinder) {
DenseMap<const MDNode *, MDNode *> Cache;
BasicBlock *NewBB = BasicBlock::Create(BB->getContext(), "", F);
if (BB->hasName()) NewBB->setName(BB->getName()+NameSuffix);
@@ -50,10 +50,11 @@ BasicBlock *llvm::CloneBasicBlock(const BasicBlock *BB,
// Loop over all instructions, and copy them over.
for (BasicBlock::const_iterator II = BB->begin(), IE = BB->end();
II != IE; ++II) {
+
+ if (DIFinder && F->getParent() && II->getDebugLoc())
+ DIFinder->processLocation(*F->getParent(), II->getDebugLoc().get());
+
Instruction *NewInst = II->clone();
- if (F && F->getSubprogram())
- DebugLoc::reparentDebugInfo(*NewInst, BB->getParent()->getSubprogram(),
- F->getSubprogram(), Cache);
if (II->hasName())
NewInst->setName(II->getName()+NameSuffix);
NewBB->getInstList().push_back(NewInst);
@@ -122,31 +123,38 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
AttributeList::get(NewFunc->getContext(), OldAttrs.getFnAttributes(),
OldAttrs.getRetAttributes(), NewArgAttrs));
+ bool MustCloneSP =
+ OldFunc->getParent() && OldFunc->getParent() == NewFunc->getParent();
+ DISubprogram *SP = OldFunc->getSubprogram();
+ if (SP) {
+ assert(!MustCloneSP || ModuleLevelChanges);
+ // Add mappings for some DebugInfo nodes that we don't want duplicated
+ // even if they're distinct.
+ auto &MD = VMap.MD();
+ MD[SP->getUnit()].reset(SP->getUnit());
+ MD[SP->getType()].reset(SP->getType());
+ MD[SP->getFile()].reset(SP->getFile());
+ // If we're not cloning into the same module, no need to clone the
+    // subprogram.
+ if (!MustCloneSP)
+ MD[SP].reset(SP);
+ }
+
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
OldFunc->getAllMetadata(MDs);
for (auto MD : MDs) {
- MDNode *NewMD;
- bool MustCloneSP =
- (MD.first == LLVMContext::MD_dbg && OldFunc->getParent() &&
- OldFunc->getParent() == NewFunc->getParent());
- if (MustCloneSP) {
- auto *SP = cast<DISubprogram>(MD.second);
- NewMD = DISubprogram::getDistinct(
- NewFunc->getContext(), SP->getScope(), SP->getName(),
- SP->getLinkageName(), SP->getFile(), SP->getLine(), SP->getType(),
- SP->isLocalToUnit(), SP->isDefinition(), SP->getScopeLine(),
- SP->getContainingType(), SP->getVirtuality(), SP->getVirtualIndex(),
- SP->getThisAdjustment(), SP->getFlags(), SP->isOptimized(),
- SP->getUnit(), SP->getTemplateParams(), SP->getDeclaration(),
- SP->getVariables(), SP->getThrownTypes());
- } else
- NewMD =
- MapMetadata(MD.second, VMap,
- ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
- TypeMapper, Materializer);
- NewFunc->addMetadata(MD.first, *NewMD);
+ NewFunc->addMetadata(
+ MD.first,
+ *MapMetadata(MD.second, VMap,
+ ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges,
+ TypeMapper, Materializer));
}
+ // When we remap instructions, we want to avoid duplicating inlined
+ // DISubprograms, so record all subprograms we find as we duplicate
+ // instructions and then freeze them in the MD map.
+ DebugInfoFinder DIFinder;
+
// Loop over all of the basic blocks in the function, cloning them as
// appropriate. Note that we save BE this way in order to handle cloning of
// recursive functions into themselves.
@@ -156,7 +164,8 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
const BasicBlock &BB = *BI;
// Create a new basic block and copy instructions into it!
- BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo);
+ BasicBlock *CBB = CloneBasicBlock(&BB, VMap, NameSuffix, NewFunc, CodeInfo,
+ SP ? &DIFinder : nullptr);
// Add basic block mapping.
VMap[&BB] = CBB;
@@ -178,6 +187,12 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
Returns.push_back(RI);
}
+ for (DISubprogram *ISP : DIFinder.subprograms()) {
+ if (ISP != SP) {
+ VMap.MD()[ISP].reset(ISP);
+ }
+ }
+
// Loop over all of the instructions in the function, fixing up operand
// references as we go. This uses VMap to do all the hard work.
for (Function::iterator BB =
@@ -226,7 +241,7 @@ Function *llvm::CloneFunction(Function *F, ValueToValueMapTy &VMap,
}
SmallVector<ReturnInst*, 8> Returns; // Ignore returns cloned.
- CloneFunctionInto(NewF, F, VMap, /*ModuleLevelChanges=*/false, Returns, "",
+ CloneFunctionInto(NewF, F, VMap, F->getSubprogram() != nullptr, Returns, "",
CodeInfo);
return NewF;
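
Taken together, the Cloning changes replace the ad-hoc DISubprogram duplication with a DebugInfoFinder sweep over the cloned instructions. A hedged usage sketch of the new optional parameter, based only on the signatures visible in this patch (the ".clone" suffix is illustrative):

  // Collect subprograms referenced by debug locations while cloning, then
  // freeze every inlined subprogram in the metadata map so MapMetadata
  // does not duplicate it.
  llvm::DebugInfoFinder DIFinder;
  llvm::BasicBlock *CBB = llvm::CloneBasicBlock(
      &BB, VMap, ".clone", NewFunc, /*CodeInfo=*/nullptr, &DIFinder);
  for (llvm::DISubprogram *ISP : DIFinder.subprograms())
    if (ISP != NewFunc->getSubprogram())
      VMap.MD()[ISP].reset(ISP);
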
diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp
index 8b9a64c220cc..799eef21dc4e 100644
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4779,6 +4779,7 @@ void InnerLoopVectorizer::vectorizeInstruction(Instruction &I) {
scalarizeInstruction(&I, true);
break;
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
@@ -7396,6 +7397,7 @@ unsigned LoopVectorizationCostModel::getInstructionCost(Instruction *I,
// likely.
return Cost / getReciprocalPredBlockProb();
}
+ LLVM_FALLTHROUGH;
case Instruction::Add:
case Instruction::FAdd:
case Instruction::Sub:
diff --git a/lib/Transforms/Vectorize/SLPVectorizer.cpp b/lib/Transforms/Vectorize/SLPVectorizer.cpp
index e6f78e6b94a3..d1349535f298 100644
--- a/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -259,6 +259,7 @@ static bool InTreeUserNeedToExtract(Value *Scalar, Instruction *UserInst,
if (hasVectorInstrinsicScalarOpd(ID, 1)) {
return (CI->getArgOperand(1) == Scalar);
}
+ LLVM_FALLTHROUGH;
}
default:
return false;
@@ -4749,56 +4750,18 @@ static Value *getReductionValue(const DominatorTree *DT, PHINode *P,
return nullptr;
}
-namespace {
-/// Tracks instructons and its children.
-class WeakTrackingVHWithLevel final : public CallbackVH {
- /// Operand index of the instruction currently beeing analized.
- unsigned Level = 0;
- /// Is this the instruction that should be vectorized, or are we now
- /// processing children (i.e. operands of this instruction) for potential
- /// vectorization?
- bool IsInitial = true;
-
-public:
- explicit WeakTrackingVHWithLevel() = default;
- WeakTrackingVHWithLevel(Value *V) : CallbackVH(V){};
- /// Restart children analysis each time it is repaced by the new instruction.
- void allUsesReplacedWith(Value *New) override {
- setValPtr(New);
- Level = 0;
- IsInitial = true;
- }
- /// Check if the instruction was not deleted during vectorization.
- bool isValid() const { return !getValPtr(); }
- /// Is the istruction itself must be vectorized?
- bool isInitial() const { return IsInitial; }
- /// Try to vectorize children.
- void clearInitial() { IsInitial = false; }
- /// Are all children processed already?
- bool isFinal() const {
- assert(getValPtr() &&
- (isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() >= Level));
- return getValPtr() &&
- cast<Instruction>(getValPtr())->getNumOperands() == Level;
- }
- /// Get next child operation.
- Value *nextOperand() {
- assert(getValPtr() && isa<Instruction>(getValPtr()) &&
- cast<Instruction>(getValPtr())->getNumOperands() > Level);
- return cast<Instruction>(getValPtr())->getOperand(Level++);
- }
- virtual ~WeakTrackingVHWithLevel() = default;
-};
-} // namespace
-
-/// \brief Attempt to reduce a horizontal reduction.
-/// If it is legal to match a horizontal reduction feeding
-/// the phi node P with reduction operators Root in a basic block BB, then check
-/// if it can be done.
-/// \returns true if a horizontal reduction was matched and reduced.
-/// \returns false if a horizontal reduction was not matched.
-static bool canBeVectorized(
+/// Attempt to reduce a horizontal reduction.
+/// If it is legal to match a horizontal reduction feeding the phi node \a P
+/// with reduction operators \a Root (or one of its operands) in a basic block
+/// \a BB, then check if it can be done. If a horizontal reduction is not
+/// found and the root instruction is a binary operation, vectorization of
+/// its operands is attempted.
+/// \returns true if a horizontal reduction was matched and reduced, or the
+/// operands of one of the binary instructions were vectorized.
+/// \returns false if a horizontal reduction was not matched (or not
+/// possible) and no vectorization of any binary operation feeding the
+/// \a Root instruction was performed.
+static bool tryToVectorizeHorReductionOrInstOperands(
PHINode *P, Instruction *Root, BasicBlock *BB, BoUpSLP &R,
TargetTransformInfo *TTI,
const function_ref<bool(BinaryOperator *, BoUpSLP &)> Vectorize) {
@@ -4810,56 +4773,62 @@ static bool canBeVectorized(
if (Root->getParent() != BB)
return false;
- SmallVector<WeakTrackingVHWithLevel, 8> Stack(1, Root);
+  // Start the analysis from the Root instruction. If a horizontal reduction
+  // is found, try to vectorize it. If it is not a horizontal reduction, or
+  // vectorization is not possible or not effective, and the currently
+  // analyzed instruction is a binary operation, try to vectorize its
+  // operands in pre-order DFS traversal order. If the operands were not
+  // vectorized, repeat the same procedure, considering each operand as a
+  // possible root of a horizontal reduction.
+  // Interrupt the process once the Root instruction itself is vectorized or
+  // all sub-trees no deeper than RecursionMaxDepth have been analyzed/vectorized.
+ SmallVector<std::pair<WeakTrackingVH, unsigned>, 8> Stack(1, {Root, 0});
SmallSet<Value *, 8> VisitedInstrs;
bool Res = false;
while (!Stack.empty()) {
- Value *V = Stack.back();
- if (!V) {
- Stack.pop_back();
+ Value *V;
+ unsigned Level;
+ std::tie(V, Level) = Stack.pop_back_val();
+ if (!V)
continue;
- }
auto *Inst = dyn_cast<Instruction>(V);
- if (!Inst || isa<PHINode>(Inst)) {
- Stack.pop_back();
+ if (!Inst || isa<PHINode>(Inst))
continue;
- }
- if (Stack.back().isInitial()) {
- Stack.back().clearInitial();
- if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
- HorizontalReduction HorRdx;
- if (HorRdx.matchAssociativeReduction(P, BI)) {
- if (HorRdx.tryToReduce(R, TTI)) {
- Res = true;
- P = nullptr;
- continue;
- }
- }
- if (P) {
- Inst = dyn_cast<Instruction>(BI->getOperand(0));
- if (Inst == P)
- Inst = dyn_cast<Instruction>(BI->getOperand(1));
- if (!Inst) {
- P = nullptr;
- continue;
- }
+ if (auto *BI = dyn_cast<BinaryOperator>(Inst)) {
+ HorizontalReduction HorRdx;
+ if (HorRdx.matchAssociativeReduction(P, BI)) {
+ if (HorRdx.tryToReduce(R, TTI)) {
+ Res = true;
+          // Set P to nullptr to avoid re-analysis of the phi node in the
+          // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
}
}
- P = nullptr;
- if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
- Res = true;
- continue;
+ if (P) {
+ Inst = dyn_cast<Instruction>(BI->getOperand(0));
+ if (Inst == P)
+ Inst = dyn_cast<Instruction>(BI->getOperand(1));
+ if (!Inst) {
+          // Set P to nullptr to avoid re-analysis of the phi node in the
+          // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ continue;
+ }
}
}
- if (Stack.back().isFinal()) {
- Stack.pop_back();
+    // Set P to nullptr to avoid re-analysis of the phi node in the
+    // matchAssociativeReduction function unless this is the root node.
+ P = nullptr;
+ if (Vectorize(dyn_cast<BinaryOperator>(Inst), R)) {
+ Res = true;
continue;
}
- if (auto *NextV = dyn_cast<Instruction>(Stack.back().nextOperand()))
- if (NextV->getParent() == BB && VisitedInstrs.insert(NextV).second &&
- Stack.size() < RecursionMaxDepth)
- Stack.push_back(NextV);
+ // Try to vectorize operands.
+ if (++Level < RecursionMaxDepth)
+ for (auto *Op : Inst->operand_values())
+ Stack.emplace_back(Op, Level);
}
return Res;
}
@@ -4876,10 +4845,10 @@ bool SLPVectorizerPass::vectorizeRootInstruction(PHINode *P, Value *V,
if (!isa<BinaryOperator>(I))
P = nullptr;
// Try to match and vectorize a horizontal reduction.
- return canBeVectorized(P, I, BB, R, TTI,
- [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
- return tryToVectorize(BI, R);
- });
+ return tryToVectorizeHorReductionOrInstOperands(
+ P, I, BB, R, TTI, [this](BinaryOperator *BI, BoUpSLP &R) -> bool {
+ return tryToVectorize(BI, R);
+ });
}
bool SLPVectorizerPass::vectorizeChainsInBlock(BasicBlock *BB, BoUpSLP &R) {
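
The rewritten traversal drops the stateful callback handle in favor of plain (value, depth) pairs on an explicit stack. A stripped-down, self-contained sketch of that control flow, assuming a TryVectorize callback and a depth bound in place of the pass's own:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instructions.h"
  #include "llvm/IR/ValueHandle.h"
  #include <tuple>
  #include <utility>

  static bool walkOperandsDFS(
      llvm::Instruction *Root, unsigned MaxDepth,
      llvm::function_ref<bool(llvm::Instruction *)> TryVectorize) {
    llvm::SmallVector<std::pair<llvm::WeakTrackingVH, unsigned>, 8> Stack(
        1, {Root, 0});
    bool Res = false;
    while (!Stack.empty()) {
      llvm::Value *V;
      unsigned Level;
      std::tie(V, Level) = Stack.pop_back_val();
      if (!V)
        continue; // the tracking handle went null: instruction was deleted
      auto *I = llvm::dyn_cast<llvm::Instruction>(V);
      if (!I || llvm::isa<llvm::PHINode>(I))
        continue;
      if (TryVectorize(I)) {
        Res = true;
        continue; // do not descend into a sub-tree we just vectorized
      }
      if (++Level < MaxDepth) // bound the pre-order DFS by depth
        for (llvm::Value *Op : I->operand_values())
          Stack.emplace_back(Op, Level);
    }
    return Res;
  }
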
diff --git a/runtimes/CMakeLists.txt b/runtimes/CMakeLists.txt
index ef56fa1b9367..6793a49a2ddc 100644
--- a/runtimes/CMakeLists.txt
+++ b/runtimes/CMakeLists.txt
@@ -188,6 +188,7 @@ else() # if this is included from LLVM's CMake
else()
get_cmake_property(variableNames VARIABLES)
add_custom_target(builtins)
+ add_custom_target(install-builtins)
foreach(target ${LLVM_BUILTIN_TARGETS})
string(REPLACE "-" ";" builtin_target_list ${target})
foreach(item ${builtin_target_list})
@@ -218,6 +219,7 @@ else() # if this is included from LLVM's CMake
USE_TOOLCHAIN
${EXTRA_ARGS})
add_dependencies(builtins builtins-${target})
+ add_dependencies(install-builtins install-builtins-${target})
endforeach()
endif()
set(deps builtins)
diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
index 0557008ceb4f..b3e41c7751c5 100644
--- a/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
+++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-constant.mir
@@ -10,18 +10,27 @@
entry:
ret void
}
+
+ declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #1
+
+ attributes #1 = { nounwind }
+
...
---
name: test_constant
registers:
- { id: 0, class: _ }
+ - { id: 1, class: _ }
body: |
bb.0.entry:
; CHECK-LABEL: name: test_constant
; CHECK: %0(s32) = G_CONSTANT i32 5
+ ; CHECK: %1(s1) = G_CONSTANT i1 false
%0(s32) = G_CONSTANT i32 5
+ %1(s1) = G_CONSTANT i1 0
+ G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp.f32), %0, %0, %0, %0, %0, %0, %1, %1;
...
---
diff --git a/test/CodeGen/AMDGPU/basic-branch.ll b/test/CodeGen/AMDGPU/basic-branch.ll
index e245e4296df2..d8f9e4f51ff4 100644
--- a/test/CodeGen/AMDGPU/basic-branch.ll
+++ b/test/CodeGen/AMDGPU/basic-branch.ll
@@ -34,8 +34,6 @@ end:
; GCN: s_cbranch_vccnz [[END:BB[0-9]+_[0-9]+]]
; GCN: buffer_store_dword
-; GCNOPT-NEXT: s_waitcnt vmcnt(0) expcnt(0)
-; TODO: This waitcnt can be eliminated
; GCN: {{^}}[[END]]:
; GCN: s_endpgm
diff --git a/test/CodeGen/AMDGPU/branch-condition-and.ll b/test/CodeGen/AMDGPU/branch-condition-and.ll
index 68b77ea3490e..662ea37a2b99 100644
--- a/test/CodeGen/AMDGPU/branch-condition-and.ll
+++ b/test/CodeGen/AMDGPU/branch-condition-and.ll
@@ -19,9 +19,8 @@
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %bb4
; GCN: ds_write_b32
-; GCN: s_waitcnt
-; GCN-NEXT: [[BB5]]
+; GCN: [[BB5]]
; GCN: s_or_b64 exec, exec
; GCN-NEXT: s_endpgm
; GCN-NEXT: .Lfunc_end
diff --git a/test/CodeGen/AMDGPU/branch-relaxation.ll b/test/CodeGen/AMDGPU/branch-relaxation.ll
index 263059d4a6ed..d3f835bdf163 100644
--- a/test/CodeGen/AMDGPU/branch-relaxation.ll
+++ b/test/CodeGen/AMDGPU/branch-relaxation.ll
@@ -223,7 +223,6 @@ bb3:
; GCN-NEXT: [[BB2]]: ; %bb2
; GCN: v_mov_b32_e32 [[BB2_K:v[0-9]+]], 17
; GCN: buffer_store_dword [[BB2_K]]
-; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: [[LONG_JUMP1:BB[0-9]+_[0-9]+]]: ; %bb2
; GCN-NEXT: s_getpc_b64 vcc
@@ -393,7 +392,6 @@ bb3:
; GCN-NEXT: ; BB#2: ; %if_uniform
; GCN: buffer_store_dword
-; GCN: s_waitcnt vmcnt(0)
; GCN-NEXT: [[ENDIF]]: ; %endif
; GCN-NEXT: s_or_b64 exec, exec, [[MASK]]
diff --git a/test/CodeGen/AMDGPU/commute-compares.ll b/test/CodeGen/AMDGPU/commute-compares.ll
index 66148a43a271..caba83c50428 100644
--- a/test/CodeGen/AMDGPU/commute-compares.ll
+++ b/test/CodeGen/AMDGPU/commute-compares.ll
@@ -35,7 +35,7 @@ define amdgpu_kernel void @commute_ne_64_i32(i32 addrspace(1)* %out, i32 addrspa
; FIXME: Why isn't this being folded as a constant?
; GCN-LABEL: {{^}}commute_ne_litk_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3039
-; GCN: v_cmp_ne_u32_e32 vcc, [[K]], v{{[0-9]+}}
+; GCN: v_cmp_ne_u32_e32 vcc, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @commute_ne_litk_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
@@ -99,11 +99,9 @@ define amdgpu_kernel void @commute_ule_63_i32(i32 addrspace(1)* %out, i32 addrsp
ret void
}
-; FIXME: Undo canonicalization to gt (x + 1) since it doesn't use the inline imm
-
; GCN-LABEL: {{^}}commute_ule_64_i32:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x41{{$}}
-; GCN: v_cmp_gt_u32_e32 vcc, [[K]], v{{[0-9]+}}
+; GCN: v_cmp_lt_u32_e32 vcc, v{{[0-9]+}}, [[K]]
define amdgpu_kernel void @commute_ule_64_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #1 {
%tid = call i32 @llvm.amdgcn.workitem.id.x() #0
%gep.in = getelementptr i32, i32 addrspace(1)* %in, i32 %tid
@@ -702,7 +700,7 @@ define amdgpu_kernel void @commute_uno_2.0_f64(i32 addrspace(1)* %out, double ad
; XGCN: v_cmp_eq_u32_e32 vcc, 0, v{{[0-9]+}}
; GCN: v_mov_b32_e32 [[FI:v[0-9]+]], 4{{$}}
-; GCN: v_cmp_eq_u32_e32 vcc, [[FI]], v{{[0-9]+}}
+; GCN: v_cmp_eq_u32_e32 vcc, v{{[0-9]+}}, [[FI]]
define amdgpu_kernel void @commute_frameindex(i32 addrspace(1)* nocapture %out) #0 {
entry:
%stack0 = alloca i32
diff --git a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index d3e6c11ef908..79d9b1691878 100644
--- a/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -37,22 +37,21 @@
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN: mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: {{^}}BB{{[0-9]+}}_1: ; %if
; GCN: s_mov_b32 m0, -1
; GCN: ds_read_b32 [[LOAD1:v[0-9]+]]
+; GCN: s_waitcnt lgkmcnt(0)
; GCN: buffer_load_dword [[RELOAD_LOAD0:v[0-9]+]], off, s[0:3], s7 offset:[[LOAD0_OFFSET]] ; 4-byte Folded Reload
-; GCN: s_waitcnt vmcnt(0)
; Spill val register
; GCN: v_add_i32_e32 [[VAL:v[0-9]+]], vcc, [[LOAD1]], [[RELOAD_LOAD0]]
; GCN: buffer_store_dword [[VAL]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_waitcnt vmcnt(0)
; VMEM: [[ENDIF]]:
; Reload and restore exec mask
+; VGPR: s_waitcnt lgkmcnt(0)
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
@@ -119,7 +118,6 @@ endif:
; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: ; mask branch [[END:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[END]]
@@ -130,7 +128,6 @@ endif:
; GCN: v_cmp_ne_u32_e32 vcc,
; GCN: s_and_b64 vcc, exec, vcc
; GCN: buffer_store_dword [[VAL_LOOP]], off, s[0:3], s7 offset:[[VAL_SUB_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_cbranch_vccnz [[LOOP]]
@@ -197,7 +194,6 @@ end:
; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:[[SAVEEXEC_HI_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: s_mov_b64 exec, [[CMP0]]
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; FIXME: It makes no sense to put this skip here
; GCN-NEXT: ; mask branch [[FLOW:BB[0-9]+_[0-9]+]]
@@ -235,7 +231,6 @@ end:
; GCN: buffer_store_dword [[FLOW_VAL]], off, s[0:3], s7 offset:[[RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
; GCN: s_xor_b64 exec, exec, s{{\[}}[[FLOW_S_RELOAD_SAVEEXEC_LO]]:[[FLOW_S_RELOAD_SAVEEXEC_HI]]{{\]}}
-; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: ; mask branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN-NEXT: s_cbranch_execz [[ENDIF]]
@@ -245,14 +240,12 @@ end:
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
; GCN: v_add_i32_e32 [[ADD:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[RESULT_OFFSET]] ; 4-byte Folded Spill
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_branch [[ENDIF:BB[0-9]+_[0-9]+]]
; GCN: [[ELSE]]: ; %else
; GCN: buffer_load_dword v[[LOAD0_RELOAD:[0-9]+]], off, s[0:3], s7 offset:4 ; 4-byte Folded Reload
; GCN: v_subrev_i32_e32 [[SUB:v[0-9]+]], vcc, v{{[0-9]+}}, v[[LOAD0_RELOAD]]
; GCN: buffer_store_dword [[ADD]], off, s[0:3], s7 offset:[[FLOW_RESULT_OFFSET:[0-9]+]] ; 4-byte Folded Spill
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
; GCN-NEXT: s_branch [[FLOW]]
; GCN: [[ENDIF]]:
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index b18ae353ca4c..fab1f8d12253 100644
--- a/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -120,8 +120,7 @@ entry:
; FIXME: The waitcnt for the argument load can go after the loop
; IDXMODE: s_set_gpr_idx_on 0, src0
; GCN: s_mov_b64 s{{\[[0-9]+:[0-9]+\]}}, exec
-; GCN: s_waitcnt lgkmcnt(0)
-
+; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]:
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]], v{{[0-9]+}}
; MOVREL: s_add_i32 m0, [[READLANE]], 0xfffffe0
@@ -250,8 +249,6 @@ entry:
; GCN-DAG: v_mov_b32_e32 [[VEC_ELT3:v[0-9]+]], 4{{$}}
; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
-; GCN: s_waitcnt lgkmcnt(0)
-
; GCN: [[LOOPBB:BB[0-9]+_[0-9]+]]:
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]
@@ -290,7 +287,6 @@ entry:
; IDXMODE: s_set_gpr_idx_on 0, dst
; GCN: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec
-; GCN: s_waitcnt lgkmcnt(0)
; The offset depends on the register that holds the first element of the vector.
; GCN: v_readfirstlane_b32 [[READLANE:s[0-9]+]]
@@ -330,9 +326,9 @@ entry:
; IDXMODE: s_set_gpr_idx_on 0, src0
; GCN: s_mov_b64 [[MASK:s\[[0-9]+:[0-9]+\]]], exec
-; GCN: s_waitcnt vmcnt(0)
; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
+; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
@@ -411,6 +407,7 @@ bb2:
; IDXMODE: s_set_gpr_idx_on 0, dst
; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
+; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readfirstlane_b32 [[READLANE:s[0-9]+]], [[IDX0]]
; GCN: v_cmp_eq_u32_e32 vcc, [[READLANE]], [[IDX0]]
diff --git a/test/CodeGen/AMDGPU/infinite-loop.ll b/test/CodeGen/AMDGPU/infinite-loop.ll
index 73482756b8c8..3caffc342c7e 100644
--- a/test/CodeGen/AMDGPU/infinite-loop.ll
+++ b/test/CodeGen/AMDGPU/infinite-loop.ll
@@ -4,8 +4,8 @@
; SI-LABEL: {{^}}infinite_loop:
; SI: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3e7
; SI: BB0_1:
+; SI: s_waitcnt lgkmcnt(0)
; SI: buffer_store_dword [[REG]]
-; SI: s_waitcnt vmcnt(0) expcnt(0)
; SI: s_branch BB0_1
define amdgpu_kernel void @infinite_loop(i32 addrspace(1)* %out) {
entry:
diff --git a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
index 350dd38ef583..1edccff3bf15 100644
--- a/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
+++ b/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll
@@ -421,11 +421,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_sgpr(<2 x i16> addrspac
}
; GCN-LABEL: {{^}}v_insertelement_v2i16_dynamic_vgpr:
-; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
-; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
; GCN: flat_load_dword [[IDX:v[0-9]+]]
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
+; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
@@ -449,11 +448,10 @@ define amdgpu_kernel void @v_insertelement_v2i16_dynamic_vgpr(<2 x i16> addrspac
}
; GCN-LABEL: {{^}}v_insertelement_v2f16_dynamic_vgpr:
-; GFX89: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
-; CI: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
; GCN: flat_load_dword [[IDX:v[0-9]+]]
; GCN: flat_load_dword [[VEC:v[0-9]+]]
-; GFX89-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
+; GFX89-DAG: s_mov_b32 [[MASKK:s[0-9]+]], 0xffff{{$}}
+; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x1234
; GFX89-DAG: v_lshlrev_b32_e32 [[SCALED_IDX:v[0-9]+]], 16, [[IDX]]
; GFX89-DAG: v_lshlrev_b32_e64 [[MASK:v[0-9]+]], [[SCALED_IDX]], [[MASKK]]
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
index 555a1d23ebe9..e50455f6f9a1 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.format.ll
@@ -58,7 +58,7 @@ main_body:
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK: buffer_store_format_xyzw v[0:3], v4, s[0:3], 0 idxen
-;CHECK: s_waitcnt vmcnt(0) expcnt(0)
+;CHECK: s_waitcnt expcnt(0)
;CHECK: buffer_load_format_xyzw v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_format_xyzw v[0:3], v6, s[0:3], 0 idxen
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
index 5ae255c7a26c..81597516d5f2 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.buffer.store.ll
@@ -58,7 +58,7 @@ main_body:
;
;CHECK-LABEL: {{^}}buffer_store_wait:
;CHECK: buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 idxen
-;CHECK: s_waitcnt vmcnt(0) expcnt(0)
+;CHECK: s_waitcnt expcnt(0)
;CHECK: buffer_load_dwordx4 v[0:3], v5, s[0:3], 0 idxen
;CHECK: s_waitcnt vmcnt(0)
;CHECK: buffer_store_dwordx4 v[0:3], v6, s[0:3], 0 idxen
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
index 02642142ae2c..d97644262016 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.ds.swizzle.ll
@@ -5,7 +5,6 @@ declare i32 @llvm.amdgcn.ds.swizzle(i32, i32) #0
; FUNC-LABEL: {{^}}ds_swizzle:
; CHECK: ds_swizzle_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:swizzle(BITMASK_PERM,"00p11")
-; CHECK: s_waitcnt lgkmcnt
define amdgpu_kernel void @ds_swizzle(i32 addrspace(1)* %out, i32 %src) nounwind {
%swizzle = call i32 @llvm.amdgcn.ds.swizzle(i32 %src, i32 100) #0
store i32 %swizzle, i32 addrspace(1)* %out, align 4
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
index c74c0fa15855..a289f7b0cfb1 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.image.ll
@@ -130,7 +130,7 @@ main_body:
;
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
-; GCN: s_waitcnt vmcnt(0) expcnt(0)
+; GCN: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
index 055dddbfa8af..9a27809f37bb 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.kernarg.segment.ptr.ll
@@ -2,6 +2,8 @@
; RUN: llc -mtriple=amdgcn--amdhsa-opencl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,OS-MESA3D,MESA,ALL %s
; RUN: llc -mtriple=amdgcn-mesa-unknown -verify-machineinstrs < %s | FileCheck -check-prefixes=OS-UNKNOWN,MESA,ALL %s
+; RUN: llc -mtriple=amdgcn--amdhsa-amdgiz -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-NOENV %s
+; RUN: llc -mtriple=amdgcn--amdhsa-amdgizcl -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=CO-V2,HSA,ALL,HSA-OPENCL %s
; ALL-LABEL: {{^}}test:
; CO-V2: enable_sgpr_kernarg_segment_ptr = 1
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
index ef9cda142850..3d815cca5be2 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.barrier.ll
@@ -1,10 +1,13 @@
-; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s
-; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=NOAUTO %s
+; RUN: llc -march=amdgcn -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX8 -check-prefix=AUTO %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=NOAUTO %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+auto-waitcnt-before-barrier -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=AUTO %s
; GCN-LABEL: {{^}}test_barrier:
; GFX8: buffer_store_dword
; GFX9: flat_store_dword
-; GCN: s_waitcnt
+; NOAUTO: s_waitcnt
+; AUTO-NOT: s_waitcnt
; GCN: s_barrier
define amdgpu_kernel void @test_barrier(i32 addrspace(1)* %out, i32 %size) #0 {
entry:
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
index b488565c6b3a..224b2ed72e3b 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv() #0 {
; GCN: s_waitcnt lgkmcnt(0) ; encoding
define amdgpu_kernel void @test_s_dcache_inv_insert_wait() #0 {
call void @llvm.amdgcn.s.dcache.inv()
- call void @llvm.amdgcn.s.waitcnt(i32 0)
+ call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
end:
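
The switch from 0 to 127 is not arbitrary. Assuming the SI s_waitcnt immediate layout (vmcnt in bits [3:0], expcnt in bits [6:4], lgkmcnt in bits [11:8]), 127 sets vmcnt and expcnt to their "do not wait" maxima and lgkmcnt to 0, so only the LGKM counter is waited on, matching the lgkmcnt(0) CHECK line. A small C++ check of that arithmetic:

  // Pack a waitcnt immediate from its three fields (SI layout assumed here).
  constexpr unsigned encodeWaitcnt(unsigned VmCnt, unsigned ExpCnt,
                                   unsigned LgkmCnt) {
    return (VmCnt & 0xF) | ((ExpCnt & 0x7) << 4) | ((LgkmCnt & 0xF) << 8);
  }
  // 15 and 7 are the field maxima ("do not wait on this counter").
  static_assert(encodeWaitcnt(15, 7, 0) == 127,
                "i32 127 waits only on lgkmcnt(0)");
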
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
index a3a5c329f411..f96d5db5794a 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.inv.vol.ll
@@ -20,7 +20,7 @@ define amdgpu_kernel void @test_s_dcache_inv_vol() #0 {
; GCN: s_waitcnt lgkmcnt(0) ; encoding
define amdgpu_kernel void @test_s_dcache_inv_vol_insert_wait() #0 {
call void @llvm.amdgcn.s.dcache.inv.vol()
- call void @llvm.amdgcn.s.waitcnt(i32 0)
+ call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
end:
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
index 909a85dda3e8..99b651350439 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb() #0 {
; VI: s_waitcnt lgkmcnt(0) ; encoding
define amdgpu_kernel void @test_s_dcache_wb_insert_wait() #0 {
call void @llvm.amdgcn.s.dcache.wb()
- call void @llvm.amdgcn.s.waitcnt(i32 0)
+ call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
end:
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
index 217bf97c41a4..844fcecdb48b 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.dcache.wb.vol.ll
@@ -18,7 +18,7 @@ define amdgpu_kernel void @test_s_dcache_wb_vol() #0 {
; VI: s_waitcnt lgkmcnt(0) ; encoding
define amdgpu_kernel void @test_s_dcache_wb_vol_insert_wait() #0 {
call void @llvm.amdgcn.s.dcache.wb.vol()
- call void @llvm.amdgcn.s.waitcnt(i32 0)
+ call void @llvm.amdgcn.s.waitcnt(i32 127)
br label %end
end:
diff --git a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
index 6083ec885a86..ee58d359a935 100644
--- a/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
+++ b/test/CodeGen/AMDGPU/llvm.amdgcn.s.waitcnt.ll
@@ -18,8 +18,8 @@ define amdgpu_ps void @test1(<8 x i32> inreg %rsrc, <4 x float> %d0, <4 x float>
;
; CHECK-LABEL: {{^}}test2:
; CHECK: image_load
-; CHECK-NOT: s_waitcnt vmcnt(0){{$}}
-; CHECK: s_waitcnt
+; CHECK-NEXT: s_waitcnt
+; CHECK: s_waitcnt vmcnt(0){{$}}
; CHECK-NEXT: image_store
define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, i32 %c) {
%t = call <4 x float> @llvm.amdgcn.image.load.v4f32.i32.v8i32(i32 %c, <8 x i32> %rsrc, i32 15, i1 0, i1 0, i1 0, i1 0)
diff --git a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
index 9d0b6b395996..82c27f204a47 100644
--- a/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
+++ b/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll
@@ -362,6 +362,7 @@ exit1: ; preds = %LeafBlock, %LeafBlock1
; GCN: {{^BB[0-9]+_[0-9]+}}: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
+; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: ; return
define amdgpu_ps float @uniform_branch_to_multi_divergent_region_exit_ret_ret_return_value(i32 inreg %sgpr, i32 %vgpr) #0 {
diff --git a/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll
new file mode 100644
index 000000000000..bced3c408c52
--- /dev/null
+++ b/test/CodeGen/AMDGPU/not-scalarize-volatile-load.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple amdgcn--amdhsa -mcpu=fiji -amdgpu-scalarize-global-loads < %s | FileCheck -check-prefix=GCN %s
+
+; GCN-LABEL: @volatile_load
+; GCN: s_load_dwordx2 s{{\[}}[[LO_SREG:[0-9]+]]:[[HI_SREG:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0
+; GCN: v_mov_b32_e32 v[[LO_VREG:[0-9]+]], s[[LO_SREG]]
+; GCN: v_mov_b32_e32 v[[HI_VREG:[0-9]+]], s[[HI_SREG]]
+; GCN: flat_load_dword v{{[0-9]+}}, v{{\[}}[[LO_VREG]]:[[HI_VREG]]{{\]}}
+
+define amdgpu_kernel void @volatile_load(i32 addrspace(1)* %arg, i32 addrspace(1)* nocapture %arg1) {
+bb:
+ %tmp18 = load volatile i32, i32 addrspace(1)* %arg, align 4
+ %tmp26 = getelementptr inbounds i32, i32 addrspace(1)* %arg1, i64 5
+ store i32 %tmp18, i32 addrspace(1)* %tmp26, align 4
+ ret void
+}
diff --git a/test/CodeGen/AMDGPU/ret_jump.ll b/test/CodeGen/AMDGPU/ret_jump.ll
index f2fbacbab82e..e7a05d94cdc4 100644
--- a/test/CodeGen/AMDGPU/ret_jump.ll
+++ b/test/CodeGen/AMDGPU/ret_jump.ll
@@ -65,7 +65,6 @@ ret.bb: ; preds = %else, %main_body
; GCN-NEXT: ; %unreachable.bb
; GCN: ds_write_b32
-; GCN: s_waitcnt
; GCN: ; divergent unreachable
; GCN: ; %ret.bb
@@ -73,6 +72,7 @@ ret.bb: ; preds = %else, %main_body
; GCN: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
+; GCN-NEXT: s_waitcnt
; GCN-NEXT: ; return
; GCN-NEXT: .Lfunc_end
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @uniform_br_nontrivial_ret_divergent_br_nontrivial_unreachable([9 x <16 x i8>] addrspace(2)* byval %arg, [17 x <16 x i8>] addrspace(2)* byval %arg1, [17 x <8 x i32>] addrspace(2)* byval %arg2, i32 addrspace(2)* byval %arg3, float inreg %arg4, i32 inreg %arg5, <2 x i32> %arg6, <2 x i32> %arg7, <2 x i32> %arg8, <3 x i32> %arg9, <2 x i32> %arg10, <2 x i32> %arg11, <2 x i32> %arg12, float %arg13, float %arg14, float %arg15, float %arg16, float %arg17, i32 inreg %arg18, i32 %arg19, float %arg20, i32 %arg21) #0 {
diff --git a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
index cb010cf15300..5b0d5274d5bc 100644
--- a/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
+++ b/test/CodeGen/AMDGPU/si-lower-control-flow-unreachable-block.ll
@@ -9,7 +9,6 @@
; GCN-NEXT: BB{{[0-9]+_[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN: s_waitcnt
; GCN-NEXT: [[RET]]: ; %UnifiedReturnBlock
; GCN-NEXT: s_or_b64 exec, exec
@@ -38,7 +37,6 @@ ret:
; GCN-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %unreachable
; GCN: ds_write_b32
; GCN: ; divergent unreachable
-; GCN: s_waitcnt
; GCN: [[RETURN]]:
; GCN-NEXT: s_or_b64 exec, exec
@@ -66,7 +64,6 @@ unreachable:
; GCN: [[UNREACHABLE]]:
; GCN: ds_write_b32
-; GCN: s_waitcnt
define amdgpu_kernel void @uniform_lower_control_flow_unreachable_terminator(i32 %arg0) #0 {
bb:
%tmp63 = icmp eq i32 %arg0, 32
diff --git a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
index 343211b0219c..333113e8a9b6 100644
--- a/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
+++ b/test/CodeGen/AMDGPU/smrd-vccz-bug.ll
@@ -5,7 +5,7 @@
; GCN-FUNC: {{^}}vccz_workaround:
; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}], 0x0
; GCN: v_cmp_neq_f32_e64 vcc, s{{[0-9]+}}, 0{{$}}
-; GCN: s_waitcnt lgkmcnt(0)
+; VCCZ-BUG: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VCCZ-BUG: s_mov_b64 vcc, vcc
; NOVCCZ-BUG-NOT: s_mov_b64 vcc, vcc
; GCN: s_cbranch_vccnz [[EXIT:[0-9A-Za-z_]+]]
diff --git a/test/CodeGen/AMDGPU/spill-m0.ll b/test/CodeGen/AMDGPU/spill-m0.ll
index 8f1aebfe9ceb..7e8fa118c2c2 100644
--- a/test/CodeGen/AMDGPU/spill-m0.ll
+++ b/test/CodeGen/AMDGPU/spill-m0.ll
@@ -18,13 +18,11 @@
; TOVMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
; TOVMEM-DAG: v_mov_b32_e32 [[SPILL_VREG:v[0-9]+]], [[M0_COPY]]
; TOVMEM: buffer_store_dword [[SPILL_VREG]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4 ; 4-byte Folded Spill
-; TOVMEM: s_waitcnt vmcnt(0)
; TOSMEM-DAG: s_mov_b32 [[M0_COPY:s[0-9]+]], m0
; TOSMEM: s_add_u32 m0, s3, 0x100{{$}}
; TOSMEM-NOT: [[M0_COPY]]
; TOSMEM: s_buffer_store_dword [[M0_COPY]], s{{\[}}[[LO]]:[[HI]]], m0 ; 4-byte Folded Spill
-; TOSMEM: s_waitcnt lgkmcnt(0)
; GCN: s_cbranch_scc1 [[ENDIF:BB[0-9]+_[0-9]+]]
diff --git a/test/CodeGen/AMDGPU/sub.i16.ll b/test/CodeGen/AMDGPU/sub.i16.ll
index cf9e714ea6d3..1d407ea9bcda 100644
--- a/test/CodeGen/AMDGPU/sub.i16.ll
+++ b/test/CodeGen/AMDGPU/sub.i16.ll
@@ -85,9 +85,9 @@ define amdgpu_kernel void @v_test_sub_i16_zext_to_i32(i32 addrspace(1)* %out, i1
; FIXME: Need to handle non-uniform case for function below (load without gep).
; GCN-LABEL: {{^}}v_test_sub_i16_zext_to_i64:
+; VI: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI: flat_load_ushort [[A:v[0-9]+]]
; VI: flat_load_ushort [[B:v[0-9]+]]
-; VI-DAG: v_mov_b32_e32 v[[VZERO:[0-9]+]], 0
; VI-DAG: v_subrev_u16_e32 v[[ADD:[0-9]+]], [[B]], [[A]]
; VI: buffer_store_dwordx2 v{{\[}}[[ADD]]:[[VZERO]]{{\]}}, off, {{s\[[0-9]+:[0-9]+\]}}, 0{{$}}
define amdgpu_kernel void @v_test_sub_i16_zext_to_i64(i64 addrspace(1)* %out, i16 addrspace(1)* %in0, i16 addrspace(1)* %in1) #1 {
diff --git a/test/CodeGen/AMDGPU/valu-i1.ll b/test/CodeGen/AMDGPU/valu-i1.ll
index 85a8929ebe58..a67f36d0a7e8 100644
--- a/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/test/CodeGen/AMDGPU/valu-i1.ll
@@ -11,7 +11,6 @@ declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
; SI: v_cmp_lt_i32_e32 vcc, 0,
; SI-NEXT: s_and_saveexec_b64 [[SAVE1:s\[[0-9]+:[0-9]+\]]], vcc
; SI-NEXT: s_xor_b64 [[SAVE2:s\[[0-9]+:[0-9]+\]]], exec, [[SAVE1]]
-; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: ; mask branch [[FLOW_BB:BB[0-9]+_[0-9]+]]
; SI-NEXT: s_cbranch_execz [[FLOW_BB]]
@@ -72,7 +71,6 @@ end:
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
-; SI-NEXT: s_waitcnt
; SI-NEXT: {{^}}[[EXIT]]:
; SI: s_or_b64 exec, exec, [[BR_SREG]]
@@ -101,7 +99,6 @@ exit:
; SI-NEXT: BB{{[0-9]+_[0-9]+}}:
; SI: buffer_store_dword
-; SI-NEXT: s_waitcnt
; SI-NEXT: {{^}}[[EXIT]]:
; SI: s_or_b64 exec, exec, [[BR_SREG]]
@@ -132,7 +129,6 @@ exit:
; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %exit
; SI: ds_write_b32
-; SI: s_waitcnt
; SI-NEXT: {{^}}[[FLOW]]:
; SI-NEXT: s_or_saveexec_b64
@@ -140,8 +136,8 @@ exit:
; SI-NEXT: ; mask branch [[UNIFIED_RETURN:BB[0-9]+_[0-9]+]]
; SI-NEXT: {{^BB[0-9]+_[0-9]+}}: ; %then
-; SI: buffer_store_dword
-; SI-NEXT: s_waitcnt
+; SI: s_waitcnt
+; SI-NEXT: buffer_store_dword
; SI-NEXT: {{^}}[[UNIFIED_RETURN]]: ; %UnifiedReturnBlock
; SI: s_or_b64 exec, exec
diff --git a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
index f4aba880ff76..1c7769894a27 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-irtranslator.ll
@@ -974,6 +974,68 @@ entry:
ret [2 x i32*] %r
}
+declare arm_aapcscc {i32, i32} @structs_target({i32, i32}, {i32*, float, i32, double})
+
+define arm_aapcscc {i32, i32} @test_structs({i32, i32} %x, {i32*, float, i32, double} %y) {
+; CHECK-LABEL: test_structs
+; CHECK: fixedStack:
+; CHECK-DAG: id: [[Y2_ID:[0-9]+]], offset: 0, size: 4
+; CHECK-DAG: id: [[Y3_ID:[0-9]+]], offset: 8, size: 8
+; CHECK: liveins: %r0, %r1, %r2, %r3
+; CHECK-DAG: [[X0:%[0-9]+]](s32) = COPY %r0
+; CHECK-DAG: [[X1:%[0-9]+]](s32) = COPY %r1
+; CHECK-DAG: [[Y0:%[0-9]+]](s32) = COPY %r2
+; CHECK-DAG: [[Y1:%[0-9]+]](s32) = COPY %r3
+; CHECK: [[Y2_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Y2_ID]]
+; CHECK: [[Y2:%[0-9]+]](s32) = G_LOAD [[Y2_ADDR]](p0){{.*}}load 4
+; CHECK: [[Y3_ADDR:%[0-9]+]](p0) = G_FRAME_INDEX %fixed-stack.[[Y3_ID]]
+; CHECK: [[Y3:%[0-9]+]](s64) = G_LOAD [[Y3_ADDR]](p0){{.*}}load 8
+; CHECK: [[X_0:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[X_1:%[0-9]+]](s64) = G_INSERT [[X_0]], [[X0]](s32), 0
+; CHECK: [[X_2:%[0-9]+]](s64) = G_INSERT [[X_1]], [[X1]](s32), 32
+; CHECK: [[X:%[0-9]+]](s64) = COPY [[X_2]]
+; CHECK: [[Y_0:%[0-9]+]](s192) = IMPLICIT_DEF
+; CHECK: [[Y_1:%[0-9]+]](s192) = G_INSERT [[Y_0]], [[Y0]](s32), 0
+; CHECK: [[Y_2:%[0-9]+]](s192) = G_INSERT [[Y_1]], [[Y1]](s32), 32
+; CHECK: [[Y_3:%[0-9]+]](s192) = G_INSERT [[Y_2]], [[Y2]](s32), 64
+; CHECK: [[Y_4:%[0-9]+]](s192) = G_INSERT [[Y_3]], [[Y3]](s64), 128
+; CHECK: [[Y:%[0-9]+]](s192) = COPY [[Y_4]]
+; CHECK: ADJCALLSTACKDOWN 16, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[X0:%[0-9]+]](s32) = G_EXTRACT [[X]](s64), 0
+; CHECK: [[X1:%[0-9]+]](s32) = G_EXTRACT [[X]](s64), 32
+; CHECK: [[Y0:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 0
+; CHECK: [[Y1:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 32
+; CHECK: [[Y2:%[0-9]+]](s32) = G_EXTRACT [[Y]](s192), 64
+; CHECK: [[Y3:%[0-9]+]](s64) = G_EXTRACT [[Y]](s192), 128
+; CHECK-DAG: %r0 = COPY [[X0]](s32)
+; CHECK-DAG: %r1 = COPY [[X1]](s32)
+; CHECK-DAG: %r2 = COPY [[Y0]](s32)
+; CHECK-DAG: %r3 = COPY [[Y1]](s32)
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[Y2_OFF:%[0-9]+]](s32) = G_CONSTANT i32 0
+; CHECK: [[Y2_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Y2_OFF]](s32)
+; CHECK: G_STORE [[Y2]](s32), [[Y2_ADDR]](p0){{.*}}store 4
+; CHECK: [[SP:%[0-9]+]](p0) = COPY %sp
+; CHECK: [[Y3_OFF:%[0-9]+]](s32) = G_CONSTANT i32 8
+; CHECK: [[Y3_ADDR:%[0-9]+]](p0) = G_GEP [[SP]], [[Y3_OFF]](s32)
+; CHECK: G_STORE [[Y3]](s64), [[Y3_ADDR]](p0){{.*}}store 8
+; CHECK: BLX @structs_target, csr_aapcs, implicit-def %lr, implicit %sp, implicit %r0, implicit %r1, implicit %r2, implicit %r3, implicit-def %r0, implicit-def %r1
+; CHECK: [[R0:%[0-9]+]](s32) = COPY %r0
+; CHECK: [[R1:%[0-9]+]](s32) = COPY %r1
+; CHECK: [[R_0:%[0-9]+]](s64) = IMPLICIT_DEF
+; CHECK: [[R_1:%[0-9]+]](s64) = G_INSERT [[R_0]], [[R0]](s32), 0
+; CHECK: [[R_2:%[0-9]+]](s64) = G_INSERT [[R_1]], [[R1]](s32), 32
+; CHECK: [[R:%[0-9]+]](s64) = COPY [[R_2]]
+; CHECK: ADJCALLSTACKUP 16, 0, 14, _, implicit-def %sp, implicit %sp
+; CHECK: [[R0:%[0-9]+]](s32) = G_EXTRACT [[R]](s64), 0
+; CHECK: [[R1:%[0-9]+]](s32) = G_EXTRACT [[R]](s64), 32
+; CHECK: %r0 = COPY [[R0]](s32)
+; CHECK: %r1 = COPY [[R1]](s32)
+; CHECK: BX_RET 14, _, implicit %r0, implicit %r1
+ %r = notail call arm_aapcscc {i32, i32} @structs_target({i32, i32} %x, {i32*, float, i32, double} %y)
+ ret {i32, i32} %r
+}
+
define i32 @test_shufflevector_s32_v2s32(i32 %arg) {
; CHECK-LABEL: name: test_shufflevector_s32_v2s32
; CHECK: [[ARG:%[0-9]+]](s32) = COPY %r0
diff --git a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
index ef30cb1063f8..34f00aebe1be 100644
--- a/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
+++ b/test/CodeGen/ARM/GlobalISel/arm-unsupported.ll
@@ -54,10 +54,15 @@ define [16 x i32] @test_ret_demotion() {
ret [16 x i32] %res
}
-define void @test_structs({i32, i32} %struct) {
-; CHECK: remark: {{.*}} unable to lower arguments: void ({ i32, i32 })*
-; CHECK-LABEL: warning: Instruction selection used fallback path for test_structs
- ret void
+%large.struct = type { i32, i32, i32, i32, i32} ; Doesn't fit in R0-R3
+
+declare %large.struct @large_struct_return_target()
+
+define %large.struct @test_large_struct_return() {
+; CHECK: remark: {{.*}} unable to translate instruction: call{{.*}} @large_struct_return_target
+; CHECK-LABEL: warning: Instruction selection used fallback path for test_large_struct_return
+ %r = call %large.struct @large_struct_return_target()
+ ret %large.struct %r
}
define void @test_vararg_definition(i32 %a, ...) {
diff --git a/test/CodeGen/ARM/cortex-a57-misched-alu.ll b/test/CodeGen/ARM/cortex-a57-misched-alu.ll
new file mode 100644
index 000000000000..960ee87532b0
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-alu.ll
@@ -0,0 +1,81 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+; Check the latency for ALU shifted operand variants.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: foo:BB#0 entry
+
+; ALU, basic - 1 cyc I0/I1
+; CHECK: EORrr
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 1
+
+; ALU, shift by immed - 2 cyc M
+; CHECK: ADDrsi
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+
+; ALU, shift by register, unconditional - 2 cyc M
+; CHECK: RSBrsr
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+
+; ALU, shift by register, conditional - 2 cyc I0/I1
+; CHECK: ANDrsr
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+
+; Checking scheduling units
+
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; Skipping COPY
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; CHECK: Scheduling
+; CHECK-SAME: ANDrsr
+; CHECK: Ready
+; CHECK-NEXT: A57UnitI
+
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; CHECK: Scheduling
+; CHECK-SAME: CMPri
+; CHECK: Ready
+; CHECK-NEXT: A57UnitI
+
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; CHECK: Scheduling
+; CHECK-SAME: RSBrsr
+; CHECK: Ready
+; CHECK-NEXT: A57UnitM
+
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; CHECK: Scheduling
+; CHECK-SAME: ADDrsi
+; CHECK: Ready
+; CHECK-NEXT: A57UnitM
+
+; CHECK: ** ScheduleDAGMILive::schedule picking next node
+; CHECK: Scheduling
+; CHECK-SAME: EORrr
+; CHECK: Ready
+; CHECK-NEXT: A57UnitI
+
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8r-arm-none-eabi"
+
+; Function Attrs: norecurse nounwind readnone
+define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr #0 {
+entry:
+ %xor = xor i32 %a, %b
+ %xor_shl = shl i32 %xor, 2
+ %add = add i32 %xor_shl, %d
+ %add_ashr = ashr i32 %add, %a
+ %sub = sub i32 %add_ashr, %a
+ %sub_lshr_pred = lshr i32 %sub, %c
+ %pred = icmp sgt i32 %a, 4
+ %and = and i32 %sub_lshr_pred, %b
+ %rv = select i1 %pred, i32 %and, i32 %d
+ ret i32 %rv
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-basic.ll b/test/CodeGen/ARM/cortex-a57-misched-basic.ll
new file mode 100644
index 000000000000..2ec50b9d3343
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-basic.ll
@@ -0,0 +1,53 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=A57_SCHED
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC
+
+; Check instruction latencies for both generic and cortex-a57.
+; SDIV should be scheduled at the beginning of the block (20 cycles on an
+; independent M unit).
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: foo:BB#0 entry
+
+; GENERIC: SDIV
+; GENERIC: Latency : 1
+; GENERIC: EORrr
+; GENERIC: Latency : 1
+; GENERIC: LDRi12
+; GENERIC: Latency : 4
+; GENERIC: ADDrr
+; GENERIC: Latency : 1
+; GENERIC: SUBrr
+; GENERIC: Latency : 1
+
+; A57_SCHED: SDIV
+; A57_SCHED: Latency : 20
+; A57_SCHED: EORrr
+; A57_SCHED: Latency : 1
+; A57_SCHED: LDRi12
+; A57_SCHED: Latency : 4
+; A57_SCHED: ADDrr
+; A57_SCHED: Latency : 1
+; A57_SCHED: SUBrr
+; A57_SCHED: Latency : 1
+
+; CHECK: ** Final schedule for BB#0 ***
+; GENERIC: LDRi12
+; GENERIC: SDIV
+; A57_SCHED: SDIV
+; A57_SCHED: LDRi12
+; CHECK: ********** INTERVALS **********
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv8r-arm-none-eabi"
+
+; Function Attrs: norecurse nounwind readnone
+define hidden i32 @foo(i32 %a, i32 %b, i32 %c, i32* %d) local_unnamed_addr #0 {
+entry:
+ %xor = xor i32 %c, %b
+ %ld = load i32, i32* %d
+ %add = add nsw i32 %xor, %ld
+ %div = sdiv i32 %a, %b
+ %sub = sub i32 %div, %add
+ ret i32 %sub
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll
new file mode 100644
index 000000000000..d54848a6bcf1
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll
@@ -0,0 +1,37 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+;
+
+@a = global i32 0, align 4
+@b = global i32 0, align 4
+@c = global i32 0, align 4
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling pass to combine the single loads into an LDM instruction
+; CHECK: ********** MI Scheduling **********
+; CHECK: LDMIA_UPD
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 4
+; CHECK: Successors:
+; CHECK: data
+; CHECK-SAME: Latency=1
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=3
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=3
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=4
+define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize {
+ %1 = load i32, i32* @a, align 4
+ %2 = load i32, i32* @b, align 4
+ %3 = load i32, i32* @c, align 4
+
+ %ptr_after = getelementptr i32, i32* @a, i32 3
+
+ %ptr_val = ptrtoint i32* %ptr_after to i32
+ %mul1 = mul i32 %ptr_val, %1
+ %mul2 = mul i32 %mul1, %2
+ %mul3 = mul i32 %mul2, %3
+ ret i32 %mul3
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-ldm.ll b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll
new file mode 100644
index 000000000000..9cb076651f5b
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-ldm.ll
@@ -0,0 +1,28 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling pass to combine the single loads into an LDM instruction
+; CHECK: ********** MI Scheduling **********
+; CHECK: LDMIA
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 3
+; CHECK: Successors:
+; CHECK: data
+; CHECK-SAME: Latency=3
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=3
+
+define i32 @foo(i32* %a) nounwind optsize {
+entry:
+ %b = getelementptr i32, i32* %a, i32 1
+ %c = getelementptr i32, i32* %a, i32 2
+ %0 = load i32, i32* %a, align 4
+ %1 = load i32, i32* %b, align 4
+ %2 = load i32, i32* %c, align 4
+
+ %mul1 = mul i32 %0, %1
+ %mul2 = mul i32 %mul1, %2
+ ret i32 %mul2
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll
new file mode 100644
index 000000000000..774b0a907e39
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-stm-wrback.ll
@@ -0,0 +1,36 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; An N=3 STMIA_UPD should have a latency of 2 cycles and a writeback latency of 1 cycle
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling pass to combine the single stores into an STM instruction
+; CHECK: ********** MI Scheduling **********
+; CHECK: schedule starting
+; CHECK: STMIA_UPD
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+; CHECK: Successors
+; CHECK: data
+; CHECK-SAME: Latency=1
+
+define i32 @bar(i32 %v0, i32 %v1, i32 %v2, i32* %addr) {
+
+ %addr.1 = getelementptr i32, i32* %addr, i32 0
+ store i32 %v0, i32* %addr.1
+
+ %addr.2 = getelementptr i32, i32* %addr, i32 1
+ store i32 %v1, i32* %addr.2
+
+ %addr.3 = getelementptr i32, i32* %addr, i32 2
+ store i32 %v2, i32* %addr.3
+
+ %ptr_after = getelementptr i32, i32* %addr, i32 3
+ %val = ptrtoint i32* %ptr_after to i32
+
+ %rv1 = mul i32 %val, %v0
+ %rv2 = mul i32 %rv1, %v1
+ %rv3 = mul i32 %rv2, %v2
+
+ ret i32 %rv3
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-stm.ll b/test/CodeGen/ARM/cortex-a57-misched-stm.ll
new file mode 100644
index 000000000000..474f39d84bae
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-stm.ll
@@ -0,0 +1,29 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; An N=3 STMIB should have a latency of 2 cycles
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling pass to combine the single stores into an STM instruction
+; CHECK: ********** MI Scheduling **********
+; CHECK: schedule starting
+; CHECK: STMIB
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+
+define i32 @test_stm(i32 %v0, i32 %v1, i32* %addr) {
+
+ %addr.1 = getelementptr i32, i32* %addr, i32 1
+ store i32 %v0, i32* %addr.1
+
+ %addr.2 = getelementptr i32, i32* %addr, i32 2
+ store i32 %v1, i32* %addr.2
+
+ %addr.3 = getelementptr i32, i32* %addr, i32 3
+ %val = ptrtoint i32* %addr to i32
+ store i32 %val, i32* %addr.3
+
+ %rv = add i32 %v0, %v1
+
+ ret i32 %rv
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
new file mode 100644
index 000000000000..a9223e1e2a99
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
@@ -0,0 +1,77 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; Check the latencies of VMUL/VFMA multiply-accumulate chains.
+
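+; Comment lines starting with "; >" describe the expected latency verified by
+; the CHECK line that follows them.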
+define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test1:BB#0
+
+; CHECK: VMULS
+; > VMULS common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; > VMULS read-advanced latency to VMLAS = 0
+; CHECK-SAME: Latency=0
+
+; CHECK: VMLAS
+; > VMLAS common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLAS read-advanced latency to the next VMLAS = 4
+; CHECK-SAME: Latency=4
+
+; CHECK: VMLAS
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLAS not-optimized latency to VMOVRS = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLAS, VMLAS
+ %mul1 = fmul float %f1, %f2
+ %mul2 = fmul float %f3, %f4
+ %mul3 = fmul float %f5, %f6
+ %add1 = fadd float %mul1, %mul2
+ %add2 = fadd float %add1, %mul3
+ ret float %add2
+}
+
+; ASIMD form
+define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test2:BB#0
+
+; CHECK: VMULfd
+; > VMULfd common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; > VMULfd read-advanced latency to VMLAfd = 0
+; CHECK-SAME: Latency=0
+
+; CHECK: VMLAfd
+; > VMLAfd common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLAfd read-advanced latency to the next VMLAfd = 4
+; CHECK-SAME: Latency=4
+
+; CHECK: VMLAfd
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLAfd not-optimized latency to VMOVRRD = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULfd, VMLAfd, VMLAfd
+ %mul1 = fmul <2 x float> %f1, %f2
+ %mul2 = fmul <2 x float> %f3, %f4
+ %mul3 = fmul <2 x float> %f5, %f6
+ %add1 = fadd <2 x float> %mul1, %mul2
+ %add2 = fadd <2 x float> %add1, %mul3
+ ret <2 x float> %add2
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
new file mode 100644
index 000000000000..6cfa823fb969
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
@@ -0,0 +1,50 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+;
+
+@a = global double 0.0, align 4
+@b = global double 0.0, align 4
+@c = global double 0.0, align 4
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling run so that single loads are combined into a VLDM instruction.
+; CHECK: ********** MI Scheduling **********
+; CHECK: VLDMDIA_UPD
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 6
+; CHECK: Successors:
+; CHECK: data
+; CHECK-SAME: Latency=1
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=1
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=5
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=5
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=6
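+; The two Latency=1 successors are presumably the uses of the written-back
+; GPR (the store and the first multiply); the Latency=5/6 successors are the
+; loaded D-register values.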
+define i32 @bar(i32* %iptr) minsize optsize {
+ %1 = load double, double* @a, align 8
+ %2 = load double, double* @b, align 8
+ %3 = load double, double* @c, align 8
+
+ %ptr_after = getelementptr double, double* @a, i32 3
+
+ %ptr_new_ival = ptrtoint double* %ptr_after to i32
+ %ptr_new = inttoptr i32 %ptr_new_ival to i32*
+
+ store i32 %ptr_new_ival, i32* %iptr, align 8
+
+ %v1 = fptoui double %1 to i32
+
+ %mul1 = mul i32 %ptr_new_ival, %v1
+
+ %v2 = fptoui double %2 to i32
+ %v3 = fptoui double %3 to i32
+
+ %mul2 = mul i32 %mul1, %v2
+ %mul3 = mul i32 %mul2, %v3
+
+ ret i32 %mul3
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vldm.ll b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll
new file mode 100644
index 000000000000..218b5b41a7e4
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-vldm.ll
@@ -0,0 +1,30 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling run so that single loads are combined into a VLDM instruction.
+; CHECK: ********** MI Scheduling **********
+; CHECK: VLDMDIA
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 6
+; CHECK: Successors:
+; CHECK: data
+; CHECK-SAME: Latency=5
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=5
+; CHECK-NEXT: data
+; CHECK-SAME: Latency=6
+
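+; The three adjacent double loads below should be merged into the single
+; VLDMDIA checked above.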
+define double @foo(double* %a) nounwind optsize {
+entry:
+ %b = getelementptr double, double* %a, i32 1
+ %c = getelementptr double, double* %a, i32 2
+ %0 = load double, double* %a, align 4
+ %1 = load double, double* %b, align 4
+ %2 = load double, double* %c, align 4
+
+ %mul1 = fmul double %0, %1
+ %mul2 = fmul double %mul1, %2
+ ret double %mul2
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
new file mode 100644
index 000000000000..af1c469d4443
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-vstm-wrback.ll
@@ -0,0 +1,43 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling run so that single stores are combined into a VSTM instruction.
+; CHECK: ********** MI Scheduling **********
+; CHECK: schedule starting
+; CHECK: VSTMDIA_UPD
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 4
+; CHECK: Successors:
+; CHECK: data
+; CHECK-SAME: Latency=1
+
+@a = global double 0.0, align 4
+@b = global double 0.0, align 4
+@c = global double 0.0, align 4
+
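+; Storing and multiplying the post-increment address below presumably keeps
+; the VSTMDIA_UPD writeback live, producing the Latency=1 data edge checked
+; above.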
+define i32 @bar(double* %vptr, i32 %iv1, i32* %iptr) minsize {
+
+ %vp2 = getelementptr double, double* %vptr, i32 1
+ %vp3 = getelementptr double, double* %vptr, i32 2
+
+ %v1 = load double, double* %vptr, align 8
+ %v2 = load double, double* %vp2, align 8
+ %v3 = load double, double* %vp3, align 8
+
+ store double %v1, double* @a, align 8
+ store double %v2, double* @b, align 8
+ store double %v3, double* @c, align 8
+
+ %ptr_after = getelementptr double, double* @a, i32 3
+
+ %ptr_new_ival = ptrtoint double* %ptr_after to i32
+ %ptr_new = inttoptr i32 %ptr_new_ival to i32*
+
+ store i32 %ptr_new_ival, i32* %iptr, align 8
+
+ %mul1 = mul i32 %ptr_new_ival, %iv1
+
+ ret i32 %mul1
+}
+
diff --git a/test/CodeGen/ARM/cortex-a57-misched-vstm.ll b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
new file mode 100644
index 000000000000..f31474f66558
--- /dev/null
+++ b/test/CodeGen/ARM/cortex-a57-misched-vstm.ll
@@ -0,0 +1,23 @@
+; REQUIRES: asserts
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -misched-postra -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+
+; CHECK: ********** MI Scheduling **********
+; We need a second, post-RA scheduling run so that single stores are combined into a VSTM instruction.
+; CHECK: ********** MI Scheduling **********
+; CHECK: schedule starting
+; CHECK: VSTMDIA
+; CHECK: rdefs left
+; CHECK-NEXT: Latency : 2
+
+%bigVec = type [2 x double]
+
+@var = global %bigVec zeroinitializer
+
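+; The aggregate store should be lowered to the VSTMDIA checked above.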
+define void @bar(%bigVec* %ptr) {
+
+ %tmp = load %bigVec, %bigVec* %ptr
+ store %bigVec %tmp, %bigVec* @var
+
+ ret void
+}
+
diff --git a/test/CodeGen/ARM/global-merge-external.ll b/test/CodeGen/ARM/global-merge-external.ll
index a9e0d199705a..03c977614320 100644
--- a/test/CodeGen/ARM/global-merge-external.ll
+++ b/test/CodeGen/ARM/global-merge-external.ll
@@ -2,6 +2,7 @@
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=true | FileCheck %s --check-prefix=CHECK-MERGE
; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -global-merge-on-external=false | FileCheck %s --check-prefix=CHECK-NO-MERGE
; RUN: llc < %s -mtriple=arm-macho -arm-global-merge | FileCheck %s --check-prefix=CHECK-NO-MERGE
+; RUN: llc < %s -mtriple=arm-eabi -arm-global-merge -relocation-model=pic | FileCheck %s --check-prefix=CHECK-NO-MERGE
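+; Check that globals are not merged when compiling with -relocation-model=pic
+; (hence the CHECK-NO-MERGE prefix on that RUN line).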
@x = global i32 0, align 4
@y = global i32 0, align 4
diff --git a/test/CodeGen/Hexagon/newify-crash.ll b/test/CodeGen/Hexagon/newify-crash.ll
new file mode 100644
index 000000000000..705170b13a59
--- /dev/null
+++ b/test/CodeGen/Hexagon/newify-crash.ll
@@ -0,0 +1,44 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+;
+; Check that this test case doesn't crash.
+; CHECK: vadd
+
+target triple = "hexagon"
+
+define void @fred() #0 {
+b0:
+ br label %b1
+
+b1: ; preds = %b7, %b0
+ %v2 = phi i32 [ 0, %b0 ], [ %v16, %b7 ]
+ %v3 = phi <32 x i32> [ undef, %b0 ], [ %v15, %b7 ]
+ %v4 = icmp slt i32 %v2, undef
+ br i1 %v4, label %b5, label %b7
+
+b5: ; preds = %b1
+ %v6 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v3, <32 x i32> undef)
+ br label %b7
+
+b7: ; preds = %b5, %b1
+ %v8 = phi <32 x i32> [ %v6, %b5 ], [ %v3, %b1 ]
+ %v9 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v8, <32 x i32> undef)
+ %v10 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v9, <32 x i32> undef)
+ %v11 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v10, <32 x i32> undef)
+ %v12 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v11, <32 x i32> undef)
+ %v13 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v12, <32 x i32> zeroinitializer)
+ %v14 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v13, <32 x i32> undef)
+ %v15 = tail call <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32> %v14, <32 x i32> undef)
+ %v16 = add nsw i32 %v2, 8
+ %v17 = icmp eq i32 %v16, 64
+ br i1 %v17, label %b18, label %b1
+
+b18: ; preds = %b7
+ tail call void @f0() #0
+ ret void
+}
+
+declare <32 x i32> @llvm.hexagon.V6.vaddhsat.128B(<32 x i32>, <32 x i32>) #1
+declare void @f0() #0
+
+attributes #0 = { nounwind "target-cpu"="hexagonv60" "target-features"="+hvx,+hvx-double" }
+attributes #1 = { nounwind readnone }
diff --git a/test/CodeGen/MIR/Generic/runPass.mir b/test/CodeGen/MIR/Generic/runPass.mir
index eeef9d526510..33380d4c6bb4 100644
--- a/test/CodeGen/MIR/Generic/runPass.mir
+++ b/test/CodeGen/MIR/Generic/runPass.mir
@@ -1,4 +1,5 @@
# RUN: llc -run-pass=greedy -debug-pass=Arguments -o - %s | FileCheck %s
+# RUN: llc -run-pass=regallocbasic -debug-pass=Arguments -o - %s | FileCheck %s
# Check that passes are initialized correctly, so that it's possible to
# use -run-pass.
diff --git a/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll b/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll
new file mode 100644
index 000000000000..804ea1e5c438
--- /dev/null
+++ b/test/CodeGen/Mips/micromips-sizereduction/micromips-lbu16-lhu16-sb16-sh16.ll
@@ -0,0 +1,40 @@
+; RUN: llc -march=mipsel -mcpu=mips32r2 -mattr=+micromips -verify-machineinstrs < %s | FileCheck %s
+
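+; Check that the byte/halfword loads and stores below are selected as their
+; 16-bit microMIPS forms (lbu16/sb16 and lhu16/sh16).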
+define void @f1(i8* %p) {
+entry:
+; CHECK-LABEL: f1:
+; CHECK: lbu16
+; CHECK: sb16
+ %0 = load i8, i8* %p, align 4
+ %a = zext i8 %0 to i32
+ %and = and i32 %a, 1
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i8 0, i8* %p, align 1
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+define void @f2(i16* %p) {
+entry:
+; CHECK-LABEL: f2:
+; CHECK: lhu16
+; CHECK: sh16
+ %0 = load i16, i16* %p, align 2
+ %a = zext i16 %0 to i32
+ %and = and i32 %a, 2
+ %cmp = icmp eq i32 %and, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ store i16 0, i16* %p, align 2
+ br label %if.end
+
+if.end:
+ ret void
+}
+
diff --git a/test/CodeGen/PowerPC/scavenging.mir b/test/CodeGen/PowerPC/scavenging.mir
new file mode 100644
index 000000000000..8b5c26230bc6
--- /dev/null
+++ b/test/CodeGen/PowerPC/scavenging.mir
@@ -0,0 +1,149 @@
+# RUN: llc -mtriple=ppc64-- -run-pass scavenger-test -verify-machineinstrs -o - %s | FileCheck %s
+---
+# CHECK-LABEL: name: noscav0
+name: noscav0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK: [[REG0:%r[0-9]+]] = LI 42
+ ; CHECK-NEXT: NOP implicit [[REG0]]
+ %0 : gprc = LI 42
+ NOP implicit %0
+
+ ; CHECK: [[REG1:%r[0-9]+]] = LI 42
+ ; CHECK-NEXT: NOP
+ ; CHECK-NEXT: NOP implicit [[REG1]]
+ ; CHECK-NEXT: NOP
+ ; CHECK-NEXT: NOP implicit [[REG1]]
+ %1 : gprc = LI 42
+ NOP
+ NOP implicit %1
+ NOP
+ NOP implicit %1
+
+ ; CHECK: [[REG2:%r[0-9]+]] = LI 42
+ ; CHECK-NEXT: NOP implicit [[REG2]]
+ %2 : gprc = LI 42
+ NOP implicit %2
+
+ %x0 = IMPLICIT_DEF
+ %x1 = IMPLICIT_DEF
+ %x2 = IMPLICIT_DEF
+ %x3 = IMPLICIT_DEF
+ %x4 = IMPLICIT_DEF
+ %x27 = IMPLICIT_DEF
+ %x28 = IMPLICIT_DEF
+ %x29 = IMPLICIT_DEF
+ %x30 = IMPLICIT_DEF
+
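+    ; All the x-registers defined above are live here, so the new virtual
+    ; register must not be assigned to any of them.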
+ ; CHECK-NOT: %x0 = LI 42
+ ; CHECK-NOT: %x1 = LI 42
+ ; CHECK-NOT: %x2 = LI 42
+ ; CHECK-NOT: %x3 = LI 42
+ ; CHECK-NOT: %x4 = LI 42
+ ; CHECK-NOT: %x5 = LI 42
+ ; CHECK-NOT: %x27 = LI 42
+ ; CHECK-NOT: %x28 = LI 42
+ ; CHECK-NOT: %x29 = LI 42
+ ; CHECK-NOT: %x30 = LI 42
+ ; CHECK: [[REG3:%r[0-9]+]] = LI 42
+ ; CHECK-NEXT: %x5 = IMPLICIT_DEF
+ ; CHECK-NEXT: NOP implicit [[REG2]]
+ ; CHECK-NEXT: NOP implicit [[REG3]]
+ %3 : gprc = LI 42
+ %x5 = IMPLICIT_DEF
+ NOP implicit %2
+ NOP implicit %3
+
+ NOP implicit %x0
+ NOP implicit %x1
+ NOP implicit %x2
+ NOP implicit %x3
+ NOP implicit %x4
+ NOP implicit %x5
+ NOP implicit %x27
+ NOP implicit %x28
+ NOP implicit %x29
+ NOP implicit %x30
+...
+---
+# CHECK-LABEL: name: scav0
+name: scav0
+tracksRegLiveness: true
+stack:
+  # A variable-sized object should cause an emergency spill slot to be
+  # reserved in the RegScavenger.
+ - { id: 0, type: variable-sized, offset: -32, alignment: 1 }
+body: |
+ bb.0:
+ %x0 = IMPLICIT_DEF
+ %x1 = IMPLICIT_DEF
+ %x2 = IMPLICIT_DEF
+ %x3 = IMPLICIT_DEF
+ %x4 = IMPLICIT_DEF
+ %x5 = IMPLICIT_DEF
+ %x6 = IMPLICIT_DEF
+ %x7 = IMPLICIT_DEF
+ %x8 = IMPLICIT_DEF
+ %x9 = IMPLICIT_DEF
+ %x10 = IMPLICIT_DEF
+ %x11 = IMPLICIT_DEF
+ %x12 = IMPLICIT_DEF
+ %x13 = IMPLICIT_DEF
+ %x14 = IMPLICIT_DEF
+ %x15 = IMPLICIT_DEF
+ %x16 = IMPLICIT_DEF
+ %x17 = IMPLICIT_DEF
+ %x18 = IMPLICIT_DEF
+ %x19 = IMPLICIT_DEF
+ %x20 = IMPLICIT_DEF
+ %x21 = IMPLICIT_DEF
+ %x22 = IMPLICIT_DEF
+ %x23 = IMPLICIT_DEF
+ %x24 = IMPLICIT_DEF
+ %x25 = IMPLICIT_DEF
+ %x26 = IMPLICIT_DEF
+ %x27 = IMPLICIT_DEF
+ %x28 = IMPLICIT_DEF
+ %x29 = IMPLICIT_DEF
+ %x30 = IMPLICIT_DEF
+
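+    ; With every x-register above live, the scavenger must spill one (STD),
+    ; use it to materialize the constant, and reload it afterwards (LD).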
+ ; CHECK: STD killed [[SPILLEDREG:%x[0-9]+]]
+ ; CHECK: [[SPILLEDREG]] = LI8 42
+ ; CHECK: NOP implicit [[SPILLEDREG]]
+ ; CHECK: [[SPILLEDREG]] = LD
+ %0 : g8rc = LI8 42
+ NOP implicit %0
+
+ NOP implicit %x0
+ NOP implicit %x1
+ NOP implicit %x2
+ NOP implicit %x3
+ NOP implicit %x4
+ NOP implicit %x5
+ NOP implicit %x6
+ NOP implicit %x7
+ NOP implicit %x8
+ NOP implicit %x9
+ NOP implicit %x10
+ NOP implicit %x11
+ NOP implicit %x12
+ NOP implicit %x13
+ NOP implicit %x14
+ NOP implicit %x15
+ NOP implicit %x16
+ NOP implicit %x17
+ NOP implicit %x18
+ NOP implicit %x19
+ NOP implicit %x20
+ NOP implicit %x21
+ NOP implicit %x22
+ NOP implicit %x23
+ NOP implicit %x24
+ NOP implicit %x25
+ NOP implicit %x26
+ NOP implicit %x27
+ NOP implicit %x28
+ NOP implicit %x29
+ NOP implicit %x30
+...
diff --git a/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir b/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir
new file mode 100644
index 000000000000..2f532f0a5efb
--- /dev/null
+++ b/test/CodeGen/SystemZ/RAbasic-invalid-LR-update.mir
@@ -0,0 +1,267 @@
+# RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -run-pass=regallocbasic %s -o - | FileCheck %s
+# This test used to assert in RABasic. The problem was that when we split
+# live ranges, we were not updating the LiveRegMatrix properly, so the
+# interference calculation would not match what the assignment thought it
+# could do. In other words, this test case needs to trigger live-range
+# splitting to exercise the problem.
+#
+# PR33057
+--- |
+ target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+ target triple = "s390x--linux-gnu"
+
+ define void @autogen_SD21418() #0 {
+ ret void
+ }
+
+ attributes #0 = { "target-cpu"="z13" }
+
+...
+
+# CHECK: name: autogen_SD21418
+# Check that at least one live-range has been split
+# CHECK: id: 114, class
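+# (The function only defines vregs 0-113, so an id of 114 can only come from
+# a split live range.)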
+---
+name: autogen_SD21418
+alignment: 2
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: vr128bit }
+ - { id: 1, class: vr128bit }
+ - { id: 2, class: vr128bit }
+ - { id: 3, class: vr64bit }
+ - { id: 4, class: gr64bit }
+ - { id: 5, class: vr128bit }
+ - { id: 6, class: grx32bit }
+ - { id: 7, class: vr128bit }
+ - { id: 8, class: vr128bit }
+ - { id: 9, class: gr32bit }
+ - { id: 10, class: gr64bit }
+ - { id: 11, class: vr128bit }
+ - { id: 12, class: fp64bit }
+ - { id: 13, class: vr64bit }
+ - { id: 14, class: vr64bit }
+ - { id: 15, class: gr64bit }
+ - { id: 16, class: gr128bit }
+ - { id: 17, class: gr64bit }
+ - { id: 18, class: gr32bit }
+ - { id: 19, class: gr32bit }
+ - { id: 20, class: gr128bit }
+ - { id: 21, class: gr32bit }
+ - { id: 22, class: gr64bit }
+ - { id: 23, class: gr32bit }
+ - { id: 24, class: gr32bit }
+ - { id: 25, class: gr128bit }
+ - { id: 26, class: grx32bit }
+ - { id: 27, class: gr64bit }
+ - { id: 28, class: gr64bit }
+ - { id: 29, class: vr128bit }
+ - { id: 30, class: vr128bit }
+ - { id: 31, class: gr64bit }
+ - { id: 32, class: gr32bit }
+ - { id: 33, class: gr32bit }
+ - { id: 34, class: gr128bit }
+ - { id: 35, class: gr32bit }
+ - { id: 36, class: vr128bit }
+ - { id: 37, class: gr64bit }
+ - { id: 38, class: gr32bit }
+ - { id: 39, class: gr32bit }
+ - { id: 40, class: gr128bit }
+ - { id: 41, class: gr32bit }
+ - { id: 42, class: addr64bit }
+ - { id: 43, class: grx32bit }
+ - { id: 44, class: addr64bit }
+ - { id: 45, class: vr64bit }
+ - { id: 46, class: vr64bit }
+ - { id: 47, class: gr32bit }
+ - { id: 48, class: gr32bit }
+ - { id: 49, class: grx32bit }
+ - { id: 50, class: vr64bit }
+ - { id: 51, class: gr64bit }
+ - { id: 52, class: grx32bit }
+ - { id: 53, class: gr32bit }
+ - { id: 54, class: gr64bit }
+ - { id: 55, class: grx32bit }
+ - { id: 56, class: gr32bit }
+ - { id: 57, class: gr128bit }
+ - { id: 58, class: gr128bit }
+ - { id: 59, class: gr32bit }
+ - { id: 60, class: gr64bit }
+ - { id: 61, class: grx32bit }
+ - { id: 62, class: gr32bit }
+ - { id: 63, class: gr64bit }
+ - { id: 64, class: grx32bit }
+ - { id: 65, class: gr32bit }
+ - { id: 66, class: gr128bit }
+ - { id: 67, class: gr128bit }
+ - { id: 68, class: grx32bit }
+ - { id: 69, class: gr64bit }
+ - { id: 70, class: gr64bit }
+ - { id: 71, class: vr128bit }
+ - { id: 72, class: vr128bit }
+ - { id: 73, class: gr64bit }
+ - { id: 74, class: grx32bit }
+ - { id: 75, class: gr32bit }
+ - { id: 76, class: gr64bit }
+ - { id: 77, class: grx32bit }
+ - { id: 78, class: gr32bit }
+ - { id: 79, class: gr128bit }
+ - { id: 80, class: gr128bit }
+ - { id: 81, class: gr32bit }
+ - { id: 82, class: vr128bit }
+ - { id: 83, class: gr64bit }
+ - { id: 84, class: grx32bit }
+ - { id: 85, class: gr32bit }
+ - { id: 86, class: gr64bit }
+ - { id: 87, class: grx32bit }
+ - { id: 88, class: gr32bit }
+ - { id: 89, class: gr128bit }
+ - { id: 90, class: gr128bit }
+ - { id: 91, class: gr32bit }
+ - { id: 92, class: grx32bit }
+ - { id: 93, class: gr64bit }
+ - { id: 94, class: gr32bit }
+ - { id: 95, class: gr32bit }
+ - { id: 96, class: gr32bit }
+ - { id: 97, class: gr64bit }
+ - { id: 98, class: gr64bit }
+ - { id: 99, class: grx32bit }
+ - { id: 100, class: grx32bit }
+ - { id: 101, class: gr128bit }
+ - { id: 102, class: gr128bit }
+ - { id: 103, class: gr128bit }
+ - { id: 104, class: gr64bit }
+ - { id: 105, class: gr128bit }
+ - { id: 106, class: gr128bit }
+ - { id: 107, class: gr64bit }
+ - { id: 108, class: gr128bit }
+ - { id: 109, class: gr128bit }
+ - { id: 110, class: gr64bit }
+ - { id: 111, class: gr128bit }
+ - { id: 112, class: gr128bit }
+ - { id: 113, class: gr64bit }
+constants:
+ - id: 0
+ value: double 0xD55960F86F577076
+ alignment: 8
+body: |
+ bb.0:
+ %11 = VGBM 0
+ %43 = LHIMux 0
+ %44 = LARL %const.0
+ %45 = VL64 %44, 0, _ :: (load 8 from constant-pool)
+
+ bb.1:
+ ADJCALLSTACKDOWN 0, 0
+ %12 = LZDR
+ %f0d = COPY %12
+ CallBRASL $fmod, killed %f0d, undef %f2d, csr_systemz, implicit-def dead %r14d, implicit-def dead %cc, implicit-def %f0d
+ ADJCALLSTACKUP 0, 0
+ KILL killed %f0d
+
+ bb.2:
+ %17 = VLGVH %11, _, 0
+ %19 = LHR %17.subreg_l32
+ undef %20.subreg_l64 = LGHI 0
+ %20 = DSGFR %20, %19
+ %22 = VLGVH %11, _, 3
+ %24 = LHR %22.subreg_l32
+ undef %25.subreg_l64 = LGHI 0
+ %25 = DSGFR %25, %24
+ %31 = VLGVH %11, _, 1
+ %33 = LHR %31.subreg_l32
+ undef %34.subreg_l64 = LGHI 0
+ %34 = DSGFR %34, %33
+ %37 = VLGVH %11, _, 2
+ %39 = LHR %37.subreg_l32
+ undef %40.subreg_l64 = LGHI 0
+ %40 = DSGFR %40, %39
+ CHIMux %43, 0, implicit-def %cc
+ BRC 14, 6, %bb.2, implicit killed %cc
+ J %bb.3
+
+ bb.3:
+ WFCDB undef %46, %45, implicit-def %cc
+ %48 = IPM implicit killed %cc
+ %48 = AFIMux %48, 268435456, implicit-def dead %cc
+ %6 = RISBMux undef %6, %48, 31, 159, 35
+ WFCDB undef %50, %45, implicit-def %cc
+ BRC 15, 6, %bb.1, implicit killed %cc
+ J %bb.4
+
+ bb.4:
+ %36 = VLVGP %25.subreg_l64, %25.subreg_l64
+ %36 = VLVGH %36, %20.subreg_l32, _, 0
+ %36 = VLVGH %36, %34.subreg_l32, _, 1
+ dead %36 = VLVGH %36, %40.subreg_l32, _, 2
+ %4 = LG undef %42, 0, _ :: (load 8 from `i64* undef`)
+ undef %57.subreg_h64 = LLILL 0
+ undef %66.subreg_h64 = LLILL 0
+ undef %79.subreg_h64 = LLILL 0
+ undef %89.subreg_h64 = LLILL 0
+ %92 = LHIMux 0
+
+ bb.5:
+
+ bb.6:
+ %51 = VLGVH undef %7, _, 0
+ %53 = LLHRMux %51.subreg_l32
+ %54 = VLGVH undef %1, _, 0
+ %57.subreg_l32 = LLHRMux %54.subreg_l32
+ %58 = COPY %57
+ %58 = DLR %58, %53
+ %60 = VLGVH undef %7, _, 3
+ %62 = LLHRMux %60.subreg_l32
+ %63 = VLGVH undef %1, _, 3
+ %66.subreg_l32 = LLHRMux %63.subreg_l32
+ %67 = COPY %66
+ %67 = DLR %67, %62
+ %73 = VLGVH undef %7, _, 1
+ %75 = LLHRMux %73.subreg_l32
+ %76 = VLGVH undef %1, _, 1
+ %79.subreg_l32 = LLHRMux %76.subreg_l32
+ %80 = COPY %79
+ %80 = DLR %80, %75
+ %83 = VLGVH undef %7, _, 2
+ %85 = LLHRMux %83.subreg_l32
+ %86 = VLGVH undef %1, _, 2
+ %89.subreg_l32 = LLHRMux %86.subreg_l32
+ %90 = COPY %89
+ %90 = DLR %90, %85
+ CHIMux %92, 0, implicit-def %cc
+ BRC 14, 6, %bb.7, implicit killed %cc
+ J %bb.6
+
+ bb.7:
+ CGHI undef %93, 0, implicit-def %cc
+ %96 = IPM implicit killed %cc
+ CGHI undef %97, 0, implicit-def %cc
+ BRC 14, 6, %bb.6, implicit killed %cc
+
+ bb.8:
+ CHIMux %6, 0, implicit-def %cc
+ %10 = LLILL 41639
+ dead %10 = LOCGR %10, %4, 14, 6, implicit killed %cc
+ CHIMux %92, 0, implicit-def %cc
+ BRC 14, 6, %bb.5, implicit killed %cc
+ J %bb.9
+
+ bb.9:
+ %82 = VLVGP %67.subreg_h64, %67.subreg_h64
+ %82 = VLVGH %82, %58.subreg_hl32, _, 0
+ %82 = VLVGH %82, %80.subreg_hl32, _, 1
+ dead %82 = VLVGH %82, %90.subreg_hl32, _, 2
+ %96 = AFIMux %96, 1879048192, implicit-def dead %cc
+ %96 = SRL %96, _, 31
+ dead %11 = VLVGF %11, %96, _, 1
+ %100 = LHIMux 0
+
+ bb.10:
+ CHIMux %100, 0, implicit-def %cc
+ BRC 14, 6, %bb.10, implicit killed %cc
+ J %bb.11
+
+ bb.11:
+ Return
+
+...
diff --git a/test/CodeGen/X86/and-sink.ll b/test/CodeGen/X86/and-sink.ll
index 46e50f2a6a74..0f877e778c70 100644
--- a/test/CodeGen/X86/and-sink.ll
+++ b/test/CodeGen/X86/and-sink.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown -verify-machineinstrs < %s | FileCheck %s
; RUN: opt < %s -codegenprepare -S -mtriple=x86_64-unknown-unknown | FileCheck --check-prefix=CHECK-CGP %s
@@ -8,12 +9,20 @@
; Test that 'and' is sunk into bb0.
define i32 @and_sink1(i32 %a, i1 %c) {
; CHECK-LABEL: and_sink1:
-; CHECK: testb $1,
-; CHECK: je
-; CHECK-NOT: andl $4,
-; CHECK: movl $0, A
-; CHECK: testb $4,
-; CHECK: jne
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB0_3
+; CHECK-NEXT: # BB#1: # %bb0
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl $0, A
+; CHECK-NEXT: testb $4, %al
+; CHECK-NEXT: jne .LBB0_3
+; CHECK-NEXT: # BB#2: # %bb1
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB0_3: # %bb2
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
; CHECK-CGP-LABEL: @and_sink1(
; CHECK-CGP-NOT: and i32
@@ -37,16 +46,30 @@ bb2:
; Test that both 'and' and cmp get sunk to bb1.
define i32 @and_sink2(i32 %a, i1 %c, i1 %c2) {
; CHECK-LABEL: and_sink2:
-; CHECK: movl $0, A
-; CHECK: testb $1,
-; CHECK: je
-; CHECK-NOT: andl $4,
-; CHECK: movl $0, B
-; CHECK: testb $1,
-; CHECK: je
-; CHECK: movl $0, C
-; CHECK: testb $4,
-; CHECK: jne
+; CHECK: # BB#0:
+; CHECK-NEXT: movl $0, A
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB1_5
+; CHECK-NEXT: # BB#1: # %bb0.preheader
+; CHECK-NEXT: movb {{[0-9]+}}(%esp), %al
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB1_2: # %bb0
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movl $0, B
+; CHECK-NEXT: testb $1, %al
+; CHECK-NEXT: je .LBB1_5
+; CHECK-NEXT: # BB#3: # %bb1
+; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
+; CHECK-NEXT: movl $0, C
+; CHECK-NEXT: testb $4, %cl
+; CHECK-NEXT: jne .LBB1_2
+; CHECK-NEXT: # BB#4: # %bb2
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB1_5: # %bb3
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
; CHECK-CGP-LABEL: @and_sink2(
; CHECK-CGP-NOT: and i32
@@ -77,12 +100,21 @@ bb3:
; Test that CodeGenPrepare doesn't get stuck in a loop sinking and hoisting a masked load.
define i32 @and_sink3(i1 %c, i32* %p) {
; CHECK-LABEL: and_sink3:
-; CHECK: testb $1,
-; CHECK: je
-; CHECK: movzbl
-; CHECK-DAG: movl $0, A
-; CHECK-DAG: testl %
-; CHECK: je
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB2_3
+; CHECK-NEXT: # BB#1: # %bb0
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl (%eax), %eax
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: movl $0, A
+; CHECK-NEXT: je .LBB2_2
+; CHECK-NEXT: .LBB2_3: # %bb2
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB2_2: # %bb1
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: retl
; CHECK-CGP-LABEL: @and_sink3(
; CHECK-CGP: load i32
@@ -106,15 +138,26 @@ bb2:
; Test that CodeGenPrepare sinks/duplicates non-immediate 'and'.
define i32 @and_sink4(i32 %a, i32 %b, i1 %c) {
; CHECK-LABEL: and_sink4:
-; CHECK: testb $1,
-; CHECK: je
-; CHECK-NOT: andl
-; CHECK-DAG: movl $0, A
-; CHECK-DAG: testl [[REG1:%[a-z0-9]+]], [[REG2:%[a-z0-9]+]]
-; CHECK: jne
-; CHECK-DAG: movl {{%[a-z0-9]+}}, B
-; CHECK-DAG: testl [[REG1]], [[REG2]]
-; CHECK: je
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB3_4
+; CHECK-NEXT: # BB#1: # %bb0
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: testl %eax, %ecx
+; CHECK-NEXT: movl $0, A
+; CHECK-NEXT: jne .LBB3_4
+; CHECK-NEXT: # BB#2: # %bb1
+; CHECK-NEXT: leal (%ecx,%eax), %edx
+; CHECK-NEXT: testl %eax, %ecx
+; CHECK-NEXT: movl %edx, B
+; CHECK-NEXT: je .LBB3_3
+; CHECK-NEXT: .LBB3_4: # %bb3
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB3_3: # %bb2
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: retl
; CHECK-CGP-LABEL: @and_sink4(
; CHECK-CGP-NOT: and i32
@@ -146,14 +189,26 @@ bb3:
; when it would increase register pressure.
define i32 @and_sink5(i32 %a, i32 %b, i32 %a2, i32 %b2, i1 %c) {
; CHECK-LABEL: and_sink5:
-; CHECK: testb $1,
-; CHECK: je
-; CHECK-DAG: andl {{[0-9]+\(%[a-z0-9]+\)}}, [[REG:%[a-z0-9]+]]
-; CHECK-DAG: movl $0, A
-; CHECK: jne
-; CHECK-DAG: movl {{%[a-z0-9]+}}, B
-; CHECK-DAG: testl [[REG]], [[REG]]
-; CHECK: je
+; CHECK: # BB#0:
+; CHECK-NEXT: testb $1, {{[0-9]+}}(%esp)
+; CHECK-NEXT: je .LBB4_4
+; CHECK-NEXT: # BB#1: # %bb0
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: andl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movl $0, A
+; CHECK-NEXT: jne .LBB4_4
+; CHECK-NEXT: # BB#2: # %bb1
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: testl %eax, %eax
+; CHECK-NEXT: movl %ecx, B
+; CHECK-NEXT: je .LBB4_3
+; CHECK-NEXT: .LBB4_4: # %bb3
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: retl
+; CHECK-NEXT: .LBB4_3: # %bb2
+; CHECK-NEXT: movl $1, %eax
+; CHECK-NEXT: retl
; CHECK-CGP-LABEL: @and_sink5(
; CHECK-CGP: and i32
diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll
index 33ac15de9de9..8f6afa8785d0 100644
--- a/test/CodeGen/X86/avx512-cvt.ll
+++ b/test/CodeGen/X86/avx512-cvt.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=ALL --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLBW --check-prefix=SKX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLNOBW --check-prefix=AVX512VL
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=DQ --check-prefix=AVX512DQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NOVL --check-prefix=NODQ --check-prefix=NOVLDQ --check-prefix=AVX512BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=DQ --check-prefix=VL --check-prefix=VLDQ --check-prefix=VLNOBW --check-prefix=AVX512VLDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512vl,avx512bw | FileCheck %s --check-prefix=ALL --check-prefix=AVX512 --check-prefix=NODQ --check-prefix=VL --check-prefix=VLNODQ --check-prefix=VLBW --check-prefix=AVX512VLBW
define <16 x float> @sitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: sitof32:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
%b = sitofp <16 x i32> %a to <16 x float>
@@ -19,7 +19,7 @@ define <16 x float> @sitof32(<16 x i32> %a) nounwind {
define <8 x double> @sltof864(<8 x i64> %a) {
; NODQ-LABEL: sltof864:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
@@ -49,7 +49,7 @@ define <8 x double> @sltof864(<8 x i64> %a) {
; NODQ-NEXT: retq
;
; DQ-LABEL: sltof864:
-; DQ: ## BB#0:
+; DQ: # BB#0:
; DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
; DQ-NEXT: retq
%b = sitofp <8 x i64> %a to <8 x double>
@@ -58,7 +58,7 @@ define <8 x double> @sltof864(<8 x i64> %a) {
define <4 x double> @sltof464(<4 x i64> %a) {
; NODQ-LABEL: sltof464:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vextracti128 $1, %ymm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtsi2sdq %rax, %xmm2, %xmm2
@@ -74,15 +74,15 @@ define <4 x double> @sltof464(<4 x i64> %a) {
; NODQ-NEXT: retq
;
; VLDQ-LABEL: sltof464:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvtqq2pd %ymm0, %ymm0
; VLDQ-NEXT: retq
;
; AVX512DQ-LABEL: sltof464:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtqq2pd %zmm0, %zmm0
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: retq
%b = sitofp <4 x i64> %a to <4 x double>
ret <4 x double> %b
@@ -90,7 +90,7 @@ define <4 x double> @sltof464(<4 x i64> %a) {
define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-LABEL: sltof2f32:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vpextrq $1, %xmm0, %rax
; NODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; NODQ-NEXT: vmovq %xmm0, %rax
@@ -101,15 +101,15 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
; NODQ-NEXT: retq
;
; VLDQ-LABEL: sltof2f32:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0
; VLDQ-NEXT: retq
;
; AVX512DQ-LABEL: sltof2f32:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
%b = sitofp <2 x i64> %a to <2 x float>
@@ -118,7 +118,7 @@ define <2 x float> @sltof2f32(<2 x i64> %a) {
define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
; KNL-LABEL: sltof4f32_mem:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vmovdqu (%rdi), %ymm0
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
@@ -135,12 +135,12 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
; KNL-NEXT: retq
;
; VLDQ-LABEL: sltof4f32_mem:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvtqq2psy (%rdi), %xmm0
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sltof4f32_mem:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vmovdqu (%rdi), %ymm0
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
@@ -158,15 +158,15 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: sltof4f32_mem:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vmovups (%rdi), %ymm0
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: sltof4f32_mem:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vmovdqu (%rdi), %ymm0
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
@@ -189,7 +189,7 @@ define <4 x float> @sltof4f32_mem(<4 x i64>* %a) {
define <4 x i64> @f64tosl(<4 x double> %a) {
; NODQ-LABEL: f64tosl:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vextractf128 $1, %ymm0, %xmm1
; NODQ-NEXT: vcvttsd2si %xmm1, %rax
; NODQ-NEXT: vmovq %rax, %xmm2
@@ -207,15 +207,15 @@ define <4 x i64> @f64tosl(<4 x double> %a) {
; NODQ-NEXT: retq
;
; VLDQ-LABEL: f64tosl:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvttpd2qq %ymm0, %ymm0
; VLDQ-NEXT: retq
;
; AVX512DQ-LABEL: f64tosl:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvttpd2qq %zmm0, %zmm0
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: retq
%b = fptosi <4 x double> %a to <4 x i64>
ret <4 x i64> %b
@@ -223,7 +223,7 @@ define <4 x i64> @f64tosl(<4 x double> %a) {
define <4 x i64> @f32tosl(<4 x float> %a) {
; NODQ-LABEL: f32tosl:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; NODQ-NEXT: vcvttss2si %xmm1, %rax
; NODQ-NEXT: vmovq %rax, %xmm1
@@ -241,15 +241,15 @@ define <4 x i64> @f32tosl(<4 x float> %a) {
; NODQ-NEXT: retq
;
; VLDQ-LABEL: f32tosl:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvttps2qq %xmm0, %ymm0
; VLDQ-NEXT: retq
;
; AVX512DQ-LABEL: f32tosl:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: retq
%b = fptosi <4 x float> %a to <4 x i64>
ret <4 x i64> %b
@@ -257,7 +257,7 @@ define <4 x i64> @f32tosl(<4 x float> %a) {
define <4 x float> @sltof432(<4 x i64> %a) {
; KNL-LABEL: sltof432:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
@@ -273,13 +273,13 @@ define <4 x float> @sltof432(<4 x i64> %a) {
; KNL-NEXT: retq
;
; VLDQ-LABEL: sltof432:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvtqq2ps %ymm0, %xmm0
; VLDQ-NEXT: vzeroupper
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sltof432:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
@@ -296,15 +296,15 @@ define <4 x float> @sltof432(<4 x i64> %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: sltof432:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtqq2ps %zmm0, %ymm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: sltof432:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
; AVX512BW-NEXT: vcvtsi2ssq %rax, %xmm1, %xmm1
; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -325,7 +325,7 @@ define <4 x float> @sltof432(<4 x i64> %a) {
define <4 x float> @ultof432(<4 x i64> %a) {
; KNL-LABEL: ultof432:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vpextrq $1, %xmm0, %rax
; KNL-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
; KNL-NEXT: vmovq %xmm0, %rax
@@ -341,13 +341,13 @@ define <4 x float> @ultof432(<4 x i64> %a) {
; KNL-NEXT: retq
;
; VLDQ-LABEL: ultof432:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vcvtuqq2ps %ymm0, %xmm0
; VLDQ-NEXT: vzeroupper
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: ultof432:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax
; VLNODQ-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
; VLNODQ-NEXT: vmovq %xmm0, %rax
@@ -364,15 +364,15 @@ define <4 x float> @ultof432(<4 x i64> %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: ultof432:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtuqq2ps %zmm0, %ymm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: ultof432:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpextrq $1, %xmm0, %rax
; AVX512BW-NEXT: vcvtusi2ssq %rax, %xmm1, %xmm1
; AVX512BW-NEXT: vmovq %xmm0, %rax
@@ -393,7 +393,7 @@ define <4 x float> @ultof432(<4 x i64> %a) {
define <8 x double> @ultof64(<8 x i64> %a) {
; NODQ-LABEL: ultof64:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vextracti32x4 $3, %zmm0, %xmm1
; NODQ-NEXT: vpextrq $1, %xmm1, %rax
; NODQ-NEXT: vcvtusi2sdq %rax, %xmm2, %xmm2
@@ -423,7 +423,7 @@ define <8 x double> @ultof64(<8 x i64> %a) {
; NODQ-NEXT: retq
;
; DQ-LABEL: ultof64:
-; DQ: ## BB#0:
+; DQ: # BB#0:
; DQ-NEXT: vcvtuqq2pd %zmm0, %zmm0
; DQ-NEXT: retq
%b = uitofp <8 x i64> %a to <8 x double>
@@ -432,7 +432,7 @@ define <8 x double> @ultof64(<8 x i64> %a) {
define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
; ALL-LABEL: fptosi00:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttps2dq %zmm0, %zmm0
; ALL-NEXT: retq
%b = fptosi <16 x float> %a to <16 x i32>
@@ -441,7 +441,7 @@ define <16 x i32> @fptosi00(<16 x float> %a) nounwind {
define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
; ALL-LABEL: fptoui00:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttps2udq %zmm0, %zmm0
; ALL-NEXT: retq
%b = fptoui <16 x float> %a to <16 x i32>
@@ -450,14 +450,14 @@ define <16 x i32> @fptoui00(<16 x float> %a) nounwind {
define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
; NOVL-LABEL: fptoui_256:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NOVL: # BB#0:
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NOVL-NEXT: vcvttps2udq %zmm0, %zmm0
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; NOVL-NEXT: retq
;
; VL-LABEL: fptoui_256:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvttps2udq %ymm0, %ymm0
; VL-NEXT: retq
%b = fptoui <8 x float> %a to <8 x i32>
@@ -466,30 +466,30 @@ define <8 x i32> @fptoui_256(<8 x float> %a) nounwind {
define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
; KNL-LABEL: fptoui_128:
-; KNL: ## BB#0:
-; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL: # BB#0:
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttps2udq %zmm0, %zmm0
-; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; VL-LABEL: fptoui_128:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvttps2udq %xmm0, %xmm0
; VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_128:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvttps2udq %zmm0, %zmm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: fptoui_128:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vcvttps2udq %zmm0, %zmm0
-; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%b = fptoui <4 x float> %a to <4 x i32>
@@ -498,7 +498,7 @@ define <4 x i32> @fptoui_128(<4 x float> %a) nounwind {
define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
; ALL-LABEL: fptoui01:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttpd2udq %zmm0, %ymm0
; ALL-NEXT: retq
%b = fptoui <8 x double> %a to <8 x i32>
@@ -507,31 +507,31 @@ define <8 x i32> @fptoui01(<8 x double> %a) nounwind {
define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
; KNL-LABEL: fptoui_256d:
-; KNL: ## BB#0:
-; KNL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; KNL: # BB#0:
+; KNL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvttpd2udq %zmm0, %ymm0
-; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; KNL-NEXT: retq
;
; VL-LABEL: fptoui_256d:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvttpd2udq %ymm0, %xmm0
; VL-NEXT: vzeroupper
; VL-NEXT: retq
;
; AVX512DQ-LABEL: fptoui_256d:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvttpd2udq %zmm0, %ymm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: fptoui_256d:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vcvttpd2udq %zmm0, %ymm0
-; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
+; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%b = fptoui <4 x double> %a to <4 x i32>
@@ -540,7 +540,7 @@ define <4 x i32> @fptoui_256d(<4 x double> %a) nounwind {
define <8 x double> @sitof64(<8 x i32> %a) {
; ALL-LABEL: sitof64:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT: retq
%b = sitofp <8 x i32> %a to <8 x double>
@@ -548,31 +548,31 @@ define <8 x double> @sitof64(<8 x i32> %a) {
}
define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; KNL-LABEL: sitof64_mask:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; VLBW-LABEL: sitof64_mask:
-; VLBW: ## BB#0:
+; VLBW: # BB#0:
; VLBW-NEXT: kmovd %edi, %k1
; VLBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLBW-NEXT: retq
;
; VLNOBW-LABEL: sitof64_mask:
-; VLNOBW: ## BB#0:
+; VLNOBW: # BB#0:
; VLNOBW-NEXT: kmovw %edi, %k1
; VLNOBW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
; VLNOBW-NEXT: retq
;
; AVX512DQ-LABEL: sitof64_mask:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: sitof64_mask:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vcvtdq2pd %ymm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
@@ -583,31 +583,31 @@ define <8 x double> @sitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind
}
define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
; KNL-LABEL: sitof64_maskz:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; VLBW-LABEL: sitof64_maskz:
-; VLBW: ## BB#0:
+; VLBW: # BB#0:
; VLBW-NEXT: kmovd %edi, %k1
; VLBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLBW-NEXT: retq
;
; VLNOBW-LABEL: sitof64_maskz:
-; VLNOBW: ## BB#0:
+; VLNOBW: # BB#0:
; VLNOBW-NEXT: kmovw %edi, %k1
; VLNOBW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; VLNOBW-NEXT: retq
;
; AVX512DQ-LABEL: sitof64_maskz:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: sitof64_maskz:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vcvtdq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
@@ -619,7 +619,7 @@ define <8 x double> @sitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
define <8 x i32> @fptosi01(<8 x double> %a) {
; ALL-LABEL: fptosi01:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttpd2dq %zmm0, %ymm0
; ALL-NEXT: retq
%b = fptosi <8 x double> %a to <8 x i32>
@@ -628,12 +628,12 @@ define <8 x i32> @fptosi01(<8 x double> %a) {
define <4 x i32> @fptosi03(<4 x double> %a) {
; KNL-LABEL: fptosi03:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vcvttpd2dq %ymm0, %xmm0
; KNL-NEXT: retq
;
; AVX512-LABEL: fptosi03:
-; AVX512: ## BB#0:
+; AVX512: # BB#0:
; AVX512-NEXT: vcvttpd2dq %ymm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -643,14 +643,14 @@ define <4 x i32> @fptosi03(<4 x double> %a) {
define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
; NODQ-LABEL: fptrunc00:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vcvtpd2ps %zmm0, %ymm0
; NODQ-NEXT: vcvtpd2ps %zmm1, %ymm1
; NODQ-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; NODQ-NEXT: retq
;
; DQ-LABEL: fptrunc00:
-; DQ: ## BB#0:
+; DQ: # BB#0:
; DQ-NEXT: vcvtpd2ps %zmm0, %ymm0
; DQ-NEXT: vcvtpd2ps %zmm1, %ymm1
; DQ-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0
@@ -661,12 +661,12 @@ define <16 x float> @fptrunc00(<16 x double> %b) nounwind {
define <4 x float> @fptrunc01(<4 x double> %b) {
; KNL-LABEL: fptrunc01:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0
; KNL-NEXT: retq
;
; AVX512-LABEL: fptrunc01:
-; AVX512: ## BB#0:
+; AVX512: # BB#0:
; AVX512-NEXT: vcvtpd2ps %ymm0, %xmm0
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
@@ -676,7 +676,7 @@ define <4 x float> @fptrunc01(<4 x double> %b) {
define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
; KNL-LABEL: fptrunc02:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: vpslld $31, %xmm1, %xmm1
; KNL-NEXT: vpsrad $31, %xmm1, %xmm1
; KNL-NEXT: vcvtpd2ps %ymm0, %xmm0
@@ -684,7 +684,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
; KNL-NEXT: retq
;
; VL-LABEL: fptrunc02:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpslld $31, %xmm1, %xmm1
; VL-NEXT: vptestmd %xmm1, %xmm1, %k1
; VL-NEXT: vcvtpd2ps %ymm0, %xmm0 {%k1} {z}
@@ -692,7 +692,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
; VL-NEXT: retq
;
; AVX512DQ-LABEL: fptrunc02:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512DQ-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX512DQ-NEXT: vcvtpd2ps %ymm0, %xmm0
@@ -701,7 +701,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: fptrunc02:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpslld $31, %xmm1, %xmm1
; AVX512BW-NEXT: vpsrad $31, %xmm1, %xmm1
; AVX512BW-NEXT: vcvtpd2ps %ymm0, %xmm0
@@ -715,7 +715,7 @@ define <4 x float> @fptrunc02(<4 x double> %b, <4 x i1> %mask) {
define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: fptrunc03:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtsd2ss %xmm0, %xmm1, %xmm0
; ALL-NEXT: retq
%ext = extractelement <2 x double> %a0, i32 0
@@ -726,7 +726,7 @@ define <4 x float> @fptrunc03(<2 x double> %a0, <4 x float> %a1) nounwind {
define <8 x double> @fpext00(<8 x float> %b) nounwind {
; ALL-LABEL: fpext00:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtps2pd %ymm0, %zmm0
; ALL-NEXT: retq
%a = fpext <8 x float> %b to <8 x double>
@@ -735,14 +735,14 @@ define <8 x double> @fpext00(<8 x float> %b) nounwind {
define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
; NOVL-LABEL: fpext01:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vcvtps2pd %xmm0, %ymm0
; NOVL-NEXT: vcmpltpd %ymm2, %ymm1, %ymm1
; NOVL-NEXT: vandpd %ymm0, %ymm1, %ymm0
; NOVL-NEXT: retq
;
; VL-LABEL: fpext01:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcmpltpd %ymm2, %ymm1, %k1
; VL-NEXT: vcvtps2pd %xmm0, %ymm0 {%k1} {z}
; VL-NEXT: retq
@@ -754,7 +754,7 @@ define <4 x double> @fpext01(<4 x float> %b, <4 x double>%b1, <4 x double>%a1) {
define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
; ALL-LABEL: fpext02:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0
; ALL-NEXT: retq
%ext = extractelement <4 x float> %a1, i32 0
@@ -765,7 +765,7 @@ define <2 x double> @fpext02(<2 x double> %a0, <4 x float> %a1) nounwind {
define double @funcA(i64* nocapture %e) {
; ALL-LABEL: funcA:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vcvtsi2sdq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -776,7 +776,7 @@ entry:
define double @funcB(i32* %e) {
; ALL-LABEL: funcB:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vcvtsi2sdl (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -787,7 +787,7 @@ entry:
define float @funcC(i32* %e) {
; ALL-LABEL: funcC:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vcvtsi2ssl (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -798,7 +798,7 @@ entry:
define float @i64tof32(i64* %e) {
; ALL-LABEL: i64tof32:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vcvtsi2ssq (%rdi), %xmm0, %xmm0
; ALL-NEXT: retq
entry:
@@ -809,7 +809,7 @@ entry:
define void @fpext() {
; ALL-LABEL: fpext:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; ALL-NEXT: vmovsd %xmm0, -{{[0-9]+}}(%rsp)
@@ -825,7 +825,7 @@ entry:
define void @fpround_scalar() nounwind uwtable {
; ALL-LABEL: fpround_scalar:
-; ALL: ## BB#0: ## %entry
+; ALL: # BB#0: # %entry
; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0
; ALL-NEXT: vmovss %xmm0, -{{[0-9]+}}(%rsp)
@@ -841,7 +841,7 @@ entry:
define double @long_to_double(i64 %x) {
; ALL-LABEL: long_to_double:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vmovq %rdi, %xmm0
; ALL-NEXT: retq
%res = bitcast i64 %x to double
@@ -850,7 +850,7 @@ define double @long_to_double(i64 %x) {
define i64 @double_to_long(double %x) {
; ALL-LABEL: double_to_long:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vmovq %xmm0, %rax
; ALL-NEXT: retq
%res = bitcast double %x to i64
@@ -859,7 +859,7 @@ define i64 @double_to_long(double %x) {
define float @int_to_float(i32 %x) {
; ALL-LABEL: int_to_float:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vmovd %edi, %xmm0
; ALL-NEXT: retq
%res = bitcast i32 %x to float
@@ -868,7 +868,7 @@ define float @int_to_float(i32 %x) {
define i32 @float_to_int(float %x) {
; ALL-LABEL: float_to_int:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vmovd %xmm0, %eax
; ALL-NEXT: retq
%res = bitcast float %x to i32
@@ -877,7 +877,7 @@ define i32 @float_to_int(float %x) {
define <16 x double> @uitof64(<16 x i32> %a) nounwind {
; NODQ-LABEL: uitof64:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm2
; NODQ-NEXT: vextracti64x4 $1, %zmm0, %ymm0
; NODQ-NEXT: vcvtudq2pd %ymm0, %zmm1
@@ -885,7 +885,7 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
; NODQ-NEXT: retq
;
; DQ-LABEL: uitof64:
-; DQ: ## BB#0:
+; DQ: # BB#0:
; DQ-NEXT: vcvtudq2pd %ymm0, %zmm2
; DQ-NEXT: vextracti32x8 $1, %zmm0, %ymm0
; DQ-NEXT: vcvtudq2pd %ymm0, %zmm1
@@ -896,31 +896,31 @@ define <16 x double> @uitof64(<16 x i32> %a) nounwind {
}
define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind {
; KNL-LABEL: uitof64_mask:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
; KNL-NEXT: retq
;
; VLBW-LABEL: uitof64_mask:
-; VLBW: ## BB#0:
+; VLBW: # BB#0:
; VLBW-NEXT: kmovd %edi, %k1
; VLBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
; VLBW-NEXT: retq
;
; VLNOBW-LABEL: uitof64_mask:
-; VLNOBW: ## BB#0:
+; VLNOBW: # BB#0:
; VLNOBW-NEXT: kmovw %edi, %k1
; VLNOBW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
; VLNOBW-NEXT: retq
;
; AVX512DQ-LABEL: uitof64_mask:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitof64_mask:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vcvtudq2pd %ymm1, %zmm0 {%k1}
; AVX512BW-NEXT: retq
@@ -931,31 +931,31 @@ define <8 x double> @uitof64_mask(<8 x double> %a, <8 x i32> %b, i8 %c) nounwind
}
define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
; KNL-LABEL: uitof64_maskz:
-; KNL: ## BB#0:
+; KNL: # BB#0:
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; KNL-NEXT: retq
;
; VLBW-LABEL: uitof64_maskz:
-; VLBW: ## BB#0:
+; VLBW: # BB#0:
; VLBW-NEXT: kmovd %edi, %k1
; VLBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; VLBW-NEXT: retq
;
; VLNOBW-LABEL: uitof64_maskz:
-; VLNOBW: ## BB#0:
+; VLNOBW: # BB#0:
; VLNOBW-NEXT: kmovw %edi, %k1
; VLNOBW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; VLNOBW-NEXT: retq
;
; AVX512DQ-LABEL: uitof64_maskz:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: kmovw %edi, %k1
; AVX512DQ-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitof64_maskz:
-; AVX512BW: ## BB#0:
+; AVX512BW: # BB#0:
; AVX512BW-NEXT: kmovd %edi, %k1
; AVX512BW-NEXT: vcvtudq2pd %ymm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: retq
@@ -967,14 +967,14 @@ define <8 x double> @uitof64_maskz(<8 x i32> %a, i8 %b) nounwind {
define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
; NOVL-LABEL: uitof64_256:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %YMM0<def>
+; NOVL: # BB#0:
+; NOVL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<def>
; NOVL-NEXT: vcvtudq2pd %ymm0, %zmm0
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; NOVL-NEXT: retq
;
; VL-LABEL: uitof64_256:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvtudq2pd %xmm0, %ymm0
; VL-NEXT: retq
%b = uitofp <4 x i32> %a to <4 x double>
@@ -983,7 +983,7 @@ define <4 x double> @uitof64_256(<4 x i32> %a) nounwind {
define <16 x float> @uitof32(<16 x i32> %a) nounwind {
; ALL-LABEL: uitof32:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtudq2ps %zmm0, %zmm0
; ALL-NEXT: retq
%b = uitofp <16 x i32> %a to <16 x float>
@@ -992,14 +992,14 @@ define <16 x float> @uitof32(<16 x i32> %a) nounwind {
define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
; NOVL-LABEL: uitof32_256:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NOVL: # BB#0:
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; NOVL-NEXT: retq
;
; VL-LABEL: uitof32_256:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvtudq2ps %ymm0, %ymm0
; VL-NEXT: retq
%b = uitofp <8 x i32> %a to <8 x float>
@@ -1008,30 +1008,30 @@ define <8 x float> @uitof32_256(<8 x i32> %a) nounwind {
define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
; KNL-LABEL: uitof32_128:
-; KNL: ## BB#0:
-; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; KNL: # BB#0:
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; KNL-NEXT: vcvtudq2ps %zmm0, %zmm0
-; KNL-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; KNL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; KNL-NEXT: retq
;
; VL-LABEL: uitof32_128:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vcvtudq2ps %xmm0, %xmm0
; VL-NEXT: retq
;
; AVX512DQ-LABEL: uitof32_128:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vcvtudq2ps %zmm0, %zmm0
-; AVX512DQ-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512DQ-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512DQ-NEXT: vzeroupper
; AVX512DQ-NEXT: retq
;
; AVX512BW-LABEL: uitof32_128:
-; AVX512BW: ## BB#0:
-; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; AVX512BW: # BB#0:
+; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512BW-NEXT: vcvtudq2ps %zmm0, %zmm0
-; AVX512BW-NEXT: ## kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
%b = uitofp <4 x i32> %a to <4 x float>
@@ -1040,7 +1040,7 @@ define <4 x float> @uitof32_128(<4 x i32> %a) nounwind {
define i32 @fptosi02(float %a) nounwind {
; ALL-LABEL: fptosi02:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttss2si %xmm0, %eax
; ALL-NEXT: retq
%b = fptosi float %a to i32
@@ -1049,7 +1049,7 @@ define i32 @fptosi02(float %a) nounwind {
define i32 @fptoui02(float %a) nounwind {
; ALL-LABEL: fptoui02:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvttss2usi %xmm0, %eax
; ALL-NEXT: retq
%b = fptoui float %a to i32
@@ -1058,7 +1058,7 @@ define i32 @fptoui02(float %a) nounwind {
define float @uitofp02(i32 %a) nounwind {
; ALL-LABEL: uitofp02:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
; ALL-NEXT: retq
%b = uitofp i32 %a to float
@@ -1067,7 +1067,7 @@ define float @uitofp02(i32 %a) nounwind {
define double @uitofp03(i32 %a) nounwind {
; ALL-LABEL: uitofp03:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
; ALL-NEXT: retq
%b = uitofp i32 %a to double
@@ -1076,7 +1076,7 @@ define double @uitofp03(i32 %a) nounwind {
define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
; NODQ-LABEL: sitofp_16i1_float:
-; NODQ: ## BB#0:
+; NODQ: # BB#0:
; NODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; NODQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NODQ-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -1084,7 +1084,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
; NODQ-NEXT: retq
;
; DQ-LABEL: sitofp_16i1_float:
-; DQ: ## BB#0:
+; DQ: # BB#0:
; DQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; DQ-NEXT: vpcmpgtd %zmm0, %zmm1, %k0
; DQ-NEXT: vpmovm2d %k0, %zmm0
@@ -1097,7 +1097,7 @@ define <16 x float> @sitofp_16i1_float(<16 x i32> %a) {
define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
; ALL-LABEL: sitofp_16i8_float:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovsxbd %xmm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1107,7 +1107,7 @@ define <16 x float> @sitofp_16i8_float(<16 x i8> %a) {
define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
; ALL-LABEL: sitofp_16i16_float:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovsxwd %ymm0, %zmm0
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1117,7 +1117,7 @@ define <16 x float> @sitofp_16i16_float(<16 x i16> %a) {
define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
; ALL-LABEL: sitofp_8i16_double:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovsxwd %xmm0, %ymm0
; ALL-NEXT: vcvtdq2pd %ymm0, %zmm0
; ALL-NEXT: retq
@@ -1127,7 +1127,7 @@ define <8 x double> @sitofp_8i16_double(<8 x i16> %a) {
define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
; ALL-LABEL: sitofp_8i8_double:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; ALL-NEXT: vpslld $24, %ymm0, %ymm0
; ALL-NEXT: vpsrad $24, %ymm0, %ymm0
@@ -1139,7 +1139,7 @@ define <8 x double> @sitofp_8i8_double(<8 x i8> %a) {
define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; NOVLDQ-LABEL: sitofp_16i1_double:
-; NOVLDQ: ## BB#0:
+; NOVLDQ: # BB#0:
; NOVLDQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
; NOVLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
@@ -1152,7 +1152,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sitofp_16i1_double:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; VLDQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
; VLDQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
@@ -1163,7 +1163,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_16i1_double:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxord %zmm2, %zmm2, %zmm2
; VLNODQ-NEXT: vcmpltpd %zmm1, %zmm2, %k1
; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm2, %k2
@@ -1175,7 +1175,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_16i1_double:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vxorpd %zmm2, %zmm2, %zmm2
; AVX512DQ-NEXT: vcmpltpd %zmm1, %zmm2, %k0
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm2, %k1
@@ -1191,7 +1191,7 @@ define <16 x double> @sitofp_16i1_double(<16 x double> %a) {
define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; NOVLDQ-LABEL: sitofp_8i1_double:
-; NOVLDQ: ## BB#0:
+; NOVLDQ: # BB#0:
; NOVLDQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; NOVLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -1200,7 +1200,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sitofp_8i1_double:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
; VLDQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; VLDQ-NEXT: vpmovm2d %k0, %ymm0
@@ -1208,7 +1208,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_8i1_double:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VLNODQ-NEXT: vcmpltpd %zmm0, %zmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
@@ -1217,7 +1217,7 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_8i1_double:
-; AVX512DQ: ## BB#0:
+; AVX512DQ: # BB#0:
; AVX512DQ-NEXT: vxorpd %zmm1, %zmm1, %zmm1
; AVX512DQ-NEXT: vcmpltpd %zmm0, %zmm1, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
@@ -1230,8 +1230,8 @@ define <8 x double> @sitofp_8i1_double(<8 x double> %a) {
define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; NOVLDQ-LABEL: sitofp_8i1_float:
-; NOVLDQ: ## BB#0:
-; NOVLDQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NOVLDQ: # BB#0:
+; NOVLDQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NOVLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
; NOVLDQ-NEXT: vcmpltps %zmm0, %zmm1, %k1
; NOVLDQ-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
@@ -1240,7 +1240,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; NOVLDQ-NEXT: retq
;
; VLDQ-LABEL: sitofp_8i1_float:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
; VLDQ-NEXT: vcmpltps %ymm0, %ymm1, %k0
; VLDQ-NEXT: vpmovm2d %k0, %ymm0
@@ -1248,7 +1248,7 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_8i1_float:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
; VLNODQ-NEXT: vcmpltps %ymm0, %ymm1, %k1
; VLNODQ-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
@@ -1257,8 +1257,8 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
; VLNODQ-NEXT: retq
;
; AVX512DQ-LABEL: sitofp_8i1_float:
-; AVX512DQ: ## BB#0:
-; AVX512DQ-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; AVX512DQ: # BB#0:
+; AVX512DQ-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; AVX512DQ-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX512DQ-NEXT: vcmpltps %zmm0, %zmm1, %k0
; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
@@ -1271,14 +1271,14 @@ define <8 x float> @sitofp_8i1_float(<8 x float> %a) {
define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
; NOVL-LABEL: sitofp_4i1_float:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
; NOVL-NEXT: retq
;
; VLDQ-LABEL: sitofp_4i1_float:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
; VLDQ-NEXT: vpmovm2d %k0, %xmm0
@@ -1286,7 +1286,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_4i1_float:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
@@ -1300,7 +1300,7 @@ define <4 x float> @sitofp_4i1_float(<4 x float> %a) {
define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
; NOVL-LABEL: sitofp_4i1_double:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; NOVL-NEXT: vcmpltpd %ymm0, %ymm1, %ymm0
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
@@ -1308,7 +1308,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
; NOVL-NEXT: retq
;
; VLDQ-LABEL: sitofp_4i1_double:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorpd %ymm1, %ymm1, %ymm1
; VLDQ-NEXT: vcmpltpd %ymm0, %ymm1, %k0
; VLDQ-NEXT: vpmovm2d %k0, %xmm0
@@ -1316,7 +1316,7 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_4i1_double:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %ymm1, %ymm1, %ymm1
; VLNODQ-NEXT: vcmpltpd %ymm0, %ymm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
@@ -1330,14 +1330,14 @@ define <4 x double> @sitofp_4i1_double(<4 x double> %a) {
define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
; NOVL-LABEL: sitofp_2i1_float:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vxorps %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltps %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vcvtdq2ps %xmm0, %xmm0
; NOVL-NEXT: retq
;
; VLDQ-LABEL: sitofp_2i1_float:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorps %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vcmpltps %xmm0, %xmm1, %k0
; VLDQ-NEXT: vpmovm2d %k0, %xmm0
@@ -1345,7 +1345,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_2i1_float:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltps %xmm0, %xmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
@@ -1359,7 +1359,7 @@ define <2 x float> @sitofp_2i1_float(<2 x float> %a) {
define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
; NOVL-LABEL: sitofp_2i1_double:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,2,2,3]
@@ -1367,7 +1367,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
; NOVL-NEXT: retq
;
; VLDQ-LABEL: sitofp_2i1_double:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vxorpd %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vcmpltpd %xmm0, %xmm1, %k0
; VLDQ-NEXT: vpmovm2q %k0, %xmm0
@@ -1375,7 +1375,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: sitofp_2i1_double:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vcmpltpd %xmm0, %xmm1, %k1
; VLNODQ-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
@@ -1393,7 +1393,7 @@ define <2 x double> @sitofp_2i1_double(<2 x double> %a) {
define <16 x float> @uitofp_16i8(<16 x i8>%a) {
; ALL-LABEL: uitofp_16i8:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1403,7 +1403,7 @@ define <16 x float> @uitofp_16i8(<16 x i8>%a) {
define <16 x float> @uitofp_16i16(<16 x i16>%a) {
; ALL-LABEL: uitofp_16i16:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; ALL-NEXT: vcvtdq2ps %zmm0, %zmm0
; ALL-NEXT: retq
@@ -1413,7 +1413,7 @@ define <16 x float> @uitofp_16i16(<16 x i16>%a) {
define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
; ALL-LABEL: uitofp_16i1_float:
-; ALL: ## BB#0:
+; ALL: # BB#0:
; ALL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; ALL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; ALL-NEXT: vpbroadcastd {{.*}}(%rip), %zmm0 {%k1} {z}
@@ -1426,7 +1426,7 @@ define <16 x float> @uitofp_16i1_float(<16 x i32> %a) {
define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
; NOVL-LABEL: uitofp_16i1_double:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NOVL-NEXT: movq {{.*}}(%rip), %rax
@@ -1440,7 +1440,7 @@ define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_16i1_double:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxord %zmm1, %zmm1, %zmm1
; VL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; VL-NEXT: movl {{.*}}(%rip), %eax
@@ -1457,18 +1457,18 @@ define <16 x double> @uitofp_16i1_double(<16 x i32> %a) {
define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
; NOVL-LABEL: uitofp_8i1_float:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NOVL: # BB#0:
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
; NOVL-NEXT: vpmovqd %zmm0, %ymm0
; NOVL-NEXT: vcvtudq2ps %zmm0, %zmm0
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_8i1_float:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
@@ -1481,8 +1481,8 @@ define <8 x float> @uitofp_8i1_float(<8 x i32> %a) {
define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
; NOVL-LABEL: uitofp_8i1_double:
-; NOVL: ## BB#0:
-; NOVL-NEXT: ## kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; NOVL: # BB#0:
+; NOVL-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
; NOVL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; NOVL-NEXT: vpcmpgtd %zmm0, %zmm1, %k1
; NOVL-NEXT: vpbroadcastq {{.*}}(%rip), %zmm0 {%k1} {z}
@@ -1491,7 +1491,7 @@ define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_8i1_double:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxor %ymm1, %ymm1, %ymm1
; VL-NEXT: vpcmpgtd %ymm0, %ymm1, %k1
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %ymm0 {%k1} {z}
@@ -1504,7 +1504,7 @@ define <8 x double> @uitofp_8i1_double(<8 x i32> %a) {
define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
; NOVL-LABEL: uitofp_4i1_float:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
@@ -1512,7 +1512,7 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_4i1_float:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
@@ -1525,7 +1525,7 @@ define <4 x float> @uitofp_4i1_float(<4 x i32> %a) {
define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
; NOVL-LABEL: uitofp_4i1_double:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
; NOVL-NEXT: vpsrld $31, %xmm0, %xmm0
@@ -1533,7 +1533,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_4i1_double:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT: vpcmpgtd %xmm0, %xmm1, %k1
; VL-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
@@ -1546,7 +1546,7 @@ define <4 x double> @uitofp_4i1_double(<4 x i32> %a) {
define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL-LABEL: uitofp_2i1_float:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
@@ -1562,7 +1562,7 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
; NOVL-NEXT: retq
;
; VL-LABEL: uitofp_2i1_float:
-; VL: ## BB#0:
+; VL: # BB#0:
; VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; VL-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
@@ -1576,7 +1576,7 @@ define <2 x float> @uitofp_2i1_float(<2 x i32> %a) {
define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; NOVL-LABEL: uitofp_2i1_double:
-; NOVL: ## BB#0:
+; NOVL: # BB#0:
; NOVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; NOVL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; NOVL-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808]
@@ -1586,7 +1586,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; NOVL-NEXT: retq
;
; VLDQ-LABEL: uitofp_2i1_double:
-; VLDQ: ## BB#0:
+; VLDQ: # BB#0:
; VLDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLDQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; VLDQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
@@ -1595,7 +1595,7 @@ define <2 x double> @uitofp_2i1_double(<2 x i32> %a) {
; VLDQ-NEXT: retq
;
; VLNODQ-LABEL: uitofp_2i1_double:
-; VLNODQ: ## BB#0:
+; VLNODQ: # BB#0:
; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
; VLNODQ-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; VLNODQ-NEXT: vpcmpltuq %xmm1, %xmm0, %k1
diff --git a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
index b13965a30ed8..bbe31c5c2ac5 100644
--- a/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
+++ b/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll
@@ -1203,3 +1203,35 @@ define <8 x double> @f8xf64_f256(<8 x double> %a) {
ret <8 x double> %res2
}
+
+
+; ALL: .LCPI38
+; ALL-NEXT: .long 4290379776 # 0xffba0000
+
+; AVX: .LCPI38
+; AVX-NEXT: .long 4290379776 # float NaN
+
+define <8 x i16> @f8xi16_i32_NaN(<8 x i16> %a) {
+; ALL32-LABEL: f8xi16_i32_NaN:
+; ALL32: # BB#0:
+; ALL32-NEXT: vpbroadcastd {{\.LCPI.*}}, %xmm1
+; ALL32-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; ALL32-NEXT: vpand %xmm1, %xmm0, %xmm0
+; ALL32-NEXT: retl
+;
+; ALL64-LABEL: f8xi16_i32_NaN:
+; ALL64: # BB#0:
+; ALL64-NEXT: vpbroadcastd {{.*}}(%rip), %xmm1
+; ALL64-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; ALL64-NEXT: vpand %xmm1, %xmm0, %xmm0
+; ALL64-NEXT: retq
+;
+; AVX-LABEL: f8xi16_i32_NaN:
+; AVX: # BB#0:
+; AVX-NEXT: vbroadcastss {{\.LCPI.*}}, %xmm1
+; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+ %res1 = add <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %a
+ %res2 = and <8 x i16> <i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70, i16 0, i16 -70>, %res1
+ ret <8 x i16> %res2
+}
diff --git a/test/CodeGen/X86/clear_upper_vector_element_bits.ll b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
index ae0f4406ba0d..1218b68b1be4 100644
--- a/test/CodeGen/X86/clear_upper_vector_element_bits.ll
+++ b/test/CodeGen/X86/clear_upper_vector_element_bits.ll
@@ -405,12 +405,7 @@ define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
;
; AVX-LABEL: _clearupper16xi8a:
; AVX: # BB#0:
-; AVX-NEXT: vpextrb $0, %xmm0, %eax
-; AVX-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX-NEXT: vmovd %eax, %xmm1
-; AVX-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
-; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5,6,7]
-; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
%x0 = extractelement <16 x i8> %0, i32 0
%x1 = extractelement <16 x i8> %0, i32 1
@@ -575,39 +570,10 @@ define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
-; AVX1-LABEL: _clearupper32xi8a:
-; AVX1: # BB#0:
-; AVX1-NEXT: vpextrb $0, %xmm0, %eax
-; AVX1-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
-; AVX1-NEXT: vpextrb $0, %xmm1, %edx
-; AVX1-NEXT: vpextrb $1, %xmm1, %esi
-; AVX1-NEXT: vmovd %edx, %xmm2
-; AVX1-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX1-NEXT: vmovd %eax, %xmm2
-; AVX1-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
-; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: _clearupper32xi8a:
-; AVX2: # BB#0:
-; AVX2-NEXT: vpextrb $0, %xmm0, %eax
-; AVX2-NEXT: vpextrb $1, %xmm0, %ecx
-; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1
-; AVX2-NEXT: vpextrb $0, %xmm1, %edx
-; AVX2-NEXT: vpextrb $1, %xmm1, %esi
-; AVX2-NEXT: vmovd %edx, %xmm2
-; AVX2-NEXT: vpinsrb $1, %esi, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3,4,5,6,7]
-; AVX2-NEXT: vmovd %eax, %xmm2
-; AVX2-NEXT: vpinsrb $1, %ecx, %xmm2, %xmm2
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5,6,7]
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
-; AVX2-NEXT: retq
+; AVX-LABEL: _clearupper32xi8a:
+; AVX: # BB#0:
+; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
+; AVX-NEXT: retq
%x0 = extractelement <32 x i8> %0, i32 0
%x1 = extractelement <32 x i8> %0, i32 1
%x2 = extractelement <32 x i8> %0, i32 2
diff --git a/test/CodeGen/X86/scavenger.mir b/test/CodeGen/X86/scavenger.mir
new file mode 100644
index 000000000000..8d97aeb22cb9
--- /dev/null
+++ b/test/CodeGen/X86/scavenger.mir
@@ -0,0 +1,54 @@
+# RUN: llc -mtriple=i386-- -run-pass scavenger-test -verify-machineinstrs -o - %s | FileCheck %s
+---
+# CHECK-LABEL: name: func0
+name: func0
+tracksRegLiveness: true
+body: |
+ bb.0:
+ %0 : gr32 = MOV32ri 42
+ %ebp = COPY %0
+...
+---
+# CHECK-LABEL: name: func2
+name: func2
+tracksRegLiveness: true
+body: |
+ bb.0:
+ ; CHECK-NOT: %eax = MOV32ri 42
+ ; CHECK: [[REG0:%e[a-z]+]] = MOV32ri 42
+ ; CHECK: %ebp = COPY [[REG0]]
+ %eax = MOV32ri 13
+ %0 : gr32 = MOV32ri 42
+ %ebp = COPY %0
+
+ ; CHECK: [[REG1:%e[a-z]+]] = MOV32ri 23
+ ; CHECK: [[REG2:%e[a-z]+]] = MOV32ri 7
+ ; CHECK: [[REG1]] = ADD32ri8 [[REG1]], 5, implicit-def dead %eflags
+ %1 : gr32 = MOV32ri 23
+ %2 : gr32 = MOV32ri 7
+ %1 = ADD32ri8 %1, 5, implicit-def dead %eflags
+
+ NOOP implicit %ebp
+
+ ; CHECK: NOOP implicit [[REG2]]
+ ; CHECK: NOOP implicit [[REG1]]
+ NOOP implicit %2
+ NOOP implicit %1
+ RETQ %eax
+...
+---
+# Defs without uses are currently broken
+#name: func3
+#tracksRegLiveness: true
+#body: |
+# bb.0:
+# dead %0 : gr32 = MOV32ri 42
+...
+---
+# Uses without defs are currently broken (and honestly not that useful).
+#name: func4
+#tracksRegLiveness: true
+#body: |
+# bb.0:
+# NOOP undef implicit %0 : gr32
+...
diff --git a/test/CodeGen/X86/select.ll b/test/CodeGen/X86/select.ll
index 1afef86a5f11..7c2937936313 100644
--- a/test/CodeGen/X86/select.ll
+++ b/test/CodeGen/X86/select.ll
@@ -15,6 +15,7 @@ define i32 @test1(%0* %p, %0* %q, i1 %r) nounwind {
; CHECK-NEXT: cmovneq %rdi, %rsi
; CHECK-NEXT: movl (%rsi), %eax
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test1:
; MCU: # BB#0:
@@ -55,6 +56,7 @@ define i32 @test2() nounwind {
; CHECK-NEXT: popq %rcx
; CHECK-NEXT: retq
; CHECK-NEXT: LBB1_1: ## %bb90
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test2:
; MCU: # BB#0: # %entry
@@ -100,6 +102,7 @@ define float @test3(i32 %x) nounwind readnone {
; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test3:
; MCU: # BB#0: # %entry
@@ -123,6 +126,7 @@ define signext i8 @test4(i8* nocapture %P, double %F) nounwind readonly {
; CHECK-NEXT: seta %al
; CHECK-NEXT: movsbl (%rdi,%rax,4), %eax
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test4:
; MCU: # BB#0: # %entry
@@ -157,6 +161,7 @@ define void @test5(i1 %c, <2 x i16> %a, <2 x i16> %b, <2 x i16>* %p) nounwind {
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; CHECK-NEXT: movd %xmm0, (%rsi)
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test5:
; MCU: # BB#0:
@@ -196,6 +201,7 @@ define void @test6(i32 %C, <4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-NEXT: mulps %xmm0, %xmm0
; CHECK-NEXT: movaps %xmm0, (%rsi)
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test6:
; MCU: # BB#0:
@@ -267,6 +273,7 @@ define x86_fp80 @test7(i32 %tmp8) nounwind {
; CHECK-NEXT: leaq {{.*}}(%rip), %rcx
; CHECK-NEXT: fldt (%rax,%rcx)
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test7:
; MCU: # BB#0:
@@ -319,6 +326,7 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; GENERIC-NEXT: movq %xmm0, 16(%rsi)
; GENERIC-NEXT: movdqa %xmm1, (%rsi)
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test8:
; ATOM: ## BB#0:
@@ -358,6 +366,7 @@ define void @test8(i1 %c, <6 x i32>* %dst.addr, <6 x i32> %src1,<6 x i32> %src2)
; ATOM-NEXT: movq %xmm0, 16(%rsi)
; ATOM-NEXT: movdqa %xmm1, (%rsi)
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test8:
; MCU: # BB#0:
@@ -448,6 +457,7 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq %rsi, %rax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test9:
; ATOM: ## BB#0:
@@ -457,6 +467,7 @@ define i64 @test9(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test9:
; MCU: # BB#0:
@@ -483,6 +494,7 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq %rsi, %rax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test9a:
; ATOM: ## BB#0:
@@ -492,6 +504,7 @@ define i64 @test9a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test9a:
; MCU: # BB#0:
@@ -516,6 +529,7 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq %rsi, %rax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test9b:
; ATOM: ## BB#0:
@@ -525,6 +539,7 @@ define i64 @test9b(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test9b:
; MCU: # BB#0:
@@ -552,6 +567,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; GENERIC-NEXT: sbbq %rax, %rax
; GENERIC-NEXT: orq $1, %rax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test10:
; ATOM: ## BB#0:
@@ -561,6 +577,7 @@ define i64 @test10(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test10:
; MCU: # BB#0:
@@ -586,6 +603,7 @@ define i64 @test11(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-NEXT: notq %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test11:
; MCU: # BB#0:
@@ -612,6 +630,7 @@ define i64 @test11a(i64 %x, i64 %y) nounwind readnone ssp noredzone {
; CHECK-NEXT: notq %rax
; CHECK-NEXT: orq %rsi, %rax
; CHECK-NEXT: retq
+; CHECK-NEXT: ## -- End function
;
; MCU-LABEL: test11a:
; MCU: # BB#0:
@@ -641,6 +660,7 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
; GENERIC-NEXT: movq $-1, %rdi
; GENERIC-NEXT: cmovnoq %rax, %rdi
; GENERIC-NEXT: jmp __Znam ## TAILCALL
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test12:
; ATOM: ## BB#0: ## %entry
@@ -650,6 +670,7 @@ define noalias i8* @test12(i64 %count) nounwind ssp noredzone {
; ATOM-NEXT: movq $-1, %rdi
; ATOM-NEXT: cmovnoq %rax, %rdi
; ATOM-NEXT: jmp __Znam ## TAILCALL
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test12:
; MCU: # BB#0: # %entry
@@ -700,6 +721,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
; GENERIC-NEXT: cmpl %esi, %edi
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test13:
; ATOM: ## BB#0:
@@ -710,6 +732,7 @@ define i32 @test13(i32 %a, i32 %b) nounwind {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test13:
; MCU: # BB#0:
@@ -728,6 +751,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: notl %eax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test14:
; ATOM: ## BB#0:
@@ -737,6 +761,7 @@ define i32 @test14(i32 %a, i32 %b) nounwind {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test14:
; MCU: # BB#0:
@@ -756,6 +781,7 @@ define i32 @test15(i32 %x) nounwind {
; GENERIC-NEXT: negl %edi
; GENERIC-NEXT: sbbl %eax, %eax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test15:
; ATOM: ## BB#0: ## %entry
@@ -766,6 +792,7 @@ define i32 @test15(i32 %x) nounwind {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test15:
; MCU: # BB#0: # %entry
@@ -817,6 +844,7 @@ define i16 @test17(i16 %x) nounwind {
; GENERIC-NEXT: negw %di
; GENERIC-NEXT: sbbw %ax, %ax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test17:
; ATOM: ## BB#0: ## %entry
@@ -827,6 +855,7 @@ define i16 @test17(i16 %x) nounwind {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test17:
; MCU: # BB#0: # %entry
@@ -846,6 +875,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
; GENERIC-NEXT: cmovgel %edx, %esi
; GENERIC-NEXT: movl %esi, %eax
; GENERIC-NEXT: retq
+; GENERIC-NEXT: ## -- End function
;
; ATOM-LABEL: test18:
; ATOM: ## BB#0:
@@ -855,6 +885,7 @@ define i8 @test18(i32 %x, i8 zeroext %a, i8 zeroext %b) nounwind {
; ATOM-NEXT: nop
; ATOM-NEXT: nop
; ATOM-NEXT: retq
+; ATOM-NEXT: ## -- End function
;
; MCU-LABEL: test18:
; MCU: # BB#0:
diff --git a/test/CodeGen/X86/shrink-compare.ll b/test/CodeGen/X86/shrink-compare.ll
index 41f5d2d5be23..7f35258377ec 100644
--- a/test/CodeGen/X86/shrink-compare.ll
+++ b/test/CodeGen/X86/shrink-compare.ll
@@ -1,8 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
declare void @bar()
define void @test1(i32* nocapture %X) nounwind minsize {
+; CHECK-LABEL: test1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $47, (%rdi)
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%tmp1 = load i32, i32* %X, align 4
%and = and i32 %tmp1, 255
@@ -15,11 +22,15 @@ if.then:
if.end:
ret void
-; CHECK-LABEL: test1:
-; CHECK: cmpb $47, (%{{rdi|rcx}})
}
define void @test2(i32 %X) nounwind minsize {
+; CHECK-LABEL: test2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $47, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 47
@@ -31,11 +42,15 @@ if.then:
if.end:
ret void
-; CHECK-LABEL: test2:
-; CHECK: cmpb $47, %{{dil|cl}}
}
define void @test3(i32 %X) nounwind minsize {
+; CHECK-LABEL: test3:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $-1, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 255
@@ -47,12 +62,22 @@ if.then:
if.end:
ret void
-; CHECK-LABEL: test3:
-; CHECK: cmpb $-1, %{{dil|cl}}
}
; PR16083
define i1 @test4(i64 %a, i32 %b) {
+; CHECK-LABEL: test4:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: testl %esi, %esi
+; CHECK-NEXT: je .LBB3_1
+; CHECK-NEXT: # BB#2: # %lor.end
+; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB3_1: # %lor.rhs
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: # kill: %AL<def> %AL<kill> %EAX<kill>
+; CHECK-NEXT: retq
entry:
%tobool = icmp ne i32 %b, 0
br i1 %tobool, label %lor.end, label %lor.rhs
@@ -71,6 +96,16 @@ lor.end: ; preds = %lor.rhs, %entry
; PR16551
define void @test5(i32 %X) nounwind minsize {
+; CHECK-LABEL: test5:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movzbl x+{{.*}}(%rip), %eax
+; CHECK-NEXT: shll $16, %eax
+; CHECK-NEXT: movzwl x+{{.*}}(%rip), %ecx
+; CHECK-NEXT: orl %eax, %ecx
+; CHECK-NEXT: cmpl $1, %ecx
+; CHECK-NEXT: jne bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%bf.load = load i56, i56* bitcast ({ i8, i8, i8, i8, i8, i8, i8, i8 }* @x to i56*), align 4
%bf.lshr = lshr i56 %bf.load, 32
@@ -84,17 +119,16 @@ if.then:
if.end:
ret void
-
-; CHECK-LABEL: test5:
-; CHECK-NOT: cmpl $1,{{.*}}x+4
-; CHECK: ret
}
-; CHECK-LABEL: test2_1:
-; CHECK: movzbl
-; CHECK: cmpl $256
-; CHECK: je bar
define void @test2_1(i32 %X) nounwind minsize {
+; CHECK-LABEL: test2_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movzbl %dil, %eax
+; CHECK-NEXT: cmpl $256, %eax # imm = 0x100
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%and = and i32 %X, 255
%cmp = icmp eq i32 %and, 256
@@ -108,9 +142,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_1:
-; CHECK: cmpb $1, %{{dil|cl}}
define void @test_sext_i8_icmp_1(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $1, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 1
@@ -124,9 +162,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_47:
-; CHECK: cmpb $47, %{{dil|cl}}
define void @test_sext_i8_icmp_47(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_47:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $47, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 47
@@ -140,9 +182,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_127:
-; CHECK: cmpb $127, %{{dil|cl}}
define void @test_sext_i8_icmp_127(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_127:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $127, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 127
@@ -156,9 +202,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_neg1:
-; CHECK: cmpb $-1, %{{dil|cl}}
define void @test_sext_i8_icmp_neg1(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_neg1:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $-1, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -1
@@ -172,9 +222,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_neg2:
-; CHECK: cmpb $-2, %{{dil|cl}}
define void @test_sext_i8_icmp_neg2(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_neg2:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $-2, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -2
@@ -188,9 +242,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_neg127:
-; CHECK: cmpb $-127, %{{dil|cl}}
define void @test_sext_i8_icmp_neg127(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_neg127:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $-127, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -127
@@ -204,9 +262,13 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_neg128:
-; CHECK: cmpb $-128, %{{dil|cl}}
define void @test_sext_i8_icmp_neg128(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_neg128:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: cmpb $-128, %dil
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, -128
@@ -220,11 +282,14 @@ if.end:
ret void
}
-; CHECK-LABEL: test_sext_i8_icmp_255:
-; CHECK: movb $1,
-; CHECK: testb
-; CHECK: je bar
define void @test_sext_i8_icmp_255(i8 %x) nounwind minsize {
+; CHECK-LABEL: test_sext_i8_icmp_255:
+; CHECK: # BB#0: # %entry
+; CHECK-NEXT: movb $1, %al
+; CHECK-NEXT: testb %al, %al
+; CHECK-NEXT: je bar # TAILCALL
+; CHECK-NEXT: # BB#1: # %if.end
+; CHECK-NEXT: retq
entry:
%sext = sext i8 %x to i32
%cmp = icmp eq i32 %sext, 255
diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll
index 6d51fb54f8b8..79b949a6ccb1 100644
--- a/test/CodeGen/X86/sse3.ll
+++ b/test/CodeGen/X86/sse3.ll
@@ -14,6 +14,7 @@ define void @t0(<8 x i16>* %dest, <8 x i16>* %old) nounwind {
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp3 = load <8 x i16>, <8 x i16>* %old
%tmp6 = shufflevector <8 x i16> %tmp3,
@@ -32,6 +33,7 @@ define <8 x i16> @t1(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; X64-NEXT: andps (%rdi), %xmm0
; X64-NEXT: orps %xmm1, %xmm0
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp1 = load <8 x i16>, <8 x i16>* %A
%tmp2 = load <8 x i16>, <8 x i16>* %B
%tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> < i32 8, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
@@ -48,6 +50,7 @@ define <8 x i16> @t2(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-NEXT: pandn %xmm1, %xmm2
; X64-NEXT: por %xmm2, %xmm0
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 9, i32 1, i32 2, i32 9, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
@@ -61,6 +64,7 @@ define <8 x i16> @t3(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,3,2,1,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %A, <8 x i32> < i32 8, i32 3, i32 2, i32 13, i32 7, i32 6, i32 5, i32 4 >
ret <8 x i16> %tmp
}
@@ -73,6 +77,7 @@ define <8 x i16> @t4(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,0]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,7,4,7]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 7, i32 2, i32 3, i32 1, i32 5, i32 6, i32 5 >
ret <8 x i16> %tmp
}
@@ -83,6 +88,7 @@ define <8 x i16> @t5(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 0, i32 1, i32 10, i32 11, i32 2, i32 3 >
ret <8 x i16> %tmp
}
@@ -92,6 +98,7 @@ define <8 x i16> @t6(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64: ## BB#0:
; X64-NEXT: movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 8, i32 9, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7 >
ret <8 x i16> %tmp
}
@@ -102,6 +109,7 @@ define <8 x i16> @t7(<8 x i16> %A, <8 x i16> %B) nounwind {
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,3,2,4,5,6,7]
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,4,7]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = shufflevector <8 x i16> %A, <8 x i16> %B, <8 x i32> < i32 0, i32 0, i32 3, i32 2, i32 4, i32 6, i32 4, i32 7 >
ret <8 x i16> %tmp
}
@@ -113,6 +121,7 @@ define void @t8(<2 x i64>* %res, <2 x i64>* %A) nounwind {
; X64-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,4,7]
; X64-NEXT: movdqa %xmm0, (%rdi)
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = load <2 x i64>, <2 x i64>* %A
%tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16>
%tmp0 = extractelement <8 x i16> %tmp.upgrd.1, i32 0
@@ -143,6 +152,7 @@ define void @t9(<4 x float>* %r, <2 x i32>* %A) nounwind {
; X64-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X64-NEXT: movapd %xmm0, (%rdi)
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
%tmp = load <4 x float>, <4 x float>* %r
%tmp.upgrd.3 = bitcast <2 x i32>* %A to double*
%tmp.upgrd.4 = load double, double* %tmp.upgrd.3
@@ -179,6 +189,7 @@ define void @t10() nounwind {
; X64-NEXT: movq _g2@{{.*}}(%rip), %rax
; X64-NEXT: movq %xmm0, (%rax)
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
load <4 x i32>, <4 x i32>* @g1, align 16
bitcast <4 x i32> %1 to <8 x i16>
shufflevector <8 x i16> %2, <8 x i16> undef, <8 x i32> < i32 0, i32 2, i32 4, i32 6, i32 undef, i32 undef, i32 undef, i32 undef >
@@ -196,6 +207,7 @@ define <8 x i16> @t11(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-NEXT: psrld $16, %xmm0
; X64-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp7 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 1, i32 8, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp7
@@ -209,6 +221,7 @@ define <8 x i16> @t12(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 0, i32 1, i32 undef, i32 undef, i32 3, i32 11, i32 undef , i32 undef >
ret <8 x i16> %tmp9
@@ -222,6 +235,7 @@ define <8 x i16> @t13(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,2,2,3,4,5,6,7]
; X64-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,3,3]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 11, i32 3, i32 undef , i32 undef >
ret <8 x i16> %tmp9
@@ -234,6 +248,7 @@ define <8 x i16> @t14(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp9 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 2, i32 undef , i32 undef >
ret <8 x i16> %tmp9
@@ -247,6 +262,7 @@ define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; X64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; X64-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp8
@@ -260,6 +276,7 @@ define <16 x i8> @t16(<16 x i8> %T0) nounwind readnone {
; X64-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X64-NEXT: movdqa %xmm1, %xmm0
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp8 = shufflevector <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 1, i8 1, i8 1, i8 1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
@@ -275,6 +292,7 @@ define <4 x i32> @t17() nounwind {
; X64-NEXT: pxor %xmm1, %xmm1
; X64-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT: retq
+; X64-NEXT: ## -- End function
entry:
%tmp1 = load <4 x float>, <4 x float>* undef, align 16
%tmp2 = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
diff --git a/test/CodeGen/X86/stack-folding-fp-avx1.ll b/test/CodeGen/X86/stack-folding-fp-avx1.ll
index a00d47bb13e9..f937d484ce0d 100644
--- a/test/CodeGen/X86/stack-folding-fp-avx1.ll
+++ b/test/CodeGen/X86/stack-folding-fp-avx1.ll
@@ -1926,5 +1926,19 @@ define <8 x float> @stack_fold_xorps_ymm(<8 x float> %a0, <8 x float> %a1) {
ret <8 x float> %6
}
+define <4 x float> @stack_nofold_insertps(<8 x float> %a0, <8 x float> %a1) {
+; Cannot fold this without changing the immediate.
+; CHECK-LABEL: stack_nofold_insertps
+; CHECK: 32-byte Spill
+; CHECK: nop
+; CHECK: 32-byte Reload
+; CHECK: vinsertps $179, {{%xmm., %xmm., %xmm.}}
+ %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
+ %v0 = shufflevector <8 x float> %a0, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %v1 = shufflevector <8 x float> %a1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+ %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v0, <4 x float> %v1, i8 179)
+ ret <4 x float> %res
+}
+
attributes #0 = { "unsafe-fp-math"="false" }
attributes #1 = { "unsafe-fp-math"="true" }
diff --git a/test/CodeGen/X86/statepoint-allocas.ll b/test/CodeGen/X86/statepoint-allocas.ll
index 9f5418432abc..b8e5c82913a5 100644
--- a/test/CodeGen/X86/statepoint-allocas.ll
+++ b/test/CodeGen/X86/statepoint-allocas.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; Check that we can lower a use of an alloca both as a deopt value (where the
; exact meaning is up to the consumer of the stackmap) and as an explicit spill
; slot used for GC.
diff --git a/test/CodeGen/X86/statepoint-call-lowering.ll b/test/CodeGen/X86/statepoint-call-lowering.ll
index 6e5cdd605122..bd2dd53b654a 100644
--- a/test/CodeGen/X86/statepoint-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-call-lowering.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; This file contains a collection of basic tests to ensure we didn't
; screw up normal call lowering when there are no deopt or gc arguments.
diff --git a/test/CodeGen/X86/statepoint-far-call.ll b/test/CodeGen/X86/statepoint-far-call.ll
index dc49061f6461..9f9b684efae8 100644
--- a/test/CodeGen/X86/statepoint-far-call.ll
+++ b/test/CodeGen/X86/statepoint-far-call.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; Test to check that Statepoints with X64 far-immediate targets
; are lowered correctly to an indirect call via a scratch register.
diff --git a/test/CodeGen/X86/statepoint-forward.ll b/test/CodeGen/X86/statepoint-forward.ll
index d97bc0c75602..bee4b5ac884e 100644
--- a/test/CodeGen/X86/statepoint-forward.ll
+++ b/test/CodeGen/X86/statepoint-forward.ll
@@ -1,5 +1,5 @@
; RUN: opt -O3 -S < %s | FileCheck --check-prefix=CHECK-OPT %s
-; RUN: llc < %s | FileCheck --check-prefix=CHECK-LLC %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck --check-prefix=CHECK-LLC %s
; These tests are targeted at making sure we don't retain information
; about memory which contains potential gc references across a statepoint.
; They're carefully written to only outlaw forwarding of references.
diff --git a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
index 11dbe9e2e6c1..b88ca03805f2 100644
--- a/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
+++ b/test/CodeGen/X86/statepoint-gctransition-call-lowering.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; This file contains a collection of basic tests to ensure we didn't
; screw up normal call lowering when a statepoint is a GC transition.
diff --git a/test/CodeGen/X86/statepoint-invoke.ll b/test/CodeGen/X86/statepoint-invoke.ll
index 3e8b8ca49f1d..29f8e3ed4f78 100644
--- a/test/CodeGen/X86/statepoint-invoke.ll
+++ b/test/CodeGen/X86/statepoint-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s 2>&1 | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s 2>&1 | FileCheck %s
target triple = "x86_64-pc-linux-gnu"
diff --git a/test/CodeGen/X86/statepoint-live-in.ll b/test/CodeGen/X86/statepoint-live-in.ll
index abe2b0a7acc8..aaa4d7c8422a 100644
--- a/test/CodeGen/X86/statepoint-live-in.ll
+++ b/test/CodeGen/X86/statepoint-live-in.ll
@@ -1,4 +1,5 @@
-; RUN: llc -O3 < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -O3 < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
@@ -6,38 +7,70 @@ declare void @bar() #0
declare void @baz()
define void @test1(i32 %a) gc "statepoint-example" {
+; CHECK-LABEL: test1:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
entry:
; We expect the argument to be passed in an extra register to bar
-; CHECK-LABEL: test1
-; CHECK: pushq %rax
-; CHECK-NEXT: Lcfi0:
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: callq _bar
%statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a)
ret void
}
define void @test2(i32 %a, i32 %b) gc "statepoint-example" {
+; CHECK-LABEL: test2:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rbp
+; CHECK-NEXT: Lcfi1:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: Lcfi2:
+; CHECK-NEXT: .cfi_def_cfa_offset 24
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Lcfi3:
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: Lcfi4:
+; CHECK-NEXT: .cfi_offset %rbx, -24
+; CHECK-NEXT: Lcfi5:
+; CHECK-NEXT: .cfi_offset %rbp, -16
+; CHECK-NEXT: movl %esi, %ebx
+; CHECK-NEXT: movl %edi, %ebp
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: addq $8, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %rbp
+; CHECK-NEXT: retq
+;
entry:
; Because the first call clobbers esi, we have to move the values into
; new registers. Note that they stay in the registers for both calls.
-; CHECK-LABEL: @test2
-; CHECK: movl %esi, %ebx
-; CHECK-NEXT: movl %edi, %ebp
-; CHECK-NEXT: callq _bar
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %a, i32 %b)
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 2, i32 %b, i32 %a)
ret void
}
define void @test3(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i) gc "statepoint-example" {
+; CHECK-LABEL: test3:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Lcfi6:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
entry:
-; TODO: We should have folded the reload into the statepoint.
-; CHECK-LABEL: @test3
-; CHECK: pushq %rax
-; CHECK-NEXT: Lcfi
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: callq _bar
+; We directly reference the argument slot.
%statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 9, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i)
ret void
}
@@ -47,25 +80,39 @@ entry:
; also ends up being a good test of whether we can fold loads from immutable
; stack slots into the statepoint.
define void @test4(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z) gc "statepoint-example" {
+; CHECK-LABEL: test4:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Lcfi7:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: retq
+;
entry:
-; CHECK-LABEL: test4
-; CHECK: pushq %rax
-; CHECK-NEXT: Lcfi
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: callq _bar
%statepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 26, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h, i32 %i, i32 %j, i32 %k, i32 %l, i32 %m, i32 %n, i32 %o, i32 %p, i32 %q, i32 %r, i32 %s, i32 %t, i32 %u, i32 %v, i32 %w, i32 %x, i32 %y, i32 %z)
ret void
}
; A live-through gc-value must be spilled even if it is also a live-in deopt
; value. For live-in, we could technically report the register copy, but from
-; a code quality perspective it's better to reuse the required stack slot so
+; a code quality perspective it's better to reuse the required stack slot so
; as to put less stress on the register allocator for no benefit.
define i32 addrspace(1)* @test5(i32 %a, i32 addrspace(1)* %p) gc "statepoint-example" {
+; CHECK-LABEL: test5:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: Lcfi8:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: movq %rsi, (%rsp)
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp5:
+; CHECK-NEXT: movq (%rsp), %rax
+; CHECK-NEXT: popq %rcx
+; CHECK-NEXT: retq
+;
entry:
-; CHECK-LABEL: test5
-; CHECK: movq %rsi, (%rsp)
-; CHECK-NEXT: callq _bar
%token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a, i32 addrspace(1)* %p, i32 addrspace(1)* %p)
%p2 = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %token, i32 9, i32 9)
ret i32 addrspace(1)* %p2
@@ -73,14 +120,27 @@ entry:
; Show the interaction of live-through spilling followed by live-in.
define void @test6(i32 %a) gc "statepoint-example" {
+; CHECK-LABEL: test6:
+; CHECK: ## BB#0: ## %entry
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: Lcfi9:
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: subq $16, %rsp
+; CHECK-NEXT: Lcfi10:
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: Lcfi11:
+; CHECK-NEXT: .cfi_offset %rbx, -16
+; CHECK-NEXT: movl %edi, %ebx
+; CHECK-NEXT: movl %ebx, {{[0-9]+}}(%rsp)
+; CHECK-NEXT: callq _baz
+; CHECK-NEXT: Ltmp6:
+; CHECK-NEXT: callq _bar
+; CHECK-NEXT: Ltmp7:
+; CHECK-NEXT: addq $16, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: retq
+;
entry:
-; TODO: We could have reused the previous spill slot at zero additional cost.
-; CHECK-LABEL: test6
-; CHECK: movl %edi, %ebx
-; CHECK: movl %ebx, 12(%rsp)
-; CHECK-NEXT: callq _baz
-; CHECK-NEXT: Ltmp
-; CHECK-NEXT: callq _bar
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @baz, i32 0, i32 0, i32 0, i32 1, i32 %a)
call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @bar, i32 0, i32 2, i32 0, i32 1, i32 %a)
ret void
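
As a reader's sketch of the live-through pattern that test5 and test6 exercise (hypothetical names; the operand layout mirrors the statepoint calls above, and the gc.relocate indices count statepoint operands from zero): a gc pointer listed on the statepoint must survive the call, so it gets a stack slot and comes back through gc.relocate.

    declare void @callee()
    declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
    declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)

    define i32 addrspace(1)* @sketch(i32 addrspace(1)* %p) gc "statepoint-example" {
      ; %p is live across @callee, so it is spilled near the call site
      %tok = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @callee, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %p)
      ; operands 0-6 are the statepoint header, so %p is operand 7
      %p.rel = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
      ret i32 addrspace(1)* %p.rel
    }
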
diff --git a/test/CodeGen/X86/statepoint-stack-usage.ll b/test/CodeGen/X86/statepoint-stack-usage.ll
index 5c27898f284a..b16426eae3d5 100644
--- a/test/CodeGen/X86/statepoint-stack-usage.ll
+++ b/test/CodeGen/X86/statepoint-stack-usage.ll
@@ -1,4 +1,4 @@
-; RUN: llc -stack-symbol-ordering=0 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 < %s | FileCheck %s
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"
diff --git a/test/CodeGen/X86/statepoint-stackmap-format.ll b/test/CodeGen/X86/statepoint-stackmap-format.ll
index 0506381b9ec2..966f66815f92 100644
--- a/test/CodeGen/X86/statepoint-stackmap-format.ll
+++ b/test/CodeGen/X86/statepoint-stackmap-format.ll
@@ -1,5 +1,5 @@
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
-; RUN: llc < %s -stack-symbol-ordering=0 -mtriple="x86_64-pc-unknown-elf" | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="x86_64-pc-linux-gnu" | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -stack-symbol-ordering=0 -mtriple="x86_64-pc-unknown-elf" | FileCheck %s
; This test is a sanity check to ensure statepoints are generating StackMap
; sections correctly. This is not intended to be a rigorous test of the
diff --git a/test/CodeGen/X86/statepoint-uniqueing.ll b/test/CodeGen/X86/statepoint-uniqueing.ll
index e791bc6b2333..a5fa1f2d99c9 100644
--- a/test/CodeGen/X86/statepoint-uniqueing.ll
+++ b/test/CodeGen/X86/statepoint-uniqueing.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
; Checks for a crash we had when two gc.relocate calls would
; relocate identical values
diff --git a/test/CodeGen/X86/statepoint-vector-bad-spill.ll b/test/CodeGen/X86/statepoint-vector-bad-spill.ll
index 848988589cb0..7c55491bb1be 100644
--- a/test/CodeGen/X86/statepoint-vector-bad-spill.ll
+++ b/test/CodeGen/X86/statepoint-vector-bad-spill.ll
@@ -1,4 +1,4 @@
-; RUN: llc -O3 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -O3 < %s | FileCheck %s
; This is checking for a crash.
diff --git a/test/CodeGen/X86/statepoint-vector.ll b/test/CodeGen/X86/statepoint-vector.ll
index 000e88742880..5bc8f983ff06 100644
--- a/test/CodeGen/X86/statepoint-vector.ll
+++ b/test/CodeGen/X86/statepoint-vector.ll
@@ -1,4 +1,4 @@
-; RUN: llc -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
; REQUIRES: asserts
target triple = "x86_64-pc-linux-gnu"
diff --git a/test/CodeGen/X86/vector-unsigned-cmp.ll b/test/CodeGen/X86/vector-unsigned-cmp.ll
new file mode 100644
index 000000000000..fc246669992c
--- /dev/null
+++ b/test/CodeGen/X86/vector-unsigned-cmp.ll
@@ -0,0 +1,519 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
+
+; PR33276 - https://bugs.llvm.org/show_bug.cgi?id=33276
+; If both operands of an unsigned icmp are known non-negative, then
+; we don't need to flip the sign bits in order to map to signed pcmpgt*.
+
+define <2 x i1> @ugt_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; SSE-LABEL: ugt_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $1, %xmm0
+; SSE-NEXT: psrlq $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ugt_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
+ %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
+ %cmp = icmp ugt <2 x i64> %sh1, %sh2
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ult_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; SSE-LABEL: ult_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $1, %xmm0
+; SSE-NEXT: psrlq $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; SSE-NEXT: pand %xmm3, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm2[1,1,3,3]
+; SSE-NEXT: por %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ult_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
+ %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
+ %cmp = icmp ult <2 x i64> %sh1, %sh2
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; SSE-LABEL: uge_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $1, %xmm0
+; SSE-NEXT: psrlq $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: movdqa %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE-NEXT: pcmpeqd %xmm0, %xmm1
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,3,3]
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uge_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
+ %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
+ %cmp = icmp uge <2 x i64> %sh1, %sh2
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_v2i64(<2 x i64> %x, <2 x i64> %y) {
+; SSE-LABEL: ule_v2i64:
+; SSE: # BB#0:
+; SSE-NEXT: psrlq $1, %xmm0
+; SSE-NEXT: psrlq $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: movdqa %xmm0, %xmm2
+; SSE-NEXT: pcmpgtd %xmm1, %xmm2
+; SSE-NEXT: pshufd {{.*#+}} xmm3 = xmm2[0,0,2,2]
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
+; SSE-NEXT: pand %xmm3, %xmm0
+; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm2[1,1,3,3]
+; SSE-NEXT: por %xmm0, %xmm1
+; SSE-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ule_v2i64:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlq $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808]
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <2 x i64> %x, <i64 1, i64 1>
+ %sh2 = lshr <2 x i64> %y, <i64 1, i64 1>
+ %cmp = icmp ule <2 x i64> %sh1, %sh2
+ ret <2 x i1> %cmp
+}
+
+define <4 x i1> @ugt_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; SSE-LABEL: ugt_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $1, %xmm0
+; SSE-NEXT: psrld $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ugt_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ugt_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: retq
+ %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ugt <4 x i32> %sh1, %sh2
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @ult_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; SSE-LABEL: ult_v4i32:
+; SSE: # BB#0:
+; SSE-NEXT: psrld $1, %xmm0
+; SSE-NEXT: psrld $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX1-LABEL: ult_v4i32:
+; AVX1: # BB#0:
+; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX1-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX1-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX1-NEXT: retq
+;
+; AVX2-LABEL: ult_v4i32:
+; AVX2: # BB#0:
+; AVX2-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX2-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX2-NEXT: vpbroadcastd {{.*}}(%rip), %xmm2
+; AVX2-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX2-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; AVX2-NEXT: retq
+ %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ult <4 x i32> %sh1, %sh2
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @uge_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; SSE2-LABEL: uge_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm2
+; SSE2-NEXT: pcmpgtd %xmm0, %xmm2
+; SSE2-NEXT: pcmpeqd %xmm0, %xmm0
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: uge_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: psrld $1, %xmm0
+; SSE41-NEXT: psrld $1, %xmm1
+; SSE41-NEXT: pmaxud %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: uge_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpmaxud %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp uge <4 x i32> %sh1, %sh2
+ ret <4 x i1> %cmp
+}
+
+define <4 x i1> @ule_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; SSE2-LABEL: ule_v4i32:
+; SSE2: # BB#0:
+; SSE2-NEXT: psrld $1, %xmm0
+; SSE2-NEXT: psrld $1, %xmm1
+; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648]
+; SSE2-NEXT: pxor %xmm2, %xmm1
+; SSE2-NEXT: pxor %xmm2, %xmm0
+; SSE2-NEXT: pcmpgtd %xmm1, %xmm0
+; SSE2-NEXT: pcmpeqd %xmm1, %xmm1
+; SSE2-NEXT: pxor %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ule_v4i32:
+; SSE41: # BB#0:
+; SSE41-NEXT: psrld $1, %xmm0
+; SSE41-NEXT: psrld $1, %xmm1
+; SSE41-NEXT: pminud %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqd %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ule_v4i32:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrld $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrld $1, %xmm1, %xmm1
+; AVX-NEXT: vpminud %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <4 x i32> %x, <i32 1, i32 1, i32 1, i32 1>
+ %sh2 = lshr <4 x i32> %y, <i32 1, i32 1, i32 1, i32 1>
+ %cmp = icmp ule <4 x i32> %sh1, %sh2
+ ret <4 x i1> %cmp
+}
+
+define <8 x i1> @ugt_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; SSE-LABEL: ugt_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT: pxor %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ugt_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %cmp = icmp ugt <8 x i16> %sh1, %sh2
+ ret <8 x i1> %cmp
+}
+
+define <8 x i1> @ult_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; SSE-LABEL: ult_v8i16:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtw %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ult_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %cmp = icmp ult <8 x i16> %sh1, %sh2
+ ret <8 x i1> %cmp
+}
+
+define <8 x i1> @uge_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; SSE2-LABEL: uge_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm1
+; SSE2-NEXT: psubusw %xmm0, %xmm1
+; SSE2-NEXT: pxor %xmm0, %xmm0
+; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: uge_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: psrlw $1, %xmm0
+; SSE41-NEXT: psrlw $1, %xmm1
+; SSE41-NEXT: pmaxuw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: uge_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %cmp = icmp uge <8 x i16> %sh1, %sh2
+ ret <8 x i1> %cmp
+}
+
+define <8 x i1> @ule_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; SSE2-LABEL: ule_v8i16:
+; SSE2: # BB#0:
+; SSE2-NEXT: psrlw $1, %xmm0
+; SSE2-NEXT: psrlw $1, %xmm1
+; SSE2-NEXT: psubusw %xmm1, %xmm0
+; SSE2-NEXT: pxor %xmm1, %xmm1
+; SSE2-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE2-NEXT: retq
+;
+; SSE41-LABEL: ule_v8i16:
+; SSE41: # BB#0:
+; SSE41-NEXT: psrlw $1, %xmm0
+; SSE41-NEXT: psrlw $1, %xmm1
+; SSE41-NEXT: pminuw %xmm0, %xmm1
+; SSE41-NEXT: pcmpeqw %xmm1, %xmm0
+; SSE41-NEXT: retq
+;
+; AVX-LABEL: ule_v8i16:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpminuw %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %sh2 = lshr <8 x i16> %y, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %cmp = icmp ule <8 x i16> %sh1, %sh2
+ ret <8 x i1> %cmp
+}
+
+define <16 x i1> @ugt_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; SSE-LABEL: ugt_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: pand {{.*}}(%rip), %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE-NEXT: por %xmm2, %xmm1
+; SSE-NEXT: pxor %xmm2, %xmm0
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ugt_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpxor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %cmp = icmp ugt <16 x i8> %sh1, %sh2
+ ret <16 x i1> %cmp
+}
+
+define <16 x i1> @ult_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; SSE-LABEL: ult_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: pand {{.*}}(%rip), %xmm1
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; SSE-NEXT: por %xmm2, %xmm0
+; SSE-NEXT: pxor %xmm1, %xmm2
+; SSE-NEXT: pcmpgtb %xmm0, %xmm2
+; SSE-NEXT: movdqa %xmm2, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ult_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
+; AVX-NEXT: vpor %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %cmp = icmp ult <16 x i8> %sh1, %sh2
+ ret <16 x i1> %cmp
+}
+
+define <16 x i1> @uge_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; SSE-LABEL: uge_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pmaxub %xmm0, %xmm1
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: uge_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %cmp = icmp uge <16 x i8> %sh1, %sh2
+ ret <16 x i1> %cmp
+}
+
+define <16 x i1> @ule_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; SSE-LABEL: ule_v16i8:
+; SSE: # BB#0:
+; SSE-NEXT: psrlw $1, %xmm0
+; SSE-NEXT: movdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; SSE-NEXT: pand %xmm2, %xmm0
+; SSE-NEXT: psrlw $1, %xmm1
+; SSE-NEXT: pand %xmm2, %xmm1
+; SSE-NEXT: pminub %xmm0, %xmm1
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0
+; SSE-NEXT: retq
+;
+; AVX-LABEL: ule_v16i8:
+; AVX: # BB#0:
+; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $1, %xmm1, %xmm1
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm1
+; AVX-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
+ %sh1 = lshr <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %sh2 = lshr <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %cmp = icmp ule <16 x i8> %sh1, %sh2
+ ret <16 x i1> %cmp
+}
+
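For orientation, the identity these tests build on (standard two's-complement reasoning, not something this patch introduces): x86 has signed pcmpgt* but no unsigned vector compares, so an unsigned compare is lowered by flipping the sign bit of both operands. Once both inputs are known non-negative -- here forced by the lshr of 1 -- that flip changes nothing and could be dropped, which is what PR33276 asks for.

    define <4 x i1> @ugt_via_signed(<4 x i32> %x, <4 x i32> %y) {
      ; icmp ugt %x, %y  ==  icmp sgt (%x ^ 0x80000000), (%y ^ 0x80000000)
      %xf = xor <4 x i32> %x, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
      %yf = xor <4 x i32> %y, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
      %c = icmp sgt <4 x i32> %xf, %yf
      ret <4 x i1> %c
    }

With the sign bits already clear, %xf == %x and %yf == %y, so the movdqa/pxor pairs visible in the SSE output above are pure overhead.
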
diff --git a/test/CodeGen/X86/wide-fma-contraction.ll b/test/CodeGen/X86/wide-fma-contraction.ll
index f51f917fbac9..99e03c891c00 100644
--- a/test/CodeGen/X86/wide-fma-contraction.ll
+++ b/test/CodeGen/X86/wide-fma-contraction.ll
@@ -1,26 +1,48 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma -mtriple=x86_64-apple-darwin < %s | FileCheck %s
; RUN: llc -march=x86 -mcpu=bdver2 -mattr=-fma,-fma4 -mtriple=x86_64-apple-darwin < %s | FileCheck %s --check-prefix=CHECK-NOFMA
; CHECK-LABEL: fmafunc
; CHECK-NOFMA-LABEL: fmafunc
define <16 x float> @fmafunc(<16 x float> %a, <16 x float> %b, <16 x float> %c) {
+; CHECK-LABEL: fmafunc:
+; CHECK: ## BB#0:
+; CHECK-NEXT: pushl %ebp
+; CHECK-NEXT: Lcfi0:
+; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: Lcfi1:
+; CHECK-NEXT: .cfi_offset %ebp, -8
+; CHECK-NEXT: movl %esp, %ebp
+; CHECK-NEXT: Lcfi2:
+; CHECK-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NEXT: andl $-32, %esp
+; CHECK-NEXT: subl $32, %esp
+; CHECK-NEXT: vfmaddps 8(%ebp), %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vfmaddps 40(%ebp), %ymm3, %ymm1, %ymm1
+; CHECK-NEXT: movl %ebp, %esp
+; CHECK-NEXT: popl %ebp
+; CHECK-NEXT: retl
+;
+; CHECK-NOFMA-LABEL: fmafunc:
+; CHECK-NOFMA: ## BB#0:
+; CHECK-NOFMA-NEXT: pushl %ebp
+; CHECK-NOFMA-NEXT: Lcfi0:
+; CHECK-NOFMA-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NOFMA-NEXT: Lcfi1:
+; CHECK-NOFMA-NEXT: .cfi_offset %ebp, -8
+; CHECK-NOFMA-NEXT: movl %esp, %ebp
+; CHECK-NOFMA-NEXT: Lcfi2:
+; CHECK-NOFMA-NEXT: .cfi_def_cfa_register %ebp
+; CHECK-NOFMA-NEXT: andl $-32, %esp
+; CHECK-NOFMA-NEXT: subl $32, %esp
+; CHECK-NOFMA-NEXT: vmulps %ymm2, %ymm0, %ymm0
+; CHECK-NOFMA-NEXT: vaddps 8(%ebp), %ymm0, %ymm0
+; CHECK-NOFMA-NEXT: vmulps %ymm3, %ymm1, %ymm1
+; CHECK-NOFMA-NEXT: vaddps 40(%ebp), %ymm1, %ymm1
+; CHECK-NOFMA-NEXT: movl %ebp, %esp
+; CHECK-NOFMA-NEXT: popl %ebp
+; CHECK-NOFMA-NEXT: retl
-; CHECK-NOT: vmulps
-; CHECK-NOT: vaddps
-; CHECK: vfmaddps
-; CHECK-NOT: vmulps
-; CHECK-NOT: vaddps
-; CHECK: vfmaddps
-; CHECK-NOT: vmulps
-; CHECK-NOT: vaddps
-
-; CHECK-NOFMA-NOT: calll
-; CHECK-NOFMA: vmulps
-; CHECK-NOFMA: vaddps
-; CHECK-NOFMA-NOT: calll
-; CHECK-NOFMA: vmulps
-; CHECK-NOFMA: vaddps
-; CHECK-NOFMA-NOT: calll
%ret = tail call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %a, <16 x float> %b, <16 x float> %c)
ret <16 x float> %ret
diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll
index 397e5bc10f5b..cd58dd1e7604 100644
--- a/test/CodeGen/X86/xor-icmp.ll
+++ b/test/CodeGen/X86/xor-icmp.ll
@@ -1,21 +1,33 @@
-; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32
-; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s -check-prefix=X32
+; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s -check-prefix=X64
; rdar://7367229
define i32 @t(i32 %a, i32 %b) nounwind ssp {
+; X32-LABEL: t:
+; X32: # BB#0: # %entry
+; X32-NEXT: movb {{[0-9]+}}(%esp), %al
+; X32-NEXT: xorb {{[0-9]+}}(%esp), %al
+; X32-NEXT: testb $64, %al
+; X32-NEXT: je .LBB0_1
+; X32-NEXT: # BB#2: # %bb1
+; X32-NEXT: jmp bar # TAILCALL
+; X32-NEXT: .LBB0_1: # %bb
+; X32-NEXT: jmp foo # TAILCALL
+;
+; X64-LABEL: t:
+; X64: # BB#0: # %entry
+; X64-NEXT: movl %edi, %eax
+; X64-NEXT: xorl %esi, %eax
+; X64-NEXT: testb $64, %ah
+; X64-NEXT: je .LBB0_1
+; X64-NEXT: # BB#2: # %bb1
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: jmp bar # TAILCALL
+; X64-NEXT: .LBB0_1: # %bb
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: jmp foo # TAILCALL
entry:
-; X32-LABEL: t:
-; X32: xorb
-; X32-NOT: andb
-; X32-NOT: shrb
-; X32: testb $64
-; X32: je
-
-; X64-LABEL: t:
-; X64-NOT: setne
-; X64: xorl
-; X64: testb $64
-; X64: je
%0 = and i32 %a, 16384
%1 = icmp ne i32 %0, 0
%2 = and i32 %b, 16384
@@ -38,20 +50,32 @@ declare i32 @bar(...)
define i32 @t2(i32 %x, i32 %y) nounwind ssp {
; X32-LABEL: t2:
-; X32: cmpl
-; X32: sete
-; X32: cmpl
-; X32: sete
-; X32-NOT: xor
-; X32: je
-
+; X32: # BB#0: # %entry
+; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: sete %al
+; X32-NEXT: cmpl $0, {{[0-9]+}}(%esp)
+; X32-NEXT: sete %cl
+; X32-NEXT: cmpb %al, %cl
+; X32-NEXT: je .LBB1_1
+; X32-NEXT: # BB#2: # %bb
+; X32-NEXT: jmp foo # TAILCALL
+; X32-NEXT: .LBB1_1: # %return
+; X32-NEXT: retl
+;
; X64-LABEL: t2:
-; X64: testl
-; X64: sete
-; X64: testl
-; X64: sete
-; X64-NOT: xor
-; X64: je
+; X64: # BB#0: # %entry
+; X64-NEXT: testl %edi, %edi
+; X64-NEXT: sete %al
+; X64-NEXT: testl %esi, %esi
+; X64-NEXT: sete %cl
+; X64-NEXT: cmpb %al, %cl
+; X64-NEXT: je .LBB1_1
+; X64-NEXT: # BB#2: # %bb
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: jmp foo # TAILCALL
+; X64-NEXT: .LBB1_1: # %return
+; X64-NEXT: retq
+
entry:
%0 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
%1 = icmp eq i32 %y, 0 ; <i1> [#uses=1]
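
A sketch of the combine the new checks for @t pin down (my restatement, not taken from the patch): comparing two single-bit tests for inequality is the same as testing that bit of the xor, and since bit 14 of a 32-bit value is bit 6 of its second byte, the X64 path can use testb $64 on %ah.

    define i1 @bit14_differs(i32 %a, i32 %b) {
      ; ((a & 16384) != 0) != ((b & 16384) != 0)  <=>  ((a ^ b) & 16384) != 0
      %x = xor i32 %a, %b
      %m = and i32 %x, 16384
      %c = icmp ne i32 %m, 0
      ret i1 %c
    }
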
diff --git a/test/DebugInfo/MIR/AArch64/clobber-sp.mir b/test/DebugInfo/MIR/AArch64/clobber-sp.mir
new file mode 100644
index 000000000000..444faee81cb3
--- /dev/null
+++ b/test/DebugInfo/MIR/AArch64/clobber-sp.mir
@@ -0,0 +1,181 @@
+# RUN: llc -start-after=livedebugvalues -filetype=obj -o - %s \
+# RUN: | llvm-dwarfdump - | FileCheck %s
+# CHECK: .debug_info contents:
+# CHECK: DW_TAG_formal_parameter
+# CHECK: DW_TAG_formal_parameter
+# CHECK-NEXT: DW_AT_location [DW_FORM_data4] ([[LOC:.*]])
+# CHECK-NEXT: DW_AT_name {{.*}}"y"
+# CHECK: .debug_loc contents:
+# CHECK: [[LOC]]:
+# CHECK-SAME: Beginning address offset: 0x0000000000000000
+# CHECK-NEXT: Ending address offset: 0x0000000000000014
+# CHECK-NEXT: Location description: 51
+# reg1
+#
+# The range of y's [SP+8] location must not be interrupted by the call to h.
+# CHECK: Beginning address offset: 0x0000000000000014
+# CHECK-NEXT: Ending address offset: 0x0000000000000038
+# CHECK-NEXT: Location description: 8f 08
+# breg31 +8
+--- |
+ ; Generated at -Os from:
+ ; struct Rect {
+ ; double x, y, w, h;
+ ; };
+ ; void g(struct Rect);
+ ; void h(int *);
+ ; int f(int x, int y, struct Rect s) {
+ ; g(s);
+ ; if (y)
+ ; h(&x);
+ ; return 0;
+ ; }
+ source_filename = "/tmp/clobber.c"
+ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+ target triple = "arm64-apple-ios"
+
+ %struct.Rect = type { double, double, double, double }
+
+ ; Function Attrs: nounwind optsize ssp
+ define i32 @f(i32 %x, i32 %y, [4 x double] %s.coerce) local_unnamed_addr #0 !dbg !7 {
+ entry:
+ %x.addr = alloca i32, align 4
+ tail call void @llvm.dbg.value(metadata i32 %x, i64 0, metadata !19, metadata !22), !dbg !23
+ store i32 %x, i32* %x.addr, align 4, !tbaa !24
+ tail call void @llvm.dbg.value(metadata i32 %y, i64 0, metadata !20, metadata !22), !dbg !28
+ tail call void @llvm.dbg.declare(metadata %struct.Rect* undef, metadata !21, metadata !22), !dbg !29
+ tail call void @g([4 x double] %s.coerce) #4, !dbg !30
+ %tobool = icmp eq i32 %y, 0, !dbg !31
+ br i1 %tobool, label %if.end, label %if.then, !dbg !33
+
+ if.then: ; preds = %entry
+ tail call void @llvm.dbg.value(metadata i32* %x.addr, i64 0, metadata !19, metadata !22), !dbg !23
+ call void @h(i32* nonnull %x.addr) #4, !dbg !34
+ br label %if.end, !dbg !34
+
+ if.end: ; preds = %if.then, %entry
+ ret i32 0, !dbg !35
+ }
+
+ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+ declare void @g([4 x double]) local_unnamed_addr #2
+ declare void @h(i32*) local_unnamed_addr #2
+ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #1
+ declare void @llvm.stackprotector(i8*, i8**) #3
+
+ attributes #0 = { nounwind optsize ssp }
+ attributes #1 = { nounwind readnone speculatable }
+ attributes #2 = { optsize }
+ attributes #3 = { nounwind }
+ attributes #4 = { nounwind optsize }
+
+ !llvm.dbg.cu = !{!0}
+ !llvm.module.flags = !{!3, !4, !5}
+ !llvm.ident = !{!6}
+
+ !0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 5.0.0 (trunk 302682) (llvm/trunk 302683)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+ !1 = !DIFile(filename: "/tmp/clobber.c", directory: "/Volumes/Data/apple-internal/swift")
+ !2 = !{}
+ !3 = !{i32 2, !"Dwarf Version", i32 2}
+ !4 = !{i32 2, !"Debug Info Version", i32 3}
+ !5 = !{i32 1, !"PIC Level", i32 2}
+ !6 = !{!"clang version 5.0.0 (trunk 302682) (llvm/trunk 302683)"}
+ !7 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 7, type: !8, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !18)
+ !8 = !DISubroutineType(types: !9)
+ !9 = !{!10, !10, !10, !11}
+ !10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+ !11 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Rect", file: !1, line: 1, size: 256, elements: !12)
+ !12 = !{!13, !15, !16, !17}
+ !13 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !11, file: !1, line: 2, baseType: !14, size: 64)
+ !14 = !DIBasicType(name: "double", size: 64, encoding: DW_ATE_float)
+ !15 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 64)
+ !16 = !DIDerivedType(tag: DW_TAG_member, name: "w", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 128)
+ !17 = !DIDerivedType(tag: DW_TAG_member, name: "h", scope: !11, file: !1, line: 2, baseType: !14, size: 64, offset: 192)
+ !18 = !{!19, !20, !21}
+ !19 = !DILocalVariable(name: "x", arg: 1, scope: !7, file: !1, line: 7, type: !10)
+ !20 = !DILocalVariable(name: "y", arg: 2, scope: !7, file: !1, line: 7, type: !10)
+ !21 = !DILocalVariable(name: "s", arg: 3, scope: !7, file: !1, line: 7, type: !11)
+ !22 = !DIExpression()
+ !23 = !DILocation(line: 7, column: 11, scope: !7)
+ !24 = !{!25, !25, i64 0}
+ !25 = !{!"int", !26, i64 0}
+ !26 = !{!"omnipotent char", !27, i64 0}
+ !27 = !{!"Simple C/C++ TBAA"}
+ !28 = !DILocation(line: 7, column: 18, scope: !7)
+ !29 = !DILocation(line: 7, column: 33, scope: !7)
+ !30 = !DILocation(line: 8, column: 3, scope: !7)
+ !31 = !DILocation(line: 9, column: 7, scope: !32)
+ !32 = distinct !DILexicalBlock(scope: !7, file: !1, line: 9, column: 7)
+ !33 = !DILocation(line: 9, column: 7, scope: !7)
+ !34 = !DILocation(line: 10, column: 5, scope: !32)
+ !35 = !DILocation(line: 12, column: 3, scope: !7)
+
+...
+---
+name: f
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+liveins:
+ - { reg: '%w0' }
+ - { reg: '%w1' }
+ - { reg: '%d0' }
+ - { reg: '%d1' }
+ - { reg: '%d2' }
+ - { reg: '%d3' }
+frameInfo:
+ isFrameAddressTaken: false
+ isReturnAddressTaken: false
+ hasStackMap: false
+ hasPatchPoint: false
+ stackSize: 32
+ offsetAdjustment: 0
+ maxAlignment: 8
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+ hasOpaqueSPAdjustment: false
+ hasVAStart: false
+ hasMustTailInVarArgFunc: false
+stack:
+ - { id: 0, name: x.addr, offset: -20, size: 4, alignment: 4, local-offset: -4 }
+ - { id: 1, type: spill-slot, offset: -24, size: 4, alignment: 4 }
+ - { id: 2, type: spill-slot, offset: -8, size: 8, alignment: 8, callee-saved-register: '%lr' }
+ - { id: 3, type: spill-slot, offset: -16, size: 8, alignment: 8, callee-saved-register: '%fp' }
+body: |
+ bb.0.entry:
+ successors: %bb.2.if.end(0x40000000), %bb.1.if.then(0x40000000)
+ liveins: %w0, %w1, %d0, %d1, %d2, %d3, %lr
+
+ %sp = frame-setup SUBXri %sp, 32, 0
+ frame-setup STPXi killed %fp, killed %lr, %sp, 2 :: (store 8 into %stack.3), (store 8 into %stack.2)
+ %fp = frame-setup ADDXri %sp, 16, 0
+ DBG_VALUE debug-use %w0, debug-use _, !19, !22, debug-location !23
+ STURWi killed %w0, %fp, -4 :: (store 4 into %stack.0.x.addr)
+ DBG_VALUE debug-use %w1, debug-use _, !20, !22, debug-location !28
+ STRWui killed %w1, %sp, 2, debug-location !30 :: (store 4 into %stack.1)
+ DBG_VALUE %sp, 8, !20, !22, debug-location !28
+ BL @g, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %d0, implicit killed %d1, implicit killed %d2, implicit killed %d3, implicit-def %sp, debug-location !30
+ %w0 = LDRWui %sp, 2, debug-location !33 :: (load 4 from %stack.1)
+ CBZW killed %w0, %bb.2.if.end, debug-location !33
+
+ bb.1.if.then:
+ successors: %bb.2.if.end(0x80000000)
+
+ DBG_VALUE debug-use %sp, 8, !20, !22, debug-location !28
+ %x0 = SUBXri %fp, 4, 0
+ DBG_VALUE debug-use %x0, debug-use _, !19, !22, debug-location !23
+ BL @h, csr_aarch64_aapcs, implicit-def dead %lr, implicit %sp, implicit killed %x0, debug-location !34
+
+ bb.2.if.end:
+ DBG_VALUE debug-use %sp, 8, !20, !22, debug-location !28
+ %w8 = MOVZWi 0, 0
+ %x0 = ORRXrs %xzr, undef %x8, 0, implicit killed %w8, debug-location !35
+ %fp, %lr = LDPXi %sp, 2, debug-location !35 :: (load 8 from %stack.3), (load 8 from %stack.2)
+ %sp = ADDXri %sp, 32, 0, debug-location !35
+ RET undef %lr, implicit killed %w0, debug-location !35
+
+...
diff --git a/test/DebugInfo/MIR/AArch64/lit.local.cfg b/test/DebugInfo/MIR/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..cec29af5bbe4
--- /dev/null
+++ b/test/DebugInfo/MIR/AArch64/lit.local.cfg
@@ -0,0 +1,3 @@
+if 'AArch64' not in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
index 66030020f8f4..d1324d26d8bb 100644
--- a/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
+++ b/test/DebugInfo/PDB/Inputs/simple-line-info.yaml
@@ -5,39 +5,40 @@ DbiStream:
ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
SourceFiles:
- 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- LineInfo:
- Checksums:
- - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- Kind: MD5
- Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
- - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
- Kind: MD5
- Checksum: 1154D69F5B2650196E1FC34F4134E56B
- Lines:
- - CodeSize: 10
- Flags: [ ]
- RelocOffset: 16
- RelocSegment: 1
- Blocks:
- - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
- Lines:
- - Offset: 0
- LineStart: 5
- IsStatement: true
- EndDelta: 0
- - Offset: 3
- LineStart: 6
- IsStatement: true
- EndDelta: 0
- - Offset: 8
- LineStart: 7
- IsStatement: true
- EndDelta: 0
- Columns:
- InlineeLines:
- - HasExtraFiles: false
- Sites:
- - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
- LineNum: 26950
- Inlinee: 22767
+ Subsections:
+ - !FileChecksums
+ Checksums:
+ - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+ Kind: MD5
+ Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
+ - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+ Kind: MD5
+ Checksum: 1154D69F5B2650196E1FC34F4134E56B
+ - !Lines
+ CodeSize: 10
+ Flags: [ ]
+ RelocOffset: 16
+ RelocSegment: 1
+ Blocks:
+ - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+ Lines:
+ - Offset: 0
+ LineStart: 5
+ IsStatement: true
+ EndDelta: 0
+ - Offset: 3
+ LineStart: 6
+ IsStatement: true
+ EndDelta: 0
+ - Offset: 8
+ LineStart: 7
+ IsStatement: true
+ EndDelta: 0
+ Columns:
+ - !InlineeLines
+ HasExtraFiles: false
+ Sites:
+ - FileName: 'f:\dd\externalapis\windows\10\sdk\inc\winerror.h'
+ LineNum: 26950
+ Inlinee: 22767
...
diff --git a/test/DebugInfo/PDB/pdbdump-write.test b/test/DebugInfo/PDB/pdbdump-write.test
index f56b4fbe3624..393473a53af1 100644
--- a/test/DebugInfo/PDB/pdbdump-write.test
+++ b/test/DebugInfo/PDB/pdbdump-write.test
@@ -11,10 +11,10 @@
; (for example if we don't write the entire stream)
;
; RUN: llvm-pdbdump pdb2yaml -stream-metadata -stream-directory \
-; RUN: -pdb-stream -tpi-stream %p/Inputs/empty.pdb > %t.1
+; RUN: -pdb-stream -tpi-stream -dbi-module-syms %p/Inputs/empty.pdb > %t.1
; RUN: llvm-pdbdump yaml2pdb -pdb=%t.2 %t.1
; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN: -no-file-headers %p/Inputs/empty.pdb > %t.3
+; RUN: -dbi-module-syms -no-file-headers %p/Inputs/empty.pdb > %t.3
; RUN: llvm-pdbdump pdb2yaml -pdb-stream -tpi-stream \
-; RUN: -no-file-headers %t.2 > %t.4
+; RUN: -dbi-module-syms -no-file-headers %t.2 > %t.4
; RUN: diff %t.3 %t.4
diff --git a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
index ca7427c0099b..f959805c7474 100644
--- a/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
+++ b/test/DebugInfo/PDB/pdbdump-yaml-lineinfo.test
@@ -28,12 +28,8 @@ YAML: - Module: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
YAML: ObjFile: 'd:\src\llvm\test\DebugInfo\PDB\Inputs\empty.obj'
YAML: SourceFiles:
YAML: - 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML: LineInfo:
-YAML: Checksums:
-YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
-YAML: Kind: MD5
-YAML: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
-YAML: Lines:
+YAML: Subsections:
+YAML: - !Lines
YAML: CodeSize: 10
YAML: Flags: [ ]
YAML: RelocOffset: 16
@@ -54,6 +50,11 @@ YAML: LineStart: 7
YAML: IsStatement: true
YAML: EndDelta: 0
YAML: Columns:
+YAML: - !FileChecksums
+YAML: Checksums:
+YAML: - FileName: 'd:\src\llvm\test\debuginfo\pdb\inputs\empty.cpp'
+YAML: Kind: MD5
+YAML: Checksum: A0A5BD0D3ECD93FC29D19DE826FBF4BC
YAML: - Module: '* Linker *'
YAML: ObjFile: ''
YAML: ... \ No newline at end of file
diff --git a/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll b/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll
new file mode 100644
index 000000000000..4df6ffeb5a8c
--- /dev/null
+++ b/test/Instrumentation/SanitizerCoverage/inline-8bit-counters.ll
@@ -0,0 +1,13 @@
+; Test -sanitizer-coverage-inline-8bit-counters=1
+; RUN: opt < %s -sancov -sanitizer-coverage-level=1 -sanitizer-coverage-inline-8bit-counters=1 -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+define void @foo() {
+entry:
+; CHECK: %0 = load i8, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__sancov_gen_, i64 0, i64 0), !nosanitize
+; CHECK: %1 = add i8 %0, 1
+; CHECK: store i8 %1, i8* getelementptr inbounds ([1 x i8], [1 x i8]* @__sancov_gen_, i64 0, i64 0), !nosanitize
+ ret void
+}
+; CHECK: call void @__sanitizer_cov_8bit_counters_init(i8* bitcast (i8** @__start___sancov_counters to i8*), i8* bitcast (i8** @__stop___sancov_counters to i8*))
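
Reading the checks together (a summary of what they assert, not additional behavior): every instrumented block owns one byte of a counters section and bumps it with an ordinary load/add/store marked !nosanitize, and a module constructor hands the runtime the whole section through the linker-provided __start/__stop symbols.

    ; per-block pattern asserted above, for a block whose byte is @__sancov_gen_:
    ;   %0 = load i8, i8* <counter byte>
    ;   %1 = add i8 %0, 1          ; plain, unsynchronized 8-bit increment
    ;   store i8 %1, i8* <counter byte>
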
diff --git a/test/MC/WebAssembly/external-data.ll b/test/MC/WebAssembly/external-data.ll
new file mode 100644
index 000000000000..91e05b3f13a6
--- /dev/null
+++ b/test/MC/WebAssembly/external-data.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple wasm32-unknown-unknown-wasm -filetype=obj %s -o - | obj2yaml | FileCheck %s
+; Verify relocations are correctly generated for addresses of externals
+; in the data section.
+
+declare i32 @f1(...)
+
+@foo = global i64 7, align 4
+@far = local_unnamed_addr global i32 (...)* @f1, align 4
+
+; CHECK: - Type: DATA
+; CHECK: Relocations:
+; CHECK: - Type: R_WEBASSEMBLY_GLOBAL_ADDR_I32
+; CHECK: Index: 0
+; CHECK: Offset: 0x0000000E
+; CHECK: Segments:
+; CHECK: - Index: 0
+; CHECK: Offset:
+; CHECK: Opcode: I32_CONST
+; CHECK: Value: 0
+; CHECK: Content: 0700000000000000FFFFFFFF
+
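Decoding the Content bytes under the natural layout (my reading of the checks; the one segment packs both globals):

    07 00 00 00 00 00 00 00   ; @foo = i64 7, little-endian
    FF FF FF FF               ; @far's pointer to @f1: placeholder bytes that the
                              ; R_WEBASSEMBLY_GLOBAL_ADDR_I32 relocation at 0x0E patches
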
diff --git a/test/ThinLTO/X86/deadstrip.ll b/test/ThinLTO/X86/deadstrip.ll
index 0c85322eb565..c19ccb01be3c 100644
--- a/test/ThinLTO/X86/deadstrip.ll
+++ b/test/ThinLTO/X86/deadstrip.ll
@@ -22,6 +22,20 @@
; RUN: llvm-dis < %t.out.1.3.import.bc | FileCheck %s --check-prefix=CHECK2
; RUN: llvm-nm %t.out.1 | FileCheck %s --check-prefix=CHECK2-NM
+; RUN: llvm-bcanalyzer -dump %t.out.index.bc | FileCheck %s --check-prefix=COMBINED
+; Live, NotEligibleForImport, Internal
+; COMBINED-DAG: <COMBINED {{.*}} op2=55
+; Live, Internal
+; COMBINED-DAG: <COMBINED {{.*}} op2=39
+; Live, External
+; COMBINED-DAG: <COMBINED {{.*}} op2=32
+; COMBINED-DAG: <COMBINED {{.*}} op2=32
+; COMBINED-DAG: <COMBINED {{.*}} op2=32
+; (Dead)
+; COMBINED-DAG: <COMBINED {{.*}} op2=0
+; COMBINED-DAG: <COMBINED {{.*}} op2=0
+; COMBINED-DAG: <COMBINED {{.*}} op2=0
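+; A decoding sketch for the op2 values above, assuming the flags word packs
+; linkage in the low four bits, NotEligibleForImport at bit 4 and Live at
+; bit 5: 55 = 32 + 16 + 7 (Live, NotEligible, Internal), 39 = 32 + 7
+; (Live, Internal), 32 = Live with External linkage, and 0 is a dead
+; external symbol.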
+
; Dead-stripping on the index allows us to internalize these,
; and limit the import of @baz thanks to early pruning.
; CHECK-NOT: available_externally {{.*}} @baz()
@@ -35,7 +49,7 @@
; Make sure we didn't internalize @boo, which is reachable via
; llvm.global_ctors
; CHECK2: define void @boo()
-; We should have eventually revoved @baz since it was internalized and unused
+; We should have eventually removed @baz since it was internalized and unused
; CHECK2-NM-NOT: _baz
; The final binary should not contain any of the dead functions,
diff --git a/test/ThinLTO/X86/newpm-basic.ll b/test/ThinLTO/X86/newpm-basic.ll
index d357cbc85d00..bfcc60c6807b 100644
--- a/test/ThinLTO/X86/newpm-basic.ll
+++ b/test/ThinLTO/X86/newpm-basic.ll
@@ -1,7 +1,7 @@
; RUN: opt -module-summary %s -o %t1.bc
; RUN: llvm-lto2 run %t1.bc -o %t.o \
; RUN: -r=%t1.bc,_tinkywinky,pxl \
-; RUN: -lto-use-new-pm
+; RUN: -use-new-pm
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.11.0"
diff --git a/test/Transforms/CodeExtractor/cost.ll b/test/Transforms/CodeExtractor/cost.ll
new file mode 100644
index 000000000000..4ac5acee019a
--- /dev/null
+++ b/test/Transforms/CodeExtractor/cost.ll
@@ -0,0 +1,64 @@
+; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
+; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
+define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
+bb:
+; ptr != null is predicted to be true
+ %tmp = icmp ne i32* %arg, null
+ br i1 %tmp, label %bb8, label %bb1
+
+; bb1 is not likely
+bb1: ; preds = %bb
+ %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+ br label %bb8
+
+bb8: ; preds = %bb1, %bb
+ %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+ ret i32 %tmp9
+}
+
+define i32 @outline_region_likely(i32* %arg) local_unnamed_addr {
+bb:
+; ptr == null is predicted to be false
+ %tmp = icmp eq i32* %arg, null
+ br i1 %tmp, label %bb8, label %bb1
+
+; bb1 is likely
+bb1: ; preds = %bb
+ %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+ br label %bb8
+
+bb8: ; preds = %bb1, %bb
+ %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+ ret i32 %tmp9
+}
+
+declare i32 @foo(i32* %arg)
+
+define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
+; CHECK-LABEL: @dummy_caller
+ %tmp = call i32 @outline_region_notlikely(i32* %arg)
+; CHECK: call void @outline_region_notlikely.2_bb1
+ %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
+; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
+ ret i32 %tmp
+
+}
+
+; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) {
+; CHECK-NEXT: newFuncRoot:
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
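
The shape the @dummy_caller checks assert, sketched with the names the checks themselves use (the exact IR the pass emits may differ in detail): partial inlining copies the cheap entry test into the caller and leaves the expensive cold block behind as an outlined function, so the common path costs only one compare plus a rarely taken call.

    ; roughly what @dummy_caller contains after the pass:
    ;   %tmp.i = icmp ne i32* %arg, null
    ;   br i1 %tmp.i, label %fastpath, label %coldpath
    ; coldpath:                                            ; unlikely
    ;   call void @outline_region_notlikely.2_bb1(i32* %arg)
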
diff --git a/test/Transforms/CodeExtractor/cost_meta.ll b/test/Transforms/CodeExtractor/cost_meta.ll
new file mode 100644
index 000000000000..2e4467a8d0c9
--- /dev/null
+++ b/test/Transforms/CodeExtractor/cost_meta.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S < %s -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
+; RUN: opt -S < %s -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
+define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
+bb:
+; ptr != null is predicted to be true
+ %tmp = icmp ne i32* %arg, null
+ br i1 %tmp, label %bb8, label %bb1, !prof !2
+
+; bb1 is not likely
+bb1: ; preds = %bb
+ %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+ %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+ br label %bb8
+
+bb8: ; preds = %bb1, %bb
+ %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+ ret i32 %tmp9
+}
+
+define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
+; CHECK-LABEL: @dummy_caller
+ %tmp = call i32 @outline_region_notlikely(i32* %arg)
+ ret i32 %tmp
+ }
+
+
+; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) {
+; CHECK-NEXT: newFuncRoot:
+
+declare i32 @foo(i32 * %arg)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!"branch_weights", i32 2000, i32 1}
diff --git a/test/Transforms/Coroutines/coro-split-02.ll b/test/Transforms/Coroutines/coro-split-02.ll
index 953c25088652..4dc8921cd69a 100644
--- a/test/Transforms/Coroutines/coro-split-02.ll
+++ b/test/Transforms/Coroutines/coro-split-02.ll
@@ -1,5 +1,6 @@
; Tests that coro-split can handle the case when code after coro.suspend uses
; a value produced between coro.save and coro.suspend (%Result.i19)
+; and checks whether stray coro.saves are properly removed
; RUN: opt < %s -coro-split -S | FileCheck %s
%"struct.std::coroutine_handle" = type { i8* }
@@ -24,9 +25,10 @@ entry:
i8 1, label %exit
]
await.ready:
+ %StrayCoroSave = call token @llvm.coro.save(i8* null)
%val = load i32, i32* %Result.i19
call void @print(i32 %val)
- br label %exit
+ br label %exit
exit:
call i1 @llvm.coro.end(i8* null, i1 false)
ret void
@@ -35,6 +37,7 @@ exit:
; CHECK-LABEL: @a.resume(
; CHECK: getelementptr inbounds %a.Frame
; CHECK-NEXT: getelementptr inbounds %"struct.lean_future<int>::Awaiter"
+; CHECK-NOT: call token @llvm.coro.save(i8* null)
; CHECK-NEXT: %val = load i32, i32* %Result
; CHECK-NEXT: call void @print(i32 %val)
; CHECK-NEXT: ret void
diff --git a/test/Transforms/Inline/AArch64/switch.ll b/test/Transforms/Inline/AArch64/switch.ll
index 96d6bf2db682..a530ba734705 100644
--- a/test/Transforms/Inline/AArch64/switch.ll
+++ b/test/Transforms/Inline/AArch64/switch.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s
-; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux -inline-generic-switch-cost=true | FileCheck %s
+; RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s
+; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s
define i32 @callee_range(i32 %a, i32* %P) {
switch i32 %a, label %sw.default [
diff --git a/test/Transforms/InstCombine/not.ll b/test/Transforms/InstCombine/not.ll
index 6ff0a50318d2..8352c07a816b 100644
--- a/test/Transforms/InstCombine/not.ll
+++ b/test/Transforms/InstCombine/not.ll
@@ -33,17 +33,46 @@ define i1 @invert_fcmp(float %X, float %Y) {
; PR2298
-define zeroext i8 @test6(i32 %a, i32 %b) {
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 %b, %a
-; CHECK-NEXT: [[RETVAL67:%.*]] = zext i1 [[TMP3]] to i8
-; CHECK-NEXT: ret i8 [[RETVAL67]]
+define i1 @not_not_cmp(i32 %a, i32 %b) {
+; CHECK-LABEL: @not_not_cmp(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %b, %a
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %nota = xor i32 %a, -1
+ %notb = xor i32 %b, -1
+ %cmp = icmp slt i32 %nota, %notb
+ ret i1 %cmp
+}
+
+define <2 x i1> @not_not_cmp_vector(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @not_not_cmp_vector(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i32> %b, %a
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %nota = xor <2 x i32> %a, <i32 -1, i32 -1>
+ %notb = xor <2 x i32> %b, <i32 -1, i32 -1>
+ %cmp = icmp ugt <2 x i32> %nota, %notb
+ ret <2 x i1> %cmp
+}
+
+define i1 @not_cmp_constant(i32 %a) {
+; CHECK-LABEL: @not_cmp_constant(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %a, -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %nota = xor i32 %a, -1
+ %cmp = icmp ugt i32 %nota, 42
+ ret i1 %cmp
+}
+
+define <2 x i1> @not_cmp_constant_vector(<2 x i32> %a) {
+; CHECK-LABEL: @not_cmp_constant_vector(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i32> %a, <i32 -43, i32 -43>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
;
- %tmp1not = xor i32 %a, -1
- %tmp2not = xor i32 %b, -1
- %tmp3 = icmp slt i32 %tmp1not, %tmp2not
- %retval67 = zext i1 %tmp3 to i8
- ret i8 %retval67
+ %nota = xor <2 x i32> %a, <i32 -1, i32 -1>
+ %cmp = icmp slt <2 x i32> %nota, <i32 42, i32 42>
+ ret <2 x i1> %cmp
}
define <2 x i1> @test7(<2 x i32> %A, <2 x i32> %B) {
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index d6f1b634102f..20ebd36991a5 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -1278,3 +1278,19 @@ define void @icmp_slt_sge_or(i32 %Ax, i32 %Bx) {
; CHECK: call void @helper_i1(i1 true)
ret void
}
+
+define i1 @constant_fold_inttoptr_null() {
+; CHECK-LABEL: @constant_fold_inttoptr_null(
+; CHECK-NEXT: ret i1 false
+;
+ %x = icmp eq i32* inttoptr (i64 32 to i32*), null
+ ret i1 %x
+}
+
+define i1 @constant_fold_null_inttoptr() {
+; CHECK-LABEL: @constant_fold_null_inttoptr(
+; CHECK-NEXT: ret i1 false
+;
+ %x = icmp eq i32* null, inttoptr (i64 32 to i32*)
+ ret i1 %x
+}
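
Both directions fold for the same reason (assuming the usual model in which null is address zero): the inttoptr operand is the constant address 32, so the equality against null reduces to integer arithmetic the constant folder can settle.

    icmp eq (inttoptr 32 to i32*), null  -->  32 == 0  -->  false
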
diff --git a/test/Transforms/LowerExpectIntrinsic/phi_merge.ll b/test/Transforms/LowerExpectIntrinsic/phi_merge.ll
new file mode 100644
index 000000000000..3b407c0f3a5a
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/phi_merge.ll
@@ -0,0 +1,356 @@
+; RUN: opt -lower-expect -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+
+; The C case
+; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 1))
+; For the above case, all 3 branches should be annotated.
+;
+; if (__builtin_expect((x > goo() && y > hoo() && z > too()), 0))
+; For the above case, we don't have enough information, so
+; only the last branch is annotated.
+
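+; In rough terms, what lower-expect does with the phi here: the value handed
+; to @llvm.expect.i64 in @foo comes from
+;   %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ]
+; and the expected constant is pushed back through the incoming values.
+; Expecting 1 (true) rules out the two 'false' edges, which also pins down
+; the likely direction of the branches in %bb and %bb10; expecting 0 (false)
+; matches two different incoming edges, so those earlier branches stay
+; ambiguous and only the branch on the expect result itself gets weights.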
+define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: !prof [[WEIGHT:![0-9]+]]
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]]
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ]
+ %tmp20 = xor i1 %tmp19, true
+ %tmp21 = xor i1 %tmp20, true
+ %tmp22 = zext i1 %tmp21 to i32
+ %tmp23 = sext i32 %tmp22 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 1)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo2
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i1 [ false, %bb10 ], [ false, %bb ], [ %tmp17, %bb14 ]
+ %tmp20 = xor i1 %tmp19, true
+ %tmp21 = xor i1 %tmp20, true
+ %tmp22 = zext i1 %tmp21 to i32
+ %tmp23 = sext i32 %tmp22 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 0)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT2:![0-9]+]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+define void @foo_i32(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i32
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: !prof [[WEIGHT]]
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13, {{.*}}!prof [[WEIGHT]]
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i32 [ 5, %bb10 ], [ 5, %bb ], [ %tmp16, %bb14 ]
+ %tmp23 = sext i32 %tmp19 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+
+define void @foo_i32_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i32_not_unlikely
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i32 [ 4, %bb10 ], [ 4, %bb ], [ %tmp16, %bb14 ]
+ %tmp23 = sext i32 %tmp19 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+define void @foo_i32_xor(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i32_xor
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]]
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]]
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ]
+ %tmp20 = xor i32 %tmp19, 3
+ %tmp23 = sext i32 %tmp20 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+define void @foo_i8_sext(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i8_sext
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9,{{.*}}!prof [[WEIGHT]]
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13,{{.*}}!prof [[WEIGHT]]
+
+bb14: ; preds = %bb10
+ %tmp16 = call i8 @too8()
+ %tmp17 = icmp sgt i8 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb ], [ %tmp16, %bb14 ]
+ %tmp23 = sext i8 %tmp19 to i64
+; after sign extension, the operand value becomes -1, which does not match 255
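+; (the i8 bit pattern 0xFF sign-extends to i64 0xFFFFFFFFFFFFFFFF, i.e. -1,
+; so the constant phi edges are treated as mismatched and marked unlikely)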
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 255)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+define void @foo_i8_sext_not_unlikely(i32 %arg, i32 %arg1, i8 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i8_sext_not_unlikely
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14: ; preds = %bb10
+ %tmp16 = call i8 @too8()
+ %tmp17 = icmp sgt i8 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i8 [ 255, %bb10 ], [ 255, %bb ], [ %tmp16, %bb14 ]
+ %tmp23 = sext i8 %tmp19 to i64
+; after sign extension, the operand value becomes -1, which matches the expected -1
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 -1)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+
+define void @foo_i32_xor_not_unlikely(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo_i32_xor_not_unlikely
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp sgt i32 %tmp8, %arg
+ br i1 %tmp9, label %bb10, label %bb18
+; CHECK: br i1 %tmp9
+; CHECK-NOT: !prof
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br i1 %tmp13, label %bb14, label %bb18
+; CHECK: br i1 %tmp13
+; CHECK-NOT: !prof
+
+bb14: ; preds = %bb10
+ %tmp16 = call i32 @too()
+ %tmp17 = icmp sgt i32 %arg2, %tmp16
+ br label %bb18
+
+bb18: ; preds = %bb14, %bb10, %bb
+ %tmp19 = phi i32 [ 6, %bb10 ], [ 6, %bb ], [ %tmp16, %bb14 ]
+ %tmp20 = xor i32 %tmp19, 2
+ %tmp23 = sext i32 %tmp20 to i64
+ %tmp24 = call i64 @llvm.expect.i64(i64 %tmp23, i64 4)
+ %tmp25 = icmp ne i64 %tmp24, 0
+ br i1 %tmp25, label %bb26, label %bb28
+; CHECK: br i1 %tmp25,{{.*}}!prof [[WEIGHT]]
+
+bb26: ; preds = %bb18
+ %tmp27 = call i32 @goo()
+ br label %bb30
+
+bb28: ; preds = %bb18
+ %tmp29 = call i32 @hoo()
+ br label %bb30
+
+bb30: ; preds = %bb28, %bb26
+ ret void
+}
+
+declare i32 @goo()
+
+declare i32 @hoo()
+
+declare i32 @too()
+
+declare i8 @too8()
+
+; Function Attrs: nounwind readnone
+declare i64 @llvm.expect.i64(i64, i64)
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000}
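For context on the tests above: clang lowers __builtin_expect over a short-circuited condition into a phi (often behind an xor/zext/sext chain) feeding @llvm.expect.i64, and the pass has to trace back through the phi to annotate the originating branches. A minimal C sketch of that source pattern, with goo/hoo/too as illustrative stand-ins for the declared externals and no claim about the exact IR a given clang version emits:

    #include <stdio.h>

    static int goo(void) { return 1; }
    static int hoo(void) { return 2; }
    static int too(void) { return 3; }

    void foo(int x, int y, int z) {
      /* A short-circuited && reaches @llvm.expect.i64 as a phi of i1
         values; expecting 0 puts the heavy weight on the false
         successor, matching the WEIGHT2 = {1, 2000} metadata above. */
      if (__builtin_expect(x > goo() && y > hoo() && z > too(), 0))
        printf("unlikely path\n");
      else
        printf("likely path\n");
    }

    int main(void) { foo(0, 0, 0); return 0; }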
diff --git a/test/Transforms/LowerExpectIntrinsic/phi_or.ll b/test/Transforms/LowerExpectIntrinsic/phi_or.ll
new file mode 100644
index 000000000000..849baef3dca8
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/phi_or.ll
@@ -0,0 +1,103 @@
+; RUN: opt -lower-expect -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+;
+; if (__builtin_expect((x > goo() || y > hoo()), 1)) {
+; ..
+; }
+; For the above case, only the second branch should be
+; annotated.
+; if (__builtin_expect((x > goo() || y > hoo()), 0)) {
+; ..
+; }
+; For the above case, two branches should be annotated.
+; Function Attrs: noinline nounwind uwtable
+define void @foo(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo
+bb:
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp slt i32 %arg, %tmp8
+ br i1 %tmp9, label %bb14, label %bb10
+; CHECK: br i1 %tmp9
+; CHECK-NOT: br i1 %tmp9{{.*}}!prof
+
+bb10: ; preds = %bb
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %arg1, %tmp12
+ br label %bb14
+
+bb14: ; preds = %bb10, %bb
+ %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ]
+ %tmp16 = zext i1 %tmp15 to i32
+ %tmp17 = sext i32 %tmp16 to i64
+ %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 1)
+ %tmp18 = icmp ne i64 %expect, 0
+ br i1 %tmp18, label %bb19, label %bb21
+; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb19: ; preds = %bb14
+ %tmp20 = call i32 @goo()
+ br label %bb23
+
+bb21: ; preds = %bb14
+ %tmp22 = call i32 @hoo()
+ br label %bb23
+
+bb23: ; preds = %bb21, %bb19
+ ret void
+}
+
+define void @foo2(i32 %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
+; CHECK-LABEL: void @foo2
+bb:
+ %tmp = alloca i32, align 4
+ %tmp4 = alloca i32, align 4
+ %tmp5 = alloca i32, align 4
+ %tmp6 = alloca i32, align 4
+ store i32 %arg, i32* %tmp, align 4
+ store i32 %arg1, i32* %tmp4, align 4
+ store i32 %arg2, i32* %tmp5, align 4
+ store i32 %arg3, i32* %tmp6, align 4
+ %tmp7 = load i32, i32* %tmp, align 4
+ %tmp8 = call i32 @goo()
+ %tmp9 = icmp slt i32 %tmp7, %tmp8
+ br i1 %tmp9, label %bb14, label %bb10
+; CHECK: br i1 %tmp9{{.*}}!prof [[WEIGHT2:![0-9]+]]
+
+bb10: ; preds = %bb
+ %tmp11 = load i32, i32* %tmp5, align 4
+ %tmp12 = call i32 @hoo()
+ %tmp13 = icmp sgt i32 %tmp11, %tmp12
+ br label %bb14
+
+bb14: ; preds = %bb10, %bb
+ %tmp15 = phi i1 [ true, %bb ], [ %tmp13, %bb10 ]
+ %tmp16 = zext i1 %tmp15 to i32
+ %tmp17 = sext i32 %tmp16 to i64
+ %expect = call i64 @llvm.expect.i64(i64 %tmp17, i64 0)
+ %tmp18 = icmp ne i64 %expect, 0
+ br i1 %tmp18, label %bb19, label %bb21
+; CHECK: br i1 %tmp18{{.*}}!prof [[WEIGHT2]]
+
+bb19: ; preds = %bb14
+ %tmp20 = call i32 @goo()
+ br label %bb23
+
+bb21: ; preds = %bb14
+ %tmp22 = call i32 @hoo()
+ br label %bb23
+
+bb23: ; preds = %bb21, %bb19
+ ret void
+}
+
+declare i32 @goo()
+declare i32 @hoo()
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+!llvm.ident = !{!0}
+
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: [[WEIGHT2]] = !{!"branch_weights", i32 1, i32 2000}
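The header comment of phi_or.ll distinguishes the expected-true and expected-false || cases; a hedged C rendering of both (goo/hoo are stand-ins, and which branches receive !prof follows the WEIGHT/WEIGHT2 checks above):

    static int goo(void) { return 0; }
    static int hoo(void) { return 0; }

    int likely_or(int x, int y) {
      /* Expected true: the constant-true phi operand from the
         short-circuit edge agrees with the expectation, so only the
         branch on the expect result itself is annotated. */
      return __builtin_expect(x > goo() || y > hoo(), 1) ? 1 : 0;
    }

    int unlikely_or(int x, int y) {
      /* Expected false: the constant-true operand contradicts the
         expectation, so the short-circuit branch is annotated as
         unlikely in addition to the branch on the expect result. */
      return __builtin_expect(x > goo() || y > hoo(), 0) ? 1 : 0;
    }

    int main(void) { return likely_or(1, 1) + unlikely_or(0, 0); }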
diff --git a/test/Transforms/LowerExpectIntrinsic/phi_tern.ll b/test/Transforms/LowerExpectIntrinsic/phi_tern.ll
new file mode 100644
index 000000000000..3c603d51b438
--- /dev/null
+++ b/test/Transforms/LowerExpectIntrinsic/phi_tern.ll
@@ -0,0 +1,56 @@
+; RUN: opt -lower-expect -S -o - < %s | FileCheck %s
+; RUN: opt -S -passes='function(lower-expect)' < %s | FileCheck %s
+
+; return __builtin_expect((a > b ? 1 : goo()), 0);
+;
+; Function Attrs: noinline nounwind uwtable
+define i32 @foo(i32 %arg, i32 %arg1) {
+; CHECK-LABEL: i32 @foo
+bb:
+ %tmp5 = icmp sgt i32 %arg, %arg1
+ br i1 %tmp5, label %bb9, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb7: ; preds = %bb
+ %tmp8 = call i32 @goo()
+ br label %bb9
+
+bb9:                                              ; preds = %bb7, %bb
+ %tmp10 = phi i32 [ 1, %bb ], [ %tmp8, %bb7 ]
+ %tmp11 = sext i32 %tmp10 to i64
+ %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0)
+ %tmp12 = trunc i64 %expect to i32
+ ret i32 %tmp12
+}
+
+define i32 @foo2(i32 %arg, i32 %arg1) {
+bb:
+ %tmp5 = icmp sgt i32 %arg, %arg1
+ br i1 %tmp5, label %bb6, label %bb7
+; CHECK: br i1 %tmp5{{.*}}!prof [[WEIGHT:![0-9]+]]
+
+bb6: ; preds = %bb
+ br label %bb9
+
+bb7: ; preds = %bb
+ %tmp8 = call i32 @goo()
+ br label %bb9
+
+bb9: ; preds = %bb7, %bb6
+ %tmp10 = phi i32 [ 1, %bb6 ], [ %tmp8, %bb7 ]
+ %tmp11 = sext i32 %tmp10 to i64
+ %expect = call i64 @llvm.expect.i64(i64 %tmp11, i64 0)
+ %tmp12 = trunc i64 %expect to i32
+ ret i32 %tmp12
+}
+
+declare i32 @goo()
+declare i64 @llvm.expect.i64(i64, i64)
+
+
+
+!llvm.ident = !{!0}
+
+!0 = !{!"clang version 5.0.0 (trunk 302965)"}
+
+; CHECK: [[WEIGHT]] = !{!"branch_weights", i32 1, i32 2000}
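The ternary in phi_tern.ll becomes a phi of the constant 1 and the call result; 1 cannot equal the expected 0, so the edge that produces it is marked unlikely, which is the {1, 2000} weight checked above. A small C sketch under the same assumptions (goo is a stand-in):

    static int goo(void) { return 0; }

    int foo(int a, int b) {
      /* a > b selects the constant 1, which cannot equal the expected
         0, so lower-expect marks the true edge as unlikely. */
      return __builtin_expect(a > b ? 1 : goo(), 0);
    }

    int main(void) { return foo(0, 1); }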
diff --git a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
index b7a1d208fc6f..cfac37986bda 100644
--- a/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
+++ b/test/Transforms/LowerTypeTests/Inputs/import-unsat.yaml
@@ -1,7 +1,8 @@
---
GlobalValueMap:
42:
- - TypeTests: [123]
+ - Live: true
+ TypeTests: [123]
TypeIdMap:
typeid1:
TTRes:
diff --git a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml
new file mode 100644
index 000000000000..7baa02ada86c
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-dead.yaml
@@ -0,0 +1,7 @@
+---
+GlobalValueMap:
+ 42:
+ - Live: false
+ TypeTests: [14276520915468743435] # guid("typeid1")
+WithGlobalValueDeadStripping: true
+...
diff --git a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
index 031b2e8de04e..f30257cfc0d4 100644
--- a/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
+++ b/test/Transforms/LowerTypeTests/Inputs/use-typeid1-typeid2.yaml
@@ -1,5 +1,6 @@
---
GlobalValueMap:
42:
- - TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
+ - Live: true
+ TypeTests: [14276520915468743435, 15427464259790519041] # guid("typeid1"), guid("typeid2")
...
diff --git a/test/Transforms/LowerTypeTests/export-dead.ll b/test/Transforms/LowerTypeTests/export-dead.ll
new file mode 100644
index 000000000000..265402b34a6e
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/export-dead.ll
@@ -0,0 +1,14 @@
+; The only use of "typeid1" is in a dead function. Export nothing.
+
+; RUN: opt -S -lowertypetests -lowertypetests-summary-action=export -lowertypetests-read-summary=%S/Inputs/use-typeid1-dead.yaml -lowertypetests-write-summary=%t < %s | FileCheck %s
+; RUN: FileCheck --check-prefix=SUMMARY %s < %t
+
+@foo = constant i32 42, !type !0
+
+!0 = !{i32 0, !"typeid1"}
+
+; CHECK-NOT: @__typeid_typeid1_global_addr =
+
+; SUMMARY: TypeIdMap:
+; SUMMARY-NEXT: WithGlobalValueDeadStripping: true
+; SUMMARY-NEXT: ...
diff --git a/test/Transforms/LowerTypeTests/export-nothing.ll b/test/Transforms/LowerTypeTests/export-nothing.ll
index 9ab41b5f6cb6..8ad331539942 100644
--- a/test/Transforms/LowerTypeTests/export-nothing.ll
+++ b/test/Transforms/LowerTypeTests/export-nothing.ll
@@ -4,4 +4,5 @@
; CHECK: ---
; CHECK-NEXT: GlobalValueMap:
; CHECK-NEXT: TypeIdMap:
+; CHECK-NEXT: WithGlobalValueDeadStripping: false
; CHECK-NEXT: ...
diff --git a/test/Transforms/LowerTypeTests/import-unsat.ll b/test/Transforms/LowerTypeTests/import-unsat.ll
index 76b244001986..6cb9b26fb574 100644
--- a/test/Transforms/LowerTypeTests/import-unsat.ll
+++ b/test/Transforms/LowerTypeTests/import-unsat.ll
@@ -4,7 +4,10 @@
; SUMMARY: GlobalValueMap:
; SUMMARY-NEXT: 42:
-; SUMMARY-NEXT: - TypeTests: [ 123 ]
+; SUMMARY-NEXT: - Linkage: 0
+; SUMMARY-NEXT: NotEligibleToImport: false
+; SUMMARY-NEXT: Live: true
+; SUMMARY-NEXT: TypeTests: [ 123 ]
; SUMMARY-NEXT: TypeIdMap:
; SUMMARY-NEXT: typeid1:
; SUMMARY-NEXT: TTRes:
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll
index 119f2252d95e..8fba30c2720f 100644
--- a/test/Transforms/SROA/address-spaces.ll
+++ b/test/Transforms/SROA/address-spaces.ll
@@ -83,3 +83,21 @@ define void @pr27557() {
store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8
ret void
}
+
+; Make sure pre-splitting doesn't try to introduce an illegal bitcast
+define float @presplit(i64 addrspace(1)* %p) {
+entry:
+; CHECK-LABEL: @presplit(
+; CHECK: %[[CAST:.*]] = bitcast i64 addrspace(1)* {{.*}} to i32 addrspace(1)*
+; CHECK: load i32, i32 addrspace(1)* %[[CAST]]
+ %b = alloca i64
+ %b.cast = bitcast i64* %b to [2 x float]*
+ %b.gep1 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 0
+ %b.gep2 = getelementptr [2 x float], [2 x float]* %b.cast, i32 0, i32 1
+ %l = load i64, i64 addrspace(1)* %p
+ store i64 %l, i64* %b
+ %f1 = load float, float* %b.gep1
+ %f2 = load float, float* %b.gep2
+ %ret = fadd float %f1, %f2
+ ret float %ret
+}
diff --git a/test/Transforms/Util/PredicateInfo/condprop.ll b/test/Transforms/Util/PredicateInfo/condprop.ll
index 61f59f03e1bc..496bb8385217 100644
--- a/test/Transforms/Util/PredicateInfo/condprop.ll
+++ b/test/Transforms/Util/PredicateInfo/condprop.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -print-predicateinfo -analyze < %s 2>&1 | FileCheck %s
-; RUN: opt -print-predicateinfo -analyze -reverse-iterate < %s 2>&1 | FileCheck %s
@a = external global i32 ; <i32*> [#uses=7]
diff --git a/test/Transforms/Util/PredicateInfo/condprop2.ll b/test/Transforms/Util/PredicateInfo/condprop2.ll
new file mode 100644
index 000000000000..415fa7c879e3
--- /dev/null
+++ b/test/Transforms/Util/PredicateInfo/condprop2.ll
@@ -0,0 +1,474 @@
+; REQUIRES: asserts
+; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
+; Hence, this test has been split from condprop.ll to test with -reverse-iterate.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo -analyze -reverse-iterate < %s 2>&1 | FileCheck %s
+
+@a = external global i32 ; <i32*> [#uses=7]
+
+define i32 @test1() nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 4
+; CHECK-NEXT: br i1 [[TMP1]], label [[BB:%.*]], label [[BB1:%.*]]
+; CHECK: bb:
+; CHECK-NEXT: br label [[BB8:%.*]]
+; CHECK: bb1:
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 5
+; CHECK-NEXT: br i1 [[TMP3]], label [[BB2:%.*]], label [[BB3:%.*]]
+; CHECK: bb2:
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb3:
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 4
+; CHECK-NEXT: br i1 [[TMP5]], label [[BB4:%.*]], label [[BB5:%.*]]
+; CHECK: bb4:
+; CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[TMP6]], 5
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb5:
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 5
+; CHECK-NEXT: br i1 [[TMP9]], label [[BB6:%.*]], label [[BB7:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[TMP10]], 4
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb7:
+; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* @a, align 4
+; CHECK-NEXT: br label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[DOT0:%.*]] = phi i32 [ [[TMP12]], [[BB7]] ], [ [[TMP11]], [[BB6]] ], [ [[TMP7]], [[BB4]] ], [ 4, [[BB2]] ], [ 5, [[BB]] ]
+; CHECK-NEXT: br label [[RETURN:%.*]]
+; CHECK: return:
+; CHECK-NEXT: ret i32 [[DOT0]]
+;
+entry:
+ %0 = load i32, i32* @a, align 4
+ %1 = icmp eq i32 %0, 4
+ br i1 %1, label %bb, label %bb1
+
+bb: ; preds = %entry
+ br label %bb8
+
+bb1: ; preds = %entry
+ %2 = load i32, i32* @a, align 4
+ %3 = icmp eq i32 %2, 5
+ br i1 %3, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ br label %bb8
+
+bb3: ; preds = %bb1
+ %4 = load i32, i32* @a, align 4
+ %5 = icmp eq i32 %4, 4
+ br i1 %5, label %bb4, label %bb5
+
+bb4: ; preds = %bb3
+ %6 = load i32, i32* @a, align 4
+ %7 = add i32 %6, 5
+ br label %bb8
+
+bb5: ; preds = %bb3
+ %8 = load i32, i32* @a, align 4
+ %9 = icmp eq i32 %8, 5
+ br i1 %9, label %bb6, label %bb7
+
+bb6: ; preds = %bb5
+ %10 = load i32, i32* @a, align 4
+ %11 = add i32 %10, 4
+ br label %bb8
+
+bb7: ; preds = %bb5
+ %12 = load i32, i32* @a, align 4
+ br label %bb8
+
+bb8: ; preds = %bb7, %bb6, %bb4, %bb2, %bb
+ %.0 = phi i32 [ %12, %bb7 ], [ %11, %bb6 ], [ %7, %bb4 ], [ 4, %bb2 ], [ 5, %bb ]
+ br label %return
+
+return: ; preds = %bb8
+ ret i32 %.0
+}
+
+declare void @foo(i1)
+declare void @bar(i32)
+
+define void @test3(i32 %x, i32 %y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
+; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
+; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH_ZERO:%.*]], label [[NOPE:%.*]]
+; CHECK: both_zero:
+; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
+; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
+; CHECK-NEXT: call void @bar(i32 [[X_0]])
+; CHECK-NEXT: call void @bar(i32 [[Y_0]])
+; CHECK-NEXT: ret void
+; CHECK: nope:
+; CHECK-NEXT: call void @foo(i1 [[Z_0]])
+; CHECK-NEXT: ret void
+;
+ %xz = icmp eq i32 %x, 0
+ %yz = icmp eq i32 %y, 0
+ %z = and i1 %xz, %yz
+ br i1 %z, label %both_zero, label %nope
+both_zero:
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ ret void
+nope:
+ call void @foo(i1 %z)
+ ret void
+}
+
+define void @test4(i1 %b, i32 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: br i1 [[B:%.*]], label [[SW:%.*]], label [[CASE3:%.*]]
+; CHECK: sw:
+; CHECK: i32 0, label [[CASE0:%.*]]
+; CHECK-NEXT: i32 1, label [[CASE1:%.*]]
+; CHECK-NEXT: i32 2, label [[CASE0]]
+; CHECK-NEXT: i32 3, label [[CASE3]]
+; CHECK-NEXT: i32 4, label [[DEFAULT:%.*]]
+; CHECK-NEXT: ] Edge: [label [[SW]],label %case1] }
+; CHECK-NEXT: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X:%.*]])
+; CHECK-NEXT: switch i32 [[X]], label [[DEFAULT]] [
+; CHECK-NEXT: i32 0, label [[CASE0]]
+; CHECK-NEXT: i32 1, label [[CASE1]]
+; CHECK-NEXT: i32 2, label [[CASE0]]
+; CHECK-NEXT: i32 3, label [[CASE3]]
+; CHECK-NEXT: i32 4, label [[DEFAULT]]
+; CHECK-NEXT: ]
+; CHECK: default:
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: ret void
+; CHECK: case0:
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: ret void
+; CHECK: case1:
+; CHECK-NEXT: call void @bar(i32 [[X_0]])
+; CHECK-NEXT: ret void
+; CHECK: case3:
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: ret void
+;
+ br i1 %b, label %sw, label %case3
+sw:
+ switch i32 %x, label %default [
+ i32 0, label %case0
+ i32 1, label %case1
+ i32 2, label %case0
+ i32 3, label %case3
+ i32 4, label %default
+ ]
+default:
+ call void @bar(i32 %x)
+ ret void
+case0:
+ call void @bar(i32 %x)
+ ret void
+case1:
+ call void @bar(i32 %x)
+ ret void
+case3:
+ call void @bar(i32 %x)
+ ret void
+}
+
+define i1 @test5(i32 %x, i32 %y) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X_0]], [[Y_0]]
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X_1]], [[Y_1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ %cmp2 = icmp ne i32 %x, %y
+ ret i1 %cmp2
+
+different:
+ %cmp3 = icmp eq i32 %x, %y
+ ret i1 %cmp3
+}
+
+define i1 @test6(i32 %x, i32 %y) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp2 = icmp ne i32 %x, %y
+ %cmp = icmp eq i32 %x, %y
+ %cmp3 = icmp eq i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ ret i1 %cmp2
+
+different:
+ ret i1 %cmp3
+}
+
+define i1 @test6_fp(float %x, float %y) {
+; CHECK-LABEL: @test6_fp(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp une float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq float [[X]], [[Y]]
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp oeq float [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp2 = fcmp une float %x, %y
+ %cmp = fcmp oeq float %x, %y
+ %cmp3 = fcmp oeq float %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ ret i1 %cmp2
+
+different:
+ ret i1 %cmp3
+}
+
+define i1 @test7(i32 %x, i32 %y) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X:%.*]], [[Y:%.*]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[Y_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X_0]], [[Y_0]]
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X_1]], [[Y_1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp = icmp sgt i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ %cmp2 = icmp sle i32 %x, %y
+ ret i1 %cmp2
+
+different:
+ %cmp3 = icmp sgt i32 %x, %y
+ ret i1 %cmp3
+}
+
+define i1 @test7_fp(float %x, float %y) {
+; CHECK-LABEL: @test7_fp(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
+; CHECK: [[X_0:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
+; CHECK: [[X_1:%.*]] = call float @llvm.ssa.copy.f32(float [[X]])
+; CHECK: [[Y_0:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
+; CHECK: [[Y_1:%.*]] = call float @llvm.ssa.copy.f32(float [[Y]])
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X_0]], [[Y_0]]
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X_1]], [[Y_1]]
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp = fcmp ogt float %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ %cmp2 = fcmp ule float %x, %y
+ ret i1 %cmp2
+
+different:
+ %cmp3 = fcmp ogt float %x, %y
+ ret i1 %cmp3
+}
+
+define i1 @test8(i32 %x, i32 %y) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[CMP2:%.*]] = icmp sle i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: [[CMP3:%.*]] = icmp sgt i32 [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp2 = icmp sle i32 %x, %y
+ %cmp = icmp sgt i32 %x, %y
+ %cmp3 = icmp sgt i32 %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ ret i1 %cmp2
+
+different:
+ ret i1 %cmp3
+}
+
+define i1 @test8_fp(float %x, float %y) {
+; CHECK-LABEL: @test8_fp(
+; CHECK-NEXT: [[CMP2:%.*]] = fcmp ule float [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X]], [[Y]]
+; CHECK-NEXT: [[CMP3:%.*]] = fcmp ogt float [[X]], [[Y]]
+; CHECK-NEXT: br i1 [[CMP]], label [[SAME:%.*]], label [[DIFFERENT:%.*]]
+; CHECK: same:
+; CHECK-NEXT: ret i1 [[CMP2]]
+; CHECK: different:
+; CHECK-NEXT: ret i1 [[CMP3]]
+;
+ %cmp2 = fcmp ule float %x, %y
+ %cmp = fcmp ogt float %x, %y
+ %cmp3 = fcmp ogt float %x, %y
+ br i1 %cmp, label %same, label %different
+
+same:
+ ret i1 %cmp2
+
+different:
+ ret i1 %cmp3
+}
+
+define i32 @test9(i32 %i, i32 %j) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK: [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
+; CHECK: [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK: cond_true:
+; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
+; CHECK-NEXT: ret i32 [[DIFF]]
+; CHECK: ret:
+; CHECK-NEXT: ret i32 5
+;
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+ %diff = sub i32 %i, %j
+ ret i32 %diff
+
+ret:
+ ret i32 5
+}
+
+define i32 @test10(i32 %j, i32 %i) {
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[I:%.*]], [[J:%.*]]
+; CHECK: [[J_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[J]])
+; CHECK: [[I_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[I]])
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[RET:%.*]]
+; CHECK: cond_true:
+; CHECK-NEXT: [[DIFF:%.*]] = sub i32 [[I_0]], [[J_0]]
+; CHECK-NEXT: ret i32 [[DIFF]]
+; CHECK: ret:
+; CHECK-NEXT: ret i32 5
+;
+ %cmp = icmp eq i32 %i, %j
+ br i1 %cmp, label %cond_true, label %ret
+
+cond_true:
+ %diff = sub i32 %i, %j
+ ret i32 %diff
+
+ret:
+ ret i32 5
+}
+
+declare i32 @yogibar()
+
+define i32 @test11(i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[V0:%.*]] = call i32 @yogibar()
+; CHECK-NEXT: [[V1:%.*]] = call i32 @yogibar()
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V0]], [[V1]]
+; CHECK: [[V0_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0]])
+; CHECK: [[V1_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V1]])
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[NEXT:%.*]]
+; CHECK: cond_true:
+; CHECK-NEXT: ret i32 [[V1_0]]
+; CHECK: next:
+; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i32 [[X:%.*]], [[V0_0]]
+; CHECK: [[V0_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[V0_0]])
+; CHECK-NEXT: br i1 [[CMP2]], label [[COND_TRUE2:%.*]], label [[NEXT2:%.*]]
+; CHECK: cond_true2:
+; CHECK-NEXT: ret i32 [[V0_0_1]]
+; CHECK: next2:
+; CHECK-NEXT: ret i32 0
+;
+ %v0 = call i32 @yogibar()
+ %v1 = call i32 @yogibar()
+ %cmp = icmp eq i32 %v0, %v1
+ br i1 %cmp, label %cond_true, label %next
+
+cond_true:
+ ret i32 %v1
+
+next:
+ %cmp2 = icmp eq i32 %x, %v0
+ br i1 %cmp2, label %cond_true2, label %next2
+
+cond_true2:
+ ret i32 %v0
+
+next2:
+ ret i32 0
+}
+
+define i32 @test12(i32 %x) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[X_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; CHECK: cond_true:
+; CHECK-NEXT: br label [[RET:%.*]]
+; CHECK: cond_false:
+; CHECK-NEXT: br label [[RET]]
+; CHECK: ret:
+; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[X_0]], [[COND_TRUE]] ], [ [[X_1]], [[COND_FALSE]] ]
+; CHECK-NEXT: ret i32 [[RES]]
+;
+ %cmp = icmp eq i32 %x, 0
+ br i1 %cmp, label %cond_true, label %cond_false
+
+cond_true:
+ br label %ret
+
+cond_false:
+ br label %ret
+
+ret:
+ %res = phi i32 [ %x, %cond_true ], [ %x, %cond_false ]
+ ret i32 %res
+}
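These autogenerated checks show PredicateInfo inserting llvm.ssa.copy calls for values constrained by a dominating branch. What that buys a consumer such as NewGVN is easiest to state at the C level; an illustrative source shape for @test5 (not part of the test itself):

    int test5(int x, int y) {
      if (x == y)
        return x != y;  /* dominated by x == y: foldable to 0 */
      return x == y;    /* dominated by x != y: foldable to 0 */
    }

    int main(void) { return test5(1, 2) + test5(3, 3); }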
diff --git a/test/Transforms/Util/PredicateInfo/testandor.ll b/test/Transforms/Util/PredicateInfo/testandor.ll
index 43c508670908..c1048cf6d0f6 100644
--- a/test/Transforms/Util/PredicateInfo/testandor.ll
+++ b/test/Transforms/Util/PredicateInfo/testandor.ll
@@ -1,6 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s
-; RUN: opt -print-predicateinfo -reverse-iterate < %s 2>&1 | FileCheck %s
declare void @foo(i1)
declare void @bar(i32)
diff --git a/test/Transforms/Util/PredicateInfo/testandor2.ll b/test/Transforms/Util/PredicateInfo/testandor2.ll
new file mode 100644
index 000000000000..a03250c2f7a0
--- /dev/null
+++ b/test/Transforms/Util/PredicateInfo/testandor2.ll
@@ -0,0 +1,214 @@
+; REQUIRES: asserts
+; NOTE: The flag -reverse-iterate is present only in a +Asserts build.
+; Hence, this test has been split from testandor.ll to test with -reverse-iterate.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -print-predicateinfo -reverse-iterate < %s 2>&1 | FileCheck %s
+
+declare void @foo(i1)
+declare void @bar(i32)
+declare void @llvm.assume(i1)
+
+define void @testor(i32 %x, i32 %y) {
+; CHECK-LABEL: @testor(
+; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
+; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
+; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: br i1 [[Z]], label [[ONEOF:%.*]], label [[NEITHER:%.*]]
+; CHECK: oneof:
+; CHECK-NEXT: call void @foo(i1 [[XZ]])
+; CHECK-NEXT: call void @foo(i1 [[YZ]])
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: call void @bar(i32 [[Y]])
+; CHECK-NEXT: ret void
+; CHECK: neither:
+; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
+; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
+; CHECK-NEXT: call void @bar(i32 [[X_0]])
+; CHECK-NEXT: call void @bar(i32 [[Y_0]])
+; CHECK-NEXT: call void @foo(i1 [[Z_0]])
+; CHECK-NEXT: ret void
+;
+ %xz = icmp eq i32 %x, 0
+ %yz = icmp eq i32 %y, 0
+ %z = or i1 %xz, %yz
+ br i1 %z, label %oneof, label %neither
+oneof:
+;; Should not insert on the true edge for or
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ ret void
+neither:
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ call void @foo(i1 %z)
+ ret void
+}
+define void @testand(i32 %x, i32 %y) {
+; CHECK-LABEL: @testand(
+; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[Y_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[XZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
+; CHECK: [[YZ_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
+; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK: both:
+; CHECK-NEXT: call void @foo(i1 [[XZ_0]])
+; CHECK-NEXT: call void @foo(i1 [[YZ_0]])
+; CHECK-NEXT: call void @bar(i32 [[X_0]])
+; CHECK-NEXT: call void @bar(i32 [[Y_0]])
+; CHECK-NEXT: ret void
+; CHECK: nope:
+; CHECK-NEXT: call void @foo(i1 [[XZ]])
+; CHECK-NEXT: call void @foo(i1 [[YZ]])
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: call void @bar(i32 [[Y]])
+; CHECK-NEXT: call void @foo(i1 [[Z_0]])
+; CHECK-NEXT: ret void
+;
+ %xz = icmp eq i32 %x, 0
+ %yz = icmp eq i32 %y, 0
+ %z = and i1 %xz, %yz
+ br i1 %z, label %both, label %nope
+both:
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ ret void
+nope:
+;; Should not insert on the false edge for and
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ call void @foo(i1 %z)
+ ret void
+}
+define void @testandsame(i32 %x, i32 %y) {
+; CHECK-LABEL: @testandsame(
+; CHECK-NEXT: [[XGT:%.*]] = icmp sgt i32 [[X:%.*]], 0
+; CHECK-NEXT: [[XLT:%.*]] = icmp slt i32 [[X]], 100
+; CHECK-NEXT: [[Z:%.*]] = and i1 [[XGT]], [[XLT]]
+; CHECK: [[X_0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[X_0_1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X_0]])
+; CHECK: [[XGT_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XGT]])
+; CHECK: [[XLT_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XLT]])
+; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK: both:
+; CHECK-NEXT: call void @foo(i1 [[XGT_0]])
+; CHECK-NEXT: call void @foo(i1 [[XLT_0]])
+; CHECK-NEXT: call void @bar(i32 [[X_0_1]])
+; CHECK-NEXT: ret void
+; CHECK: nope:
+; CHECK-NEXT: call void @foo(i1 [[XGT]])
+; CHECK-NEXT: call void @foo(i1 [[XLT]])
+; CHECK-NEXT: call void @foo(i1 [[Z_0]])
+; CHECK-NEXT: ret void
+;
+ %xgt = icmp sgt i32 %x, 0
+ %xlt = icmp slt i32 %x, 100
+ %z = and i1 %xgt, %xlt
+ br i1 %z, label %both, label %nope
+both:
+ call void @foo(i1 %xgt)
+ call void @foo(i1 %xlt)
+ call void @bar(i32 %x)
+ ret void
+nope:
+ call void @foo(i1 %xgt)
+ call void @foo(i1 %xlt)
+ call void @foo(i1 %z)
+ ret void
+}
+
+define void @testandassume(i32 %x, i32 %y) {
+; CHECK-LABEL: @testandassume(
+; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]]
+; CHECK: [[TMP1:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[X]])
+; CHECK: [[TMP2:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[Y]])
+; CHECK: [[TMP3:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[XZ]])
+; CHECK: [[TMP4:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[YZ]])
+; CHECK: [[TMP5:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: call void @llvm.assume(i1 [[TMP5]])
+; CHECK: [[DOT0:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[TMP1]])
+; CHECK: [[DOT01:%.*]] = call i32 @llvm.ssa.copy.i32(i32 [[TMP2]])
+; CHECK: [[DOT02:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP3]])
+; CHECK: [[DOT03:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP4]])
+; CHECK: [[DOT04:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[TMP5]])
+; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK: both:
+; CHECK-NEXT: call void @foo(i1 [[DOT02]])
+; CHECK-NEXT: call void @foo(i1 [[DOT03]])
+; CHECK-NEXT: call void @bar(i32 [[DOT0]])
+; CHECK-NEXT: call void @bar(i32 [[DOT01]])
+; CHECK-NEXT: ret void
+; CHECK: nope:
+; CHECK-NEXT: call void @foo(i1 [[DOT04]])
+; CHECK-NEXT: ret void
+;
+ %xz = icmp eq i32 %x, 0
+ %yz = icmp eq i32 %y, 0
+ %z = and i1 %xz, %yz
+ call void @llvm.assume(i1 %z)
+ br i1 %z, label %both, label %nope
+both:
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ ret void
+nope:
+ call void @foo(i1 %z)
+ ret void
+}
+
+;; Unlike and/or for branches, an assume is *always* true, so we only match and for it
+define void @testorassume(i32 %x, i32 %y) {
+;
+; CHECK-LABEL: @testorassume(
+; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0
+; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0
+; CHECK-NEXT: [[Z:%.*]] = or i1 [[XZ]], [[YZ]]
+; CHECK-NEXT: call void @llvm.assume(i1 [[Z]])
+; CHECK: [[Z_0:%.*]] = call i1 @llvm.ssa.copy.i1(i1 [[Z]])
+; CHECK-NEXT: br i1 [[Z]], label [[BOTH:%.*]], label [[NOPE:%.*]]
+; CHECK: both:
+; CHECK-NEXT: call void @foo(i1 [[XZ]])
+; CHECK-NEXT: call void @foo(i1 [[YZ]])
+; CHECK-NEXT: call void @bar(i32 [[X]])
+; CHECK-NEXT: call void @bar(i32 [[Y]])
+; CHECK-NEXT: ret void
+; CHECK: nope:
+; CHECK-NEXT: call void @foo(i1 [[Z_0]])
+; CHECK-NEXT: ret void
+;
+ %xz = icmp eq i32 %x, 0
+ %yz = icmp eq i32 %y, 0
+ %z = or i1 %xz, %yz
+ call void @llvm.assume(i1 %z)
+ br i1 %z, label %both, label %nope
+both:
+ call void @foo(i1 %xz)
+ call void @foo(i1 %yz)
+ call void @bar(i32 %x)
+ call void @bar(i32 %y)
+ ret void
+nope:
+ call void @foo(i1 %z)
+ ret void
+}
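testorassume encodes the asymmetry stated in the comment above it: an assume guarantees its entire condition, so an and constrains both operands while an or constrains only its own result. A C sketch using clang's __builtin_assume (illustrative only):

    void and_case(int x, int y) {
      __builtin_assume(x == 0 && y == 0);
      /* Both x and y are now individually known to be 0. */
    }

    void or_case(int x, int y) {
      __builtin_assume(x == 0 || y == 0);
      /* Only the disjunction is known true; neither operand is pinned
         on its own, so PredicateInfo copies only the or result. */
    }

    int main(void) {
      and_case(0, 0);
      or_case(0, 5);
      return 0;
    }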
diff --git a/test/Transforms/WholeProgramDevirt/Inputs/export.yaml b/test/Transforms/WholeProgramDevirt/Inputs/export.yaml
index 0f6f59de7522..71cf38b216c7 100644
--- a/test/Transforms/WholeProgramDevirt/Inputs/export.yaml
+++ b/test/Transforms/WholeProgramDevirt/Inputs/export.yaml
@@ -1,7 +1,8 @@
---
GlobalValueMap:
42:
- - TypeTestAssumeVCalls:
+ - Live: true
+ TypeTestAssumeVCalls:
- GUID: 14276520915468743435 # typeid1
Offset: 0
TypeCheckedLoadVCalls:
diff --git a/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml b/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml
index 1cb3ad3f134c..30159c5012b0 100644
--- a/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml
+++ b/test/Transforms/WholeProgramDevirt/Inputs/import-indir.yaml
@@ -1,7 +1,8 @@
---
GlobalValueMap:
42:
- - TypeTestAssumeVCalls:
+ - Live: true
+ TypeTestAssumeVCalls:
- GUID: 123
Offset: 0
- GUID: 456
diff --git a/test/Transforms/WholeProgramDevirt/export-nothing.ll b/test/Transforms/WholeProgramDevirt/export-nothing.ll
index e0814efbf9c0..4707eaa17ead 100644
--- a/test/Transforms/WholeProgramDevirt/export-nothing.ll
+++ b/test/Transforms/WholeProgramDevirt/export-nothing.ll
@@ -4,4 +4,5 @@
; CHECK: ---
; CHECK-NEXT: GlobalValueMap:
; CHECK-NEXT: TypeIdMap:
+; CHECK-NEXT: WithGlobalValueDeadStripping: false
; CHECK-NEXT: ...
diff --git a/test/Transforms/WholeProgramDevirt/export-single-impl.ll b/test/Transforms/WholeProgramDevirt/export-single-impl.ll
index f4f3fd054c46..15de77381ed1 100644
--- a/test/Transforms/WholeProgramDevirt/export-single-impl.ll
+++ b/test/Transforms/WholeProgramDevirt/export-single-impl.ll
@@ -38,6 +38,7 @@
; SUMMARY-NEXT: Kind: SingleImpl
; SUMMARY-NEXT: SingleImplName: 'vf4$merged'
; SUMMARY-NEXT: ResByArg:
+; SUMMARY-NEXT: WithGlobalValueDeadStripping: false
; SUMMARY-NEXT: ...
; CHECK: @vt1 = constant void (i8*)* @vf1
diff --git a/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll b/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
index 1d7030c41fd0..11b1c5de4d83 100644
--- a/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
+++ b/test/Transforms/WholeProgramDevirt/export-uniform-ret-val.ll
@@ -1,8 +1,7 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t
-; SUMMARY: - TypeTests:
-; SUMMARY-NEXT: TypeTestAssumeVCalls:
+; SUMMARY-NOT: TypeTests:
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid4:
diff --git a/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll b/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
index 174a573b5b0d..0878d01cce03 100644
--- a/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
+++ b/test/Transforms/WholeProgramDevirt/export-unique-ret-val.ll
@@ -1,8 +1,7 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -S -o - %s | FileCheck %s
; RUN: FileCheck --check-prefix=SUMMARY %s < %t
-; SUMMARY: - TypeTests:
-; SUMMARY-NEXT: TypeTestAssumeVCalls:
+; SUMMARY-NOT: TypeTests:
; SUMMARY: TypeIdMap:
; SUMMARY-NEXT: typeid3:
diff --git a/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll b/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll
index 0785ade28570..3132444a9f36 100644
--- a/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll
+++ b/test/Transforms/WholeProgramDevirt/export-unsuccessful-checked.ll
@@ -1,7 +1,7 @@
; RUN: opt -wholeprogramdevirt -wholeprogramdevirt-summary-action=export -wholeprogramdevirt-read-summary=%S/Inputs/export.yaml -wholeprogramdevirt-write-summary=%t -o /dev/null %s
; RUN: FileCheck %s < %t
-; CHECK: - TypeTests: [ 15427464259790519041, 17525413373118030901 ]
+; CHECK: TypeTests: [ 15427464259790519041, 17525413373118030901 ]
; CHECK-NEXT: TypeTestAssumeVCalls:
@vt1a = constant void (i8*)* @vf1a, !type !0
diff --git a/test/Transforms/WholeProgramDevirt/import-indir.ll b/test/Transforms/WholeProgramDevirt/import-indir.ll
index 1de9352eeb22..73c982b17893 100644
--- a/test/Transforms/WholeProgramDevirt/import-indir.ll
+++ b/test/Transforms/WholeProgramDevirt/import-indir.ll
@@ -4,7 +4,9 @@
; SUMMARY: GlobalValueMap:
; SUMMARY-NEXT: 42:
-; SUMMARY-NEXT: - TypeTests:
+; SUMMARY-NEXT: - Linkage: 0
+; SUMMARY-NEXT: NotEligibleToImport: false
+; SUMMARY-NEXT: Live: true
; SUMMARY-NEXT: TypeTestAssumeVCalls:
; SUMMARY-NEXT: - GUID: 123
; SUMMARY-NEXT: Offset: 0
diff --git a/test/tools/llvm-lto2/X86/pipeline.ll b/test/tools/llvm-lto2/X86/pipeline.ll
index dbec9ab22527..7effb0c801b9 100644
--- a/test/tools/llvm-lto2/X86/pipeline.ll
+++ b/test/tools/llvm-lto2/X86/pipeline.ll
@@ -8,7 +8,7 @@
; Try the new pass manager LTO default pipeline (make sure the option
; is accepted).
-; RUN: llvm-lto2 run %t1.bc -o %t.o -lto-use-new-pm -r %t1.bc,patatino,px
+; RUN: llvm-lto2 run %t1.bc -o %t.o -use-new-pm -r %t1.bc,patatino,px
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/tools/llc/llc.cpp b/tools/llc/llc.cpp
index 589005943045..e10d112dcf90 100644
--- a/tools/llc/llc.cpp
+++ b/tools/llc/llc.cpp
@@ -304,6 +304,9 @@ int main(int argc, char **argv) {
initializeScalarizeMaskedMemIntrinPass(*Registry);
initializeExpandReductionsPass(*Registry);
+ // Initialize debugging passes.
+ initializeScavengerTestPass(*Registry);
+
// Register the target printer for --version.
cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
diff --git a/tools/llvm-config/llvm-config.cpp b/tools/llvm-config/llvm-config.cpp
index 888da7143c9f..08b096afb052 100644
--- a/tools/llvm-config/llvm-config.cpp
+++ b/tools/llvm-config/llvm-config.cpp
@@ -333,7 +333,7 @@ int main(int argc, char **argv) {
} else {
ActivePrefix = CurrentExecPrefix;
ActiveIncludeDir = ActivePrefix + "/include";
- SmallString<PATH_MAX> path(StringRef(LLVM_TOOLS_INSTALL_DIR));
+ SmallString<256> path(StringRef(LLVM_TOOLS_INSTALL_DIR));
sys::fs::make_absolute(ActivePrefix, path);
ActiveBinDir = path.str();
ActiveLibDir = ActivePrefix + "/lib" + LLVM_LIBDIR_SUFFIX;
diff --git a/tools/llvm-lto2/llvm-lto2.cpp b/tools/llvm-lto2/llvm-lto2.cpp
index 3d2643db85bd..89f85157e1df 100644
--- a/tools/llvm-lto2/llvm-lto2.cpp
+++ b/tools/llvm-lto2/llvm-lto2.cpp
@@ -99,6 +99,11 @@ static cl::opt<bool> OptRemarksWithHotness(
cl::desc("Whether to include hotness informations in the remarks.\n"
"Has effect only if -pass-remarks-output is specified."));
+static cl::opt<bool>
+ UseNewPM("use-new-pm",
+ cl::desc("Run LTO passes using the new pass manager"),
+ cl::init(false), cl::Hidden);
+
static void check(Error E, std::string Msg) {
if (!E)
return;
@@ -196,6 +201,7 @@ static int run(int argc, char **argv) {
Conf.AAPipeline = AAPipeline;
Conf.OptLevel = OptLevel - '0';
+ Conf.UseNewPM = UseNewPM;
switch (CGOptLevel) {
case '0':
Conf.CGOptLevel = CodeGenOpt::None;
@@ -351,7 +357,7 @@ int main(int argc, char **argv) {
// FIXME: This should use llvm::cl subcommands, but it isn't currently
// possible to pass an argument not associated with a subcommand to a
- // subcommand (e.g. -lto-use-new-pm).
+ // subcommand (e.g. -use-new-pm).
if (argc < 2)
return usage();
diff --git a/tools/llvm-pdbdump/LLVMOutputStyle.cpp b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
index d95eca1aeddb..31c342cd0f5a 100644
--- a/tools/llvm-pdbdump/LLVMOutputStyle.cpp
+++ b/tools/llvm-pdbdump/LLVMOutputStyle.cpp
@@ -483,8 +483,8 @@ Error LLVMOutputStyle::dumpStreamBytes() {
if (SI >= File.getNumStreams())
return make_error<RawError>(raw_error_code::no_stream);
- auto S = MappedBlockStream::createIndexedStream(File.getMsfLayout(),
- File.getMsfBuffer(), SI);
+ auto S = MappedBlockStream::createIndexedStream(
+ File.getMsfLayout(), File.getMsfBuffer(), SI, File.getAllocator());
if (!S)
continue;
DictScope DD(P, "Stream");
@@ -791,7 +791,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
if (HasModuleDI && (ShouldDumpSymbols || opts::raw::DumpLineInfo)) {
auto ModStreamData = MappedBlockStream::createIndexedStream(
File.getMsfLayout(), File.getMsfBuffer(),
- Modi.getModuleStreamIndex());
+ Modi.getModuleStreamIndex(), File.getAllocator());
ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
if (auto EC = ModS.reload())
@@ -804,7 +804,8 @@ Error LLVMOutputStyle::dumpDbiStream() {
auto &Types = *ExpectedTypes;
ListScope SS(P, "Symbols");
- codeview::CVSymbolDumper SD(P, Types, nullptr, false);
+ codeview::CVSymbolDumper SD(P, Types, CodeViewContainer::Pdb, nullptr,
+ false);
bool HadError = false;
for (auto S : ModS.symbols(&HadError)) {
DictScope LL(P, "");
@@ -830,8 +831,7 @@ Error LLVMOutputStyle::dumpDbiStream() {
return ExpectedTypes.takeError();
auto &IpiItems = *ExpectedTypes;
C13RawVisitor V(P, File, IpiItems);
- if (auto EC =
- codeview::visitDebugSubsections(ModS.linesAndChecksums(), V))
+ if (auto EC = codeview::visitDebugSubsections(ModS.subsections(), V))
return EC;
}
}
@@ -952,7 +952,7 @@ Error LLVMOutputStyle::dumpPublicsStream() {
return ExpectedTypes.takeError();
auto &Tpi = *ExpectedTypes;
- codeview::CVSymbolDumper SD(P, Tpi, nullptr, false);
+ codeview::CVSymbolDumper SD(P, Tpi, CodeViewContainer::Pdb, nullptr, false);
bool HadError = false;
for (auto S : Publics->getSymbols(&HadError)) {
DictScope DD(P, "");
diff --git a/tools/llvm-pdbdump/PdbYaml.cpp b/tools/llvm-pdbdump/PdbYaml.cpp
index e288063e2afa..b4a41fbfdb8f 100644
--- a/tools/llvm-pdbdump/PdbYaml.cpp
+++ b/tools/llvm-pdbdump/PdbYaml.cpp
@@ -12,6 +12,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/DebugInfo/CodeView/CVSymbolVisitor.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
+#include "llvm/DebugInfo/CodeView/DebugStringTableSubsection.h"
#include "llvm/DebugInfo/CodeView/SymbolDeserializer.h"
#include "llvm/DebugInfo/CodeView/SymbolVisitorCallbackPipeline.h"
#include "llvm/DebugInfo/CodeView/TypeSerializer.h"
@@ -21,6 +22,7 @@
#include "llvm/DebugInfo/PDB/Native/TpiHashing.h"
#include "llvm/DebugInfo/PDB/PDBExtras.h"
#include "llvm/DebugInfo/PDB/PDBTypes.h"
+#include "llvm/ObjectYAML/CodeViewYAMLDebugSections.h"
#include "llvm/ObjectYAML/CodeViewYAMLTypes.h"
using namespace llvm;
@@ -220,6 +222,6 @@ void MappingTraits<PdbDbiModuleInfo>::mapping(IO &IO, PdbDbiModuleInfo &Obj) {
IO.mapRequired("Module", Obj.Mod);
IO.mapOptional("ObjFile", Obj.Obj, Obj.Mod);
IO.mapOptional("SourceFiles", Obj.SourceFiles);
- IO.mapOptional("LineInfo", Obj.FileLineInfo);
+ IO.mapOptional("Subsections", Obj.Subsections);
IO.mapOptional("Modi", Obj.Modi);
}
diff --git a/tools/llvm-pdbdump/PdbYaml.h b/tools/llvm-pdbdump/PdbYaml.h
index deb500ec2074..62ed608916fc 100644
--- a/tools/llvm-pdbdump/PdbYaml.h
+++ b/tools/llvm-pdbdump/PdbYaml.h
@@ -28,6 +28,9 @@
#include <vector>
namespace llvm {
+namespace codeview {
+class DebugStringTableSubsection;
+}
namespace pdb {
namespace yaml {
@@ -68,7 +71,7 @@ struct PdbDbiModuleInfo {
StringRef Obj;
StringRef Mod;
std::vector<StringRef> SourceFiles;
- Optional<CodeViewYAML::SourceFileInfo> FileLineInfo;
+ std::vector<CodeViewYAML::YAMLDebugSubsection> Subsections;
Optional<PdbModiStream> Modi;
};
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.cpp b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
index 18839a7679d3..ee72b90b12d1 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.cpp
+++ b/tools/llvm-pdbdump/YAMLOutputStyle.cpp
@@ -101,117 +101,6 @@ Error YAMLOutputStyle::dump() {
return Error::success();
}
-namespace {
-class C13YamlVisitor : public C13DebugFragmentVisitor {
-public:
- C13YamlVisitor(CodeViewYAML::SourceFileInfo &Info, PDBFile &F)
- : C13DebugFragmentVisitor(F), Info(Info) {}
-
- Error handleFileChecksums() override {
- for (const auto &C : *Checksums) {
- CodeViewYAML::SourceFileChecksumEntry Entry;
- if (auto Result = getNameFromStringTable(C.FileNameOffset))
- Entry.FileName = *Result;
- else
- return Result.takeError();
-
- Entry.Kind = C.Kind;
- Entry.ChecksumBytes.Bytes = C.Checksum;
- Info.FileChecksums.push_back(Entry);
- }
- return Error::success();
- }
-
- Error handleLines() override {
- for (const auto &LF : Lines) {
- Info.LineFragments.emplace_back();
- auto &Fragment = Info.LineFragments.back();
-
- Fragment.CodeSize = LF.header()->CodeSize;
- Fragment.Flags =
- static_cast<codeview::LineFlags>(uint16_t(LF.header()->Flags));
- Fragment.RelocOffset = LF.header()->RelocOffset;
- Fragment.RelocSegment = LF.header()->RelocSegment;
-
- for (const auto &L : LF) {
- Fragment.Blocks.emplace_back();
- auto &Block = Fragment.Blocks.back();
-
- if (auto Result = getNameFromChecksumsBuffer(L.NameIndex))
- Block.FileName = *Result;
- else
- return Result.takeError();
-
- for (const auto &N : L.LineNumbers) {
- CodeViewYAML::SourceLineEntry Line;
- Line.Offset = N.Offset;
- codeview::LineInfo LI(N.Flags);
- Line.LineStart = LI.getStartLine();
- Line.EndDelta = LI.getLineDelta();
- Line.IsStatement = LI.isStatement();
- Block.Lines.push_back(Line);
- }
-
- if (LF.hasColumnInfo()) {
- for (const auto &C : L.Columns) {
- CodeViewYAML::SourceColumnEntry Column;
- Column.StartColumn = C.StartColumn;
- Column.EndColumn = C.EndColumn;
- Block.Columns.push_back(Column);
- }
- }
- }
- }
- return Error::success();
- }
-
- Error handleInlineeLines() override {
- for (const auto &ILF : InlineeLines) {
- Info.Inlinees.emplace_back();
- auto &Inlinee = Info.Inlinees.back();
-
- Inlinee.HasExtraFiles = ILF.hasExtraFiles();
- for (const auto &IL : ILF) {
- Inlinee.Sites.emplace_back();
- auto &Site = Inlinee.Sites.back();
- if (auto Result = getNameFromChecksumsBuffer(IL.Header->FileID))
- Site.FileName = *Result;
- else
- return Result.takeError();
-
- Site.Inlinee = IL.Header->Inlinee.getIndex();
- Site.SourceLineNum = IL.Header->SourceLineNum;
- if (ILF.hasExtraFiles()) {
- for (const auto &EF : IL.ExtraFiles) {
- if (auto Result = getNameFromChecksumsBuffer(EF))
- Site.ExtraFiles.push_back(*Result);
- else
- return Result.takeError();
- }
- }
- }
- }
- return Error::success();
- }
-
-private:
- CodeViewYAML::SourceFileInfo &Info;
-};
-}
-
-Expected<Optional<CodeViewYAML::SourceFileInfo>>
-YAMLOutputStyle::getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS) {
- if (!ModS.hasLineInfo())
- return None;
-
- CodeViewYAML::SourceFileInfo Info;
- C13YamlVisitor Visitor(Info, File);
- if (auto EC =
- codeview::visitDebugSubsections(ModS.linesAndChecksums(), Visitor))
- return std::move(EC);
-
- return Info;
-}
Error YAMLOutputStyle::dumpFileHeaders() {
if (opts::pdb2yaml::NoFileHeaders)
@@ -236,14 +125,17 @@ Error YAMLOutputStyle::dumpFileHeaders() {
}
Error YAMLOutputStyle::dumpStringTable() {
- if (!opts::pdb2yaml::StringTable)
+ bool RequiresStringTable = opts::pdb2yaml::DbiModuleSourceFileInfo ||
+ opts::pdb2yaml::DbiModuleSourceLineInfo;
+ bool RequestedStringTable = opts::pdb2yaml::StringTable;
+ if (!RequiresStringTable && !RequestedStringTable)
return Error::success();
- Obj.StringTable.emplace();
auto ExpectedST = File.getStringTable();
if (!ExpectedST)
return ExpectedST.takeError();
+ Obj.StringTable.emplace();
const auto &ST = ExpectedST.get();
for (auto ID : ST.name_ids()) {
auto S = ST.getStringForID(ID);
@@ -337,17 +229,30 @@ Error YAMLOutputStyle::dumpDbiStream() {
continue;
auto ModStreamData = msf::MappedBlockStream::createIndexedStream(
- File.getMsfLayout(), File.getMsfBuffer(), ModiStream);
+ File.getMsfLayout(), File.getMsfBuffer(), ModiStream,
+ File.getAllocator());
pdb::ModuleDebugStreamRef ModS(MI, std::move(ModStreamData));
if (auto EC = ModS.reload())
return EC;
- if (opts::pdb2yaml::DbiModuleSourceLineInfo) {
- auto ExpectedInfo = getFileLineInfo(ModS);
- if (!ExpectedInfo)
- return ExpectedInfo.takeError();
- DMI.FileLineInfo = *ExpectedInfo;
+ auto ExpectedST = File.getStringTable();
+ if (!ExpectedST)
+ return ExpectedST.takeError();
+ if (opts::pdb2yaml::DbiModuleSourceLineInfo &&
+ ModS.hasDebugSubsections()) {
+ auto ExpectedChecksums = ModS.findChecksumsSubsection();
+ if (!ExpectedChecksums)
+ return ExpectedChecksums.takeError();
+
+ for (const auto &SS : ModS.subsections()) {
+ auto Converted =
+ CodeViewYAML::YAMLDebugSubsection::fromCodeViewSubection(
+ ExpectedST->getStringTable(), *ExpectedChecksums, SS);
+ if (!Converted)
+ return Converted.takeError();
+ DMI.Subsections.push_back(*Converted);
+ }
}
if (opts::pdb2yaml::DbiModuleSyms) {
diff --git a/tools/llvm-pdbdump/YAMLOutputStyle.h b/tools/llvm-pdbdump/YAMLOutputStyle.h
index 6e4067c48f88..3690e3529d4a 100644
--- a/tools/llvm-pdbdump/YAMLOutputStyle.h
+++ b/tools/llvm-pdbdump/YAMLOutputStyle.h
@@ -27,9 +27,6 @@ public:
Error dump() override;
private:
- Expected<Optional<CodeViewYAML::SourceFileInfo>>
- getFileLineInfo(const pdb::ModuleDebugStreamRef &ModS);
-
Error dumpStringTable();
Error dumpFileHeaders();
Error dumpStreamMetadata();
diff --git a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
index 14cd222d138a..5f09416a9ff6 100644
--- a/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
+++ b/tools/llvm-pdbdump/fuzzer/llvm-pdbdump-fuzzer.cpp
@@ -85,7 +85,7 @@ extern "C" int LLVMFuzzerTestOneInput(uint8_t *data, size_t size) {
for (auto &Modi : DS.modules()) {
auto ModStreamData = pdb::MappedBlockStream::createIndexedStream(
- Modi.Info.getModuleStreamIndex(), *File);
+ Modi.Info.getModuleStreamIndex(), *File, File->getAllocator());
if (!ModStreamData) {
consumeError(ModStreamData.takeError());
return 0;
diff --git a/tools/llvm-pdbdump/llvm-pdbdump.cpp b/tools/llvm-pdbdump/llvm-pdbdump.cpp
index 0b2b766a3c52..4626de9c4440 100644
--- a/tools/llvm-pdbdump/llvm-pdbdump.cpp
+++ b/tools/llvm-pdbdump/llvm-pdbdump.cpp
@@ -476,7 +476,6 @@ static void yamlToPdb(StringRef Path) {
std::unique_ptr<MemoryBuffer> &Buffer = ErrorOrBuffer.get();
llvm::yaml::Input In(Buffer->getBuffer());
- In.setContext(&Allocator);
pdb::yaml::PdbObject YamlObj(Allocator);
In >> YamlObj;
@@ -535,67 +534,16 @@ static void yamlToPdb(StringRef Path) {
ExitOnErr(DbiBuilder.addModuleSourceFile(MI.Mod, S));
if (MI.Modi.hasValue()) {
const auto &ModiStream = *MI.Modi;
- for (auto Symbol : ModiStream.Symbols)
- ModiBuilder.addSymbol(Symbol.toCodeViewSymbol(Allocator));
- }
- if (MI.FileLineInfo.hasValue()) {
- const auto &FLI = *MI.FileLineInfo;
-
- // File Checksums must be emitted before line information, because line
- // info records use offsets into the checksum buffer to reference a file's
- // source file name.
- auto Checksums = llvm::make_unique<DebugChecksumsSubsection>(Strings);
- auto &ChecksumRef = *Checksums;
- if (!FLI.FileChecksums.empty()) {
- for (auto &FC : FLI.FileChecksums)
- Checksums->addChecksum(FC.FileName, FC.Kind, FC.ChecksumBytes.Bytes);
- }
- ModiBuilder.setC13FileChecksums(std::move(Checksums));
-
- for (const auto &Fragment : FLI.LineFragments) {
- auto Lines =
- llvm::make_unique<DebugLinesSubsection>(ChecksumRef, Strings);
- Lines->setCodeSize(Fragment.CodeSize);
- Lines->setRelocationAddress(Fragment.RelocSegment,
- Fragment.RelocOffset);
- Lines->setFlags(Fragment.Flags);
- for (const auto &LC : Fragment.Blocks) {
- Lines->createBlock(LC.FileName);
- if (Lines->hasColumnInfo()) {
- for (const auto &Item : zip(LC.Lines, LC.Columns)) {
- auto &L = std::get<0>(Item);
- auto &C = std::get<1>(Item);
- uint32_t LE = L.LineStart + L.EndDelta;
- Lines->addLineAndColumnInfo(
- L.Offset, LineInfo(L.LineStart, LE, L.IsStatement),
- C.StartColumn, C.EndColumn);
- }
- } else {
- for (const auto &L : LC.Lines) {
- uint32_t LE = L.LineStart + L.EndDelta;
- Lines->addLineInfo(L.Offset,
- LineInfo(L.LineStart, LE, L.IsStatement));
- }
- }
- }
- ModiBuilder.addC13Fragment(std::move(Lines));
+ for (auto Symbol : ModiStream.Symbols) {
+ ModiBuilder.addSymbol(
+ Symbol.toCodeViewSymbol(Allocator, CodeViewContainer::Pdb));
}
+ }
- for (const auto &Inlinee : FLI.Inlinees) {
- auto Inlinees = llvm::make_unique<DebugInlineeLinesSubsection>(
- ChecksumRef, Inlinee.HasExtraFiles);
- for (const auto &Site : Inlinee.Sites) {
- Inlinees->addInlineSite(TypeIndex(Site.Inlinee), Site.FileName,
- Site.SourceLineNum);
- if (!Inlinee.HasExtraFiles)
- continue;
-
- for (auto EF : Site.ExtraFiles) {
- Inlinees->addExtraFile(EF);
- }
- }
- ModiBuilder.addC13Fragment(std::move(Inlinees));
- }
+ auto CodeViewSubsections =
+ ExitOnErr(CodeViewYAML::convertSubsectionList(MI.Subsections, Strings));
+ for (auto &SS : CodeViewSubsections) {
+ ModiBuilder.addDebugSubsection(std::move(SS));
}
}
diff --git a/tools/llvm-readobj/COFFDumper.cpp b/tools/llvm-readobj/COFFDumper.cpp
index 663f7b4c8a82..bc07bd296ad2 100644
--- a/tools/llvm-readobj/COFFDumper.cpp
+++ b/tools/llvm-readobj/COFFDumper.cpp
@@ -978,7 +978,8 @@ void COFFDumper::printCodeViewSymbolsSubsection(StringRef Subsection,
Subsection.bytes_end());
auto CODD = llvm::make_unique<COFFObjectDumpDelegate>(*this, Section, Obj,
SectionContents);
- CVSymbolDumper CVSD(W, Types, std::move(CODD), opts::CodeViewSubsectionBytes);
+ CVSymbolDumper CVSD(W, Types, CodeViewContainer::ObjectFile, std::move(CODD),
+ opts::CodeViewSubsectionBytes);
CVSymbolArray Symbols;
BinaryStreamReader Reader(BinaryData, llvm::support::little);
if (auto EC = Reader.readArray(Symbols, Reader.getLength())) {
diff --git a/unittests/ADT/SmallVectorTest.cpp b/unittests/ADT/SmallVectorTest.cpp
index 7367ad470e3a..ca6391024f27 100644
--- a/unittests/ADT/SmallVectorTest.cpp
+++ b/unittests/ADT/SmallVectorTest.cpp
@@ -424,6 +424,16 @@ TYPED_TEST(SmallVectorTest, AssignTest) {
this->assertValuesInOrder(this->theVector, 2u, 77, 77);
}
+// Assign from iterator range test
+TYPED_TEST(SmallVectorTest, AssignRangeTest) {
+ SCOPED_TRACE("AssignRangeTest");
+
+ this->theVector.push_back(Constructable(1));
+ int arr[] = {1, 2, 3};
+ this->theVector.assign(std::begin(arr), std::end(arr));
+ this->assertValuesInOrder(this->theVector, 3u, 1, 2, 3);
+}
+
// Move-assign test
TYPED_TEST(SmallVectorTest, MoveAssignTest) {
SCOPED_TRACE("MoveAssignTest");
diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt
index 40d5ea5f5ad7..8082c54b9c66 100644
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@@ -9,17 +9,18 @@ add_llvm_unittest(AnalysisTests
AliasAnalysisTest.cpp
BlockFrequencyInfoTest.cpp
BranchProbabilityInfoTest.cpp
+ CallGraphTest.cpp
CFGTest.cpp
CGSCCPassManagerTest.cpp
- CallGraphTest.cpp
LazyCallGraphTest.cpp
LoopInfoTest.cpp
MemoryBuiltinsTest.cpp
MemorySSA.cpp
+ OrderedBasicBlockTest.cpp
ProfileSummaryInfoTest.cpp
ScalarEvolutionTest.cpp
- TBAATest.cpp
TargetLibraryInfoTest.cpp
+ TBAATest.cpp
UnrollAnalyzer.cpp
ValueTrackingTest.cpp
)
diff --git a/unittests/Analysis/OrderedBasicBlockTest.cpp b/unittests/Analysis/OrderedBasicBlockTest.cpp
new file mode 100644
index 000000000000..b8b9ff04ce7c
--- /dev/null
+++ b/unittests/Analysis/OrderedBasicBlockTest.cpp
@@ -0,0 +1,58 @@
+//===- OrderedBasicBlockTest.cpp - OrderedBasicBlock unit tests -----------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/OrderedBasicBlock.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/DataTypes.h"
+#include "llvm/Support/SourceMgr.h"
+#include "gtest/gtest.h"
+
+namespace llvm {
+namespace {
+
+class OrderedBasicBlockTest : public testing::Test {
+protected:
+ LLVMContext C;
+
+ std::unique_ptr<Module> makeLLVMModule() {
+ const char *ModuleString = R"(define i32 @f(i32 %x) {
+ %add = add i32 %x, 42
+ ret i32 %add
+ })";
+ SMDiagnostic Err;
+ auto M = parseAssemblyString(ModuleString, Err, C);
+ return M;
+ }
+};
+
+TEST_F(OrderedBasicBlockTest, Basic) {
+ auto M = makeLLVMModule();
+ Function *F = M->getFunction("f");
+ BasicBlock::iterator I = F->front().begin();
+ Instruction *Add = &*I++;
+ Instruction *Ret = &*I++;
+
+ OrderedBasicBlock OBB(&F->front());
+ // Intentionally duplicated to verify cached and uncached are the same.
+ EXPECT_FALSE(OBB.dominates(Add, Add));
+ EXPECT_FALSE(OBB.dominates(Add, Add));
+ EXPECT_TRUE(OBB.dominates(Add, Ret));
+ EXPECT_TRUE(OBB.dominates(Add, Ret));
+ EXPECT_FALSE(OBB.dominates(Ret, Add));
+ EXPECT_FALSE(OBB.dominates(Ret, Add));
+ EXPECT_FALSE(OBB.dominates(Ret, Ret));
+ EXPECT_FALSE(OBB.dominates(Ret, Ret));
+}
+
+} // end anonymous namespace
+} // end namespace llvm
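
OrderedBasicBlock answers intra-block dominance queries by lazily numbering instructions, which is why the test repeats each EXPECT: the first call exercises the lazy scan, the second the cache. A self-contained analogue of that caching scheme (illustrative; not the LLVM implementation):

// Lazily assign each instruction a position the first time it is queried,
// so repeated dominates() calls on the same instructions are O(1).
#include <cassert>
#include <unordered_map>
#include <vector>

struct Inst { int Id; };

class OrderedBlock {
  const std::vector<Inst *> &Block;                // instructions in order
  std::unordered_map<const Inst *, unsigned> Pos;  // lazily filled cache
  unsigned NextIdx = 0;

  unsigned position(const Inst *I) {
    auto It = Pos.find(I);
    if (It != Pos.end())
      return It->second;                           // cached: O(1)
    // Walk forward from where the last lazy scan stopped.
    while (NextIdx < Block.size()) {
      Pos[Block[NextIdx]] = NextIdx;
      if (Block[NextIdx++] == I)
        return NextIdx - 1;
    }
    assert(false && "instruction not in block");
    return 0;
  }

public:
  explicit OrderedBlock(const std::vector<Inst *> &B) : Block(B) {}
  // A dominates B within one block iff A comes strictly before B, matching
  // the EXPECT_FALSE(OBB.dominates(Add, Add)) checks above.
  bool dominates(const Inst *A, const Inst *B) {
    return position(A) < position(B);
  }
};

int main() {
  Inst Add{0}, Ret{1};
  std::vector<Inst *> Insts = {&Add, &Ret};
  OrderedBlock OBB(Insts);
  assert(OBB.dominates(&Add, &Ret));
  assert(!OBB.dominates(&Ret, &Add));
  assert(!OBB.dominates(&Add, &Add));              // strict, no self-dominance
}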
diff --git a/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp b/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp
index 9d90e265df33..789fe515b018 100644
--- a/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp
+++ b/unittests/DebugInfo/PDB/MappedBlockStreamTest.cpp
@@ -70,6 +70,8 @@ public:
return MSFStreamLayout{static_cast<uint32_t>(Data.size()), Blocks};
}
+ BumpPtrAllocator Allocator;
+
private:
std::vector<support::ulittle32_t> Blocks;
MutableArrayRef<uint8_t> Data;
@@ -77,7 +79,8 @@ private:
TEST(MappedBlockStreamTest, NumBlocks) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
EXPECT_EQ(F.block_size(), S->getBlockSize());
EXPECT_EQ(F.layout().Blocks.size(), S->getNumBlocks());
@@ -87,7 +90,8 @@ TEST(MappedBlockStreamTest, NumBlocks) {
// and does not allocate.
TEST(MappedBlockStreamTest, ReadBeyondEndOfStreamRef) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
BinaryStreamRef SR;
@@ -102,13 +106,14 @@ TEST(MappedBlockStreamTest, ReadBeyondEndOfStreamRef) {
// does not fail due to the length of the output buffer.
TEST(MappedBlockStreamTest, ReadOntoNonEmptyBuffer) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str = "ZYXWVUTSRQPONMLKJIHGFEDCBA";
EXPECT_NO_ERROR(R.readFixedString(Str, 1));
EXPECT_EQ(Str, StringRef("A"));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Tests that a read which crosses a block boundary, but where the subsequent
@@ -116,18 +121,18 @@ TEST(MappedBlockStreamTest, ReadOntoNonEmptyBuffer) {
// not allocate memory.
TEST(MappedBlockStreamTest, ZeroCopyReadContiguousBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(),
- F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
EXPECT_NO_ERROR(R.readFixedString(Str, 2));
EXPECT_EQ(Str, StringRef("AB"));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
R.setOffset(6);
EXPECT_NO_ERROR(R.readFixedString(Str, 4));
EXPECT_EQ(Str, StringRef("GHIJ"));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Tests that a read which crosses a block boundary and cannot be referenced
@@ -135,62 +140,67 @@ TEST(MappedBlockStreamTest, ZeroCopyReadContiguousBreak) {
// requested.
TEST(MappedBlockStreamTest, CopyReadNonContiguousBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
EXPECT_NO_ERROR(R.readFixedString(Str, 10));
EXPECT_EQ(Str, StringRef("ABCDEFGHIJ"));
- EXPECT_EQ(10U, S->getNumBytesCopied());
+ EXPECT_EQ(10U, F.Allocator.getBytesAllocated());
}
// Test that an out of bounds read which doesn't cross a block boundary
// fails and allocates no memory.
TEST(MappedBlockStreamTest, InvalidReadSizeNoBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
R.setOffset(10);
EXPECT_ERROR(R.readFixedString(Str, 1));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Test that an out of bounds read which crosses a contiguous block boundary
// fails and allocates no memory.
TEST(MappedBlockStreamTest, InvalidReadSizeContiguousBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
R.setOffset(6);
EXPECT_ERROR(R.readFixedString(Str, 5));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Test that an out of bounds read which crosses a discontiguous block
// boundary fails and allocates no memory.
TEST(MappedBlockStreamTest, InvalidReadSizeNonContiguousBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
EXPECT_ERROR(R.readFixedString(Str, 11));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Tests that a read which is entirely contained within a single block but
// beyond the end of a StreamRef fails.
TEST(MappedBlockStreamTest, ZeroCopyReadNoBreak) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str;
EXPECT_NO_ERROR(R.readFixedString(Str, 1));
EXPECT_EQ(Str, StringRef("A"));
- EXPECT_EQ(0U, S->getNumBytesCopied());
+ EXPECT_EQ(0U, F.Allocator.getBytesAllocated());
}
// Tests that a read which is not aligned on the same boundary as a previous
@@ -198,19 +208,20 @@ TEST(MappedBlockStreamTest, ZeroCopyReadNoBreak) {
// previous allocation.
TEST(MappedBlockStreamTest, UnalignedOverlappingRead) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str1;
StringRef Str2;
EXPECT_NO_ERROR(R.readFixedString(Str1, 7));
EXPECT_EQ(Str1, StringRef("ABCDEFG"));
- EXPECT_EQ(7U, S->getNumBytesCopied());
+ EXPECT_EQ(7U, F.Allocator.getBytesAllocated());
R.setOffset(2);
EXPECT_NO_ERROR(R.readFixedString(Str2, 3));
EXPECT_EQ(Str2, StringRef("CDE"));
EXPECT_EQ(Str1.data() + 2, Str2.data());
- EXPECT_EQ(7U, S->getNumBytesCopied());
+ EXPECT_EQ(7U, F.Allocator.getBytesAllocated());
}
// Tests that a read which is not aligned on the same boundary as a previous
@@ -218,18 +229,19 @@ TEST(MappedBlockStreamTest, UnalignedOverlappingRead) {
// still works correctly and allocates again from the shared pool.
TEST(MappedBlockStreamTest, UnalignedOverlappingReadFail) {
DiscontiguousStream F(BlocksAry, DataAry);
- auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F);
+ auto S = MappedBlockStream::createStream(F.block_size(), F.layout(), F,
+ F.Allocator);
BinaryStreamReader R(*S);
StringRef Str1;
StringRef Str2;
EXPECT_NO_ERROR(R.readFixedString(Str1, 6));
EXPECT_EQ(Str1, StringRef("ABCDEF"));
- EXPECT_EQ(6U, S->getNumBytesCopied());
+ EXPECT_EQ(6U, F.Allocator.getBytesAllocated());
R.setOffset(4);
EXPECT_NO_ERROR(R.readFixedString(Str2, 4));
EXPECT_EQ(Str2, StringRef("EFGH"));
- EXPECT_EQ(10U, S->getNumBytesCopied());
+ EXPECT_EQ(10U, F.Allocator.getBytesAllocated());
}
TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) {
@@ -241,8 +253,8 @@ TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) {
"LargeBuffer is not big enough");
DiscontiguousStream F(BlocksAry, Data);
- auto S = WritableMappedBlockStream::createStream(
- F.block_size(), F.layout(), F);
+ auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(),
+ F, F.Allocator);
ArrayRef<uint8_t> Buffer;
EXPECT_ERROR(S->writeBytes(0, ArrayRef<uint8_t>(LargeBuffer)));
@@ -254,8 +266,8 @@ TEST(MappedBlockStreamTest, WriteBeyondEndOfStream) {
TEST(MappedBlockStreamTest, TestWriteBytesNoBreakBoundary) {
static uint8_t Data[] = {'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'};
DiscontiguousStream F(BlocksAry, Data);
- auto S = WritableMappedBlockStream::createStream(
- F.block_size(), F.layout(), F);
+ auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(),
+ F, F.Allocator);
ArrayRef<uint8_t> Buffer;
EXPECT_NO_ERROR(S->readBytes(0, 1, Buffer));
@@ -287,8 +299,8 @@ TEST(MappedBlockStreamTest, TestWriteBytesBreakBoundary) {
'T', 'G', '.', '0', '0'};
DiscontiguousStream F(BlocksAry, Data);
- auto S = WritableMappedBlockStream::createStream(
- F.block_size(), F.layout(), F);
+ auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(),
+ F, F.Allocator);
ArrayRef<uint8_t> Buffer;
EXPECT_NO_ERROR(S->writeBytes(0, TestData));
@@ -306,8 +318,8 @@ TEST(MappedBlockStreamTest, TestWriteThenRead) {
const uint32_t Blocks[] = {2, 1, 0, 6, 3, 4, 5, 7, 9, 8};
DiscontiguousStream F(Blocks, Data);
- auto S = WritableMappedBlockStream::createStream(
- F.block_size(), F.layout(), F);
+ auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(),
+ F, F.Allocator);
enum class MyEnum : uint32_t { Val1 = 2908234, Val2 = 120891234 };
using support::ulittle32_t;
@@ -399,7 +411,7 @@ TEST(MappedBlockStreamTest, TestWriteContiguousStreamRef) {
DiscontiguousStream F(DestBlocks, DestData);
auto DestStream = WritableMappedBlockStream::createStream(
- F.block_size(), F.layout(), F);
+ F.block_size(), F.layout(), F, F.Allocator);
// First write "Test Str" into the source stream.
MutableBinaryByteStream SourceStream(SrcData, little);
@@ -434,9 +446,9 @@ TEST(MappedBlockStreamTest, TestWriteDiscontiguousStreamRef) {
DiscontiguousStream SrcF(SrcBlocks, SrcData);
auto Dest = WritableMappedBlockStream::createStream(
- DestF.block_size(), DestF.layout(), DestF);
+ DestF.block_size(), DestF.layout(), DestF, DestF.Allocator);
auto Src = WritableMappedBlockStream::createStream(
- SrcF.block_size(), SrcF.layout(), SrcF);
+ SrcF.block_size(), SrcF.layout(), SrcF, SrcF.Allocator);
// First write "Test Str" into the source stream.
BinaryStreamWriter SourceWriter(*Src);
@@ -457,4 +469,27 @@ TEST(MappedBlockStreamTest, TestWriteDiscontiguousStreamRef) {
EXPECT_EQ(Result, "Test Str");
}
+TEST(MappedBlockStreamTest, DataLivesAfterStreamDestruction) {
+ std::vector<uint8_t> DataBytes(10);
+ MutableArrayRef<uint8_t> Data(DataBytes);
+ const uint32_t Blocks[] = {2, 1, 0, 6, 3, 4, 5, 7, 9, 8};
+
+ StringRef Str[] = {"Zero Str", ""};
+
+ DiscontiguousStream F(Blocks, Data);
+ {
+ auto S = WritableMappedBlockStream::createStream(F.block_size(), F.layout(),
+ F, F.Allocator);
+
+ BinaryStreamReader Reader(*S);
+ BinaryStreamWriter Writer(*S);
+ ::memset(DataBytes.data(), 0, 10);
+ EXPECT_NO_ERROR(Writer.writeCString(Str[0]));
+ EXPECT_NO_ERROR(Reader.readCString(Str[1]));
+ EXPECT_EQ(Str[0], Str[1]);
+ }
+
+ EXPECT_EQ(Str[0], Str[1]);
+}
+
} // end anonymous namespace
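
Every createStream call above now takes a caller-owned BumpPtrAllocator, and the accounting assertions move from S->getNumBytesCopied() to F.Allocator.getBytesAllocated(); the payoff is the final test, where copied data survives the stream's destruction. A self-contained analogue of that ownership change (stand-in types, not the LLVM classes):

// The stream borrows a caller-owned arena for block-crossing copies, so the
// copied bytes remain valid after the stream itself is destroyed.
#include <cassert>
#include <cstring>
#include <deque>
#include <string>
#include <vector>

struct Arena {                           // stand-in for BumpPtrAllocator
  std::deque<std::vector<char>> Chunks;  // deque keeps old chunks stable
  std::size_t BytesAllocated = 0;
  char *allocate(std::size_t N) {
    Chunks.emplace_back(N);
    BytesAllocated += N;
    return Chunks.back().data();
  }
};

class BlockStream {                      // stand-in for MappedBlockStream
  const std::vector<std::string> &Blocks;
  Arena &A;                              // borrowed; owned by the caller
public:
  BlockStream(const std::vector<std::string> &B, Arena &Ar)
      : Blocks(B), A(Ar) {}
  // Reads within one block are zero-copy; reads spanning a boundary copy
  // into the caller's arena, which is what getBytesAllocated() measures.
  const char *read(std::size_t Block, std::size_t Off, std::size_t Len) {
    if (Off + Len <= Blocks[Block].size())
      return Blocks[Block].data() + Off;           // zero-copy view
    char *Buf = A.allocate(Len);
    std::size_t First = Blocks[Block].size() - Off;
    std::memcpy(Buf, Blocks[Block].data() + Off, First);
    std::memcpy(Buf + First, Blocks[Block + 1].data(), Len - First);
    return Buf;
  }
};

int main() {
  std::vector<std::string> Blocks = {"ABCDE", "FGHIJ"};
  Arena A;
  const char *P;
  {
    BlockStream S(Blocks, A);
    P = S.read(0, 3, 4);                 // "DEFG" crosses a block boundary
    assert(A.BytesAllocated == 4);
  }                                      // stream destroyed here
  assert(std::strncmp(P, "DEFG", 4) == 0);  // copy still alive in the arena
}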
diff --git a/unittests/Transforms/Utils/Cloning.cpp b/unittests/Transforms/Utils/Cloning.cpp
index d13547a842e4..db3d10847cd8 100644
--- a/unittests/Transforms/Utils/Cloning.cpp
+++ b/unittests/Transforms/Utils/Cloning.cpp
@@ -361,7 +361,7 @@ TEST_F(CloneFunc, NewFunctionCreated) {
// Test that a new subprogram entry was added and is pointing to the new
// function, while the original subprogram still points to the old one.
TEST_F(CloneFunc, Subprogram) {
- EXPECT_FALSE(verifyModule(*M));
+ EXPECT_FALSE(verifyModule(*M, &errs()));
EXPECT_EQ(3U, Finder->subprogram_count());
EXPECT_NE(NewFunc->getSubprogram(), OldFunc->getSubprogram());
}
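
verifyModule's second parameter is an optional raw_ostream; passing &errs() makes a failing check print the verifier's diagnostics instead of only flipping a boolean. A minimal usage sketch (assumes LLVM headers are available; the helper name is illustrative):

// Hedged sketch of the API the one-line change above relies on:
// verifyModule returns true when the module is broken and streams its
// complaints to the raw_ostream if one is supplied.
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

bool moduleIsValid(const llvm::Module &M) {
  // Pass &errs() so any verifier failure explains itself on stderr.
  return !llvm::verifyModule(M, &llvm::errs());
}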
diff --git a/utils/TableGen/X86FoldTablesEmitter.cpp b/utils/TableGen/X86FoldTablesEmitter.cpp
index b89cee2ce4bb..34f5fbc6ea31 100644
--- a/utils/TableGen/X86FoldTablesEmitter.cpp
+++ b/utils/TableGen/X86FoldTablesEmitter.cpp
@@ -101,6 +101,11 @@ const char *const NoFoldSet[] = {
"BTS16rr", "BTS32rr", "BTS64rr",
"BTS16mr", "BTS32mr", "BTS64mr",
+ // insertps cannot be folded without adjusting the immediate. There's custom
+ // code to handle it in X86InstrInfo.cpp, so ignore it here.
+ "INSERTPSrr", "INSERTPSrm",
+ "VINSERTPSrr", "VINSERTPSrm", "VINSERTPSZrr", "VINSERTPSZrm",
+
// Memory folding is enabled only when optimizing for size by DAG
// patterns only. (issue detailed in D28744 review)
"VCVTSS2SDrm", "VCVTSS2SDrr",
diff --git a/utils/lit/lit/util.py b/utils/lit/lit/util.py
index 104e9dac464d..8991588a868d 100644
--- a/utils/lit/lit/util.py
+++ b/utils/lit/lit/util.py
@@ -267,6 +267,20 @@ def usePlatformSdkOnDarwin(config, lit_config):
lit_config.note('using SDKROOT: %r' % sdk_path)
config.environment['SDKROOT'] = sdk_path
+def findPlatformSdkVersionOnMacOS(config, lit_config):
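+ """Return the macOS SDK version reported by xcrun, or None on failure."""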
+ if 'darwin' in config.target_triple:
+ try:
+ cmd = subprocess.Popen(['xcrun', '--show-sdk-version', '--sdk', 'macosx'],
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+ out, err = cmd.communicate()
+ out = out.strip()
+ res = cmd.wait()
+ except OSError:
+ res = -1
+ if res == 0 and out:
+ return out
+ return None
+
def killProcessAndChildren(pid):
"""
This function kills a process with ``pid`` and all its