Diffstat (limited to 'lib/CodeGen')
-rw-r--r-- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 7
-rw-r--r-- lib/CodeGen/AggressiveAntiDepBreaker.h | 7
-rw-r--r-- lib/CodeGen/AllocationOrder.cpp | 7
-rw-r--r-- lib/CodeGen/AllocationOrder.h | 7
-rw-r--r-- lib/CodeGen/Analysis.cpp | 52
-rw-r--r-- lib/CodeGen/AntiDepBreaker.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/ARMException.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/AccelTable.cpp | 46
-rw-r--r-- lib/CodeGen/AsmPrinter/AddressPool.cpp | 31
-rw-r--r-- lib/CodeGen/AsmPrinter/AddressPool.h | 9
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 279
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 31
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 104
-rw-r--r-- lib/CodeGen/AsmPrinter/ByteStreamer.h | 17
-rw-r--r-- lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 371
-rw-r--r-- lib/CodeGen/AsmPrinter/CodeViewDebug.h | 24
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.cpp | 26
-rw-r--r-- lib/CodeGen/AsmPrinter/DIEHash.cpp | 10
-rw-r--r-- lib/CodeGen/AsmPrinter/DIEHash.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 354
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp | 74
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugLocEntry.h | 205
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugLocStream.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DebugLocStream.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 184
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 29
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 668
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 92
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfException.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 120
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfExpression.h | 86
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfFile.cpp | 17
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfFile.h | 10
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfStringPool.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfStringPool.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 197
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfUnit.h | 38
-rw-r--r-- lib/CodeGen/AsmPrinter/EHStreamer.cpp | 20
-rw-r--r-- lib/CodeGen/AsmPrinter/EHStreamer.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/WasmException.cpp | 11
-rw-r--r-- lib/CodeGen/AsmPrinter/WasmException.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/WinCFGuard.cpp | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/WinCFGuard.h | 7
-rw-r--r-- lib/CodeGen/AsmPrinter/WinException.cpp | 49
-rw-r--r-- lib/CodeGen/AsmPrinter/WinException.h | 10
-rw-r--r-- lib/CodeGen/AtomicExpandPass.cpp | 70
-rw-r--r-- lib/CodeGen/BasicTargetTransformInfo.cpp | 7
-rw-r--r-- lib/CodeGen/BranchFolding.cpp | 72
-rw-r--r-- lib/CodeGen/BranchFolding.h | 7
-rw-r--r-- lib/CodeGen/BranchRelaxation.cpp | 7
-rw-r--r-- lib/CodeGen/BreakFalseDeps.cpp | 7
-rw-r--r-- lib/CodeGen/BuiltinGCs.cpp | 7
-rw-r--r-- lib/CodeGen/CFIInstrInserter.cpp | 7
-rw-r--r-- lib/CodeGen/CalcSpillWeights.cpp | 7
-rw-r--r-- lib/CodeGen/CallingConvLower.cpp | 7
-rw-r--r-- lib/CodeGen/CodeGen.cpp | 10
-rw-r--r-- lib/CodeGen/CodeGenPrepare.cpp | 523
-rw-r--r-- lib/CodeGen/CriticalAntiDepBreaker.cpp | 7
-rw-r--r-- lib/CodeGen/CriticalAntiDepBreaker.h | 7
-rw-r--r-- lib/CodeGen/DFAPacketizer.cpp | 7
-rw-r--r-- lib/CodeGen/DeadMachineInstructionElim.cpp | 15
-rw-r--r-- lib/CodeGen/DetectDeadLanes.cpp | 7
-rw-r--r-- lib/CodeGen/DwarfEHPrepare.cpp | 11
-rw-r--r-- lib/CodeGen/EarlyIfConversion.cpp | 7
-rw-r--r-- lib/CodeGen/EdgeBundles.cpp | 9
-rw-r--r-- lib/CodeGen/ExecutionDomainFix.cpp | 16
-rw-r--r-- lib/CodeGen/ExpandMemCmp.cpp | 68
-rw-r--r-- lib/CodeGen/ExpandPostRAPseudos.cpp | 7
-rw-r--r-- lib/CodeGen/ExpandReductions.cpp | 59
-rw-r--r-- lib/CodeGen/FEntryInserter.cpp | 7
-rw-r--r-- lib/CodeGen/FaultMaps.cpp | 7
-rw-r--r-- lib/CodeGen/FinalizeISel.cpp (renamed from lib/CodeGen/ExpandISelPseudos.cpp) | 36
-rw-r--r-- lib/CodeGen/FuncletLayout.cpp | 7
-rw-r--r-- lib/CodeGen/GCMetadata.cpp | 7
-rw-r--r-- lib/CodeGen/GCMetadataPrinter.cpp | 7
-rw-r--r-- lib/CodeGen/GCRootLowering.cpp | 9
-rw-r--r-- lib/CodeGen/GCStrategy.cpp | 7
-rw-r--r-- lib/CodeGen/GlobalISel/CSEInfo.cpp | 47
-rw-r--r-- lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 21
-rw-r--r-- lib/CodeGen/GlobalISel/CallLowering.cpp | 154
-rw-r--r-- lib/CodeGen/GlobalISel/Combiner.cpp | 12
-rw-r--r-- lib/CodeGen/GlobalISel/CombinerHelper.cpp | 220
-rw-r--r-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp | 8
-rw-r--r-- lib/CodeGen/GlobalISel/GlobalISel.cpp | 7
-rw-r--r-- lib/CodeGen/GlobalISel/IRTranslator.cpp | 1284
-rw-r--r-- lib/CodeGen/GlobalISel/InstructionSelect.cpp | 19
-rw-r--r-- lib/CodeGen/GlobalISel/InstructionSelector.cpp | 19
-rw-r--r-- lib/CodeGen/GlobalISel/LegalityPredicates.cpp | 86
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizeMutations.cpp | 54
-rw-r--r-- lib/CodeGen/GlobalISel/Legalizer.cpp | 54
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 2884
-rw-r--r-- lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 186
-rw-r--r-- lib/CodeGen/GlobalISel/Localizer.cpp | 233
-rw-r--r-- lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 429
-rw-r--r-- lib/CodeGen/GlobalISel/RegBankSelect.cpp | 139
-rw-r--r-- lib/CodeGen/GlobalISel/RegisterBank.cpp | 7
-rw-r--r-- lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 115
-rw-r--r-- lib/CodeGen/GlobalISel/Utils.cpp | 159
-rw-r--r-- lib/CodeGen/GlobalMerge.cpp | 29
-rw-r--r-- lib/CodeGen/HardwareLoops.cpp | 463
-rw-r--r-- lib/CodeGen/IfConversion.cpp | 9
-rw-r--r-- lib/CodeGen/ImplicitNullChecks.cpp | 25
-rw-r--r-- lib/CodeGen/IndirectBrExpandPass.cpp | 15
-rw-r--r-- lib/CodeGen/InlineSpiller.cpp | 52
-rw-r--r-- lib/CodeGen/InterferenceCache.cpp | 7
-rw-r--r-- lib/CodeGen/InterferenceCache.h | 7
-rw-r--r-- lib/CodeGen/InterleavedAccessPass.cpp | 19
-rw-r--r-- lib/CodeGen/InterleavedLoadCombinePass.cpp | 10
-rw-r--r-- lib/CodeGen/IntrinsicLowering.cpp | 115
-rw-r--r-- lib/CodeGen/LLVMTargetMachine.cpp | 16
-rw-r--r-- lib/CodeGen/LatencyPriorityQueue.cpp | 7
-rw-r--r-- lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 7
-rw-r--r-- lib/CodeGen/LexicalScopes.cpp | 7
-rw-r--r-- lib/CodeGen/LiveDebugValues.cpp | 720
-rw-r--r-- lib/CodeGen/LiveDebugVariables.cpp | 181
-rw-r--r-- lib/CodeGen/LiveDebugVariables.h | 7
-rw-r--r-- lib/CodeGen/LiveInterval.cpp | 64
-rw-r--r-- lib/CodeGen/LiveIntervalUnion.cpp | 7
-rw-r--r-- lib/CodeGen/LiveIntervals.cpp | 13
-rw-r--r-- lib/CodeGen/LivePhysRegs.cpp | 7
-rw-r--r-- lib/CodeGen/LiveRangeCalc.cpp | 16
-rw-r--r-- lib/CodeGen/LiveRangeCalc.h | 7
-rw-r--r-- lib/CodeGen/LiveRangeEdit.cpp | 9
-rw-r--r-- lib/CodeGen/LiveRangeShrink.cpp | 7
-rw-r--r-- lib/CodeGen/LiveRangeUtils.h | 7
-rw-r--r-- lib/CodeGen/LiveRegMatrix.cpp | 7
-rw-r--r-- lib/CodeGen/LiveRegUnits.cpp | 23
-rw-r--r-- lib/CodeGen/LiveStacks.cpp | 7
-rw-r--r-- lib/CodeGen/LiveVariables.cpp | 9
-rw-r--r-- lib/CodeGen/LocalStackSlotAllocation.cpp | 23
-rw-r--r-- lib/CodeGen/LoopTraversal.cpp | 7
-rw-r--r-- lib/CodeGen/LowLevelType.cpp | 7
-rw-r--r-- lib/CodeGen/LowerEmuTLS.cpp | 7
-rw-r--r-- lib/CodeGen/MIRCanonicalizerPass.cpp | 65
-rw-r--r-- lib/CodeGen/MIRParser/MILexer.cpp | 8
-rw-r--r-- lib/CodeGen/MIRParser/MILexer.h | 8
-rw-r--r-- lib/CodeGen/MIRParser/MIParser.cpp | 574
-rw-r--r-- lib/CodeGen/MIRParser/MIParser.h | 125
-rw-r--r-- lib/CodeGen/MIRParser/MIRParser.cpp | 184
-rw-r--r-- lib/CodeGen/MIRPrinter.cpp | 67
-rw-r--r-- lib/CodeGen/MIRPrintingPass.cpp | 7
-rw-r--r-- lib/CodeGen/MachineBasicBlock.cpp | 17
-rw-r--r-- lib/CodeGen/MachineBlockFrequencyInfo.cpp | 7
-rw-r--r-- lib/CodeGen/MachineBlockPlacement.cpp | 398
-rw-r--r-- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 7
-rw-r--r-- lib/CodeGen/MachineCSE.cpp | 181
-rw-r--r-- lib/CodeGen/MachineCombiner.cpp | 26
-rw-r--r-- lib/CodeGen/MachineCopyPropagation.cpp | 7
-rw-r--r-- lib/CodeGen/MachineDominanceFrontier.cpp | 7
-rw-r--r-- lib/CodeGen/MachineDominators.cpp | 7
-rw-r--r-- lib/CodeGen/MachineFrameInfo.cpp | 18
-rw-r--r-- lib/CodeGen/MachineFunction.cpp | 87
-rw-r--r-- lib/CodeGen/MachineFunctionPass.cpp | 7
-rw-r--r-- lib/CodeGen/MachineFunctionPrinterPass.cpp | 7
-rw-r--r-- lib/CodeGen/MachineInstr.cpp | 128
-rw-r--r-- lib/CodeGen/MachineInstrBundle.cpp | 7
-rw-r--r-- lib/CodeGen/MachineLICM.cpp | 7
-rw-r--r-- lib/CodeGen/MachineLoopInfo.cpp | 7
-rw-r--r-- lib/CodeGen/MachineModuleInfo.cpp | 30
-rw-r--r-- lib/CodeGen/MachineModuleInfoImpls.cpp | 7
-rw-r--r-- lib/CodeGen/MachineOperand.cpp | 29
-rw-r--r-- lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 7
-rw-r--r-- lib/CodeGen/MachineOutliner.cpp | 42
-rw-r--r-- lib/CodeGen/MachinePipeliner.cpp | 534
-rw-r--r-- lib/CodeGen/MachinePostDominators.cpp | 7
-rw-r--r-- lib/CodeGen/MachineRegionInfo.cpp | 7
-rw-r--r-- lib/CodeGen/MachineRegisterInfo.cpp | 20
-rw-r--r-- lib/CodeGen/MachineSSAUpdater.cpp | 7
-rw-r--r-- lib/CodeGen/MachineScheduler.cpp | 144
-rw-r--r-- lib/CodeGen/MachineSink.cpp | 17
-rw-r--r-- lib/CodeGen/MachineTraceMetrics.cpp | 7
-rw-r--r-- lib/CodeGen/MachineVerifier.cpp | 510
-rw-r--r-- lib/CodeGen/MacroFusion.cpp | 19
-rw-r--r-- lib/CodeGen/OptimizePHIs.cpp | 14
-rw-r--r-- lib/CodeGen/PHIElimination.cpp | 7
-rw-r--r-- lib/CodeGen/PHIEliminationUtils.cpp | 7
-rw-r--r-- lib/CodeGen/PHIEliminationUtils.h | 7
-rw-r--r-- lib/CodeGen/ParallelCG.cpp | 7
-rw-r--r-- lib/CodeGen/PatchableFunction.cpp | 7
-rw-r--r-- lib/CodeGen/PeepholeOptimizer.cpp | 24
-rw-r--r-- lib/CodeGen/PostRAHazardRecognizer.cpp | 7
-rw-r--r-- lib/CodeGen/PostRASchedulerList.cpp | 7
-rw-r--r-- lib/CodeGen/PreISelIntrinsicLowering.cpp | 13
-rw-r--r-- lib/CodeGen/ProcessImplicitDefs.cpp | 7
-rw-r--r-- lib/CodeGen/PrologEpilogInserter.cpp | 198
-rw-r--r-- lib/CodeGen/PseudoSourceValue.cpp | 7
-rw-r--r-- lib/CodeGen/ReachingDefAnalysis.cpp | 7
-rw-r--r-- lib/CodeGen/RegAllocBase.cpp | 23
-rw-r--r-- lib/CodeGen/RegAllocBase.h | 7
-rw-r--r-- lib/CodeGen/RegAllocBasic.cpp | 7
-rw-r--r-- lib/CodeGen/RegAllocFast.cpp | 240
-rw-r--r-- lib/CodeGen/RegAllocGreedy.cpp | 65
-rw-r--r-- lib/CodeGen/RegAllocPBQP.cpp | 7
-rw-r--r-- lib/CodeGen/RegUsageInfoCollector.cpp | 90
-rw-r--r-- lib/CodeGen/RegUsageInfoPropagate.cpp | 7
-rw-r--r-- lib/CodeGen/RegisterClassInfo.cpp | 11
-rw-r--r-- lib/CodeGen/RegisterCoalescer.cpp | 140
-rw-r--r-- lib/CodeGen/RegisterCoalescer.h | 7
-rw-r--r-- lib/CodeGen/RegisterPressure.cpp | 15
-rw-r--r-- lib/CodeGen/RegisterScavenging.cpp | 45
-rw-r--r-- lib/CodeGen/RegisterUsageInfo.cpp | 7
-rw-r--r-- lib/CodeGen/RenameIndependentSubregs.cpp | 7
-rw-r--r-- lib/CodeGen/ResetMachineFunctionPass.cpp | 9
-rw-r--r-- lib/CodeGen/SafeStack.cpp | 60
-rw-r--r-- lib/CodeGen/SafeStackColoring.cpp | 7
-rw-r--r-- lib/CodeGen/SafeStackColoring.h | 7
-rw-r--r-- lib/CodeGen/SafeStackLayout.cpp | 7
-rw-r--r-- lib/CodeGen/SafeStackLayout.h | 7
-rw-r--r-- lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 306
-rw-r--r-- lib/CodeGen/ScheduleDAG.cpp | 47
-rw-r--r-- lib/CodeGen/ScheduleDAGInstrs.cpp | 66
-rw-r--r-- lib/CodeGen/ScheduleDAGPrinter.cpp | 7
-rw-r--r-- lib/CodeGen/ScoreboardHazardRecognizer.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3271
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 81
-rw-r--r-- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 83
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 92
-rw-r--r-- lib/CodeGen/SelectionDAG/InstrEmitter.h | 14
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 472
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 168
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 447
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 8
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 50
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 181
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 646
-rw-r--r-- lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 11
-rw-r--r-- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 10
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 10
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 94
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 107
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 1429
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 139
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2305
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 383
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 152
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 446
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp | 7
-rw-r--r-- lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 109
-rw-r--r-- lib/CodeGen/SelectionDAG/StatepointLowering.h | 14
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1723
-rw-r--r-- lib/CodeGen/ShadowStackGCLowering.cpp | 13
-rw-r--r-- lib/CodeGen/ShrinkWrap.cpp | 16
-rw-r--r-- lib/CodeGen/SjLjEHPrepare.cpp | 31
-rw-r--r-- lib/CodeGen/SlotIndexes.cpp | 24
-rw-r--r-- lib/CodeGen/SpillPlacement.cpp | 7
-rw-r--r-- lib/CodeGen/SpillPlacement.h | 7
-rw-r--r-- lib/CodeGen/Spiller.h | 7
-rw-r--r-- lib/CodeGen/SplitKit.cpp | 16
-rw-r--r-- lib/CodeGen/SplitKit.h | 7
-rw-r--r-- lib/CodeGen/StackColoring.cpp | 16
-rw-r--r-- lib/CodeGen/StackMapLivenessAnalysis.cpp | 7
-rw-r--r-- lib/CodeGen/StackMaps.cpp | 7
-rw-r--r-- lib/CodeGen/StackProtector.cpp | 70
-rw-r--r-- lib/CodeGen/StackSlotColoring.cpp | 11
-rw-r--r-- lib/CodeGen/SwiftErrorValueTracking.cpp | 312
-rw-r--r-- lib/CodeGen/SwitchLoweringUtils.cpp | 489
-rw-r--r-- lib/CodeGen/TailDuplication.cpp | 7
-rw-r--r-- lib/CodeGen/TailDuplicator.cpp | 16
-rw-r--r-- lib/CodeGen/TargetFrameLoweringImpl.cpp | 7
-rw-r--r-- lib/CodeGen/TargetInstrInfo.cpp | 41
-rw-r--r-- lib/CodeGen/TargetLoweringBase.cpp | 137
-rw-r--r-- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 46
-rw-r--r-- lib/CodeGen/TargetOptionsImpl.cpp | 7
-rw-r--r-- lib/CodeGen/TargetPassConfig.cpp | 106
-rw-r--r-- lib/CodeGen/TargetRegisterInfo.cpp | 13
-rw-r--r-- lib/CodeGen/TargetSchedule.cpp | 7
-rw-r--r-- lib/CodeGen/TargetSubtargetInfo.cpp | 69
-rw-r--r-- lib/CodeGen/TwoAddressInstructionPass.cpp | 12
-rw-r--r-- lib/CodeGen/UnreachableBlockElim.cpp | 43
-rw-r--r-- lib/CodeGen/ValueTypes.cpp | 43
-rw-r--r-- lib/CodeGen/VirtRegMap.cpp | 9
-rw-r--r-- lib/CodeGen/WasmEHPrepare.cpp | 180
-rw-r--r-- lib/CodeGen/WinEHPrepare.cpp | 20
-rw-r--r-- lib/CodeGen/XRayInstrumentation.cpp | 9
281 files changed, 21606 insertions(+), 9958 deletions(-)
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 632ea8e9cdc4..444f618d8b8c 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
//===- AggressiveAntiDepBreaker.cpp - Anti-dep breaker --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 5dce3c2499e5..0cf2e6d78f7f 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/AggressiveAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
index 37dcb0be824e..c99800659bfd 100644
--- a/lib/CodeGen/AllocationOrder.cpp
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
index 467bcc2edc6f..9247dd844936 100644
--- a/lib/CodeGen/AllocationOrder.h
+++ b/lib/CodeGen/AllocationOrder.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index 797f05ee5cf3..d158e70b86ac 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -1,9 +1,8 @@
//===-- Analysis.cpp - CodeGen LLVM IR Analysis Utilities -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -83,6 +82,7 @@ unsigned llvm::ComputeLinearIndex(Type *Ty,
///
void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<EVT> *MemVTs,
SmallVectorImpl<uint64_t> *Offsets,
uint64_t StartingOffset) {
// Given a struct type, recursively traverse the elements.
@@ -92,7 +92,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
EI = EB,
EE = STy->element_end();
EI != EE; ++EI)
- ComputeValueVTs(TLI, DL, *EI, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, *EI, ValueVTs, MemVTs, Offsets,
StartingOffset + SL->getElementOffset(EI - EB));
return;
}
@@ -101,7 +101,7 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
Type *EltTy = ATy->getElementType();
uint64_t EltSize = DL.getTypeAllocSize(EltTy);
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- ComputeValueVTs(TLI, DL, EltTy, ValueVTs, Offsets,
+ ComputeValueVTs(TLI, DL, EltTy, ValueVTs, MemVTs, Offsets,
StartingOffset + i * EltSize);
return;
}
@@ -110,10 +110,50 @@ void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
return;
// Base case: we can get an EVT for this LLVM IR type.
ValueVTs.push_back(TLI.getValueType(DL, Ty));
+ if (MemVTs)
+ MemVTs->push_back(TLI.getMemValueType(DL, Ty));
if (Offsets)
Offsets->push_back(StartingOffset);
}
+void llvm::ComputeValueVTs(const TargetLowering &TLI, const DataLayout &DL,
+ Type *Ty, SmallVectorImpl<EVT> &ValueVTs,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ return ComputeValueVTs(TLI, DL, Ty, ValueVTs, /*MemVTs=*/nullptr, Offsets,
+ StartingOffset);
+}
+
+void llvm::computeValueLLTs(const DataLayout &DL, Type &Ty,
+ SmallVectorImpl<LLT> &ValueTys,
+ SmallVectorImpl<uint64_t> *Offsets,
+ uint64_t StartingOffset) {
+ // Given a struct type, recursively traverse the elements.
+ if (StructType *STy = dyn_cast<StructType>(&Ty)) {
+ const StructLayout *SL = DL.getStructLayout(STy);
+ for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
+ computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
+ StartingOffset + SL->getElementOffset(I));
+ return;
+ }
+ // Given an array type, recursively traverse the elements.
+ if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
+ Type *EltTy = ATy->getElementType();
+ uint64_t EltSize = DL.getTypeAllocSize(EltTy);
+ for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
+ computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
+ StartingOffset + i * EltSize);
+ return;
+ }
+ // Interpret void as zero return values.
+ if (Ty.isVoidTy())
+ return;
+ // Base case: we can get an LLT for this LLVM IR type.
+ ValueTys.push_back(getLLTForType(Ty, DL));
+ if (Offsets != nullptr)
+ Offsets->push_back(StartingOffset * 8);
+}
+
/// ExtractTypeInfo - Returns the type info, possibly bitcast, encoded in V.
GlobalValue *llvm::ExtractTypeInfo(Value *V) {
V = V->stripPointerCasts();
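
For context, a usage sketch of the extended interface (not part of the patch; assumes TLI, DL, and Ty are in scope): ValueVTs receives the register types as before, while the new optional MemVTs out-parameter receives the matching in-memory types, entry for entry.

  SmallVector<EVT, 4> ValueVTs, MemVTs;
  SmallVector<uint64_t, 4> Offsets;
  // New overload: MemVTs[i] is the type of ValueVTs[i] as it exists in
  // memory (TLI.getMemValueType), which may differ from the register type.
  ComputeValueVTs(TLI, DL, Ty, ValueVTs, &MemVTs, &Offsets,
                  /*StartingOffset=*/0);
  // Callers that don't care keep the old signature, which now forwards
  // with MemVTs == nullptr.
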
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index d93716287981..b11148595136 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AntiDepBreaker.h - Anti-Dependence Breaking -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp
index 9011f025f595..f6ef85a5b78f 100644
--- a/lib/CodeGen/AsmPrinter/ARMException.cpp
+++ b/lib/CodeGen/AsmPrinter/ARMException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/ARMException.cpp - ARM EHABI Exception Impl ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/AccelTable.cpp b/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 95875ccb8a0b..b1b7921ea976 100644
--- a/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AsmPrinter/AccelTable.cpp - Accelerator Tables --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -56,10 +55,10 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
// Create the individual hash data outputs.
for (auto &E : Entries) {
// Unique the entries.
- std::stable_sort(E.second.Values.begin(), E.second.Values.end(),
- [](const AccelTableData *A, const AccelTableData *B) {
- return *A < *B;
- });
+ llvm::stable_sort(E.second.Values,
+ [](const AccelTableData *A, const AccelTableData *B) {
+ return *A < *B;
+ });
E.second.Values.erase(
std::unique(E.second.Values.begin(), E.second.Values.end()),
E.second.Values.end());
@@ -82,10 +81,9 @@ void AccelTableBase::finalize(AsmPrinter *Asm, StringRef Prefix) {
// Sort the contents of the buckets by hash value so that hash collisions end
// up together. Stable sort makes testing easier and doesn't cost much more.
for (auto &Bucket : Buckets)
- std::stable_sort(Bucket.begin(), Bucket.end(),
- [](HashData *LHS, HashData *RHS) {
- return LHS->HashValue < RHS->HashValue;
- });
+ llvm::stable_sort(Bucket, [](HashData *LHS, HashData *RHS) {
+ return LHS->HashValue < RHS->HashValue;
+ });
}
namespace {
@@ -557,8 +555,8 @@ void llvm::emitDWARF5AccelTable(
SmallVector<unsigned, 1> CUIndex(CUs.size());
int Count = 0;
for (const auto &CU : enumerate(CUs)) {
- if (CU.value()->getCUNode()->getNameTableKind() ==
- DICompileUnit::DebugNameTableKind::None)
+ if (CU.value()->getCUNode()->getNameTableKind() !=
+ DICompileUnit::DebugNameTableKind::Default)
continue;
CUIndex[CU.index()] = Count++;
assert(CU.index() == CU.value()->getUniqueID());
@@ -616,30 +614,10 @@ void AppleAccelTableStaticTypeData::emit(AsmPrinter *Asm) const {
Asm->emitInt32(QualifiedNameHash);
}
-#ifndef _MSC_VER
-// The lines below are rejected by older versions (TBD) of MSVC.
constexpr AppleAccelTableData::Atom AppleAccelTableTypeData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticOffsetData::Atoms[];
constexpr AppleAccelTableData::Atom AppleAccelTableStaticTypeData::Atoms[];
-#else
-// FIXME: Erase this path once the minimum MSCV version has been bumped.
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableOffsetData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4> AppleAccelTableTypeData::Atoms =
- {Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
- Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
- Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)};
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableStaticOffsetData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)};
-const SmallVector<AppleAccelTableData::Atom, 4>
- AppleAccelTableStaticTypeData::Atoms = {
- Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4),
- Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2),
- Atom(5, dwarf::DW_FORM_data1), Atom(6, dwarf::DW_FORM_data4)};
-#endif
#ifndef NDEBUG
void AppleAccelTableWriter::Header::print(raw_ostream &OS) const {
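
The std::stable_sort calls above become the llvm::stable_sort range helper from STLExtras.h; here is a minimal sketch of the sort-then-unique idiom this code relies on (contents illustrative, and assuming the single-range overload of the helper):

  // llvm::stable_sort(Range, Cmp) is shorthand for
  // std::stable_sort(Range.begin(), Range.end(), Cmp).
  std::vector<int> V = {3, 1, 3, 2, 1};
  llvm::stable_sort(V);                      // 1 1 2 3 3
  V.erase(std::unique(V.begin(), V.end()),   // drop adjacent duplicates
          V.end());                          // 1 2 3
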
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.cpp b/lib/CodeGen/AsmPrinter/AddressPool.cpp
index 042243b79259..f11c7de5ed8a 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.cpp
+++ b/lib/CodeGen/AsmPrinter/AddressPool.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AddressPool.cpp - Dwarf Debug Framework ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -24,21 +23,24 @@ unsigned AddressPool::getIndex(const MCSymbol *Sym, bool TLS) {
return IterBool.first->second.Number;
}
-
-void AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
+MCSymbol *AddressPool::emitHeader(AsmPrinter &Asm, MCSection *Section) {
static const uint8_t AddrSize = Asm.getDataLayout().getPointerSize();
- uint64_t Length = sizeof(uint16_t) // version
- + sizeof(uint8_t) // address_size
- + sizeof(uint8_t) // segment_selector_size
- + AddrSize * Pool.size(); // entries
+ StringRef Prefix = "debug_addr_";
+ MCSymbol *BeginLabel = Asm.createTempSymbol(Prefix + "start");
+ MCSymbol *EndLabel = Asm.createTempSymbol(Prefix + "end");
+
Asm.OutStreamer->AddComment("Length of contribution");
- Asm.emitInt32(Length); // TODO: Support DWARF64 format.
+ Asm.EmitLabelDifference(EndLabel, BeginLabel,
+ 4); // TODO: Support DWARF64 format.
+ Asm.OutStreamer->EmitLabel(BeginLabel);
Asm.OutStreamer->AddComment("DWARF version number");
Asm.emitInt16(Asm.getDwarfVersion());
Asm.OutStreamer->AddComment("Address size");
Asm.emitInt8(AddrSize);
Asm.OutStreamer->AddComment("Segment selector size");
Asm.emitInt8(0); // TODO: Support non-zero segment_selector_size.
+
+ return EndLabel;
}
// Emit addresses into the section given.
@@ -49,8 +51,10 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
// Start the dwarf addr section.
Asm.OutStreamer->SwitchSection(AddrSection);
+ MCSymbol *EndLabel = nullptr;
+
if (Asm.getDwarfVersion() >= 5)
- emitHeader(Asm, AddrSection);
+ EndLabel = emitHeader(Asm, AddrSection);
// Define the symbol that marks the start of the contribution.
// It is referenced via DW_AT_addr_base.
@@ -67,4 +71,7 @@ void AddressPool::emit(AsmPrinter &Asm, MCSection *AddrSection) {
for (const MCExpr *Entry : Entries)
Asm.OutStreamer->EmitValue(Entry, Asm.getDataLayout().getPointerSize());
+
+ if (EndLabel)
+ Asm.OutStreamer->EmitLabel(EndLabel);
}
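
emitHeader now brackets the contribution with temporary symbols instead of summing the field sizes by hand, so adding fields can no longer desynchronize the length. A generic sketch of the idiom (assumes an AsmPrinter &Asm; symbol names are illustrative):

  // The 4-byte unit length is emitted as (End - Begin): the assembler
  // computes the size, and the length field itself is excluded from it.
  MCSymbol *Begin = Asm.createTempSymbol("contrib_start");
  MCSymbol *End = Asm.createTempSymbol("contrib_end");
  Asm.EmitLabelDifference(End, Begin, 4);
  Asm.OutStreamer->EmitLabel(Begin);
  // ... emit header fields and pool entries ...
  Asm.OutStreamer->EmitLabel(End);
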
diff --git a/lib/CodeGen/AsmPrinter/AddressPool.h b/lib/CodeGen/AsmPrinter/AddressPool.h
index 2209c7eb50ed..f92cf72093ca 100644
--- a/lib/CodeGen/AsmPrinter/AddressPool.h
+++ b/lib/CodeGen/AsmPrinter/AddressPool.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/AddressPool.h - Dwarf Debug Framework -------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -55,7 +54,7 @@ public:
void setLabel(MCSymbol *Sym) { AddressTableBaseSym = Sym; }
private:
- void emitHeader(AsmPrinter &Asm, MCSection *Section);
+ MCSymbol *emitHeader(AsmPrinter &Asm, MCSection *Section);
/// Symbol designates the start of the contribution to the address table.
MCSymbol *AddressTableBaseSym = nullptr;
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 7070451e3330..54f6cc2d5571 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1,9 +1,8 @@
//===- AsmPrinter.cpp - Common AsmPrinter code ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -35,7 +34,6 @@
#include "llvm/BinaryFormat/COFF.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/BinaryFormat/ELF.h"
-#include "llvm/CodeGen/AsmPrinterHandler.h"
#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -60,7 +58,6 @@
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/Constant.h"
@@ -80,6 +77,7 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
+#include "llvm/IR/RemarkStreamer.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -101,6 +99,9 @@
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
#include "llvm/Pass.h"
+#include "llvm/Remarks/Remark.h"
+#include "llvm/Remarks/RemarkFormat.h"
+#include "llvm/Remarks/RemarkStringTable.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -143,9 +144,10 @@ static const char *const CodeViewLineTablesGroupDescription =
STATISTIC(EmittedInsts, "Number of machine instrs printed");
-static cl::opt<bool>
- PrintSchedule("print-schedule", cl::Hidden, cl::init(false),
- cl::desc("Print 'sched: [latency:throughput]' in .s output"));
+static cl::opt<bool> EnableRemarksSection(
+ "remarks-section",
+ cl::desc("Emit a section containing remark diagnostics metadata"),
+ cl::init(false));
char AsmPrinter::ID = 0;
@@ -232,6 +234,12 @@ void AsmPrinter::EmitToStreamer(MCStreamer &S, const MCInst &Inst) {
S.EmitInstruction(Inst, getSubtargetInfo());
}
+void AsmPrinter::emitInitialRawDwarfLocDirective(const MachineFunction &MF) {
+ assert(DD && "Dwarf debug file is not defined.");
+ assert(OutStreamer->hasRawTextSupport() && "Expected assembly output mode.");
+ (void)DD->emitInitialLocDirective(MF, /*CUID=*/0);
+}
+
/// getCurrentSection() - Return the current section we are emitting to.
const MCSection *AsmPrinter::getCurrentSection() const {
return OutStreamer->getCurrentSectionOnly();
@@ -252,6 +260,9 @@ bool AsmPrinter::doInitialization(Module &M) {
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
.Initialize(OutContext, TM);
+ const_cast<TargetLoweringObjectFile &>(getObjFileLowering())
+ .getModuleMetadata(M);
+
OutStreamer->InitSections(false);
// Emit the version-min deployment target directive if needed.
@@ -300,16 +311,17 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
- Handlers.push_back(HandlerInfo(new CodeViewDebug(this),
- DbgTimerName, DbgTimerDescription,
- CodeViewLineTablesGroupName,
- CodeViewLineTablesGroupDescription));
+ Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+ DbgTimerName, DbgTimerDescription,
+ CodeViewLineTablesGroupName,
+ CodeViewLineTablesGroupDescription);
}
if (!EmitCodeView || MMI->getModule()->getDwarfVersion()) {
DD = new DwarfDebug(this, &M);
DD->beginModule();
- Handlers.push_back(HandlerInfo(DD, DbgTimerName, DbgTimerDescription,
- DWARFGroupName, DWARFGroupDescription));
+ Handlers.emplace_back(std::unique_ptr<DwarfDebug>(DD), DbgTimerName,
+ DbgTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
}
}
@@ -362,14 +374,15 @@ bool AsmPrinter::doInitialization(Module &M) {
break;
}
if (ES)
- Handlers.push_back(HandlerInfo(ES, EHTimerName, EHTimerDescription,
- DWARFGroupName, DWARFGroupDescription));
+ Handlers.emplace_back(std::unique_ptr<EHStreamer>(ES), EHTimerName,
+ EHTimerDescription, DWARFGroupName,
+ DWARFGroupDescription);
if (mdconst::extract_or_null<ConstantInt>(
MMI->getModule()->getModuleFlag("cfguardtable")))
- Handlers.push_back(HandlerInfo(new WinCFGuard(this), CFGuardName,
- CFGuardDescription, DWARFGroupName,
- DWARFGroupDescription));
+ Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+ CFGuardDescription, DWARFGroupName,
+ DWARFGroupDescription);
return false;
}
@@ -483,7 +496,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM);
const DataLayout &DL = GV->getParent()->getDataLayout();
- uint64_t Size = DL.getTypeAllocSize(GV->getType()->getElementType());
+ uint64_t Size = DL.getTypeAllocSize(GV->getValueType());
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
@@ -658,6 +671,9 @@ void AsmPrinter::EmitFunctionHeader() {
if (MAI->hasDotTypeDotSizeDirective())
OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_ELF_TypeFunction);
+ if (F.hasFnAttribute(Attribute::Cold))
+ OutStreamer->EmitSymbolAttribute(CurrentFnSym, MCSA_Cold);
+
if (isVerbose()) {
F.printAsOperand(OutStreamer->GetCommentOS(),
/*PrintType=*/false, F.getParent());
@@ -738,74 +754,30 @@ void AsmPrinter::EmitFunctionEntryLabel() {
}
/// emitComments - Pretty-print comments for instructions.
-/// It returns true iff the sched comment was emitted.
-/// Otherwise it returns false.
-static bool emitComments(const MachineInstr &MI, raw_ostream &CommentOS,
- AsmPrinter *AP) {
+static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
const MachineFunction *MF = MI.getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
// Check for spills and reloads
- int FI;
-
- const MachineFrameInfo &MFI = MF->getFrameInfo();
- bool Commented = false;
-
- auto getSize =
- [&MFI](const SmallVectorImpl<const MachineMemOperand *> &Accesses) {
- unsigned Size = 0;
- for (auto A : Accesses)
- if (MFI.isSpillSlotObjectIndex(
- cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
- ->getFrameIndex()))
- Size += A->getSize();
- return Size;
- };
// We assume a single instruction only has a spill or reload, not
// both.
- const MachineMemOperand *MMO;
- SmallVector<const MachineMemOperand *, 2> Accesses;
- if (TII->isLoadFromStackSlotPostFE(MI, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Reload";
- Commented = true;
- }
- } else if (TII->hasLoadFromStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Reload";
- Commented = true;
- }
- } else if (TII->isStoreToStackSlotPostFE(MI, FI)) {
- if (MFI.isSpillSlotObjectIndex(FI)) {
- MMO = *MI.memoperands_begin();
- CommentOS << MMO->getSize() << "-byte Spill";
- Commented = true;
- }
- } else if (TII->hasStoreToStackSlot(MI, Accesses)) {
- if (auto Size = getSize(Accesses)) {
- CommentOS << Size << "-byte Folded Spill";
- Commented = true;
- }
+ Optional<unsigned> Size;
+ if ((Size = MI.getRestoreSize(TII))) {
+ CommentOS << *Size << "-byte Reload\n";
+ } else if ((Size = MI.getFoldedRestoreSize(TII))) {
+ if (*Size)
+ CommentOS << *Size << "-byte Folded Reload\n";
+ } else if ((Size = MI.getSpillSize(TII))) {
+ CommentOS << *Size << "-byte Spill\n";
+ } else if ((Size = MI.getFoldedSpillSize(TII))) {
+ if (*Size)
+ CommentOS << *Size << "-byte Folded Spill\n";
}
// Check for spill-induced copies
- if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse)) {
- Commented = true;
- CommentOS << " Reload Reuse";
- }
-
- if (Commented) {
- if (AP->EnablePrintSchedInfo) {
- // If any comment was added above and we need sched info comment then add
- // this new comment just after the above comment w/o "\n" between them.
- CommentOS << " " << MF->getSubtarget().getSchedInfoStr(MI) << "\n";
- return true;
- }
- CommentOS << "\n";
- }
- return false;
+ if (MI.getAsmPrinterFlag(MachineInstr::ReloadReuse))
+ CommentOS << " Reload Reuse\n";
}
/// emitImplicitDef - This method emits the specified machine instruction
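
The ad-hoc frame-index probing above is replaced by MachineInstr queries returning Optional<unsigned>; a short sketch of the new API (assumes MI and TII are in scope):

  // Each query yields None unless MI is that kind of spill or reload; the
  // folded variants can also yield 0 when no access hits a spill slot.
  if (Optional<unsigned> Size = MI.getSpillSize(TII))
    dbgs() << *Size << "-byte Spill\n";
  else if (Optional<unsigned> Size = MI.getFoldedSpillSize(TII)) {
    if (*Size)
      dbgs() << *Size << "-byte Folded Spill\n";
  }
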
@@ -1093,10 +1065,8 @@ void AsmPrinter::EmitFunctionBody() {
}
}
- if (isVerbose() && emitComments(MI, OutStreamer->GetCommentOS(), this)) {
- MachineInstr *MIP = const_cast<MachineInstr *>(&MI);
- MIP->setAsmPrinterFlag(MachineInstr::NoSchedComment);
- }
+ if (isVerbose())
+ emitComments(MI, OutStreamer->GetCommentOS());
switch (MI.getOpcode()) {
case TargetOpcode::CFI_INSTRUCTION:
@@ -1105,11 +1075,13 @@ void AsmPrinter::EmitFunctionBody() {
case TargetOpcode::LOCAL_ESCAPE:
emitFrameAlloc(MI);
break;
+ case TargetOpcode::ANNOTATION_LABEL:
case TargetOpcode::EH_LABEL:
case TargetOpcode::GC_LABEL:
OutStreamer->EmitLabel(MI.getOperand(0).getMCSymbol());
break;
case TargetOpcode::INLINEASM:
+ case TargetOpcode::INLINEASM_BR:
EmitInlineAsm(&MI);
break;
case TargetOpcode::DBG_VALUE:
@@ -1266,7 +1238,7 @@ static bool isGOTEquivalentCandidate(const GlobalVariable *GV,
// GlobalVariable or Function, i.e., as GlobalValue.
if (!GV->hasGlobalUnnamedAddr() || !GV->hasInitializer() ||
!GV->isConstant() || !GV->isDiscardableIfUnused() ||
- !dyn_cast<GlobalValue>(GV->getOperand(0)))
+ !isa<GlobalValue>(GV->getOperand(0)))
return false;
// To be a got equivalent, at least one of its users need to be a constant
@@ -1329,9 +1301,19 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
else
assert(GIS.hasLocalLinkage() && "Invalid alias or ifunc linkage");
+ bool IsFunction = GIS.getValueType()->isFunctionTy();
+
+ // Treat bitcasts of functions as functions also. This is important at least
+ // on WebAssembly where object and function addresses can't alias each other.
+ if (!IsFunction)
+ if (auto *CE = dyn_cast<ConstantExpr>(GIS.getIndirectSymbol()))
+ if (CE->getOpcode() == Instruction::BitCast)
+ IsFunction =
+ CE->getOperand(0)->getType()->getPointerElementType()->isFunctionTy();
+
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (GIS.getType()->getPointerElementType()->isFunctionTy()) {
+ if (IsFunction) {
OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
if (isa<GlobalIFunc>(GIS))
OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
@@ -1363,6 +1345,66 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
}
}
+void AsmPrinter::emitRemarksSection(Module &M) {
+ RemarkStreamer *RS = M.getContext().getRemarkStreamer();
+ if (!RS)
+ return;
+ const remarks::Serializer &Serializer = RS->getSerializer();
+
+ // Switch to the right section: .remarks/__remarks.
+ MCSection *RemarksSection =
+ OutContext.getObjectFileInfo()->getRemarksSection();
+ OutStreamer->SwitchSection(RemarksSection);
+
+ // Emit the magic number.
+ OutStreamer->EmitBytes(remarks::Magic);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+
+ // Emit the version number: little-endian uint64_t.
+ // The version number is located at the offset 0x0 in the section.
+ std::array<char, 8> Version;
+ support::endian::write64le(Version.data(), remarks::Version);
+ OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
+
+ // Emit the string table in the section.
+ // Note: we need to use the streamer here to emit it in the section. We can't
+ // just use the serialize function with a raw_ostream because of the way
+ // MCStreamers work.
+ uint64_t StrTabSize =
+ Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
+ // Emit the total size of the string table (the size itself excluded):
+ // little-endian uint64_t.
+ // The total size is located after the version number.
+ // Note: even if no string table is used, emit 0.
+ std::array<char, 8> StrTabSizeBuf;
+ support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
+ OutStreamer->EmitBinaryData(
+ StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
+
+ if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
+ std::vector<StringRef> StrTabStrings = StrTab->serialize();
+ // Emit a list of null-terminated strings.
+ // Note: the order is important here: the ID used in the remarks corresponds
+ // to the position of the string in the section.
+ for (StringRef Str : StrTabStrings) {
+ OutStreamer->EmitBytes(Str);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ }
+ }
+
+ // Emit the null-terminated absolute path to the remark file.
+ // The path is located at the offset 0x4 in the section.
+ StringRef FilenameRef = RS->getFilename();
+ SmallString<128> Filename = FilenameRef;
+ sys::fs::make_absolute(Filename);
+ assert(!Filename.empty() && "The filename can't be empty.");
+ OutStreamer->EmitBytes(Filename);
+ // Explicitly emit a '\0'.
+ OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+}
+
bool AsmPrinter::doFinalization(Module &M) {
// Set the MachineFunction to nullptr so that we can catch attempted
// accesses to MF specific features at the module level and so that
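
For reference, the layout emitted by emitRemarksSection can be read back as follows (a hypothetical reader sketch, not an API added by this patch; assumes Buf holds the whole remarks section):

  // Layout: magic, '\0', u64le version, u64le string-table size, the
  // table's null-terminated strings, then the null-terminated absolute
  // path of the remark file.
  uint64_t readRemarksVersion(StringRef Buf) {
    StringRef Magic = remarks::Magic;
    size_t Off = Magic.size() + 1; // skip the magic and its '\0'
    uint64_t Version = support::endian::read64le(Buf.data() + Off);
    uint64_t StrTabSize = support::endian::read64le(Buf.data() + Off + 8);
    (void)StrTabSize; // strings start at Off + 16; the path follows them
    return Version;
  }
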
@@ -1394,6 +1436,12 @@ bool AsmPrinter::doFinalization(Module &M) {
EmitVisibility(Name, V, false);
}
+ // Emit the remarks section contents.
+ // FIXME: Figure out when is the safest time to emit this section. It should
+ // not come after debug info.
+ if (EnableRemarksSection)
+ emitRemarksSection(M);
+
const TargetLoweringObjectFile &TLOF = getObjFileLowering();
TLOF.emitModuleMetadata(*OutStreamer, M);
@@ -1448,7 +1496,6 @@ bool AsmPrinter::doFinalization(Module &M) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription, HI.TimerGroupName,
HI.TimerGroupDescription, TimePassesIsEnabled);
HI.Handler->endModule();
- delete HI.Handler;
}
Handlers.clear();
DD = nullptr;
@@ -1592,6 +1639,24 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->EmitAddrsigSym(getSymbol(&GV));
}
+ // Emit symbol partition specifications (ELF only).
+ if (TM.getTargetTriple().isOSBinFormatELF()) {
+ unsigned UniqueID = 0;
+ for (const GlobalValue &GV : M.global_values()) {
+ if (!GV.hasPartition() || GV.isDeclarationForLinker() ||
+ GV.getVisibility() != GlobalValue::DefaultVisibility)
+ continue;
+
+ OutStreamer->SwitchSection(OutContext.getELFSection(
+ ".llvm_sympart", ELF::SHT_LLVM_SYMPART, 0, 0, "", ++UniqueID));
+ OutStreamer->EmitBytes(GV.getPartition());
+ OutStreamer->EmitZeros(1);
+ OutStreamer->EmitValue(
+ MCSymbolRefExpr::create(getSymbol(&GV), OutContext),
+ MAI->getCodePointerSize());
+ }
+ }
+
// Allow the target to emit any magic that it wants at the end of the file,
// after everything else has gone out.
EmitEndOfAsmFile(M);
@@ -1628,11 +1693,6 @@ void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
}
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
-
- const TargetSubtargetInfo &STI = MF.getSubtarget();
- EnablePrintSchedInfo = PrintSchedule.getNumOccurrences()
- ? PrintSchedule
- : STI.supportPrintSchedInfo();
}
namespace {
@@ -1905,8 +1965,7 @@ bool AsmPrinter::EmitSpecialLLVMGlobal(const GlobalVariable *GV) {
}
/// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each
-/// global in the specified llvm.used list for which emitUsedDirectiveFor
-/// is true, as being used with this directive.
+/// global in the specified llvm.used list.
void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) {
// Should be an array of 'i8*'.
for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) {
@@ -1933,7 +1992,7 @@ struct Structor {
/// priority.
void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
bool isCtor) {
- // Should be an array of '{ int, void ()* }' structs. The first value is the
+ // Should be an array of '{ i32, void ()*, i8* }' structs. The first value is the
// init priority.
if (!isa<ConstantArray>(List)) return;
@@ -1941,12 +2000,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
const ConstantArray *InitList = dyn_cast<ConstantArray>(List);
if (!InitList) return; // Not an array!
StructType *ETy = dyn_cast<StructType>(InitList->getType()->getElementType());
- // FIXME: Only allow the 3-field form in LLVM 4.0.
- if (!ETy || ETy->getNumElements() < 2 || ETy->getNumElements() > 3)
- return; // Not an array of two or three elements!
- if (!isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
- !isa<PointerType>(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr).
- if (ETy->getNumElements() == 3 && !isa<PointerType>(ETy->getTypeAtIndex(2U)))
+ if (!ETy || ETy->getNumElements() != 3 ||
+ !isa<IntegerType>(ETy->getTypeAtIndex(0U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(1U)) ||
+ !isa<PointerType>(ETy->getTypeAtIndex(2U)))
return; // Not (int, ptr, ptr).
// Gather the structors in a form that's convenient for sorting by priority.
@@ -1962,16 +2019,16 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
Structor &S = Structors.back();
S.Priority = Priority->getLimitedValue(65535);
S.Func = CS->getOperand(1);
- if (ETy->getNumElements() == 3 && !CS->getOperand(2)->isNullValue())
+ if (!CS->getOperand(2)->isNullValue())
S.ComdatKey =
dyn_cast<GlobalValue>(CS->getOperand(2)->stripPointerCasts());
}
// Emit the function pointers in the target-specific order
unsigned Align = Log2_32(DL.getPointerPrefAlignment());
- std::stable_sort(Structors.begin(), Structors.end(),
- [](const Structor &L,
- const Structor &R) { return L.Priority < R.Priority; });
+ llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
+ return L.Priority < R.Priority;
+ });
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
@@ -2199,7 +2256,10 @@ const MCExpr *AsmPrinter::lowerConstant(const Constant *CV) {
// We can emit the pointer value into this slot if the slot is an
// integer slot equal to the size of the pointer.
- if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType()))
+ //
+ // If the pointer is larger than the resultant integer, then
+ // as with Trunc just depend on the assembler to truncate it.
+ if (DL.getTypeAllocSize(Ty) <= DL.getTypeAllocSize(Op->getType()))
return OpExpr;
// Otherwise the pointer is smaller than the resultant integer, mask off
@@ -2740,7 +2800,7 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const BasicBlock *BB) const {
/// GetCPISymbol - Return the symbol for the specified constant pool entry.
MCSymbol *AsmPrinter::GetCPISymbol(unsigned CPID) const {
- if (getSubtargetInfo().getTargetTriple().isKnownWindowsMSVCEnvironment()) {
+ if (getSubtargetInfo().getTargetTriple().isWindowsMSVCEnvironment()) {
const MachineConstantPoolEntry &CPE =
MF->getConstantPool()->getConstants()[CPID];
if (!CPE.isMachineConstantPoolEntry()) {
@@ -2858,7 +2918,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
MCCodePaddingContext &Context) const {
assert(MF != nullptr && "Machine function must be valid");
Context.IsPaddingActive = !MF->hasInlineAsm() &&
- !MF->getFunction().optForSize() &&
+ !MF->getFunction().hasOptSize() &&
TM.getOptLevel() != CodeGenOpt::None;
Context.IsBasicBlockReachableViaFallthrough =
std::find(MBB.pred_begin(), MBB.pred_end(), MBB.getPrevNode()) !=
@@ -2918,13 +2978,16 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
// Print the main label for the block.
if (MBB.pred_empty() ||
- (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry())) {
+ (isBlockOnlyReachableByFallthrough(&MBB) && !MBB.isEHFuncletEntry() &&
+ !MBB.hasLabelMustBeEmitted())) {
if (isVerbose()) {
// NOTE: Want this comment at start of line, don't emit with AddComment.
OutStreamer->emitRawComment(" %bb." + Twine(MBB.getNumber()) + ":",
false);
}
} else {
+ if (isVerbose() && MBB.hasLabelMustBeEmitted())
+ OutStreamer->AddComment("Label of block must be emitted");
OutStreamer->EmitLabel(MBB.getSymbol());
}
}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index afce3ad3133b..992e44d95306 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -1,9 +1,8 @@
//===-- AsmPrinterDwarf.cpp - AsmPrinter Dwarf Support --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,6 +18,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSection.h"
#include "llvm/MC/MCStreamer.h"
@@ -43,11 +43,11 @@ void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const {
OutStreamer->EmitSLEB128IntValue(Value);
}
-void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc) const {
+void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const {
if (isVerbose() && Desc)
OutStreamer->AddComment(Desc);
- OutStreamer->EmitULEB128IntValue(Value);
+ OutStreamer->EmitULEB128IntValue(Value, PadTo);
}
/// Emit something like ".uleb128 Hi-Lo".
@@ -183,6 +183,25 @@ void AsmPrinter::EmitDwarfOffset(const MCSymbol *Label, uint64_t Offset) const {
EmitLabelPlusOffset(Label, Offset, MAI->getCodePointerSize());
}
+void AsmPrinter::EmitCallSiteOffset(const MCSymbol *Hi,
+ const MCSymbol *Lo,
+ unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ EmitLabelDifferenceAsULEB128(Hi, Lo);
+ else
+ EmitLabelDifference(Hi, Lo, GetSizeOfEncodedValue(Encoding));
+}
+
+void AsmPrinter::EmitCallSiteValue(uint64_t Value,
+ unsigned Encoding) const {
+ // The least significant 3 bits specify the width of the encoding
+ if ((Encoding & 0x7) == dwarf::DW_EH_PE_uleb128)
+ EmitULEB128(Value);
+ else
+ OutStreamer->EmitIntValue(Value, GetSizeOfEncodedValue(Encoding));
+}
+
//===----------------------------------------------------------------------===//
// Dwarf Lowering Routines
//===----------------------------------------------------------------------===//
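
Both helpers key the emission width off the low three bits of the DWARF EH encoding; a small sketch (constants from llvm/BinaryFormat/Dwarf.h):

  // DW_EH_PE_uleb128 in the low 3 bits selects variable-length LEB128;
  // anything else maps to a fixed width via GetSizeOfEncodedValue, e.g.
  // DW_EH_PE_udata4 -> 4 bytes.
  bool isULEBEncoding(unsigned Encoding) {
    return (Encoding & 0x7) == dwarf::DW_EH_PE_uleb128;
  }
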
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 62103e3107c0..7721e996aca5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -1,9 +1,8 @@
//===-- AsmPrinterInlineAsm.cpp - AsmPrinter Inline Asm Handling ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,7 +18,6 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
-#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/InlineAsm.h"
@@ -155,15 +153,10 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
" we don't have an asm parser for this target\n");
Parser->setAssemblerDialect(Dialect);
Parser->setTargetParser(*TAP.get());
- Parser->setEnablePrintSchedInfo(EnablePrintSchedInfo);
// Enable lexing Masm binary and hex integer literals in intel inline
// assembly.
if (Dialect == InlineAsm::AD_Intel)
Parser->getLexer().setLexMasmIntegers(true);
- if (MF) {
- const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
- TAP->SetFrameRegister(TRI->getFrameRegister(*MF));
- }
emitInlineAsmStart();
// Don't implicitly switch to the text section before the asm.
@@ -176,9 +169,8 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MCSubtargetInfo &STI,
}
static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, int InlineAsmVariant,
- AsmPrinter *AP, unsigned LocCookie,
- raw_ostream &OS) {
+ MachineModuleInfo *MMI, AsmPrinter *AP,
+ unsigned LocCookie, raw_ostream &OS) {
// Switch to the inline assembly variant.
OS << "\t.intel_syntax\n\t";
@@ -270,11 +262,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
++OpNo; // Skip over the ID number.
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmMemoryOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
- /*Modifier*/ nullptr, OS);
+ Error = AP->PrintAsmOperand(MI, OpNo, /*Modifier*/ nullptr, OS);
}
}
if (Error) {
@@ -291,9 +281,9 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
}
static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
- MachineModuleInfo *MMI, int InlineAsmVariant,
- int AsmPrinterVariant, AsmPrinter *AP,
- unsigned LocCookie, raw_ostream &OS) {
+ MachineModuleInfo *MMI, int AsmPrinterVariant,
+ AsmPrinter *AP, unsigned LocCookie,
+ raw_ostream &OS) {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
unsigned NumOperands = MI->getNumOperands();
@@ -435,17 +425,25 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
unsigned OpFlags = MI->getOperand(OpNo).getImm();
++OpNo; // Skip over the ID number.
+ // FIXME: Shouldn't arch-independent output template handling go into
+ // PrintAsmOperand?
if (Modifier[0] == 'l') { // Labels are target independent.
- // FIXME: What if the operand isn't an MBB, report error?
- const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
- Sym->print(OS, AP->MAI);
+ if (MI->getOperand(OpNo).isBlockAddress()) {
+ const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
+ MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
+ Sym->print(OS, AP->MAI);
+ } else if (MI->getOperand(OpNo).isMBB()) {
+ const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
+ Sym->print(OS, AP->MAI);
+ } else {
+ Error = true;
+ }
} else {
if (InlineAsm::isMemKind(OpFlags)) {
- Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
- Modifier[0] ? Modifier : nullptr,
- OS);
+ Error = AP->PrintAsmMemoryOperand(
+ MI, OpNo, Modifier[0] ? Modifier : nullptr, OS);
} else {
- Error = AP->PrintAsmOperand(MI, OpNo, InlineAsmVariant,
+ Error = AP->PrintAsmOperand(MI, OpNo,
Modifier[0] ? Modifier : nullptr, OS);
}
}
@@ -515,18 +513,11 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// The variant of the current asmprinter.
int AsmPrinterVariant = MAI->getAssemblerDialect();
- InlineAsm::AsmDialect InlineAsmVariant = MI->getInlineAsmDialect();
AsmPrinter *AP = const_cast<AsmPrinter*>(this);
- if (InlineAsmVariant == InlineAsm::AD_ATT)
- EmitGCCInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AsmPrinterVariant,
- AP, LocCookie, OS);
+ if (MI->getInlineAsmDialect() == InlineAsm::AD_ATT)
+ EmitGCCInlineAsmStr(AsmStr, MI, MMI, AsmPrinterVariant, AP, LocCookie, OS);
else
- EmitMSInlineAsmStr(AsmStr, MI, MMI, InlineAsmVariant, AP, LocCookie, OS);
-
- // Reset SanitizeAddress based on the function's attribute.
- MCTargetOptions MCOptions = TM.Options.MCOptions;
- MCOptions.SanitizeAddress =
- MF->getFunction().hasFnAttribute(Attribute::SanitizeAddress);
+ EmitMSInlineAsmStr(AsmStr, MI, MMI, AP, LocCookie, OS);
// Emit warnings if we use reserved registers on the clobber list, as
// that might give surprising results.
@@ -566,7 +557,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
SrcMgr.PrintMessage(Loc, SourceMgr::DK_Note, Note);
}
- EmitInlineAsm(OS.str(), getSubtargetInfo(), MCOptions, LocMD,
+ EmitInlineAsm(OS.str(), getSubtargetInfo(), TM.Options.MCOptions, LocMD,
MI->getInlineAsmDialect());
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
@@ -608,32 +599,50 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
}
}
+void AsmPrinter::PrintSymbolOperand(const MachineOperand &MO, raw_ostream &OS) {
+ assert(MO.isGlobal() && "caller should check MO.isGlobal");
+ getSymbol(MO.getGlobal())->print(OS, MAI);
+ printOffset(MO.getOffset(), OS);
+}
+
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
/// instruction, using the specified assembler variant. Targets should
-/// override this to format as appropriate.
+/// override this to format as appropriate for machine specific ExtraCodes
+/// or when the arch-independent handling would be too complex otherwise.
bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant, const char *ExtraCode,
- raw_ostream &O) {
+ const char *ExtraCode, raw_ostream &O) {
// Does this asm operand have a single letter operand modifier?
if (ExtraCode && ExtraCode[0]) {
if (ExtraCode[1] != 0) return true; // Unknown modifier.
+ // https://gcc.gnu.org/onlinedocs/gccint/Output-Template.html
const MachineOperand &MO = MI->getOperand(OpNo);
switch (ExtraCode[0]) {
default:
return true; // Unknown modifier.
+ case 'a': // Print as memory address.
+ if (MO.isReg()) {
+ PrintAsmMemoryOperand(MI, OpNo, nullptr, O);
+ return false;
+ }
+ LLVM_FALLTHROUGH; // GCC allows '%a' to behave like '%c' with immediates.
case 'c': // Substitute immediate value without immediate syntax
- if (MO.getType() != MachineOperand::MO_Immediate)
- return true;
- O << MO.getImm();
- return false;
+ if (MO.isImm()) {
+ O << MO.getImm();
+ return false;
+ }
+ if (MO.isGlobal()) {
+ PrintSymbolOperand(MO, O);
+ return false;
+ }
+ return true;
case 'n': // Negate the immediate constant.
- if (MO.getType() != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << -MO.getImm();
return false;
case 's': // The GCC deprecated s modifier
- if (MO.getType() != MachineOperand::MO_Immediate)
+ if (!MO.isImm())
return true;
O << ((32 - MO.getImm()) & 31);
return false;
@@ -643,7 +652,6 @@ bool AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
}
bool AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo,
- unsigned AsmVariant,
const char *ExtraCode, raw_ostream &O) {
// Target doesn't support this yet!
return true;
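With the AsmVariant parameter dropped, target overrides of these hooks lose an argument as well, and can defer to the base class for the shared modifiers ('a', 'c', 'n', 's'). A minimal sketch of a post-change override; FooAsmPrinter, its 'r' modifier, and the getRegisterName helper are all hypothetical:

    bool FooAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
                                        const char *ExtraCode, raw_ostream &O) {
      if (ExtraCode && ExtraCode[0] == 'r') { // hypothetical target modifier
        const MachineOperand &MO = MI->getOperand(OpNo);
        if (!MO.isReg())
          return true; // returning true reports an error
        O << FooInstPrinter::getRegisterName(MO.getReg());
        return false;
      }
      // Defer everything else to the arch-independent handling above.
      return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O);
    }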
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index 2163cc7e3e11..db2ff458eb2e 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/ByteStreamer.h - ByteStreamer class --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,7 +31,7 @@ class ByteStreamer {
// For now we're just handling the calls we need for dwarf emission/hashing.
virtual void EmitInt8(uint8_t Byte, const Twine &Comment = "") = 0;
virtual void EmitSLEB128(uint64_t DWord, const Twine &Comment = "") = 0;
- virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "") = 0;
+ virtual void EmitULEB128(uint64_t DWord, const Twine &Comment = "",
+ unsigned PadTo = 0) = 0;
};
class APByteStreamer final : public ByteStreamer {
@@ -49,7 +48,7 @@ public:
AP.OutStreamer->AddComment(Comment);
AP.EmitSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
AP.OutStreamer->AddComment(Comment);
AP.EmitULEB128(DWord);
}
@@ -66,7 +65,7 @@ class HashingByteStreamer final : public ByteStreamer {
void EmitSLEB128(uint64_t DWord, const Twine &Comment) override {
Hash.addSLEB128(DWord);
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
Hash.addULEB128(DWord);
}
};
@@ -103,9 +102,9 @@ public:
}
}
- void EmitULEB128(uint64_t DWord, const Twine &Comment) override {
+ void EmitULEB128(uint64_t DWord, const Twine &Comment,
+ unsigned PadTo) override {
raw_svector_ostream OSE(Buffer);
- unsigned Length = encodeULEB128(DWord, OSE);
+ unsigned Length = encodeULEB128(DWord, OSE, PadTo);
if (GenerateComments) {
Comments.push_back(Comment.str());
// Add some empty comments to keep the Buffer and Comments vectors aligned
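The new PadTo parameter is forwarded to llvm::encodeULEB128, which pads the encoding to a fixed width with 0x80 continuation bytes so that a value can occupy a predictable number of bytes before its final magnitude is known. A small standalone illustration (a sketch of the call, not the LLVM implementation):

    #include "llvm/Support/LEB128.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    // Encoding 5 normally takes one byte (0x05); padded to four bytes it
    // becomes 0x85 0x80 0x80 0x00 -- same value, fixed width, which is what
    // lets a DIE reference reserve space before offsets are finalized.
    void demoPaddedULEB() {
      std::string Buf;
      llvm::raw_string_ostream OS(Buf);
      unsigned Len = llvm::encodeULEB128(5, OS, /*PadTo=*/4);
      (void)Len; // Len == 4; Buf holds "\x85\x80\x80\x00"
    }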
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 8cabad4ad312..932959c311fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -1,9 +1,8 @@
//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/Config/llvm-config.h"
#include "llvm/DebugInfo/CodeView/CVTypeVisitor.h"
#include "llvm/DebugInfo/CodeView/CodeView.h"
+#include "llvm/DebugInfo/CodeView/CodeViewRecordIO.h"
#include "llvm/DebugInfo/CodeView/ContinuationRecordBuilder.h"
#include "llvm/DebugInfo/CodeView/DebugInlineeLinesSubsection.h"
#include "llvm/DebugInfo/CodeView/EnumTables.h"
@@ -51,6 +51,7 @@
#include "llvm/DebugInfo/CodeView/TypeIndex.h"
#include "llvm/DebugInfo/CodeView/TypeRecord.h"
#include "llvm/DebugInfo/CodeView/TypeTableCollection.h"
+#include "llvm/DebugInfo/CodeView/TypeVisitorCallbackPipeline.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfoMetadata.h"
@@ -67,6 +68,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/BinaryByteStream.h"
#include "llvm/Support/BinaryStreamReader.h"
+#include "llvm/Support/BinaryStreamWriter.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -93,6 +95,26 @@
using namespace llvm;
using namespace llvm::codeview;
+namespace {
+class CVMCAdapter : public CodeViewRecordStreamer {
+public:
+ CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+
+ void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
+
+ void EmitIntValue(uint64_t Value, unsigned Size) {
+ OS->EmitIntValueInHex(Value, Size);
+ }
+
+ void EmitBinaryData(StringRef Data) { OS->EmitBinaryData(Data); }
+
+ void AddComment(const Twine &T) { OS->AddComment(T); }
+
+private:
+ MCStreamer *OS = nullptr;
+};
+} // namespace
+
static CPUType mapArchToCVCPUType(Triple::ArchType Type) {
switch (Type) {
case Triple::ArchType::x86:
@@ -273,7 +295,7 @@ static const DISubprogram *getQualifiedNameComponents(
StringRef ScopeName = getPrettyScopeName(Scope);
if (!ScopeName.empty())
QualifiedNameComponents.push_back(ScopeName);
- Scope = Scope->getScope().resolve();
+ Scope = Scope->getScope();
}
return ClosestSubprogram;
}
@@ -309,7 +331,7 @@ struct CodeViewDebug::TypeLoweringScope {
};
static std::string getFullyQualifiedName(const DIScope *Ty) {
- const DIScope *Scope = Ty->getScope().resolve();
+ const DIScope *Scope = Ty->getScope();
return getFullyQualifiedName(Scope, getPrettyScopeName(Ty));
}
@@ -344,7 +366,7 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
// MSVC.
StringRef DisplayName = SP->getName().split('<').first;
- const DIScope *Scope = SP->getScope().resolve();
+ const DIScope *Scope = SP->getScope();
TypeIndex TI;
if (const auto *Class = dyn_cast_or_null<DICompositeType>(Scope)) {
// If the scope is a DICompositeType, then this must be a method. Member
@@ -364,8 +386,8 @@ TypeIndex CodeViewDebug::getFuncIdForSubprogram(const DISubprogram *SP) {
return recordTypeIndexForDINode(SP, TI);
}
-static bool isTrivial(const DICompositeType *DCTy) {
- return ((DCTy->getFlags() & DINode::FlagTrivial) == DINode::FlagTrivial);
+static bool isNonTrivial(const DICompositeType *DCTy) {
+ return ((DCTy->getFlags() & DINode::FlagNonTrivial) == DINode::FlagNonTrivial);
}
static FunctionOptions
@@ -376,16 +398,16 @@ getFunctionOptions(const DISubroutineType *Ty,
const DIType *ReturnTy = nullptr;
if (auto TypeArray = Ty->getTypeArray()) {
if (TypeArray.size())
- ReturnTy = TypeArray[0].resolve();
+ ReturnTy = TypeArray[0];
}
if (auto *ReturnDCTy = dyn_cast_or_null<DICompositeType>(ReturnTy)) {
- if (!isTrivial(ReturnDCTy))
+ if (isNonTrivial(ReturnDCTy))
FO |= FunctionOptions::CxxReturnUdt;
}
// DISubroutineType is unnamed. Use DISubprogram's i.e. SPName in comparison.
- if (ClassTy && !isTrivial(ClassTy) && SPName == ClassTy->getName()) {
+ if (ClassTy && isNonTrivial(ClassTy) && SPName == ClassTy->getName()) {
FO |= FunctionOptions::Constructor;
// TODO: put the FunctionOptions::ConstructorWithVirtualBases flag.
@@ -582,8 +604,9 @@ void CodeViewDebug::endModule() {
clear();
}
-static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
- unsigned MaxFixedRecordLength = 0xF00) {
+static void
+emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
+ unsigned MaxFixedRecordLength = 0xF00) {
// The maximum CV record length is 0xFF00. Most of the strings we emit appear
// after a fixed length portion of the record. The fixed length portion should
// always be less than 0xF00 (3840) bytes, so truncate the string so that the
@@ -594,6 +617,13 @@ static void emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
OS.EmitBytes(NullTerminatedString);
}
+static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
+ for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
+ if (EE.Value == TypeKind)
+ return EE.Name;
+ return "";
+}
+
void CodeViewDebug::emitTypeInformation() {
if (TypeTable.empty())
return;
@@ -610,31 +640,55 @@ void CodeViewDebug::emitTypeInformation() {
}
TypeTableCollection Table(TypeTable.records());
+ SmallString<512> CommentBlock;
+ raw_svector_ostream CommentOS(CommentBlock);
+ std::unique_ptr<ScopedPrinter> SP;
+ std::unique_ptr<TypeDumpVisitor> TDV;
+ TypeVisitorCallbackPipeline Pipeline;
+
+ if (OS.isVerboseAsm()) {
+ // Construct a block comment describing the type record, for readability.
+ SP = llvm::make_unique<ScopedPrinter>(CommentOS);
+ SP->setPrefix(CommentPrefix);
+ TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
+ Pipeline.addCallbackToPipeline(*TDV);
+ }
+
+ // Emit the type record through the CodeView MCStreamer adapter.
+ CVMCAdapter CVMCOS(OS);
+ TypeRecordMapping typeMapping(CVMCOS);
+ Pipeline.addCallbackToPipeline(typeMapping);
+
Optional<TypeIndex> B = Table.getFirst();
while (B) {
// This will fail if the record data is invalid.
CVType Record = Table.getType(*B);
+ CommentBlock.clear();
+
+ auto RecordLen = Record.length();
+ auto RecordKind = Record.kind();
+ if (OS.isVerboseAsm())
+ CVMCOS.AddComment("Record length");
+ CVMCOS.EmitIntValue(RecordLen - 2, 2);
+ if (OS.isVerboseAsm())
+ CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
+ CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
+
+ Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
+
+ if (E) {
+ logAllUnhandledErrors(std::move(E), errs(), "error: ");
+ llvm_unreachable("produced malformed type record");
+ }
+
if (OS.isVerboseAsm()) {
- // Emit a block comment describing the type record for readability.
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- ScopedPrinter SP(CommentOS);
- SP.setPrefix(CommentPrefix);
- TypeDumpVisitor TDV(Table, &SP, false);
-
- Error E = codeview::visitTypeRecord(Record, *B, TDV);
- if (E) {
- logAllUnhandledErrors(std::move(E), errs(), "error: ");
- llvm_unreachable("produced malformed type record");
- }
// emitRawComment will insert its own tab and comment string before
// the first line, so strip off our first one. It also prints its own
// newline.
OS.emitRawComment(
CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
}
- OS.EmitBinaryData(Record.str_data());
B = Table.getNext(*B);
}
}
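The rewrite replaces the one-shot OS.EmitBinaryData(Record.str_data()) with a visitor pipeline, so a single visitTypeRecord pass both builds the verbose comment (TypeDumpVisitor) and re-serializes the record through the MC layer (TypeRecordMapping over the CVMCAdapter). The pattern in sketch form, with error handling simplified and Printer standing in for the ScopedPrinter built above:

    // Callbacks run in insertion order: the dumper fills the comment block,
    // then the mapping emits the same record through the MCStreamer.
    TypeVisitorCallbackPipeline Pipeline;
    TypeDumpVisitor Dumper(Table, &Printer, /*PrintRecordBytes=*/false);
    TypeRecordMapping Mapping(CVMCOS);
    Pipeline.addCallbackToPipeline(Dumper);
    Pipeline.addCallbackToPipeline(Mapping);
    if (Error E = codeview::visitTypeRecord(Record, Index, Pipeline))
      report_fatal_error(toString(std::move(E))); // malformed type record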
@@ -700,6 +754,8 @@ static SourceLanguage MapDWLangToCVLang(unsigned DWLang) {
return SourceLanguage::Java;
case dwarf::DW_LANG_D:
return SourceLanguage::D;
+ case dwarf::DW_LANG_Swift:
+ return SourceLanguage::Swift;
default:
// There's no CodeView representation for this language, and CV doesn't
// have an "unknown" option for the language field, so we'll use MASM,
@@ -973,8 +1029,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
// If we have a display name, build the fully qualified name by walking the
// chain of scopes.
if (!SP->getName().empty())
- FuncName =
- getFullyQualifiedName(SP->getScope().resolve(), SP->getName());
+ FuncName = getFullyQualifiedName(SP->getScope(), SP->getName());
// If our DISubprogram name is empty, use the mangled name.
if (FuncName.empty())
@@ -1071,6 +1126,28 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
endSymbolRecord(AnnotEnd);
}
+ for (auto HeapAllocSite : FI.HeapAllocSites) {
+ MCSymbol *BeginLabel = std::get<0>(HeapAllocSite);
+ MCSymbol *EndLabel = std::get<1>(HeapAllocSite);
+
+ // The labels might not be defined if the instruction was replaced
+ // somewhere in the codegen pipeline.
+ if (!BeginLabel->isDefined() || !EndLabel->isDefined())
+ continue;
+
+ DIType *DITy = std::get<2>(HeapAllocSite);
+ MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
+ OS.AddComment("Call site offset");
+ OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
+ OS.AddComment("Call site section index");
+ OS.EmitCOFFSectionIndex(BeginLabel);
+ OS.AddComment("Call instruction length");
+ OS.emitAbsoluteSymbolDiff(EndLabel, BeginLabel, 2);
+ OS.AddComment("Type index");
+ OS.EmitIntValue(getCompleteTypeIndex(DITy).getIndex(), 4);
+ endSymbolRecord(HeapAllocEnd);
+ }
+
if (SP != nullptr)
emitDebugInfoForUDTs(LocalUDTs);
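Each S_HEAPALLOCSITE record therefore carries four payload fields after the generic two-byte length and two-byte kind. Roughly, as a byte-packed layout sketch (the struct and field names are illustrative; the widths match the emission above):

    #include <cstdint>

    struct HeapAllocSitePayload { // 12 bytes, no padding on common ABIs
      uint32_t CallSiteOffset;    // EmitCOFFSecRel32(BeginLabel)
      uint16_t SectionIndex;      // EmitCOFFSectionIndex(BeginLabel)
      uint16_t InstrLength;       // emitAbsoluteSymbolDiff(End, Begin, 2)
      uint32_t TypeIndex;         // getCompleteTypeIndex(DITy)
    };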
@@ -1118,9 +1195,15 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// If the variable has an attached offset expression, extract it.
// FIXME: Try to handle DW_OP_deref as well.
int64_t ExprOffset = 0;
- if (VI.Expr)
- if (!VI.Expr->extractIfOffset(ExprOffset))
+ bool Deref = false;
+ if (VI.Expr) {
+ // If there is one DW_OP_deref element, use offset of 0 and keep going.
+ if (VI.Expr->getNumElements() == 1 &&
+ VI.Expr->getElement(0) == llvm::dwarf::DW_OP_deref)
+ Deref = true;
+ else if (!VI.Expr->extractIfOffset(ExprOffset))
continue;
+ }
// Get the frame register used and the offset.
unsigned FrameReg = 0;
@@ -1130,6 +1213,7 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
// Calculate the label ranges.
LocalVarDefRange DefRange =
createDefRangeMem(CVReg, FrameOffset + ExprOffset);
+
for (const InsnRange &Range : Scope->getRanges()) {
const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
const MCSymbol *End = getLabelAfterInsn(Range.second);
@@ -1140,6 +1224,9 @@ void CodeViewDebug::collectVariableInfoFromMFTable(
LocalVariable Var;
Var.DIVar = VI.Var;
Var.DefRanges.emplace_back(std::move(DefRange));
+ if (Deref)
+ Var.UseReferenceType = true;
+
recordLocalVariable(std::move(Var), Scope);
}
}
@@ -1153,13 +1240,15 @@ static bool needsReferenceType(const DbgVariableLocation &Loc) {
}
void CodeViewDebug::calculateRanges(
- LocalVariable &Var, const DbgValueHistoryMap::InstrRanges &Ranges) {
+ LocalVariable &Var, const DbgValueHistoryMap::Entries &Entries) {
const TargetRegisterInfo *TRI = Asm->MF->getSubtarget().getRegisterInfo();
// Calculate the definition ranges.
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const InsnRange &Range = *I;
- const MachineInstr *DVInst = Range.first;
+ for (auto I = Entries.begin(), E = Entries.end(); I != E; ++I) {
+ const auto &Entry = *I;
+ if (!Entry.isDbgValue())
+ continue;
+ const MachineInstr *DVInst = Entry.getInstr();
assert(DVInst->isDebugValue() && "Invalid History entry");
// FIXME: Find a way to represent constant variables, since they are
// relatively common.
@@ -1186,7 +1275,7 @@ void CodeViewDebug::calculateRanges(
// Start over using that.
Var.UseReferenceType = true;
Var.DefRanges.clear();
- calculateRanges(Var, Ranges);
+ calculateRanges(Var, Entries);
return;
}
@@ -1214,21 +1303,15 @@ void CodeViewDebug::calculateRanges(
}
// Compute the label range.
- const MCSymbol *Begin = getLabelBeforeInsn(Range.first);
- const MCSymbol *End = getLabelAfterInsn(Range.second);
- if (!End) {
- // This range is valid until the next overlapping bitpiece. In the
- // common case, ranges will not be bitpieces, so they will overlap.
- auto J = std::next(I);
- const DIExpression *DIExpr = DVInst->getDebugExpression();
- while (J != E &&
- !DIExpr->fragmentsOverlap(J->first->getDebugExpression()))
- ++J;
- if (J != E)
- End = getLabelBeforeInsn(J->first);
- else
- End = Asm->getFunctionEnd();
- }
+ const MCSymbol *Begin = getLabelBeforeInsn(Entry.getInstr());
+ const MCSymbol *End;
+ if (Entry.getEndIndex() != DbgValueHistoryMap::NoEntry) {
+ auto &EndingEntry = Entries[Entry.getEndIndex()];
+ End = EndingEntry.isDbgValue()
+ ? getLabelBeforeInsn(EndingEntry.getInstr())
+ : getLabelAfterInsn(EndingEntry.getInstr());
+ } else
+ End = Asm->getFunctionEnd();
// If the last range end is our begin, just extend the last range.
// Otherwise make a new range.
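This range computation leans on the new history-map shape: each debug-value entry records the index of the entry that ends it, instead of carrying a paired end instruction. A simplified model of that data (names here are illustrative, not the real class):

    #include <cstddef>

    struct EntryModel {
      static constexpr size_t NoEntry = ~size_t(0);
      const void *Instr;                  // the DBG_VALUE or clobbering instr
      enum EntryKind { DbgValue, Clobber } Kind;
      // Open until a later entry supersedes it: the range ends at the label
      // *before* a superseding DbgValue, or *after* a Clobber.
      size_t EndIndex = NoEntry;          // NoEntry: live to end of function
    };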
@@ -1256,7 +1339,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
const DILocation *InlinedAt = IV.second;
// Instruction ranges, specifying where IV is accessible.
- const auto &Ranges = I.second;
+ const auto &Entries = I.second;
LexicalScope *Scope = nullptr;
if (InlinedAt)
@@ -1270,7 +1353,7 @@ void CodeViewDebug::collectVariableInfo(const DISubprogram *SP) {
LocalVariable Var;
Var.DIVar = DIVar;
- calculateRanges(Var, Ranges);
+ calculateRanges(Var, Entries);
recordLocalVariable(std::move(Var), Scope);
}
}
@@ -1340,8 +1423,8 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
FPO |= FrameProcedureOptions::SecurityChecks;
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedLocalFramePtrReg) << 14U);
FPO |= FrameProcedureOptions(uint32_t(CurFn->EncodedParamFramePtrReg) << 16U);
- if (Asm->TM.getOptLevel() != CodeGenOpt::None && !GV.optForSize() &&
- !GV.hasFnAttribute(Attribute::OptimizeNone))
+ if (Asm->TM.getOptLevel() != CodeGenOpt::None &&
+ !GV.hasOptSize() && !GV.hasOptNone())
FPO |= FrameProcedureOptions::OptimizedForSpeed;
// FIXME: Set GuardCfg when it is implemented.
CurFn->FrameProcOpts = FPO;
@@ -1379,7 +1462,7 @@ static bool shouldEmitUdt(const DIType *T) {
// MSVC does not emit UDTs for typedefs that are scoped to classes.
if (T->getTag() == dwarf::DW_TAG_typedef) {
- if (DIScope *Scope = T->getScope().resolve()) {
+ if (DIScope *Scope = T->getScope()) {
switch (Scope->getTag()) {
case dwarf::DW_TAG_structure_type:
case dwarf::DW_TAG_class_type:
@@ -1396,7 +1479,7 @@ static bool shouldEmitUdt(const DIType *T) {
const DIDerivedType *DT = dyn_cast<DIDerivedType>(T);
if (!DT)
return true;
- T = DT->getBaseType().resolve();
+ T = DT->getBaseType();
}
return true;
}
@@ -1409,8 +1492,8 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) {
return;
SmallVector<StringRef, 5> QualifiedNameComponents;
- const DISubprogram *ClosestSubprogram = getQualifiedNameComponents(
- Ty->getScope().resolve(), QualifiedNameComponents);
+ const DISubprogram *ClosestSubprogram =
+ getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents);
std::string FullyQualifiedName =
getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty));
@@ -1479,8 +1562,7 @@ TypeIndex CodeViewDebug::lowerType(const DIType *Ty, const DIType *ClassTy) {
}
TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
- DITypeRef UnderlyingTypeRef = Ty->getBaseType();
- TypeIndex UnderlyingTypeIndex = getTypeIndex(UnderlyingTypeRef);
+ TypeIndex UnderlyingTypeIndex = getTypeIndex(Ty->getBaseType());
StringRef TypeName = Ty->getName();
addToUDTs(Ty);
@@ -1496,14 +1578,14 @@ TypeIndex CodeViewDebug::lowerTypeAlias(const DIDerivedType *Ty) {
}
TypeIndex CodeViewDebug::lowerTypeArray(const DICompositeType *Ty) {
- DITypeRef ElementTypeRef = Ty->getBaseType();
- TypeIndex ElementTypeIndex = getTypeIndex(ElementTypeRef);
+ const DIType *ElementType = Ty->getBaseType();
+ TypeIndex ElementTypeIndex = getTypeIndex(ElementType);
// IndexType is size_t, which depends on the bitness of the target.
TypeIndex IndexType = getPointerSizeInBytes() == 8
? TypeIndex(SimpleTypeKind::UInt64Quad)
: TypeIndex(SimpleTypeKind::UInt32Long);
- uint64_t ElementSize = getBaseTypeSize(ElementTypeRef) / 8;
+ uint64_t ElementSize = getBaseTypeSize(ElementType) / 8;
// Add subranges to array type.
DINodeArray Elements = Ty->getElements();
@@ -1764,7 +1846,7 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
break;
}
if (IsModifier)
- BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType().resolve();
+ BaseTy = cast<DIDerivedType>(BaseTy)->getBaseType();
}
// Check if the inner type will use an LF_POINTER record. If so, the
@@ -1797,8 +1879,8 @@ TypeIndex CodeViewDebug::lowerTypeModifier(const DIDerivedType *Ty) {
TypeIndex CodeViewDebug::lowerTypeFunction(const DISubroutineType *Ty) {
SmallVector<TypeIndex, 8> ReturnAndArgTypeIndices;
- for (DITypeRef ArgTypeRef : Ty->getTypeArray())
- ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgTypeRef));
+ for (const DIType *ArgType : Ty->getTypeArray())
+ ReturnAndArgTypeIndices.push_back(getTypeIndex(ArgType));
// MSVC uses type none for variadic argument.
if (ReturnAndArgTypeIndices.size() > 1 &&
@@ -1836,7 +1918,10 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
unsigned Index = 0;
SmallVector<TypeIndex, 8> ArgTypeIndices;
- TypeIndex ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+ TypeIndex ReturnTypeIndex = TypeIndex::Void();
+ if (ReturnAndArgs.size() > Index) {
+ ReturnTypeIndex = getTypeIndex(ReturnAndArgs[Index++]);
+ }
// If the first argument is a pointer type and this isn't a static method,
// treat it as the special 'this' parameter, which is encoded separately from
@@ -1844,7 +1929,7 @@ TypeIndex CodeViewDebug::lowerTypeMemberFunction(const DISubroutineType *Ty,
TypeIndex ThisTypeIndex;
if (!IsStaticMethod && ReturnAndArgs.size() > Index) {
if (const DIDerivedType *PtrTy =
- dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index].resolve())) {
+ dyn_cast_or_null<DIDerivedType>(ReturnAndArgs[Index])) {
if (PtrTy->getTag() == dwarf::DW_TAG_pointer_type) {
ThisTypeIndex = getTypeIndexForThisPtr(PtrTy, Ty);
Index++;
@@ -1942,7 +2027,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
// Put the Nested flag on a type if it appears immediately inside a tag type.
// Do not walk the scope chain. Do not attempt to compute ContainsNestedClass
// here. That flag is only set on definitions, and not forward declarations.
- const DIScope *ImmediateScope = Ty->getScope().resolve();
+ const DIScope *ImmediateScope = Ty->getScope();
if (ImmediateScope && isa<DICompositeType>(ImmediateScope))
CO |= ClassOptions::Nested;
@@ -1955,7 +2040,7 @@ static ClassOptions getCommonClassOptions(const DICompositeType *Ty) {
CO |= ClassOptions::Scoped;
} else {
for (const DIScope *Scope = ImmediateScope; Scope != nullptr;
- Scope = Scope->getScope().resolve()) {
+ Scope = Scope->getScope()) {
if (isa<DISubprogram>(Scope)) {
CO |= ClassOptions::Scoped;
break;
@@ -2075,7 +2160,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
// succeeds, and drop the member if that fails.
assert((DDTy->getOffsetInBits() % 8) == 0 && "Unnamed bitfield member!");
uint64_t Offset = DDTy->getOffsetInBits();
- const DIType *Ty = DDTy->getBaseType().resolve();
+ const DIType *Ty = DDTy->getBaseType();
bool FullyResolved = false;
while (!FullyResolved) {
switch (Ty->getTag()) {
@@ -2083,7 +2168,7 @@ void CodeViewDebug::collectMemberInfo(ClassInfo &Info,
case dwarf::DW_TAG_volatile_type:
// FIXME: we should apply the qualifier types to the indirect fields
// rather than dropping them.
- Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
break;
default:
FullyResolved = true;
@@ -2184,6 +2269,14 @@ TypeIndex CodeViewDebug::lowerCompleteTypeClass(const DICompositeType *Ty) {
if (ContainsNestedClass)
CO |= ClassOptions::ContainsNestedClass;
+ // MSVC appears to set this flag by searching the emitted members for any
+ // destructor or method with FunctionOptions::Constructor. The Clang AST has
+ // all the members, but special member functions are not yet emitted into
+ // debug information. For now, checking a class's non-triviality seems enough.
+ // FIXME: not true for a nested unnamed struct.
+ if (isNonTrivial(Ty))
+ CO |= ClassOptions::HasConstructorOrDestructor;
+
std::string FullName = getFullyQualifiedName(Ty);
uint64_t SizeInBytes = Ty->getSizeInBits() / 8;
@@ -2358,7 +2451,7 @@ CodeViewDebug::lowerRecordFieldList(const DICompositeType *Ty) {
// Create nested classes.
for (const DIType *Nested : Info.NestedTypes) {
- NestedTypeRecord R(getTypeIndex(DITypeRef(Nested)), Nested->getName());
+ NestedTypeRecord R(getTypeIndex(Nested), Nested->getName());
ContinuationBuilder.writeMemberType(R);
MemberCount++;
}
@@ -2385,10 +2478,7 @@ TypeIndex CodeViewDebug::getVBPTypeIndex() {
return VBPType;
}
-TypeIndex CodeViewDebug::getTypeIndex(DITypeRef TypeRef, DITypeRef ClassTyRef) {
- const DIType *Ty = TypeRef.resolve();
- const DIType *ClassTy = ClassTyRef.resolve();
-
+TypeIndex CodeViewDebug::getTypeIndex(const DIType *Ty, const DIType *ClassTy) {
// The null DIType is the void type. Don't try to hash it.
if (!Ty)
return TypeIndex::Void();
@@ -2431,8 +2521,7 @@ CodeViewDebug::getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
return recordTypeIndexForDINode(PtrTy, TI, SubroutineTy);
}
-TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
- DIType *Ty = TypeRef.resolve();
+TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(const DIType *Ty) {
PointerRecord PR(getTypeIndex(Ty),
getPointerSizeInBytes() == 8 ? PointerKind::Near64
: PointerKind::Near32,
@@ -2441,9 +2530,7 @@ TypeIndex CodeViewDebug::getTypeIndexForReferenceTo(DITypeRef TypeRef) {
return TypeTable.writeLeafType(PR);
}
-TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
- const DIType *Ty = TypeRef.resolve();
-
+TypeIndex CodeViewDebug::getCompleteTypeIndex(const DIType *Ty) {
// The null DIType is the void type. Don't try to hash it.
if (!Ty)
return TypeIndex::Void();
@@ -2454,7 +2541,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
if (Ty->getTag() == dwarf::DW_TAG_typedef)
(void)getTypeIndex(Ty);
while (Ty->getTag() == dwarf::DW_TAG_typedef)
- Ty = cast<DIDerivedType>(Ty)->getBaseType().resolve();
+ Ty = cast<DIDerivedType>(Ty)->getBaseType();
// If this is a non-record type, the complete type index is the same as the
// normal type index. Just call getTypeIndex.
@@ -2467,11 +2554,7 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
return getTypeIndex(Ty);
}
- // Check if we've already translated the complete record type.
const auto *CTy = cast<DICompositeType>(Ty);
- auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
- if (!InsertResult.second)
- return InsertResult.first->second;
TypeLoweringScope S(*this);
@@ -2489,6 +2572,13 @@ TypeIndex CodeViewDebug::getCompleteTypeIndex(DITypeRef TypeRef) {
return FwdDeclTI;
}
+ // Check if we've already translated the complete record type.
+ // Insert the type with a null TypeIndex to signify that the type is currently
+ // being lowered.
+ auto InsertResult = CompleteTypeIndices.insert({CTy, TypeIndex()});
+ if (!InsertResult.second)
+ return InsertResult.first->second;
+
TypeIndex TI;
switch (CTy->getTag()) {
case dwarf::DW_TAG_class_type:
@@ -2799,6 +2889,7 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
}
CurFn->Annotations = MF->getCodeViewAnnotations();
+ CurFn->HeapAllocSites = MF->getCodeViewHeapAllocSites();
CurFn->End = Asm->getFunctionEnd();
@@ -2914,10 +3005,19 @@ void CodeViewDebug::collectGlobalVariableInfo() {
for (const MDNode *Node : CUs->operands()) {
const auto *CU = cast<DICompileUnit>(Node);
for (const auto *GVE : CU->getGlobalVariables()) {
+ const DIGlobalVariable *DIGV = GVE->getVariable();
+ const DIExpression *DIE = GVE->getExpression();
+
+ // Emit constant global variables in a global symbol section.
+ if (GlobalMap.count(GVE) == 0 && DIE->isConstant()) {
+ CVGlobalVariable CVGV = {DIGV, DIE};
+ GlobalVariables.emplace_back(std::move(CVGV));
+ }
+
const auto *GV = GlobalMap.lookup(GVE);
if (!GV || GV->isDeclarationForLinker())
continue;
- const DIGlobalVariable *DIGV = GVE->getVariable();
+
DIScope *Scope = DIGV->getScope();
SmallVector<CVGlobalVariable, 1> *VariableList;
if (Scope && isa<DILocalScope>(Scope)) {
@@ -2932,7 +3032,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
// Emit this global variable into a COMDAT section.
VariableList = &ComdatVariables;
else
- // Emit this globla variable in a single global symbol section.
+ // Emit this global variable in a single global symbol section.
VariableList = &GlobalVariables;
CVGlobalVariable CVGV = {DIGV, GV};
VariableList->emplace_back(std::move(CVGV));
@@ -2955,13 +3055,14 @@ void CodeViewDebug::emitDebugInfoForGlobals() {
// Second, emit each global that is in a comdat into its own .debug$S
// section along with its own symbol substream.
for (const CVGlobalVariable &CVGV : ComdatVariables) {
- MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
+ const GlobalVariable *GV = CVGV.GVInfo.get<const GlobalVariable *>();
+ MCSymbol *GVSym = Asm->getSymbol(GV);
OS.AddComment("Symbol subsection for " +
- Twine(GlobalValue::dropLLVMManglingEscape(CVGV.GV->getName())));
+ Twine(GlobalValue::dropLLVMManglingEscape(GV->getName())));
switchToDebugSectionForSymbol(GVSym);
MCSymbol *EndLabel = beginCVSubsection(DebugSubsectionKind::Symbols);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
+ emitDebugInfoForGlobal(CVGV);
endCVSubsection(EndLabel);
}
}
@@ -2981,31 +3082,63 @@ void CodeViewDebug::emitDebugInfoForRetainedTypes() {
// Emit each global variable in the specified array.
void CodeViewDebug::emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals) {
for (const CVGlobalVariable &CVGV : Globals) {
- MCSymbol *GVSym = Asm->getSymbol(CVGV.GV);
// FIXME: emitDebugInfoForGlobal() doesn't handle DIExpressions.
- emitDebugInfoForGlobal(CVGV.DIGV, CVGV.GV, GVSym);
- }
-}
-
-void CodeViewDebug::emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
- const GlobalVariable *GV,
- MCSymbol *GVSym) {
- // DataSym record, see SymbolRecord.h for more info. Thread local data
- // happens to have the same format as global data.
- SymbolKind DataSym = GV->isThreadLocal()
- ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
- : SymbolKind::S_GTHREAD32)
- : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
- : SymbolKind::S_GDATA32);
- MCSymbol *DataEnd = beginSymbolRecord(DataSym);
- OS.AddComment("Type");
- OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
- OS.AddComment("DataOffset");
- OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
- OS.AddComment("Segment");
- OS.EmitCOFFSectionIndex(GVSym);
- OS.AddComment("Name");
- const unsigned LengthOfDataRecord = 12;
- emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
- endSymbolRecord(DataEnd);
+ emitDebugInfoForGlobal(CVGV);
+ }
+}
+
+void CodeViewDebug::emitDebugInfoForGlobal(const CVGlobalVariable &CVGV) {
+ const DIGlobalVariable *DIGV = CVGV.DIGV;
+ if (const GlobalVariable *GV =
+ CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
+ // DataSym record, see SymbolRecord.h for more info. Thread local data
+ // happens to have the same format as global data.
+ MCSymbol *GVSym = Asm->getSymbol(GV);
+ SymbolKind DataSym = GV->isThreadLocal()
+ ? (DIGV->isLocalToUnit() ? SymbolKind::S_LTHREAD32
+ : SymbolKind::S_GTHREAD32)
+ : (DIGV->isLocalToUnit() ? SymbolKind::S_LDATA32
+ : SymbolKind::S_GDATA32);
+ MCSymbol *DataEnd = beginSymbolRecord(DataSym);
+ OS.AddComment("Type");
+ OS.EmitIntValue(getCompleteTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("DataOffset");
+ OS.EmitCOFFSecRel32(GVSym, /*Offset=*/0);
+ OS.AddComment("Segment");
+ OS.EmitCOFFSectionIndex(GVSym);
+ OS.AddComment("Name");
+ const unsigned LengthOfDataRecord = 12;
+ emitNullTerminatedSymbolName(OS, DIGV->getName(), LengthOfDataRecord);
+ endSymbolRecord(DataEnd);
+ } else {
+ // FIXME: Currently this only emits the global variables in the IR metadata.
+ // This should also emit enums and static data members.
+ const DIExpression *DIE = CVGV.GVInfo.get<const DIExpression *>();
+ assert(DIE->isConstant() &&
+ "Global constant variables must contain a constant expression.");
+ uint64_t Val = DIE->getElement(1);
+
+ MCSymbol *SConstantEnd = beginSymbolRecord(SymbolKind::S_CONSTANT);
+ OS.AddComment("Type");
+ OS.EmitIntValue(getTypeIndex(DIGV->getType()).getIndex(), 4);
+ OS.AddComment("Value");
+
+ // Encoded integers shouldn't need more than 10 bytes.
+ uint8_t Data[10];
+ BinaryStreamWriter Writer(Data, llvm::support::endianness::little);
+ CodeViewRecordIO IO(Writer);
+ cantFail(IO.mapEncodedInteger(Val));
+ StringRef SRef((char *)Data, Writer.getOffset());
+ OS.EmitBinaryData(SRef);
+
+ OS.AddComment("Name");
+ const DIScope *Scope = DIGV->getScope();
+ // For static data members, get the scope from the declaration.
+ if (const auto *MemberDecl = dyn_cast_or_null<DIDerivedType>(
+ DIGV->getRawStaticDataMemberDeclaration()))
+ Scope = MemberDecl->getScope();
+ emitNullTerminatedSymbolName(OS,
+ getFullyQualifiedName(Scope, DIGV->getName()));
+ endSymbolRecord(SConstantEnd);
+ }
}
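The S_CONSTANT value uses CodeView's variable-length numeric-leaf encoding, which is what mapEncodedInteger writes: values below 0x8000 are stored inline as two little-endian bytes, and larger ones get a two-byte marker (LF_USHORT, LF_ULONG, LF_UQUADWORD, ...) followed by the value, for a worst case of ten bytes -- hence the buffer size above. A standalone sketch for unsigned values:

    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodeNumericLeaf(uint64_t V) {
      std::vector<uint8_t> Out;
      auto put16 = [&](uint16_t X) {
        Out.push_back(X & 0xff);
        Out.push_back(X >> 8);
      };
      if (V < 0x8000) {          // small values are stored inline
        put16(uint16_t(V));
      } else if (V <= 0xffff) {
        put16(0x8002);           // LF_USHORT marker
        put16(uint16_t(V));
      } else if (V <= 0xffffffffu) {
        put16(0x8004);           // LF_ULONG marker
        for (int I = 0; I < 4; ++I)
          Out.push_back((V >> (8 * I)) & 0xff);
      } else {
        put16(0x800a);           // LF_UQUADWORD marker
        for (int I = 0; I < 8; ++I)
          Out.push_back((V >> (8 * I)) & 0xff);
      }
      return Out;
    }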
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index 21557ed1be35..ce57b789d7fa 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -1,9 +1,8 @@
//===- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PointerUnion.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
@@ -101,7 +101,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
struct CVGlobalVariable {
const DIGlobalVariable *DIGV;
- const GlobalVariable *GV;
+ PointerUnion<const GlobalVariable *, const DIExpression *> GVInfo;
};
struct InlineSite {
@@ -148,6 +148,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
+ std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
@@ -223,7 +224,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
codeview::TypeIndex getFuncIdForSubprogram(const DISubprogram *SP);
void calculateRanges(LocalVariable &Var,
- const DbgValueHistoryMap::InstrRanges &Ranges);
+ const DbgValueHistoryMap::Entries &Entries);
static void collectInlineSiteChildren(SmallVectorImpl<unsigned> &Children,
const FunctionInfo &FI,
@@ -313,8 +314,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
void emitDebugInfoForGlobals();
void emitGlobalVariableList(ArrayRef<CVGlobalVariable> Globals);
- void emitDebugInfoForGlobal(const DIGlobalVariable *DIGV,
- const GlobalVariable *GV, MCSymbol *GVSym);
+ void emitDebugInfoForGlobal(const CVGlobalVariable &CVGV);
/// Opens a subsection of the given kind in a .debug$S codeview section.
/// Returns an end label for use with endCVSubsection when the subsection is
@@ -373,14 +373,14 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// Translates the DIType to codeview if necessary and returns a type index
/// for it.
- codeview::TypeIndex getTypeIndex(DITypeRef TypeRef,
- DITypeRef ClassTyRef = DITypeRef());
+ codeview::TypeIndex getTypeIndex(const DIType *Ty,
+ const DIType *ClassTy = nullptr);
codeview::TypeIndex
getTypeIndexForThisPtr(const DIDerivedType *PtrTy,
const DISubroutineType *SubroutineTy);
- codeview::TypeIndex getTypeIndexForReferenceTo(DITypeRef TypeRef);
+ codeview::TypeIndex getTypeIndexForReferenceTo(const DIType *Ty);
codeview::TypeIndex getMemberFunctionType(const DISubprogram *SP,
const DICompositeType *Class);
@@ -419,7 +419,7 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
/// use this entry point when generating symbol records. The complete and
/// incomplete type indices only differ for record types. All other types use
/// the same index.
- codeview::TypeIndex getCompleteTypeIndex(DITypeRef TypeRef);
+ codeview::TypeIndex getCompleteTypeIndex(const DIType *Ty);
codeview::TypeIndex lowerCompleteTypeClass(const DICompositeType *Ty);
codeview::TypeIndex lowerCompleteTypeUnion(const DICompositeType *Ty);
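Since CVGlobalVariable now holds either an IR global or a constant DIExpression, call sites discriminate with the usual PointerUnion accessors, exactly as emitDebugInfoForGlobal does above. In miniature:

    // GVInfo holds exactly one of the two pointers: dyn_cast returns null
    // for the inactive member, and get asserts the expected one is active.
    void inspect(const CVGlobalVariable &CVGV) {
      if (const auto *GV = CVGV.GVInfo.dyn_cast<const GlobalVariable *>()) {
        (void)GV;  // definition in the module: S_{G,L}DATA32 / S_{G,L}THREAD32
      } else {
        const auto *DIE = CVGV.GVInfo.get<const DIExpression *>();
        (void)DIE; // metadata-only constant: S_CONSTANT from the expression
      }
    }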
diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp
index e27659494f08..f4134da48caa 100644
--- a/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -1,9 +1,8 @@
//===--- lib/CodeGen/DIE.cpp - DWARF Info Entries -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -212,7 +211,7 @@ const DIE *DIE::getUnitDie() const {
return nullptr;
}
-const DIEUnit *DIE::getUnit() const {
+DIEUnit *DIE::getUnit() const {
const DIE *UnitDie = getUnitDie();
if (UnitDie)
return UnitDie->Owner.dyn_cast<DIEUnit*>();
@@ -507,6 +506,23 @@ LLVM_DUMP_METHOD
void DIELabel::print(raw_ostream &O) const { O << "Lbl: " << Label->getName(); }
//===----------------------------------------------------------------------===//
+// DIEBaseTypeRef Implementation
+//===----------------------------------------------------------------------===//
+
+void DIEBaseTypeRef::EmitValue(const AsmPrinter *AP, dwarf::Form Form) const {
+ uint64_t Offset = CU->ExprRefedBaseTypes[Index].Die->getOffset();
+ assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ AP->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+}
+
+unsigned DIEBaseTypeRef::SizeOf(const AsmPrinter *AP, dwarf::Form Form) const {
+ return ULEB128PadSize;
+}
+
+LLVM_DUMP_METHOD
+void DIEBaseTypeRef::print(raw_ostream &O) const { O << "BaseTypeRef: " << Index; }
+
+//===----------------------------------------------------------------------===//
// DIEDelta Implementation
//===----------------------------------------------------------------------===//
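With ULEB128PadSize bytes at seven payload bits each, a padded reference can express offsets up to 2^(7*PadSize) - 1, which is exactly what the assert above enforces. A compile-time restatement (assuming a pad size of 4, the value used for these references):

    #include <cstdint>

    constexpr uint64_t maxPaddedULEB(unsigned PadSize) {
      return (uint64_t(1) << (7 * PadSize)) - 1;
    }
    // Four padded bytes cover DIE offsets up to 2^28 - 1 (about 256 MiB).
    static_assert(maxPaddedULEB(4) == 268435455, "28 payload bits");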
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.cpp b/lib/CodeGen/AsmPrinter/DIEHash.cpp
index b8f1202494d7..bfac8850a2a6 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.cpp
+++ b/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -226,7 +225,7 @@ void DIEHash::hashLocList(const DIELocList &LocList) {
DwarfDebug &DD = *AP->getDwarfDebug();
const DebugLocStream &Locs = DD.getDebugLocs();
for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue())))
- DD.emitDebugLocEntry(Streamer, Entry);
+ DD.emitDebugLocEntry(Streamer, Entry, nullptr);
}
// Hash an individual attribute \param Attr based on the type of attribute and
@@ -310,6 +309,7 @@ void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) {
// FIXME: It's uncertain whether or not we should handle this at the moment.
case DIEValue::isExpr:
case DIEValue::isLabel:
+ case DIEValue::isBaseTypeRef:
case DIEValue::isDelta:
llvm_unreachable("Add support for additional value types.");
}
diff --git a/lib/CodeGen/AsmPrinter/DIEHash.h b/lib/CodeGen/AsmPrinter/DIEHash.h
index dae517ab2c29..2e49514c98be 100644
--- a/lib/CodeGen/AsmPrinter/DIEHash.h
+++ b/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index 09867822c30a..ddd60575b6c0 100644
--- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -1,15 +1,15 @@
//===- llvm/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DbgEntityHistoryCalculator.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -31,51 +31,62 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+namespace {
+using EntryIndex = DbgValueHistoryMap::EntryIndex;
+}
+
// If @MI is a DBG_VALUE with debug value described by a
// defined register, returns the number of this register.
// In the other case, returns 0.
-static unsigned isDescribedByReg(const MachineInstr &MI) {
+static Register isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
+ // If the location of the variable is an entry value (DW_OP_entry_value),
+ // do not consider it a register location.
+ if (MI.getDebugExpression()->isEntryValue())
+ return 0;
// If location of variable is described using a register (directly or
// indirectly), this register is always a first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
-void DbgValueHistoryMap::startInstrRange(InlinedEntity Var,
- const MachineInstr &MI) {
+bool DbgValueHistoryMap::startDbgValue(InlinedEntity Var,
+ const MachineInstr &MI,
+ EntryIndex &NewIndex) {
// Instruction range should start with a DBG_VALUE instruction for the
// variable.
assert(MI.isDebugValue() && "not a DBG_VALUE");
- auto &Ranges = VarInstrRanges[Var];
- if (!Ranges.empty() && Ranges.back().second == nullptr &&
- Ranges.back().first->isIdenticalTo(MI)) {
+ auto &Entries = VarEntries[Var];
+ if (!Entries.empty() && Entries.back().isDbgValue() &&
+ !Entries.back().isClosed() &&
+ Entries.back().getInstr()->isIdenticalTo(MI)) {
LLVM_DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n"
- << "\t" << Ranges.back().first << "\t" << MI << "\n");
- return;
+ << "\t" << Entries.back().getInstr() << "\t" << MI
+ << "\n");
+ return false;
}
- Ranges.push_back(std::make_pair(&MI, nullptr));
+ Entries.emplace_back(&MI, Entry::DbgValue);
+ NewIndex = Entries.size() - 1;
+ return true;
}
-void DbgValueHistoryMap::endInstrRange(InlinedEntity Var,
- const MachineInstr &MI) {
- auto &Ranges = VarInstrRanges[Var];
- // Verify that the current instruction range is not yet closed.
- assert(!Ranges.empty() && Ranges.back().second == nullptr);
- // For now, instruction ranges are not allowed to cross basic block
- // boundaries.
- assert(Ranges.back().first->getParent() == MI.getParent());
- Ranges.back().second = &MI;
+EntryIndex DbgValueHistoryMap::startClobber(InlinedEntity Var,
+ const MachineInstr &MI) {
+ auto &Entries = VarEntries[Var];
+ // If an instruction clobbers multiple registers that the variable is
+ // described by, then we may have already created a clobbering instruction.
+ if (Entries.back().isClobber() && Entries.back().getInstr() == &MI)
+ return Entries.size() - 1;
+ Entries.emplace_back(&MI, Entry::Clobber);
+ return Entries.size() - 1;
}
-unsigned DbgValueHistoryMap::getRegisterForVar(InlinedEntity Var) const {
- const auto &I = VarInstrRanges.find(Var);
- if (I == VarInstrRanges.end())
- return 0;
- const auto &Ranges = I->second;
- if (Ranges.empty() || Ranges.back().second != nullptr)
- return 0;
- return isDescribedByReg(*Ranges.back().first);
+void DbgValueHistoryMap::Entry::endEntry(EntryIndex Index) {
+ // For now, instruction ranges are not allowed to cross basic block
+ // boundaries.
+ assert(isDbgValue() && "Setting end index for non-debug value");
+ assert(!isClosed() && "End index has already been set");
+ EndIndex = Index;
}
void DbgLabelInstrMap::addInstr(InlinedEntity Label, const MachineInstr &MI) {
@@ -89,6 +100,12 @@ namespace {
using InlinedEntity = DbgValueHistoryMap::InlinedEntity;
using RegDescribedVarsMap = std::map<unsigned, SmallVector<InlinedEntity, 1>>;
+// Keeps track of the debug value entries that are currently live for each
+// inlined entity. As the history map entries are stored in a SmallVector, they
+// may be moved at insertion of new entries, so store indices rather than
+// pointers.
+using DbgValueEntriesMap = std::map<InlinedEntity, SmallSet<EntryIndex, 1>>;
+
} // end anonymous namespace
// Claim that @Var is not described by @RegNo anymore.
@@ -114,16 +131,88 @@ static void addRegDescribedVar(RegDescribedVarsMap &RegVars, unsigned RegNo,
VarSet.push_back(Var);
}
+/// Create a clobbering entry and end all open debug value entries
+/// for \p Var that are described by \p RegNo using that entry.
+static void clobberRegEntries(InlinedEntity Var, unsigned RegNo,
+ const MachineInstr &ClobberingInstr,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap) {
+ EntryIndex ClobberIndex = HistMap.startClobber(Var, ClobberingInstr);
+
+ // Close all entries whose values are described by the register.
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ if (isDescribedByReg(*Entry.getInstr()) == RegNo) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(ClobberIndex);
+ }
+ }
+
+ // Drop all entries that have ended.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+}
+
+/// Add a new debug value for \p Var. Closes all overlapping debug values.
+static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
+ RegDescribedVarsMap &RegVars,
+ DbgValueEntriesMap &LiveEntries,
+ DbgValueHistoryMap &HistMap) {
+ EntryIndex NewIndex;
+ if (HistMap.startDbgValue(Var, DV, NewIndex)) {
+ SmallDenseMap<unsigned, bool, 4> TrackedRegs;
+
+ // If we have created a new debug value entry, close all preceding
+ // live entries that overlap.
+ SmallVector<EntryIndex, 4> IndicesToErase;
+ const DIExpression *DIExpr = DV.getDebugExpression();
+ for (auto Index : LiveEntries[Var]) {
+ auto &Entry = HistMap.getEntry(Var, Index);
+ assert(Entry.isDbgValue() && "Not a DBG_VALUE in LiveEntries");
+ const MachineInstr &DV = *Entry.getInstr();
+ bool Overlaps = DIExpr->fragmentsOverlap(DV.getDebugExpression());
+ if (Overlaps) {
+ IndicesToErase.push_back(Index);
+ Entry.endEntry(NewIndex);
+ }
+ if (unsigned Reg = isDescribedByReg(DV))
+ TrackedRegs[Reg] |= !Overlaps;
+ }
+
+ // If the new debug value is described by a register, add tracking of
+ // that register if it is not already tracked.
+ if (unsigned NewReg = isDescribedByReg(DV)) {
+ if (!TrackedRegs.count(NewReg))
+ addRegDescribedVar(RegVars, NewReg, Var);
+ LiveEntries[Var].insert(NewIndex);
+ TrackedRegs[NewReg] = true;
+ }
+
+ // Drop tracking of registers that are no longer used.
+ for (auto I : TrackedRegs)
+ if (!I.second)
+ dropRegDescribedVar(RegVars, I.first, Var);
+
+ // Drop all entries that have ended, and mark the new entry as live.
+ for (auto Index : IndicesToErase)
+ LiveEntries[Var].erase(Index);
+ LiveEntries[Var].insert(NewIndex);
+ }
+}
+
// Terminate the location range for variables described by register at
// @I by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
RegDescribedVarsMap::iterator I,
DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
const MachineInstr &ClobberingInstr) {
// Iterate over all variables described by this register and add this
// instruction to their history, clobbering it.
for (const auto &Var : I->second)
- HistMap.endInstrRange(Var, ClobberingInstr);
+ clobberRegEntries(Var, I->first, ClobberingInstr, LiveEntries, HistMap);
RegVars.erase(I);
}
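The protocol between the calculator and the map is thus: startDbgValue opens an entry, startClobber opens (or reuses) a clobber entry, and endEntry stitches them together. A minimal sketch using only the names introduced in this patch:

    // Open a debug-value entry, then close it against a clobber, the same
    // dance clobberRegEntries performs over LiveEntries above.
    void recordClobber(DbgValueHistoryMap &HistMap,
                       DbgValueHistoryMap::InlinedEntity Var,
                       const MachineInstr &DbgMI,
                       const MachineInstr &ClobberMI) {
      DbgValueHistoryMap::EntryIndex ValueIdx;
      if (HistMap.startDbgValue(Var, DbgMI, ValueIdx)) {
        auto ClobberIdx = HistMap.startClobber(Var, ClobberMI);
        HistMap.getEntry(Var, ValueIdx).endEntry(ClobberIdx);
      }
    }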
@@ -131,115 +220,25 @@ static void clobberRegisterUses(RegDescribedVarsMap &RegVars,
// @RegNo by inserting @ClobberingInstr to their history.
static void clobberRegisterUses(RegDescribedVarsMap &RegVars, unsigned RegNo,
DbgValueHistoryMap &HistMap,
+ DbgValueEntriesMap &LiveEntries,
const MachineInstr &ClobberingInstr) {
const auto &I = RegVars.find(RegNo);
if (I == RegVars.end())
return;
- clobberRegisterUses(RegVars, I, HistMap, ClobberingInstr);
-}
-
-// Returns the first instruction in @MBB which corresponds to
-// the function epilogue, or nullptr if @MBB doesn't contain an epilogue.
-static const MachineInstr *getFirstEpilogueInst(const MachineBasicBlock &MBB) {
- auto LastMI = MBB.getLastNonDebugInstr();
- if (LastMI == MBB.end() || !LastMI->isReturn())
- return nullptr;
- // Assume that epilogue starts with instruction having the same debug location
- // as the return instruction.
- DebugLoc LastLoc = LastMI->getDebugLoc();
- auto Res = LastMI;
- for (MachineBasicBlock::const_reverse_iterator I = LastMI.getReverse(),
- E = MBB.rend();
- I != E; ++I) {
- if (I->getDebugLoc() != LastLoc)
- return &*Res;
- Res = &*I;
- }
- // If all instructions have the same debug location, assume whole MBB is
- // an epilogue.
- return &*MBB.begin();
-}
-
-// Collect registers that are modified in the function body (their
-// contents is changed outside of the prologue and epilogue).
-static void collectChangingRegs(const MachineFunction *MF,
- const TargetRegisterInfo *TRI,
- BitVector &Regs) {
- for (const auto &MBB : *MF) {
- auto FirstEpilogueInst = getFirstEpilogueInst(MBB);
-
- for (const auto &MI : MBB) {
- // Avoid looking at prologue or epilogue instructions.
- if (&MI == FirstEpilogueInst)
- break;
- if (MI.getFlag(MachineInstr::FrameSetup))
- continue;
-
- // Look for register defs and register masks. Register masks are
- // typically on calls and they clobber everything not in the mask.
- for (const MachineOperand &MO : MI.operands()) {
- // Skip virtual registers since they are handled by the parent.
- if (MO.isReg() && MO.isDef() && MO.getReg() &&
- !TRI->isVirtualRegister(MO.getReg())) {
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- Regs.set(*AI);
- } else if (MO.isRegMask()) {
- Regs.setBitsNotInMask(MO.getRegMask());
- }
- }
- }
- }
+ clobberRegisterUses(RegVars, I, HistMap, LiveEntries, ClobberingInstr);
}
void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
const TargetRegisterInfo *TRI,
DbgValueHistoryMap &DbgValues,
DbgLabelInstrMap &DbgLabels) {
- BitVector ChangingRegs(TRI->getNumRegs());
- collectChangingRegs(MF, TRI, ChangingRegs);
-
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ unsigned FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
+ DbgValueEntriesMap LiveEntries;
for (const auto &MBB : *MF) {
for (const auto &MI : MBB) {
- if (!MI.isDebugInstr()) {
- // Not a DBG_VALUE instruction. It may clobber registers which describe
- // some variables.
- for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && MO.isDef() && MO.getReg()) {
- // Ignore call instructions that claim to clobber SP. The AArch64
- // backend does this for aggregate function arguments.
- if (MI.isCall() && MO.getReg() == SP)
- continue;
- // If this is a virtual register, only clobber it since it doesn't
- // have aliases.
- if (TRI->isVirtualRegister(MO.getReg()))
- clobberRegisterUses(RegVars, MO.getReg(), DbgValues, MI);
- // If this is a register def operand, it may end a debug value
- // range.
- else {
- for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
- ++AI)
- if (ChangingRegs.test(*AI))
- clobberRegisterUses(RegVars, *AI, DbgValues, MI);
- }
- } else if (MO.isRegMask()) {
- // If this is a register mask operand, clobber all debug values in
- // non-CSRs.
- for (unsigned I : ChangingRegs.set_bits()) {
- // Don't consider SP to be clobbered by register masks.
- if (unsigned(I) != SP && TRI->isPhysicalRegister(I) &&
- MO.clobbersPhysReg(I)) {
- clobberRegisterUses(RegVars, I, DbgValues, MI);
- }
- }
- }
- }
- continue;
- }
-
if (MI.isDebugValue()) {
assert(MI.getNumOperands() > 1 && "Invalid DBG_VALUE instruction!");
// Use the base variable (without any DW_OP_piece expressions)
@@ -250,13 +249,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
"Expected inlined-at fields to agree");
InlinedEntity Var(RawVar, MI.getDebugLoc()->getInlinedAt());
- if (unsigned PrevReg = DbgValues.getRegisterForVar(Var))
- dropRegDescribedVar(RegVars, PrevReg, Var);
-
- DbgValues.startInstrRange(Var, MI);
-
- if (unsigned NewReg = isDescribedByReg(MI))
- addRegDescribedVar(RegVars, NewReg, Var);
+ handleNewDebugValue(Var, MI, RegVars, LiveEntries, DbgValues);
} else if (MI.isDebugLabel()) {
assert(MI.getNumOperands() == 1 && "Invalid DBG_LABEL instruction!");
const DILabel *RawLabel = MI.getDebugLabel();
@@ -268,18 +261,75 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
InlinedEntity L(RawLabel, MI.getDebugLoc()->getInlinedAt());
DbgLabels.addInstr(L, MI);
}
- }
- // Make sure locations for register-described variables are valid only
- // until the end of the basic block (unless it's the last basic block, in
- // which case let their liveness run off to the end of the function).
+ if (MI.isDebugInstr())
+ continue;
+
+ // Not a DBG_VALUE instruction. It may clobber registers which describe
+ // some variables.
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() && MO.getReg()) {
+ // Ignore call instructions that claim to clobber SP. The AArch64
+ // backend does this for aggregate function arguments.
+ if (MI.isCall() && MO.getReg() == SP)
+ continue;
+ // If this is a virtual register, only clobber it since it doesn't
+ // have aliases.
+ if (TRI->isVirtualRegister(MO.getReg()))
+ clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
+ MI);
+ // If this is a register def operand, it may end a debug value
+          // range. Ignore frame-register defs in the epilogue and prologue;
+          // we expect debuggers to understand that stack locations are
+          // invalid outside of the function body.
+ else if (MO.getReg() != FrameReg ||
+ (!MI.getFlag(MachineInstr::FrameDestroy) &&
+ !MI.getFlag(MachineInstr::FrameSetup))) {
+ for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid();
+ ++AI)
+ clobberRegisterUses(RegVars, *AI, DbgValues, LiveEntries, MI);
+ }
+ } else if (MO.isRegMask()) {
+ // If this is a register mask operand, clobber all debug values in
+ // non-CSRs.
+ SmallVector<unsigned, 32> RegsToClobber;
+ // Don't consider SP to be clobbered by register masks.
+ for (auto It : RegVars) {
+          unsigned Reg = It.first;
+ if (Reg != SP && TRI->isPhysicalRegister(Reg) &&
+ MO.clobbersPhysReg(Reg))
+ RegsToClobber.push_back(Reg);
+ }
+
+ for (unsigned Reg : RegsToClobber) {
+ clobberRegisterUses(RegVars, Reg, DbgValues, LiveEntries, MI);
+ }
+ }
+ } // End MO loop.
+ } // End instr loop.
+
+ // Make sure locations for all variables are valid only until the end of
+ // the basic block (unless it's the last basic block, in which case let
+ // their liveness run off to the end of the function).
if (!MBB.empty() && &MBB != &MF->back()) {
- for (auto I = RegVars.begin(), E = RegVars.end(); I != E;) {
- auto CurElem = I++; // CurElem can be erased below.
- if (TRI->isVirtualRegister(CurElem->first) ||
- ChangingRegs.test(CurElem->first))
- clobberRegisterUses(RegVars, CurElem, DbgValues, MBB.back());
+ // Iterate over all variables that have open debug values.
+ for (auto &Pair : LiveEntries) {
+ if (Pair.second.empty())
+ continue;
+
+ // Create a clobbering entry.
+ EntryIndex ClobIdx = DbgValues.startClobber(Pair.first, MBB.back());
+
+ // End all entries.
+ for (EntryIndex Idx : Pair.second) {
+ DbgValueHistoryMap::Entry &Ent = DbgValues.getEntry(Pair.first, Idx);
+ assert(Ent.isDbgValue() && !Ent.isClosed());
+ Ent.endEntry(ClobIdx);
+ }
}
+
+ LiveEntries.clear();
+ RegVars.clear();
}
}
}
@@ -289,7 +339,7 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << "DbgValueHistoryMap:\n";
for (const auto &VarRangePair : *this) {
const InlinedEntity &Var = VarRangePair.first;
- const InstrRanges &Ranges = VarRangePair.second;
+ const Entries &Entries = VarRangePair.second;
const DILocalVariable *LocalVar = cast<DILocalVariable>(Var.first);
const DILocation *Location = Var.second;
@@ -304,10 +354,20 @@ LLVM_DUMP_METHOD void DbgValueHistoryMap::dump() const {
dbgs() << " --\n";
- for (const InstrRange &Range : Ranges) {
- dbgs() << " Begin: " << *Range.first;
- if (Range.second)
- dbgs() << " End : " << *Range.second;
+ for (const auto &E : enumerate(Entries)) {
+ const auto &Entry = E.value();
+ dbgs() << " Entry[" << E.index() << "]: ";
+ if (Entry.isDbgValue())
+ dbgs() << "Debug value\n";
+ else
+ dbgs() << "Clobber\n";
+ dbgs() << " Instr: " << *Entry.getInstr();
+ if (Entry.isDbgValue()) {
+ if (Entry.getEndIndex() == NoEntry)
+ dbgs() << " - Valid until end of function\n";
+ else
+ dbgs() << " - Closed by Entry[" << Entry.getEndIndex() << "]\n";
+ }
dbgs() << "\n";
}
}
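
The hunks above replace the old pair-of-instructions ranges with a flat list of history map entries, where each debug value entry records the index of the entry that eventually closes it. The following is a minimal standalone sketch of that bookkeeping, using simplified stand-in types rather than the real DbgValueHistoryMap classes:

#include <cstddef>
#include <cstdint>
#include <vector>

struct Entry {
  enum Kind { DbgValue, Clobber } K;
  size_t EndIndex = SIZE_MAX; // SIZE_MAX means the value is still open.
  bool isClosed() const { return EndIndex != SIZE_MAX; }
};

struct History {
  std::vector<Entry> Entries;

  // Open a new debug value entry and return its index.
  size_t startDbgValue() {
    Entries.push_back({Entry::DbgValue});
    return Entries.size() - 1;
  }

  // Append a clobbering entry and close every still-open debug value with
  // its index, mirroring what the basic-block boundary code above does.
  void clobberAll() {
    Entries.push_back({Entry::Clobber});
    const size_t ClobIdx = Entries.size() - 1;
    for (size_t I = 0; I != ClobIdx; ++I)
      if (Entries[I].K == Entry::DbgValue && !Entries[I].isClosed())
        Entries[I].EndIndex = ClobIdx;
  }
};
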
diff --git a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
index 551cd36d1984..22f458e4b03e 100644
--- a/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp
@@ -1,9 +1,8 @@
//===-- llvm/lib/CodeGen/AsmPrinter/DebugHandlerBase.cpp -------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -141,10 +140,9 @@ DebugHandlerBase::getFunctionLocalOffsetAfterInsn(const MachineInstr *MI) {
}
/// If this type is derived from a base type then return base type size.
-uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
- DIType *Ty = TyRef.resolve();
+uint64_t DebugHandlerBase::getBaseTypeSize(const DIType *Ty) {
assert(Ty);
- DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
+ const DIDerivedType *DDTy = dyn_cast<DIDerivedType>(Ty);
if (!DDTy)
return Ty->getSizeInBits();
@@ -155,7 +153,7 @@ uint64_t DebugHandlerBase::getBaseTypeSize(const DITypeRef TyRef) {
Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_atomic_type)
return DDTy->getSizeInBits();
- DIType *BaseType = DDTy->getBaseType().resolve();
+ DIType *BaseType = DDTy->getBaseType();
if (!BaseType)
return 0;
@@ -212,36 +210,58 @@ void DebugHandlerBase::beginFunction(const MachineFunction *MF) {
// Request labels for the full history.
for (const auto &I : DbgValues) {
- const auto &Ranges = I.second;
- if (Ranges.empty())
+ const auto &Entries = I.second;
+ if (Entries.empty())
continue;
- // The first mention of a function argument gets the CurrentFnBegin
- // label, so arguments are visible when breaking at function entry.
- const DILocalVariable *DIVar = Ranges.front().first->getDebugVariable();
+ auto IsDescribedByReg = [](const MachineInstr *MI) {
+ return MI->getOperand(0).isReg() && MI->getOperand(0).getReg();
+ };
+
+ // The first mention of a function argument gets the CurrentFnBegin label,
+ // so arguments are visible when breaking at function entry.
+ //
+ // We do not change the label for values that are described by registers,
+ // as that could place them above their defining instructions. We should
+ // ideally not change the labels for constant debug values either, since
+ // doing that violates the ranges that are calculated in the history map.
+ // However, we currently do not emit debug values for constant arguments
+ // directly at the start of the function, so this code is still useful.
+ const DILocalVariable *DIVar =
+ Entries.front().getInstr()->getDebugVariable();
if (DIVar->isParameter() &&
getDISubprogram(DIVar->getScope())->describes(&MF->getFunction())) {
- LabelsBeforeInsn[Ranges.front().first] = Asm->getFunctionBegin();
- if (Ranges.front().first->getDebugExpression()->isFragment()) {
+ if (!IsDescribedByReg(Entries.front().getInstr()))
+ LabelsBeforeInsn[Entries.front().getInstr()] = Asm->getFunctionBegin();
+ if (Entries.front().getInstr()->getDebugExpression()->isFragment()) {
// Mark all non-overlapping initial fragments.
- for (auto I = Ranges.begin(); I != Ranges.end(); ++I) {
- const DIExpression *Fragment = I->first->getDebugExpression();
- if (std::all_of(Ranges.begin(), I,
- [&](DbgValueHistoryMap::InstrRange Pred) {
- return !Fragment->fragmentsOverlap(
- Pred.first->getDebugExpression());
+ for (auto I = Entries.begin(); I != Entries.end(); ++I) {
+ if (!I->isDbgValue())
+ continue;
+ const DIExpression *Fragment = I->getInstr()->getDebugExpression();
+ if (std::any_of(Entries.begin(), I,
+ [&](DbgValueHistoryMap::Entry Pred) {
+ return Pred.isDbgValue() &&
+ Fragment->fragmentsOverlap(
+ Pred.getInstr()->getDebugExpression());
}))
- LabelsBeforeInsn[I->first] = Asm->getFunctionBegin();
- else
break;
+ // The code that generates location lists for DWARF assumes that the
+ // entries' start labels are monotonically increasing, and since we
+ // don't change the label for fragments that are described by
+ // registers, we must bail out when encountering such a fragment.
+ if (IsDescribedByReg(I->getInstr()))
+ break;
+ LabelsBeforeInsn[I->getInstr()] = Asm->getFunctionBegin();
}
}
}
- for (const auto &Range : Ranges) {
- requestLabelBeforeInsn(Range.first);
- if (Range.second)
- requestLabelAfterInsn(Range.second);
+ for (const auto &Entry : Entries) {
+ if (Entry.isDbgValue())
+ requestLabelBeforeInsn(Entry.getInstr());
+ else
+ requestLabelAfterInsn(Entry.getInstr());
}
}
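
Two fragments overlap when their bit ranges intersect; the rewritten loop above uses std::any_of over the preceding entries to detect this. Below is a small self-contained sketch of that test, with FragmentInfo as a simplified stand-in for DIExpression's fragment metadata:

#include <algorithm>
#include <vector>

struct FragmentInfo {
  unsigned OffsetInBits;
  unsigned SizeInBits;
};

// Half-open bit ranges [Offset, Offset + Size) intersect.
static bool fragmentsOverlap(const FragmentInfo &A, const FragmentInfo &B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}

// True if Frag overlaps any fragment that came before it, as in the
// std::any_of check in beginFunction above.
static bool overlapsPredecessor(const std::vector<FragmentInfo> &Preds,
                                const FragmentInfo &Frag) {
  return std::any_of(Preds.begin(), Preds.end(),
                     [&](const FragmentInfo &P) {
                       return fragmentsOverlap(Frag, P);
                     });
}
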
diff --git a/lib/CodeGen/AsmPrinter/DebugLocEntry.h b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
index befa4b941c8d..17e39b3d3268 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocEntry.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocEntry.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DebugLocEntry.h - Entry in debug_loc list -*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -21,6 +20,73 @@
namespace llvm {
class AsmPrinter;
+/// A single location or constant.
+class DbgValueLoc {
+ /// Any complex address location expression for this DbgValueLoc.
+ const DIExpression *Expression;
+
+ /// Type of entry that this represents.
+ enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
+ enum EntryType EntryKind;
+
+ /// Either a constant,
+ union {
+ int64_t Int;
+ const ConstantFP *CFP;
+ const ConstantInt *CIP;
+ } Constant;
+
+ /// Or a location in the machine frame.
+ MachineLocation Loc;
+
+public:
+ DbgValueLoc(const DIExpression *Expr, int64_t i)
+ : Expression(Expr), EntryKind(E_Integer) {
+ Constant.Int = i;
+ }
+ DbgValueLoc(const DIExpression *Expr, const ConstantFP *CFP)
+ : Expression(Expr), EntryKind(E_ConstantFP) {
+ Constant.CFP = CFP;
+ }
+ DbgValueLoc(const DIExpression *Expr, const ConstantInt *CIP)
+ : Expression(Expr), EntryKind(E_ConstantInt) {
+ Constant.CIP = CIP;
+ }
+ DbgValueLoc(const DIExpression *Expr, MachineLocation Loc)
+ : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
+ assert(cast<DIExpression>(Expr)->isValid());
+ }
+
+ bool isLocation() const { return EntryKind == E_Location; }
+ bool isInt() const { return EntryKind == E_Integer; }
+ bool isConstantFP() const { return EntryKind == E_ConstantFP; }
+ bool isConstantInt() const { return EntryKind == E_ConstantInt; }
+ int64_t getInt() const { return Constant.Int; }
+ const ConstantFP *getConstantFP() const { return Constant.CFP; }
+ const ConstantInt *getConstantInt() const { return Constant.CIP; }
+ MachineLocation getLoc() const { return Loc; }
+ bool isFragment() const { return getExpression()->isFragment(); }
+ bool isEntryVal() const { return getExpression()->isEntryValue(); }
+ const DIExpression *getExpression() const { return Expression; }
+ friend bool operator==(const DbgValueLoc &, const DbgValueLoc &);
+ friend bool operator<(const DbgValueLoc &, const DbgValueLoc &);
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+ LLVM_DUMP_METHOD void dump() const {
+ if (isLocation()) {
+ llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
+ if (Loc.isIndirect())
+ llvm::dbgs() << "+0";
+ llvm::dbgs() << "} ";
+ } else if (isConstantInt())
+ Constant.CIP->dump();
+ else if (isConstantFP())
+ Constant.CFP->dump();
+ if (Expression)
+ Expression->dump();
+ }
+#endif
+};
+
/// This struct describes location entries emitted in the .debug_loc
/// section.
class DebugLocEntry {
@@ -28,90 +94,20 @@ class DebugLocEntry {
const MCSymbol *Begin;
const MCSymbol *End;
-public:
- /// A single location or constant.
- struct Value {
- Value(const DIExpression *Expr, int64_t i)
- : Expression(Expr), EntryKind(E_Integer) {
- Constant.Int = i;
- }
- Value(const DIExpression *Expr, const ConstantFP *CFP)
- : Expression(Expr), EntryKind(E_ConstantFP) {
- Constant.CFP = CFP;
- }
- Value(const DIExpression *Expr, const ConstantInt *CIP)
- : Expression(Expr), EntryKind(E_ConstantInt) {
- Constant.CIP = CIP;
- }
- Value(const DIExpression *Expr, MachineLocation Loc)
- : Expression(Expr), EntryKind(E_Location), Loc(Loc) {
- assert(cast<DIExpression>(Expr)->isValid());
- }
-
- /// Any complex address location expression for this Value.
- const DIExpression *Expression;
-
- /// Type of entry that this represents.
- enum EntryType { E_Location, E_Integer, E_ConstantFP, E_ConstantInt };
- enum EntryType EntryKind;
-
- /// Either a constant,
- union {
- int64_t Int;
- const ConstantFP *CFP;
- const ConstantInt *CIP;
- } Constant;
-
- // Or a location in the machine frame.
- MachineLocation Loc;
-
- bool isLocation() const { return EntryKind == E_Location; }
- bool isInt() const { return EntryKind == E_Integer; }
- bool isConstantFP() const { return EntryKind == E_ConstantFP; }
- bool isConstantInt() const { return EntryKind == E_ConstantInt; }
- int64_t getInt() const { return Constant.Int; }
- const ConstantFP *getConstantFP() const { return Constant.CFP; }
- const ConstantInt *getConstantInt() const { return Constant.CIP; }
- MachineLocation getLoc() const { return Loc; }
- bool isFragment() const { return getExpression()->isFragment(); }
- const DIExpression *getExpression() const { return Expression; }
- friend bool operator==(const Value &, const Value &);
- friend bool operator<(const Value &, const Value &);
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const {
- if (isLocation()) {
- llvm::dbgs() << "Loc = { reg=" << Loc.getReg() << " ";
- if (Loc.isIndirect())
- llvm::dbgs() << "+0";
- llvm::dbgs() << "} ";
- }
- else if (isConstantInt())
- Constant.CIP->dump();
- else if (isConstantFP())
- Constant.CFP->dump();
- if (Expression)
- Expression->dump();
- }
-#endif
- };
-
-private:
/// A nonempty list of locations/constants belonging to this entry,
/// sorted by offset.
- SmallVector<Value, 1> Values;
+ SmallVector<DbgValueLoc, 1> Values;
public:
- DebugLocEntry(const MCSymbol *B, const MCSymbol *E, Value Val)
- : Begin(B), End(E) {
- Values.push_back(std::move(Val));
+ /// Create a location list entry for the range [\p Begin, \p End).
+ ///
+ /// \param Vals One or more values describing (parts of) the variable.
+ DebugLocEntry(const MCSymbol *Begin, const MCSymbol *End,
+ ArrayRef<DbgValueLoc> Vals)
+ : Begin(Begin), End(End) {
+ addValues(Vals);
}
- /// If this and Next are describing different pieces of the same
- /// variable, merge them by appending Next's values to the current
- /// list of values.
- /// Return true if the merge was successful.
- bool MergeValues(const DebugLocEntry &Next);
-
/// Attempt to merge this DebugLocEntry with Next and return
/// true if the merge was successful. Entries can be merged if they
/// share the same Loc/Constant and if Next immediately follows this
@@ -127,35 +123,36 @@ public:
const MCSymbol *getBeginSym() const { return Begin; }
const MCSymbol *getEndSym() const { return End; }
- ArrayRef<Value> getValues() const { return Values; }
- void addValues(ArrayRef<DebugLocEntry::Value> Vals) {
+ ArrayRef<DbgValueLoc> getValues() const { return Values; }
+ void addValues(ArrayRef<DbgValueLoc> Vals) {
Values.append(Vals.begin(), Vals.end());
sortUniqueValues();
- assert(all_of(Values, [](DebugLocEntry::Value V) {
- return V.isFragment();
- }) && "value must be a piece");
+ assert((Values.size() == 1 || all_of(Values, [](DbgValueLoc V) {
+ return V.isFragment();
+ })) && "must either have a single value or multiple pieces");
}
// Sort the pieces by offset.
// Remove any duplicate entries by dropping all but the first.
void sortUniqueValues() {
llvm::sort(Values);
- Values.erase(
- std::unique(
- Values.begin(), Values.end(), [](const Value &A, const Value &B) {
- return A.getExpression() == B.getExpression();
- }),
- Values.end());
+ Values.erase(std::unique(Values.begin(), Values.end(),
+ [](const DbgValueLoc &A, const DbgValueLoc &B) {
+ return A.getExpression() == B.getExpression();
+ }),
+ Values.end());
}
/// Lower this entry into a DWARF expression.
- void finalize(const AsmPrinter &AP, DebugLocStream::ListBuilder &List,
- const DIBasicType *BT);
+ void finalize(const AsmPrinter &AP,
+ DebugLocStream::ListBuilder &List,
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU);
};
-/// Compare two Values for equality.
-inline bool operator==(const DebugLocEntry::Value &A,
- const DebugLocEntry::Value &B) {
+/// Compare two DbgValueLocs for equality.
+inline bool operator==(const DbgValueLoc &A,
+ const DbgValueLoc &B) {
if (A.EntryKind != B.EntryKind)
return false;
@@ -163,21 +160,21 @@ inline bool operator==(const DebugLocEntry::Value &A,
return false;
switch (A.EntryKind) {
- case DebugLocEntry::Value::E_Location:
+ case DbgValueLoc::E_Location:
return A.Loc == B.Loc;
- case DebugLocEntry::Value::E_Integer:
+ case DbgValueLoc::E_Integer:
return A.Constant.Int == B.Constant.Int;
- case DebugLocEntry::Value::E_ConstantFP:
+ case DbgValueLoc::E_ConstantFP:
return A.Constant.CFP == B.Constant.CFP;
- case DebugLocEntry::Value::E_ConstantInt:
+ case DbgValueLoc::E_ConstantInt:
return A.Constant.CIP == B.Constant.CIP;
}
llvm_unreachable("unhandled EntryKind");
}
/// Compare two fragments based on their offset.
-inline bool operator<(const DebugLocEntry::Value &A,
- const DebugLocEntry::Value &B) {
+inline bool operator<(const DbgValueLoc &A,
+ const DbgValueLoc &B) {
return A.getExpression()->getFragmentInfo()->OffsetInBits <
B.getExpression()->getFragmentInfo()->OffsetInBits;
}
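
DebugLocEntry::addValues relies on sortUniqueValues to keep fragments ordered by offset and to drop duplicates that share an expression. A sketch of that sort-then-unique idiom under simplified types (Expr stands in for the DIExpression pointer):

#include <algorithm>
#include <vector>

struct Val {
  const void *Expr;    // Identity of the describing expression.
  unsigned FragOffset; // Fragment offset in bits.
};

static void sortUniqueValues(std::vector<Val> &Values) {
  std::sort(Values.begin(), Values.end(),
            [](const Val &A, const Val &B) {
              return A.FragOffset < B.FragOffset;
            });
  // Drop all but the first value for each expression; duplicates add no
  // information to the location description.
  Values.erase(std::unique(Values.begin(), Values.end(),
                           [](const Val &A, const Val &B) {
                             return A.Expr == B.Expr;
                           }),
               Values.end());
}
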
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
index 7e8ed7104af3..f483d532ff07 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.cpp
@@ -1,9 +1,8 @@
//===- DebugLocStream.cpp - DWARF debug_loc stream --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 8dcf5cbc1889..789291771b5a 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -1,9 +1,8 @@
//===--- lib/CodeGen/DebugLocStream.h - DWARF debug_loc stream --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
index 1990456cc555..207a7284dafa 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 1dca3f0fce5b..9548ad9918c1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfCompileUnit.cpp - Dwarf Compile Units ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "DwarfUnit.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -104,7 +104,7 @@ unsigned DwarfCompileUnit::getOrCreateSourceID(const DIFile *File) {
// extend .file to support this.
unsigned CUID = Asm->OutStreamer->hasRawTextSupport() ? 0 : getUniqueID();
if (!File)
- return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", nullptr, None, CUID);
+ return Asm->OutStreamer->EmitDwarfFileDirective(0, "", "", None, None, CUID);
return Asm->OutStreamer->EmitDwarfFileDirective(
0, File->getDirectory(), File->getFilename(), getMD5AsBytes(File),
File->getSource(), CUID);
@@ -119,17 +119,19 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
assert(GV);
auto *GVContext = GV->getScope();
- auto *GTy = DD->resolve(GV->getType());
+ const DIType *GTy = GV->getType();
// Construct the context before querying for the existence of the DIE in
// case such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(GVContext);
+ auto *CB = GVContext ? dyn_cast<DICommonBlock>(GVContext) : nullptr;
+ DIE *ContextDIE = CB ? getOrCreateCommonBlock(CB, GlobalExprs)
+ : getOrCreateContextDIE(GVContext);
// Add to map.
DIE *VariableDIE = &createAndAddDIE(GV->getTag(), *ContextDIE, GV);
DIScope *DeclContext;
if (auto *SDMDecl = GV->getStaticDataMemberDeclaration()) {
- DeclContext = resolve(SDMDecl->getScope());
+ DeclContext = SDMDecl->getScope();
assert(SDMDecl->isStaticMember() && "Expected static member decl");
assert(GV->isDefinition());
// We need the declaration DIE that is in the static member's class.
@@ -137,7 +139,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addDIEEntry(*VariableDIE, dwarf::DW_AT_specification, *VariableSpecDIE);
// If the global variable's type is different from the one in the class
// member type, assume that it's more specific and also emit it.
- if (GTy != DD->resolve(SDMDecl->getBaseType()))
+ if (GTy != SDMDecl->getBaseType())
addType(*VariableDIE, GTy);
} else {
DeclContext = GV->getScope();
@@ -166,8 +168,16 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
addTemplateParams(*VariableDIE, DINodeArray(TP));
// Add location.
+ addLocationAttribute(VariableDIE, GV, GlobalExprs);
+
+ return VariableDIE;
+}
+
+void DwarfCompileUnit::addLocationAttribute(
+ DIE *VariableDIE, const DIGlobalVariable *GV, ArrayRef<GlobalExpr> GlobalExprs) {
bool addToAccelTable = false;
DIELoc *Loc = nullptr;
+ Optional<unsigned> NVPTXAddressSpace;
std::unique_ptr<DIEDwarfExpression> DwarfExpr;
for (const auto &GE : GlobalExprs) {
const GlobalVariable *Global = GE.Var;
@@ -201,8 +211,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
- if (Expr)
+ if (Expr) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+      // correctly interpret the address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
DwarfExpr->addFragmentOffset(Expr);
+ }
if (Global) {
const MCSymbol *Sym = Asm->getSymbol(Global);
@@ -247,6 +273,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DwarfExpr->setMemoryLocationKind();
DwarfExpr->addExpression(Expr);
}
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret the address space of the variable address.
+ const unsigned NVPTX_ADDR_global_space = 5;
+ addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
+ }
if (Loc)
addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
@@ -262,8 +297,25 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
DD->useAllLinkageNames())
DD->addAccelName(*CUNode, GV->getLinkageName(), *VariableDIE);
}
+}
- return VariableDIE;
+DIE *DwarfCompileUnit::getOrCreateCommonBlock(
+ const DICommonBlock *CB, ArrayRef<GlobalExpr> GlobalExprs) {
+ // Construct the context before querying for the existence of the DIE in case
+ // such construction creates the DIE.
+ DIE *ContextDIE = getOrCreateContextDIE(CB->getScope());
+
+ if (DIE *NDie = getDIE(CB))
+ return NDie;
+ DIE &NDie = createAndAddDIE(dwarf::DW_TAG_common_block, *ContextDIE, CB);
+ StringRef Name = CB->getName().empty() ? "_BLNK_" : CB->getName();
+ addString(NDie, dwarf::DW_AT_name, Name);
+ addGlobalName(Name, NDie, CB->getScope());
+ if (CB->getFile())
+ addSourceLine(NDie, CB->getLineNo(), CB->getFile());
+ if (DIGlobalVariable *V = CB->getDecl())
+ getCU().addLocationAttribute(&NDie, V, GlobalExprs);
+ return &NDie;
}
void DwarfCompileUnit::addRange(RangeSpan Range) {
@@ -491,6 +543,8 @@ DIE *DwarfCompileUnit::constructInlinedScopeDIE(LexicalScope *Scope) {
addUInt(*ScopeDIE, dwarf::DW_AT_call_file, None,
getOrCreateSourceID(IA->getFile()));
addUInt(*ScopeDIE, dwarf::DW_AT_call_line, None, IA->getLine());
+ if (IA->getColumn())
+ addUInt(*ScopeDIE, dwarf::DW_AT_call_column, None, IA->getColumn());
if (IA->getDiscriminator() && DD->getDwarfVersion() >= 4)
addUInt(*ScopeDIE, dwarf::DW_AT_GNU_discriminator, None,
IA->getDiscriminator());
@@ -555,36 +609,27 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
return VariableDie;
}
- // Check if variable is described by a DBG_VALUE instruction.
- if (const MachineInstr *DVInsn = DV.getMInsn()) {
- assert(DVInsn->getNumOperands() == 4);
- if (DVInsn->getOperand(0).isReg()) {
- auto RegOp = DVInsn->getOperand(0);
- auto Op1 = DVInsn->getOperand(1);
- // If the second operand is an immediate, this is an indirect value.
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
- MachineLocation Location(RegOp.getReg(), Op1.isImm());
- addVariableAddress(DV, *VariableDie, Location);
- } else if (DVInsn->getOperand(0).isImm()) {
- // This variable is described by a single constant.
- // Check whether it has a DIExpression.
+ // Check if variable has a single location description.
+ if (auto *DVal = DV.getValueLoc()) {
+ if (DVal->isLocation())
+ addVariableAddress(DV, *VariableDie, DVal->getLoc());
+ else if (DVal->isInt()) {
auto *Expr = DV.getSingleExpression();
if (Expr && Expr->getNumElements()) {
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
// If there is an expression, emit raw unsigned bytes.
DwarfExpr.addFragmentOffset(Expr);
- DwarfExpr.addUnsignedConstant(DVInsn->getOperand(0).getImm());
+ DwarfExpr.addUnsignedConstant(DVal->getInt());
DwarfExpr.addExpression(Expr);
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
} else
- addConstantValue(*VariableDie, DVInsn->getOperand(0), DV.getType());
- } else if (DVInsn->getOperand(0).isFPImm())
- addConstantFPValue(*VariableDie, DVInsn->getOperand(0));
- else if (DVInsn->getOperand(0).isCImm())
- addConstantValue(*VariableDie, DVInsn->getOperand(0).getCImm(),
- DV.getType());
-
+ addConstantValue(*VariableDie, DVal->getInt(), DV.getType());
+ } else if (DVal->isConstantFP()) {
+ addConstantFPValue(*VariableDie, DVal->getConstantFP());
+ } else if (DVal->isConstantInt()) {
+ addConstantValue(*VariableDie, DVal->getConstantInt(), DV.getType());
+ }
return VariableDie;
}
@@ -592,6 +637,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
if (!DV.hasFrameIndexExprs())
return VariableDie;
+ Optional<unsigned> NVPTXAddressSpace;
DIELoc *Loc = new (DIEValueAllocator) DIELoc;
DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
for (auto &Fragment : DV.getFrameIndexExprs()) {
@@ -603,7 +649,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
SmallVector<uint64_t, 8> Ops;
Ops.push_back(dwarf::DW_OP_plus_uconst);
Ops.push_back(Offset);
- Ops.append(Expr->elements_begin(), Expr->elements_end());
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+      // correctly interpret the address space of the variable address.
+ // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+ // sequence for the NVPTX + gdb target.
+ unsigned LocalNVPTXAddressSpace;
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ const DIExpression *NewExpr =
+ DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+ if (NewExpr != Expr) {
+ Expr = NewExpr;
+ NVPTXAddressSpace = LocalNVPTXAddressSpace;
+ }
+ }
+ if (Expr)
+ Ops.append(Expr->elements_begin(), Expr->elements_end());
DIExpressionCursor Cursor(Ops);
DwarfExpr.setMemoryLocationKind();
if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
@@ -613,7 +675,19 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
*Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
DwarfExpr.addExpression(std::move(Cursor));
}
+ if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+ // According to
+ // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+ // cuda-gdb requires DW_AT_address_class for all variables to be able to
+      // correctly interpret the address space of the variable address.
+ const unsigned NVPTX_ADDR_local_space = 6;
+ addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+ NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
+ }
addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
+ if (DwarfExpr.TagOffset)
+ addUInt(*VariableDie, dwarf::DW_AT_LLVM_tag_offset, dwarf::DW_FORM_data1,
+ *DwarfExpr.TagOffset);
return VariableDie;
}
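
The NVPTX-for-gdb paths in the two hunks above peel a leading DW_OP_constu <AS>, DW_OP_swap, DW_OP_xderef sequence off the expression and turn it into DW_AT_address_class. A rough standalone sketch of that peeling follows; the opcode constants are the standard DWARF values, but the flat op vector is a simplification of DIExpression:

#include <cstdint>
#include <vector>

enum : uint64_t {
  DW_OP_constu = 0x10,
  DW_OP_swap = 0x16,
  DW_OP_xderef = 0x18
};

// If Ops starts with "constu <AS>, swap, xderef", strip the prefix, report
// the address space in AS, and return true; otherwise leave Ops untouched.
static bool extractAddressClass(std::vector<uint64_t> &Ops, unsigned &AS) {
  if (Ops.size() >= 4 && Ops[0] == DW_OP_constu && Ops[2] == DW_OP_swap &&
      Ops[3] == DW_OP_xderef) {
    AS = static_cast<unsigned>(Ops[1]);
    Ops.erase(Ops.begin(), Ops.begin() + 4);
    return true;
  }
  return false;
}
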
@@ -800,7 +874,7 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextDIE = &getUnitDie();
getOrCreateSubprogramDIE(SPDecl);
} else {
- ContextDIE = getOrCreateContextDIE(resolve(SP->getScope()));
+ ContextDIE = getOrCreateContextDIE(SP->getScope());
// The scope may be shared with a subprogram that has already been
// constructed in another CU, in which case we need to construct this
// subprogram in the same CU.
@@ -849,7 +923,7 @@ DIE *DwarfCompileUnit::constructImportedEntityDIE(
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
insertDIE(Module, IMDie);
DIE *EntityDie;
- auto *Entity = resolve(Module->getEntity());
+ auto *Entity = Module->getEntity();
if (auto *NS = dyn_cast<DINamespace>(Entity))
EntityDie = getOrCreateNameSpace(NS);
else if (auto *M = dyn_cast<DIModule>(Entity))
@@ -958,7 +1032,9 @@ bool DwarfCompileUnit::hasDwarfPubSections() const {
return true;
case DICompileUnit::DebugNameTableKind::Default:
return DD->tuneForGDB() && !includeMinimalInlineScopes() &&
- !CUNode->isDebugDirectivesOnly();
+ !CUNode->isDebugDirectivesOnly() &&
+ DD->getAccelTableKind() != AccelTableKind::Apple &&
+ DD->getDwarfVersion() < 5;
}
llvm_unreachable("Unhandled DICompileUnit::DebugNameTableKind enum");
}
@@ -1054,6 +1130,12 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor(DIExpr);
+
+ if (DIExpr->isEntryValue()) {
+ DwarfExpr.setEntryValueFlag();
+ DwarfExpr.addEntryValueExpression(Cursor);
+ }
+
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -1112,7 +1194,7 @@ void DwarfCompileUnit::addAddressExpr(DIE &Die, dwarf::Attribute Attribute,
void DwarfCompileUnit::applySubprogramAttributesToDefinition(
const DISubprogram *SP, DIE &SPDie) {
auto *SPDecl = SP->getDeclaration();
- auto *Context = resolve(SPDecl ? SPDecl->getScope() : SP->getScope());
+ auto *Context = SPDecl ? SPDecl->getScope() : SP->getScope();
applySubprogramAttributes(SP, SPDie, includeMinimalInlineScopes());
addGlobalName(SP->getName(), SPDie, Context);
}
@@ -1121,6 +1203,10 @@ bool DwarfCompileUnit::isDwoUnit() const {
return DD->useSplitDwarf() && Skeleton;
}
+void DwarfCompileUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ constructTypeDIE(D, CTy);
+}
+
bool DwarfCompileUnit::includeMinimalInlineScopes() const {
return getCUNode()->getEmissionKind() == DICompileUnit::LineTablesOnly ||
(DD->useSplitDwarf() && !Skeleton);
@@ -1134,3 +1220,27 @@ void DwarfCompileUnit::addAddrTableBase() {
: dwarf::DW_AT_GNU_addr_base,
Label, TLOF.getDwarfAddrSection()->getBeginSymbol());
}
+
+void DwarfCompileUnit::addBaseTypeRef(DIEValueList &Die, int64_t Idx) {
+ Die.addValue(DIEValueAllocator, (dwarf::Attribute)0, dwarf::DW_FORM_udata,
+ new (DIEValueAllocator) DIEBaseTypeRef(this, Idx));
+}
+
+void DwarfCompileUnit::createBaseTypeDIEs() {
+  // Insert the base_type DIEs directly after the CU so that their offsets
+  // will fit in the fixed-size ULEB128 used inside the location expressions.
+  // Maintain order by iterating backwards and inserting at the front of the
+  // CU child list.
+ for (auto &Btr : reverse(ExprRefedBaseTypes)) {
+ DIE &Die = getUnitDie().addChildFront(
+ DIE::get(DIEValueAllocator, dwarf::DW_TAG_base_type));
+ SmallString<32> Str;
+ addString(Die, dwarf::DW_AT_name,
+ Twine(dwarf::AttributeEncodingString(Btr.Encoding) +
+ "_" + Twine(Btr.BitSize)).toStringRef(Str));
+ addUInt(Die, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, Btr.Encoding);
+ addUInt(Die, dwarf::DW_AT_byte_size, None, Btr.BitSize / 8);
+
+ Btr.Die = &Die;
+ }
+}
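
createBaseTypeDIEs works because the references emitted into location expressions use a padded, fixed-width ULEB128 that can be resolved before final DIE offsets are known. Here is a sketch of such a padded encoder, mirroring the ULEB128PadSize scheme added to DwarfDebug.cpp further down; this is a minimal model, not the MCStreamer API:

#include <cassert>
#include <cstdint>
#include <vector>

static void emitPaddedULEB128(std::vector<uint8_t> &Out, uint64_t Value,
                              unsigned PadSize) {
  assert(Value < (1ULL << (PadSize * 7)) && "value won't fit in PadSize bytes");
  for (unsigned I = 0; I != PadSize; ++I) {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (I + 1 != PadSize)
      Byte |= 0x80; // Continuation bit on every byte but the last.
    Out.push_back(Byte);
  }
}
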
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index 9ec22f68c12f..ea980dfda17e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfCompileUnit.h - Dwarf Compile Unit -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -101,6 +100,8 @@ class DwarfCompileUnit final : public DwarfUnit {
return DU->getAbstractEntities();
}
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
+
public:
DwarfCompileUnit(unsigned UID, const DICompileUnit *Node, AsmPrinter *A,
DwarfDebug *DW, DwarfFile *DWU);
@@ -125,11 +126,27 @@ public:
const DIExpression *Expr;
};
+ struct BaseTypeRef {
+ BaseTypeRef(unsigned BitSize, dwarf::TypeKind Encoding) :
+ BitSize(BitSize), Encoding(Encoding) {}
+ unsigned BitSize;
+ dwarf::TypeKind Encoding;
+ DIE *Die = nullptr;
+ };
+
+ std::vector<BaseTypeRef> ExprRefedBaseTypes;
+
/// Get or create global variable DIE.
DIE *
getOrCreateGlobalVariableDIE(const DIGlobalVariable *GV,
ArrayRef<GlobalExpr> GlobalExprs);
+ DIE *getOrCreateCommonBlock(const DICommonBlock *CB,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
+ void addLocationAttribute(DIE *ToDIE, const DIGlobalVariable *GV,
+ ArrayRef<GlobalExpr> GlobalExprs);
+
/// addLabelAddress - Add a dwarf label attribute data and value using
/// either DW_FORM_addr or DW_FORM_GNU_addr_index.
void addLabelAddress(DIE &Die, dwarf::Attribute Attribute,
@@ -200,6 +217,8 @@ public:
SmallVectorImpl<DIE *> &Children,
bool *HasNonScopeChildren = nullptr);
+ void createBaseTypeDIEs();
+
/// Construct a DIE for this subprogram scope.
DIE &constructSubprogramScopeDIE(const DISubprogram *Sub,
LexicalScope *Scope);
@@ -314,6 +333,8 @@ public:
void setDWOId(uint64_t DwoId) { DWOId = DwoId; }
bool hasDwarfPubSections() const;
+
+ void addBaseTypeRef(DIEValueList &Die, int64_t Idx);
};
} // end namespace llvm
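
A note on ordering in createBaseTypeDIEs above: addChildFront prepends, so iterating ExprRefedBaseTypes in reverse leaves the children in their original order. A toy model of that inversion-cancelling pattern (the containers here are arbitrary choices, not LLVM's DIE child list):

#include <deque>
#include <vector>

static std::deque<int> frontInsertPreservingOrder(const std::vector<int> &In) {
  std::deque<int> Children;
  // Reverse iteration plus push_front restores the original order of In.
  for (auto It = In.rbegin(); It != In.rend(); ++It)
    Children.push_front(*It);
  return Children;
}
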
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1de2ffb6cfa1..71bb2b0858cc 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfDebug.cpp - Dwarf Debug Framework ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,8 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
+#include "llvm/DebugInfo/DWARF/DWARFDataExtractor.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
@@ -162,6 +163,7 @@ static const char *const DWARFGroupName = "dwarf";
static const char *const DWARFGroupDescription = "DWARF Emission";
static const char *const DbgTimerName = "writer";
static const char *const DbgTimerDescription = "DWARF Debug Writer";
+static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
BS.EmitInt8(
@@ -177,6 +179,15 @@ void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
BS.EmitULEB128(Value, Twine(Value));
}
+void DebugLocDwarfExpression::emitData1(uint8_t Value) {
+ BS.EmitInt8(Value, Twine(Value));
+}
+
+void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+  assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx won't fit");
+ BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+}
+
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
// This information is not available while emitting .debug_loc entries.
@@ -185,11 +196,11 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
bool DbgVariable::isBlockByrefVariable() const {
assert(getVariable() && "Invalid complex DbgVariable!");
- return getVariable()->getType().resolve()->isBlockByrefStruct();
+ return getVariable()->getType()->isBlockByrefStruct();
}
const DIType *DbgVariable::getType() const {
- DIType *Ty = getVariable()->getType().resolve();
+ DIType *Ty = getVariable()->getType();
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (Ty->isBlockByrefStruct()) {
@@ -221,18 +232,55 @@ const DIType *DbgVariable::getType() const {
uint16_t tag = Ty->getTag();
if (tag == dwarf::DW_TAG_pointer_type)
- subType = resolve(cast<DIDerivedType>(Ty)->getBaseType());
+ subType = cast<DIDerivedType>(Ty)->getBaseType();
auto Elements = cast<DICompositeType>(subType)->getElements();
for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
auto *DT = cast<DIDerivedType>(Elements[i]);
if (getName() == DT->getName())
- return resolve(DT->getBaseType());
+ return DT->getBaseType();
}
}
return Ty;
}
+/// Get .debug_loc entry for the instruction range starting at MI.
+static DbgValueLoc getDebugLocValue(const MachineInstr *MI) {
+ const DIExpression *Expr = MI->getDebugExpression();
+ assert(MI->getNumOperands() == 4);
+ if (MI->getOperand(0).isReg()) {
+ auto RegOp = MI->getOperand(0);
+ auto Op1 = MI->getOperand(1);
+ // If the second operand is an immediate, this is a
+ // register-indirect address.
+ assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
+ MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
+ return DbgValueLoc(Expr, MLoc);
+ }
+ if (MI->getOperand(0).isImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getImm());
+ if (MI->getOperand(0).isFPImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getFPImm());
+ if (MI->getOperand(0).isCImm())
+ return DbgValueLoc(Expr, MI->getOperand(0).getCImm());
+
+ llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
+}
+
+void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
+ assert(FrameIndexExprs.empty() && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
+
+ assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
+ assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
+ "Wrong inlined-at");
+
+ ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+ if (auto *E = DbgValue->getDebugExpression())
+ if (E->getNumElements())
+ FrameIndexExprs.push_back({0, E});
+}
+
ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
if (FrameIndexExprs.size() == 1)
return FrameIndexExprs;
@@ -252,8 +300,8 @@ ArrayRef<DbgVariable::FrameIndexExpr> DbgVariable::getFrameIndexExprs() const {
}
void DbgVariable::addMMIEntry(const DbgVariable &V) {
- assert(DebugLocListIndex == ~0U && !MInsn && "not an MMI entry");
- assert(V.DebugLocListIndex == ~0U && !V.MInsn && "not an MMI entry");
+ assert(DebugLocListIndex == ~0U && !ValueLoc.get() && "not an MMI entry");
+ assert(V.DebugLocListIndex == ~0U && !V.ValueLoc.get() && "not an MMI entry");
assert(V.getVariable() == getVariable() && "conflicting variable");
assert(V.getInlinedAt() == getInlinedAt() && "conflicting inlined-at location");
@@ -315,7 +363,7 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M)
IsDarwin(A->TM.getTargetTriple().isOSDarwin()) {
const Triple &TT = Asm->TM.getTargetTriple();
- // Make sure we know our "debugger tuning." The target option takes
+ // Make sure we know our "debugger tuning". The target option takes
// precedence; fall back to triple-based defaults.
if (Asm->TM.Options.DebuggerTuning != DebuggerKind::Default)
DebuggerTuning = Asm->TM.Options.DebuggerTuning;
@@ -658,6 +706,11 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
}
+ // Create DIEs for function declarations used for call site debug info.
+ for (auto Scope : DIUnit->getRetainedTypes())
+ if (auto *SP = dyn_cast_or_null<DISubprogram>(Scope))
+ NewCU.getOrCreateSubprogramDIE(SP);
+
CUMap.insert({DIUnit, &NewCU});
CUDieMap.insert({&NewCU.getUnitDie(), &NewCU});
return NewCU;
@@ -890,13 +943,6 @@ void DwarfDebug::finalizeModuleInfo() {
// ranges for all subprogram DIEs for mach-o.
DwarfCompileUnit &U = SkCU ? *SkCU : TheCU;
- // We don't keep track of which addresses are used in which CU so this
- // is a bit pessimistic under LTO.
- if (!AddrPool.isEmpty() &&
- (getDwarfVersion() >= 5 ||
- (SkCU && !empty(TheCU.getUnitDie().children()))))
- U.addAddrTableBase();
-
if (unsigned NumRanges = TheCU.getRanges().size()) {
if (NumRanges > 1 && useRangesSection())
// A DW_AT_low_pc attribute may also be specified in combination with
@@ -909,6 +955,13 @@ void DwarfDebug::finalizeModuleInfo() {
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
+ // We don't keep track of which addresses are used in which CU so this
+ // is a bit pessimistic under LTO.
+ if (!AddrPool.isEmpty() &&
+ (getDwarfVersion() >= 5 ||
+ (SkCU && !empty(TheCU.getUnitDie().children()))))
+ U.addAddrTableBase();
+
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
@@ -941,6 +994,11 @@ void DwarfDebug::endModule() {
assert(CurFn == nullptr);
assert(CurMI == nullptr);
+ for (const auto &P : CUMap) {
+ auto &CU = *P.second;
+ CU.createBaseTypeDIEs();
+ }
+
// If we aren't actually generating debug info (check beginModule -
// conditionalized on !DisableDebugInfoPrinting and the presence of the
// llvm.dbg.cu metadata node)
@@ -1059,161 +1117,177 @@ void DwarfDebug::collectVariableInfoFromMFTable(
}
}
-// Get .debug_loc entry for the instruction range starting at MI.
-static DebugLocEntry::Value getDebugLocValue(const MachineInstr *MI) {
- const DIExpression *Expr = MI->getDebugExpression();
- assert(MI->getNumOperands() == 4);
- if (MI->getOperand(0).isReg()) {
- auto RegOp = MI->getOperand(0);
- auto Op1 = MI->getOperand(1);
- // If the second operand is an immediate, this is a
- // register-indirect address.
- assert((!Op1.isImm() || (Op1.getImm() == 0)) && "unexpected offset");
- MachineLocation MLoc(RegOp.getReg(), Op1.isImm());
- return DebugLocEntry::Value(Expr, MLoc);
- }
- if (MI->getOperand(0).isImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getImm());
- if (MI->getOperand(0).isFPImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getFPImm());
- if (MI->getOperand(0).isCImm())
- return DebugLocEntry::Value(Expr, MI->getOperand(0).getCImm());
-
- llvm_unreachable("Unexpected 4-operand DBG_VALUE instruction!");
-}
+/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
+/// enclosing lexical scope. The check ensures there are no other instructions
+/// in the same lexical scope preceding the DBG_VALUE and that its range is
+/// either open or otherwise rolls off the end of the scope.
+static bool validThroughout(LexicalScopes &LScopes,
+ const MachineInstr *DbgValue,
+ const MachineInstr *RangeEnd) {
+ assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
+ auto MBB = DbgValue->getParent();
+ auto DL = DbgValue->getDebugLoc();
+ auto *LScope = LScopes.findLexicalScope(DL);
+ // Scope doesn't exist; this is a dead DBG_VALUE.
+ if (!LScope)
+ return false;
+ auto &LSRange = LScope->getRanges();
+ if (LSRange.size() == 0)
+ return false;
-/// If this and Next are describing different fragments of the same
-/// variable, merge them by appending Next's values to the current
-/// list of values.
-/// Return true if the merge was successful.
-bool DebugLocEntry::MergeValues(const DebugLocEntry &Next) {
- if (Begin == Next.Begin) {
- auto *FirstExpr = cast<DIExpression>(Values[0].Expression);
- auto *FirstNextExpr = cast<DIExpression>(Next.Values[0].Expression);
- if (!FirstExpr->isFragment() || !FirstNextExpr->isFragment())
+ // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
+ const MachineInstr *LScopeBegin = LSRange.front().first;
+ // Early exit if the lexical scope begins outside of the current block.
+ if (LScopeBegin->getParent() != MBB)
+ return false;
+ MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
+ for (++Pred; Pred != MBB->rend(); ++Pred) {
+ if (Pred->getFlag(MachineInstr::FrameSetup))
+ break;
+ auto PredDL = Pred->getDebugLoc();
+ if (!PredDL || Pred->isMetaInstruction())
+ continue;
+ // Check whether the instruction preceding the DBG_VALUE is in the same
+ // (sub)scope as the DBG_VALUE.
+ if (DL->getScope() == PredDL->getScope())
+ return false;
+ auto *PredScope = LScopes.findLexicalScope(PredDL);
+ if (!PredScope || LScope->dominates(PredScope))
return false;
+ }
- // We can only merge entries if none of the fragments overlap any others.
- // In doing so, we can take advantage of the fact that both lists are
- // sorted.
- for (unsigned i = 0, j = 0; i < Values.size(); ++i) {
- for (; j < Next.Values.size(); ++j) {
- int res = cast<DIExpression>(Values[i].Expression)->fragmentCmp(
- cast<DIExpression>(Next.Values[j].Expression));
- if (res == 0) // The two expressions overlap, we can't merge.
- return false;
- // Values[i] is entirely before Next.Values[j],
- // so go back to the next entry of Values.
- else if (res == -1)
- break;
- // Next.Values[j] is entirely before Values[i], so go on to the
- // next entry of Next.Values.
- }
- }
+ // If the range of the DBG_VALUE is open-ended, report success.
+ if (!RangeEnd)
+ return true;
- addValues(Next.Values);
- End = Next.End;
+ // Fail if there are instructions belonging to our scope in another block.
+ const MachineInstr *LScopeEnd = LSRange.back().second;
+ if (LScopeEnd->getParent() != MBB)
+ return false;
+
+ // Single, constant DBG_VALUEs in the prologue are promoted to be live
+ // throughout the function. This is a hack, presumably for DWARF v2 and not
+ // necessarily correct. It would be much better to use a dbg.declare instead
+ // if we know the constant is live throughout the scope.
+ if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
return true;
- }
+
return false;
}
/// Build the location list for all DBG_VALUEs in the function that
-/// describe the same variable. If the ranges of several independent
-/// fragments of the same variable overlap partially, split them up and
-/// combine the ranges. The resulting DebugLocEntries are will have
+/// describe the same variable. The resulting DebugLocEntries will have
/// strict monotonically increasing begin addresses and will never
-/// overlap.
+/// overlap. If the resulting list has only one entry that is valid
+/// throughout the variable's scope, return true.
+//
+// See the definition of DbgValueHistoryMap::Entry for an explanation of the
+// different kinds of history map entries. One thing to be aware of is that if
+// a debug value is ended by another entry (rather than being valid until the
+// end of the function), that entry's instruction may or may not be included in
+// the range, depending on if the entry is a clobbering entry (it has an
+// instruction that clobbers one or more preceding locations), or if it is an
+// (overlapping) debug value entry. This distinction can be seen in the example
+// below. The first debug value is ended by the clobbering entry 2, and the
+// second and third debug values are ended by the overlapping debug value entry
+// 4.
//
// Input:
//
-// Ranges History [var, loc, fragment ofs size]
-// 0 | [x, (reg0, fragment 0, 32)]
-// 1 | | [x, (reg1, fragment 32, 32)] <- IsFragmentOfPrevEntry
-// 2 | | ...
-// 3 | [clobber reg0]
-// 4 [x, (mem, fragment 0, 64)] <- overlapping with both previous fragments of
-// x.
+// History map entries [type, end index, mi]
//
-// Output:
+// 0 | [DbgValue, 2, DBG_VALUE $reg0, [...] (fragment 0, 32)]
+// 1 | | [DbgValue, 4, DBG_VALUE $reg1, [...] (fragment 32, 32)]
+// 2 | | [Clobber, $reg0 = [...], -, -]
+// 3 | | [DbgValue, 4, DBG_VALUE 123, [...] (fragment 64, 32)]
+// 4 [DbgValue, ~0, DBG_VALUE @g, [...] (fragment 0, 96)]
//
-// [0-1] [x, (reg0, fragment 0, 32)]
-// [1-3] [x, (reg0, fragment 0, 32), (reg1, fragment 32, 32)]
-// [3-4] [x, (reg1, fragment 32, 32)]
-// [4- ] [x, (mem, fragment 0, 64)]
-void
-DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::InstrRanges &Ranges) {
- SmallVector<DebugLocEntry::Value, 4> OpenRanges;
-
- for (auto I = Ranges.begin(), E = Ranges.end(); I != E; ++I) {
- const MachineInstr *Begin = I->first;
- const MachineInstr *End = I->second;
- assert(Begin->isDebugValue() && "Invalid History entry");
-
- // Check if a variable is inaccessible in this range.
- if (Begin->getNumOperands() > 1 &&
- Begin->getOperand(0).isReg() && !Begin->getOperand(0).getReg()) {
- OpenRanges.clear();
- continue;
- }
-
- // If this fragment overlaps with any open ranges, truncate them.
- const DIExpression *DIExpr = Begin->getDebugExpression();
- auto Last = remove_if(OpenRanges, [&](DebugLocEntry::Value R) {
- return DIExpr->fragmentsOverlap(R.getExpression());
- });
+// Output [start, end) [Value...]:
+//
+// [0-1) [(reg0, fragment 0, 32)]
+// [1-3) [(reg0, fragment 0, 32), (reg1, fragment 32, 32)]
+// [3-4) [(reg1, fragment 32, 32), (123, fragment 64, 32)]
+// [4-) [(@g, fragment 0, 96)]
+bool DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries) {
+ using OpenRange =
+ std::pair<DbgValueHistoryMap::EntryIndex, DbgValueLoc>;
+ SmallVector<OpenRange, 4> OpenRanges;
+ bool isSafeForSingleLocation = true;
+ const MachineInstr *StartDebugMI = nullptr;
+ const MachineInstr *EndMI = nullptr;
+
+ for (auto EB = Entries.begin(), EI = EB, EE = Entries.end(); EI != EE; ++EI) {
+ const MachineInstr *Instr = EI->getInstr();
+
+ // Remove all values that are no longer live.
+ size_t Index = std::distance(EB, EI);
+ auto Last =
+ remove_if(OpenRanges, [&](OpenRange &R) { return R.first <= Index; });
OpenRanges.erase(Last, OpenRanges.end());
- const MCSymbol *StartLabel = getLabelBeforeInsn(Begin);
- assert(StartLabel && "Forgot label before DBG_VALUE starting a range!");
+ // If we are dealing with a clobbering entry, this iteration will result in
+ // a location list entry starting after the clobbering instruction.
+ const MCSymbol *StartLabel =
+ EI->isClobber() ? getLabelAfterInsn(Instr) : getLabelBeforeInsn(Instr);
+ assert(StartLabel &&
+ "Forgot label before/after instruction starting a range!");
const MCSymbol *EndLabel;
- if (End != nullptr)
- EndLabel = getLabelAfterInsn(End);
- else if (std::next(I) == Ranges.end())
+ if (std::next(EI) == Entries.end()) {
EndLabel = Asm->getFunctionEnd();
+ if (EI->isClobber())
+ EndMI = EI->getInstr();
+ }
+    } else if (std::next(EI)->isClobber())
else
- EndLabel = getLabelBeforeInsn(std::next(I)->first);
+ EndLabel = getLabelBeforeInsn(std::next(EI)->getInstr());
assert(EndLabel && "Forgot label after instruction ending a range!");
- LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Begin << "\n");
+ if (EI->isDbgValue())
+ LLVM_DEBUG(dbgs() << "DotDebugLoc: " << *Instr << "\n");
+
+ // If this history map entry has a debug value, add that to the list of
+ // open ranges and check if its location is valid for a single value
+ // location.
+ if (EI->isDbgValue()) {
+ // Do not add undef debug values, as they are redundant information in
+        // the location list entries. An undef debug value results in an empty
+        // location description. If there are any non-undef fragments then
+        // padding pieces with empty location descriptions will automatically
+        // be inserted, and if all fragments are undef then the whole location
+        // list entry is redundant.
+ if (!Instr->isUndefDebugValue()) {
+ auto Value = getDebugLocValue(Instr);
+ OpenRanges.emplace_back(EI->getEndIndex(), Value);
+
+ // TODO: Add support for single value fragment locations.
+ if (Instr->getDebugExpression()->isFragment())
+ isSafeForSingleLocation = false;
+
+ if (!StartDebugMI)
+ StartDebugMI = Instr;
+ } else {
+ isSafeForSingleLocation = false;
+ }
+ }
- auto Value = getDebugLocValue(Begin);
+ // Location list entries with empty location descriptions are redundant
+ // information in DWARF, so do not emit those.
+ if (OpenRanges.empty())
+ continue;
// Omit entries with empty ranges as they do not have any effect in DWARF.
if (StartLabel == EndLabel) {
- // If this is a fragment, we must still add the value to the list of
- // open ranges, since it may describe non-overlapping parts of the
- // variable.
- if (DIExpr->isFragment())
- OpenRanges.push_back(Value);
LLVM_DEBUG(dbgs() << "Omitting location list entry with empty range.\n");
continue;
}
- DebugLocEntry Loc(StartLabel, EndLabel, Value);
- bool couldMerge = false;
-
- // If this is a fragment, it may belong to the current DebugLocEntry.
- if (DIExpr->isFragment()) {
- // Add this value to the list of open ranges.
- OpenRanges.push_back(Value);
-
- // Attempt to add the fragment to the last entry.
- if (!DebugLoc.empty())
- if (DebugLoc.back().MergeValues(Loc))
- couldMerge = true;
- }
-
- if (!couldMerge) {
- // Need to add a new DebugLocEntry. Add all values from still
- // valid non-overlapping fragments.
- if (OpenRanges.size())
- Loc.addValues(OpenRanges);
-
- DebugLoc.push_back(std::move(Loc));
- }
+ SmallVector<DbgValueLoc, 4> Values;
+ for (auto &R : OpenRanges)
+ Values.push_back(R.second);
+ DebugLoc.emplace_back(StartLabel, EndLabel, Values);
// Attempt to coalesce the ranges of two otherwise identical
// DebugLocEntries.
@@ -1229,6 +1303,9 @@ DwarfDebug::buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
if (PrevEntry != DebugLoc.rend() && PrevEntry->MergeRanges(*CurEntry))
DebugLoc.pop_back();
}
+
+ return DebugLoc.size() == 1 && isSafeForSingleLocation &&
+ validThroughout(LScopes, StartDebugMI, EndMI);
}
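
The open-range bookkeeping above is easier to see in isolation: every non-clobbering entry opens a range that stays live until the entry named by its end index, each iteration first drops expired ranges, and a location list entry is produced only while some range is open. A minimal sketch with hypothetical stand-in types (Entry and the string Value are simplifications, not LLVM's types):

    // Illustrative sketch of the open-range sweep; DbgValueHistoryMap
    // entries and DbgValueLoc are reduced to hypothetical stand-ins.
    #include <algorithm>
    #include <cstddef>
    #include <string>
    #include <utility>
    #include <vector>

    struct Entry {
      bool Clobber;         // true if this entry only ends earlier locations
      std::size_t EndIndex; // index of the entry that ends this location
      std::string Value;    // stand-in for a DbgValueLoc
    };

    std::vector<std::vector<std::string>>
    sweep(const std::vector<Entry> &Entries) {
      // Open ranges are (end index, value) pairs, mirroring OpenRanges above.
      std::vector<std::pair<std::size_t, std::string>> OpenRanges;
      std::vector<std::vector<std::string>> ListEntries;
      for (std::size_t I = 0; I != Entries.size(); ++I) {
        // Remove all values that are no longer live at entry I.
        OpenRanges.erase(
            std::remove_if(
                OpenRanges.begin(), OpenRanges.end(),
                [&](const std::pair<std::size_t, std::string> &R) {
                  return R.first <= I;
                }),
            OpenRanges.end());
        if (!Entries[I].Clobber)
          OpenRanges.emplace_back(Entries[I].EndIndex, Entries[I].Value);
        // Entries with empty location descriptions are redundant; skip them.
        if (OpenRanges.empty())
          continue;
        std::vector<std::string> Values;
        for (const auto &R : OpenRanges)
          Values.push_back(R.second);
        ListEntries.push_back(std::move(Values));
      }
      return ListEntries;
    }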
DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
@@ -1253,64 +1330,6 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
return ConcreteEntities.back().get();
}
-/// Determine whether a *singular* DBG_VALUE is valid for the entirety of its
-/// enclosing lexical scope. The check ensures there are no other instructions
-/// in the same lexical scope preceding the DBG_VALUE and that its range is
-/// either open or otherwise rolls off the end of the scope.
-static bool validThroughout(LexicalScopes &LScopes,
- const MachineInstr *DbgValue,
- const MachineInstr *RangeEnd) {
- assert(DbgValue->getDebugLoc() && "DBG_VALUE without a debug location");
- auto MBB = DbgValue->getParent();
- auto DL = DbgValue->getDebugLoc();
- auto *LScope = LScopes.findLexicalScope(DL);
- // Scope doesn't exist; this is a dead DBG_VALUE.
- if (!LScope)
- return false;
- auto &LSRange = LScope->getRanges();
- if (LSRange.size() == 0)
- return false;
-
- // Determine if the DBG_VALUE is valid at the beginning of its lexical block.
- const MachineInstr *LScopeBegin = LSRange.front().first;
- // Early exit if the lexical scope begins outside of the current block.
- if (LScopeBegin->getParent() != MBB)
- return false;
- MachineBasicBlock::const_reverse_iterator Pred(DbgValue);
- for (++Pred; Pred != MBB->rend(); ++Pred) {
- if (Pred->getFlag(MachineInstr::FrameSetup))
- break;
- auto PredDL = Pred->getDebugLoc();
- if (!PredDL || Pred->isMetaInstruction())
- continue;
- // Check whether the instruction preceding the DBG_VALUE is in the same
- // (sub)scope as the DBG_VALUE.
- if (DL->getScope() == PredDL->getScope())
- return false;
- auto *PredScope = LScopes.findLexicalScope(PredDL);
- if (!PredScope || LScope->dominates(PredScope))
- return false;
- }
-
- // If the range of the DBG_VALUE is open-ended, report success.
- if (!RangeEnd)
- return true;
-
- // Fail if there are instructions belonging to our scope in another block.
- const MachineInstr *LScopeEnd = LSRange.back().second;
- if (LScopeEnd->getParent() != MBB)
- return false;
-
- // Single, constant DBG_VALUEs in the prologue are promoted to be live
- // throughout the function. This is a hack, presumably for DWARF v2 and not
- // necessarily correct. It would be much better to use a dbg.declare instead
- // if we know the constant is live throughout the scope.
- if (DbgValue->getOperand(0).isImm() && MBB->pred_empty())
- return true;
-
- return false;
-}
-
// Find variables for each lexical scope.
void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
const DISubprogram *SP,
@@ -1324,8 +1343,8 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
continue;
// Instruction ranges, specifying where IV is accessible.
- const auto &Ranges = I.second;
- if (Ranges.empty())
+ const auto &HistoryMapEntries = I.second;
+ if (HistoryMapEntries.empty())
continue;
LexicalScope *Scope = nullptr;
@@ -1342,15 +1361,24 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
DbgVariable *RegVar = cast<DbgVariable>(createConcreteEntity(TheCU,
*Scope, LocalVar, IV.second));
- const MachineInstr *MInsn = Ranges.front().first;
+ const MachineInstr *MInsn = HistoryMapEntries.front().getInstr();
assert(MInsn->isDebugValue() && "History must begin with debug value");
// Check if there is a single DBG_VALUE, valid throughout the var's scope.
- if (Ranges.size() == 1 &&
- validThroughout(LScopes, MInsn, Ranges.front().second)) {
- RegVar->initializeDbgValue(MInsn);
- continue;
+ // If the history map contains a single debug value, there may be an
+ // additional entry which clobbers the debug value.
+ size_t HistSize = HistoryMapEntries.size();
+ bool SingleValueWithClobber =
+ HistSize == 2 && HistoryMapEntries[1].isClobber();
+ if (HistSize == 1 || SingleValueWithClobber) {
+ const auto *End =
+ SingleValueWithClobber ? HistoryMapEntries[1].getInstr() : nullptr;
+ if (validThroughout(LScopes, MInsn, End)) {
+ RegVar->initializeDbgValue(MInsn);
+ continue;
+ }
}
+
    // Do not emit location lists if .debug_loc section is disabled.
if (!useLocSection())
continue;
@@ -1360,7 +1388,15 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Build the location list for this variable.
SmallVector<DebugLocEntry, 8> Entries;
- buildLocationList(Entries, Ranges);
+ bool isValidSingleLocation = buildLocationList(Entries, HistoryMapEntries);
+
+    // Check whether buildLocationList managed to merge all locations into
+    // one that is valid throughout the variable's scope. If so, produce a
+    // single value location.
+ if (isValidSingleLocation) {
+ RegVar->initializeDbgValue(Entries[0].getValues()[0]);
+ continue;
+ }
// If the variable has a DIBasicType, extract it. Basic types cannot have
// unique identifiers, so don't bother resolving the type with the
@@ -1370,7 +1406,7 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
// Finalize the entry by lowering it into a DWARF bytestream.
for (auto &Entry : Entries)
- Entry.finalize(*Asm, List, BT);
+ Entry.finalize(*Asm, List, BT, TheCU);
}
// For each InlinedEntity collected from DBG_LABEL instructions, convert to
@@ -1489,7 +1525,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) {
// We have an explicit location, different from the previous location.
// Don't repeat a line-0 record, but otherwise emit the new location.
// (The new location might be an explicit line 0, which we do emit.)
- if (PrevInstLoc && DL.getLine() == 0 && LastAsmLine == 0)
+ if (DL.getLine() == 0 && LastAsmLine == 0)
return;
unsigned Flags = 0;
if (DL == PrologEndLoc) {
@@ -1521,6 +1557,46 @@ static DebugLoc findPrologueEndLoc(const MachineFunction *MF) {
return DebugLoc();
}
+/// Register a source line with debug info. Emits the .loc directive that
+/// provides correspondence to the source line list.
+static void recordSourceLine(AsmPrinter &Asm, unsigned Line, unsigned Col,
+ const MDNode *S, unsigned Flags, unsigned CUID,
+ uint16_t DwarfVersion,
+ ArrayRef<std::unique_ptr<DwarfCompileUnit>> DCUs) {
+ StringRef Fn;
+ unsigned FileNo = 1;
+ unsigned Discriminator = 0;
+ if (auto *Scope = cast_or_null<DIScope>(S)) {
+ Fn = Scope->getFilename();
+ if (Line != 0 && DwarfVersion >= 4)
+ if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
+ Discriminator = LBF->getDiscriminator();
+
+ FileNo = static_cast<DwarfCompileUnit &>(*DCUs[CUID])
+ .getOrCreateSourceID(Scope->getFile());
+ }
+ Asm.OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
+ Discriminator, Fn);
+}
+
+DebugLoc DwarfDebug::emitInitialLocDirective(const MachineFunction &MF,
+ unsigned CUID) {
+ // Get beginning of function.
+ if (DebugLoc PrologEndLoc = findPrologueEndLoc(&MF)) {
+ // Ensure the compile unit is created if the function is called before
+ // beginFunction().
+ (void)getOrCreateDwarfCompileUnit(
+ MF.getFunction().getSubprogram()->getUnit());
+ // We'd like to list the prologue as "not statements" but GDB behaves
+ // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
+ const DISubprogram *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
+ ::recordSourceLine(*Asm, SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT,
+ CUID, getDwarfVersion(), getUnits());
+ return PrologEndLoc;
+ }
+ return DebugLoc();
+}
+
// Gather pre-function debug information. Assumes being called immediately
// after the function entry point has been emitted.
void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
@@ -1543,13 +1619,8 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
Asm->OutStreamer->getContext().setDwarfCompileUnitID(CU.getUniqueID());
// Record beginning of function.
- PrologEndLoc = findPrologueEndLoc(MF);
- if (PrologEndLoc) {
- // We'd like to list the prologue as "not statements" but GDB behaves
- // poorly if we do that. Revisit this with caution/GDB (7.5+) testing.
- auto *SP = PrologEndLoc->getInlinedAtScope()->getSubprogram();
- recordSourceLine(SP->getScopeLine(), 0, SP, DWARF2_FLAG_IS_STMT);
- }
+ PrologEndLoc = emitInitialLocDirective(
+ *MF, Asm->OutStreamer->getContext().getDwarfCompileUnitID());
}
void DwarfDebug::skippedNonDebugFunction() {
@@ -1647,21 +1718,9 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
// emitted and which provides correspondence to the source line list.
void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S,
unsigned Flags) {
- StringRef Fn;
- unsigned FileNo = 1;
- unsigned Discriminator = 0;
- if (auto *Scope = cast_or_null<DIScope>(S)) {
- Fn = Scope->getFilename();
- if (Line != 0 && getDwarfVersion() >= 4)
- if (auto *LBF = dyn_cast<DILexicalBlockFile>(Scope))
- Discriminator = LBF->getDiscriminator();
-
- unsigned CUID = Asm->OutStreamer->getContext().getDwarfCompileUnitID();
- FileNo = static_cast<DwarfCompileUnit &>(*InfoHolder.getUnits()[CUID])
- .getOrCreateSourceID(Scope->getFile());
- }
- Asm->OutStreamer->EmitDwarfLocDirective(FileNo, Line, Col, Flags, 0,
- Discriminator, Fn);
+ ::recordSourceLine(*Asm, Line, Col, S, Flags,
+ Asm->OutStreamer->getContext().getDwarfCompileUnitID(),
+ getDwarfVersion(), getUnits());
}
//===----------------------------------------------------------------------===//
@@ -1890,17 +1949,59 @@ void DwarfDebug::emitDebugStr() {
}
void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
- const DebugLocStream::Entry &Entry) {
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
auto &&Comments = DebugLocs.getComments(Entry);
auto Comment = Comments.begin();
auto End = Comments.end();
- for (uint8_t Byte : DebugLocs.getBytes(Entry))
- Streamer.EmitInt8(Byte, Comment != End ? *(Comment++) : "");
+
+ // The expressions are inserted into a byte stream rather early (see
+ // DwarfExpression::addExpression) so for those ops (e.g. DW_OP_convert) that
+ // need to reference a base_type DIE the offset of that DIE is not yet known.
+ // To deal with this we instead insert a placeholder early and then extract
+ // it here and replace it with the real reference.
+ unsigned PtrSize = Asm->MAI->getCodePointerSize();
+ DWARFDataExtractor Data(StringRef(DebugLocs.getBytes(Entry).data(),
+ DebugLocs.getBytes(Entry).size()),
+ Asm->getDataLayout().isLittleEndian(), PtrSize);
+ DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
+
+ using Encoding = DWARFExpression::Operation::Encoding;
+ uint32_t Offset = 0;
+ for (auto &Op : Expr) {
+ assert(Op.getCode() != dwarf::DW_OP_const_type &&
+ "3 operand ops not yet supported");
+ Streamer.EmitInt8(Op.getCode(), Comment != End ? *(Comment++) : "");
+ Offset++;
+ for (unsigned I = 0; I < 2; ++I) {
+ if (Op.getDescription().Op[I] == Encoding::SizeNA)
+ continue;
+ if (Op.getDescription().Op[I] == Encoding::BaseTypeRef) {
+ if (CU) {
+        uint64_t Offset =
+            CU->ExprRefedBaseTypes[Op.getRawOperand(I)].Die->getOffset();
+        assert(Offset < (1ULL << (ULEB128PadSize * 7)) && "Offset won't fit");
+ Asm->EmitULEB128(Offset, nullptr, ULEB128PadSize);
+ } else {
+ // Emit a reference to the 'generic type'.
+ Asm->EmitULEB128(0, nullptr, ULEB128PadSize);
+ }
+ // Make sure comments stay aligned.
+ for (unsigned J = 0; J < ULEB128PadSize; ++J)
+ if (Comment != End)
+ Comment++;
+ } else {
+ for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+ Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
+ }
+ Offset = Op.getOperandEndOffset(I);
+ }
+ assert(Offset == Op.getEndOffset());
+ }
}
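
The patching above works because the index was originally written as a fixed-width, padded ULEB128, so the real base_type DIE offset can later overwrite it without shifting any bytes. A sketch of that padding scheme; the four-byte pad width is an assumption standing in for ULEB128PadSize:

    // Sketch of fixed-width ULEB128 padding: encode Value in exactly PadSize
    // bytes by forcing the continuation bit on all but the last byte, so a
    // placeholder written early can later be patched in place.
    #include <cassert>
    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> encodePaddedULEB128(uint64_t Value, unsigned PadSize) {
      assert(PadSize >= 1 && PadSize <= 9 && "pad size out of range");
      assert(Value < (1ULL << (PadSize * 7)) && "value won't fit");
      std::vector<uint8_t> Bytes;
      for (unsigned I = 0; I != PadSize; ++I) {
        uint8_t Byte = Value & 0x7f;
        Value >>= 7;
        if (I + 1 != PadSize)
          Byte |= 0x80; // continuation bit on every byte but the last
        Bytes.push_back(Byte);
      }
      return Bytes;
    }
    // encodePaddedULEB128(5, 4) yields {0x85, 0x80, 0x80, 0x00}: any decoder
    // reads back 5, and the four-byte width never changes as the value does.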
-static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
- const DebugLocEntry::Value &Value,
- DwarfExpression &DwarfExpr) {
+void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr) {
auto *DIExpr = Value.getExpression();
DIExpressionCursor ExprCursor(DIExpr);
DwarfExpr.addFragmentOffset(DIExpr);
@@ -1916,6 +2017,12 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (Location.isIndirect())
DwarfExpr.setMemoryLocationKind();
DIExpressionCursor Cursor(DIExpr);
+
+ if (DIExpr->isEntryValue()) {
+ DwarfExpr.setEntryValueFlag();
+ DwarfExpr.addEntryValueExpression(Cursor);
+ }
+
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
if (!DwarfExpr.addMachineRegExpression(TRI, Cursor, Location.getReg()))
return;
@@ -1929,38 +2036,50 @@ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
void DebugLocEntry::finalize(const AsmPrinter &AP,
DebugLocStream::ListBuilder &List,
- const DIBasicType *BT) {
+ const DIBasicType *BT,
+ DwarfCompileUnit &TheCU) {
+ assert(!Values.empty() &&
+ "location list entries without values are redundant");
assert(Begin != End && "unexpected location list entry with empty range");
DebugLocStream::EntryBuilder Entry(List, Begin, End);
BufferByteStreamer Streamer = Entry.getStreamer();
- DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer);
- const DebugLocEntry::Value &Value = Values[0];
+ DebugLocDwarfExpression DwarfExpr(AP.getDwarfVersion(), Streamer, TheCU);
+ const DbgValueLoc &Value = Values[0];
if (Value.isFragment()) {
// Emit all fragments that belong to the same variable and range.
- assert(llvm::all_of(Values, [](DebugLocEntry::Value P) {
+ assert(llvm::all_of(Values, [](DbgValueLoc P) {
return P.isFragment();
}) && "all values are expected to be fragments");
assert(std::is_sorted(Values.begin(), Values.end()) &&
"fragments are expected to be sorted");
for (auto Fragment : Values)
- emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
+ DwarfDebug::emitDebugLocValue(AP, BT, Fragment, DwarfExpr);
} else {
assert(Values.size() == 1 && "only fragments may have >1 value");
- emitDebugLocValue(AP, BT, Value, DwarfExpr);
+ DwarfDebug::emitDebugLocValue(AP, BT, Value, DwarfExpr);
}
DwarfExpr.finalize();
}
-void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry) {
+void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU) {
// Emit the size.
Asm->OutStreamer->AddComment("Loc expr size");
- Asm->emitInt16(DebugLocs.getBytes(Entry).size());
-
+ if (getDwarfVersion() >= 5)
+ Asm->EmitULEB128(DebugLocs.getBytes(Entry).size());
+ else if (DebugLocs.getBytes(Entry).size() <= std::numeric_limits<uint16_t>::max())
+ Asm->emitInt16(DebugLocs.getBytes(Entry).size());
+ else {
+    // The entry is too big to fit into 16 bits; drop it, as there is nothing
+    // we can do.
+ Asm->emitInt16(0);
+ return;
+ }
// Emit the entry.
APByteStreamer Streamer(*Asm);
- emitDebugLocEntry(Streamer, Entry);
+ emitDebugLocEntry(Streamer, Entry, CU);
}
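
The branch above encodes one rule: DWARF v5 writes the expression length as a ULEB128, so any size fits, while earlier versions use a fixed 16-bit length, and oversized entries are dropped by writing a zero size. A purely illustrative restatement:

    // Restatement of the size-header rule above; names are hypothetical.
    #include <cstddef>
    #include <cstdint>
    #include <limits>

    enum class SizeHeader { ULEB128, UInt16, DropEntry };

    SizeHeader classifySizeHeader(unsigned DwarfVersion, std::size_t NumBytes) {
      if (DwarfVersion >= 5)
        return SizeHeader::ULEB128; // unbounded ULEB128 length
      if (NumBytes <= std::numeric_limits<uint16_t>::max())
        return SizeHeader::UInt16;  // fixed 16-bit length
      return SizeHeader::DropEntry; // too big for 16 bits: emit size 0
    }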
// Emit the common part of the DWARF 5 range/locations list tables header.
@@ -2060,7 +2179,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
}
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, CU);
continue;
}
@@ -2081,7 +2200,7 @@ void DwarfDebug::emitDebugLoc() {
Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
}
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, CU);
}
if (IsLocLists) {
@@ -2100,9 +2219,9 @@ void DwarfDebug::emitDebugLoc() {
}
void DwarfDebug::emitDebugLocDWO() {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocDWOSection());
for (const auto &List : DebugLocs.getLists()) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocDWOSection());
Asm->OutStreamer->EmitLabel(List.Label);
for (const auto &Entry : DebugLocs.getEntries(List)) {
// GDB only supports startx_length in pre-standard split-DWARF.
@@ -2117,7 +2236,7 @@ void DwarfDebug::emitDebugLocDWO() {
Asm->EmitULEB128(idx);
Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
- emitDebugLocEntryLocation(Entry);
+ emitDebugLocEntryLocation(Entry, List.CU);
}
Asm->emitInt8(dwarf::DW_LLE_end_of_list);
}
@@ -2170,19 +2289,18 @@ void DwarfDebug::emitDebugARanges() {
}
// Sort the symbols by offset within the section.
- std::stable_sort(
- List.begin(), List.end(), [&](const SymbolCU &A, const SymbolCU &B) {
- unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
- unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
-
- // Symbols with no order assigned should be placed at the end.
- // (e.g. section end labels)
- if (IA == 0)
- return false;
- if (IB == 0)
- return true;
- return IA < IB;
- });
+ llvm::stable_sort(List, [&](const SymbolCU &A, const SymbolCU &B) {
+ unsigned IA = A.Sym ? Asm->OutStreamer->GetSymbolOrder(A.Sym) : 0;
+ unsigned IB = B.Sym ? Asm->OutStreamer->GetSymbolOrder(B.Sym) : 0;
+
+ // Symbols with no order assigned should be placed at the end.
+ // (e.g. section end labels)
+ if (IA == 0)
+ return false;
+ if (IB == 0)
+ return true;
+ return IA < IB;
+ });
// Insert a final terminator.
List.push_back(SymbolCU(nullptr, Asm->OutStreamer->endSection(Section)));
@@ -2687,6 +2805,22 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
CU.addDIETypeSignature(RefDie, Signature);
}
+DwarfDebug::NonTypeUnitContext::NonTypeUnitContext(DwarfDebug *DD)
+ : DD(DD),
+ TypeUnitsUnderConstruction(std::move(DD->TypeUnitsUnderConstruction)) {
+ DD->TypeUnitsUnderConstruction.clear();
+ assert(TypeUnitsUnderConstruction.empty() || !DD->AddrPool.hasBeenUsed());
+}
+
+DwarfDebug::NonTypeUnitContext::~NonTypeUnitContext() {
+ DD->TypeUnitsUnderConstruction = std::move(TypeUnitsUnderConstruction);
+ DD->AddrPool.resetUsedFlag();
+}
+
+DwarfDebug::NonTypeUnitContext DwarfDebug::enterNonTypeUnitContext() {
+ return NonTypeUnitContext(this);
+}
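
NonTypeUnitContext is a small RAII helper: construction steals the in-flight type-unit state so nested non-type-unit work starts from a clean slate, and destruction moves the state back (also resetting the address pool's used flag). The same pattern in miniature, with a hypothetical Builder standing in for DwarfDebug:

    // Minimal sketch of the save/restore pattern behind NonTypeUnitContext;
    // Builder and its int "units" are hypothetical stand-ins for the
    // TypeUnitsUnderConstruction list.
    #include <utility>
    #include <vector>

    struct Builder {
      std::vector<int> UnitsUnderConstruction;
    };

    class ScopedNonTypeUnit {
      Builder &B;
      std::vector<int> Saved;

    public:
      explicit ScopedNonTypeUnit(Builder &B)
          : B(B), Saved(std::move(B.UnitsUnderConstruction)) {
        // Start the nested, non-type-unit work from a clean slate.
        B.UnitsUnderConstruction.clear();
      }
      ~ScopedNonTypeUnit() {
        // Restore the interrupted type-unit state on scope exit.
        B.UnitsUnderConstruction = std::move(Saved);
      }
      ScopedNonTypeUnit(const ScopedNonTypeUnit &) = delete;
      ScopedNonTypeUnit &operator=(const ScopedNonTypeUnit &) = delete;
    };
    // Usage: { ScopedNonTypeUnit Scope(B); /* emit the non-type unit */ }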
+
// Add the Name along with its companion DIE to the appropriate accelerator
// table (for AccelTableKind::Dwarf it's always AccelDebugNames, for
// AccelTableKind::Apple, we use the table we got as an argument). If
@@ -2699,7 +2833,7 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
return;
if (getAccelTableKind() != AccelTableKind::Apple &&
- CU.getNameTableKind() == DICompileUnit::DebugNameTableKind::None)
+ CU.getNameTableKind() != DICompileUnit::DebugNameTableKind::Default)
return;
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 8a31e989b289..3ac474e2bdda 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfDebug.h - Dwarf Debug Framework --------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "AddressPool.h"
#include "DebugLocStream.h"
+#include "DebugLocEntry.h"
#include "DwarfFile.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -52,6 +52,7 @@ class ByteStreamer;
class DebugLocEntry;
class DIE;
class DwarfCompileUnit;
+class DwarfExpression;
class DwarfTypeUnit;
class DwarfUnit;
class LexicalScope;
@@ -111,12 +112,14 @@ public:
///
/// Variables can be created from \c DBG_VALUE instructions. Those whose
/// location changes over time use \a DebugLocListIndex, while those with a
-/// single instruction use \a MInsn and (optionally) a single entry of \a Expr.
+/// single location use \a ValueLoc and (optionally) a single entry of \a Expr.
///
/// Variables that have been optimized out use none of these fields.
class DbgVariable : public DbgEntity {
- unsigned DebugLocListIndex = ~0u; /// Offset in DebugLocs.
- const MachineInstr *MInsn = nullptr; /// DBG_VALUE instruction.
+ /// Offset in DebugLocs.
+ unsigned DebugLocListIndex = ~0u;
+ /// Single value location description.
+ std::unique_ptr<DbgValueLoc> ValueLoc = nullptr;
struct FrameIndexExpr {
int FI;
@@ -136,7 +139,7 @@ public:
/// Initialize from the MMI table.
void initializeMMI(const DIExpression *E, int FI) {
assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!MInsn && "Already initialized?");
+ assert(!ValueLoc.get() && "Already initialized?");
assert((!E || E->isValid()) && "Expected valid expression");
assert(FI != std::numeric_limits<int>::max() && "Expected valid index");
@@ -144,35 +147,35 @@ public:
FrameIndexExprs.push_back({FI, E});
}
- /// Initialize from a DBG_VALUE instruction.
- void initializeDbgValue(const MachineInstr *DbgValue) {
+  /// Initialize the variable's location.
+ void initializeDbgValue(DbgValueLoc Value) {
assert(FrameIndexExprs.empty() && "Already initialized?");
- assert(!MInsn && "Already initialized?");
+ assert(!ValueLoc && "Already initialized?");
+ assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
- assert(getVariable() == DbgValue->getDebugVariable() && "Wrong variable");
- assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
- "Wrong inlined-at");
-
- MInsn = DbgValue;
- if (auto *E = DbgValue->getDebugExpression())
+ ValueLoc = llvm::make_unique<DbgValueLoc>(Value);
+ if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
}
+ /// Initialize from a DBG_VALUE instruction.
+ void initializeDbgValue(const MachineInstr *DbgValue);
+
// Accessors.
const DILocalVariable *getVariable() const {
return cast<DILocalVariable>(getEntity());
}
const DIExpression *getSingleExpression() const {
- assert(MInsn && FrameIndexExprs.size() <= 1);
+ assert(ValueLoc.get() && FrameIndexExprs.size() <= 1);
return FrameIndexExprs.size() ? FrameIndexExprs[0].Expr : nullptr;
}
void setDebugLocListIndex(unsigned O) { DebugLocListIndex = O; }
unsigned getDebugLocListIndex() const { return DebugLocListIndex; }
StringRef getName() const { return getVariable()->getName(); }
- const MachineInstr *getMInsn() const { return MInsn; }
+ const DbgValueLoc *getValueLoc() const { return ValueLoc.get(); }
/// Get the FI entries, sorted by fragment offset.
ArrayRef<FrameIndexExpr> getFrameIndexExprs() const;
bool hasFrameIndexExprs() const { return !FrameIndexExprs.empty(); }
@@ -205,7 +208,7 @@ public:
}
bool hasComplexAddress() const {
- assert(MInsn && "Expected DBG_VALUE, not MMI variable");
+ assert(ValueLoc.get() && "Expected DBG_VALUE, not MMI variable");
assert((FrameIndexExprs.empty() ||
(FrameIndexExprs.size() == 1 &&
FrameIndexExprs[0].Expr->getNumElements())) &&
@@ -219,11 +222,6 @@ public:
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgVariableKind;
}
-
-private:
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
};
//===----------------------------------------------------------------------===//
@@ -254,11 +252,6 @@ public:
static bool classof(const DbgEntity *N) {
return N->getDbgEntityID() == DbgLabelKind;
}
-
-private:
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
};
/// Helper used to pair up a symbol and its DWARF compile unit.
@@ -558,9 +551,11 @@ class DwarfDebug : public DebugHandlerBase {
DenseSet<InlinedEntity> &ProcessedVars);
/// Build the location list for all DBG_VALUEs in the
- /// function that describe the same variable.
- void buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
- const DbgValueHistoryMap::InstrRanges &Ranges);
+  /// function that describe the same variable. If the resulting
+  /// list has only one entry that is valid throughout the variable's
+  /// scope, return true.
+ bool buildLocationList(SmallVectorImpl<DebugLocEntry> &DebugLoc,
+ const DbgValueHistoryMap::Entries &Entries);
/// Collect variable information from the side table maintained by MF.
void collectVariableInfoFromMFTable(DwarfCompileUnit &TheCU,
@@ -593,6 +588,9 @@ public:
/// Emit all Dwarf sections that should come after the content.
void endModule() override;
+  /// Emits the initial debug location directive.
+ DebugLoc emitInitialLocDirective(const MachineFunction &MF, unsigned CUID);
+
/// Process beginning of an instruction.
void beginInstruction(const MachineInstr *MI) override;
@@ -604,6 +602,19 @@ public:
void addDwarfTypeUnitType(DwarfCompileUnit &CU, StringRef Identifier,
DIE &Die, const DICompositeType *CTy);
+ friend class NonTypeUnitContext;
+ class NonTypeUnitContext {
+ DwarfDebug *DD;
+ decltype(DwarfDebug::TypeUnitsUnderConstruction) TypeUnitsUnderConstruction;
+ friend class DwarfDebug;
+ NonTypeUnitContext(DwarfDebug *DD);
+ public:
+ NonTypeUnitContext(NonTypeUnitContext&&) = default;
+ ~NonTypeUnitContext();
+ };
+
+ NonTypeUnitContext enterNonTypeUnitContext();
+
/// Add a label so that arange data can be generated for it.
void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); }
@@ -680,15 +691,12 @@ public:
/// Emit an entry for the debug loc section. This can be used to
/// handle an entry that's going to be emitted into the debug loc section.
void emitDebugLocEntry(ByteStreamer &Streamer,
- const DebugLocStream::Entry &Entry);
+ const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
/// Emit the location for a debug loc entry, including the size header.
- void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry);
-
- /// Find the MDNode for the given reference.
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
+ void emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
+ const DwarfCompileUnit *CU);
void addSubprogramNames(const DICompileUnit &CU, const DISubprogram *SP,
DIE &Die);
@@ -728,6 +736,10 @@ public:
void addSectionLabel(const MCSymbol *Sym);
const MCSymbol *getSectionLabel(const MCSection *S);
+
+ static void emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
+ const DbgValueLoc &Value,
+ DwarfExpression &DwarfExpr);
};
} // end namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index b57ea8fc6322..24bbf58b91ec 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -1,9 +1,8 @@
//===-- DwarfException.h - Dwarf Exception Framework -----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 19c350afbf17..2858afaa1cf1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfExpression.cpp - Dwarf Debug Framework -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,6 +11,7 @@
//===----------------------------------------------------------------------===//
#include "DwarfExpression.h"
+#include "DwarfCompileUnit.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -40,7 +40,7 @@ void DwarfExpression::emitConstu(uint64_t Value) {
void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert((LocationKind == Unknown || LocationKind == Register) &&
+ assert((isUnknownLocation() || isRegisterLocation()) &&
"location description already locked down");
LocationKind = Register;
if (DwarfReg < 32) {
@@ -53,7 +53,7 @@ void DwarfExpression::addReg(int DwarfReg, const char *Comment) {
void DwarfExpression::addBReg(int DwarfReg, int Offset) {
assert(DwarfReg >= 0 && "invalid negative dwarf register number");
- assert(LocationKind != Register && "location description already locked down");
+ assert(!isRegisterLocation() && "location description already locked down");
if (DwarfReg < 32) {
emitOp(dwarf::DW_OP_breg0 + DwarfReg);
} else {
@@ -184,20 +184,20 @@ void DwarfExpression::addStackValue() {
}
void DwarfExpression::addSignedConstant(int64_t Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
emitOp(dwarf::DW_OP_consts);
emitSigned(Value);
}
void DwarfExpression::addUnsignedConstant(uint64_t Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
emitConstu(Value);
}
void DwarfExpression::addUnsignedConstant(const APInt &Value) {
- assert(LocationKind == Implicit || LocationKind == Unknown);
+ assert(isImplicitLocation() || isUnknownLocation());
LocationKind = Implicit;
unsigned Size = Value.getBitWidth();
@@ -242,12 +242,16 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
}
// Handle simple register locations.
- if (LocationKind != Memory && !HasComplexExpression) {
+ if (!isMemoryLocation() && !HasComplexExpression) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
addOpPiece(Reg.Size);
}
+
+ if (isEntryValue() && DwarfVersion >= 4)
+ emitOp(dwarf::DW_OP_stack_value);
+
DwarfRegs.clear();
return true;
}
@@ -296,6 +300,19 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
+void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) {
+ auto Op = ExprCursor.take();
+ assert(Op && Op->getOp() == dwarf::DW_OP_entry_value);
+ assert(!isMemoryLocation() &&
+ "We don't support entry values of memory locations yet");
+
+ if (DwarfVersion >= 5)
+ emitOp(dwarf::DW_OP_entry_value);
+ else
+ emitOp(dwarf::DW_OP_GNU_entry_value);
+ emitUnsigned(Op->getArg(0));
+}
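
Both opcodes take a ULEB128 operand giving the size in bytes of the sub-expression that follows; here that size comes straight from the DW_OP_entry_value operand of the DIExpression. A hedged sketch of the resulting byte layout for the common single-register case (opcode values are from the DWARF specification; the trailing DW_OP_stack_value mirrors how addMachineRegExpression finishes an entry value on DWARF 4 and later):

    // Sketch: byte layout of an entry-value location for one register whose
    // DWARF number is below 32 (so DW_OP_reg0+N is a single byte and the
    // sub-expression size operand is 1).
    #include <cstdint>
    #include <vector>

    std::vector<uint8_t> entryValueOfReg(unsigned DwarfVersion,
                                         uint8_t DwarfReg) {
      const uint8_t DW_OP_reg0 = 0x50;
      const uint8_t DW_OP_stack_value = 0x9f;
      const uint8_t DW_OP_entry_value = 0xa3;     // DWARF v5
      const uint8_t DW_OP_GNU_entry_value = 0xf3; // GNU extension, pre-v5
      std::vector<uint8_t> Bytes;
      Bytes.push_back(DwarfVersion >= 5 ? DW_OP_entry_value
                                        : DW_OP_GNU_entry_value);
      Bytes.push_back(1);                     // ULEB128 sub-expression size
      Bytes.push_back(DW_OP_reg0 + DwarfReg); // the sub-expression: one reg
      Bytes.push_back(DW_OP_stack_value);     // a value, not a location
      return Bytes;
    }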
+
/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
static bool isMemoryLocation(DIExpressionCursor ExprCursor) {
while (ExprCursor) {
@@ -319,6 +336,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
if (SubRegisterSizeInBits && N && (N->getOp() != dwarf::DW_OP_LLVM_fragment))
maskSubRegister();
+ Optional<DIExpression::ExprOperand> PrevConvertOp = None;
+
while (ExprCursor) {
auto Op = ExprCursor.take();
switch (Op->getOp()) {
@@ -341,7 +360,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
SizeInBits = std::min<unsigned>(SizeInBits, SubRegisterSizeInBits);
// Emit a DW_OP_stack_value for implicit location descriptions.
- if (LocationKind == Implicit)
+ if (isImplicitLocation())
addStackValue();
// Emit the DW_OP_piece.
@@ -352,7 +371,7 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
return;
}
case dwarf::DW_OP_plus_uconst:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_plus_uconst);
emitUnsigned(Op->getArg(0));
break;
@@ -373,8 +392,8 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
emitOp(Op->getOp());
break;
case dwarf::DW_OP_deref:
- assert(LocationKind != Register);
- if (LocationKind != Memory && ::isMemoryLocation(ExprCursor))
+ assert(!isRegisterLocation());
+ if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
LocationKind = Memory;
@@ -382,26 +401,69 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
emitOp(dwarf::DW_OP_deref);
break;
case dwarf::DW_OP_constu:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitConstu(Op->getArg(0));
break;
+ case dwarf::DW_OP_LLVM_convert: {
+ unsigned BitSize = Op->getArg(0);
+ dwarf::TypeKind Encoding = static_cast<dwarf::TypeKind>(Op->getArg(1));
+ if (DwarfVersion >= 5) {
+ emitOp(dwarf::DW_OP_convert);
+        // Reuse the base_type if we already have one in this CU; otherwise,
+        // create a new one.
+ unsigned I = 0, E = CU.ExprRefedBaseTypes.size();
+ for (; I != E; ++I)
+ if (CU.ExprRefedBaseTypes[I].BitSize == BitSize &&
+ CU.ExprRefedBaseTypes[I].Encoding == Encoding)
+ break;
+
+ if (I == E)
+ CU.ExprRefedBaseTypes.emplace_back(BitSize, Encoding);
+
+        // If targeting a location list, simply emit the index into the raw
+        // byte stream as a ULEB128; DwarfDebug::emitDebugLocEntry has been
+        // fitted with means to extract it later.
+        // If targeting an inlined DW_AT_location, insert a DIEBaseTypeRef
+ // (containing the index and a resolve mechanism during emit) into the
+ // DIE value list.
+ emitBaseTypeRef(I);
+ } else {
+ if (PrevConvertOp && PrevConvertOp->getArg(0) < BitSize) {
+ if (Encoding == dwarf::DW_ATE_signed)
+ emitLegacySExt(PrevConvertOp->getArg(0));
+ else if (Encoding == dwarf::DW_ATE_unsigned)
+ emitLegacyZExt(PrevConvertOp->getArg(0));
+ PrevConvertOp = None;
+ } else {
+ PrevConvertOp = Op;
+ }
+ }
+ break;
+ }
case dwarf::DW_OP_stack_value:
LocationKind = Implicit;
break;
case dwarf::DW_OP_swap:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_swap);
break;
case dwarf::DW_OP_xderef:
- assert(LocationKind != Register);
+ assert(!isRegisterLocation());
emitOp(dwarf::DW_OP_xderef);
break;
+ case dwarf::DW_OP_deref_size:
+ emitOp(dwarf::DW_OP_deref_size);
+ emitData1(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_LLVM_tag_offset:
+ TagOffset = Op->getArg(0);
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
- if (LocationKind == Implicit)
+ if (isImplicitLocation())
// Turn this into an implicit location description.
addStackValue();
}
@@ -437,3 +499,25 @@ void DwarfExpression::addFragmentOffset(const DIExpression *Expr) {
addOpPiece(FragmentOffset - OffsetInBits);
OffsetInBits = FragmentOffset;
}
+
+void DwarfExpression::emitLegacySExt(unsigned FromBits) {
+ // (((X >> (FromBits - 1)) * (~0)) << FromBits) | X
+ emitOp(dwarf::DW_OP_dup);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits - 1);
+ emitOp(dwarf::DW_OP_shr);
+ emitOp(dwarf::DW_OP_lit0);
+ emitOp(dwarf::DW_OP_not);
+ emitOp(dwarf::DW_OP_mul);
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned(FromBits);
+ emitOp(dwarf::DW_OP_shl);
+ emitOp(dwarf::DW_OP_or);
+}
+
+void DwarfExpression::emitLegacyZExt(unsigned FromBits) {
+  // X & ((1 << FromBits) - 1)
+ emitOp(dwarf::DW_OP_constu);
+ emitUnsigned((1ULL << FromBits) - 1);
+ emitOp(dwarf::DW_OP_and);
+}
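
A quick worked check of the two fallbacks, evaluating the commented formulas the way a consumer's 64-bit DWARF expression stack would:

    // Worked check of the legacy sequences above, on 64-bit stack slots.
    #include <cassert>
    #include <cstdint>

    uint64_t legacySExt(uint64_t X, unsigned FromBits) {
      // DW_OP_dup/constu/shr/lit0/not/mul/constu/shl/or, as emitted above.
      assert(FromBits > 0 && FromBits < 64);
      return (((X >> (FromBits - 1)) * ~0ULL) << FromBits) | X;
    }

    uint64_t legacyZExt(uint64_t X, unsigned FromBits) {
      // DW_OP_constu/and, as emitted above.
      assert(FromBits > 0 && FromBits < 64);
      return X & ((1ULL << FromBits) - 1);
    }

    int main() {
      // 0xff as a signed 8-bit value is -1; sign-extending yields all ones.
      assert(legacySExt(0xff, 8) == ~0ULL);
      // 0x7f is non-negative; sign extension leaves it unchanged.
      assert(legacySExt(0x7f, 8) == 0x7f);
      // Zero extension keeps only the low FromBits bits.
      assert(legacyZExt(0x1ff, 8) == 0xff);
    }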
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index 91568ba6d107..ec2ef6e575f7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfExpression.h - Dwarf Compile Unit ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,7 @@ namespace llvm {
class AsmPrinter;
class APInt;
class ByteStreamer;
-class DwarfUnit;
+class DwarfCompileUnit;
class DIELoc;
class TargetRegisterInfo;
@@ -105,23 +104,56 @@ protected:
const char *Comment;
};
+ DwarfCompileUnit &CU;
+
/// The register location, if any.
SmallVector<Register, 2> DwarfRegs;
/// Current Fragment Offset in Bits.
uint64_t OffsetInBits = 0;
- unsigned DwarfVersion;
/// Sometimes we need to add a DW_OP_bit_piece to describe a subregister.
- unsigned SubRegisterSizeInBits = 0;
- unsigned SubRegisterOffsetInBits = 0;
+ unsigned SubRegisterSizeInBits : 16;
+ unsigned SubRegisterOffsetInBits : 16;
/// The kind of location description being produced.
- enum { Unknown = 0, Register, Memory, Implicit } LocationKind = Unknown;
+ enum { Unknown = 0, Register, Memory, Implicit };
+
+  /// The flags of the location description being produced.
+ enum { EntryValue = 1 };
+
+ unsigned LocationKind : 3;
+ unsigned LocationFlags : 2;
+ unsigned DwarfVersion : 4;
+
+public:
+ bool isUnknownLocation() const {
+ return LocationKind == Unknown;
+ }
+
+ bool isMemoryLocation() const {
+ return LocationKind == Memory;
+ }
+
+ bool isRegisterLocation() const {
+ return LocationKind == Register;
+ }
+
+ bool isImplicitLocation() const {
+ return LocationKind == Implicit;
+ }
+
+ bool isEntryValue() const {
+ return LocationFlags & EntryValue;
+ }
+ Optional<uint8_t> TagOffset;
+
+protected:
/// Push a DW_OP_piece / DW_OP_bit_piece for emitting later, if one is needed
/// to represent a subregister.
void setSubRegisterPiece(unsigned SizeInBits, unsigned OffsetInBits) {
+ assert(SizeInBits < 65536 && OffsetInBits < 65536);
SubRegisterSizeInBits = SizeInBits;
SubRegisterOffsetInBits = OffsetInBits;
}
@@ -138,6 +170,10 @@ protected:
/// Emit a raw unsigned value.
virtual void emitUnsigned(uint64_t Value) = 0;
+ virtual void emitData1(uint8_t Value) = 0;
+
+ virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+
/// Emit a normalized unsigned constant.
void emitConstu(uint64_t Value);
@@ -200,7 +236,10 @@ protected:
~DwarfExpression() = default;
public:
- DwarfExpression(unsigned DwarfVersion) : DwarfVersion(DwarfVersion) {}
+ DwarfExpression(unsigned DwarfVersion, DwarfCompileUnit &CU)
+ : CU(CU), SubRegisterSizeInBits(0), SubRegisterOffsetInBits(0),
+ LocationKind(Unknown), LocationFlags(Unknown),
+ DwarfVersion(DwarfVersion) {}
/// This needs to be called last to commit any pending changes.
void finalize();
@@ -214,15 +253,17 @@ public:
/// Emit an unsigned constant.
void addUnsignedConstant(const APInt &Value);
- bool isMemoryLocation() const { return LocationKind == Memory; }
- bool isUnknownLocation() const { return LocationKind == Unknown; }
-
/// Lock this down to become a memory location description.
void setMemoryLocationKind() {
- assert(LocationKind == Unknown);
+ assert(isUnknownLocation());
LocationKind = Memory;
}
+ /// Lock this down to become an entry value location.
+ void setEntryValueFlag() {
+ LocationFlags |= EntryValue;
+ }
+
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
@@ -237,6 +278,9 @@ public:
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
+ /// Emit entry value dwarf operation.
+ void addEntryValueExpression(DIExpressionCursor &ExprCursor);
+
/// Emit all remaining operations in the DIExpressionCursor.
///
/// \param FragmentOffsetInBits If this is one fragment out of multiple
@@ -248,6 +292,9 @@ public:
/// If applicable, emit an empty DW_OP_piece / DW_OP_bit_piece to advance to
/// the fragment described by \c Expr.
void addFragmentOffset(const DIExpression *Expr);
+
+ void emitLegacySExt(unsigned FromBits);
+ void emitLegacyZExt(unsigned FromBits);
};
/// DwarfExpression implementation for .debug_loc entries.
@@ -257,27 +304,30 @@ class DebugLocDwarfExpression final : public DwarfExpression {
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
- DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS)
- : DwarfExpression(DwarfVersion), BS(BS) {}
+  DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS,
+                          DwarfCompileUnit &CU)
+ : DwarfExpression(DwarfVersion, CU), BS(BS) {}
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression final : public DwarfExpression {
const AsmPrinter &AP;
- DwarfUnit &DU;
DIELoc &DIE;
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
+ void emitData1(uint8_t Value) override;
+ void emitBaseTypeRef(uint64_t Idx) override;
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
- DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU, DIELoc &DIE);
+ DIEDwarfExpression(const AsmPrinter &AP, DwarfCompileUnit &CU, DIELoc &DIE);
DIELoc *finalize() {
DwarfExpression::finalize();
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.cpp b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
index 78ccad481411..e3c9095d1343 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfFile.cpp - Dwarf Debug Framework -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -44,6 +43,11 @@ void DwarfFile::emitUnit(DwarfUnit *TheU, bool UseOffsets) {
if (!S)
return;
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (llvm::empty(TheU->getUnitDie().values()))
+ return;
+
Asm->OutStreamer->SwitchSection(S);
TheU->emitHeader(UseOffsets);
Asm->emitDwarfDIE(TheU->getUnitDie());
@@ -63,6 +67,11 @@ void DwarfFile::computeSizeAndOffsets() {
if (TheU->getCUNode()->isDebugDirectivesOnly())
continue;
+ // Skip CUs that ended up not being needed (split CUs that were abandoned
+ // because they added no information beyond the non-split CU)
+ if (llvm::empty(TheU->getUnitDie().values()))
+      continue;
+
TheU->setDebugSectionOffset(SecOffset);
SecOffset += computeSizeAndOffsetsForUnit(TheU.get());
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 51acca8c1e53..244678ce9dc1 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfFile.h - Dwarf Debug Framework ---------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -59,7 +58,6 @@ public:
MCSymbol *getSym() const { return RangeSym; }
const DwarfCompileUnit &getCU() const { return *CU; }
const SmallVectorImpl<RangeSpan> &getRanges() const { return Ranges; }
- void addRange(RangeSpan Range) { Ranges.push_back(Range); }
};
class DwarfFile {
@@ -148,7 +146,7 @@ public:
void emitUnits(bool UseOffsets);
/// Emit the given unit to its section.
- void emitUnit(DwarfUnit *U, bool UseOffsets);
+ void emitUnit(DwarfUnit *TheU, bool UseOffsets);
/// Emit a set of abbreviations to the specific section.
void emitAbbrevs(MCSection *);
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
index 02016534a774..2a76dcb1b082 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfStringPool.cpp - Dwarf Debug Framework -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfStringPool.h b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
index f484540d8d37..c5f5637fdae3 100644
--- a/lib/CodeGen/AsmPrinter/DwarfStringPool.h
+++ b/lib/CodeGen/AsmPrinter/DwarfStringPool.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/DwarfStringPool.h - Dwarf Debug Framework ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 80b365f1aa43..991ab94b50ab 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DwarfUnit.cpp - Dwarf Type and Compile Units ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -47,21 +46,30 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
-DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP, DwarfUnit &DU,
+DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
+ DwarfCompileUnit &CU,
DIELoc &DIE)
- : DwarfExpression(AP.getDwarfVersion()), AP(AP), DU(DU),
+ : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP),
DIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
- DU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+ CU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
- DU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+ CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
- DU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+ CU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+}
+
+void DIEDwarfExpression::emitData1(uint8_t Value) {
+ CU.addUInt(DIE, dwarf::DW_FORM_data1, Value);
+}
+
+void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
+ CU.addBaseTypeRef(DIE, Idx);
}
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -285,21 +293,21 @@ void DwarfUnit::addSectionOffset(DIE &Die, dwarf::Attribute Attribute,
addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer);
}
-MD5::MD5Result *DwarfUnit::getMD5AsBytes(const DIFile *File) const {
+Optional<MD5::MD5Result> DwarfUnit::getMD5AsBytes(const DIFile *File) const {
assert(File);
if (DD->getDwarfVersion() < 5)
- return nullptr;
+ return None;
Optional<DIFile::ChecksumInfo<StringRef>> Checksum = File->getChecksum();
if (!Checksum || Checksum->Kind != DIFile::CSK_MD5)
- return nullptr;
+ return None;
// Convert the string checksum to an MD5Result for the streamer.
// The verifier validates the checksum so we assume it's okay.
// An MD5 checksum is 16 bytes.
std::string ChecksumString = fromHex(Checksum->Value);
- void *CKMem = Asm->OutStreamer->getContext().allocate(16, 1);
- memcpy(CKMem, ChecksumString.data(), 16);
- return reinterpret_cast<MD5::MD5Result *>(CKMem);
+ MD5::MD5Result CKMem;
+ std::copy(ChecksumString.begin(), ChecksumString.end(), CKMem.Bytes.data());
+ return CKMem;
}
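
For reference, the conversion above turns the 32 hex digits stored on the DIFile into the 16 raw bytes the streamer expects. A self-contained sketch of that decoding, with fromHex and MD5Result replaced by hypothetical plain-library equivalents:

    // Sketch of the checksum conversion: a 32-character hex string becomes
    // the 16 raw MD5 bytes. Assumes well-formed input, as the IR verifier
    // guarantees for DIFile checksums.
    #include <array>
    #include <cassert>
    #include <cstdint>
    #include <string>

    std::array<uint8_t, 16> md5FromHex(const std::string &Hex) {
      assert(Hex.size() == 32 && "MD5 is 16 bytes, i.e. 32 hex digits");
      auto Nibble = [](char C) -> uint8_t {
        return C <= '9' ? C - '0' : (C | 0x20) - 'a' + 10;
      };
      std::array<uint8_t, 16> Bytes{};
      for (unsigned I = 0; I != 16; ++I)
        Bytes[I] = (Nibble(Hex[2 * I]) << 4) | Nibble(Hex[2 * I + 1]);
      return Bytes;
    }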
unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
@@ -311,7 +319,9 @@ unsigned DwarfTypeUnit::getOrCreateSourceID(const DIFile *File) {
addSectionOffset(getUnitDie(), dwarf::DW_AT_stmt_list, 0);
}
return SplitLineTable->getFile(File->getDirectory(), File->getFilename(),
- getMD5AsBytes(File), File->getSource());
+ getMD5AsBytes(File),
+ Asm->OutContext.getDwarfVersion(),
+ File->getSource());
}
void DwarfUnit::addOpAddress(DIELoc &Die, const MCSymbol *Sym) {
@@ -393,7 +403,6 @@ void DwarfUnit::addSourceLine(DIE &Die, unsigned Line, const DIFile *File) {
return;
unsigned FileID = getOrCreateSourceID(File);
- assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, None, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, None, Line);
}
@@ -462,9 +471,8 @@ static bool isUnsignedDIType(DwarfDebug *DD, const DIType *Ty) {
assert(T == dwarf::DW_TAG_typedef || T == dwarf::DW_TAG_const_type ||
T == dwarf::DW_TAG_volatile_type ||
T == dwarf::DW_TAG_restrict_type || T == dwarf::DW_TAG_atomic_type);
- DITypeRef Deriv = DTy->getBaseType();
- assert(Deriv && "Expected valid base type");
- return isUnsignedDIType(DD, DD->resolve(Deriv));
+ assert(DTy->getBaseType() && "Expected valid base type");
+ return isUnsignedDIType(DD, DTy->getBaseType());
}
auto *BTy = cast<DIBasicType>(Ty);
@@ -523,6 +531,10 @@ void DwarfUnit::addConstantValue(DIE &Die, const MachineOperand &MO,
addConstantValue(Die, isUnsignedDIType(DD, Ty), MO.getImm());
}
+void DwarfUnit::addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty) {
+ addConstantValue(Die, isUnsignedDIType(DD, Ty), Val);
+}
+
void DwarfUnit::addConstantValue(DIE &Die, bool Unsigned, uint64_t Val) {
// FIXME: This is a bit conservative/simple - it emits negative values always
// sign extended to 64 bits rather than minimizing the number of bytes.
@@ -603,8 +615,8 @@ DIE *DwarfUnit::getOrCreateContextDIE(const DIScope *Context) {
return getDIE(Context);
}
-DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
- auto *Context = resolve(Ty->getScope());
+DIE *DwarfUnit::createTypeDIE(const DICompositeType *Ty) {
+ auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
if (DIE *TyDIE = getDIE(Ty))
@@ -619,6 +631,37 @@ DIE *DwarfTypeUnit::createTypeDIE(const DICompositeType *Ty) {
return &TyDIE;
}
+DIE *DwarfUnit::createTypeDIE(const DIScope *Context, DIE &ContextDIE,
+ const DIType *Ty) {
+ // Create new type.
+ DIE &TyDIE = createAndAddDIE(Ty->getTag(), ContextDIE, Ty);
+
+ updateAcceleratorTables(Context, Ty, TyDIE);
+
+ if (auto *BT = dyn_cast<DIBasicType>(Ty))
+ constructTypeDIE(TyDIE, BT);
+ else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
+ constructTypeDIE(TyDIE, STy);
+ else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
+ if (DD->generateTypeUnits() && !Ty->isForwardDecl() &&
+ (Ty->getRawName() || CTy->getRawIdentifier())) {
+ // Skip updating the accelerator tables since this is not the full type.
+ if (MDString *TypeId = CTy->getRawIdentifier())
+ DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
+ else {
+ auto X = DD->enterNonTypeUnitContext();
+ finishNonUnitTypeDIE(TyDIE, CTy);
+ }
+ return &TyDIE;
+ }
+ constructTypeDIE(TyDIE, CTy);
+ } else {
+ constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
+ }
+
+ return &TyDIE;
+}
+
DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
if (!TyNode)
return nullptr;
@@ -627,43 +670,23 @@ DIE *DwarfUnit::getOrCreateTypeDIE(const MDNode *TyNode) {
// DW_TAG_restrict_type is not supported in DWARF2
if (Ty->getTag() == dwarf::DW_TAG_restrict_type && DD->getDwarfVersion() <= 2)
- return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// DW_TAG_atomic_type is not supported in DWARF < 5
if (Ty->getTag() == dwarf::DW_TAG_atomic_type && DD->getDwarfVersion() < 5)
- return getOrCreateTypeDIE(resolve(cast<DIDerivedType>(Ty)->getBaseType()));
+ return getOrCreateTypeDIE(cast<DIDerivedType>(Ty)->getBaseType());
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- auto *Context = resolve(Ty->getScope());
+ auto *Context = Ty->getScope();
DIE *ContextDIE = getOrCreateContextDIE(Context);
assert(ContextDIE);
if (DIE *TyDIE = getDIE(Ty))
return TyDIE;
- // Create new type.
- DIE &TyDIE = createAndAddDIE(Ty->getTag(), *ContextDIE, Ty);
-
- updateAcceleratorTables(Context, Ty, TyDIE);
-
- if (auto *BT = dyn_cast<DIBasicType>(Ty))
- constructTypeDIE(TyDIE, BT);
- else if (auto *STy = dyn_cast<DISubroutineType>(Ty))
- constructTypeDIE(TyDIE, STy);
- else if (auto *CTy = dyn_cast<DICompositeType>(Ty)) {
- if (DD->generateTypeUnits() && !Ty->isForwardDecl())
- if (MDString *TypeId = CTy->getRawIdentifier()) {
- DD->addDwarfTypeUnitType(getCU(), TypeId->getString(), TyDIE, CTy);
- // Skip updating the accelerator tables since this is not the full type.
- return &TyDIE;
- }
- constructTypeDIE(TyDIE, CTy);
- } else {
- constructTypeDIE(TyDIE, cast<DIDerivedType>(Ty));
- }
-
- return &TyDIE;
+ return static_cast<DwarfUnit *>(ContextDIE->getUnit())
+ ->createTypeDIE(Context, *ContextDIE, Ty);
}
void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
@@ -679,7 +702,7 @@ void DwarfUnit::updateAcceleratorTables(const DIScope *Context,
DD->addAccelType(*CUNode, Ty->getName(), TyDIE, Flags);
if (!Context || isa<DICompileUnit>(Context) || isa<DIFile>(Context) ||
- isa<DINamespace>(Context))
+ isa<DINamespace>(Context) || isa<DICommonBlock>(Context))
addGlobalType(Ty, TyDIE, Context);
}
}
@@ -702,8 +725,8 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
SmallVector<const DIScope *, 1> Parents;
while (!isa<DICompileUnit>(Context)) {
Parents.push_back(Context);
- if (Context->getScope())
- Context = resolve(Context->getScope());
+ if (const DIScope *S = Context->getScope())
+ Context = S;
else
// Structure, etc types will have a NULL context if they're at the top
// level.
@@ -754,7 +777,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
uint16_t Tag = Buffer.getTag();
// Map to main type, void will not have a type.
- const DIType *FromTy = resolve(DTy->getBaseType());
+ const DIType *FromTy = DTy->getBaseType();
if (FromTy)
addType(Buffer, FromTy);
@@ -770,24 +793,23 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DIDerivedType *DTy) {
addUInt(Buffer, dwarf::DW_AT_byte_size, None, Size);
if (Tag == dwarf::DW_TAG_ptr_to_member_type)
- addDIEEntry(
- Buffer, dwarf::DW_AT_containing_type,
- *getOrCreateTypeDIE(resolve(cast<DIDerivedType>(DTy)->getClassType())));
+ addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
+ *getOrCreateTypeDIE(cast<DIDerivedType>(DTy)->getClassType()));
// Add source line info if available and TyDesc is not a forward declaration.
if (!DTy->isForwardDecl())
addSourceLine(Buffer, DTy);
- // If DWARF address space value is other than None, add it for pointer and
- // reference types as DW_AT_address_class.
- if (DTy->getDWARFAddressSpace() && (Tag == dwarf::DW_TAG_pointer_type ||
- Tag == dwarf::DW_TAG_reference_type))
+ // If DWARF address space value is other than None, add it. The IR
+ // verifier checks that DWARF address space only exists for pointer
+ // or reference types.
+ if (DTy->getDWARFAddressSpace())
addUInt(Buffer, dwarf::DW_AT_address_class, dwarf::DW_FORM_data4,
DTy->getDWARFAddressSpace().getValue());
}
void DwarfUnit::constructSubprogramArguments(DIE &Buffer, DITypeRefArray Args) {
for (unsigned i = 1, N = Args.size(); i < N; ++i) {
- const DIType *Ty = resolve(Args[i]);
+ const DIType *Ty = Args[i];
if (!Ty) {
assert(i == N-1 && "Unspecified parameter must be the last argument");
createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer);
@@ -804,7 +826,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DISubroutineType *CTy) {
// Add return type. A void return won't have a type.
auto Elements = cast<DISubroutineType>(CTy)->getTypeArray();
if (Elements.size())
- if (auto RTy = resolve(Elements[0]))
+ if (auto RTy = Elements[0])
addType(Buffer, RTy);
bool isPrototyped = true;
@@ -875,7 +897,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
else if (auto *DDTy = dyn_cast<DIDerivedType>(Element)) {
if (DDTy->getTag() == dwarf::DW_TAG_friend) {
DIE &ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer);
- addType(ElemDie, resolve(DDTy->getBaseType()), dwarf::DW_AT_friend);
+ addType(ElemDie, DDTy->getBaseType(), dwarf::DW_AT_friend);
} else if (DDTy->isStaticMember()) {
getOrCreateStaticMemberDIE(DDTy);
} else if (Tag == dwarf::DW_TAG_variant_part) {
@@ -884,7 +906,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
DIE &Variant = createAndAddDIE(dwarf::DW_TAG_variant, Buffer);
if (const ConstantInt *CI =
dyn_cast_or_null<ConstantInt>(DDTy->getDiscriminantValue())) {
- if (isUnsignedDIType(DD, resolve(Discriminator->getBaseType())))
+ if (isUnsignedDIType(DD, Discriminator->getBaseType()))
addUInt(Variant, dwarf::DW_AT_discr_value, None, CI->getZExtValue());
else
addSInt(Variant, dwarf::DW_AT_discr_value, None, CI->getSExtValue());
@@ -898,7 +920,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
StringRef PropertyName = Property->getName();
addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName);
if (Property->getType())
- addType(ElemDie, resolve(Property->getType()));
+ addType(ElemDie, Property->getType());
addSourceLine(ElemDie, Property);
StringRef GetterName = Property->getGetterName();
if (!GetterName.empty())
@@ -924,7 +946,7 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
// for which it was created.
- if (auto *ContainingType = resolve(CTy->getVTableHolder()))
+ if (auto *ContainingType = CTy->getVTableHolder())
addDIEEntry(Buffer, dwarf::DW_AT_containing_type,
*getOrCreateTypeDIE(ContainingType));
@@ -994,7 +1016,7 @@ void DwarfUnit::constructTemplateTypeParameterDIE(
createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer);
// Add the type if it exists, it could be void and therefore no type.
if (TP->getType())
- addType(ParamDIE, resolve(TP->getType()));
+ addType(ParamDIE, TP->getType());
if (!TP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, TP->getName());
}
@@ -1006,12 +1028,12 @@ void DwarfUnit::constructTemplateValueParameterDIE(
// Add the type if there is one, template template and template parameter
// packs will not have a type.
if (VP->getTag() == dwarf::DW_TAG_template_value_parameter)
- addType(ParamDIE, resolve(VP->getType()));
+ addType(ParamDIE, VP->getType());
if (!VP->getName().empty())
addString(ParamDIE, dwarf::DW_AT_name, VP->getName());
if (Metadata *Val = VP->getValue()) {
if (ConstantInt *CI = mdconst::dyn_extract<ConstantInt>(Val))
- addConstantValue(ParamDIE, CI, resolve(VP->getType()));
+ addConstantValue(ParamDIE, CI, VP->getType());
else if (GlobalValue *GV = mdconst::dyn_extract<GlobalValue>(Val)) {
// We cannot describe the location of dllimport'd entities: the
// computation of their address requires loads from the IAT.
@@ -1085,7 +1107,7 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
// such construction creates the DIE (as is the case for member function
// declarations).
DIE *ContextDIE =
- Minimal ? &getUnitDie() : getOrCreateContextDIE(resolve(SP->getScope()));
+ Minimal ? &getUnitDie() : getOrCreateContextDIE(SP->getScope());
if (DIE *SPDie = getDIE(SP))
return SPDie;
@@ -1107,7 +1129,8 @@ DIE *DwarfUnit::getOrCreateSubprogramDIE(const DISubprogram *SP, bool Minimal) {
if (SP->isDefinition())
return &SPDie;
- applySubprogramAttributes(SP, SPDie);
+ static_cast<DwarfUnit *>(SPDie.getUnit())
+ ->applySubprogramAttributes(SP, SPDie);
return &SPDie;
}
@@ -1197,7 +1220,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
// Add a return type. If this is a type like a C/C++ void type we don't add a
// return type.
if (Args.size())
- if (auto Ty = resolve(Args[0]))
+ if (auto Ty = Args[0])
addType(SPDie, Ty);
unsigned VK = SP->getVirtuality();
@@ -1209,8 +1232,7 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
addUInt(*Block, dwarf::DW_FORM_udata, SP->getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block);
}
- ContainingTypeMap.insert(
- std::make_pair(&SPDie, resolve(SP->getContainingType())));
+ ContainingTypeMap.insert(std::make_pair(&SPDie, SP->getContainingType()));
}
if (!SP->isDefinition()) {
@@ -1261,6 +1283,12 @@ void DwarfUnit::applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
if (SP->isMainSubprogram())
addFlag(SPDie, dwarf::DW_AT_main_subprogram);
+ if (SP->isPure())
+ addFlag(SPDie, dwarf::DW_AT_pure);
+ if (SP->isElemental())
+ addFlag(SPDie, dwarf::DW_AT_elemental);
+ if (SP->isRecursive())
+ addFlag(SPDie, dwarf::DW_AT_recursive);
}
void DwarfUnit::constructSubrangeDIE(DIE &Buffer, const DISubrange *SR,
@@ -1310,7 +1338,7 @@ static bool hasVectorBeenPadded(const DICompositeType *CTy) {
const uint64_t ActualSize = CTy->getSizeInBits();
// Obtain the size of each element in the vector.
- DIType *BaseTy = CTy->getBaseType().resolve();
+ DIType *BaseTy = CTy->getBaseType();
assert(BaseTy && "Unknown vector element type.");
const uint64_t ElementSize = BaseTy->getSizeInBits();
@@ -1338,7 +1366,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
// Emit the element type.
- addType(Buffer, resolve(CTy->getBaseType()));
+ addType(Buffer, CTy->getBaseType());
// Get an anonymous type for index type.
// FIXME: This type should be passed down from the front end
@@ -1356,7 +1384,7 @@ void DwarfUnit::constructArrayTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
}
void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
- const DIType *DTy = resolve(CTy->getBaseType());
+ const DIType *DTy = CTy->getBaseType();
bool IsUnsigned = DTy && isUnsignedDIType(DD, DTy);
if (DTy) {
if (DD->getDwarfVersion() >= 3)
@@ -1365,6 +1393,9 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addFlag(Buffer, dwarf::DW_AT_enum_class);
}
+ auto *Context = CTy->getScope();
+  bool IndexEnumerators = !Context || isa<DICompileUnit>(Context) ||
+      isa<DIFile>(Context) || isa<DINamespace>(Context) ||
+      isa<DICommonBlock>(Context);
DINodeArray Elements = CTy->getElements();
// Add enumerators to enumeration type.
@@ -1376,6 +1407,8 @@ void DwarfUnit::constructEnumTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
addString(Enumerator, dwarf::DW_AT_name, Name);
auto Value = static_cast<uint64_t>(Enum->getValue());
addConstantValue(Enumerator, IsUnsigned, Value);
+ if (IndexEnumerators)
+ addGlobalName(Name, Enumerator, Context);
}
}
}
@@ -1400,7 +1433,7 @@ DIE &DwarfUnit::constructMemberDIE(DIE &Buffer, const DIDerivedType *DT) {
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, Name);
- if (DIType *Resolved = resolve(DT->getBaseType()))
+ if (DIType *Resolved = DT->getBaseType())
addType(MemberDie, Resolved);
addSourceLine(MemberDie, DT);
@@ -1509,7 +1542,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
// Construct the context before querying for the existence of the DIE in case
// such construction creates the DIE.
- DIE *ContextDIE = getOrCreateContextDIE(resolve(DT->getScope()));
+ DIE *ContextDIE = getOrCreateContextDIE(DT->getScope());
assert(dwarf::isType(ContextDIE->getTag()) &&
"Static member should belong to a type.");
@@ -1518,7 +1551,7 @@ DIE *DwarfUnit::getOrCreateStaticMemberDIE(const DIDerivedType *DT) {
DIE &StaticMemberDIE = createAndAddDIE(DT->getTag(), *ContextDIE, DT);
- const DIType *Ty = resolve(DT->getBaseType());
+ const DIType *Ty = DT->getBaseType();
addString(StaticMemberDIE, dwarf::DW_AT_name, DT->getName());
addType(StaticMemberDIE, Ty);
@@ -1671,3 +1704,11 @@ void DwarfUnit::addLoclistsBase() {
DU->getLoclistsTableBaseSym(),
TLOF.getDwarfLoclistsSection()->getBeginSymbol());
}
+
+void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
+ addFlag(D, dwarf::DW_AT_declaration);
+ StringRef Name = CTy->getName();
+ if (!Name.empty())
+ addString(D, dwarf::DW_AT_name, Name);
+ getCU().createTypeDIE(CTy);
+}
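// Illustrative sketch (not part of the patch): for a composite type the type
// unit cannot own outright, finishNonUnitTypeDIE leaves only a named
// declaration stub in the type unit and lets the compile unit build the full
// definition, roughly:
//
//   DW_TAG_structure_type        (in the type unit)
//     DW_AT_declaration   true
//     DW_AT_name          "Foo"  (only when the type is named)
//
// while getCU().createTypeDIE(CTy) emits the complete DIE in the CU.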
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index a59ebb7c1465..56c934a35ae8 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/DwarfUnit.h - Dwarf Compile Unit ---*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -199,6 +198,7 @@ public:
void addConstantValue(DIE &Die, const ConstantInt *CI, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, const DIType *Ty);
void addConstantValue(DIE &Die, const APInt &Val, bool Unsigned);
+ void addConstantValue(DIE &Die, uint64_t Val, const DIType *Ty);
void addConstantValue(DIE &Die, bool Unsigned, uint64_t Val);
/// Add constant value entry in variable DIE.
@@ -237,6 +237,9 @@ public:
void applySubprogramAttributes(const DISubprogram *SP, DIE &SPDie,
bool SkipSPAttributes = false);
+ /// Creates type DIE with specific context.
+ DIE *createTypeDIE(const DIScope *Context, DIE &ContextDIE, const DIType *Ty);
+
/// Find existing DIE or create new DIE for the given type.
DIE *getOrCreateTypeDIE(const MDNode *TyNode);
@@ -294,7 +297,10 @@ public:
/// If the \p File has an MD5 checksum, return it as an MD5Result
/// allocated in the MCContext.
- MD5::MD5Result *getMD5AsBytes(const DIFile *File) const;
+ Optional<MD5::MD5Result> getMD5AsBytes(const DIFile *File) const;
+
+ /// Get context owner's DIE.
+ DIE *createTypeDIE(const DICompositeType *Ty);
protected:
~DwarfUnit();
@@ -306,17 +312,6 @@ protected:
/// create a new ID and insert it in the line table.
virtual unsigned getOrCreateSourceID(const DIFile *File) = 0;
- /// Look in the DwarfDebug map for the MDNode that corresponds to the
- /// reference.
- template <typename T> T *resolve(TypedDINodeRef<T> Ref) const {
- return Ref.resolve();
- }
-
- /// If this is a named finished type then include it in the list of types for
- /// the accelerator tables.
- void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
- const DIE &TyDIE);
-
/// Emit the common part of the header for this unit.
void emitCommonHeader(bool UseOffsets, dwarf::UnitType UT);
@@ -344,6 +339,13 @@ private:
/// Set D as anonymous type for index which can be reused later.
void setIndexTyDie(DIE *D) { IndexTyDie = D; }
+ virtual void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) = 0;
+
+ /// If this is a named finished type then include it in the list of types for
+ /// the accelerator tables.
+ void updateAcceleratorTables(const DIScope *Context, const DIType *Ty,
+ const DIE &TyDIE);
+
virtual bool isDwoUnit() const = 0;
const MCSymbol *getCrossSectionRelativeBaseAddress() const override;
};
@@ -356,6 +358,7 @@ class DwarfTypeUnit final : public DwarfUnit {
bool UsedLineTable = false;
unsigned getOrCreateSourceID(const DIFile *File) override;
+ void finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) override;
bool isDwoUnit() const override;
public:
@@ -365,9 +368,6 @@ public:
void setTypeSignature(uint64_t Signature) { TypeSignature = Signature; }
void setType(const DIE *Ty) { this->Ty = Ty; }
- /// Get context owner's DIE.
- DIE *createTypeDIE(const DICompositeType *Ty);
-
/// Emit the header for this unit, not including the initial length field.
void emitHeader(bool UseOffsets) override;
unsigned getHeaderSize() const override {
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 7599121de2b0..99e3687b36b8 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -1,9 +1,8 @@
//===- CodeGen/AsmPrinter/EHStreamer.cpp - Exception Directive Streamer ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -379,7 +378,8 @@ MCSymbol *EHStreamer::emitExceptionTable() {
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool IsWasm = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Wasm;
unsigned CallSiteEncoding =
- IsSJLJ ? dwarf::DW_EH_PE_udata4 : dwarf::DW_EH_PE_uleb128;
+ IsSJLJ ? static_cast<unsigned>(dwarf::DW_EH_PE_udata4) :
+ Asm->getObjFileLowering().getCallSiteEncoding();
bool HaveTTData = !TypeInfos.empty() || !FilterIds.empty();
// Type infos.
@@ -524,24 +524,24 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// Offset of the call site relative to the start of the procedure.
if (VerboseAsm)
Asm->OutStreamer->AddComment(">> Call Site " + Twine(++Entry) + " <<");
- Asm->EmitLabelDifferenceAsULEB128(BeginLabel, EHFuncBeginSym);
+ Asm->EmitCallSiteOffset(BeginLabel, EHFuncBeginSym, CallSiteEncoding);
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" Call between ") +
BeginLabel->getName() + " and " +
EndLabel->getName());
- Asm->EmitLabelDifferenceAsULEB128(EndLabel, BeginLabel);
+ Asm->EmitCallSiteOffset(EndLabel, BeginLabel, CallSiteEncoding);
// Offset of the landing pad relative to the start of the procedure.
if (!S.LPad) {
if (VerboseAsm)
Asm->OutStreamer->AddComment(" has no landing pad");
- Asm->EmitULEB128(0);
+ Asm->EmitCallSiteValue(0, CallSiteEncoding);
} else {
if (VerboseAsm)
Asm->OutStreamer->AddComment(Twine(" jumps to ") +
S.LPad->LandingPadLabel->getName());
- Asm->EmitLabelDifferenceAsULEB128(S.LPad->LandingPadLabel,
- EHFuncBeginSym);
+ Asm->EmitCallSiteOffset(S.LPad->LandingPadLabel, EHFuncBeginSym,
+ CallSiteEncoding);
}
// Offset of the first associated action record, relative to the start of
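// Illustrative note (the override below is hypothetical): the call-site
// encoding now comes from the target's object-file lowering instead of being
// hard-coded to uleb128, with SjLj still pinned to udata4. Assuming the hook
// is meant to be overridden, a target wanting fixed-width call-site tables
// could return a fixed encoding:
//
//   unsigned MyTargetLoweringObjectFile::getCallSiteEncoding() const {
//     return dwarf::DW_EH_PE_udata4; // 4-byte entries instead of ULEB128
//   }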
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.h b/lib/CodeGen/AsmPrinter/EHStreamer.h
index ce912d032c6d..e62cf17a05d4 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.h
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.h
@@ -1,9 +1,8 @@
//===- EHStreamer.h - Exception Handling Directive Streamer -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 34677ecc9e69..39392b79e960 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -1,9 +1,8 @@
//===- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3479a00def23..3145cc90dc73 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -1,9 +1,8 @@
//===- OcamlGCPrinter.cpp - Ocaml frametable emitter ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WasmException.cpp b/lib/CodeGen/AsmPrinter/WasmException.cpp
index 527e5ae50146..444b0ed17b6d 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.cpp
+++ b/lib/CodeGen/AsmPrinter/WasmException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WasmException.cpp - Wasm Exception Impl --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -19,10 +18,10 @@
using namespace llvm;
void WasmException::endModule() {
- // This is the symbol used in 'throw' and 'if_except' instruction to denote
+  // This is the symbol used in 'throw' and 'br_on_exn' instructions to denote
// this is a C++ exception. This symbol has to be emitted somewhere once in
// the module. Check if the symbol has already been created, i.e., we have at
- // least one 'throw' or 'if_except' instruction in the module, and emit the
+ // least one 'throw' or 'br_on_exn' instruction in the module, and emit the
// symbol only if so.
SmallString<60> NameStr;
Mangler::getNameWithPrefix(NameStr, "__cpp_exception", Asm->getDataLayout());
diff --git a/lib/CodeGen/AsmPrinter/WasmException.h b/lib/CodeGen/AsmPrinter/WasmException.h
index cbdb42457cf8..1893b6b2df43 100644
--- a/lib/CodeGen/AsmPrinter/WasmException.h
+++ b/lib/CodeGen/AsmPrinter/WasmException.h
@@ -1,9 +1,8 @@
//===-- WasmException.h - Wasm Exception Framework -------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
index 18d37caf57ee..290be81c6baa 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WinCFGuard.cpp - Control Flow Guard Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinCFGuard.h b/lib/CodeGen/AsmPrinter/WinCFGuard.h
index 28f119e35966..def0a59ab007 100644
--- a/lib/CodeGen/AsmPrinter/WinCFGuard.h
+++ b/lib/CodeGen/AsmPrinter/WinCFGuard.h
@@ -1,9 +1,8 @@
//===-- WinCFGuard.h - Windows Control Flow Guard Handling ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index cf8e8c69bc2a..155e91ce61a1 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen/AsmPrinter/WinException.cpp - Dwarf Exception Impl ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -110,6 +109,12 @@ void WinException::beginFunction(const MachineFunction *MF) {
beginFunclet(MF->front(), Asm->CurrentFnSym);
}
+void WinException::markFunctionEnd() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality))
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+}
+
/// endFunction - Gather and emit post-function exception information.
///
void WinException::endFunction(const MachineFunction *MF) {
@@ -129,7 +134,7 @@ void WinException::endFunction(const MachineFunction *MF) {
NonConstMF->tidyLandingPads();
}
- endFunclet();
+ endFuncletImpl();
// endFunclet will emit the necessary .xdata tables for x64 SEH.
if (Per == EHPersonality::MSVC_Win64SEH && MF->hasEHFunclets())
@@ -232,6 +237,15 @@ void WinException::beginFunclet(const MachineBasicBlock &MBB,
}
void WinException::endFunclet() {
+ if (isAArch64 && CurrentFuncletEntry &&
+ (shouldEmitMoves || shouldEmitPersonality)) {
+ Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
+ Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
+ }
+ endFuncletImpl();
+}
+
+void WinException::endFuncletImpl() {
// No funclet to process? Great, we have nothing to do.
if (!CurrentFuncletEntry)
return;
@@ -247,8 +261,6 @@ void WinException::endFunclet() {
// to EmitWinEHHandlerData below can calculate the size of the funclet or
// function.
if (isAArch64) {
- Asm->OutStreamer->SwitchSection(CurrentFuncletTextSection);
- Asm->OutStreamer->EmitWinCFIFuncletOrFuncEnd();
MCSection *XData = Asm->OutStreamer->getAssociatedXDataSection(
Asm->OutStreamer->getCurrentSectionOnly());
Asm->OutStreamer->SwitchSection(XData);
@@ -545,15 +557,17 @@ void WinException::emitCSpecificHandlerTable(const MachineFunction *MF) {
OS.AddComment(Comment);
};
- // Emit a label assignment with the SEH frame offset so we can use it for
- // llvm.eh.recoverfp.
- StringRef FLinkageName =
- GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
- MCSymbol *ParentFrameOffset =
- Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
- const MCExpr *MCOffset =
- MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
- Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ if (!isAArch64) {
+ // Emit a label assignment with the SEH frame offset so we can use it for
+ // llvm.eh.recoverfp.
+ StringRef FLinkageName =
+ GlobalValue::dropLLVMManglingEscape(MF->getFunction().getName());
+ MCSymbol *ParentFrameOffset =
+ Ctx.getOrCreateParentFrameOffsetSymbol(FLinkageName);
+ const MCExpr *MCOffset =
+ MCConstantExpr::create(FuncInfo.SEHSetFrameOffset, Ctx);
+ Asm->OutStreamer->EmitAssignment(ParentFrameOffset, MCOffset);
+ }
// Use the assembler to compute the number of table entries through label
// difference and division.
@@ -936,8 +950,7 @@ void WinException::emitEHRegistrationOffsetLabel(const WinEHFuncInfo &FuncInfo,
int FI = FuncInfo.EHRegNodeFrameIndex;
if (FI != INT_MAX) {
const TargetFrameLowering *TFI = Asm->MF->getSubtarget().getFrameLowering();
- unsigned UnusedReg;
- Offset = TFI->getFrameIndexReference(*Asm->MF, FI, UnusedReg);
+ Offset = TFI->getNonLocalFrameIndexReference(*Asm->MF, FI);
}
MCContext &Ctx = Asm->OutContext;
diff --git a/lib/CodeGen/AsmPrinter/WinException.h b/lib/CodeGen/AsmPrinter/WinException.h
index 37c796f89765..dc5036302131 100644
--- a/lib/CodeGen/AsmPrinter/WinException.h
+++ b/lib/CodeGen/AsmPrinter/WinException.h
@@ -1,9 +1,8 @@
//===-- WinException.h - Windows Exception Handling ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ class LLVM_LIBRARY_VISIBILITY WinException : public EHStreamer {
/// only), it is relative to the frame pointer.
int getFrameIndexOffset(int FrameIndex, const WinEHFuncInfo &FuncInfo);
+ void endFuncletImpl();
public:
//===--------------------------------------------------------------------===//
// Main entry points.
@@ -100,6 +100,8 @@ public:
/// immediately after the function entry point.
void beginFunction(const MachineFunction *MF) override;
+ void markFunctionEnd() override;
+
/// Gather and emit post-function exception information.
void endFunction(const MachineFunction *) override;
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index 95581c09dd1c..dc7eaf6a5fe7 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -1,9 +1,8 @@
//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -361,7 +360,7 @@ bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
const DataLayout &DL) {
- EVT VT = TLI->getValueType(DL, T);
+ EVT VT = TLI->getMemValueType(DL, T);
unsigned BitWidth = VT.getStoreSizeInBits();
assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
return IntegerType::get(T->getContext(), BitWidth);
@@ -382,7 +381,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Addr->getType()->getPointerAddressSpace());
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
- auto *NewLI = Builder.CreateLoad(NewAddr);
+ auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
NewLI->setAlignment(LI->getAlignment());
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
@@ -431,6 +430,9 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
IRBuilder<> Builder(LI);
AtomicOrdering Order = LI->getOrdering();
+ if (Order == AtomicOrdering::Unordered)
+ Order = AtomicOrdering::Monotonic;
+
Value *Addr = LI->getPointerOperand();
Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
Constant *DummyVal = Constant::getNullValue(Ty);
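// Illustrative IR (not from the patch): cmpxchg has no 'unordered' flavor, so
// an unordered atomic load is promoted to monotonic before the expansion
// below rewrites it as a no-op compare-exchange:
//
//   %v = load atomic i32, i32* %p unordered, align 4
//   ; becomes, roughly:
//   %pair = cmpxchg i32* %p, i32 0, i32 0 monotonic monotonic
//   %v    = extractvalue { i32, i1 } %pair, 0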
@@ -496,11 +498,26 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
Value *Loaded, Value *NewVal,
AtomicOrdering MemOpOrder,
Value *&Success, Value *&NewLoaded) {
+ Type *OrigTy = NewVal->getType();
+
+ // This code can go away when cmpxchg supports FP types.
+ bool NeedBitcast = OrigTy->isFloatingPointTy();
+ if (NeedBitcast) {
+ IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
+ unsigned AS = Addr->getType()->getPointerAddressSpace();
+ Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
+ NewVal = Builder.CreateBitCast(NewVal, IntTy);
+ Loaded = Builder.CreateBitCast(Loaded, IntTy);
+ }
+
Value* Pair = Builder.CreateAtomicCmpXchg(
Addr, Loaded, NewVal, MemOpOrder,
AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
Success = Builder.CreateExtractValue(Pair, 1, "success");
NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
+
+ if (NeedBitcast)
+ NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
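// Standalone sketch of the bitcast trick above (the helper name is made up;
// not part of the patch): cmpxchg only accepts integer and pointer operands,
// so a floating-point CAS is legalized by round-tripping through iN.
static Value *emitFPCmpXchg(IRBuilder<> &Builder, Value *Addr, Value *Loaded,
                            Value *NewVal, AtomicOrdering Order) {
  Type *FPTy = NewVal->getType();
  IntegerType *IntTy = Builder.getIntNTy(FPTy->getPrimitiveSizeInBits());
  unsigned AS = Addr->getType()->getPointerAddressSpace();
  Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Builder.CreateBitCast(Loaded, IntTy),
      Builder.CreateBitCast(NewVal, IntTy), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  // Cast the loaded iN payload back to the original FP type for the caller.
  return Builder.CreateBitCast(Builder.CreateExtractValue(Pair, 0), FPTy);
}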
/// Emit IR to implement the given atomicrmw operation on values in registers,
@@ -535,6 +552,10 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
case AtomicRMWInst::UMin:
NewVal = Builder.CreateICmpULE(Loaded, Inc);
return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
+ case AtomicRMWInst::FAdd:
+ return Builder.CreateFAdd(Loaded, Inc, "new");
+ case AtomicRMWInst::FSub:
+ return Builder.CreateFSub(Loaded, Inc, "new");
default:
llvm_unreachable("Unknown atomic op");
}
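// Plain-C++ analogue (illustrative only; not LLVM code, needs <atomic>,
// <cstdint>, <cstring>): when a target cannot lower 'atomicrmw fadd'
// natively, the cmpxchg-loop expansion driven by performAtomicOp behaves
// like this fallback:
static float atomicFAddFallback(std::atomic<uint32_t> *Addr, float Inc) {
  uint32_t OldBits = Addr->load(std::memory_order_relaxed);
  for (;;) {
    float Loaded;
    std::memcpy(&Loaded, &OldBits, sizeof Loaded); // bitcast iN -> float
    float New = Loaded + Inc;                      // the FAdd built above
    uint32_t NewBits;
    std::memcpy(&NewBits, &New, sizeof NewBits);   // bitcast float -> iN
    // On failure, compare_exchange_weak reloads OldBits and the loop retries.
    if (Addr->compare_exchange_weak(OldBits, NewBits))
      return Loaded; // atomicrmw yields the previous value
  }
}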
@@ -564,6 +585,10 @@ bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
unsigned ValueSize = getAtomicOpSize(AI);
if (ValueSize < MinCASSize) {
+ // TODO: Handle atomicrmw fadd/fsub
+ if (AI->getType()->isFloatingPointTy())
+ return false;
+
expandPartwordAtomicRMW(AI,
TargetLoweringBase::AtomicExpansionKind::CmpXChg);
} else {
@@ -1090,11 +1115,11 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
SuccessOrder != AtomicOrdering::Monotonic &&
SuccessOrder != AtomicOrdering::Acquire &&
- !F->optForMinSize();
+ !F->hasMinSize();
// There's no overhead for sinking the release barrier in a weak cmpxchg, so
// do it even on minsize.
- bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
+ bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
// Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
//
@@ -1533,6 +1558,8 @@ static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
case AtomicRMWInst::Min:
case AtomicRMWInst::UMax:
case AtomicRMWInst::UMin:
+ case AtomicRMWInst::FAdd:
+ case AtomicRMWInst::FSub:
// No atomic libcalls are available for max/min/umax/umin.
return {};
}
@@ -1671,16 +1698,25 @@ bool AtomicExpand::expandAtomicOpToLibcall(
}
// 'ptr' argument.
- Value *PtrVal =
- Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
+  // Note: This assumes all address spaces share a common libfunc
+  // implementation and that addresses are convertible. For systems without
+  // that property, we'd need to extend this mechanism to support AS-specific
+  // families of atomic intrinsics.
+ auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
+ Value *PtrVal = Builder.CreateBitCast(PointerOperand,
+ Type::getInt8PtrTy(Ctx, PtrTypeAS));
+ PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
Args.push_back(PtrVal);
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
AllocaCASExpected->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
+
AllocaCASExpected_i8 =
- Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
+ Builder.CreateBitCast(AllocaCASExpected,
+ Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
Args.push_back(AllocaCASExpected_i8);
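// Worked example (illustrative; assumes the AS0 libcall ABI noted above): for
// an atomic op on a pointer in address space 1, the argument is now cast in
// two steps before the call:
//
//   %p8 = bitcast i32 addrspace(1)* %p to i8 addrspace(1)*
//   %p0 = addrspacecast i8 addrspace(1)* %p8 to i8*
//   call void @__atomic_load(i64 4, i8* %p0, i8* %ret, i32 0)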
@@ -1707,8 +1743,9 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
AllocaResult->setAlignment(AllocaAlignment);
+ unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
- Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
+ Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
Args.push_back(AllocaResult_i8);
}
@@ -1734,7 +1771,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
for (Value *Arg : Args)
ArgTys.push_back(Arg->getType());
FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
- Constant *LibcallFn =
+ FunctionCallee LibcallFn =
M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
CallInst *Call = Builder.CreateCall(LibcallFn, Args);
Call->setAttributes(Attr);
@@ -1749,8 +1786,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// from call}
Type *FinalResultTy = I->getType();
Value *V = UndefValue::get(FinalResultTy);
- Value *ExpectedOut =
- Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
+ Value *ExpectedOut = Builder.CreateAlignedLoad(
+ CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
V = Builder.CreateInsertValue(V, ExpectedOut, 0);
V = Builder.CreateInsertValue(V, Result, 1);
@@ -1760,7 +1797,8 @@ bool AtomicExpand::expandAtomicOpToLibcall(
if (UseSizedLibcall)
V = Builder.CreateBitOrPointerCast(Result, I->getType());
else {
- V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
+ V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
+ AllocaAlignment);
Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
}
I->replaceAllUsesWith(V);
diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp
index d11f375b176e..57cefae2066a 100644
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -1,9 +1,8 @@
//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index efbfd5f4ab2c..fb54b5d6c8d8 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -1,9 +1,8 @@
//===- BranchFolding.cpp - Fold machine code branch instructions ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -722,7 +721,7 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
// branch instruction, which is likely to be smaller than the 2
// instructions that would be deleted in the merge.
MachineFunction *MF = MBB1->getParent();
- return EffectiveTailLen >= 2 && MF->getFunction().optForSize() &&
+ return EffectiveTailLen >= 2 && MF->getFunction().hasOptSize() &&
(I1 == MBB1->begin() || I2 == MBB2->begin());
}
@@ -1071,31 +1070,29 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
bool MadeChange = false;
- if (!EnableTailMerge) return MadeChange;
+ if (!EnableTailMerge)
+ return MadeChange;
// First find blocks with no successors.
- // Block placement does not create new tail merging opportunities for these
- // blocks.
- if (!AfterBlockPlacement) {
- MergePotentials.clear();
- for (MachineBasicBlock &MBB : MF) {
- if (MergePotentials.size() == TailMergeThreshold)
- break;
- if (!TriedMerging.count(&MBB) && MBB.succ_empty())
- MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
- }
-
- // If this is a large problem, avoid visiting the same basic blocks
- // multiple times.
+ // Block placement may create new tail merging opportunities for these blocks.
+ MergePotentials.clear();
+ for (MachineBasicBlock &MBB : MF) {
if (MergePotentials.size() == TailMergeThreshold)
- for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
- TriedMerging.insert(MergePotentials[i].getBlock());
-
- // See if we can do any tail merging on those.
- if (MergePotentials.size() >= 2)
- MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+ break;
+ if (!TriedMerging.count(&MBB) && MBB.succ_empty())
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
}
+ // If this is a large problem, avoid visiting the same basic blocks
+ // multiple times.
+ if (MergePotentials.size() == TailMergeThreshold)
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ TriedMerging.insert(MergePotentials[i].getBlock());
+
+ // See if we can do any tail merging on those.
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(nullptr, nullptr, MinCommonTailLength);
+
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
@@ -1183,29 +1180,6 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
}
}
- // Failing case: the only way IBB can be reached from PBB is via
- // exception handling. Happens for landing pads. Would be nice to have
- // a bit in the edge so we didn't have to do all this.
- if (IBB->isEHPad()) {
- MachineFunction::iterator IP = ++PBB->getIterator();
- MachineBasicBlock *PredNextBB = nullptr;
- if (IP != MF.end())
- PredNextBB = &*IP;
- if (!TBB) {
- if (IBB != PredNextBB) // fallthrough
- continue;
- } else if (FBB) {
- if (TBB != IBB && FBB != IBB) // cbr then ubr
- continue;
- } else if (Cond.empty()) {
- if (TBB != IBB) // ubr
- continue;
- } else {
- if (TBB != IBB && IBB != PredNextBB) // cbr
- continue;
- }
- }
-
// Remove the unconditional branch at the end, if any.
if (TBB && (Cond.empty() || FBB)) {
DebugLoc dl = PBB->findBranchDebugLoc();
@@ -1598,7 +1572,7 @@ ReoptimizeBlock:
}
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 &&
- MF.getFunction().optForSize()) {
+ MF.getFunction().hasOptSize()) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
// performance. Therefore, only consider this for optsize functions.
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index accd0ab7317b..761ff9c7d54e 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -1,9 +1,8 @@
//===- BranchFolding.h - Fold machine code branch instructions --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index c092da2b6602..3ad6266d4f35 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -1,9 +1,8 @@
//===- BranchRelaxation.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index 210699cbf239..cc4b2caa9bed 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/BreakFalseDeps.cpp - Break False Dependency Fix -*- C++ -*==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/BuiltinGCs.cpp b/lib/CodeGen/BuiltinGCs.cpp
index 93939e573b7b..bfc10cb3fef2 100644
--- a/lib/CodeGen/BuiltinGCs.cpp
+++ b/lib/CodeGen/BuiltinGCs.cpp
@@ -1,9 +1,8 @@
//===- BuiltinGCs.cpp - Boilerplate for our built in GC types -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CFIInstrInserter.cpp b/lib/CodeGen/CFIInstrInserter.cpp
index c4799855a2b3..1a4d54231cfd 100644
--- a/lib/CodeGen/CFIInstrInserter.cpp
+++ b/lib/CodeGen/CFIInstrInserter.cpp
@@ -1,9 +1,8 @@
//===------ CFIInstrInserter.cpp - Insert additional CFI instructions -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 02347b9f0b5c..7164fdfb7886 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -1,9 +1,8 @@
//===- CalcSpillWeights.cpp -----------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 3593089b206d..497fcb147849 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -1,9 +1,8 @@
//===-- CallingConvLower.cpp - Calling Conventions ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index 66166482c78b..c37ed57781d4 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -1,9 +1,8 @@
//===-- CodeGen.cpp -------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -31,14 +30,15 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeEarlyIfConverterPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
- initializeExpandISelPseudosPass(Registry);
initializeExpandMemCmpPassPass(Registry);
initializeExpandPostRAPass(Registry);
initializeFEntryInserterPass(Registry);
+ initializeFinalizeISelPass(Registry);
initializeFinalizeMachineBundlesPass(Registry);
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
+ initializeHardwareLoopsPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index c35f8666fa3c..52b4bbea012b 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -1,9 +1,8 @@
//===- CodeGenPrepare.cpp - Prepare a function for code generation --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -16,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -32,6 +32,7 @@
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
@@ -292,15 +293,16 @@ class TypePromotionTransaction;
/// Keep track of SExt promoted.
ValueToSExts ValToSExtendedUses;
- /// True if CFG is modified in any way.
- bool ModifiedDT;
-
/// True if optimizing for size.
bool OptSize;
/// DataLayout for the Function being processed.
const DataLayout *DL = nullptr;
+ /// Building the dominator tree can be expensive, so we only build it
+ /// lazily and update it when required.
+ std::unique_ptr<DominatorTree> DT;
+
public:
static char ID; // Pass identification, replacement for typeid
@@ -339,6 +341,13 @@ class TypePromotionTransaction;
}
}
+ // Get the DominatorTree, building if necessary.
+ DominatorTree &getDT(Function &F) {
+ if (!DT)
+ DT = llvm::make_unique<DominatorTree>(F);
+ return *DT;
+ }
+
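// Usage note (names from this patch): the cached tree is dropped with
// DT.reset() at the top of the main optimization loop, so after any CFG
// change the next getDT(F) call rebuilds it instead of consulting a stale
// tree:
//
//   DT.reset();                     // CFG may have changed last iteration
//   DominatorTree &Tree = getDT(F); // rebuilt lazily on first use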
bool eliminateFallThrough(Function &F);
bool eliminateMostlyEmptyBlocks(Function &F);
BasicBlock *findDestBlockOfMergeableEmptyBlock(BasicBlock *BB);
@@ -355,11 +364,12 @@ class TypePromotionTransaction;
bool optimizeExt(Instruction *&I);
bool optimizeExtUses(Instruction *I);
bool optimizeLoadExt(LoadInst *Load);
+ bool optimizeShiftInst(BinaryOperator *BO);
bool optimizeSelectInst(SelectInst *SI);
bool optimizeShuffleVectorInst(ShuffleVectorInst *SVI);
bool optimizeSwitchInst(SwitchInst *SI);
bool optimizeExtractElementInst(Instruction *Inst);
- bool dupRetToEnableTailCallOpts(BasicBlock *BB);
+ bool dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT);
bool placeDbgValues(Function &F);
bool canFormExtLd(const SmallVectorImpl<Instruction *> &MovedExts,
LoadInst *&LI, Instruction *&Inst, bool HasPromoted);
@@ -374,8 +384,15 @@ class TypePromotionTransaction;
bool AllowPromotionWithoutCommonHeader,
bool HasPromoted, TypePromotionTransaction &TPT,
SmallVectorImpl<Instruction *> &SpeculativelyMovedExts);
- bool splitBranchCondition(Function &F);
+ bool splitBranchCondition(Function &F, bool &ModifiedDT);
bool simplifyOffsetableRelocate(Instruction &I);
+
+ bool tryToSinkFreeOperands(Instruction *I);
+ bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, CmpInst *Cmp,
+ Intrinsic::ID IID);
+ bool optimizeCmp(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUSubWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
+ bool combineToUAddWithOverflow(CmpInst *Cmp, bool &ModifiedDT);
};
} // end anonymous namespace
@@ -401,7 +418,6 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
InsertedInsts.clear();
PromotedInsts.clear();
- ModifiedDT = false;
if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
TM = &TPC->getTM<TargetMachine>();
SubtargetInfo = TM->getSubtargetImpl(F);
@@ -413,7 +429,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
BFI.reset(new BlockFrequencyInfo(F, *BPI, *LI));
- OptSize = F.optForSize();
+ OptSize = F.hasOptSize();
ProfileSummaryInfo *PSI =
&getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
@@ -444,8 +460,9 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
// unconditional branch.
EverMadeChange |= eliminateMostlyEmptyBlocks(F);
+ bool ModifiedDT = false;
if (!DisableBranchOpts)
- EverMadeChange |= splitBranchCondition(F);
+ EverMadeChange |= splitBranchCondition(F, ModifiedDT);
// Split some critical edges where one of the sources is an indirect branch,
// to help generate sane code for PHIs involving such edges.
@@ -454,6 +471,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
bool MadeChange = true;
while (MadeChange) {
MadeChange = false;
+ DT.reset();
for (Function::iterator I = F.begin(); I != F.end(); ) {
BasicBlock *BB = &*I++;
bool ModifiedDTOnIteration = false;
@@ -654,6 +672,16 @@ bool CodeGenPrepare::isMergingEmptyBlockProfitable(BasicBlock *BB,
BB->getSinglePredecessor()->getSingleSuccessor()))
return false;
+ // Skip merging if the block's successor is also a successor to any callbr
+ // that leads to this block.
+ // FIXME: Is this really needed? Is this a correctness issue?
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ if (auto *CBI = dyn_cast<CallBrInst>((*PI)->getTerminator()))
+ for (unsigned i = 0, e = CBI->getNumSuccessors(); i != e; ++i)
+ if (DestBB == CBI->getSuccessor(i))
+ return false;
+ }
+
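// Worked example (illustrative IR): merging %empty into %dest below would
// leave %dest as both the callbr's fallthrough and its indirect target, which
// this check conservatively rejects (the FIXME questions whether that is
// strictly required):
//
//   callbr void asm "", "X"(i8* blockaddress(@f, %dest))
//       to label %empty [label %dest]
//   empty:
//     br label %dest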
// Try to skip merging if the unique predecessor of BB is terminated by a
// switch or indirect branch instruction, and BB is used as an incoming block
// of PHIs in DestBB. In such case, merging BB and DestBB would cause ISel to
@@ -1040,7 +1068,7 @@ bool CodeGenPrepare::simplifyOffsetableRelocate(Instruction &I) {
return MadeChange;
}
-/// SinkCast - Sink the specified cast instruction into its user blocks
+/// Sink the specified cast instruction into its user blocks.
static bool SinkCast(CastInst *CI) {
BasicBlock *DefBB = CI->getParent();
@@ -1114,8 +1142,8 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
// Sink only "cheap" (or nop) address-space casts. This is a weaker condition
// than sinking only nop casts, but is helpful on some platforms.
if (auto *ASC = dyn_cast<AddrSpaceCastInst>(CI)) {
- if (!TLI.isCheapAddrSpaceCast(ASC->getSrcAddressSpace(),
- ASC->getDestAddressSpace()))
+ if (!TLI.isFreeAddrSpaceCast(ASC->getSrcAddressSpace(),
+ ASC->getDestAddressSpace()))
return false;
}
@@ -1148,54 +1176,169 @@ static bool OptimizeNoopCopyExpression(CastInst *CI, const TargetLowering &TLI,
return SinkCast(CI);
}
-/// Try to combine CI into a call to the llvm.uadd.with.overflow intrinsic if
-/// possible.
-///
-/// Return true if any changes were made.
-static bool CombineUAddWithOverflow(CmpInst *CI) {
- Value *A, *B;
- Instruction *AddI;
- if (!match(CI,
- m_UAddWithOverflow(m_Value(A), m_Value(B), m_Instruction(AddI))))
+bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
+ CmpInst *Cmp,
+ Intrinsic::ID IID) {
+ if (BO->getParent() != Cmp->getParent()) {
+ // We used to use a dominator tree here to allow multi-block optimization.
+ // But that was problematic because:
+ // 1. It could cause a perf regression by hoisting the math op into the
+ // critical path.
+ // 2. It could cause a perf regression by creating a value that was live
+ // across multiple blocks and increasing register pressure.
+ // 3. Use of a dominator tree could cause large compile-time regression.
+ // This is because we recompute the DT on every change in the main CGP
+ // run-loop. The recomputing is probably unnecessary in many cases, so if
+ // that was fixed, using a DT here would be ok.
+ return false;
+ }
+
+ // We allow matching the canonical IR (add X, C) back to (usubo X, -C).
+ Value *Arg0 = BO->getOperand(0);
+ Value *Arg1 = BO->getOperand(1);
+ if (BO->getOpcode() == Instruction::Add &&
+ IID == Intrinsic::usub_with_overflow) {
+ assert(isa<Constant>(Arg1) && "Unexpected input for usubo");
+ Arg1 = ConstantExpr::getNeg(cast<Constant>(Arg1));
+ }
+
+ // Insert at the first instruction of the pair.
+ Instruction *InsertPt = nullptr;
+ for (Instruction &Iter : *Cmp->getParent()) {
+ if (&Iter == BO || &Iter == Cmp) {
+ InsertPt = &Iter;
+ break;
+ }
+ }
+ assert(InsertPt != nullptr && "Parent block did not contain cmp or binop");
+
+ IRBuilder<> Builder(InsertPt);
+ Value *MathOV = Builder.CreateBinaryIntrinsic(IID, Arg0, Arg1);
+ Value *Math = Builder.CreateExtractValue(MathOV, 0, "math");
+ Value *OV = Builder.CreateExtractValue(MathOV, 1, "ov");
+ BO->replaceAllUsesWith(Math);
+ Cmp->replaceAllUsesWith(OV);
+ BO->eraseFromParent();
+ Cmp->eraseFromParent();
+ return true;
+}
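// Worked example (illustrative IR): for IID = uadd.with.overflow, the rewrite
// above replaces a same-block add/compare pair such as
//
//   %math = add i32 %x, %y
//   %ov   = icmp ult i32 %math, %x   ; unsigned-add overflow test
//
// with one intrinsic call and two extracts:
//
//   %mo   = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
//   %math = extractvalue { i32, i1 } %mo, 0
//   %ov   = extractvalue { i32, i1 } %mo, 1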
+
+/// Match special-case patterns that check for unsigned add overflow.
+static bool matchUAddWithOverflowConstantEdgeCases(CmpInst *Cmp,
+ BinaryOperator *&Add) {
+ // Add = add A, 1; Cmp = icmp eq A,-1 (overflow if A is max val)
+ // Add = add A,-1; Cmp = icmp ne A, 0 (overflow if A is non-zero)
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ if (isa<Constant>(A))
+ return false;
+
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_AllOnes()))
+ B = ConstantInt::get(B->getType(), 1);
+ else if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt()))
+ B = ConstantInt::get(B->getType(), -1);
+ else
return false;
- Type *Ty = AddI->getType();
- if (!isa<IntegerType>(Ty))
+ // Check the users of the variable operand of the compare looking for an add
+ // with the adjusted constant.
+ for (User *U : A->users()) {
+ if (match(U, m_Add(m_Specific(A), m_Specific(B)))) {
+ Add = cast<BinaryOperator>(U);
+ return true;
+ }
+ }
+ return false;
+}
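// Worked example (illustrative IR): the two constant edge cases in concrete
// form; B is rewritten to the adjusted add constant before scanning A's users:
//
//   %a = add i32 %x, 1        ; overflows exactly when %x == UINT_MAX
//   %c = icmp eq i32 %x, -1   ; matched, B becomes 1
//
//   %a = add i32 %x, -1       ; overflows exactly when %x != 0
//   %c = icmp ne i32 %x, 0    ; matched, B becomes -1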
+
+/// Try to combine the compare into a call to the llvm.uadd.with.overflow
+/// intrinsic. Return true if any changes were made.
+bool CodeGenPrepare::combineToUAddWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ Value *A, *B;
+ BinaryOperator *Add;
+ if (!match(Cmp, m_UAddWithOverflow(m_Value(A), m_Value(B), m_BinOp(Add))))
+ if (!matchUAddWithOverflowConstantEdgeCases(Cmp, Add))
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::UADDO,
+ TLI->getValueType(*DL, Add->getType())))
return false;
- // We don't want to move around uses of condition values this late, so we we
+ // We don't want to move around uses of condition values this late, so we
// check if it is legal to create the call to the intrinsic in the basic
- // block containing the icmp:
+ // block containing the icmp.
+ if (Add->getParent() != Cmp->getParent() && !Add->hasOneUse())
+ return false;
- if (AddI->getParent() != CI->getParent() && !AddI->hasOneUse())
+ if (!replaceMathCmpWithIntrinsic(Add, Cmp, Intrinsic::uadd_with_overflow))
return false;
-#ifndef NDEBUG
- // Someday m_UAddWithOverflow may get smarter, but this is a safe assumption
- // for now:
- if (AddI->hasOneUse())
- assert(*AddI->user_begin() == CI && "expected!");
-#endif
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
+ return true;
+}
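
The canonical pattern behind m_UAddWithOverflow is the classic unsigned wrap check; sketched in C++ (a model of the matched IR, not the matcher itself):

    #include <cstdint>

    bool addOverflows(uint32_t A, uint32_t B, uint32_t &Sum) {
      Sum = A + B;    // IR: %math = add i32 %A, %B
      return Sum < A; // IR: %ov = icmp ult i32 %math, %A
    }
    // After the combine, %math and %ov become the two results of a single
    // llvm.uadd.with.overflow.i32 call.
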
+
+bool CodeGenPrepare::combineToUSubWithOverflow(CmpInst *Cmp,
+ bool &ModifiedDT) {
+ // We are not expecting non-canonical/degenerate code. Just bail out.
+ Value *A = Cmp->getOperand(0), *B = Cmp->getOperand(1);
+ if (isa<Constant>(A) && isa<Constant>(B))
+ return false;
+
+ // Convert (A u> B) to (A u< B) to simplify pattern matching.
+ ICmpInst::Predicate Pred = Cmp->getPredicate();
+ if (Pred == ICmpInst::ICMP_UGT) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A == 0) is the same as (A u< 1).
+ if (Pred == ICmpInst::ICMP_EQ && match(B, m_ZeroInt())) {
+ B = ConstantInt::get(B->getType(), 1);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ // Convert special-case: (A != 0) is the same as (0 u< A).
+ if (Pred == ICmpInst::ICMP_NE && match(B, m_ZeroInt())) {
+ std::swap(A, B);
+ Pred = ICmpInst::ICMP_ULT;
+ }
+ if (Pred != ICmpInst::ICMP_ULT)
+ return false;
+
+ // Walk the users of a variable operand of a compare looking for a subtract or
+ // add with that same operand. Also match the second operand of the compare to
+ // the add/sub; for an add, that operand may appear as a negated constant.
+ Value *CmpVariableOperand = isa<Constant>(A) ? B : A;
+ BinaryOperator *Sub = nullptr;
+ for (User *U : CmpVariableOperand->users()) {
+ // A - B, A u< B --> usubo(A, B)
+ if (match(U, m_Sub(m_Specific(A), m_Specific(B)))) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+
+ // A + (-C), A u< C (canonicalized form of (sub A, C))
+ const APInt *CmpC, *AddC;
+ if (match(U, m_Add(m_Specific(A), m_APInt(AddC))) &&
+ match(B, m_APInt(CmpC)) && *AddC == -(*CmpC)) {
+ Sub = cast<BinaryOperator>(U);
+ break;
+ }
+ }
+ if (!Sub)
+ return false;
+
+ if (!TLI->shouldFormOverflowOp(ISD::USUBO,
+ TLI->getValueType(*DL, Sub->getType())))
+ return false;
+
+ if (!replaceMathCmpWithIntrinsic(Sub, Cmp, Intrinsic::usub_with_overflow))
+ return false;
- Module *M = CI->getModule();
- Value *F = Intrinsic::getDeclaration(M, Intrinsic::uadd_with_overflow, Ty);
-
- auto *InsertPt = AddI->hasOneUse() ? CI : AddI;
-
- DebugLoc Loc = CI->getDebugLoc();
- auto *UAddWithOverflow =
- CallInst::Create(F, {A, B}, "uadd.overflow", InsertPt);
- UAddWithOverflow->setDebugLoc(Loc);
- auto *UAdd = ExtractValueInst::Create(UAddWithOverflow, 0, "uadd", InsertPt);
- UAdd->setDebugLoc(Loc);
- auto *Overflow =
- ExtractValueInst::Create(UAddWithOverflow, 1, "overflow", InsertPt);
- Overflow->setDebugLoc(Loc);
-
- CI->replaceAllUsesWith(Overflow);
- AddI->replaceAllUsesWith(UAdd);
- CI->eraseFromParent();
- AddI->eraseFromParent();
+ // Reset callers - do not crash by iterating over a dead instruction.
+ ModifiedDT = true;
return true;
}
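
The two matched shapes, restated as a hedged C++ model (the constant 42 is chosen only for illustration):

    #include <cstdint>

    // Shape 1: sub A, B paired with icmp ult A, B -- the compare is the borrow.
    bool borrow(uint32_t A, uint32_t B, uint32_t &D) {
      D = A - B;
      return A < B;
    }
    // Shape 2: canonical IR writes (sub A, 42) as (add A, -42), so the pass
    // matches add A, -C against icmp ult A, C and still forms usubo(A, C).
    bool borrowConst(uint32_t A, uint32_t &D) {
      D = A + (uint32_t)-42;
      return A < 42u;
    }
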
@@ -1205,18 +1348,19 @@ static bool CombineUAddWithOverflow(CmpInst *CI) {
/// lose; some adjustment may be wanted there.
///
/// Return true if any changes are made.
-static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- BasicBlock *DefBB = CI->getParent();
+static bool sinkCmpExpression(CmpInst *Cmp, const TargetLowering &TLI) {
+ if (TLI.hasMultipleConditionRegisters())
+ return false;
// Avoid sinking soft-FP comparisons, since this can move them into a loop.
- if (TLI && TLI->useSoftFloat() && isa<FCmpInst>(CI))
+ if (TLI.useSoftFloat() && isa<FCmpInst>(Cmp))
return false;
// Only insert a cmp in each block once.
DenseMap<BasicBlock*, CmpInst*> InsertedCmps;
bool MadeChange = false;
- for (Value::user_iterator UI = CI->user_begin(), E = CI->user_end();
+ for (Value::user_iterator UI = Cmp->user_begin(), E = Cmp->user_end();
UI != E; ) {
Use &TheUse = UI.getUse();
Instruction *User = cast<Instruction>(*UI);
@@ -1230,6 +1374,7 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
// Figure out which BB this cmp is used in.
BasicBlock *UserBB = User->getParent();
+ BasicBlock *DefBB = Cmp->getParent();
// If this user is in the same block as the cmp, don't change the cmp.
if (UserBB == DefBB) continue;
@@ -1241,10 +1386,11 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
BasicBlock::iterator InsertPt = UserBB->getFirstInsertionPt();
assert(InsertPt != UserBB->end());
InsertedCmp =
- CmpInst::Create(CI->getOpcode(), CI->getPredicate(),
- CI->getOperand(0), CI->getOperand(1), "", &*InsertPt);
+ CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(),
+ Cmp->getOperand(0), Cmp->getOperand(1), "",
+ &*InsertPt);
// Propagate the debug info.
- InsertedCmp->setDebugLoc(CI->getDebugLoc());
+ InsertedCmp->setDebugLoc(Cmp->getDebugLoc());
}
// Replace a use of the cmp with a use of the new cmp.
@@ -1254,19 +1400,22 @@ static bool SinkCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
}
// If we removed all uses, nuke the cmp.
- if (CI->use_empty()) {
- CI->eraseFromParent();
+ if (Cmp->use_empty()) {
+ Cmp->eraseFromParent();
MadeChange = true;
}
return MadeChange;
}
-static bool OptimizeCmpExpression(CmpInst *CI, const TargetLowering *TLI) {
- if (SinkCmpExpression(CI, TLI))
+bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, bool &ModifiedDT) {
+ if (sinkCmpExpression(Cmp, *TLI))
return true;
- if (CombineUAddWithOverflow(CI))
+ if (combineToUAddWithOverflow(Cmp, ModifiedDT))
+ return true;
+
+ if (combineToUSubWithOverflow(Cmp, ModifiedDT))
return true;
return false;
@@ -1301,7 +1450,7 @@ static bool sinkAndCmp0Expression(Instruction *AndI,
for (auto *U : AndI->users()) {
Instruction *User = cast<Instruction>(U);
- // Only sink for and mask feeding icmp with 0.
+ // Only sink 'and' feeding icmp with 0.
if (!isa<ICmpInst>(User))
return false;
@@ -1704,9 +1853,23 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
if (II) {
switch (II->getIntrinsicID()) {
default: break;
+ case Intrinsic::experimental_widenable_condition: {
+ // Give up on future widening opportunities so that we can fold away dead
+ // paths and merge blocks before going into block-local instruction
+ // selection.
+ if (II->use_empty()) {
+ II->eraseFromParent();
+ return true;
+ }
+ Constant *RetVal = ConstantInt::getTrue(II->getContext());
+ resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
+ replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
+ });
+ return true;
+ }
case Intrinsic::objectsize: {
// Lower all uses of llvm.objectsize.*
- ConstantInt *RetVal =
+ Value *RetVal =
lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
@@ -1735,6 +1898,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
InsertedInsts.insert(ExtVal);
return true;
}
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group: {
Value *ArgVal = II->getArgOperand(0);
@@ -1818,7 +1982,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
/// %tmp2 = tail call i32 @f2()
/// ret i32 %tmp2
/// @endcode
-bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
+bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT) {
if (!TLI)
return false;
@@ -1846,10 +2010,8 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
// return is the first instruction in the block.
if (PN) {
BasicBlock::iterator BI = BB->begin();
- do { ++BI; } while (isa<DbgInfoIntrinsic>(BI));
- if (&*BI == BCI)
- // Also skip over the bitcast.
- ++BI;
+ // Skip over debug intrinsics and the bitcast.
+ do { ++BI; } while (isa<DbgInfoIntrinsic>(BI) || &*BI == BCI);
if (&*BI != RetI)
return false;
} else {
@@ -1865,7 +2027,9 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB) {
SmallVector<CallInst*, 4> TailCalls;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
- CallInst *CI = dyn_cast<CallInst>(PN->getIncomingValue(I));
+ // Look through bitcasts.
+ Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
+ CallInst *CI = dyn_cast<CallInst>(IncomingVal);
// Make sure the phi value is indeed produced by the tail call.
if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
TLI->mayBeEmittedAsTailCall(CI) &&
@@ -1929,6 +2093,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
Value *BaseReg = nullptr;
Value *ScaledReg = nullptr;
Value *OriginalValue = nullptr;
+ bool InBounds = true;
enum FieldName {
NoField = 0x00,
@@ -1940,6 +2105,7 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
MultipleFields = 0xff
};
+
ExtAddrMode() = default;
void print(raw_ostream &OS) const;
@@ -1958,6 +2124,10 @@ struct ExtAddrMode : public TargetLowering::AddrMode {
ScaledReg->getType() != other.ScaledReg->getType())
return MultipleFields;
+ // Conservatively reject 'inbounds' mismatches.
+ if (InBounds != other.InBounds)
+ return MultipleFields;
+
// Check each field to see if it differs.
unsigned Result = NoField;
if (BaseReg != other.BaseReg)
@@ -2056,6 +2226,8 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const ExtAddrMode &AM) {
void ExtAddrMode::print(raw_ostream &OS) const {
bool NeedPlus = false;
OS << "[";
+ if (InBounds)
+ OS << "inbounds ";
if (BaseGV) {
OS << (NeedPlus ? " + " : "")
<< "GV:";
@@ -3126,6 +3298,8 @@ private:
PhiNodeSet &PhiNodesToMatch) {
SmallVector<PHIPair, 8> WorkList;
Matcher.insert({ PHI, Candidate });
+ SmallSet<PHINode *, 8> MatchedPHIs;
+ MatchedPHIs.insert(PHI);
WorkList.push_back({ PHI, Candidate });
SmallSet<PHIPair, 8> Visited;
while (!WorkList.empty()) {
@@ -3158,8 +3332,10 @@ private:
if (Matcher.count({ FirstPhi, SecondPhi }))
continue;
 // So the values are different and do not match. So we need them to
- // match.
- Matcher.insert({ FirstPhi, SecondPhi });
+ // match. (But we register no more than one match per PHI node, so that
+ // we won't later try to replace them twice.)
+ if (!MatchedPHIs.insert(FirstPhi).second)
+ Matcher.insert({ FirstPhi, SecondPhi });
+ // But we must check it.
WorkList.push_back({ FirstPhi, SecondPhi });
}
@@ -3354,6 +3530,7 @@ bool AddressingModeMatcher::matchScaledValue(Value *ScaleReg, int64_t Scale,
ConstantInt *CI = nullptr; Value *AddLHS = nullptr;
if (isa<Instruction>(ScaleReg) && // not a constant expr.
match(ScaleReg, m_Add(m_Value(AddLHS), m_ConstantInt(CI)))) {
+ TestAddrMode.InBounds = false;
TestAddrMode.ScaledReg = AddLHS;
TestAddrMode.BaseOffs += CI->getSExtValue()*TestAddrMode.Scale;
@@ -3928,6 +4105,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
TypePromotionTransaction::ConstRestorationPt LastKnownGood =
TPT.getRestorationPoint();
+ AddrMode.InBounds = false;
if (matchAddr(AddrInst->getOperand(1), Depth+1) &&
matchAddr(AddrInst->getOperand(0), Depth+1))
return true;
@@ -3954,6 +4132,7 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::Mul:
case Instruction::Shl: {
// Can only handle X*C and X << C.
+ AddrMode.InBounds = false;
ConstantInt *RHS = dyn_cast<ConstantInt>(AddrInst->getOperand(1));
if (!RHS || RHS->getBitWidth() > 64)
return false;
@@ -4005,8 +4184,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (ConstantOffset == 0 ||
TLI.isLegalAddressingMode(DL, AddrMode, AccessTy, AddrSpace)) {
// Check to see if we can fold the base pointer in too.
- if (matchAddr(AddrInst->getOperand(0), Depth+1))
+ if (matchAddr(AddrInst->getOperand(0), Depth+1)) {
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
return true;
+ }
} else if (EnableGEPOffsetSplit && isa<GetElementPtrInst>(AddrInst) &&
TLI.shouldConsiderGEPOffsetSplit() && Depth == 0 &&
ConstantOffset > 0) {
@@ -4020,15 +4202,11 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
if (isa<Argument>(Base) || isa<GlobalValue>(Base) ||
(BaseI && !isa<CastInst>(BaseI) &&
!isa<GetElementPtrInst>(BaseI))) {
- // If the base is an instruction, make sure the GEP is not in the same
- // basic block as the base. If the base is an argument or global
- // value, make sure the GEP is not in the entry block. Otherwise,
- // instruction selection can undo the split. Also make sure the
- // parent block allows inserting non-PHI instructions before the
- // terminator.
+ // Make sure the parent block allows inserting non-PHI instructions
+ // before the terminator.
BasicBlock *Parent =
BaseI ? BaseI->getParent() : &GEP->getFunction()->getEntryBlock();
- if (GEP->getParent() != Parent && !Parent->getTerminator()->isEHPad())
+ if (!Parent->getTerminator()->isEHPad())
LargeOffsetGEP = std::make_pair(GEP, ConstantOffset);
}
}
@@ -4042,6 +4220,8 @@ bool AddressingModeMatcher::matchOperationAddr(User *AddrInst, unsigned Opcode,
// See if the scale and offset amount is valid for this target.
AddrMode.BaseOffs += ConstantOffset;
+ if (!cast<GEPOperator>(AddrInst)->isInBounds())
+ AddrMode.InBounds = false;
// Match the base operand of the GEP.
if (!matchAddr(AddrInst->getOperand(0), Depth+1)) {
@@ -4268,7 +4448,7 @@ static bool FindAllMemoryUses(
if (!MightBeFoldableInst(I))
return true;
- const bool OptSize = I->getFunction()->optForSize();
+ const bool OptSize = I->getFunction()->hasOptSize();
// Loop over all the uses, recursively processing them.
for (Use &U : I->uses()) {
@@ -4556,8 +4736,7 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
InsertedInsts, PromotedInsts, TPT, LargeOffsetGEP);
GetElementPtrInst *GEP = LargeOffsetGEP.first;
- if (GEP && GEP->getParent() != MemoryInst->getParent() &&
- !NewGEPBases.count(GEP)) {
+ if (GEP && !NewGEPBases.count(GEP)) {
// If splitting the underlying data structure can reduce the offset of a
// GEP, collect the GEP. Skip the GEPs that are the new bases of
// previously split data structures.
@@ -4727,7 +4906,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// SDAG consecutive load/store merging.
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- ResultPtr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ ResultPtr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
ResultIndex = V;
@@ -4738,7 +4921,11 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
} else {
if (ResultPtr->getType() != I8PtrTy)
ResultPtr = Builder.CreatePointerCast(ResultPtr, I8PtrTy);
- SunkAddr = Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
+ SunkAddr =
+ AddrMode.InBounds
+ ? Builder.CreateInBoundsGEP(I8Ty, ResultPtr, ResultIndex,
+ "sunkaddr")
+ : Builder.CreateGEP(I8Ty, ResultPtr, ResultIndex, "sunkaddr");
}
if (SunkAddr->getType() != Addr->getType())
@@ -5037,7 +5224,6 @@ bool CodeGenPrepare::tryToPromoteExts(
/// Merging redundant sexts when one is dominating the other.
bool CodeGenPrepare::mergeSExts(Function &F) {
- DominatorTree DT(F);
bool Changed = false;
for (auto &Entry : ValToSExtendedUses) {
SExts &Insts = Entry.second;
@@ -5048,7 +5234,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
continue;
bool inserted = false;
for (auto &Pt : CurPts) {
- if (DT.dominates(Inst, Pt)) {
+ if (getDT(F).dominates(Inst, Pt)) {
Pt->replaceAllUsesWith(Inst);
RemovedInsts.insert(Pt);
Pt->removeFromParent();
@@ -5057,7 +5243,7 @@ bool CodeGenPrepare::mergeSExts(Function &F) {
Changed = true;
break;
}
- if (!DT.dominates(Pt, Inst))
+ if (!getDT(F).dominates(Pt, Inst))
// Give up if we need to merge in a common dominator as the
// experiments show it is not profitable.
continue;
@@ -5715,7 +5901,7 @@ static bool isFormingBranchFromSelectProfitable(const TargetTransformInfo *TTI,
static Value *getTrueOrFalseValue(
SelectInst *SI, bool isTrue,
const SmallPtrSet<const Instruction *, 2> &Selects) {
- Value *V;
+ Value *V = nullptr;
for (SelectInst *DefSI = SI; DefSI != nullptr && Selects.count(DefSI);
DefSI = dyn_cast<SelectInst>(V)) {
@@ -5723,9 +5909,44 @@ static Value *getTrueOrFalseValue(
"The condition of DefSI does not match with SI");
V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue());
}
+
+ assert(V && "Failed to get select true/false value");
return V;
}
+bool CodeGenPrepare::optimizeShiftInst(BinaryOperator *Shift) {
+ assert(Shift->isShift() && "Expected a shift");
+
+ // If this is (1) a vector shift, (2) shifts by scalars are cheaper than
+ // general vector shifts, and (3) the shift amount is a select-of-splatted
+ // values, hoist the shifts before the select:
+ // shift Op0, (select Cond, TVal, FVal) -->
+ // select Cond, (shift Op0, TVal), (shift Op0, FVal)
+ //
+ // This is inverting a generic IR transform when we know that the cost of a
+ // general vector shift is more than the cost of 2 shift-by-scalars.
+ // We can't do this effectively in SDAG because we may not be able to
+ // determine if the select operands are splats from within a basic block.
+ Type *Ty = Shift->getType();
+ if (!Ty->isVectorTy() || !TLI->isVectorShiftByScalarCheap(Ty))
+ return false;
+ Value *Cond, *TVal, *FVal;
+ if (!match(Shift->getOperand(1),
+ m_OneUse(m_Select(m_Value(Cond), m_Value(TVal), m_Value(FVal)))))
+ return false;
+ if (!isSplatValue(TVal) || !isSplatValue(FVal))
+ return false;
+
+ IRBuilder<> Builder(Shift);
+ BinaryOperator::BinaryOps Opcode = Shift->getOpcode();
+ Value *NewTVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), TVal);
+ Value *NewFVal = Builder.CreateBinOp(Opcode, Shift->getOperand(0), FVal);
+ Value *NewSel = Builder.CreateSelect(Cond, NewTVal, NewFVal);
+ Shift->replaceAllUsesWith(NewSel);
+ Shift->eraseFromParent();
+ return true;
+}
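
A scalar C++ model of the rewrite (the real transform fires only for splatted vector operands, where two shift-by-scalar ops beat one variable vector shift):

    unsigned before(unsigned X, bool Cond, unsigned T, unsigned F) {
      return X << (Cond ? T : F); // shift Op0, (select Cond, TVal, FVal)
    }
    unsigned after(unsigned X, bool Cond, unsigned T, unsigned F) {
      return Cond ? (X << T) : (X << F); // select of two shifts, same value
    }
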
+
/// If we have a SelectInst that will likely profit from branch prediction,
/// turn it into a branch.
bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
@@ -5769,7 +5990,11 @@ bool CodeGenPrepare::optimizeSelectInst(SelectInst *SI) {
!isFormingBranchFromSelectProfitable(TTI, TLI, SI))
return false;
- ModifiedDT = true;
+ // The DominatorTree needs to be rebuilt by any consumers after this
+ // transformation. We simply reset here rather than setting the ModifiedDT
+ // flag to avoid restarting the function walk in runOnFunction for each
+ // select optimized.
+ DT.reset();
// Transform a sequence like this:
// start:
@@ -5943,6 +6168,7 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
InsertedShuffle =
new ShuffleVectorInst(SVI->getOperand(0), SVI->getOperand(1),
SVI->getOperand(2), "", &*InsertPt);
+ InsertedShuffle->setDebugLoc(SVI->getDebugLoc());
}
UI->replaceUsesOfWith(SVI, InsertedShuffle);
@@ -5958,6 +6184,48 @@ bool CodeGenPrepare::optimizeShuffleVectorInst(ShuffleVectorInst *SVI) {
return MadeChange;
}
+bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
+ // If the operands of I can be folded into a target instruction together with
+ // I, duplicate and sink them.
+ SmallVector<Use *, 4> OpsToSink;
+ if (!TLI || !TLI->shouldSinkOperands(I, OpsToSink))
+ return false;
+
+ // OpsToSink can contain multiple uses in a use chain (e.g.
+ // (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
+ // uses must come first, which means they are sunk first, temporarily creating
+ // invalid IR. This will be fixed once their dominated users are sunk and
+ // updated.
+ BasicBlock *TargetBB = I->getParent();
+ bool Changed = false;
+ SmallVector<Use *, 4> ToReplace;
+ for (Use *U : OpsToSink) {
+ auto *UI = cast<Instruction>(U->get());
+ if (UI->getParent() == TargetBB || isa<PHINode>(UI))
+ continue;
+ ToReplace.push_back(U);
+ }
+
+ SmallPtrSet<Instruction *, 4> MaybeDead;
+ for (Use *U : ToReplace) {
+ auto *UI = cast<Instruction>(U->get());
+ Instruction *NI = UI->clone();
+ MaybeDead.insert(UI);
+ LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
+ NI->insertBefore(I);
+ InsertedInsts.insert(NI);
+ U->set(NI);
+ Changed = true;
+ }
+
+ // Remove instructions that are dead after sinking.
+ for (auto *I : MaybeDead)
+ if (!I->hasNUsesOrMore(1))
+ I->eraseFromParent();
+
+ return Changed;
+}
+
bool CodeGenPrepare::optimizeSwitchInst(SwitchInst *SI) {
if (!TLI || !DL)
return false;
@@ -6412,14 +6680,17 @@ static bool splitMergedValStore(StoreInst &SI, const DataLayout &DL,
const TargetLowering &TLI) {
// Handle simple but common cases only.
Type *StoreType = SI.getValueOperand()->getType();
- if (DL.getTypeStoreSizeInBits(StoreType) != DL.getTypeSizeInBits(StoreType) ||
+ if (!DL.typeSizeEqualsStoreSize(StoreType) ||
DL.getTypeSizeInBits(StoreType) == 0)
return false;
unsigned HalfValBitSize = DL.getTypeSizeInBits(StoreType) / 2;
Type *SplitStoreType = Type::getIntNTy(SI.getContext(), HalfValBitSize);
- if (DL.getTypeStoreSizeInBits(SplitStoreType) !=
- DL.getTypeSizeInBits(SplitStoreType))
+ if (!DL.typeSizeEqualsStoreSize(SplitStoreType))
+ return false;
+
+ // Don't split the store if it is volatile.
+ if (SI.isVolatile())
return false;
// Match the following patterns:
@@ -6658,11 +6929,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
if (InsertedInsts.count(I))
return false;
+ // TODO: Move this into the switch on opcode below.
if (PHINode *P = dyn_cast<PHINode>(I)) {
// It is possible for very late stage optimizations (such as SimplifyCFG)
// to introduce PHI nodes too late to be cleaned up. If we detect such a
// trivial PHI, go ahead and zap it here.
if (Value *V = SimplifyInstruction(P, {*DL, TLInfo})) {
+ LargeOffsetGEPMap.erase(P);
P->replaceAllUsesWith(V);
P->eraseFromParent();
++NumPHIsElim;
@@ -6700,9 +6973,9 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CmpInst *CI = dyn_cast<CmpInst>(I))
- if (!TLI || !TLI->hasMultipleConditionRegisters())
- return OptimizeCmpExpression(CI, TLI);
+ if (auto *Cmp = dyn_cast<CmpInst>(I))
+ if (TLI && optimizeCmp(Cmp, ModifiedDT))
+ return true;
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
@@ -6745,13 +7018,13 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
EnableAndCmpSinking && TLI)
return sinkAndCmp0Expression(BinOp, *TLI, InsertedInsts);
+ // TODO: Move this into the switch on opcode - it handles shifts already.
if (BinOp && (BinOp->getOpcode() == Instruction::AShr ||
BinOp->getOpcode() == Instruction::LShr)) {
ConstantInt *CI = dyn_cast<ConstantInt>(BinOp->getOperand(1));
if (TLI && CI && TLI->hasExtractBitsInsn())
- return OptimizeExtractBits(BinOp, CI, *TLI, *DL);
-
- return false;
+ if (OptimizeExtractBits(BinOp, CI, *TLI, *DL))
+ return true;
}
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(I)) {
@@ -6772,20 +7045,25 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, bool &ModifiedDT) {
return false;
}
- if (CallInst *CI = dyn_cast<CallInst>(I))
- return optimizeCallInst(CI, ModifiedDT);
-
- if (SelectInst *SI = dyn_cast<SelectInst>(I))
- return optimizeSelectInst(SI);
-
- if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
- return optimizeShuffleVectorInst(SVI);
-
- if (auto *Switch = dyn_cast<SwitchInst>(I))
- return optimizeSwitchInst(Switch);
+ if (tryToSinkFreeOperands(I))
+ return true;
- if (isa<ExtractElementInst>(I))
- return optimizeExtractElementInst(I);
+ switch (I->getOpcode()) {
+ case Instruction::Shl:
+ case Instruction::LShr:
+ case Instruction::AShr:
+ return optimizeShiftInst(cast<BinaryOperator>(I));
+ case Instruction::Call:
+ return optimizeCallInst(cast<CallInst>(I), ModifiedDT);
+ case Instruction::Select:
+ return optimizeSelectInst(cast<SelectInst>(I));
+ case Instruction::ShuffleVector:
+ return optimizeShuffleVectorInst(cast<ShuffleVectorInst>(I));
+ case Instruction::Switch:
+ return optimizeSwitchInst(cast<SwitchInst>(I));
+ case Instruction::ExtractElement:
+ return optimizeExtractElementInst(cast<ExtractElementInst>(I));
+ }
return false;
}
@@ -6833,7 +7111,7 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
}
}
}
- MadeChange |= dupRetToEnableTailCallOpts(&BB);
+ MadeChange |= dupRetToEnableTailCallOpts(&BB, ModifiedDT);
return MadeChange;
}
@@ -6909,7 +7187,7 @@ static void scaleWeights(uint64_t &NewTrue, uint64_t &NewFalse) {
///
/// FIXME: Remove the (equivalent?) implementation in SelectionDAG.
///
-bool CodeGenPrepare::splitBranchCondition(Function &F) {
+bool CodeGenPrepare::splitBranchCondition(Function &F, bool &ModifiedDT) {
if (!TM || !TM->Options.EnableFastISel || !TLI || TLI->isJumpExpensive())
return false;
@@ -6983,11 +7261,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
std::swap(TBB, FBB);
// Replace the old BB with the new BB.
- for (PHINode &PN : TBB->phis()) {
- int i;
- while ((i = PN.getBasicBlockIndex(&BB)) >= 0)
- PN.setIncomingBlock(i, TmpBB);
- }
+ TBB->replacePhiUsesWith(&BB, TmpBB);
// Add another incoming edge form the new BB.
for (PHINode &PN : FBB->phis()) {
@@ -7066,10 +7340,7 @@ bool CodeGenPrepare::splitBranchCondition(Function &F) {
}
}
- // Note: No point in getting fancy here, since the DT info is never
- // available to CodeGenPrepare.
ModifiedDT = true;
-
MadeChange = true;
LLVM_DEBUG(dbgs() << "After branch condition splitting\n"; BB.dump();
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 5a5960b16130..4144c243a341 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -1,9 +1,8 @@
//===- CriticalAntiDepBreaker.cpp - Anti-dep breaker ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 09c4423a2f05..4e127ce525c8 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/CriticalAntiDepBreaker.h - Anti-Dep Support -*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index 68034afe98d5..b99be5d7a87c 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -1,9 +1,8 @@
//=- llvm/CodeGen/DFAPacketizer.cpp - DFA Packetizer for VLIW -*- C++ -*-=====//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// This class implements a deterministic finite automaton (DFA) based
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index ff44c5660bad..049ce7063307 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -1,9 +1,8 @@
//===- DeadMachineInstructionElim.cpp - Remove dead machine instructions --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -82,9 +81,11 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
} else {
- if (!MRI->use_nodbg_empty(Reg))
- // This def has a non-debug use. Don't delete the instruction!
- return false;
+ for (const MachineInstr &Use : MRI->use_nodbg_instructions(Reg)) {
+ if (&Use != MI)
+ // This def has a non-debug use. Don't delete the instruction!
+ return false;
+ }
}
}
}
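
The relaxed test treats a def whose only user is the defining instruction itself as dead; a minimal C++ model of the check, with instructions abstracted to ids:

    #include <vector>

    // A def by instruction I is still dead if every non-debug user of the
    // register is I itself (e.g. a dead "r0 = add r0, 1" feeding nothing else).
    bool defIsDead(int I, const std::vector<int> &NonDbgUsers) {
      for (int U : NonDbgUsers)
        if (U != I)
          return false; // a real user exists; keep the instruction
      return true;
    }
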
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index c83db476a4de..fe78acf4d80a 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -1,9 +1,8 @@
//===- DetectDeadLanes.cpp - SubRegister Lane Usage Analysis --*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 4586649d17f0..ddd6cec5a178 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -1,9 +1,8 @@
//===- DwarfEHPrepare - Prepare exception handling for code generation ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -46,7 +45,7 @@ namespace {
class DwarfEHPrepare : public FunctionPass {
// RewindFunction - _Unwind_Resume or the target equivalent.
- Constant *RewindFunction = nullptr;
+ FunctionCallee RewindFunction = nullptr;
DominatorTree *DT = nullptr;
const TargetLowering *TLI = nullptr;
@@ -146,7 +145,7 @@ size_t DwarfEHPrepare::pruneUnreachableResumes(
size_t ResumeIndex = 0;
for (auto *RI : Resumes) {
for (auto *LP : CleanupLPads) {
- if (isPotentiallyReachable(LP, RI, DT)) {
+ if (isPotentiallyReachable(LP, RI, nullptr, DT)) {
ResumeReachable.set(ResumeIndex);
break;
}
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 364e1f030942..0a83760befaa 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -1,9 +1,8 @@
//===-- EarlyIfConversion.cpp - If-conversion on SSA form machine code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
index 54c53eb16312..486720cadd27 100644
--- a/lib/CodeGen/EdgeBundles.cpp
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -1,9 +1,8 @@
//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,7 +27,7 @@ ViewEdgeBundles("view-edge-bundles", cl::Hidden,
char EdgeBundles::ID = 0;
INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
- /* cfg = */true, /* analysis = */ true)
+ /* cfg = */true, /* is_analysis = */ true)
char &llvm::EdgeBundlesID = EdgeBundles::ID;
diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp
index 458dcf2b0e26..a2dd5eee33b7 100644
--- a/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDomainFix.cpp
@@ -1,9 +1,8 @@
//===- ExecutionDomainFix.cpp - Fix execution domain issues ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -337,11 +336,10 @@ void ExecutionDomainFix::visitSoftInstr(MachineInstr *mi, unsigned mask) {
}
// Sorted insertion.
// Enables giving priority to the latest domains during merging.
- auto I = std::upper_bound(
- Regs.begin(), Regs.end(), rx, [&](int LHS, const int RHS) {
- return RDA->getReachingDef(mi, RC->getRegister(LHS)) <
- RDA->getReachingDef(mi, RC->getRegister(RHS));
- });
+ const int Def = RDA->getReachingDef(mi, RC->getRegister(rx));
+ auto I = partition_point(Regs, [&](int I) {
+ return RDA->getReachingDef(mi, RC->getRegister(I)) <= Def;
+ });
Regs.insert(I, rx);
}
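
The replacement computes the reaching def of the inserted register once and uses partition_point, which lands on the same position upper_bound would; a standalone C++ sketch of the idiom (Key indexed by register id is an assumption for the model):

    #include <algorithm>
    #include <vector>

    void insertSorted(std::vector<int> &Regs, int Rx,
                      const std::vector<int> &Key) {
      const int Def = Key[Rx]; // precomputed sort key for the new element
      auto I = std::partition_point(
          Regs.begin(), Regs.end(), [&](int R) { return Key[R] <= Def; });
      Regs.insert(I, Rx); // keeps Regs sorted by Key, latest defs last
    }
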
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index ee7683adbcdd..b425482e6adf 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -1,9 +1,8 @@
//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,6 +36,14 @@ static cl::opt<unsigned> MemCmpEqZeroNumLoadsPerBlock(
cl::desc("The number of loads per basic block for inline expansion of "
"memcmp that is only being compared against zero."));
+static cl::opt<unsigned> MaxLoadsPerMemcmp(
+ "max-loads-per-memcmp", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp"));
+
+static cl::opt<unsigned> MaxLoadsPerMemcmpOptSize(
+ "max-loads-per-memcmp-opt-size", cl::Hidden,
+ cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
+
namespace {
@@ -106,8 +113,7 @@ class MemCmpExpansion {
public:
MemCmpExpansion(CallInst *CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout);
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout);
unsigned getNumBlocks();
uint64_t getNumLoads() const { return LoadSequence.size(); }
@@ -196,16 +202,10 @@ MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
MemCmpExpansion::MemCmpExpansion(
CallInst *const CI, uint64_t Size,
const TargetTransformInfo::MemCmpExpansionOptions &Options,
- const unsigned MaxNumLoads, const bool IsUsedForZeroCmp,
- const unsigned MaxLoadsPerBlockForZeroCmp, const DataLayout &TheDataLayout)
- : CI(CI),
- Size(Size),
- MaxLoadSize(0),
- NumLoadsNonOneByte(0),
- NumLoadsPerBlockForZeroCmp(MaxLoadsPerBlockForZeroCmp),
- IsUsedForZeroCmp(IsUsedForZeroCmp),
- DL(TheDataLayout),
- Builder(CI) {
+ const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout)
+ : CI(CI), Size(Size), MaxLoadSize(0), NumLoadsNonOneByte(0),
+ NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
+ IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), Builder(CI) {
assert(Size > 0 && "zero blocks");
// Scale the max size down if the target can load more bytes than we need.
llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
@@ -216,17 +216,17 @@ MemCmpExpansion::MemCmpExpansion(
MaxLoadSize = LoadSizes.front();
// Compute the decomposition.
unsigned GreedyNumLoadsNonOneByte = 0;
- LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, MaxNumLoads,
+ LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
GreedyNumLoadsNonOneByte);
NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
// If we allow overlapping loads and the load sequence is not already optimal,
// use overlapping loads.
if (Options.AllowOverlappingLoads &&
(LoadSequence.empty() || LoadSequence.size() > 2)) {
unsigned OverlappingNumLoadsNonOneByte = 0;
auto OverlappingLoads = computeOverlappingLoadSequence(
- Size, MaxLoadSize, MaxNumLoads, OverlappingNumLoadsNonOneByte);
+ Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
if (!OverlappingLoads.empty() &&
(LoadSequence.empty() ||
OverlappingLoads.size() < LoadSequence.size())) {
@@ -234,7 +234,7 @@ MemCmpExpansion::MemCmpExpansion(
NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
}
}
- assert(LoadSequence.size() <= MaxNumLoads && "broken invariant");
+ assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
}
unsigned MemCmpExpansion::getNumBlocks() {
@@ -316,7 +316,7 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
assert(LoadIndex < getNumLoads() &&
"getCompareLoadPairs() called with no remaining loads");
std::vector<Value *> XorList, OrList;
- Value *Diff;
+ Value *Diff = nullptr;
const unsigned NumLoads =
std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
@@ -393,6 +393,8 @@ Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
while (OrList.size() != 1) {
OrList = pairWiseOr(OrList);
}
+
+ assert(Diff && "Failed to find comparison diff");
Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
}
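
A hedged C++ model of the zero-equality expansion this builds for, say, 16 bytes with two 8-byte loads per block: xor each pair, or the xors, and compare once against zero.

    #include <cstdint>
    #include <cstring>

    bool eq16(const void *P, const void *Q) {
      uint64_t A[2], B[2];
      std::memcpy(A, P, 16);
      std::memcpy(B, Q, 16);
      uint64_t Or = (A[0] ^ B[0]) | (A[1] ^ B[1]); // XorList -> OrList
      return Or == 0;                              // single icmp against 0
    }
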
@@ -722,7 +724,7 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
NumMemCmpCalls++;
// Early exit from expansion if -Oz.
- if (CI->getFunction()->optForMinSize())
+ if (CI->getFunction()->hasMinSize())
return false;
// Early exit from expansion if size is not a constant.
@@ -739,18 +741,21 @@ static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
// TTI call to check if target would like to expand memcmp. Also, get the
// available load sizes.
const bool IsUsedForZeroCmp = isOnlyUsedInZeroEqualityComparison(CI);
- const auto *const Options = TTI->enableMemCmpExpansion(IsUsedForZeroCmp);
+ auto Options = TTI->enableMemCmpExpansion(CI->getFunction()->hasOptSize(),
+ IsUsedForZeroCmp);
if (!Options) return false;
- const unsigned MaxNumLoads =
- TLI->getMaxExpandSizeMemcmp(CI->getFunction()->optForSize());
+ if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
+ Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
+
+ if (CI->getFunction()->hasOptSize() &&
+ MaxLoadsPerMemcmpOptSize.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
- unsigned NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences()
- ? MemCmpEqZeroNumLoadsPerBlock
- : TLI->getMemcmpEqZeroLoadsPerBlock();
+ if (!CI->getFunction()->hasOptSize() && MaxLoadsPerMemcmp.getNumOccurrences())
+ Options.MaxNumLoads = MaxLoadsPerMemcmp;
- MemCmpExpansion Expansion(CI, SizeVal, *Options, MaxNumLoads,
- IsUsedForZeroCmp, NumLoadsPerBlock, *DL);
+ MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL);
// Don't expand if this will require more loads than desired by the target.
if (Expansion.getNumLoads() == 0) {
@@ -824,7 +829,8 @@ bool ExpandMemCmpPass::runOnBlock(
}
LibFunc Func;
if (TLI->getLibFunc(ImmutableCallSite(CI), Func) &&
- Func == LibFunc_memcmp && expandMemCmp(CI, TTI, TL, &DL)) {
+ (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
+ expandMemCmp(CI, TTI, TL, &DL)) {
return true;
}
}
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index f2a2bcbb94b1..0ab70aff7dc4 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -1,9 +1,8 @@
//===-- ExpandPostRAPseudos.cpp - Pseudo instruction expansion pass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ExpandReductions.cpp b/lib/CodeGen/ExpandReductions.cpp
index 7552ba8cd85d..1069a2423b8b 100644
--- a/lib/CodeGen/ExpandReductions.cpp
+++ b/lib/CodeGen/ExpandReductions.cpp
@@ -1,9 +1,8 @@
//===--- ExpandReductions.cpp - Expand experimental reduction intrinsics --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,9 +29,9 @@ namespace {
unsigned getOpcode(Intrinsic::ID ID) {
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
return Instruction::FAdd;
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
return Instruction::FMul;
case Intrinsic::experimental_vector_reduce_add:
return Instruction::Add;
@@ -84,22 +83,33 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
Worklist.push_back(II);
for (auto *II : Worklist) {
+ if (!TTI->shouldExpandReduction(II))
+ continue;
+
+ FastMathFlags FMF =
+ isa<FPMathOperator>(II) ? II->getFastMathFlags() : FastMathFlags{};
+ Intrinsic::ID ID = II->getIntrinsicID();
+ RecurrenceDescriptor::MinMaxRecurrenceKind MRK = getMRK(ID);
+
+ Value *Rdx = nullptr;
IRBuilder<> Builder(II);
- bool IsOrdered = false;
- Value *Acc = nullptr;
- Value *Vec = nullptr;
- auto ID = II->getIntrinsicID();
- auto MRK = RecurrenceDescriptor::MRK_Invalid;
+ IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
+ Builder.setFastMathFlags(FMF);
switch (ID) {
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul: {
// FMFs must be attached to the call, otherwise it's an ordered reduction
// and it can't be handled by generating a shuffle sequence.
- if (!II->getFastMathFlags().isFast())
- IsOrdered = true;
- Acc = II->getArgOperand(0);
- Vec = II->getArgOperand(1);
- break;
+ Value *Acc = II->getArgOperand(0);
+ Value *Vec = II->getArgOperand(1);
+ if (!FMF.allowReassoc())
+ Rdx = getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK);
+ else {
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ Rdx = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(ID),
+ Acc, Rdx, "bin.rdx");
+ }
+ } break;
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -110,18 +120,13 @@ bool expandReductions(Function &F, const TargetTransformInfo *TTI) {
case Intrinsic::experimental_vector_reduce_umax:
case Intrinsic::experimental_vector_reduce_umin:
case Intrinsic::experimental_vector_reduce_fmax:
- case Intrinsic::experimental_vector_reduce_fmin:
- Vec = II->getArgOperand(0);
- MRK = getMRK(ID);
- break;
+ case Intrinsic::experimental_vector_reduce_fmin: {
+ Value *Vec = II->getArgOperand(0);
+ Rdx = getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
+ } break;
default:
continue;
}
- if (!TTI->shouldExpandReduction(II))
- continue;
- Value *Rdx =
- IsOrdered ? getOrderedReduction(Builder, Acc, Vec, getOpcode(ID), MRK)
- : getShuffleReduction(Builder, Vec, getOpcode(ID), MRK);
II->replaceAllUsesWith(Rdx);
II->eraseFromParent();
Changed = true;
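
A scalar C++ model of the two expansion strategies above (power-of-two, non-empty vector width assumed; the shuffle form is only legal when the call allows reassociation, as checked above):

    #include <vector>

    float reduceFAdd(float Acc, std::vector<float> V) {
      // Shuffle (log2) reduction: halve and combine until one lane remains.
      for (size_t Half = V.size() / 2; Half != 0; Half /= 2)
        for (size_t I = 0; I != Half; ++I)
          V[I] += V[I + Half];
      return Acc + V[0]; // "bin.rdx": fold the accumulator in last
    }
    float reduceFAddOrdered(float Acc, const std::vector<float> &V) {
      // Ordered reduction: strictly left-to-right, no reassociation.
      for (float X : V)
        Acc += X;
      return Acc;
    }
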
diff --git a/lib/CodeGen/FEntryInserter.cpp b/lib/CodeGen/FEntryInserter.cpp
index 4ddf9f92836c..a122f490884e 100644
--- a/lib/CodeGen/FEntryInserter.cpp
+++ b/lib/CodeGen/FEntryInserter.cpp
@@ -1,9 +1,8 @@
//===-- FEntryInsertion.cpp - Patchable prologues for LLVM -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/FaultMaps.cpp b/lib/CodeGen/FaultMaps.cpp
index 361558a0e562..600f72d320eb 100644
--- a/lib/CodeGen/FaultMaps.cpp
+++ b/lib/CodeGen/FaultMaps.cpp
@@ -1,9 +1,8 @@
//===- FaultMaps.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/FinalizeISel.cpp
index ec586a2caea3..772d7f71bb37 100644
--- a/lib/CodeGen/ExpandISelPseudos.cpp
+++ b/lib/CodeGen/FinalizeISel.cpp
@@ -1,16 +1,16 @@
-//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//===-- llvm/CodeGen/FinalizeISel.cpp ---------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
-// Expand Pseudo-instructions produced by ISel. These are usually to allow
-// the expansion to contain control flow, such as a conditional move
-// implemented with a conditional branch and a phi, or an atomic operation
-// implemented with a loop.
+/// This pass expands pseudo-instructions produced by ISel, fixes register
+/// reservations, and may adjust machine frame information.
+/// The pseudo-instructions allow the expansion to contain control flow, such
+/// as a conditional move implemented with a conditional branch and a phi, or
+/// an atomic operation implemented with a loop.
//
//===----------------------------------------------------------------------===//
@@ -22,13 +22,13 @@
#include "llvm/Support/Debug.h"
using namespace llvm;
-#define DEBUG_TYPE "expand-isel-pseudos"
+#define DEBUG_TYPE "finalize-isel"
namespace {
- class ExpandISelPseudos : public MachineFunctionPass {
+ class FinalizeISel : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- ExpandISelPseudos() : MachineFunctionPass(ID) {}
+ FinalizeISel() : MachineFunctionPass(ID) {}
private:
bool runOnMachineFunction(MachineFunction &MF) override;
@@ -39,12 +39,12 @@ namespace {
};
} // end anonymous namespace
-char ExpandISelPseudos::ID = 0;
-char &llvm::ExpandISelPseudosID = ExpandISelPseudos::ID;
-INITIALIZE_PASS(ExpandISelPseudos, DEBUG_TYPE,
- "Expand ISel Pseudo-instructions", false, false)
+char FinalizeISel::ID = 0;
+char &llvm::FinalizeISelID = FinalizeISel::ID;
+INITIALIZE_PASS(FinalizeISel, DEBUG_TYPE,
+ "Finalize ISel and expand pseudo-instructions", false, false)
-bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+bool FinalizeISel::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
@@ -70,5 +70,7 @@ bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
}
}
+ TLI->finalizeLowering(MF);
+
return Changed;
}
diff --git a/lib/CodeGen/FuncletLayout.cpp b/lib/CodeGen/FuncletLayout.cpp
index 581cd423f2d4..75f6d0b8f0bf 100644
--- a/lib/CodeGen/FuncletLayout.cpp
+++ b/lib/CodeGen/FuncletLayout.cpp
@@ -1,9 +1,8 @@
//===-- FuncletLayout.cpp - Contiguously lay out funclets -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 1c80556dfef5..9c53550eaa9d 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -1,9 +1,8 @@
//===-- GCMetadata.cpp - Garbage collector metadata -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCMetadataPrinter.cpp b/lib/CodeGen/GCMetadataPrinter.cpp
index bc7beb6f6c2d..500dba9aea37 100644
--- a/lib/CodeGen/GCMetadataPrinter.cpp
+++ b/lib/CodeGen/GCMetadataPrinter.cpp
@@ -1,9 +1,8 @@
//===- GCMetadataPrinter.cpp - Garbage collection infrastructure ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index e8ccd84b0b93..90571d090bfb 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -1,9 +1,8 @@
//===-- GCRootLowering.cpp - Garbage collection infrastructure ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -214,7 +213,7 @@ bool LowerIntrinsics::DoLowering(Function &F, GCStrategy &S) {
}
case Intrinsic::gcread: {
// Replace a read barrier with a simple load.
- Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI);
+ Value *Ld = new LoadInst(CI->getType(), CI->getArgOperand(1), "", CI);
Ld->takeName(CI);
CI->replaceAllUsesWith(Ld);
CI->eraseFromParent();
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 6be4c16c6301..43d06b0f82e9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -1,9 +1,8 @@
//===- GCStrategy.cpp - Garbage Collector Description ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 89c525c5ba15..4518dbee1a9f 100644
--- a/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -1,9 +1,8 @@
//===- CSEInfo.cpp ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,8 +27,8 @@ void UniqueMachineInstr::Profile(FoldingSetNodeID &ID) {
}
/// -----------------------------------------
-/// --------- CSEConfig ---------- ///
-bool CSEConfig::shouldCSEOpc(unsigned Opc) {
+/// --------- CSEConfigFull ---------- ///
+bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
switch (Opc) {
default:
break;
@@ -61,6 +60,17 @@ bool CSEConfig::shouldCSEOpc(unsigned Opc) {
bool CSEConfigConstantOnly::shouldCSEOpc(unsigned Opc) {
return Opc == TargetOpcode::G_CONSTANT;
}
+
+std::unique_ptr<CSEConfigBase>
+llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
+ std::unique_ptr<CSEConfigBase> Config;
+ if (Level == CodeGenOpt::None)
+ Config = make_unique<CSEConfigConstantOnly>();
+ else
+ Config = make_unique<CSEConfigFull>();
+ return Config;
+}
+
/// -----------------------------------------
/// -------- GISelCSEInfo -------------//
@@ -139,7 +149,7 @@ MachineInstr *GISelCSEInfo::getMachineInstrIfExists(FoldingSetNodeID &ID,
void *&InsertPos) {
handleRecordedInsts();
if (auto *Inst = getNodeIfExists(ID, MBB, InsertPos)) {
- LLVM_DEBUG(dbgs() << "CSEInfo: Found Instr " << *Inst->MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Found Instr " << *Inst->MI;);
return const_cast<MachineInstr *>(Inst->MI);
}
return nullptr;
@@ -158,14 +168,14 @@ void GISelCSEInfo::countOpcodeHit(unsigned Opc) {
void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) {
if (shouldCSE(MI->getOpcode())) {
TemporaryInsts.insert(MI);
- LLVM_DEBUG(dbgs() << "CSEInfo: Recording new MI" << *MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Recording new MI " << *MI);
}
}
void GISelCSEInfo::handleRecordedInst(MachineInstr *MI) {
assert(shouldCSE(MI->getOpcode()) && "Invalid instruction for CSE");
auto *UMI = InstrMapping.lookup(MI);
- LLVM_DEBUG(dbgs() << "CSEInfo: Handling recorded MI" << *MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Handling recorded MI " << *MI);
if (UMI) {
// Invalidate this MI.
invalidateUniqueMachineInstr(UMI);
@@ -224,14 +234,14 @@ void GISelCSEInfo::analyze(MachineFunction &MF) {
for (MachineInstr &MI : MBB) {
if (!shouldCSE(MI.getOpcode()))
continue;
- LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI << "\n";);
+ LLVM_DEBUG(dbgs() << "CSEInfo::Add MI: " << MI);
insertInstr(&MI);
}
}
}
void GISelCSEInfo::releaseMemory() {
- // print();
+ print();
CSEMap.clear();
InstrMapping.clear();
UniqueInstrAllocator.Reset();
@@ -245,11 +255,11 @@ void GISelCSEInfo::releaseMemory() {
}
void GISelCSEInfo::print() {
-#ifndef NDEBUG
- for (auto &It : OpcodeHitTable) {
- dbgs() << "CSE Count for Opc " << It.first << " : " << It.second << "\n";
- };
-#endif
+ LLVM_DEBUG(for (auto &It
+ : OpcodeHitTable) {
+ dbgs() << "CSEInfo::CSE Hit for Opc " << It.first << " : " << It.second
+ << "\n";
+ };);
}
/// -----------------------------------------
// ---- Profiling methods for FoldingSetNode --- //
@@ -349,8 +359,9 @@ const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
return *this;
}
-GISelCSEInfo &GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfig> CSEOpt,
- bool Recompute) {
+GISelCSEInfo &
+GISelCSEAnalysisWrapper::get(std::unique_ptr<CSEConfigBase> CSEOpt,
+ bool Recompute) {
if (!AlreadyComputed || Recompute) {
Info.setCSEConfig(std::move(CSEOpt));
Info.analyze(*MF);
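
Note: the hunk above also introduces getStandardCSEConfigForOpt, which picks a CSE policy from the optimization level: constants-only at CodeGenOpt::None, full CSE otherwise. A minimal standalone sketch of that dispatch pattern, with illustrative (non-LLVM) names:

#include <memory>

enum class OptLevel { None, Default };

struct ConfigBase {
  virtual ~ConfigBase() = default;
  virtual bool shouldCSEOpc(unsigned Opc) = 0;
};

// Stand-in for CSEConfigFull: this sketch just CSEs every opcode.
struct FullConfig : ConfigBase {
  bool shouldCSEOpc(unsigned) override { return true; }
};

// Stand-in for CSEConfigConstantOnly: only one constant-like opcode qualifies.
struct ConstantOnlyConfig : ConfigBase {
  bool shouldCSEOpc(unsigned Opc) override { return Opc == 1; /* "G_CONSTANT" */ }
};

std::unique_ptr<ConfigBase> getConfigForOpt(OptLevel Level) {
  if (Level == OptLevel::None)
    return std::make_unique<ConstantOnlyConfig>();
  return std::make_unique<FullConfig>();
}
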
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 863efe0c3e34..461bc6038c2c 100644
--- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/CSEMIRBuilder.cpp - MIBuilder--*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -40,6 +39,7 @@ CSEMIRBuilder::getDominatingInstrForID(FoldingSetNodeID &ID,
MachineInstr *MI =
CSEInfo->getMachineInstrIfExists(ID, CurMBB, NodeInsertPos);
if (MI) {
+ CSEInfo->countOpcodeHit(MI->getOpcode());
auto CurrPos = getInsertPt();
if (!dominates(MI, CurrPos))
CurMBB->splice(CurrPos, CurMBB, MI);
@@ -195,6 +195,12 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
constexpr unsigned Opc = TargetOpcode::G_CONSTANT;
if (!canPerformCSEForOpc(Opc))
return MachineIRBuilder::buildConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildConstant(Ty.getElementType(), Val));
+
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
void *InsertPos = nullptr;
@@ -206,6 +212,7 @@ MachineInstrBuilder CSEMIRBuilder::buildConstant(const DstOp &Res,
// Handle generating copies here.
return generateCopiesIfRequired({Res}, MIB);
}
+
MachineInstrBuilder NewMIB = MachineIRBuilder::buildConstant(Res, Val);
return memoizeMI(NewMIB, InsertPos);
}
@@ -215,6 +222,12 @@ MachineInstrBuilder CSEMIRBuilder::buildFConstant(const DstOp &Res,
constexpr unsigned Opc = TargetOpcode::G_FCONSTANT;
if (!canPerformCSEForOpc(Opc))
return MachineIRBuilder::buildFConstant(Res, Val);
+
+ // For vectors, CSE the element only for now.
+ LLT Ty = Res.getLLTTy(*getMRI());
+ if (Ty.isVector())
+ return buildSplatVector(Res, buildFConstant(Ty.getElementType(), Val));
+
FoldingSetNodeID ID;
GISelInstProfileBuilder ProfBuilder(ID, *getMRI());
void *InsertPos = nullptr;
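
Note: both buildConstant and buildFConstant now handle vector results by CSE'ing the scalar element and splatting it, rather than profiling the whole vector constant. A rough analogue of "memoize the element, then splat", using a plain map in place of the MIR folding set (illustrative names):

#include <cstdint>
#include <map>
#include <vector>

struct ConstantPool {
  std::map<int64_t, int> Memo; // value -> id of the unique scalar constant
  int NextId = 0;

  int buildScalar(int64_t V) {
    auto It = Memo.find(V);
    if (It != Memo.end())
      return It->second;       // CSE hit: reuse the existing constant.
    return Memo[V] = NextId++; // Otherwise materialize a new one.
  }

  // Vector case: materialize the unique element once, then splat it.
  std::vector<int> buildSplat(int64_t V, unsigned NumElts) {
    return std::vector<int>(NumElts, buildScalar(V));
  }
};
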
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index 724ecedf3b3f..a5d8205a34a8 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/CallLowering.cpp - Call lowering -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -13,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -21,13 +21,17 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
+#define DEBUG_TYPE "call-lowering"
+
using namespace llvm;
void CallLowering::anchor() {}
-bool CallLowering::lowerCall(
- MachineIRBuilder &MIRBuilder, ImmutableCallSite CS, unsigned ResReg,
- ArrayRef<unsigned> ArgRegs, std::function<unsigned()> GetCalleeReg) const {
+bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
+ ArrayRef<Register> ResRegs,
+ ArrayRef<ArrayRef<Register>> ArgRegs,
+ Register SwiftErrorVReg,
+ std::function<unsigned()> GetCalleeReg) const {
auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
@@ -40,8 +44,8 @@ bool CallLowering::lowerCall(
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
- // We don't currently support swifterror or swiftself args.
- if (OrigArg.Flags.isSwiftError() || OrigArg.Flags.isSwiftSelf())
+ // We don't currently support swiftself args.
+ if (OrigArg.Flags.isSwiftSelf())
return false;
OrigArgs.push_back(OrigArg);
++i;
@@ -53,11 +57,12 @@ bool CallLowering::lowerCall(
else
Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
- ArgInfo OrigRet{ResReg, CS.getType(), ISD::ArgFlagsTy{}};
+ ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
if (!OrigRet.Ty->isVoidTy())
setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
- return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs);
+ return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
+ SwiftErrorVReg);
}
template <typename FuncInfoTy>
@@ -84,7 +89,10 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
- Arg.Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
+
+ auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
+ Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
unsigned FrameAlign;
@@ -109,21 +117,78 @@ CallLowering::setArgFlags<CallInst>(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const CallInst &FuncInfo) const;
+Register CallLowering::packRegs(ArrayRef<Register> SrcRegs, Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(SrcRegs.size() > 1 && "Nothing to pack");
+
+ const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+ MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+ LLT PackedLLT = getLLTForType(*PackedTy, DL);
+
+ SmallVector<LLT, 8> LLTs;
+ SmallVector<uint64_t, 8> Offsets;
+ computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+ assert(LLTs.size() == SrcRegs.size() && "Regs / types mismatch");
+
+ Register Dst = MRI->createGenericVirtualRegister(PackedLLT);
+ MIRBuilder.buildUndef(Dst);
+ for (unsigned i = 0; i < SrcRegs.size(); ++i) {
+ Register NewDst = MRI->createGenericVirtualRegister(PackedLLT);
+ MIRBuilder.buildInsert(NewDst, Dst, SrcRegs[i], Offsets[i]);
+ Dst = NewDst;
+ }
+
+ return Dst;
+}
+
+void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
+ Type *PackedTy,
+ MachineIRBuilder &MIRBuilder) const {
+ assert(DstRegs.size() > 1 && "Nothing to unpack");
+
+ const DataLayout &DL = MIRBuilder.getMF().getDataLayout();
+
+ SmallVector<LLT, 8> LLTs;
+ SmallVector<uint64_t, 8> Offsets;
+ computeValueLLTs(DL, *PackedTy, LLTs, &Offsets);
+ assert(LLTs.size() == DstRegs.size() && "Regs / types mismatch");
+
+ for (unsigned i = 0; i < DstRegs.size(); ++i)
+ MIRBuilder.buildExtract(DstRegs[i], SrcReg, Offsets[i]);
+}
+
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
ArrayRef<ArgInfo> Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
- const DataLayout &DL = F.getParent()->getDataLayout();
-
SmallVector<CCValAssign, 16> ArgLocs;
CCState CCInfo(F.getCallingConv(), F.isVarArg(), MF, ArgLocs, F.getContext());
+ return handleAssignments(CCInfo, ArgLocs, MIRBuilder, Args, Handler);
+}
+
+bool CallLowering::handleAssignments(CCState &CCInfo,
+ SmallVectorImpl<CCValAssign> &ArgLocs,
+ MachineIRBuilder &MIRBuilder,
+ ArrayRef<ArgInfo> Args,
+ ValueHandler &Handler) const {
+ MachineFunction &MF = MIRBuilder.getMF();
+ const Function &F = MF.getFunction();
+ const DataLayout &DL = F.getParent()->getDataLayout();
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
- return false;
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (!Handler.isArgumentHandler() || !CurVT.isValid())
+ return false;
+ CurVT = TLI->getRegisterTypeForCallingConv(
+ F.getContext(), F.getCallingConv(), EVT(CurVT));
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
+ return false;
+ }
}
for (unsigned i = 0, e = Args.size(), j = 0; i != e; ++i, ++j) {
@@ -137,16 +202,49 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
continue;
}
- if (VA.isRegLoc())
- Handler.assignValueToReg(Args[i].Reg, VA.getLocReg(), VA);
- else if (VA.isMemLoc()) {
- unsigned Size = VA.getValVT() == MVT::iPTR
- ? DL.getPointerSize()
- : alignTo(VA.getValVT().getSizeInBits(), 8) / 8;
+ assert(Args[i].Regs.size() == 1 &&
+ "Can't handle multiple virtual regs yet");
+
+ // FIXME: Pack registers if we have more than one.
+ Register ArgReg = Args[i].Regs[0];
+
+ if (VA.isRegLoc()) {
+ MVT OrigVT = MVT::getVT(Args[i].Ty);
+ MVT VAVT = VA.getValVT();
+ if (Handler.isArgumentHandler() && VAVT != OrigVT) {
+ if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
+ return false; // Can't handle this type of arg yet.
+ const LLT VATy(VAVT);
+ Register NewReg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
+ Handler.assignValueToReg(NewReg, VA.getLocReg(), VA);
+ // If it's a vector type, we either need to truncate the elements
+ // or do an unmerge to get the lower block of elements.
+ if (VATy.isVector() &&
+ VATy.getNumElements() > OrigVT.getVectorNumElements()) {
+ const LLT OrigTy(OrigVT);
+ // Just handle the case where the VA type is 2 * original type.
+ if (VATy.getNumElements() != OrigVT.getVectorNumElements() * 2) {
+ LLVM_DEBUG(dbgs()
+ << "Incoming promoted vector arg has too many elts");
+ return false;
+ }
+ auto Unmerge = MIRBuilder.buildUnmerge({OrigTy, OrigTy}, {NewReg});
+ MIRBuilder.buildCopy(ArgReg, Unmerge.getReg(0));
+ } else {
+ MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
+ }
+ } else {
+ Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
+ }
+ } else if (VA.isMemLoc()) {
+ MVT VT = MVT::getVT(Args[i].Ty);
+ unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
+ : alignTo(VT.getSizeInBits(), 8) / 8;
unsigned Offset = VA.getLocMemOffset();
MachinePointerInfo MPO;
- unsigned StackAddr = Handler.getStackAddress(Size, Offset, MPO);
- Handler.assignValueToAddress(Args[i].Reg, StackAddr, Size, MPO, VA);
+ Register StackAddr = Handler.getStackAddress(Size, Offset, MPO);
+ Handler.assignValueToAddress(ArgReg, StackAddr, Size, MPO, VA);
} else {
// FIXME: Support byvals and other weirdness
return false;
@@ -155,9 +253,11 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
return true;
}
-unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
+Register CallLowering::ValueHandler::extendRegister(Register ValReg,
CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
+ if (LocTy.getSizeInBits() == MRI.getType(ValReg).getSizeInBits())
+ return ValReg;
switch (VA.getLocInfo()) {
default: break;
case CCValAssign::Full:
@@ -170,12 +270,12 @@ unsigned CallLowering::ValueHandler::extendRegister(unsigned ValReg,
return MIB->getOperand(0).getReg();
}
case CCValAssign::SExt: {
- unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildSExt(NewReg, ValReg);
return NewReg;
}
case CCValAssign::ZExt: {
- unsigned NewReg = MRI.createGenericVirtualRegister(LocTy);
+ Register NewReg = MRI.createGenericVirtualRegister(LocTy);
MIRBuilder.buildZExt(NewReg, ValReg);
return NewReg;
}
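
Note: packRegs and unpackRegs above build one wide value from several virtual registers (and back) by inserting or extracting each piece at the offsets reported by computeValueLLTs. The same shape on a plain integer, with bit offsets standing in for the MIR offsets (illustrative; assumes the pieces don't overlap):

#include <cassert>
#include <cstdint>
#include <vector>

uint64_t pack(const std::vector<uint32_t> &Parts,
              const std::vector<unsigned> &BitOffsets) {
  assert(Parts.size() == BitOffsets.size() && "Regs / types mismatch");
  uint64_t Packed = 0; // analogue of the initial buildUndef destination
  for (size_t i = 0; i < Parts.size(); ++i)
    Packed |= uint64_t(Parts[i]) << BitOffsets[i]; // analogue of buildInsert
  return Packed;
}

std::vector<uint32_t> unpack(uint64_t Packed,
                             const std::vector<unsigned> &BitOffsets) {
  std::vector<uint32_t> Parts;
  for (unsigned Off : BitOffsets) // analogue of buildExtract per offset
    Parts.push_back(uint32_t(Packed >> Off));
  return Parts;
}
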
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 45b0e36fd7d9..31cb1dbbc9b5 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/Combiner.cpp -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,7 +50,7 @@ public:
}
void erasingInstr(MachineInstr &MI) override {
- LLVM_DEBUG(dbgs() << "Erased: " << MI << "\n");
+ LLVM_DEBUG(dbgs() << "Erasing: " << MI << "\n");
WorkList.remove(&MI);
}
void createdInstr(MachineInstr &MI) override {
@@ -130,9 +129,10 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
CurMI->eraseFromParentAndMarkDBGValuesForRemoval();
continue;
}
- WorkList.insert(CurMI);
+ WorkList.deferred_insert(CurMI);
}
}
+ WorkList.finalize();
// Main Loop. Process the instructions here.
while (!WorkList.empty()) {
MachineInstr *CurrInst = WorkList.pop_back_val();
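
Note: the worklist change buffers insertions made while scanning the function (deferred_insert) and merges them in one finalize step before the main loop, so the structure is never mutated mid-iteration. A sketch of that protocol (illustrative; LLVM's worklist is set-backed, a vector is used here for brevity):

#include <vector>

template <typename T> class DeferredWorkList {
  std::vector<T> Work;     // live worklist, popped from the back
  std::vector<T> Deferred; // insertions buffered during a scan

public:
  void deferred_insert(T V) { Deferred.push_back(V); }
  void finalize() { // called once no iteration is in flight
    Work.insert(Work.end(), Deferred.begin(), Deferred.end());
    Deferred.clear();
  }
  bool empty() const { return Work.empty(); }
  T pop_back_val() {
    T V = Work.back();
    Work.pop_back();
    return V;
  }
};
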
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index b1c5670a6dec..9cbf3dd83ff1 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
@@ -23,8 +22,8 @@ CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
MachineIRBuilder &B)
: Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
-void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
- unsigned ToReg) const {
+void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
+ Register ToReg) const {
Observer.changingAllUsesOfReg(MRI, FromReg);
if (MRI.constrainRegAttrs(ToReg, FromReg))
@@ -37,7 +36,7 @@ void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, unsigned FromReg,
void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
MachineOperand &FromRegOp,
- unsigned ToReg) const {
+ Register ToReg) const {
assert(FromRegOp.getParent() && "Expected an operand in an MI");
Observer.changingInstr(*FromRegOp.getParent());
@@ -47,6 +46,13 @@ void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
}
bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
+ if (matchCombineCopy(MI)) {
+ applyCombineCopy(MI);
+ return true;
+ }
+ return false;
+}
+bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
return false;
unsigned DstReg = MI.getOperand(0).getReg();
@@ -55,20 +61,18 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
LLT SrcTy = MRI.getType(SrcReg);
// Simple Copy Propagation.
// a(sx) = COPY b(sx) -> Replace all uses of a with b.
- if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy) {
- MI.eraseFromParent();
- replaceRegWith(MRI, DstReg, SrcReg);
+ if (DstTy.isValid() && SrcTy.isValid() && DstTy == SrcTy)
return true;
- }
return false;
}
+void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned SrcReg = MI.getOperand(1).getReg();
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, SrcReg);
+}
namespace {
-struct PreferredTuple {
- LLT Ty; // The result type of the extend.
- unsigned ExtendOpcode; // G_ANYEXT/G_SEXT/G_ZEXT
- MachineInstr *MI;
-};
/// Select a preference between two uses. CurrentUse is the current preference
/// while *ForCandidate is attributes of the candidate under consideration.
@@ -127,7 +131,8 @@ PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
/// want to try harder to find a dominating block.
static void InsertInsnsWithoutSideEffectsBeforeUse(
MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
- std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator)>
+ std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
+ MachineOperand &UseMO)>
Inserter) {
MachineInstr &UseMI = *UseMO.getParent();
@@ -143,26 +148,26 @@ static void InsertInsnsWithoutSideEffectsBeforeUse(
// the def instead of at the start of the block.
if (InsertBB == DefMI.getParent()) {
MachineBasicBlock::iterator InsertPt = &DefMI;
- Inserter(InsertBB, std::next(InsertPt));
+ Inserter(InsertBB, std::next(InsertPt), UseMO);
return;
}
// Otherwise we want the start of the BB
- Inserter(InsertBB, InsertBB->getFirstNonPHI());
+ Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
}
} // end anonymous namespace
bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
- struct InsertionPoint {
- MachineOperand *UseMO;
- MachineBasicBlock *InsertIntoBB;
- MachineBasicBlock::iterator InsertBefore;
- InsertionPoint(MachineOperand *UseMO, MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore)
- : UseMO(UseMO), InsertIntoBB(InsertIntoBB), InsertBefore(InsertBefore) {
- }
- };
+ PreferredTuple Preferred;
+ if (matchCombineExtendingLoads(MI, Preferred)) {
+ applyCombineExtendingLoads(MI, Preferred);
+ return true;
+ }
+ return false;
+}
+bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
// We match the loads and follow the uses to the extend instead of matching
// the extends and following the def to the load. This is because the load
// must remain in the same position for correctness (unless we also add code
@@ -182,6 +187,19 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
if (!LoadValueTy.isScalar())
return false;
+ // Most architectures are going to legalize loads narrower than s8 into at
+ // least a 1-byte load, and the MMOs can only describe memory accesses in
+ // multiples of bytes.
+ // If we try to perform extload combining on those, we can end up with
+ // %a(s8) = extload %ptr (load 1 byte from %ptr)
+ // ... which is an illegal extload instruction.
+ if (LoadValueTy.getSizeInBits() < 8)
+ return false;
+
+ // Non-power-of-2 types will very likely be legalized into multiple loads,
+ // so don't bother trying to match them into extending loads.
+ if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
+ return false;
+
// Find the preferred type aside from the any-extends (unless it's the only
// one) and non-extending ops. We'll emit an extending load to that type and
// emit a variant of (extend (trunc X)) for the others according to the
@@ -192,7 +210,7 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
: MI.getOpcode() == TargetOpcode::G_SEXTLOAD
? TargetOpcode::G_SEXT
: TargetOpcode::G_ZEXT;
- PreferredTuple Preferred = {LLT(), PreferredOpcode, nullptr};
+ Preferred = {LLT(), PreferredOpcode, nullptr};
for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
@@ -211,9 +229,35 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
+ return true;
+}
+void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
+ PreferredTuple &Preferred) {
// Rewrite the load to the chosen extending load.
- unsigned ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+ Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
+
+ // Inserter to insert a truncate back to the original type at a given point
+ // with some basic CSE to limit truncate duplication to one per BB.
+ DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
+ auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
+ MachineBasicBlock::iterator InsertBefore,
+ MachineOperand &UseMO) {
+ MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
+ if (PreviouslyEmitted) {
+ Observer.changingInstr(*UseMO.getParent());
+ UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
+ Observer.changedInstr(*UseMO.getParent());
+ return;
+ }
+
+ Builder.setInsertPt(*InsertIntoBB, InsertBefore);
+ Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
+ MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
+ EmittedInsns[InsertIntoBB] = NewMI;
+ replaceRegOpWith(MRI, UseMO, NewDstReg);
+ };
+
Observer.changingInstr(MI);
MI.setDesc(
Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
@@ -223,10 +267,13 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
: TargetOpcode::G_LOAD));
// Rewrite all the uses to fix up the types.
- SmallVector<MachineInstr *, 1> ScheduleForErase;
- SmallVector<InsertionPoint, 4> ScheduleForInsert;
- for (auto &UseMO : MRI.use_operands(LoadValue.getReg())) {
- MachineInstr *UseMI = UseMO.getParent();
+ auto &LoadValue = MI.getOperand(0);
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
+ Uses.push_back(&UseMO);
+
+ for (auto *UseMO : Uses) {
+ MachineInstr *UseMI = UseMO->getParent();
// If the extend is compatible with the preferred extend then we should fix
// up the type and extend so that it uses the preferred use.
@@ -247,7 +294,8 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// %2:_(s32) = G_SEXTLOAD ...
// ... = ... %2(s32)
replaceRegWith(MRI, UseDstReg, ChosenDstReg);
- ScheduleForErase.push_back(UseMO.getParent());
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
} else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
// If the preferred size is smaller, then keep the extend but extend
// from the result of the extending load. For example:
@@ -272,59 +320,87 @@ bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
// %4:_(s8) = G_TRUNC %2:_(s32)
// %3:_(s64) = G_ZEXT %2:_(s8)
// ... = ... %3(s64)
- InsertInsnsWithoutSideEffectsBeforeUse(
- Builder, MI, UseMO,
- [&](MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore) {
- ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
- });
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
+ InsertTruncAt);
}
continue;
}
// The use is (one of) the uses of the preferred use we chose earlier.
// We're going to update the load to def this value later so just erase
// the old extend.
- ScheduleForErase.push_back(UseMO.getParent());
+ Observer.erasingInstr(*UseMO->getParent());
+ UseMO->getParent()->eraseFromParent();
continue;
}
// The use isn't an extend. Truncate back to the type we originally loaded.
// This is free on many targets.
- InsertInsnsWithoutSideEffectsBeforeUse(
- Builder, MI, UseMO,
- [&](MachineBasicBlock *InsertIntoBB,
- MachineBasicBlock::iterator InsertBefore) {
- ScheduleForInsert.emplace_back(&UseMO, InsertIntoBB, InsertBefore);
- });
+ InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
}
- DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
- for (auto &InsertionInfo : ScheduleForInsert) {
- MachineOperand *UseMO = InsertionInfo.UseMO;
- MachineBasicBlock *InsertIntoBB = InsertionInfo.InsertIntoBB;
- MachineBasicBlock::iterator InsertBefore = InsertionInfo.InsertBefore;
-
- MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
- if (PreviouslyEmitted) {
- Observer.changingInstr(*UseMO->getParent());
- UseMO->setReg(PreviouslyEmitted->getOperand(0).getReg());
- Observer.changedInstr(*UseMO->getParent());
- continue;
- }
-
- Builder.setInsertPt(*InsertIntoBB, InsertBefore);
- unsigned NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
- MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
- EmittedInsns[InsertIntoBB] = NewMI;
- replaceRegOpWith(MRI, *UseMO, NewDstReg);
- }
- for (auto &EraseMI : ScheduleForErase) {
- Observer.erasingInstr(*EraseMI);
- EraseMI->eraseFromParent();
- }
MI.getOperand(0).setReg(ChosenDstReg);
Observer.changedInstr(MI);
+}
+
+bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+ // Try to match the following:
+ // bb1:
+ // %c(s32) = G_ICMP pred, %a, %b
+ // %c1(s1) = G_TRUNC %c(s32)
+ // G_BRCOND %c1, %bb2
+ // G_BR %bb3
+ // bb2:
+ // ...
+ // bb3:
+
+ // The above pattern does not fall through to the successor bb2, so a branch
+ // is always taken no matter which path is followed. Here we try to find that
+ // pattern and replace it with a conditional branch to bb3, falling through
+ // to bb2 otherwise.
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MachineBasicBlock::iterator BrIt(MI);
+ if (BrIt == MBB->begin())
+ return false;
+ assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
+
+ MachineInstr *BrCond = &*std::prev(BrIt);
+ if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
+ return false;
+ // Check that the next block is the conditional branch target.
+ if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
+ return false;
+
+ MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+ if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
+ !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
+ return false;
+ return true;
+}
+
+bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
+ if (!matchCombineBr(MI))
+ return false;
+ MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
+ MachineBasicBlock::iterator BrIt(MI);
+ MachineInstr *BrCond = &*std::prev(BrIt);
+ MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
+
+ CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
+ (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
+
+ // Invert the G_ICMP condition.
+ Observer.changingInstr(*CmpMI);
+ CmpMI->getOperand(1).setPredicate(InversePred);
+ Observer.changedInstr(*CmpMI);
+
+ // Change the conditional branch target.
+ Observer.changingInstr(*BrCond);
+ BrCond->getOperand(1).setMBB(BrTarget);
+ Observer.changedInstr(*BrCond);
+ MI.eraseFromParent();
return true;
}
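
Note: a recurring shape in this file is that each combine is now split into a match step that only inspects the MIR and an apply step that mutates it, with tryCombineX as the composition, so the two halves can also be driven separately. A minimal sketch of the structure (illustrative types):

struct Instr { int Opcode; };

struct Helper {
  bool matchCopy(const Instr &I) {
    return I.Opcode == 0; // stand-in for "is a trivially-forwardable COPY"
  }
  void applyCopy(Instr &I) {
    // rewrite all uses of the def to the source, then erase I
  }
  bool tryCopy(Instr &I) {
    if (!matchCopy(I))
      return false;
    applyCopy(I);
    return true;
  }
};
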
diff --git a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
index c693acbbf10b..62b903c30b89 100644
--- a/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
+++ b/lib/CodeGen/GlobalISel/GISelChangeObserver.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/GlobalISel/GISelChangeObserver.cpp --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,6 +26,7 @@ void GISelChangeObserver::changingAllUsesOfReg(
void GISelChangeObserver::finishedChangingAllUsesOfReg() {
for (auto *ChangedMI : ChangingAllUsesOfReg)
changedInstr(*ChangedMI);
+ ChangingAllUsesOfReg.clear();
}
RAIIDelegateInstaller::RAIIDelegateInstaller(MachineFunction &MF,
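
Note: the one-line fix clears ChangingAllUsesOfReg after the notifications go out; without it, a second changing/finished cycle would re-notify (possibly stale) instructions recorded in the previous batch. The batching pattern in miniature (illustrative):

#include <set>

struct BatchObserver {
  std::set<void *> Pending;

  void changing(void *MI) { Pending.insert(MI); } // record each touched instr
  void finished() {
    for (void *MI : Pending)
      changed(MI); // notify exactly once per instruction
    Pending.clear(); // the fix: reset state for the next batch
  }
  void changed(void *MI) { (void)MI; /* listeners would react here */ }
};
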
diff --git a/lib/CodeGen/GlobalISel/GlobalISel.cpp b/lib/CodeGen/GlobalISel/GlobalISel.cpp
index 00c6a9d63158..e0391e6f6467 100644
--- a/lib/CodeGen/GlobalISel/GlobalISel.cpp
+++ b/lib/CodeGen/GlobalISel/GlobalISel.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/GlobalIsel.cpp --- GlobalISel ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 95f6274aa068..6e99bdbd8264 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/IRTranslator.cpp - IRTranslator ---*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -16,8 +15,11 @@
#include "llvm/ADT/ScopeExit.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/FunctionLoweringInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/LowLevelType.h"
@@ -106,9 +108,7 @@ static void reportTranslationError(MachineFunction &MF,
ORE.emit(R);
}
-IRTranslator::IRTranslator() : MachineFunctionPass(ID) {
- initializeIRTranslatorPass(*PassRegistry::getPassRegistry());
-}
+IRTranslator::IRTranslator() : MachineFunctionPass(ID) { }
#ifndef NDEBUG
namespace {
@@ -136,7 +136,11 @@ public:
LLVM_DEBUG(dbgs() << "Checking DILocation from " << *CurrInst
<< " was copied to " << MI);
#endif
- assert(CurrInst->getDebugLoc() == MI.getDebugLoc() &&
+ // We allow insts in the entry block to have a debug loc line of 0 because
+ // they could have originated from constants, and we don't want a jumpy
+ // debug experience.
+ assert((CurrInst->getDebugLoc() == MI.getDebugLoc() ||
+ MI.getDebugLoc().getLine() == 0) &&
"Line info was not transferred to all instructions");
}
};
@@ -152,36 +156,6 @@ void IRTranslator::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
-static void computeValueLLTs(const DataLayout &DL, Type &Ty,
- SmallVectorImpl<LLT> &ValueTys,
- SmallVectorImpl<uint64_t> *Offsets = nullptr,
- uint64_t StartingOffset = 0) {
- // Given a struct type, recursively traverse the elements.
- if (StructType *STy = dyn_cast<StructType>(&Ty)) {
- const StructLayout *SL = DL.getStructLayout(STy);
- for (unsigned I = 0, E = STy->getNumElements(); I != E; ++I)
- computeValueLLTs(DL, *STy->getElementType(I), ValueTys, Offsets,
- StartingOffset + SL->getElementOffset(I));
- return;
- }
- // Given an array type, recursively traverse the elements.
- if (ArrayType *ATy = dyn_cast<ArrayType>(&Ty)) {
- Type *EltTy = ATy->getElementType();
- uint64_t EltSize = DL.getTypeAllocSize(EltTy);
- for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i)
- computeValueLLTs(DL, *EltTy, ValueTys, Offsets,
- StartingOffset + i * EltSize);
- return;
- }
- // Interpret void as zero return values.
- if (Ty.isVoidTy())
- return;
- // Base case: we can get an LLT for this LLVM IR type.
- ValueTys.push_back(getLLTForType(Ty, DL));
- if (Offsets != nullptr)
- Offsets->push_back(StartingOffset * 8);
-}
-
IRTranslator::ValueToVRegInfo::VRegListT &
IRTranslator::allocateVRegs(const Value &Val) {
assert(!VMap.contains(Val) && "Value already allocated in VMap");
@@ -195,7 +169,7 @@ IRTranslator::allocateVRegs(const Value &Val) {
return *Regs;
}
-ArrayRef<unsigned> IRTranslator::getOrCreateVRegs(const Value &Val) {
+ArrayRef<Register> IRTranslator::getOrCreateVRegs(const Value &Val) {
auto VRegsIt = VMap.findVRegs(Val);
if (VRegsIt != VMap.vregs_end())
return *VRegsIt->second;
@@ -249,7 +223,7 @@ int IRTranslator::getOrCreateFrameIndex(const AllocaInst &AI) {
if (FrameIndices.find(&AI) != FrameIndices.end())
return FrameIndices[&AI];
- unsigned ElementSize = DL->getTypeStoreSize(AI.getAllocatedType());
+ unsigned ElementSize = DL->getTypeAllocSize(AI.getAllocatedType());
unsigned Size =
ElementSize * cast<ConstantInt>(AI.getArraySize())->getZExtValue();
@@ -311,21 +285,20 @@ void IRTranslator::addMachineCFGPred(CFGEdge Edge, MachineBasicBlock *NewPred) {
bool IRTranslator::translateBinaryOp(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- // FIXME: handle signed/unsigned wrapping flags.
-
// Get or create a virtual register for each value.
// Unless the value is a Constant => loadimm cst?
// or inline constant each time?
// Creation of a virtual register needs to have a size.
- unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
- unsigned Res = getOrCreateVReg(U);
- auto FBinOp = MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op0).addUse(Op1);
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
if (isa<Instruction>(U)) {
- MachineInstr *FBinOpMI = FBinOp.getInstr();
const Instruction &I = cast<Instruction>(U);
- FBinOpMI->copyIRFlags(I);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
}
+
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op0, Op1}, Flags);
return true;
}
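
Note: translateBinaryOp now derives a flag bitmask from the IR instruction up front (MachineInstr::copyFlagsFromInstruction) and passes it to buildInstr, instead of building the MI and patching its flags afterwards. The extraction half looks roughly like this (flag bits and fields are illustrative, not LLVM's):

#include <cstdint>

struct IRFlags { bool NoNaNs; bool NoSignedWrap; };

enum : uint16_t { FmNoNans = 1 << 0, NoSWrap = 1 << 1 };

uint16_t flagsFromInstruction(const IRFlags &I) {
  uint16_t Flags = 0;
  if (I.NoNaNs)
    Flags |= FmNoNans; // fast-math flag carried onto the generic MI
  if (I.NoSignedWrap)
    Flags |= NoSWrap; // integer wrap flag likewise
  return Flags;
}
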
@@ -333,27 +306,38 @@ bool IRTranslator::translateFSub(const User &U, MachineIRBuilder &MIRBuilder) {
// -0.0 - X --> G_FNEG
if (isa<Constant>(U.getOperand(0)) &&
U.getOperand(0) == ConstantFP::getZeroValueForNegation(U.getType())) {
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(1)));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+ // Negate the last operand of the FSUB
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op1}, Flags);
return true;
}
return translateBinaryOp(TargetOpcode::G_FSUB, U, MIRBuilder);
}
bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
- MIRBuilder.buildInstr(TargetOpcode::G_FNEG)
- .addDef(getOrCreateVReg(U))
- .addUse(getOrCreateVReg(*U.getOperand(1)));
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ uint16_t Flags = 0;
+ if (isa<Instruction>(U)) {
+ const Instruction &I = cast<Instruction>(U);
+ Flags = MachineInstr::copyFlagsFromInstruction(I);
+ }
+ MIRBuilder.buildInstr(TargetOpcode::G_FNEG, {Res}, {Op0}, Flags);
return true;
}
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
const CmpInst *CI = dyn_cast<CmpInst>(&U);
- unsigned Op0 = getOrCreateVReg(*U.getOperand(0));
- unsigned Op1 = getOrCreateVReg(*U.getOperand(1));
- unsigned Res = getOrCreateVReg(U);
+ Register Op0 = getOrCreateVReg(*U.getOperand(0));
+ Register Op1 = getOrCreateVReg(*U.getOperand(1));
+ Register Res = getOrCreateVReg(U);
CmpInst::Predicate Pred =
CI ? CI->getPredicate() : static_cast<CmpInst::Predicate>(
cast<ConstantExpr>(U).getPredicate());
@@ -366,8 +350,8 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildCopy(
Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
else {
- auto FCmp = MIRBuilder.buildFCmp(Pred, Res, Op0, Op1);
- FCmp->copyIRFlags(*CI);
+ MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
+ MachineInstr::copyFlagsFromInstruction(*CI));
}
return true;
@@ -379,15 +363,20 @@ bool IRTranslator::translateRet(const User &U, MachineIRBuilder &MIRBuilder) {
if (Ret && DL->getTypeStoreSize(Ret->getType()) == 0)
Ret = nullptr;
- ArrayRef<unsigned> VRegs;
+ ArrayRef<Register> VRegs;
if (Ret)
VRegs = getOrCreateVRegs(*Ret);
+ Register SwiftErrorVReg = 0;
+ if (CLI->supportSwiftError() && SwiftError.getFunctionArg()) {
+ SwiftErrorVReg = SwiftError.getOrCreateVRegUseAt(
+ &RI, &MIRBuilder.getMBB(), SwiftError.getFunctionArg());
+ }
+
// The target may mess up with the insertion point, but
// this is not important as a return is the last instruction
// of the block anyway.
-
- return CLI->lowerReturn(MIRBuilder, Ret, VRegs);
+ return CLI->lowerReturn(MIRBuilder, Ret, VRegs, SwiftErrorVReg);
}
bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
@@ -395,7 +384,7 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
unsigned Succ = 0;
if (!BrInst.isUnconditional()) {
// We want a G_BRCOND to the true BB followed by an unconditional branch.
- unsigned Tst = getOrCreateVReg(*BrInst.getCondition());
+ Register Tst = getOrCreateVReg(*BrInst.getCondition());
const BasicBlock &TrueTgt = *cast<BasicBlock>(BrInst.getSuccessor(Succ++));
MachineBasicBlock &TrueBB = getMBB(TrueTgt);
MIRBuilder.buildBrCond(Tst, TrueBB);
@@ -415,48 +404,429 @@ bool IRTranslator::translateBr(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
-bool IRTranslator::translateSwitch(const User &U,
- MachineIRBuilder &MIRBuilder) {
- // For now, just translate as a chain of conditional branches.
- // FIXME: could we share most of the logic/code in
- // SelectionDAGBuilder::visitSwitch between SelectionDAG and GlobalISel?
- // At first sight, it seems most of the logic in there is independent of
- // SelectionDAG-specifics and a lot of work went in to optimize switch
- // lowering in there.
-
- const SwitchInst &SwInst = cast<SwitchInst>(U);
- const unsigned SwCondValue = getOrCreateVReg(*SwInst.getCondition());
- const BasicBlock *OrigBB = SwInst.getParent();
-
- LLT LLTi1 = getLLTForType(*Type::getInt1Ty(U.getContext()), *DL);
- for (auto &CaseIt : SwInst.cases()) {
- const unsigned CaseValueReg = getOrCreateVReg(*CaseIt.getCaseValue());
- const unsigned Tst = MRI->createGenericVirtualRegister(LLTi1);
- MIRBuilder.buildICmp(CmpInst::ICMP_EQ, Tst, CaseValueReg, SwCondValue);
- MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
- const BasicBlock *TrueBB = CaseIt.getCaseSuccessor();
- MachineBasicBlock &TrueMBB = getMBB(*TrueBB);
-
- MIRBuilder.buildBrCond(Tst, TrueMBB);
- CurMBB.addSuccessor(&TrueMBB);
- addMachineCFGPred({OrigBB, TrueBB}, &CurMBB);
-
- MachineBasicBlock *FalseMBB =
- MF->CreateMachineBasicBlock(SwInst.getParent());
- // Insert the comparison blocks one after the other.
- MF->insert(std::next(CurMBB.getIterator()), FalseMBB);
- MIRBuilder.buildBr(*FalseMBB);
- CurMBB.addSuccessor(FalseMBB);
-
- MIRBuilder.setMBB(*FalseMBB);
- }
- // handle default case
- const BasicBlock *DefaultBB = SwInst.getDefaultDest();
- MachineBasicBlock &DefaultMBB = getMBB(*DefaultBB);
- MIRBuilder.buildBr(DefaultMBB);
- MachineBasicBlock &CurMBB = MIRBuilder.getMBB();
- CurMBB.addSuccessor(&DefaultMBB);
- addMachineCFGPred({OrigBB, DefaultBB}, &CurMBB);
+void IRTranslator::addSuccessorWithProb(MachineBasicBlock *Src,
+ MachineBasicBlock *Dst,
+ BranchProbability Prob) {
+ if (!FuncInfo.BPI) {
+ Src->addSuccessorWithoutProb(Dst);
+ return;
+ }
+ if (Prob.isUnknown())
+ Prob = getEdgeProbability(Src, Dst);
+ Src->addSuccessor(Dst, Prob);
+}
+
+BranchProbability
+IRTranslator::getEdgeProbability(const MachineBasicBlock *Src,
+ const MachineBasicBlock *Dst) const {
+ const BasicBlock *SrcBB = Src->getBasicBlock();
+ const BasicBlock *DstBB = Dst->getBasicBlock();
+ if (!FuncInfo.BPI) {
+ // If BPI is not available, set the default probability as 1 / N, where N is
+ // the number of successors.
+ auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
+ return BranchProbability(1, SuccSize);
+ }
+ return FuncInfo.BPI->getEdgeProbability(SrcBB, DstBB);
+}
+
+bool IRTranslator::translateSwitch(const User &U, MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ // Extract cases from the switch.
+ const SwitchInst &SI = cast<SwitchInst>(U);
+ BranchProbabilityInfo *BPI = FuncInfo.BPI;
+ CaseClusterVector Clusters;
+ Clusters.reserve(SI.getNumCases());
+ for (auto &I : SI.cases()) {
+ MachineBasicBlock *Succ = &getMBB(*I.getCaseSuccessor());
+ assert(Succ && "Could not find successor mbb in mapping");
+ const ConstantInt *CaseVal = I.getCaseValue();
+ BranchProbability Prob =
+ BPI ? BPI->getEdgeProbability(SI.getParent(), I.getSuccessorIndex())
+ : BranchProbability(1, SI.getNumCases() + 1);
+ Clusters.push_back(CaseCluster::range(CaseVal, CaseVal, Succ, Prob));
+ }
+
+ MachineBasicBlock *DefaultMBB = &getMBB(*SI.getDefaultDest());
+
+ // Cluster adjacent cases with the same destination. We do this at all
+ // optimization levels because it's cheap to do and will make codegen faster
+ // if there are many clusters.
+ sortAndRangeify(Clusters);
+
+ MachineBasicBlock *SwitchMBB = &getMBB(*SI.getParent());
+
+ // If there is only the default destination, jump there directly.
+ if (Clusters.empty()) {
+ SwitchMBB->addSuccessor(DefaultMBB);
+ if (DefaultMBB != SwitchMBB->getNextNode())
+ MIB.buildBr(*DefaultMBB);
+ return true;
+ }
+
+ SL->findJumpTables(Clusters, &SI, DefaultMBB);
+
+ LLVM_DEBUG({
+ dbgs() << "Case clusters: ";
+ for (const CaseCluster &C : Clusters) {
+ if (C.Kind == CC_JumpTable)
+ dbgs() << "JT:";
+ if (C.Kind == CC_BitTests)
+ dbgs() << "BT:";
+
+ C.Low->getValue().print(dbgs(), true);
+ if (C.Low != C.High) {
+ dbgs() << '-';
+ C.High->getValue().print(dbgs(), true);
+ }
+ dbgs() << ' ';
+ }
+ dbgs() << '\n';
+ });
+
+ assert(!Clusters.empty());
+ SwitchWorkList WorkList;
+ CaseClusterIt First = Clusters.begin();
+ CaseClusterIt Last = Clusters.end() - 1;
+ auto DefaultProb = getEdgeProbability(SwitchMBB, DefaultMBB);
+ WorkList.push_back({SwitchMBB, First, Last, nullptr, nullptr, DefaultProb});
+
+ // FIXME: At the moment we don't do any splitting optimizations here like
+ // SelectionDAG does, so this worklist only has one entry.
+ while (!WorkList.empty()) {
+ SwitchWorkListItem W = WorkList.back();
+ WorkList.pop_back();
+ if (!lowerSwitchWorkItem(W, SI.getCondition(), SwitchMBB, DefaultMBB, MIB))
+ return false;
+ }
+ return true;
+}
+
+void IRTranslator::emitJumpTable(SwitchCG::JumpTable &JT,
+ MachineBasicBlock *MBB) {
+ // Emit the code for the jump table
+ assert(JT.Reg != -1U && "Should lower JT Header first!");
+ MachineIRBuilder MIB(*MBB->getParent());
+ MIB.setMBB(*MBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ Type *PtrIRTy = Type::getInt8PtrTy(MF->getFunction().getContext());
+ const LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
+
+ auto Table = MIB.buildJumpTable(PtrTy, JT.JTI);
+ MIB.buildBrJT(Table.getReg(0), JT.JTI, JT.Reg);
+}
+
+bool IRTranslator::emitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
+ MachineBasicBlock *HeaderBB) {
+ MachineIRBuilder MIB(*HeaderBB->getParent());
+ MIB.setMBB(*HeaderBB);
+ MIB.setDebugLoc(CurBuilder->getDebugLoc());
+
+ const Value &SValue = *JTH.SValue;
+ // Subtract the lowest switch case value from the value being switched on.
+ const LLT SwitchTy = getLLTForType(*SValue.getType(), *DL);
+ Register SwitchOpReg = getOrCreateVReg(SValue);
+ auto FirstCst = MIB.buildConstant(SwitchTy, JTH.First);
+ auto Sub = MIB.buildSub({SwitchTy}, SwitchOpReg, FirstCst);
+
+ // This value may be smaller or larger than the target's pointer type, and
+ // may therefore require extension or truncation.
+ Type *PtrIRTy = SValue.getType()->getPointerTo();
+ const LLT PtrScalarTy = LLT::scalar(DL->getTypeSizeInBits(PtrIRTy));
+ Sub = MIB.buildZExtOrTrunc(PtrScalarTy, Sub);
+
+ JT.Reg = Sub.getReg(0);
+
+ if (JTH.OmitRangeCheck) {
+ if (JT.MBB != HeaderBB->getNextNode())
+ MIB.buildBr(*JT.MBB);
+ return true;
+ }
+
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
+ auto Cst = getOrCreateVReg(
+ *ConstantInt::get(SValue.getType(), JTH.Last - JTH.First));
+ Cst = MIB.buildZExtOrTrunc(PtrScalarTy, Cst).getReg(0);
+ auto Cmp = MIB.buildICmp(CmpInst::ICMP_UGT, LLT::scalar(1), Sub, Cst);
+
+ auto BrCond = MIB.buildBrCond(Cmp.getReg(0), *JT.Default);
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != HeaderBB->getNextNode())
+ BrCond = MIB.buildBr(*JT.MBB);
+ return true;
+}
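
Note: emitJumpTableHeader normalizes the switch value by subtracting the lowest case, does a single unsigned compare against (Last - First) to branch to the default block, and emitJumpTable then branches through the table. The classic shape in plain C++ (illustrative):

#include <cstdint>

int dispatch(int64_t X, int64_t First, int64_t Last,
             int (*const *Table)(), int (*Default)()) {
  uint64_t Idx = uint64_t(X - First); // the buildSub above
  if (Idx > uint64_t(Last - First))   // the ICMP_UGT range check
    return Default();                 // G_BRCOND to JT.Default
  return Table[Idx]();                // G_BRJT through the jump table
}
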
+
+void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
+ MachineBasicBlock *SwitchBB,
+ MachineIRBuilder &MIB) {
+ Register CondLHS = getOrCreateVReg(*CB.CmpLHS);
+ Register Cond;
+ DebugLoc OldDbgLoc = MIB.getDebugLoc();
+ MIB.setDebugLoc(CB.DbgLoc);
+ MIB.setMBB(*CB.ThisBB);
+
+ if (CB.PredInfo.NoCmp) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+ CB.ThisBB->normalizeSuccProbs();
+ if (CB.TrueBB != CB.ThisBB->getNextNode())
+ MIB.buildBr(*CB.TrueBB);
+ MIB.setDebugLoc(OldDbgLoc);
+ return;
+ }
+
+ const LLT i1Ty = LLT::scalar(1);
+ // Build the compare.
+ if (!CB.CmpMHS) {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
+ } else {
+ assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
+ "Can only handle ULE ranges");
+
+ const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
+ const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
+
+ Register CmpOpReg = getOrCreateVReg(*CB.CmpMHS);
+ if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
+ Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
+ Cond =
+ MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+ } else {
+ const LLT &CmpTy = MRI->getType(CmpOpReg);
+ auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
+ auto Diff = MIB.buildConstant(CmpTy, High - Low);
+ Cond = MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, Sub, Diff).getReg(0);
+ }
+ }
+
+ // Update successor info
+ addSuccessorWithProb(CB.ThisBB, CB.TrueBB, CB.TrueProb);
+
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.TrueBB->getBasicBlock()},
+ CB.ThisBB);
+
+ // TrueBB and FalseBB are always different unless the incoming IR is
+ // degenerate. This only happens when running llc on weird IR.
+ if (CB.TrueBB != CB.FalseBB)
+ addSuccessorWithProb(CB.ThisBB, CB.FalseBB, CB.FalseProb);
+ CB.ThisBB->normalizeSuccProbs();
+
+ // if (SwitchBB->getBasicBlock() != CB.FalseBB->getBasicBlock())
+ addMachineCFGPred({SwitchBB->getBasicBlock(), CB.FalseBB->getBasicBlock()},
+ CB.ThisBB);
+
+ // If the lhs block is the next block, invert the condition so that we can
+ // fall through to the lhs instead of the rhs block.
+ if (CB.TrueBB == CB.ThisBB->getNextNode()) {
+ std::swap(CB.TrueBB, CB.FalseBB);
+ auto True = MIB.buildConstant(i1Ty, 1);
+ Cond = MIB.buildInstr(TargetOpcode::G_XOR, {i1Ty}, {Cond, True}, None)
+ .getReg(0);
+ }
+
+ MIB.buildBrCond(Cond, *CB.TrueBB);
+ MIB.buildBr(*CB.FalseBB);
+ MIB.setDebugLoc(OldDbgLoc);
+}
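
Note: the CmpMHS path in emitSwitchCase checks Low <= Cond <= High with one subtraction and one unsigned compare, (Cond - Low) <=u (High - Low), relying on wraparound to reject values below Low. The trick in isolation (illustrative):

#include <cassert>
#include <cstdint>

bool inRange(uint64_t Cond, uint64_t Low, uint64_t High) {
  return Cond - Low <= High - Low; // one sub + one unsigned compare
}

int main() {
  assert(inRange(5, 3, 9));   // inside the range
  assert(!inRange(2, 3, 9));  // below Low: Cond - Low wraps to a huge value
  assert(!inRange(10, 3, 9)); // above High
}
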
+
+bool IRTranslator::lowerJumpTableWorkItem(SwitchCG::SwitchWorkListItem W,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *CurMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB,
+ MachineFunction::iterator BBI,
+ BranchProbability UnhandledProbs,
+ SwitchCG::CaseClusterIt I,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = SwitchMBB->getParent();
+ // FIXME: Optimize away range check based on pivot comparisons.
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
+ BranchProbability DefaultProb = W.DefaultProb;
+
+ // The jump block hasn't been inserted yet; insert it here.
+ MachineBasicBlock *JumpMBB = JT->MBB;
+ CurMF->insert(BBI, JumpMBB);
+
+ // Since the jump table block is separate from the switch block, we need
+ // to keep track of it as a machine predecessor to the default block,
+ // otherwise we lose the phi edges.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ CurMBB);
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), DefaultMBB->getBasicBlock()},
+ JumpMBB);
+
+ auto JumpProb = I->Prob;
+ auto FallthroughProb = UnhandledProbs;
+
+ // If the default statement is a target of the jump table, we evenly
+ // distribute the default probability to successors of CurMBB. Also
+ // update the probability on the edge from JumpMBB to Fallthrough.
+ for (MachineBasicBlock::succ_iterator SI = JumpMBB->succ_begin(),
+ SE = JumpMBB->succ_end();
+ SI != SE; ++SI) {
+ if (*SI == DefaultMBB) {
+ JumpProb += DefaultProb / 2;
+ FallthroughProb -= DefaultProb / 2;
+ JumpMBB->setSuccProbability(SI, DefaultProb / 2);
+ JumpMBB->normalizeSuccProbs();
+ } else {
+ // Also record edges from the jump table block to its successors.
+ addMachineCFGPred({SwitchMBB->getBasicBlock(), (*SI)->getBasicBlock()},
+ JumpMBB);
+ }
+ }
+
+ // Skip the range check if the fallthrough block is unreachable.
+ if (FallthroughUnreachable)
+ JTH->OmitRangeCheck = true;
+
+ if (!JTH->OmitRangeCheck)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
+ CurMBB->normalizeSuccProbs();
+
+ // The jump table header will be inserted into our current block; it will do
+ // the range check and fall through to our fallthrough block.
+ JTH->HeaderBB = CurMBB;
+ JT->Default = Fallthrough; // FIXME: Move Default to JumpTableHeader.
+
+ // If we're in the right place, emit the jump table header right now.
+ if (CurMBB == SwitchMBB) {
+ if (!emitJumpTableHeader(*JT, *JTH, CurMBB))
+ return false;
+ JTH->Emitted = true;
+ }
+ return true;
+}
+bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
+ Value *Cond,
+ MachineBasicBlock *Fallthrough,
+ bool FallthroughUnreachable,
+ BranchProbability UnhandledProbs,
+ MachineBasicBlock *CurMBB,
+ MachineIRBuilder &MIB,
+ MachineBasicBlock *SwitchMBB) {
+ using namespace SwitchCG;
+ const Value *RHS, *LHS, *MHS;
+ CmpInst::Predicate Pred;
+ if (I->Low == I->High) {
+ // Check Cond == I->Low.
+ Pred = CmpInst::ICMP_EQ;
+ LHS = Cond;
+ RHS = I->Low;
+ MHS = nullptr;
+ } else {
+ // Check I->Low <= Cond <= I->High.
+ Pred = CmpInst::ICMP_ULE;
+ LHS = I->Low;
+ MHS = Cond;
+ RHS = I->High;
+ }
+
+ // If Fallthrough is unreachable, fold away the comparison.
+ // The false probability is the sum of all unhandled cases.
+ CaseBlock CB(Pred, FallthroughUnreachable, LHS, RHS, MHS, I->MBB, Fallthrough,
+ CurMBB, MIB.getDebugLoc(), I->Prob, UnhandledProbs);
+
+ emitSwitchCase(CB, SwitchMBB, MIB);
+ return true;
+}
+
+bool IRTranslator::lowerSwitchWorkItem(SwitchCG::SwitchWorkListItem W,
+ Value *Cond,
+ MachineBasicBlock *SwitchMBB,
+ MachineBasicBlock *DefaultMBB,
+ MachineIRBuilder &MIB) {
+ using namespace SwitchCG;
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *NextMBB = nullptr;
+ MachineFunction::iterator BBI(W.MBB);
+ if (++BBI != FuncInfo.MF->end())
+ NextMBB = &*BBI;
+
+ if (EnableOpts) {
+ // Here, we order cases by probability so the most likely case will be
+ // checked first. However, two clusters can have the same probability in
+ // which case their relative ordering is non-deterministic. So we use Low
+ // as a tie-breaker as clusters are guaranteed to never overlap.
+ llvm::sort(W.FirstCluster, W.LastCluster + 1,
+ [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Prob != b.Prob
+ ? a.Prob > b.Prob
+ : a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Rearrange the case blocks so that the last one falls through if possible
+ // without changing the order of probabilities.
+ for (CaseClusterIt I = W.LastCluster; I > W.FirstCluster;) {
+ --I;
+ if (I->Prob > W.LastCluster->Prob)
+ break;
+ if (I->Kind == CC_Range && I->MBB == NextMBB) {
+ std::swap(*I, *W.LastCluster);
+ break;
+ }
+ }
+ }
+
+ // Compute total probability.
+ BranchProbability DefaultProb = W.DefaultProb;
+ BranchProbability UnhandledProbs = DefaultProb;
+ for (CaseClusterIt I = W.FirstCluster; I <= W.LastCluster; ++I)
+ UnhandledProbs += I->Prob;
+
+ MachineBasicBlock *CurMBB = W.MBB;
+ for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
+ MachineBasicBlock *Fallthrough;
+ if (I == W.LastCluster) {
+ // For the last cluster, fall through to the default destination.
+ Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
+ } else {
+ Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
+ CurMF->insert(BBI, Fallthrough);
+ }
+ UnhandledProbs -= I->Prob;
+
+ switch (I->Kind) {
+ case CC_BitTests: {
+ LLVM_DEBUG(dbgs() << "Switch to bit test optimization unimplemented");
+ return false; // Bit tests currently unimplemented.
+ }
+ case CC_JumpTable: {
+ if (!lowerJumpTableWorkItem(W, SwitchMBB, CurMBB, DefaultMBB, MIB, BBI,
+ UnhandledProbs, I, Fallthrough,
+ FallthroughUnreachable)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower jump table");
+ return false;
+ }
+ break;
+ }
+ case CC_Range: {
+ if (!lowerSwitchRangeWorkItem(I, Cond, Fallthrough,
+ FallthroughUnreachable, UnhandledProbs,
+ CurMBB, MIB, SwitchMBB)) {
+ LLVM_DEBUG(dbgs() << "Failed to lower switch range");
+ return false;
+ }
+ break;
+ }
+ }
+ CurMBB = Fallthrough;
+ }
return true;
}
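
Note: before any of the above runs, translateSwitch calls sortAndRangeify to sort the case clusters and merge adjacent cases with the same destination into ranges. A sketch of that clustering step (struct and names illustrative; assumes case values are unique):

#include <algorithm>
#include <vector>

struct Cluster { long Low, High; int Dest; };

void sortAndRangeify(std::vector<Cluster> &Cs) {
  std::sort(Cs.begin(), Cs.end(),
            [](const Cluster &A, const Cluster &B) { return A.Low < B.Low; });
  std::vector<Cluster> Out;
  for (const Cluster &C : Cs) {
    // Extend the previous cluster when the values are contiguous and the
    // destination block matches; otherwise start a new cluster.
    if (!Out.empty() && Out.back().Dest == C.Dest &&
        Out.back().High + 1 == C.Low)
      Out.back().High = C.High;
    else
      Out.push_back(C);
  }
  Cs = std::move(Out);
}
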
@@ -465,7 +835,7 @@ bool IRTranslator::translateIndirectBr(const User &U,
MachineIRBuilder &MIRBuilder) {
const IndirectBrInst &BrInst = cast<IndirectBrInst>(U);
- const unsigned Tgt = getOrCreateVReg(*BrInst.getAddress());
+ const Register Tgt = getOrCreateVReg(*BrInst.getAddress());
MIRBuilder.buildBrIndirect(Tgt);
// Link successors.
@@ -476,6 +846,14 @@ bool IRTranslator::translateIndirectBr(const User &U,
return true;
}
+static bool isSwiftError(const Value *V) {
+ if (auto Arg = dyn_cast<Argument>(V))
+ return Arg->hasSwiftErrorAttr();
+ if (auto AI = dyn_cast<AllocaInst>(V))
+ return AI->isSwiftError();
+ return false;
+}
+
bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
const LoadInst &LI = cast<LoadInst>(U);
@@ -486,13 +864,25 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(LI.getType()) == 0)
return true;
- ArrayRef<unsigned> Regs = getOrCreateVRegs(LI);
+ ArrayRef<Register> Regs = getOrCreateVRegs(LI);
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(LI);
- unsigned Base = getOrCreateVReg(*LI.getPointerOperand());
+ Register Base = getOrCreateVReg(*LI.getPointerOperand());
+
+ Type *OffsetIRTy = DL->getIntPtrType(LI.getPointerOperandType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(LI.getPointerOperand())) {
+ assert(Regs.size() == 1 && "swifterror should be single pointer");
+ Register VReg = SwiftError.getOrCreateVRegUseAt(&LI, &MIRBuilder.getMBB(),
+ LI.getPointerOperand());
+ MIRBuilder.buildCopy(Regs[0], VReg);
+ return true;
+ }
+
for (unsigned i = 0; i < Regs.size(); ++i) {
- unsigned Addr = 0;
- MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+ Register Addr;
+ MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(LI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(LI);
@@ -515,13 +905,25 @@ bool IRTranslator::translateStore(const User &U, MachineIRBuilder &MIRBuilder) {
if (DL->getTypeStoreSize(SI.getValueOperand()->getType()) == 0)
return true;
- ArrayRef<unsigned> Vals = getOrCreateVRegs(*SI.getValueOperand());
+ ArrayRef<Register> Vals = getOrCreateVRegs(*SI.getValueOperand());
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*SI.getValueOperand());
- unsigned Base = getOrCreateVReg(*SI.getPointerOperand());
+ Register Base = getOrCreateVReg(*SI.getPointerOperand());
+
+ Type *OffsetIRTy = DL->getIntPtrType(SI.getPointerOperandType());
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+
+ if (CLI->supportSwiftError() && isSwiftError(SI.getPointerOperand())) {
+ assert(Vals.size() == 1 && "swifterror should be single pointer");
+
+ Register VReg = SwiftError.getOrCreateVRegDefAt(&SI, &MIRBuilder.getMBB(),
+ SI.getPointerOperand());
+ MIRBuilder.buildCopy(VReg, Vals[0]);
+ return true;
+ }
for (unsigned i = 0; i < Vals.size(); ++i) {
- unsigned Addr = 0;
- MIRBuilder.materializeGEP(Addr, Base, LLT::scalar(64), Offsets[i] / 8);
+ Register Addr;
+ MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
MachinePointerInfo Ptr(SI.getPointerOperand(), Offsets[i] / 8);
unsigned BaseAlign = getMemOpAlignment(SI);
@@ -562,10 +964,9 @@ bool IRTranslator::translateExtractValue(const User &U,
MachineIRBuilder &MIRBuilder) {
const Value *Src = U.getOperand(0);
uint64_t Offset = getOffsetFromIndices(U, *DL);
- ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
ArrayRef<uint64_t> Offsets = *VMap.getOffsets(*Src);
- unsigned Idx = std::lower_bound(Offsets.begin(), Offsets.end(), Offset) -
- Offsets.begin();
+ unsigned Idx = llvm::lower_bound(Offsets, Offset) - Offsets.begin();
auto &DstRegs = allocateVRegs(U);
for (unsigned i = 0; i < DstRegs.size(); ++i)
@@ -580,8 +981,8 @@ bool IRTranslator::translateInsertValue(const User &U,
uint64_t Offset = getOffsetFromIndices(U, *DL);
auto &DstRegs = allocateVRegs(U);
ArrayRef<uint64_t> DstOffsets = *VMap.getOffsets(U);
- ArrayRef<unsigned> SrcRegs = getOrCreateVRegs(*Src);
- ArrayRef<unsigned> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<Register> SrcRegs = getOrCreateVRegs(*Src);
+ ArrayRef<Register> InsertedRegs = getOrCreateVRegs(*U.getOperand(1));
auto InsertedIt = InsertedRegs.begin();
for (unsigned i = 0; i < DstRegs.size(); ++i) {
@@ -596,19 +997,19 @@ bool IRTranslator::translateInsertValue(const User &U,
bool IRTranslator::translateSelect(const User &U,
MachineIRBuilder &MIRBuilder) {
- unsigned Tst = getOrCreateVReg(*U.getOperand(0));
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(U);
- ArrayRef<unsigned> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
- ArrayRef<unsigned> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
+ Register Tst = getOrCreateVReg(*U.getOperand(0));
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(U);
+ ArrayRef<Register> Op0Regs = getOrCreateVRegs(*U.getOperand(1));
+ ArrayRef<Register> Op1Regs = getOrCreateVRegs(*U.getOperand(2));
const SelectInst &SI = cast<SelectInst>(U);
- const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition());
+ uint16_t Flags = 0;
+ if (const CmpInst *Cmp = dyn_cast<CmpInst>(SI.getCondition()))
+ Flags = MachineInstr::copyFlagsFromInstruction(*Cmp);
+
for (unsigned i = 0; i < ResRegs.size(); ++i) {
- auto Select =
- MIRBuilder.buildSelect(ResRegs[i], Tst, Op0Regs[i], Op1Regs[i]);
- if (Cmp && isa<FPMathOperator>(Cmp)) {
- Select->copyIRFlags(*Cmp);
- }
+ MIRBuilder.buildInstr(TargetOpcode::G_SELECT, {ResRegs[i]},
+ {Tst, Op0Regs[i], Op1Regs[i]}, Flags);
}
return true;
@@ -619,7 +1020,7 @@ bool IRTranslator::translateBitCast(const User &U,
// If we're bitcasting to the source type, we can reuse the source vreg.
if (getLLTForType(*U.getOperand(0)->getType(), *DL) ==
getLLTForType(*U.getType(), *DL)) {
- unsigned SrcReg = getOrCreateVReg(*U.getOperand(0));
+ Register SrcReg = getOrCreateVReg(*U.getOperand(0));
auto &Regs = *VMap.getVRegs(U);
// If we already assigned a vreg for this bitcast, we can't change that.
// Emit a copy to satisfy the users we already emitted.
@@ -636,9 +1037,9 @@ bool IRTranslator::translateBitCast(const User &U,
bool IRTranslator::translateCast(unsigned Opcode, const User &U,
MachineIRBuilder &MIRBuilder) {
- unsigned Op = getOrCreateVReg(*U.getOperand(0));
- unsigned Res = getOrCreateVReg(U);
- MIRBuilder.buildInstr(Opcode).addDef(Res).addUse(Op);
+ Register Op = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ MIRBuilder.buildInstr(Opcode, {Res}, {Op});
return true;
}
@@ -649,7 +1050,7 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return false;
Value &Op0 = *U.getOperand(0);
- unsigned BaseReg = getOrCreateVReg(Op0);
+ Register BaseReg = getOrCreateVReg(Op0);
Type *PtrIRTy = Op0.getType();
LLT PtrTy = getLLTForType(*PtrIRTy, *DL);
Type *OffsetIRTy = DL->getIntPtrType(PtrIRTy);
@@ -674,43 +1075,43 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- unsigned OffsetReg =
- getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetReg);
+ Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
+ auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
+ MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
BaseReg = NewBaseReg;
Offset = 0;
}
- unsigned IdxReg = getOrCreateVReg(*Idx);
+ Register IdxReg = getOrCreateVReg(*Idx);
if (MRI->getType(IdxReg) != OffsetTy) {
- unsigned NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
+ Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
IdxReg = NewIdxReg;
}
// N = N + Idx * ElementSize;
// Avoid doing it for ElementSize of 1.
- unsigned GepOffsetReg;
+ Register GepOffsetReg;
if (ElementSize != 1) {
- unsigned ElementSizeReg =
- getOrCreateVReg(*ConstantInt::get(OffsetIRTy, ElementSize));
-
GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildMul(GepOffsetReg, ElementSizeReg, IdxReg);
+ auto ElementSizeMIB = MIRBuilder.buildConstant(
+ getLLTForType(*OffsetIRTy, *DL), ElementSize);
+ MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
} else
GepOffsetReg = IdxReg;
- unsigned NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
+ Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
BaseReg = NewBaseReg;
}
}
if (Offset != 0) {
- unsigned OffsetReg = getOrCreateVReg(*ConstantInt::get(OffsetIRTy, Offset));
- MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetReg);
+ auto OffsetMIB =
+ MIRBuilder.buildConstant(getLLTForType(*OffsetIRTy, *DL), Offset);
+ MIRBuilder.buildGEP(getOrCreateVReg(U), BaseReg, OffsetMIB.getReg(0));
return true;
}
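
Concretely, for a GEP such as getelementptr {i32, [4 x i64]}, {i32, [4 x i64]}* %p, i64 0, i32 1, i64 %i, the constant field offset accumulates in Offset while the variable index is sign-extended and scaled by the element size, exactly as in the two paths above. The arithmetic, worked through under a typical 64-bit layout (an assumption, not part of the patch):

#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Base = 0x1000;   // %p
  uint64_t Offset = 8;      // field 1 of {i32, [4 x i64]}, padded to i64 align
  int64_t Idx = 3;          // %i, sign-extended to the pointer width
  uint64_t ElementSize = 8; // sizeof(i64)
  uint64_t Addr = Base + Offset + (uint64_t)Idx * ElementSize;
  std::printf("0x%llx\n", (unsigned long long)Addr); // 0x1020
  return 0;
}
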
@@ -721,6 +1122,19 @@ bool IRTranslator::translateGetElementPtr(const User &U,
bool IRTranslator::translateMemfunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
unsigned ID) {
+
+ // If the source is undef, then just emit a nop.
+ if (isa<UndefValue>(CI.getArgOperand(1))) {
+ switch (ID) {
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::memset:
+ return true;
+ default:
+ break;
+ }
+ }
+
LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
Type *DstTy = CI.getArgOperand(0)->getType();
if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
@@ -752,10 +1166,10 @@ bool IRTranslator::translateMemfunc(const CallInst &CI,
return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
MachineOperand::CreateES(Callee),
- CallLowering::ArgInfo(0, CI.getType()), Args);
+ CallLowering::ArgInfo({0}, CI.getType()), Args);
}
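
Argument 1 is the source pointer for memcpy/memmove and the fill value for memset; an undef there leaves the result unconstrained, so the translator may legally emit nothing at all. Otherwise the call becomes the matching libcall through lowerCall. The early-out, as a plain-C++ sketch:

#include <cstring>
#include <optional>

// If the source is unknown ("undef"), dropping the copy is a legal lowering;
// otherwise defer to the real library routine.
void lowerMemcpy(void *Dst, std::optional<const void *> Src, size_t N) {
  if (!Src)
    return;                  // undef source: emit a no-op
  std::memcpy(Dst, *Src, N); // known source: emit the libcall
}

int main() {
  char Buf[4] = {0};
  lowerMemcpy(Buf, std::nullopt, sizeof(Buf)); // folds away
  return 0;
}
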
-void IRTranslator::getStackGuard(unsigned DstReg,
+void IRTranslator::getStackGuard(Register DstReg,
MachineIRBuilder &MIRBuilder) {
const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
MRI->setRegClass(DstReg, TRI->getPointerRegClass(*MF));
@@ -778,7 +1192,7 @@ void IRTranslator::getStackGuard(unsigned DstReg,
bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(CI);
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(CI);
MIRBuilder.buildInstr(Op)
.addDef(ResRegs[0])
.addDef(ResRegs[1])
@@ -788,19 +1202,123 @@ bool IRTranslator::translateOverflowIntrinsic(const CallInst &CI, unsigned Op,
return true;
}
+unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
+ switch (ID) {
+ default:
+ break;
+ case Intrinsic::bswap:
+ return TargetOpcode::G_BSWAP;
+ case Intrinsic::ceil:
+ return TargetOpcode::G_FCEIL;
+ case Intrinsic::cos:
+ return TargetOpcode::G_FCOS;
+ case Intrinsic::ctpop:
+ return TargetOpcode::G_CTPOP;
+ case Intrinsic::exp:
+ return TargetOpcode::G_FEXP;
+ case Intrinsic::exp2:
+ return TargetOpcode::G_FEXP2;
+ case Intrinsic::fabs:
+ return TargetOpcode::G_FABS;
+ case Intrinsic::copysign:
+ return TargetOpcode::G_FCOPYSIGN;
+ case Intrinsic::minnum:
+ return TargetOpcode::G_FMINNUM;
+ case Intrinsic::maxnum:
+ return TargetOpcode::G_FMAXNUM;
+ case Intrinsic::minimum:
+ return TargetOpcode::G_FMINIMUM;
+ case Intrinsic::maximum:
+ return TargetOpcode::G_FMAXIMUM;
+ case Intrinsic::canonicalize:
+ return TargetOpcode::G_FCANONICALIZE;
+ case Intrinsic::floor:
+ return TargetOpcode::G_FFLOOR;
+ case Intrinsic::fma:
+ return TargetOpcode::G_FMA;
+ case Intrinsic::log:
+ return TargetOpcode::G_FLOG;
+ case Intrinsic::log2:
+ return TargetOpcode::G_FLOG2;
+ case Intrinsic::log10:
+ return TargetOpcode::G_FLOG10;
+ case Intrinsic::nearbyint:
+ return TargetOpcode::G_FNEARBYINT;
+ case Intrinsic::pow:
+ return TargetOpcode::G_FPOW;
+ case Intrinsic::rint:
+ return TargetOpcode::G_FRINT;
+ case Intrinsic::round:
+ return TargetOpcode::G_INTRINSIC_ROUND;
+ case Intrinsic::sin:
+ return TargetOpcode::G_FSIN;
+ case Intrinsic::sqrt:
+ return TargetOpcode::G_FSQRT;
+ case Intrinsic::trunc:
+ return TargetOpcode::G_INTRINSIC_TRUNC;
+ }
+ return Intrinsic::not_intrinsic;
+}
+
+bool IRTranslator::translateSimpleIntrinsic(const CallInst &CI,
+ Intrinsic::ID ID,
+ MachineIRBuilder &MIRBuilder) {
+
+ unsigned Op = getSimpleIntrinsicOpcode(ID);
+
+ // Is this a simple intrinsic?
+ if (Op == Intrinsic::not_intrinsic)
+ return false;
+
+ // Yes. Let's translate it.
+ SmallVector<llvm::SrcOp, 4> VRegs;
+ for (auto &Arg : CI.arg_operands())
+ VRegs.push_back(getOrCreateVReg(*Arg));
+
+ MIRBuilder.buildInstr(Op, {getOrCreateVReg(CI)}, VRegs,
+ MachineInstr::copyFlagsFromInstruction(CI));
+ return true;
+}
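
This pair of functions subsumes the long run of hand-written per-intrinsic cases deleted further down (pow, exp, log, fabs, fma, ctpop, ceil, ...): one table lookup, then a uniform buildInstr carrying the call's vregs and fast-math flags. The pattern in miniature (names are illustrative, not LLVM's):

#include <cstdio>
#include <string>
#include <unordered_map>

enum GOpcode { G_INVALID = 0, G_FSQRT, G_FSIN, G_FCOS };

// One lookup replaces a case per intrinsic; unknown names fall through to
// bespoke handling, like the Intrinsic::not_intrinsic sentinel above.
GOpcode getSimpleOpcode(const std::string &Name) {
  static const std::unordered_map<std::string, GOpcode> Table = {
      {"llvm.sqrt", G_FSQRT}, {"llvm.sin", G_FSIN}, {"llvm.cos", G_FCOS}};
  auto It = Table.find(Name);
  return It == Table.end() ? G_INVALID : It->second;
}

int main() {
  std::printf("%d %d\n", getSimpleOpcode("llvm.sqrt"),  // 1
              getSimpleOpcode("llvm.memcpy"));          // 0
  return 0;
}
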
+
bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MachineIRBuilder &MIRBuilder) {
+
+ // If this is a simple intrinsic (that is, we just need to add a def of
+  // a vreg, and uses for each arg operand), then translate it.
+ if (translateSimpleIntrinsic(CI, ID, MIRBuilder))
+ return true;
+
switch (ID) {
default:
break;
case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- // Stack coloring is not enabled in O0 (which we care about now) so we can
- // drop these. Make sure someone notices when we start compiling at higher
- // opts though.
- if (MF->getTarget().getOptLevel() != CodeGenOpt::None)
- return false;
+ case Intrinsic::lifetime_end: {
+    // No stack coloring in O0, discard region information.
+ if (MF->getTarget().getOptLevel() == CodeGenOpt::None)
+ return true;
+
+ unsigned Op = ID == Intrinsic::lifetime_start ? TargetOpcode::LIFETIME_START
+ : TargetOpcode::LIFETIME_END;
+
+ // Get the underlying objects for the location passed on the lifetime
+ // marker.
+ SmallVector<const Value *, 4> Allocas;
+ GetUnderlyingObjects(CI.getArgOperand(1), Allocas, *DL);
+
+ // Iterate over each underlying object, creating lifetime markers for each
+ // static alloca. Quit if we find a non-static alloca.
+ for (const Value *V : Allocas) {
+ const AllocaInst *AI = dyn_cast<AllocaInst>(V);
+ if (!AI)
+ continue;
+
+ if (!AI->isStaticAlloca())
+ return true;
+
+ MIRBuilder.buildInstr(Op).addFrameIndex(getOrCreateFrameIndex(*AI));
+ }
return true;
+ }
case Intrinsic::dbg_declare: {
const DbgDeclareInst &DI = cast<DbgDeclareInst>(CI);
assert(DI.getVariable() && "Missing variable");
@@ -848,10 +1366,11 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
Value *Ptr = CI.getArgOperand(0);
unsigned ListSize = TLI.getVaListSizeInBits(*DL) / 8;
+ // FIXME: Get alignment
MIRBuilder.buildInstr(TargetOpcode::G_VASTART)
.addUse(getOrCreateVReg(*Ptr))
.addMemOperand(MF->getMachineMemOperand(
- MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 0));
+ MachinePointerInfo(Ptr), MachineMemOperand::MOStore, ListSize, 1));
return true;
}
case Intrinsic::dbg_value: {
@@ -868,7 +1387,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
- unsigned Reg = getOrCreateVReg(*V);
+ Register Reg = getOrCreateVReg(*V);
// FIXME: This does not handle register-indirect values at offset 0. The
// direct/indirect thing shouldn't really be handled by something as
    // implicit as reg+noreg vs reg+imm in the first place, but it seems
@@ -889,94 +1408,25 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateOverflowIntrinsic(CI, TargetOpcode::G_UMULO, MIRBuilder);
case Intrinsic::smul_with_overflow:
return translateOverflowIntrinsic(CI, TargetOpcode::G_SMULO, MIRBuilder);
- case Intrinsic::pow: {
- auto Pow = MIRBuilder.buildInstr(TargetOpcode::G_FPOW)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)));
- Pow->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::exp: {
- auto Exp = MIRBuilder.buildInstr(TargetOpcode::G_FEXP)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Exp->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::exp2: {
- auto Exp2 = MIRBuilder.buildInstr(TargetOpcode::G_FEXP2)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Exp2->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log: {
- auto Log = MIRBuilder.buildInstr(TargetOpcode::G_FLOG)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log2: {
- auto Log2 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG2)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log2->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::log10: {
- auto Log10 = MIRBuilder.buildInstr(TargetOpcode::G_FLOG10)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Log10->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::fabs: {
- auto Fabs = MIRBuilder.buildInstr(TargetOpcode::G_FABS)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- Fabs->copyIRFlags(CI);
- return true;
- }
- case Intrinsic::trunc:
- MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_TRUNC)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- case Intrinsic::round:
- MIRBuilder.buildInstr(TargetOpcode::G_INTRINSIC_ROUND)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- case Intrinsic::fma: {
- auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(1)))
- .addUse(getOrCreateVReg(*CI.getArgOperand(2)));
- FMA->copyIRFlags(CI);
- return true;
- }
case Intrinsic::fmuladd: {
const TargetMachine &TM = MF->getTarget();
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
- unsigned Dst = getOrCreateVReg(CI);
- unsigned Op0 = getOrCreateVReg(*CI.getArgOperand(0));
- unsigned Op1 = getOrCreateVReg(*CI.getArgOperand(1));
- unsigned Op2 = getOrCreateVReg(*CI.getArgOperand(2));
+ Register Dst = getOrCreateVReg(CI);
+ Register Op0 = getOrCreateVReg(*CI.getArgOperand(0));
+ Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
+ Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
// TODO: Revisit this to see if we should move this part of the
// lowering to the combiner.
- auto FMA = MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2});
- FMA->copyIRFlags(CI);
+ MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
+ MachineInstr::copyFlagsFromInstruction(CI));
} else {
LLT Ty = getLLTForType(*CI.getType(), *DL);
- auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1});
- FMul->copyIRFlags(CI);
- auto FAdd = MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2});
- FAdd->copyIRFlags(CI);
+ auto FMul = MIRBuilder.buildInstr(TargetOpcode::G_FMUL, {Ty}, {Op0, Op1},
+ MachineInstr::copyFlagsFromInstruction(CI));
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Dst}, {FMul, Op2},
+ MachineInstr::copyFlagsFromInstruction(CI));
}
return true;
}
@@ -986,7 +1436,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return translateMemfunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
- unsigned Reg = getOrCreateVReg(CI);
+ Register Reg = getOrCreateVReg(CI);
unsigned TypeID = MF->getTypeIDFor(GV);
MIRBuilder.buildConstant(Reg, TypeID);
return true;
@@ -1008,7 +1458,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
return true;
case Intrinsic::stackprotector: {
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- unsigned GuardVal = MRI->createGenericVirtualRegister(PtrTy);
+ Register GuardVal = MRI->createGenericVirtualRegister(PtrTy);
getStackGuard(GuardVal, MIRBuilder);
AllocaInst *Slot = cast<AllocaInst>(CI.getArgOperand(1));
@@ -1023,6 +1473,34 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
PtrTy.getSizeInBits() / 8, 8));
return true;
}
+ case Intrinsic::stacksave: {
+    // Save the current stack pointer into the intrinsic's result register.
+ Register Reg = getOrCreateVReg(CI);
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(Reg, StackPtr);
+ return true;
+ }
+ case Intrinsic::stackrestore: {
+    // Restore the stack pointer from the value passed to the intrinsic.
+ Register Reg = getOrCreateVReg(*CI.getArgOperand(0));
+ Register StackPtr = MF->getSubtarget()
+ .getTargetLowering()
+ ->getStackPointerRegisterToSaveRestore();
+
+ // If the target doesn't specify a stack pointer, then fall back.
+ if (!StackPtr)
+ return false;
+
+ MIRBuilder.buildCopy(StackPtr, Reg);
+ return true;
+ }
case Intrinsic::cttz:
case Intrinsic::ctlz: {
ConstantInt *Cst = cast<ConstantInt>(CI.getArgOperand(1));
@@ -1037,24 +1515,18 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
.addUse(getOrCreateVReg(*CI.getArgOperand(0)));
return true;
}
- case Intrinsic::ctpop: {
- MIRBuilder.buildInstr(TargetOpcode::G_CTPOP)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
- return true;
- }
case Intrinsic::invariant_start: {
LLT PtrTy = getLLTForType(*CI.getArgOperand(0)->getType(), *DL);
- unsigned Undef = MRI->createGenericVirtualRegister(PtrTy);
+ Register Undef = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildUndef(Undef);
return true;
}
case Intrinsic::invariant_end:
return true;
- case Intrinsic::ceil:
- MIRBuilder.buildInstr(TargetOpcode::G_FCEIL)
- .addDef(getOrCreateVReg(CI))
- .addUse(getOrCreateVReg(*CI.getArgOperand(0)));
+ case Intrinsic::assume:
+ case Intrinsic::var_annotation:
+ case Intrinsic::sideeffect:
+ // Discard annotate attributes, assumptions, and artificial side-effects.
return true;
}
return false;
@@ -1079,34 +1551,6 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
return true;
}
-unsigned IRTranslator::packRegs(const Value &V,
- MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
- ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
- LLT BigTy = getLLTForType(*V.getType(), *DL);
-
- if (Regs.size() == 1)
- return Regs[0];
-
- unsigned Dst = MRI->createGenericVirtualRegister(BigTy);
- MIRBuilder.buildUndef(Dst);
- for (unsigned i = 0; i < Regs.size(); ++i) {
- unsigned NewDst = MRI->createGenericVirtualRegister(BigTy);
- MIRBuilder.buildInsert(NewDst, Dst, Regs[i], Offsets[i]);
- Dst = NewDst;
- }
- return Dst;
-}
-
-void IRTranslator::unpackRegs(const Value &V, unsigned Src,
- MachineIRBuilder &MIRBuilder) {
- ArrayRef<unsigned> Regs = getOrCreateVRegs(V);
- ArrayRef<uint64_t> Offsets = *VMap.getOffsets(V);
-
- for (unsigned i = 0; i < Regs.size(); ++i)
- MIRBuilder.buildExtract(Regs[i], Src, Offsets[i]);
-}
-
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1126,23 +1570,32 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
- bool IsSplitType = valueIsSplit(CI);
if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
- unsigned Res = IsSplitType ? MRI->createGenericVirtualRegister(
- getLLTForType(*CI.getType(), *DL))
- : getOrCreateVReg(CI);
-
- SmallVector<unsigned, 8> Args;
- for (auto &Arg: CI.arg_operands())
- Args.push_back(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> Res = getOrCreateVRegs(CI);
+
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftInVReg = 0;
+ Register SwiftErrorVReg = 0;
+ for (auto &Arg: CI.arg_operands()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &CI, &MIRBuilder.getMBB(), Arg));
+ Args.emplace_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
MF->getFrameInfo().setHasCalls(true);
- bool Success = CLI->lowerCall(MIRBuilder, &CI, Res, Args, [&]() {
- return getOrCreateVReg(*CI.getCalledValue());
- });
+ bool Success =
+ CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
- if (IsSplitType)
- unpackRegs(CI, Res, MIRBuilder);
return Success;
}
@@ -1151,35 +1604,39 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (translateKnownIntrinsic(CI, ID, MIRBuilder))
return true;
- unsigned Res = 0;
- if (!CI.getType()->isVoidTy()) {
- if (IsSplitType)
- Res =
- MRI->createGenericVirtualRegister(getLLTForType(*CI.getType(), *DL));
- else
- Res = getOrCreateVReg(CI);
- }
+ ArrayRef<Register> ResultRegs;
+ if (!CI.getType()->isVoidTy())
+ ResultRegs = getOrCreateVRegs(CI);
+
+ // Ignore the callsite attributes. Backend code is most likely not expecting
+ // an intrinsic to sometimes have side effects and sometimes not.
MachineInstrBuilder MIB =
- MIRBuilder.buildIntrinsic(ID, Res, !CI.doesNotAccessMemory());
+ MIRBuilder.buildIntrinsic(ID, ResultRegs, !F->doesNotAccessMemory());
+ if (isa<FPMathOperator>(CI))
+ MIB->copyIRFlags(CI);
for (auto &Arg : CI.arg_operands()) {
// Some intrinsics take metadata parameters. Reject them.
if (isa<MetadataAsValue>(Arg))
return false;
- MIB.addUse(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
+ if (VRegs.size() > 1)
+ return false;
+ MIB.addUse(VRegs[0]);
}
- if (IsSplitType)
- unpackRegs(CI, Res, MIRBuilder);
-
// Add a MachineMemOperand if it is a target mem intrinsic.
const TargetLowering &TLI = *MF->getSubtarget().getTargetLowering();
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
+ unsigned Align = Info.align;
+ if (Align == 0)
+ Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+
uint64_t Size = Info.memVT.getStoreSize();
MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, Size, Info.align));
+ Info.flags, Size, Align));
}
return true;
@@ -1215,18 +1672,32 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- unsigned Res =
- MRI->createGenericVirtualRegister(getLLTForType(*I.getType(), *DL));
- SmallVector<unsigned, 8> Args;
- for (auto &Arg: I.arg_operands())
- Args.push_back(packRegs(*Arg, MIRBuilder));
+ ArrayRef<Register> Res;
+ if (!I.getType()->isVoidTy())
+ Res = getOrCreateVRegs(I);
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftErrorVReg = 0;
+ Register SwiftInVReg = 0;
+ for (auto &Arg : I.arg_operands()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &I, &MIRBuilder.getMBB(), Arg));
+ Args.push_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
- if (!CLI->lowerCall(MIRBuilder, &I, Res, Args,
+ if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
[&]() { return getOrCreateVReg(*I.getCalledValue()); }))
return false;
- unpackRegs(I, Res, MIRBuilder);
-
MCSymbol *EndSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(EndSymbol);
@@ -1241,6 +1712,12 @@ bool IRTranslator::translateInvoke(const User &U,
return true;
}
+bool IRTranslator::translateCallBr(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ // FIXME: Implement this.
+ return false;
+}
+
bool IRTranslator::translateLandingPad(const User &U,
MachineIRBuilder &MIRBuilder) {
const LandingPadInst &LP = cast<LandingPadInst>(U);
@@ -1270,7 +1747,7 @@ bool IRTranslator::translateLandingPad(const User &U,
.addSym(MF->addLandingPad(&MBB));
LLT Ty = getLLTForType(*LP.getType(), *DL);
- unsigned Undef = MRI->createGenericVirtualRegister(Ty);
+ Register Undef = MRI->createGenericVirtualRegister(Ty);
MIRBuilder.buildUndef(Undef);
SmallVector<LLT, 2> Tys;
@@ -1279,20 +1756,20 @@ bool IRTranslator::translateLandingPad(const User &U,
assert(Tys.size() == 2 && "Only two-valued landingpads are supported");
// Mark exception register as live in.
- unsigned ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
+ Register ExceptionReg = TLI.getExceptionPointerRegister(PersonalityFn);
if (!ExceptionReg)
return false;
MBB.addLiveIn(ExceptionReg);
- ArrayRef<unsigned> ResRegs = getOrCreateVRegs(LP);
+ ArrayRef<Register> ResRegs = getOrCreateVRegs(LP);
MIRBuilder.buildCopy(ResRegs[0], ExceptionReg);
- unsigned SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
+ Register SelectorReg = TLI.getExceptionSelectorRegister(PersonalityFn);
if (!SelectorReg)
return false;
MBB.addLiveIn(SelectorReg);
- unsigned PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
+ Register PtrVReg = MRI->createGenericVirtualRegister(Tys[0]);
MIRBuilder.buildCopy(PtrVReg, SelectorReg);
MIRBuilder.buildCast(ResRegs[1], PtrVReg);
@@ -1304,10 +1781,10 @@ bool IRTranslator::translateAlloca(const User &U,
auto &AI = cast<AllocaInst>(U);
if (AI.isSwiftError())
- return false;
+ return true;
if (AI.isStaticAlloca()) {
- unsigned Res = getOrCreateVReg(AI);
+ Register Res = getOrCreateVReg(AI);
int FI = getOrCreateFrameIndex(AI);
MIRBuilder.buildFrameIndex(Res, FI);
return true;
@@ -1322,29 +1799,29 @@ bool IRTranslator::translateAlloca(const User &U,
unsigned Align =
std::max((unsigned)DL->getPrefTypeAlignment(Ty), AI.getAlignment());
- unsigned NumElts = getOrCreateVReg(*AI.getArraySize());
+ Register NumElts = getOrCreateVReg(*AI.getArraySize());
Type *IntPtrIRTy = DL->getIntPtrType(AI.getType());
LLT IntPtrTy = getLLTForType(*IntPtrIRTy, *DL);
if (MRI->getType(NumElts) != IntPtrTy) {
- unsigned ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
+ Register ExtElts = MRI->createGenericVirtualRegister(IntPtrTy);
MIRBuilder.buildZExtOrTrunc(ExtElts, NumElts);
NumElts = ExtElts;
}
- unsigned AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
- unsigned TySize =
+ Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
+ Register TySize =
getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
LLT PtrTy = getLLTForType(*AI.getType(), *DL);
auto &TLI = *MF->getSubtarget().getTargetLowering();
- unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
- unsigned SPTmp = MRI->createGenericVirtualRegister(PtrTy);
+ Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildCopy(SPTmp, SPReg);
- unsigned AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
+ Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
// Handle alignment. We have to realign if the allocation granule was smaller
@@ -1357,7 +1834,7 @@ bool IRTranslator::translateAlloca(const User &U,
// Round the size of the allocation up to the stack alignment size
// by add SA-1 to the size. This doesn't overflow because we're computing
// an address inside an alloca.
- unsigned AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
+ Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
AllocTmp = AlignedAlloc;
}
@@ -1387,7 +1864,7 @@ bool IRTranslator::translateInsertElement(const User &U,
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
if (U.getType()->getVectorNumElements() == 1) {
- unsigned Elt = getOrCreateVReg(*U.getOperand(1));
+ Register Elt = getOrCreateVReg(*U.getOperand(1));
auto &Regs = *VMap.getVRegs(U);
if (Regs.empty()) {
Regs.push_back(Elt);
@@ -1398,10 +1875,10 @@ bool IRTranslator::translateInsertElement(const User &U,
return true;
}
- unsigned Res = getOrCreateVReg(U);
- unsigned Val = getOrCreateVReg(*U.getOperand(0));
- unsigned Elt = getOrCreateVReg(*U.getOperand(1));
- unsigned Idx = getOrCreateVReg(*U.getOperand(2));
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
+ Register Elt = getOrCreateVReg(*U.getOperand(1));
+ Register Idx = getOrCreateVReg(*U.getOperand(2));
MIRBuilder.buildInsertVectorElement(Res, Val, Elt, Idx);
return true;
}
@@ -1411,7 +1888,7 @@ bool IRTranslator::translateExtractElement(const User &U,
// If it is a <1 x Ty> vector, use the scalar as it is
// not a legal vector type in LLT.
if (U.getOperand(0)->getType()->getVectorNumElements() == 1) {
- unsigned Elt = getOrCreateVReg(*U.getOperand(0));
+ Register Elt = getOrCreateVReg(*U.getOperand(0));
auto &Regs = *VMap.getVRegs(U);
if (Regs.empty()) {
Regs.push_back(Elt);
@@ -1421,11 +1898,11 @@ bool IRTranslator::translateExtractElement(const User &U,
}
return true;
}
- unsigned Res = getOrCreateVReg(U);
- unsigned Val = getOrCreateVReg(*U.getOperand(0));
+ Register Res = getOrCreateVReg(U);
+ Register Val = getOrCreateVReg(*U.getOperand(0));
const auto &TLI = *MF->getSubtarget().getTargetLowering();
unsigned PreferredVecIdxWidth = TLI.getVectorIdxTy(*DL).getSizeInBits();
- unsigned Idx = 0;
+ Register Idx;
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(1))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
APInt NewIdx = CI->getValue().sextOrTrunc(PreferredVecIdxWidth);
@@ -1481,11 +1958,11 @@ bool IRTranslator::translateAtomicCmpXchg(const User &U,
Type *ValType = ResType->Type::getStructElementType(0);
auto Res = getOrCreateVRegs(I);
- unsigned OldValRes = Res[0];
- unsigned SuccessRes = Res[1];
- unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
- unsigned Cmp = getOrCreateVReg(*I.getCompareOperand());
- unsigned NewVal = getOrCreateVReg(*I.getNewValOperand());
+ Register OldValRes = Res[0];
+ Register SuccessRes = Res[1];
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Cmp = getOrCreateVReg(*I.getCompareOperand());
+ Register NewVal = getOrCreateVReg(*I.getNewValOperand());
MIRBuilder.buildAtomicCmpXchgWithSuccess(
OldValRes, SuccessRes, Addr, Cmp, NewVal,
@@ -1507,9 +1984,9 @@ bool IRTranslator::translateAtomicRMW(const User &U,
Type *ResType = I.getType();
- unsigned Res = getOrCreateVReg(I);
- unsigned Addr = getOrCreateVReg(*I.getPointerOperand());
- unsigned Val = getOrCreateVReg(*I.getValOperand());
+ Register Res = getOrCreateVReg(I);
+ Register Addr = getOrCreateVReg(*I.getPointerOperand());
+ Register Val = getOrCreateVReg(*I.getValOperand());
unsigned Opcode = 0;
switch (I.getOperation()) {
@@ -1560,6 +2037,14 @@ bool IRTranslator::translateAtomicRMW(const User &U,
return true;
}
+bool IRTranslator::translateFence(const User &U,
+ MachineIRBuilder &MIRBuilder) {
+ const FenceInst &Fence = cast<FenceInst>(U);
+ MIRBuilder.buildFence(static_cast<unsigned>(Fence.getOrdering()),
+ Fence.getSyncScopeID());
+ return true;
+}
+
void IRTranslator::finishPendingPhis() {
#ifndef NDEBUG
DILocationVerifier Verifier;
@@ -1569,27 +2054,20 @@ void IRTranslator::finishPendingPhis() {
for (auto &Phi : PendingPHIs) {
const PHINode *PI = Phi.first;
ArrayRef<MachineInstr *> ComponentPHIs = Phi.second;
+ MachineBasicBlock *PhiMBB = ComponentPHIs[0]->getParent();
EntryBuilder->setDebugLoc(PI->getDebugLoc());
#ifndef NDEBUG
Verifier.setCurrentInst(PI);
#endif // ifndef NDEBUG
- // All MachineBasicBlocks exist, add them to the PHI. We assume IRTranslator
- // won't create extra control flow here, otherwise we need to find the
- // dominating predecessor here (or perhaps force the weirder IRTranslators
- // to provide a simple boundary).
- SmallSet<const BasicBlock *, 4> HandledPreds;
-
+ SmallSet<const MachineBasicBlock *, 16> SeenPreds;
for (unsigned i = 0; i < PI->getNumIncomingValues(); ++i) {
auto IRPred = PI->getIncomingBlock(i);
- if (HandledPreds.count(IRPred))
- continue;
-
- HandledPreds.insert(IRPred);
- ArrayRef<unsigned> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
+ ArrayRef<Register> ValRegs = getOrCreateVRegs(*PI->getIncomingValue(i));
for (auto Pred : getMachinePredBBs({IRPred, PI->getParent()})) {
- assert(Pred->isSuccessor(ComponentPHIs[0]->getParent()) &&
- "incorrect CFG at MachineBasicBlock level");
+ if (SeenPreds.count(Pred) || !PhiMBB->isPredecessor(Pred))
+ continue;
+ SeenPreds.insert(Pred);
for (unsigned j = 0; j < ValRegs.size(); ++j) {
MachineInstrBuilder MIB(*MF, ComponentPHIs[j]);
MIB.addUse(ValRegs[j]);
@@ -1611,8 +2089,15 @@ bool IRTranslator::valueIsSplit(const Value &V,
bool IRTranslator::translate(const Instruction &Inst) {
CurBuilder->setDebugLoc(Inst.getDebugLoc());
- EntryBuilder->setDebugLoc(Inst.getDebugLoc());
- switch(Inst.getOpcode()) {
+ // We only emit constants into the entry block from here. To prevent jumpy
+  // debug behaviour, set the line to 0.
+ if (const DebugLoc &DL = Inst.getDebugLoc())
+ EntryBuilder->setDebugLoc(
+ DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()));
+ else
+ EntryBuilder->setDebugLoc(DebugLoc());
+
+ switch (Inst.getOpcode()) {
#define HANDLE_INST(NUM, OPCODE, CLASS) \
case Instruction::OPCODE: \
return translate##OPCODE(Inst, *CurBuilder.get());
@@ -1622,7 +2107,7 @@ bool IRTranslator::translate(const Instruction &Inst) {
}
}
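
The rationale for line 0: DWARF treats line number 0 as "this instruction has no source position", so constants hoisted into the entry block no longer make single-stepping bounce to wherever the constant first appeared, while keeping the scope and inlined-at chain preserves attribution. A toy model of the convention (not LLVM's DebugLoc API):

#include <cstdio>

struct Loc {
  unsigned Line = 0, Col = 0;
  const char *Scope = "";
};

// Drop the position, keep the attribution - the analog of
// DebugLoc::get(0, 0, DL.getScope(), DL.getInlinedAt()) above.
Loc withLineZero(const Loc &L) { return {0, 0, L.Scope}; }

int main() {
  Loc L{17, 3, "foo"};
  Loc Z = withLineZero(L);
  std::printf("%s:%u:%u\n", Z.Scope, Z.Line, Z.Col); // foo:0:0
  return 0;
}
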
-bool IRTranslator::translate(const Constant &C, unsigned Reg) {
+bool IRTranslator::translate(const Constant &C, Register Reg) {
if (auto CI = dyn_cast<ConstantInt>(&C))
EntryBuilder->buildConstant(Reg, *CI);
else if (auto CF = dyn_cast<ConstantFP>(&C))
@@ -1635,7 +2120,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
unsigned NullSize = DL->getTypeSizeInBits(C.getType());
auto *ZeroTy = Type::getIntNTy(C.getContext(), NullSize);
auto *ZeroVal = ConstantInt::get(ZeroTy, 0);
- unsigned ZeroReg = getOrCreateVReg(*ZeroVal);
+ Register ZeroReg = getOrCreateVReg(*ZeroVal);
EntryBuilder->buildCast(Reg, ZeroReg);
} else if (auto GV = dyn_cast<GlobalValue>(&C))
EntryBuilder->buildGlobalValue(Reg, GV);
@@ -1645,7 +2130,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
// Return the scalar if it is a <1 x Ty> vector.
if (CAZ->getNumElements() == 1)
return translate(*CAZ->getElementValue(0u), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CAZ->getNumElements(); ++i) {
Constant &Elt = *CAZ->getElementValue(i);
Ops.push_back(getOrCreateVReg(Elt));
@@ -1655,7 +2140,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
// Return the scalar if it is a <1 x Ty> vector.
if (CV->getNumElements() == 1)
return translate(*CV->getElementAsConstant(0), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumElements(); ++i) {
Constant &Elt = *CV->getElementAsConstant(i);
Ops.push_back(getOrCreateVReg(Elt));
@@ -1673,7 +2158,7 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
} else if (auto CV = dyn_cast<ConstantVector>(&C)) {
if (CV->getNumOperands() == 1)
return translate(*CV->getOperand(0), Reg);
- SmallVector<unsigned, 4> Ops;
+ SmallVector<Register, 4> Ops;
for (unsigned i = 0; i < CV->getNumOperands(); ++i) {
Ops.push_back(getOrCreateVReg(*CV->getOperand(i)));
}
@@ -1686,6 +2171,17 @@ bool IRTranslator::translate(const Constant &C, unsigned Reg) {
return true;
}
+void IRTranslator::finalizeBasicBlock() {
+ for (auto &JTCase : SL->JTCases) {
+ // Emit header first, if it wasn't already emitted.
+ if (!JTCase.first.Emitted)
+ emitJumpTableHeader(JTCase.second, JTCase.first, JTCase.first.HeaderBB);
+
+ emitJumpTable(JTCase.second, JTCase.second.MBB);
+ }
+ SL->JTCases.clear();
+}
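
finalizeBasicBlock stitches together the two halves of jump-table lowering: emitJumpTableHeader range-checks the index and branches to the default block on failure, and emitJumpTable performs the indexed branch. The same control shape as a plain function (purely illustrative):

#include <cstdio>

// Header: out-of-range indices go to the default; Table: indexed dispatch.
int dispatch(int X, int Lo, int Hi) {
  static const int Targets[] = {10, 20, 30, 40};
  if (X < Lo || X > Hi)   // header block: range check -> default
    return -1;
  return Targets[X - Lo]; // table block: indexed branch
}

int main() {
  std::printf("%d %d\n", dispatch(2, 0, 3), dispatch(7, 0, 3)); // 30 -1
  return 0;
}
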
+
void IRTranslator::finalizeFunction() {
// Release the memory used by the different maps we
// needed during the translation.
@@ -1698,6 +2194,7 @@ void IRTranslator::finalizeFunction() {
// destroying it twice (in ~IRTranslator() and ~LLVMContext())
EntryBuilder.reset();
CurBuilder.reset();
+ FuncInfo.clear();
}
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
@@ -1710,13 +2207,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Set the CSEConfig and run the analysis.
GISelCSEInfo *CSEInfo = nullptr;
TPC = &getAnalysis<TargetPassConfig>();
- bool IsO0 = TPC->getOptLevel() == CodeGenOpt::Level::None;
- // Disable CSE for O0.
- bool EnableCSE = !IsO0 && EnableCSEInIRTranslator;
+ bool EnableCSE = EnableCSEInIRTranslator.getNumOccurrences()
+ ? EnableCSEInIRTranslator
+ : TPC->isGISelCSEEnabled();
+
if (EnableCSE) {
EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
- std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
- CSEInfo = &Wrapper.get(std::move(Config));
+ CSEInfo = &Wrapper.get(TPC->getCSEConfig());
EntryBuilder->setCSEInfo(CSEInfo);
CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
CurBuilder->setCSEInfo(CSEInfo);
@@ -1730,6 +2227,14 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ FuncInfo.MF = MF;
+ FuncInfo.BPI = nullptr;
+ const auto &TLI = *MF->getSubtarget().getTargetLowering();
+ const TargetMachine &TM = MF->getTarget();
+ SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+ SL->init(TLI, TM, *DL);
+
+ EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
assert(PendingPHIs.empty() && "stale PHIs");
@@ -1749,6 +2254,10 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF->push_back(EntryBB);
EntryBuilder->setMBB(*EntryBB);
+ DebugLoc DbgLoc = F.getEntryBlock().getFirstNonPHI()->getDebugLoc();
+ SwiftError.setFunction(CurMF);
+ SwiftError.createEntriesInEntryBlock(DbgLoc);
+
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
auto *&MBB = BBToMBB[&BB];
@@ -1764,20 +2273,25 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
EntryBB->addSuccessor(&getMBB(F.front()));
// Lower the actual args into this basic block.
- SmallVector<unsigned, 8> VRegArgs;
+ SmallVector<ArrayRef<Register>, 8> VRegArgs;
for (const Argument &Arg: F.args()) {
if (DL->getTypeStoreSize(Arg.getType()) == 0)
continue; // Don't handle zero sized types.
- VRegArgs.push_back(
- MRI->createGenericVirtualRegister(getLLTForType(*Arg.getType(), *DL)));
+ ArrayRef<Register> VRegs = getOrCreateVRegs(Arg);
+ VRegArgs.push_back(VRegs);
+
+ if (Arg.hasSwiftErrorAttr()) {
+ assert(VRegs.size() == 1 && "Too many vregs for Swift error");
+ SwiftError.setCurrentVReg(EntryBB, SwiftError.getFunctionArg(), VRegs[0]);
+ }
}
  // We don't currently support translating swiftself functions.
for (auto &Arg : F.args()) {
- if (Arg.hasSwiftErrorAttr() || Arg.hasSwiftSelfAttr()) {
+ if (Arg.hasSwiftSelfAttr()) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
- R << "unable to lower arguments due to swifterror/swiftself: "
+ R << "unable to lower arguments due to swiftself: "
<< ore::NV("Prototype", F.getType());
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
@@ -1792,20 +2306,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
return false;
}
- auto ArgIt = F.arg_begin();
- for (auto &VArg : VRegArgs) {
- // If the argument is an unsplit scalar then don't use unpackRegs to avoid
- // creating redundant copies.
- if (!valueIsSplit(*ArgIt, VMap.getOffsets(*ArgIt))) {
- auto &VRegs = *VMap.getVRegs(cast<Value>(*ArgIt));
- assert(VRegs.empty() && "VRegs already populated?");
- VRegs.push_back(VArg);
- } else {
- unpackRegs(*ArgIt, VArg, *EntryBuilder.get());
- }
- ArgIt++;
- }
-
// Need to visit defs before uses when translating instructions.
GISelObserverWrapper WrapperObserver;
if (EnableCSE && CSEInfo)
@@ -1845,6 +2345,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
reportTranslationError(*MF, *TPC, *ORE, R);
return false;
}
+
+ finalizeBasicBlock();
}
#ifndef NDEBUG
WrapperObserver.removeObserver(&Verifier);
@@ -1853,6 +2355,8 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
finishPendingPhis();
+ SwiftError.propagateVRegs();
+
// Merge the argument lowering and constants block with its single
// successor, the LLVM-IR entry block. We want the basic block to
// be maximal.
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index c83c791327e4..70694fe6b6c8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelect.cpp - InstructionSelect ---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -50,9 +49,7 @@ INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
-InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {
- initializeInstructionSelectPass(*PassRegistry::getPassRegistry());
-}
+InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) {}
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
@@ -90,10 +87,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
"instruction is not legal", *MI);
return false;
}
-#endif
// FIXME: We could introduce new blocks and will need to fix the outer loop.
// Until then, keep track of the number of blocks to assert that we don't.
const size_t NumBlocks = MF.size();
+#endif
for (MachineBasicBlock *MBB : post_order(&MF)) {
if (MBB->empty())
@@ -145,8 +142,6 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
- const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
-
for (MachineBasicBlock &MBB : MF) {
if (MBB.empty())
continue;
@@ -178,6 +173,8 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
}
}
+#ifndef NDEBUG
+ const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
// Now that selection is complete, there are no more generic vregs. Verify
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
@@ -216,7 +213,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
reportGISelFailure(MF, TPC, MORE, R);
return false;
}
-
+#endif
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 38913e4afcba..2ad35b3a72c9 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/InstructionSelector.cpp --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,16 +41,16 @@ bool InstructionSelector::constrainOperandRegToRegClass(
MachineFunction &MF = *MBB.getParent();
MachineRegisterInfo &MRI = MF.getRegInfo();
- return
- constrainRegToClass(MRI, TII, RBI, I, I.getOperand(OpIdx).getReg(), RC);
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, I, RC,
+ I.getOperand(OpIdx), OpIdx);
}
bool InstructionSelector::isOperandImmEqual(
const MachineOperand &MO, int64_t Value,
const MachineRegisterInfo &MRI) const {
if (MO.isReg() && MO.getReg())
- if (auto VRegVal = getConstantVRegVal(MO.getReg(), MRI))
- return *VRegVal == Value;
+ if (auto VRegVal = getConstantVRegValWithLookThrough(MO.getReg(), MRI))
+ return VRegVal->Value == Value;
return false;
}
@@ -79,6 +78,6 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
std::next(MI.getIterator()) == IntoMI.getIterator())
return true;
- return !MI.mayLoadOrStore() && !MI.hasUnmodeledSideEffects() &&
- empty(MI.implicit_operands());
+ return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
+ !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
}
diff --git a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
index 94eab9ae00c8..601d50e9806f 100644
--- a/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
+++ b/lib/CodeGen/GlobalISel/LegalityPredicates.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalityPredicates.cpp - Predicates ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -39,15 +38,19 @@ LegalityPredicate LegalityPredicates::typePairInSet(
};
}
-LegalityPredicate LegalityPredicates::typePairAndMemSizeInSet(
+LegalityPredicate LegalityPredicates::typePairAndMemDescInSet(
unsigned TypeIdx0, unsigned TypeIdx1, unsigned MMOIdx,
- std::initializer_list<TypePairAndMemSize> TypesAndMemSizeInit) {
- SmallVector<TypePairAndMemSize, 4> TypesAndMemSize = TypesAndMemSizeInit;
+ std::initializer_list<TypePairAndMemDesc> TypesAndMemDescInit) {
+ SmallVector<TypePairAndMemDesc, 4> TypesAndMemDesc = TypesAndMemDescInit;
return [=](const LegalityQuery &Query) {
- TypePairAndMemSize Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
- Query.MMODescrs[MMOIdx].SizeInBits};
- return std::find(TypesAndMemSize.begin(), TypesAndMemSize.end(), Match) !=
- TypesAndMemSize.end();
+ TypePairAndMemDesc Match = {Query.Types[TypeIdx0], Query.Types[TypeIdx1],
+ Query.MMODescrs[MMOIdx].SizeInBits,
+ Query.MMODescrs[MMOIdx].AlignInBits};
+ return std::find_if(
+ TypesAndMemDesc.begin(), TypesAndMemDesc.end(),
+          [=](const TypePairAndMemDesc &Entry) -> bool {
+ return Match.isCompatible(Entry);
+ }) != TypesAndMemDesc.end();
};
}
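
All of these factories follow one pattern: capture the parameters by value, return a closure over the LegalityQuery, and let LegalizerInfo compose the closures into rules. The pattern, self-contained (names here are invented for illustration):

#include <cstdio>
#include <functional>

struct Query { unsigned SizeInBits, AlignInBits; };
using Predicate = std::function<bool(const Query &)>;

// Factory in the style of LegalityPredicates: capture by value, return a
// closure that tests one facet of the query.
Predicate alignedAtLeast(unsigned Bits) {
  return [=](const Query &Q) { return Q.AlignInBits >= Bits; };
}

// Combinators then build richer rules out of simple ones.
Predicate both(Predicate A, Predicate B) {
  return [=](const Query &Q) { return A(Q) && B(Q); };
}

int main() {
  Predicate P = both(alignedAtLeast(32),
                     [](const Query &Q) { return Q.SizeInBits <= 64; });
  std::printf("%d %d\n", P({64, 32}), P({128, 32})); // 1 0
  return 0;
}
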
@@ -57,10 +60,30 @@ LegalityPredicate LegalityPredicates::isScalar(unsigned TypeIdx) {
};
}
+LegalityPredicate LegalityPredicates::isVector(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isVector();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx].isPointer();
+ };
+}
+
+LegalityPredicate LegalityPredicates::isPointer(unsigned TypeIdx,
+ unsigned AddrSpace) {
+ return [=](const LegalityQuery &Query) {
+ LLT Ty = Query.Types[TypeIdx];
+ return Ty.isPointer() && Ty.getAddressSpace() == AddrSpace;
+ };
+}
+
LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
unsigned Size) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() < Size;
};
}
@@ -68,18 +91,49 @@ LegalityPredicate LegalityPredicates::narrowerThan(unsigned TypeIdx,
LegalityPredicate LegalityPredicates::widerThan(unsigned TypeIdx,
unsigned Size) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && QueryTy.getSizeInBits() > Size;
};
}
+LegalityPredicate LegalityPredicates::scalarOrEltNarrowerThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() < Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltWiderThan(unsigned TypeIdx,
+ unsigned Size) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.getScalarSizeInBits() > Size;
+ };
+}
+
+LegalityPredicate LegalityPredicates::scalarOrEltSizeNotPow2(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return !isPowerOf2_32(QueryTy.getScalarSizeInBits());
+ };
+}
+
LegalityPredicate LegalityPredicates::sizeNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
+ const LLT QueryTy = Query.Types[TypeIdx];
return QueryTy.isScalar() && !isPowerOf2_32(QueryTy.getSizeInBits());
};
}
+LegalityPredicate LegalityPredicates::sameSize(unsigned TypeIdx0,
+ unsigned TypeIdx1) {
+ return [=](const LegalityQuery &Query) {
+ return Query.Types[TypeIdx0].getSizeInBits() ==
+ Query.Types[TypeIdx1].getSizeInBits();
+ };
+}
+
LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
return [=](const LegalityQuery &Query) {
return !isPowerOf2_32(Query.MMODescrs[MMOIdx].SizeInBits / 8);
@@ -88,8 +142,8 @@ LegalityPredicate LegalityPredicates::memSizeInBytesNotPow2(unsigned MMOIdx) {
LegalityPredicate LegalityPredicates::numElementsNotPow2(unsigned TypeIdx) {
return [=](const LegalityQuery &Query) {
- const LLT &QueryTy = Query.Types[TypeIdx];
- return QueryTy.isVector() && isPowerOf2_32(QueryTy.getNumElements());
+ const LLT QueryTy = Query.Types[TypeIdx];
+ return QueryTy.isVector() && !isPowerOf2_32(QueryTy.getNumElements());
};
}
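
The last hunk is a genuine bug fix, not a style change: the old body was missing the negation, so numElementsNotPow2 held exactly when the element count *was* a power of two. A quick check of the corrected predicate:

#include <cstdio>

// Same semantics as llvm::isPowerOf2_32 (false for zero).
static bool isPowerOf2_32(unsigned V) { return V && !(V & (V - 1)); }

int main() {
  // After the fix: true only for non-power-of-2 element counts.
  for (unsigned N : {1u, 2u, 3u, 4u, 6u, 8u})
    std::printf("numElements=%u notPow2=%d\n", N, !isPowerOf2_32(N));
  return 0;
}
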
diff --git a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
index a29b32ecdc03..fcbecf90a845 100644
--- a/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizeMutations.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalizeMutations.cpp - Mutations -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -27,25 +26,46 @@ LegalizeMutation LegalizeMutations::changeTo(unsigned TypeIdx,
};
}
-LegalizeMutation LegalizeMutations::widenScalarToNextPow2(unsigned TypeIdx,
- unsigned Min) {
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ unsigned FromTypeIdx) {
return [=](const LegalityQuery &Query) {
- unsigned NewSizeInBits =
- 1 << Log2_32_Ceil(Query.Types[TypeIdx].getSizeInBits());
- if (NewSizeInBits < Min)
- NewSizeInBits = Min;
- return std::make_pair(TypeIdx, LLT::scalar(NewSizeInBits));
+ const LLT OldTy = Query.Types[TypeIdx];
+ const LLT NewTy = Query.Types[FromTypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::changeElementTo(unsigned TypeIdx,
+ LLT NewEltTy) {
+ return [=](const LegalityQuery &Query) {
+ const LLT OldTy = Query.Types[TypeIdx];
+ return std::make_pair(TypeIdx, OldTy.changeElementType(NewEltTy));
+ };
+}
+
+LegalizeMutation LegalizeMutations::widenScalarOrEltToNextPow2(unsigned TypeIdx,
+ unsigned Min) {
+ return [=](const LegalityQuery &Query) {
+ const LLT Ty = Query.Types[TypeIdx];
+ unsigned NewEltSizeInBits =
+ std::max(1u << Log2_32_Ceil(Ty.getScalarSizeInBits()), Min);
+ return std::make_pair(TypeIdx, Ty.changeElementSize(NewEltSizeInBits));
};
}
LegalizeMutation LegalizeMutations::moreElementsToNextPow2(unsigned TypeIdx,
unsigned Min) {
return [=](const LegalityQuery &Query) {
- const LLT &VecTy = Query.Types[TypeIdx];
- unsigned NewNumElements = 1 << Log2_32_Ceil(VecTy.getNumElements());
- if (NewNumElements < Min)
- NewNumElements = Min;
- return std::make_pair(
- TypeIdx, LLT::vector(NewNumElements, VecTy.getScalarSizeInBits()));
+ const LLT VecTy = Query.Types[TypeIdx];
+ unsigned NewNumElements =
+ std::max(1u << Log2_32_Ceil(VecTy.getNumElements()), Min);
+ return std::make_pair(TypeIdx,
+ LLT::vector(NewNumElements, VecTy.getElementType()));
+ };
+}
+
+LegalizeMutation LegalizeMutations::scalarize(unsigned TypeIdx) {
+ return [=](const LegalityQuery &Query) {
+ return std::make_pair(TypeIdx, Query.Types[TypeIdx].getElementType());
};
}
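
Mutations are the other half of a legalization rule: where a predicate answers "is this legal?", a mutation answers "if not, which type index changes, and to what?". A self-contained model of the arithmetic in widenScalarOrEltToNextPow2 (equivalent to 1 << Log2_32_Ceil(Bits) clamped to Min):

#include <cstdio>

// Smallest power of two >= Bits (for Bits >= 1), clamped below by Min.
unsigned nextPow2AtLeast(unsigned Bits, unsigned Min) {
  unsigned P = 1;
  while (P < Bits)
    P <<= 1;
  return P < Min ? Min : P;
}

int main() {
  std::printf("%u %u %u\n", nextPow2AtLeast(17, 8), // 32
              nextPow2AtLeast(3, 8),                // 8 (clamped)
              nextPow2AtLeast(64, 8));              // 64
  return 0;
}
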
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index 84131e59948c..b5b26bff34bb 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/Legalizer.cpp -----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -28,6 +27,7 @@
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetMachine.h"
#include <iterator>
@@ -50,9 +50,7 @@ INITIALIZE_PASS_END(Legalizer, DEBUG_TYPE,
"Legalize the Machine IR a function's Machine IR", false,
false)
-Legalizer::Legalizer() : MachineFunctionPass(ID) {
- initializeLegalizerPass(*PassRegistry::getPassRegistry());
-}
+Legalizer::Legalizer() : MachineFunctionPass(ID) {}
void Legalizer::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
@@ -77,6 +75,7 @@ static bool isArtifact(const MachineInstr &MI) {
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_CONCAT_VECTORS:
case TargetOpcode::G_BUILD_VECTOR:
+ case TargetOpcode::G_EXTRACT:
return true;
}
}
@@ -87,12 +86,15 @@ namespace {
class LegalizerWorkListManager : public GISelChangeObserver {
InstListTy &InstList;
ArtifactListTy &ArtifactList;
+#ifndef NDEBUG
+ SmallVector<MachineInstr *, 4> NewMIs;
+#endif
public:
LegalizerWorkListManager(InstListTy &Insts, ArtifactListTy &Arts)
: InstList(Insts), ArtifactList(Arts) {}
- void createdInstr(MachineInstr &MI) override {
+ void createdOrChangedInstr(MachineInstr &MI) {
// Only legalize pre-isel generic instructions.
// Legalization process could generate Target specific pseudo
// instructions with generic types. Don't record them
@@ -102,7 +104,20 @@ public:
else
InstList.insert(&MI);
}
+ }
+
+ void createdInstr(MachineInstr &MI) override {
LLVM_DEBUG(dbgs() << ".. .. New MI: " << MI);
+ LLVM_DEBUG(NewMIs.push_back(&MI));
+ createdOrChangedInstr(MI);
+ }
+
+ void printNewInstrs() {
+ LLVM_DEBUG({
+ for (const auto *MI : NewMIs)
+ dbgs() << ".. .. New MI: " << *MI;
+ NewMIs.clear();
+ });
}
void erasingInstr(MachineInstr &MI) override {
@@ -119,7 +134,7 @@ public:
// When insts change, we want to revisit them to legalize them again.
// We'll consider them the same as created.
LLVM_DEBUG(dbgs() << ".. .. Changed MI: " << MI);
- createdInstr(MI);
+ createdOrChangedInstr(MI);
}
};
} // namespace
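+// An illustrative flow (a sketch; inferred from the callbacks above): when
+// LegalizerHelper mutates an instruction in place, the observer wrapper
+// forwards the notifications here and the worklist re-enqueues the MI
+// exactly as if it were newly created:
+//   Observer.changingInstr(MI);  // about to mutate
+//   ... mutate MI in place ...
+//   Observer.changedInstr(MI);   // -> createdOrChangedInstr(MI)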
@@ -155,20 +170,22 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
if (!isPreISelGenericOpcode(MI.getOpcode()))
continue;
if (isArtifact(MI))
- ArtifactList.insert(&MI);
+ ArtifactList.deferred_insert(&MI);
else
- InstList.insert(&MI);
+ InstList.deferred_insert(&MI);
}
}
+ ArtifactList.finalize();
+ InstList.finalize();
std::unique_ptr<MachineIRBuilder> MIRBuilder;
GISelCSEInfo *CSEInfo = nullptr;
- bool IsO0 = TPC.getOptLevel() == CodeGenOpt::Level::None;
- // Disable CSE for O0.
- bool EnableCSE = !IsO0 && EnableCSEInLegalizer;
+ bool EnableCSE = EnableCSEInLegalizer.getNumOccurrences()
+ ? EnableCSEInLegalizer
+ : TPC.isGISelCSEEnabled();
+
if (EnableCSE) {
MIRBuilder = make_unique<CSEMIRBuilder>();
- std::unique_ptr<CSEConfig> Config = make_unique<CSEConfig>();
- CSEInfo = &Wrapper.get(std::move(Config));
+ CSEInfo = &Wrapper.get(TPC.getCSEConfig());
MIRBuilder->setCSEInfo(CSEInfo);
} else
MIRBuilder = make_unique<MachineIRBuilder>();
@@ -210,6 +227,7 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
"unable to legalize instruction", MI);
return false;
}
+ WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
}
while (!ArtifactList.empty()) {
@@ -222,7 +240,9 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
continue;
}
SmallVector<MachineInstr *, 4> DeadInstructions;
- if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions)) {
+ if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions,
+ WrapperObserver)) {
+ WorkListObserver.printNewInstrs();
for (auto *DeadMI : DeadInstructions) {
LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n");
RemoveDeadInstFromLists(DeadMI);
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index b3fc94cdec60..f5cf7fc9bd9b 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -30,6 +29,39 @@
using namespace llvm;
using namespace LegalizeActions;
+/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
+///
+/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
+/// with any leftover piece as type \p LeftoverTy
+///
+/// Returns -1 in the first element of the pair if the breakdown is not
+/// satisfiable.
+static std::pair<int, int>
+getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned Size = OrigTy.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ unsigned NumParts = Size / NarrowSize;
+ unsigned LeftoverSize = Size - NumParts * NarrowSize;
+ assert(Size > NarrowSize);
+
+ if (LeftoverSize == 0)
+ return {NumParts, 0};
+
+ if (NarrowTy.isVector()) {
+ unsigned EltSize = OrigTy.getScalarSizeInBits();
+ if (LeftoverSize % EltSize != 0)
+ return {-1, -1};
+ LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ } else {
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ }
+
+ int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
+ return std::make_pair(NumParts, NumLeftover);
+}
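+
+// For example (illustrative): breaking s96 into s32 pieces yields {3, 0};
+// s100 into s32 yields {3, 1} with LeftoverTy = s4; and <5 x s16> into
+// <2 x s16> yields {2, 1} with LeftoverTy = s16.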
+
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
GISelChangeObserver &Observer,
MachineIRBuilder &Builder)
@@ -50,6 +82,10 @@ LegalizerHelper::LegalizeResult
LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
LLVM_DEBUG(dbgs() << "Legalizing: "; MI.print(dbgs()));
+ if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
+ MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
+ return LI.legalizeIntrinsic(MI, MRI, MIRBuilder) ? Legalized
+ : UnableToLegalize;
auto Step = LI.getAction(MI, MRI);
switch (Step.Action) {
case Legal:
@@ -70,6 +106,9 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
case FewerElements:
LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
+ case MoreElements:
+ LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
+ return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
case Custom:
LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
return LI.legalizeCustom(MI, MRI, MIRBuilder, Observer) ? Legalized
@@ -80,13 +119,103 @@ LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
}
}
-void LegalizerHelper::extractParts(unsigned Reg, LLT Ty, int NumParts,
- SmallVectorImpl<unsigned> &VRegs) {
+void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
+ SmallVectorImpl<Register> &VRegs) {
for (int i = 0; i < NumParts; ++i)
VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
MIRBuilder.buildUnmerge(VRegs, Reg);
}
+bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
+ LLT MainTy, LLT &LeftoverTy,
+ SmallVectorImpl<Register> &VRegs,
+ SmallVectorImpl<Register> &LeftoverRegs) {
+ assert(!LeftoverTy.isValid() && "this is an out argument");
+
+ unsigned RegSize = RegTy.getSizeInBits();
+ unsigned MainSize = MainTy.getSizeInBits();
+ unsigned NumParts = RegSize / MainSize;
+ unsigned LeftoverSize = RegSize - NumParts * MainSize;
+
+ // Use an unmerge when possible.
+ if (LeftoverSize == 0) {
+ for (unsigned I = 0; I < NumParts; ++I)
+ VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
+ MIRBuilder.buildUnmerge(VRegs, Reg);
+ return true;
+ }
+
+ if (MainTy.isVector()) {
+ unsigned EltSize = MainTy.getScalarSizeInBits();
+ if (LeftoverSize % EltSize != 0)
+ return false;
+ LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
+ } else {
+ LeftoverTy = LLT::scalar(LeftoverSize);
+ }
+
+ // For irregular sizes, extract the individual parts.
+ for (unsigned I = 0; I != NumParts; ++I) {
+ Register NewReg = MRI.createGenericVirtualRegister(MainTy);
+ VRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
+ }
+
+ for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
+ Offset += LeftoverSize) {
+ Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
+ LeftoverRegs.push_back(NewReg);
+ MIRBuilder.buildExtract(NewReg, Reg, Offset);
+ }
+
+ return true;
+}
+
+void LegalizerHelper::insertParts(Register DstReg,
+ LLT ResultTy, LLT PartTy,
+ ArrayRef<Register> PartRegs,
+ LLT LeftoverTy,
+ ArrayRef<Register> LeftoverRegs) {
+ if (!LeftoverTy.isValid()) {
+ assert(LeftoverRegs.empty());
+
+ if (!ResultTy.isVector()) {
+ MIRBuilder.buildMerge(DstReg, PartRegs);
+ return;
+ }
+
+ if (PartTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, PartRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, PartRegs);
+ return;
+ }
+
+ unsigned PartSize = PartTy.getSizeInBits();
+ unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
+
+ Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
+ MIRBuilder.buildUndef(CurResultReg);
+
+ unsigned Offset = 0;
+ for (Register PartReg : PartRegs) {
+ Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
+ MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
+ CurResultReg = NewResultReg;
+ Offset += PartSize;
+ }
+
+ for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
+ // Use the original output register for the final insert to avoid a copy.
+ Register NewResultReg = (I + 1 == E) ?
+ DstReg : MRI.createGenericVirtualRegister(ResultTy);
+
+ MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
+ CurResultReg = NewResultReg;
+ Offset += LeftoverPartSize;
+ }
+}
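+
+// Illustrative round trip (a sketch; register names are hypothetical):
+// for RegTy = s100 and MainTy = s32, extractParts produces
+//   %p0:_(s32) = G_EXTRACT %src(s100), 0
+//   %p1:_(s32) = G_EXTRACT %src(s100), 32
+//   %p2:_(s32) = G_EXTRACT %src(s100), 64
+//   %l0:_(s4) = G_EXTRACT %src(s100), 96
+// and insertParts rebuilds the value by chaining G_INSERTs into a
+// G_IMPLICIT_DEF, writing the last piece directly into the destination.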
+
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
@@ -116,6 +245,12 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_FDIV:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::DIV_F64 : RTLIB::DIV_F32;
+ case TargetOpcode::G_FEXP:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::EXP_F64 : RTLIB::EXP_F32;
+ case TargetOpcode::G_FEXP2:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::EXP2_F64 : RTLIB::EXP2_F32;
case TargetOpcode::G_FREM:
return Size == 64 ? RTLIB::REM_F64 : RTLIB::REM_F32;
case TargetOpcode::G_FPOW:
@@ -123,6 +258,32 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
case TargetOpcode::G_FMA:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::FMA_F64 : RTLIB::FMA_F32;
+ case TargetOpcode::G_FSIN:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::SIN_F128
+ : Size == 64 ? RTLIB::SIN_F64 : RTLIB::SIN_F32;
+ case TargetOpcode::G_FCOS:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::COS_F128
+ : Size == 64 ? RTLIB::COS_F64 : RTLIB::COS_F32;
+ case TargetOpcode::G_FLOG10:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG10_F128
+ : Size == 64 ? RTLIB::LOG10_F64 : RTLIB::LOG10_F32;
+ case TargetOpcode::G_FLOG:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG_F128
+ : Size == 64 ? RTLIB::LOG_F64 : RTLIB::LOG_F32;
+ case TargetOpcode::G_FLOG2:
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ return Size == 128 ? RTLIB::LOG2_F128
+ : Size == 64 ? RTLIB::LOG2_F64 : RTLIB::LOG2_F32;
+ case TargetOpcode::G_FCEIL:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::CEIL_F64 : RTLIB::CEIL_F32;
+ case TargetOpcode::G_FFLOOR:
+ assert((Size == 32 || Size == 64) && "Unsupported size");
+ return Size == 64 ? RTLIB::FLOOR_F64 : RTLIB::FLOOR_F32;
}
llvm_unreachable("Unknown libcall function");
}
@@ -214,7 +375,20 @@ LegalizerHelper::libcall(MachineInstr &MI) {
case TargetOpcode::G_FDIV:
case TargetOpcode::G_FMA:
case TargetOpcode::G_FPOW:
- case TargetOpcode::G_FREM: {
+ case TargetOpcode::G_FREM:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR: {
+ if (Size > 64) {
+ LLVM_DEBUG(dbgs() << "Size " << Size << " too large to legalize.\n");
+ return UnableToLegalize;
+ }
Type *HLTy = Size == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx);
auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
if (Status != Legalized)
@@ -250,10 +424,11 @@ LegalizerHelper::libcall(MachineInstr &MI) {
// FIXME: Support other types
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (ToSize != 32 || (FromSize != 32 && FromSize != 64))
+ if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
- MI, MIRBuilder, Type::getInt32Ty(Ctx),
+ MI, MIRBuilder,
+ ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
if (Status != Legalized)
return Status;
@@ -264,12 +439,12 @@ LegalizerHelper::libcall(MachineInstr &MI) {
// FIXME: Support other types
unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
- if (FromSize != 32 || (ToSize != 32 && ToSize != 64))
+ if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
return UnableToLegalize;
LegalizeResult Status = conversionLibcall(
MI, MIRBuilder,
ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
- Type::getInt32Ty(Ctx));
+ FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
if (Status != Legalized)
return Status;
break;
@@ -283,10 +458,6 @@ LegalizerHelper::libcall(MachineInstr &MI) {
LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
unsigned TypeIdx,
LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
- if (TypeIdx != 0 && MI.getOpcode() != TargetOpcode::G_EXTRACT)
- return UnableToLegalize;
-
MIRBuilder.setInstr(MI);
uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
@@ -302,12 +473,12 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
- SmallVector<unsigned, 2> DstRegs;
+ SmallVector<Register, 2> DstRegs;
for (int i = 0; i < NumParts; ++i)
DstRegs.push_back(
MIRBuilder.buildUndef(NarrowTy)->getOperand(0).getReg());
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if(MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
@@ -315,6 +486,38 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_CONSTANT: {
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ const APInt &Val = MI.getOperand(1).getCImm()->getValue();
+ unsigned TotalSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ int NumParts = TotalSize / NarrowSize;
+
+ SmallVector<Register, 4> PartRegs;
+ for (int I = 0; I != NumParts; ++I) {
+ unsigned Offset = I * NarrowSize;
+ auto K = MIRBuilder.buildConstant(NarrowTy,
+ Val.lshr(Offset).trunc(NarrowSize));
+ PartRegs.push_back(K.getReg(0));
+ }
+
+ LLT LeftoverTy;
+ unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
+ SmallVector<Register, 1> LeftoverRegs;
+ if (LeftoverBits != 0) {
+ LeftoverTy = LLT::scalar(LeftoverBits);
+ auto K = MIRBuilder.buildConstant(
+ LeftoverTy,
+ Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
+ LeftoverRegs.push_back(K.getReg(0));
+ }
+
+ insertParts(MI.getOperand(0).getReg(),
+ Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
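+ // For example (illustrative): narrowing %c:_(s100) = G_CONSTANT with
+ // NarrowTy = s32 builds G_CONSTANTs for bits [31:0], [63:32] and [95:64],
+ // plus an s4 constant for bits [99:96], then stitches them back together
+ // via insertParts above.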
case TargetOpcode::G_ADD: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
@@ -323,16 +526,16 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// Expand in terms of carry-setting/consuming G_ADDE instructions.
int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
- SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- unsigned CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
MIRBuilder.buildConstant(CarryIn, 0);
for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
Src2Regs[i], CarryIn);
@@ -340,67 +543,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
DstRegs.push_back(DstReg);
CarryIn = CarryOut;
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_EXTRACT: {
- if (TypeIdx != 1)
- return UnableToLegalize;
-
- int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
- // FIXME: add support for when SizeOp1 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp1 % NarrowSize != 0)
- return UnableToLegalize;
- int NumParts = SizeOp1 / NarrowSize;
-
- SmallVector<unsigned, 2> SrcRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
-
- unsigned OpReg = MI.getOperand(0).getReg();
- uint64_t OpStart = MI.getOperand(2).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned SrcStart = i * NarrowSize;
-
- if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
- // No part of the extract uses this subregister, ignore it.
- continue;
- } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is extracted, forward the value.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- }
-
- // OpSegStart is where this destination segment would start in OpReg if it
- // extended infinitely in both directions.
- int64_t ExtractOffset;
- uint64_t SegSize;
- if (OpStart < SrcStart) {
- ExtractOffset = 0;
- SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
- } else {
- ExtractOffset = OpStart - SrcStart;
- SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
- }
-
- unsigned SegReg = SrcRegs[i];
- if (ExtractOffset != 0 || SegSize != NarrowSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
- }
-
- DstRegs.push_back(SegReg);
- }
-
- unsigned DstReg = MI.getOperand(0).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
if(MRI.getType(DstReg).isVector())
MIRBuilder.buildBuildVector(DstReg, DstRegs);
else
@@ -408,178 +551,117 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
- case TargetOpcode::G_INSERT: {
+ case TargetOpcode::G_SUB: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
if (SizeOp0 % NarrowSize != 0)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
-
- SmallVector<unsigned, 2> SrcRegs, DstRegs;
- SmallVector<uint64_t, 2> Indexes;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+ int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
- unsigned OpReg = MI.getOperand(2).getReg();
- uint64_t OpStart = MI.getOperand(3).getImm();
- uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstStart = i * NarrowSize;
-
- if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
- // No part of the insert affects this subregister, forward the original.
- DstRegs.push_back(SrcRegs[i]);
- continue;
- } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
- // The entire subregister is defined by this insert, forward the new
- // value.
- DstRegs.push_back(OpReg);
- continue;
- }
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- // OpSegStart is where this destination segment would start in OpReg if it
- // extended infinitely in both directions.
- int64_t ExtractOffset, InsertOffset;
- uint64_t SegSize;
- if (OpStart < DstStart) {
- InsertOffset = 0;
- ExtractOffset = DstStart - OpStart;
- SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
- } else {
- InsertOffset = OpStart - DstStart;
- ExtractOffset = 0;
- SegSize =
- std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
- }
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ Register BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ MIRBuilder.buildInstr(TargetOpcode::G_USUBO, {DstReg, BorrowOut},
+ {Src1Regs[0], Src2Regs[0]});
+ DstRegs.push_back(DstReg);
+ Register BorrowIn = BorrowOut;
+ for (int i = 1; i < NumParts; ++i) {
+ DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ BorrowOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- unsigned SegReg = OpReg;
- if (ExtractOffset != 0 || SegSize != OpSize) {
- // A genuine extract is needed.
- SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
- MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
- }
+ MIRBuilder.buildInstr(TargetOpcode::G_USUBE, {DstReg, BorrowOut},
+ {Src1Regs[i], Src2Regs[i], BorrowIn});
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
DstRegs.push_back(DstReg);
+ BorrowIn = BorrowOut;
}
-
- assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
MI.eraseFromParent();
return Legalized;
}
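+ // For example (illustrative, names hypothetical): narrowing an s64 G_SUB
+ // with NarrowTy = s32 produces a borrow chain:
+ //   %lo:_(s32), %b0:_(s1) = G_USUBO %a_lo, %b_lo
+ //   %hi:_(s32), %b1:_(s1) = G_USUBE %a_hi, %b_hi, %b0
+ //   %res:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)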
+ case TargetOpcode::G_MUL:
+ case TargetOpcode::G_UMULH:
+ return narrowScalarMul(MI, NarrowTy);
+ case TargetOpcode::G_EXTRACT:
+ return narrowScalarExtract(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_INSERT:
+ return narrowScalarInsert(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_LOAD: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
const auto &MMO = **MI.memoperands_begin();
- // This implementation doesn't work for atomics. Give up instead of doing
- // something invalid.
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
- MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- LLT OffsetTy = LLT::scalar(
- MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
- SmallVector<unsigned, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg = 0;
- unsigned Adjustment = i * NarrowSize / 8;
- unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
- MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
- MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
- MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
- MIRBuilder.materializeGEP(SrcReg, MI.getOperand(1).getReg(), OffsetTy,
- Adjustment);
+ if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ MIRBuilder.buildLoad(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MIRBuilder.buildAnyExt(DstReg, TmpReg);
+ MI.eraseFromParent();
+ return Legalized;
+ }
- MIRBuilder.buildLoad(DstReg, SrcReg, *SplitMMO);
+ return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
- DstRegs.push_back(DstReg);
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ if (MMO.getSizeInBits() == NarrowSize) {
+ MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
+ } else {
+ unsigned ExtLoad = ZExt ? TargetOpcode::G_ZEXTLOAD
+ : TargetOpcode::G_SEXTLOAD;
+ MIRBuilder.buildInstr(ExtLoad)
+ .addDef(TmpReg)
+ .addUse(PtrReg)
+ .addMemOperand(&MMO);
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ if (ZExt)
+ MIRBuilder.buildZExt(DstReg, TmpReg);
else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ MIRBuilder.buildSExt(DstReg, TmpReg);
+
MI.eraseFromParent();
return Legalized;
}
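+ // For example (illustrative): an s64 G_ZEXTLOAD of a 32-bit memory value,
+ // narrowed with NarrowTy = s32, becomes a plain s32 G_LOAD followed by a
+ // G_ZEXT to s64; a 16-bit memory value instead keeps an s32 G_ZEXTLOAD
+ // and then zero-extends to s64.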
case TargetOpcode::G_STORE: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
- return UnableToLegalize;
-
const auto &MMO = **MI.memoperands_begin();
- // This implementation doesn't work for atomics. Give up instead of doing
- // something invalid.
- if (MMO.getOrdering() != AtomicOrdering::NotAtomic ||
- MMO.getFailureOrdering() != AtomicOrdering::NotAtomic)
+
+ Register SrcReg = MI.getOperand(0).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (SrcTy.isVector())
return UnableToLegalize;
int NumParts = SizeOp0 / NarrowSize;
- LLT OffsetTy = LLT::scalar(
- MRI.getType(MI.getOperand(1).getReg()).getScalarSizeInBits());
-
- SmallVector<unsigned, 2> SrcRegs;
- extractParts(MI.getOperand(0).getReg(), NarrowTy, NumParts, SrcRegs);
-
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = 0;
- unsigned Adjustment = i * NarrowSize / 8;
- unsigned Alignment = MinAlign(MMO.getAlignment(), Adjustment);
-
- MachineMemOperand *SplitMMO = MIRBuilder.getMF().getMachineMemOperand(
- MMO.getPointerInfo().getWithOffset(Adjustment), MMO.getFlags(),
- NarrowSize / 8, Alignment, MMO.getAAInfo(), MMO.getRanges(),
- MMO.getSyncScopeID(), MMO.getOrdering(), MMO.getFailureOrdering());
-
- MIRBuilder.materializeGEP(DstReg, MI.getOperand(1).getReg(), OffsetTy,
- Adjustment);
-
- MIRBuilder.buildStore(SrcRegs[i], DstReg, *SplitMMO);
- }
- MI.eraseFromParent();
- return Legalized;
- }
- case TargetOpcode::G_CONSTANT: {
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
+ unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
+ unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
+ if (SrcTy.isVector() && LeftoverBits != 0)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- const APInt &Cst = MI.getOperand(1).getCImm()->getValue();
- LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
- SmallVector<unsigned, 2> DstRegs;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- ConstantInt *CI =
- ConstantInt::get(Ctx, Cst.lshr(NarrowSize * i).trunc(NarrowSize));
- MIRBuilder.buildConstant(DstReg, *CI);
- DstRegs.push_back(DstReg);
+ if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ auto &MMO = **MI.memoperands_begin();
+ MIRBuilder.buildTrunc(TmpReg, SrcReg);
+ MIRBuilder.buildStore(TmpReg, MI.getOperand(1).getReg(), MMO);
+ MI.eraseFromParent();
+ return Legalized;
}
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
+
+ return reduceLoadStoreWidth(MI, 0, NarrowTy);
}
+ case TargetOpcode::G_SELECT:
+ return narrowScalarSelect(MI, TypeIdx, NarrowTy);
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR: {
@@ -592,44 +674,112 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
// ...
// AN = BinOp<Ty/N> BN, CN
// A = G_MERGE_VALUES A1, ..., AN
+ return narrowScalarBasic(MI, TypeIdx, NarrowTy);
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_ASHR:
+ return narrowScalarShift(MI, TypeIdx, NarrowTy);
+ case TargetOpcode::G_CTLZ:
+ case TargetOpcode::G_CTLZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTTZ:
+ case TargetOpcode::G_CTTZ_ZERO_UNDEF:
+ case TargetOpcode::G_CTPOP:
+ if (TypeIdx != 0)
+ return UnableToLegalize; // TODO
- // FIXME: add support for when SizeOp0 isn't an exact multiple of
- // NarrowSize.
- if (SizeOp0 % NarrowSize != 0)
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
return UnableToLegalize;
- int NumParts = SizeOp0 / NarrowSize;
- // List the registers where the destination will be scattered.
- SmallVector<unsigned, 2> DstRegs;
- // List the registers where the first argument will be split.
- SmallVector<unsigned, 2> SrcsReg1;
- // List the registers where the second argument will be split.
- SmallVector<unsigned, 2> SrcsReg2;
- // Create all the temporary registers.
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg1 = MRI.createGenericVirtualRegister(NarrowTy);
- unsigned SrcReg2 = MRI.createGenericVirtualRegister(NarrowTy);
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
- DstRegs.push_back(DstReg);
- SrcsReg1.push_back(SrcReg1);
- SrcsReg2.push_back(SrcReg2);
+ Observer.changingInstr(MI);
+ narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PHI: {
+ unsigned NumParts = SizeOp0 / NarrowSize;
+ SmallVector<Register, 2> DstRegs;
+ SmallVector<SmallVector<Register, 2>, 2> SrcRegs;
+ DstRegs.resize(NumParts);
+ SrcRegs.resize(MI.getNumOperands() / 2);
+ Observer.changingInstr(MI);
+ for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
+ SrcRegs[i / 2]);
}
- // Explode the big arguments into smaller chunks.
- MIRBuilder.buildUnmerge(SrcsReg1, MI.getOperand(1).getReg());
- MIRBuilder.buildUnmerge(SrcsReg2, MI.getOperand(2).getReg());
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, MI);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
+ MachineInstrBuilder MIB =
+ MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
+ for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
+ MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
+ }
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
+ Observer.changedInstr(MI);
+ MI.eraseFromParent();
+ return Legalized;
+ }
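+ // For example (illustrative): an s64 G_PHI narrowed to s32 becomes two
+ // s32 G_PHIs, each fed by G_UNMERGE_VALUES emitted before the terminator
+ // of the corresponding predecessor, with the halves recombined by a
+ // G_MERGE_VALUES in the PHI block.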
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_INSERT_VECTOR_ELT: {
+ if (TypeIdx != 2)
+ return UnableToLegalize;
- // Do the operation on each small part.
- for (int i = 0; i < NumParts; ++i)
- MIRBuilder.buildInstr(MI.getOpcode(), {DstRegs[i]},
- {SrcsReg1[i], SrcsReg2[i]});
+ int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, NarrowTy, OpIdx);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_ICMP: {
+ uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+ if (NarrowSize * 2 != SrcSize)
+ return UnableToLegalize;
- // Gather the destination registers into the final destination.
- unsigned DstReg = MI.getOperand(0).getReg();
- if(MRI.getType(DstReg).isVector())
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
- else
- MIRBuilder.buildMerge(DstReg, DstRegs);
+ Observer.changingInstr(MI);
+ Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
+ Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2).getReg());
+
+ Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
+ Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3).getReg());
+
+ CmpInst::Predicate Pred =
+ static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
+ MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
+ MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
+ MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
+ MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
+ MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
+ } else {
+ const LLT s1 = LLT::scalar(1);
+ MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpHEQ =
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
+ ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+ MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
+ }
+ Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
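+ // For example (illustrative): an s64 eq compare narrowed to s32 becomes
+ //   %xl:_(s32) = G_XOR %lhs_lo, %rhs_lo
+ //   %xh:_(s32) = G_XOR %lhs_hi, %rhs_hi
+ //   %or:_(s32) = G_OR %xl, %xh
+ //   %res:_(s1) = G_ICMP intpred(eq), %or, 0
+ // while ordered predicates compare the high halves and select the
+ // unsigned low-half compare when the high halves are equal.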
@@ -643,15 +793,322 @@ void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
MO.setReg(ExtB->getOperand(0).getReg());
}
+void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ auto ExtB = MIRBuilder.buildInstr(TargetOpcode::G_TRUNC, {NarrowTy},
+ {MO.getReg()});
+ MO.setReg(ExtB->getOperand(0).getReg());
+}
+
void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
unsigned OpIdx, unsigned TruncOpcode) {
MachineOperand &MO = MI.getOperand(OpIdx);
- unsigned DstExt = MRI.createGenericVirtualRegister(WideTy);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
MIRBuilder.buildInstr(TruncOpcode, {MO.getReg()}, {DstExt});
MO.setReg(DstExt);
}
+void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
+ unsigned OpIdx, unsigned ExtOpcode) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(ExtOpcode, {MO.getReg()}, {DstTrunc});
+ MO.setReg(DstTrunc);
+}
+
+void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildExtract(MO.getReg(), DstExt, 0);
+ MO.setReg(DstExt);
+}
+
+void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
+ unsigned OpIdx) {
+ MachineOperand &MO = MI.getOperand(OpIdx);
+
+ LLT OldTy = MRI.getType(MO.getReg());
+ unsigned OldElts = OldTy.getNumElements();
+ unsigned NewElts = MoreTy.getNumElements();
+
+ unsigned NumParts = NewElts / OldElts;
+
+ // Use concat_vectors if the new element count is a multiple of the old.
+ if (NumParts * OldElts == NewElts) {
+ SmallVector<Register, 8> Parts;
+ Parts.push_back(MO.getReg());
+
+ Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
+ for (unsigned I = 1; I != NumParts; ++I)
+ Parts.push_back(ImpDef);
+
+ auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
+ MO.setReg(Concat.getReg(0));
+ return;
+ }
+
+ Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
+ Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
+ MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
+ MO.setReg(MoreReg);
+}
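+
+// For example (illustrative): widening a <2 x s32> operand to <4 x s32>
+// concatenates the source with one <2 x s32> G_IMPLICIT_DEF; widening
+// <3 x s32> to <4 x s32> instead inserts the source into a <4 x s32>
+// G_IMPLICIT_DEF at offset 0.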
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(Src1);
+ const int DstSize = DstTy.getSizeInBits();
+ const int SrcSize = SrcTy.getSizeInBits();
+ const int WideSize = WideTy.getSizeInBits();
+ const int NumMerge = (DstSize + WideSize - 1) / WideSize;
+
+ unsigned NumOps = MI.getNumOperands();
+ unsigned NumSrc = MI.getNumOperands() - 1;
+ unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
+
+ if (WideSize >= DstSize) {
+ // Directly pack the bits in the target type.
+ Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ const unsigned Offset = (I - 1) * PartSize;
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
+
+ auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
+
+ Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+ MRI.createGenericVirtualRegister(WideTy);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
+ auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
+ MIRBuilder.buildOr(NextResult, ResultReg, Shl);
+ ResultReg = NextResult;
+ }
+
+ if (WideSize > DstSize)
+ MIRBuilder.buildTrunc(DstReg, ResultReg);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Unmerge the original values to the GCD type, and recombine to the next
+ // multiple greater than the original type.
+ //
+ // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
+ // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
+ // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
+ // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
+ // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
+ // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
+ // %12:_(s12) = G_MERGE_VALUES %10, %11
+ //
+ // Padding with undef if necessary:
+ //
+ // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
+ // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
+ // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
+ // %7:_(s2) = G_IMPLICIT_DEF
+ // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
+ // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
+ // %10:_(s12) = G_MERGE_VALUES %8, %9
+
+ const int GCD = greatestCommonDivisor(SrcSize, WideSize);
+ LLT GCDTy = LLT::scalar(GCD);
+
+ SmallVector<Register, 8> Parts;
+ SmallVector<Register, 8> NewMergeRegs;
+ SmallVector<Register, 8> Unmerges;
+ LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
+
+ // Decompose the original operands if they don't evenly divide.
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register SrcReg = MI.getOperand(I).getReg();
+ if (GCD == SrcSize) {
+ Unmerges.push_back(SrcReg);
+ } else {
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
+ Unmerges.push_back(Unmerge.getReg(J));
+ }
+ }
+
+ // Pad with undef to the next size that is a multiple of the requested size.
+ if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
+ Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
+ for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
+ Unmerges.push_back(UndefReg);
+ }
+
+ const int PartsPerGCD = WideSize / GCD;
+
+ // Build merges of each piece.
+ ArrayRef<Register> Slicer(Unmerges);
+ for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
+ auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
+ NewMergeRegs.push_back(Merge.getReg(0));
+ }
+
+ // A truncate may be necessary if the requested type doesn't evenly divide the
+ // original result type.
+ if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
+ MIRBuilder.buildMerge(DstReg, NewMergeRegs);
+ } else {
+ auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
+ MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
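+
+// For example (an illustrative sketch of the WideSize >= DstSize path
+// above): widening %r:_(s16) = G_MERGE_VALUES %a:_(s8), %b:_(s8) to s32
+// zero-extends both sources to s32, shifts %b left by 8, ORs the pieces
+// together, and truncates the s32 result back to s16.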
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ unsigned NumDst = MI.getNumOperands() - 1;
+ Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ if (!SrcTy.isScalar())
+ return UnableToLegalize;
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+ if (!DstTy.isScalar())
+ return UnableToLegalize;
+
+ unsigned NewSrcSize = NumDst * WideTy.getSizeInBits();
+ LLT NewSrcTy = LLT::scalar(NewSrcSize);
+ unsigned SizeDiff = WideTy.getSizeInBits() - DstTy.getSizeInBits();
+
+ auto WideSrc = MIRBuilder.buildZExt(NewSrcTy, SrcReg);
+
+ for (unsigned I = 1; I != NumDst; ++I) {
+ auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, SizeDiff * I);
+ auto Shl = MIRBuilder.buildShl(NewSrcTy, WideSrc, ShiftAmt);
+ WideSrc = MIRBuilder.buildOr(NewSrcTy, WideSrc, Shl);
+ }
+
+ Observer.changingInstr(MI);
+
+ MI.getOperand(NumDst).setReg(WideSrc->getOperand(0).getReg());
+ for (unsigned I = 0; I != NumDst; ++I)
+ widenScalarDst(MI, WideTy, I);
+
+ Observer.changedInstr(MI);
+
+ return Legalized;
+}
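+
+// For example (illustrative): widening the results of
+//   %a:_(s16), %b:_(s16) = G_UNMERGE_VALUES %x:_(s32)
+// to s32 zero-extends %x to s64, ORs in a copy shifted left by 16 so each
+// half lands on a 32-bit boundary, unmerges two s32 values, and truncates
+// each back to s16.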
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT DstTy = MRI.getType(DstReg);
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ if (TypeIdx == 0) {
+ if (SrcTy.isVector() || DstTy.isVector())
+ return UnableToLegalize;
+
+ SrcOp Src(SrcReg);
+ if (SrcTy.isPointer()) {
+ // Extracts from pointers can be handled only if they are really just
+ // simple integers.
+ const DataLayout &DL = MIRBuilder.getDataLayout();
+ if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
+ return UnableToLegalize;
+
+ LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
+ SrcTy = SrcAsIntTy;
+ }
+
+ if (DstTy.isPointer())
+ return UnableToLegalize;
+
+ if (Offset == 0) {
+ // Avoid a shift in the degenerate case.
+ MIRBuilder.buildTrunc(DstReg,
+ MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ // Do a shift in the source type.
+ LLT ShiftTy = SrcTy;
+ if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
+ Src = MIRBuilder.buildAnyExt(WideTy, Src);
+ ShiftTy = WideTy;
+ } else if (WideTy.getSizeInBits() > SrcTy.getSizeInBits())
+ return UnableToLegalize;
+
+ auto LShr = MIRBuilder.buildLShr(
+ ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
+ MIRBuilder.buildTrunc(DstReg, LShr);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ if (SrcTy.isScalar()) {
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ if (!SrcTy.isVector())
+ return UnableToLegalize;
+
+ if (DstTy != SrcTy.getElementType())
+ return UnableToLegalize;
+
+ if (Offset % SrcTy.getScalarSizeInBits() != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
+ Offset);
+ widenScalarDst(MI, WideTy.getScalarType(), 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
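+
+// For example (illustrative, TypeIdx == 0): widening
+//   %d:_(s16) = G_EXTRACT %s:_(s20), 4
+// to s32 any-extends %s to s32, shifts right by 4 and truncates to s16;
+// an offset of 0 skips the shift entirely.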
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT WideTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
MIRBuilder.setInstr(MI);
@@ -659,6 +1116,14 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
switch (MI.getOpcode()) {
default:
return UnableToLegalize;
+ case TargetOpcode::G_EXTRACT:
+ return widenScalarExtract(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_INSERT:
+ return widenScalarInsert(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_MERGE_VALUES:
+ return widenScalarMergeValues(MI, TypeIdx, WideTy);
+ case TargetOpcode::G_UNMERGE_VALUES:
+ return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
case TargetOpcode::G_UADDO:
case TargetOpcode::G_USUBO: {
if (TypeIdx == 1)
@@ -690,19 +1155,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_CTLZ:
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTPOP: {
+ if (TypeIdx == 0) {
+ Observer.changingInstr(MI);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register SrcReg = MI.getOperand(1).getReg();
+
// First ZEXT the input.
- auto MIBSrc = MIRBuilder.buildZExt(WideTy, MI.getOperand(1).getReg());
- LLT CurTy = MRI.getType(MI.getOperand(0).getReg());
+ auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
+ LLT CurTy = MRI.getType(SrcReg);
if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
// The count is the same in the larger type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit =
APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
- MIBSrc = MIRBuilder.buildInstr(
- TargetOpcode::G_OR, {WideTy},
- {MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit.getSExtValue())});
+ MIBSrc = MIRBuilder.buildOr(
+ WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
}
+
// Perform the operation at the larger size.
auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
// This is already the correct result for CTPOP and CTTZs
@@ -714,22 +1188,43 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
TargetOpcode::G_SUB, {WideTy},
{MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff)});
}
- auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
- // Make the original instruction a trunc now, and update its source.
+
+ MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
+ MI.eraseFromParent();
+ return Legalized;
+ }
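+ // For example (illustrative): widening an s8 G_CTTZ to s32 zero-extends
+ // the input and ORs in 1 << 8, so a zero input still yields 8; G_CTLZ
+ // instead subtracts the width difference (24) from the s32 result.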
+ case TargetOpcode::G_BSWAP: {
Observer.changingInstr(MI);
- MI.setDesc(TII.get(TargetOpcode::G_TRUNC));
- MI.getOperand(1).setReg(MIBNewOp->getOperand(0).getReg());
+ Register DstReg = MI.getOperand(0).getReg();
+
+ Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+
+ MI.getOperand(0).setReg(DstExt);
+
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+ MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
+ MIRBuilder.buildInstr(TargetOpcode::G_LSHR)
+ .addDef(ShrReg)
+ .addUse(DstExt)
+ .addUse(ShiftAmtReg);
+
+ MIRBuilder.buildTrunc(DstReg, ShrReg);
Observer.changedInstr(MI);
return Legalized;
}
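+ // For example (illustrative): widening an s16 G_BSWAP to s32 any-extends
+ // the input, byte-swaps at s32, shifts the result right by 16 and
+ // truncates back to s16.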
-
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
case TargetOpcode::G_SUB:
- // Perform operation at larger width (any extension is fine here, high bits
+ // Perform operation at larger width (any extension is fine here, high bits
// don't affect the result) and then truncate the result back to the
// original type.
Observer.changingInstr(MI);
@@ -741,16 +1236,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_SHL:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0) {
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
Observer.changedInstr(MI);
return Legalized;
case TargetOpcode::G_SDIV:
case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
@@ -759,18 +1262,28 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
- // The "number of bits to shift" operand must preserve its value as an
- // unsigned integer:
- widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0) {
+ unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
+ TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
+
+ widenScalarSrc(MI, WideTy, 1, CvtOp);
+ widenScalarDst(MI, WideTy);
+ } else {
+ assert(TypeIdx == 1);
+ // The "number of bits to shift" operand must preserve its value as an
+ // unsigned integer:
+ widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
+ }
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_UDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX:
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
@@ -788,8 +1301,9 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
widenScalarDst(MI, WideTy);
} else {
+ bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
// Explicit extension is required here since high bits affect the result.
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
}
Observer.changedInstr(MI);
return Legalized;
@@ -819,23 +1333,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
- case TargetOpcode::G_INSERT:
- if (TypeIdx != 0)
- return UnableToLegalize;
- Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
- widenScalarDst(MI, WideTy);
- Observer.changedInstr(MI);
- return Legalized;
-
case TargetOpcode::G_LOAD:
- // For some types like i24, we might try to widen to i32. To properly handle
- // this we should be using a dedicated extending load, until then avoid
- // trying to legalize.
- if (alignTo(MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(), 8) !=
- WideTy.getSizeInBits())
- return UnableToLegalize;
- LLVM_FALLTHROUGH;
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD:
Observer.changingInstr(MI);
@@ -844,12 +1342,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
return Legalized;
case TargetOpcode::G_STORE: {
- if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(1) ||
- WideTy != LLT::scalar(8))
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT Ty = MRI.getType(MI.getOperand(0).getReg());
+ if (!isPowerOf2_32(Ty.getSizeInBits()))
return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ZEXT);
+
+ unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
+ TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
+ widenScalarSrc(MI, WideTy, 0, ExtType);
+
Observer.changedInstr(MI);
return Legalized;
}
@@ -871,14 +1376,19 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
bool LosesInfo;
switch (WideTy.getSizeInBits()) {
case 32:
- Val.convert(APFloat::IEEEsingle(), APFloat::rmTowardZero, &LosesInfo);
+ Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
break;
case 64:
- Val.convert(APFloat::IEEEdouble(), APFloat::rmTowardZero, &LosesInfo);
+ Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
+ &LosesInfo);
break;
default:
- llvm_unreachable("Unhandled fp widen type");
+ return UnableToLegalize;
}
+
+ assert(!LosesInfo && "extend should always be lossless");
+
Observer.changingInstr(MI);
SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
@@ -894,7 +1404,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
}
case TargetOpcode::G_BRCOND:
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 0, TargetOpcode::G_ANYEXT);
+ widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
Observer.changedInstr(MI);
return Legalized;
@@ -947,23 +1457,103 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
- case TargetOpcode::G_EXTRACT_VECTOR_ELT:
+ case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
+ if (TypeIdx == 0) {
+ Register VecReg = MI.getOperand(1).getReg();
+ LLT VecTy = MRI.getType(VecReg);
+ Observer.changingInstr(MI);
+
+ widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
+ WideTy.getSizeInBits()),
+ 1, TargetOpcode::G_SEXT);
+
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
if (TypeIdx != 2)
return UnableToLegalize;
Observer.changingInstr(MI);
widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
Observer.changedInstr(MI);
return Legalized;
-
+ }
+ case TargetOpcode::G_FADD:
+ case TargetOpcode::G_FMUL:
+ case TargetOpcode::G_FSUB:
+ case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FNEG:
+ case TargetOpcode::G_FABS:
+ case TargetOpcode::G_FCANONICALIZE:
+ case TargetOpcode::G_FMINNUM:
+ case TargetOpcode::G_FMAXNUM:
+ case TargetOpcode::G_FMINNUM_IEEE:
+ case TargetOpcode::G_FMAXNUM_IEEE:
+ case TargetOpcode::G_FMINIMUM:
+ case TargetOpcode::G_FMAXIMUM:
+ case TargetOpcode::G_FDIV:
+ case TargetOpcode::G_FREM:
case TargetOpcode::G_FCEIL:
+ case TargetOpcode::G_FFLOOR:
+ case TargetOpcode::G_FCOS:
+ case TargetOpcode::G_FSIN:
+ case TargetOpcode::G_FLOG10:
+ case TargetOpcode::G_FLOG:
+ case TargetOpcode::G_FLOG2:
+ case TargetOpcode::G_FRINT:
+ case TargetOpcode::G_FNEARBYINT:
+ case TargetOpcode::G_FSQRT:
+ case TargetOpcode::G_FEXP:
+ case TargetOpcode::G_FEXP2:
+ case TargetOpcode::G_FPOW:
+ case TargetOpcode::G_INTRINSIC_TRUNC:
+ case TargetOpcode::G_INTRINSIC_ROUND:
+ assert(TypeIdx == 0);
+ Observer.changingInstr(MI);
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
+
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INTTOPTR:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PTRTOINT:
if (TypeIdx != 0)
return UnableToLegalize;
+
Observer.changingInstr(MI);
- widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
- widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
+ widenScalarDst(MI, WideTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_BUILD_VECTOR: {
+ Observer.changingInstr(MI);
+
+ const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
+ for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
+ widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
+
+ // Avoid changing the result vector type if the source element type was
+ // requested.
+ if (TypeIdx == 1) {
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ MI.setDesc(TII.get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
+ } else {
+ widenScalarDst(MI, WideTy, 0);
+ }
+
Observer.changedInstr(MI);
return Legalized;
}
+ }
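+ // For example (illustrative): with TypeIdx == 1, widening the sources of
+ // %v:_(<4 x s8>) = G_BUILD_VECTOR %a:_(s8), ... to s16 any-extends each
+ // source and swaps the opcode to G_BUILD_VECTOR_TRUNC so the result stays
+ // <4 x s8>.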
}
LegalizerHelper::LegalizeResult
@@ -976,13 +1566,13 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
case TargetOpcode::G_SREM:
case TargetOpcode::G_UREM: {
- unsigned QuotReg = MRI.createGenericVirtualRegister(Ty);
+ Register QuotReg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV)
.addDef(QuotReg)
.addUse(MI.getOperand(1).getReg())
.addUse(MI.getOperand(2).getReg());
- unsigned ProdReg = MRI.createGenericVirtualRegister(Ty);
+ Register ProdReg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildMul(ProdReg, QuotReg, MI.getOperand(2).getReg());
MIRBuilder.buildSub(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
ProdReg);
@@ -993,10 +1583,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
// result.
- unsigned Res = MI.getOperand(0).getReg();
- unsigned Overflow = MI.getOperand(1).getReg();
- unsigned LHS = MI.getOperand(2).getReg();
- unsigned RHS = MI.getOperand(3).getReg();
+ Register Res = MI.getOperand(0).getReg();
+ Register Overflow = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
MIRBuilder.buildMul(Res, LHS, RHS);
@@ -1004,20 +1594,20 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
? TargetOpcode::G_SMULH
: TargetOpcode::G_UMULH;
- unsigned HiPart = MRI.createGenericVirtualRegister(Ty);
+ Register HiPart = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(Opcode)
.addDef(HiPart)
.addUse(LHS)
.addUse(RHS);
- unsigned Zero = MRI.createGenericVirtualRegister(Ty);
+ Register Zero = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(Zero, 0);
// For *signed* multiply, overflow is detected by checking:
// (hi != (lo >> bitwidth-1))
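+  // e.g. for s32: Overflow = (HiPart != (Res >> 31)), i.e. the high part
+  // must equal the sign-extension of the low part.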
if (Opcode == TargetOpcode::G_SMULH) {
- unsigned Shifted = MRI.createGenericVirtualRegister(Ty);
- unsigned ShiftAmt = MRI.createGenericVirtualRegister(Ty);
+ Register Shifted = MRI.createGenericVirtualRegister(Ty);
+ Register ShiftAmt = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildConstant(ShiftAmt, Ty.getSizeInBits() - 1);
MIRBuilder.buildInstr(TargetOpcode::G_ASHR)
.addDef(Shifted)
@@ -1035,7 +1625,7 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// represent them.
if (Ty.isVector())
return UnableToLegalize;
- unsigned Res = MI.getOperand(0).getReg();
+ Register Res = MI.getOperand(0).getReg();
Type *ZeroTy;
LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
switch (Ty.getSizeInBits()) {
@@ -1057,10 +1647,10 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
ConstantFP &ZeroForNegation =
*cast<ConstantFP>(ConstantFP::getZeroValueForNegation(ZeroTy));
auto Zero = MIRBuilder.buildFConstant(Ty, ZeroForNegation);
- MIRBuilder.buildInstr(TargetOpcode::G_FSUB)
- .addDef(Res)
- .addUse(Zero->getOperand(0).getReg())
- .addUse(MI.getOperand(1).getReg());
+ Register SubByReg = MI.getOperand(1).getReg();
+ Register ZeroReg = Zero->getOperand(0).getReg();
+ MIRBuilder.buildInstr(TargetOpcode::G_FSUB, {Res}, {ZeroReg, SubByReg},
+ MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
@@ -1070,24 +1660,21 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
return UnableToLegalize;
- unsigned Res = MI.getOperand(0).getReg();
- unsigned LHS = MI.getOperand(1).getReg();
- unsigned RHS = MI.getOperand(2).getReg();
- unsigned Neg = MRI.createGenericVirtualRegister(Ty);
+ Register Res = MI.getOperand(0).getReg();
+ Register LHS = MI.getOperand(1).getReg();
+ Register RHS = MI.getOperand(2).getReg();
+ Register Neg = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildInstr(TargetOpcode::G_FNEG).addDef(Neg).addUse(RHS);
- MIRBuilder.buildInstr(TargetOpcode::G_FADD)
- .addDef(Res)
- .addUse(LHS)
- .addUse(Neg);
+ MIRBuilder.buildInstr(TargetOpcode::G_FADD, {Res}, {LHS, Neg}, MI.getFlags());
MI.eraseFromParent();
return Legalized;
}
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
- unsigned OldValRes = MI.getOperand(0).getReg();
- unsigned SuccessRes = MI.getOperand(1).getReg();
- unsigned Addr = MI.getOperand(2).getReg();
- unsigned CmpVal = MI.getOperand(3).getReg();
- unsigned NewVal = MI.getOperand(4).getReg();
+ Register OldValRes = MI.getOperand(0).getReg();
+ Register SuccessRes = MI.getOperand(1).getReg();
+ Register Addr = MI.getOperand(2).getReg();
+ Register CmpVal = MI.getOperand(3).getReg();
+ Register NewVal = MI.getOperand(4).getReg();
MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
**MI.memoperands_begin());
MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
@@ -1098,8 +1685,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_SEXTLOAD:
case TargetOpcode::G_ZEXTLOAD: {
// Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned PtrReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
auto &MMO = **MI.memoperands_begin();
@@ -1114,8 +1701,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
if (DstTy.isScalar()) {
- unsigned TmpReg = MRI.createGenericVirtualRegister(
- LLT::scalar(MMO.getSize() /* in bytes */ * 8));
+ Register TmpReg =
+ MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
switch (MI.getOpcode()) {
default:
@@ -1142,15 +1729,27 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case TargetOpcode::G_CTTZ:
case TargetOpcode::G_CTPOP:
return lowerBitCount(MI, TypeIdx, Ty);
+ case G_UADDO: {
+ Register Res = MI.getOperand(0).getReg();
+ Register CarryOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+
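+  // The truncated sum wraps exactly when a carry out occurs, so
+  // CarryOut = (LHS + RHS) <u RHS. Comparing against either operand works;
+  // RHS is used here.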
+ MIRBuilder.buildAdd(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
case G_UADDE: {
- unsigned Res = MI.getOperand(0).getReg();
- unsigned CarryOut = MI.getOperand(1).getReg();
- unsigned LHS = MI.getOperand(2).getReg();
- unsigned RHS = MI.getOperand(3).getReg();
- unsigned CarryIn = MI.getOperand(4).getReg();
+ Register Res = MI.getOperand(0).getReg();
+ Register CarryOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ Register CarryIn = MI.getOperand(4).getReg();
- unsigned TmpRes = MRI.createGenericVirtualRegister(Ty);
- unsigned ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
+ Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+ Register ZExtCarryIn = MRI.createGenericVirtualRegister(Ty);
MIRBuilder.buildAdd(TmpRes, LHS, RHS);
MIRBuilder.buildZExt(ZExtCarryIn, CarryIn);
@@ -1160,113 +1759,1325 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case G_USUBO: {
+ Register Res = MI.getOperand(0).getReg();
+ Register BorrowOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+
+ MIRBuilder.buildSub(Res, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_USUBE: {
+ Register Res = MI.getOperand(0).getReg();
+ Register BorrowOut = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ Register BorrowIn = MI.getOperand(4).getReg();
+
+ Register TmpRes = MRI.createGenericVirtualRegister(Ty);
+ Register ZExtBorrowIn = MRI.createGenericVirtualRegister(Ty);
+ Register LHS_EQ_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+ Register LHS_ULT_RHS = MRI.createGenericVirtualRegister(LLT::scalar(1));
+
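+  // Res = LHS - RHS - BorrowIn. The outgoing borrow is LHS <u RHS, except
+  // when LHS == RHS, where the incoming borrow propagates through; the
+  // select below picks between those two cases.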
+ MIRBuilder.buildSub(TmpRes, LHS, RHS);
+ MIRBuilder.buildZExt(ZExtBorrowIn, BorrowIn);
+ MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
+ MIRBuilder.buildICmp(CmpInst::ICMP_EQ, LHS_EQ_RHS, LHS, RHS);
+ MIRBuilder.buildICmp(CmpInst::ICMP_ULT, LHS_ULT_RHS, LHS, RHS);
+ MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_UITOFP:
+ return lowerUITOFP(MI, TypeIdx, Ty);
+ case G_SITOFP:
+ return lowerSITOFP(MI, TypeIdx, Ty);
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ return lowerMinMax(MI, TypeIdx, Ty);
+ case G_FCOPYSIGN:
+ return lowerFCopySign(MI, TypeIdx, Ty);
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ return lowerFMinNumMaxNum(MI);
}
}
+LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
+ MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
+ SmallVector<Register, 2> DstRegs;
+
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ Register DstReg = MI.getOperand(0).getReg();
+ unsigned Size = MRI.getType(DstReg).getSizeInBits();
+ int NumParts = Size / NarrowSize;
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (Size % NarrowSize != 0)
+ return UnableToLegalize;
+
+ for (int i = 0; i < NumParts; ++i) {
+ Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildUndef(TmpReg);
+ DstRegs.push_back(TmpReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
LegalizerHelper::LegalizeResult
-LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
- LLT NarrowTy) {
- // FIXME: Don't know how to handle secondary types yet.
+LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ const unsigned Opc = MI.getOpcode();
+ const unsigned NumOps = MI.getNumOperands() - 1;
+ const unsigned NarrowSize = NarrowTy.getSizeInBits();
+ const Register DstReg = MI.getOperand(0).getReg();
+ const unsigned Flags = MI.getFlags();
+ const LLT DstTy = MRI.getType(DstReg);
+ const unsigned Size = DstTy.getSizeInBits();
+ const int NumParts = Size / NarrowSize;
+ const LLT EltTy = DstTy.getElementType();
+ const unsigned EltSize = EltTy.getSizeInBits();
+ const unsigned BitsForNumParts = NarrowSize * NumParts;
+
+ // Check if we have any leftovers. If we do, then only handle the case where
+ // the leftover is one element.
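+  // e.g. splitting <3 x s32> with NarrowTy = <2 x s32> gives one <2 x s32>
+  // piece plus a single s32 leftover element.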
+ if (BitsForNumParts != Size && BitsForNumParts + EltSize != Size)
+ return UnableToLegalize;
+
+ if (BitsForNumParts != Size) {
+ Register AccumDstReg = MRI.createGenericVirtualRegister(DstTy);
+ MIRBuilder.buildUndef(AccumDstReg);
+
+ // Handle the pieces which evenly divide into the requested type with
+ // extract/op/insert sequence.
+ for (unsigned Offset = 0; Offset < BitsForNumParts; Offset += NarrowSize) {
+ SmallVector<SrcOp, 4> SrcOps;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register PartOpReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(), Offset);
+ SrcOps.push_back(PartOpReg);
+ }
+
+ Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+
+ Register PartInsertReg = MRI.createGenericVirtualRegister(DstTy);
+ MIRBuilder.buildInsert(PartInsertReg, AccumDstReg, PartDstReg, Offset);
+ AccumDstReg = PartInsertReg;
+ }
+
+ // Handle the remaining element sized leftover piece.
+ SmallVector<SrcOp, 4> SrcOps;
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ Register PartOpReg = MRI.createGenericVirtualRegister(EltTy);
+ MIRBuilder.buildExtract(PartOpReg, MI.getOperand(I).getReg(),
+ BitsForNumParts);
+ SrcOps.push_back(PartOpReg);
+ }
+
+ Register PartDstReg = MRI.createGenericVirtualRegister(EltTy);
+ MIRBuilder.buildInstr(Opc, {PartDstReg}, SrcOps, Flags);
+ MIRBuilder.buildInsert(DstReg, AccumDstReg, PartDstReg, BitsForNumParts);
+ MI.eraseFromParent();
+
+ return Legalized;
+ }
+
+ SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src0Regs);
+
+ if (NumOps >= 2)
+ extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src1Regs);
+
+ if (NumOps >= 3)
+ extractParts(MI.getOperand(3).getReg(), NarrowTy, NumParts, Src2Regs);
+
+ for (int i = 0; i < NumParts; ++i) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+ if (NumOps == 1)
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i]}, Flags);
+ else if (NumOps == 2) {
+ MIRBuilder.buildInstr(Opc, {DstReg}, {Src0Regs[i], Src1Regs[i]}, Flags);
+ } else if (NumOps == 3) {
+ MIRBuilder.buildInstr(Opc, {DstReg},
+ {Src0Regs[i], Src1Regs[i], Src2Regs[i]}, Flags);
+ }
+
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+// Handle splitting vector operations which need to have the same number of
+// elements in each type index, but each type index may have a different element
+// type.
+//
+// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+//
+// Also handles some irregular breakdown cases, e.g.
+//   <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
+// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
+// s64 = G_SHL s64, s32
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorMultiEltType(
+ MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
if (TypeIdx != 0)
return UnableToLegalize;
- MIRBuilder.setInstr(MI);
- switch (MI.getOpcode()) {
- default:
+ const LLT NarrowTy0 = NarrowTyArg;
+ const unsigned NewNumElts =
+ NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
+
+ const Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT LeftoverTy0;
+
+ // All of the operands need to have the same number of elements, so if we can
+ // determine a type breakdown for the result type, we can for all of the
+ // source types.
+ int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
+ if (NumParts < 0)
return UnableToLegalize;
- case TargetOpcode::G_IMPLICIT_DEF: {
- SmallVector<unsigned, 2> DstRegs;
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
- // FIXME: Don't know how to handle the situation where the small vectors
- // aren't all the same size yet.
- if (Size % NarrowSize != 0)
+ SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+ SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+ SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
+ LLT LeftoverTy;
+ Register SrcReg = MI.getOperand(I).getReg();
+ LLT SrcTyI = MRI.getType(SrcReg);
+ LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
+ LLT LeftoverTyI;
+
+ // Split this operand into the requested typed registers, and any leftover
+ // required to reproduce the original type.
+ if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
+ LeftoverRegs))
return UnableToLegalize;
- for (int i = 0; i < NumParts; ++i) {
- unsigned TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildUndef(TmpReg);
- DstRegs.push_back(TmpReg);
+ if (I == 1) {
+ // For the first operand, create an instruction for each part and setup
+ // the result.
+ for (Register PartReg : PartRegs) {
+ Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+ .addDef(PartDstReg)
+ .addUse(PartReg));
+ DstRegs.push_back(PartDstReg);
+ }
+
+ for (Register LeftoverReg : LeftoverRegs) {
+ Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
+ NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
+ .addDef(PartDstReg)
+ .addUse(LeftoverReg));
+ LeftoverDstRegs.push_back(PartDstReg);
+ }
+ } else {
+ assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
+
+ // Add the newly created operand splits to the existing instructions. The
+ // odd-sized pieces are ordered after the requested NarrowTyArg sized
+ // pieces.
+ unsigned InstCount = 0;
+ for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
+ NewInsts[InstCount++].addUse(PartRegs[J]);
+ for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
+ NewInsts[InstCount++].addUse(LeftoverRegs[J]);
}
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- else
- MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ PartRegs.clear();
+ LeftoverRegs.clear();
+ }
- MI.eraseFromParent();
- return Legalized;
+ // Insert the newly built operations and rebuild the result register.
+ for (auto &MIB : NewInsts)
+ MIRBuilder.insertInstr(MIB);
+
+ insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT NarrowTy0 = NarrowTy;
+ LLT NarrowTy1;
+ unsigned NumParts;
+
+ if (NarrowTy.isVector()) {
+ // Uneven breakdown not handled.
+ NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
+ if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
+ return UnableToLegalize;
+
+ NarrowTy1 = LLT::vector(NumParts, SrcTy.getElementType().getSizeInBits());
+ } else {
+ NumParts = DstTy.getNumElements();
+ NarrowTy1 = SrcTy.getElementType();
}
- case TargetOpcode::G_ADD: {
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned Size = MRI.getType(DstReg).getSizeInBits();
- int NumParts = Size / NarrowSize;
+
+ SmallVector<Register, 4> SrcRegs, DstRegs;
+ extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ MachineInstr *NewInst = MIRBuilder.buildInstr(MI.getOpcode())
+ .addDef(DstReg)
+ .addUse(SrcRegs[I]);
+
+ NewInst->setFlags(MI.getFlags());
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(2).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = MRI.getType(Src0Reg);
+
+ unsigned NumParts;
+ LLT NarrowTy0, NarrowTy1;
+
+ if (TypeIdx == 0) {
+ unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+ unsigned OldElts = DstTy.getNumElements();
+
+ NarrowTy0 = NarrowTy;
+ NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
+ NarrowTy1 = NarrowTy.isVector() ?
+ LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
+ SrcTy.getElementType();
+
+ } else {
+ unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
+ unsigned OldElts = SrcTy.getNumElements();
+
+ NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
+ NarrowTy.getNumElements();
+ NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
+ DstTy.getScalarSizeInBits());
+ NarrowTy1 = NarrowTy;
+ }
+
+ // FIXME: Don't know how to handle the situation where the small vectors
+ // aren't all the same size yet.
+ if (NarrowTy1.isVector() &&
+ NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
+ return UnableToLegalize;
+
+ CmpInst::Predicate Pred
+ = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+
+ SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
+ extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
+ extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
+
+ for (unsigned I = 0; I < NumParts; ++I) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ DstRegs.push_back(DstReg);
+
+ if (MI.getOpcode() == TargetOpcode::G_ICMP)
+ MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+ else {
+ MachineInstr *NewCmp
+ = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
+ NewCmp->setFlags(MI.getFlags());
+ }
+ }
+
+ if (NarrowTy1.isVector())
+ MIRBuilder.buildConcatVectors(DstReg, DstRegs);
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register CondReg = MI.getOperand(1).getReg();
+
+ unsigned NumParts = 0;
+ LLT NarrowTy0, NarrowTy1;
+
+ LLT DstTy = MRI.getType(DstReg);
+ LLT CondTy = MRI.getType(CondReg);
+ unsigned Size = DstTy.getSizeInBits();
+
+ assert(TypeIdx == 0 || CondTy.isVector());
+
+ if (TypeIdx == 0) {
+ NarrowTy0 = NarrowTy;
+ NarrowTy1 = CondTy;
+
+ unsigned NarrowSize = NarrowTy0.getSizeInBits();
// FIXME: Don't know how to handle the situation where the small vectors
// aren't all the same size yet.
if (Size % NarrowSize != 0)
return UnableToLegalize;
- SmallVector<unsigned, 2> Src1Regs, Src2Regs, DstRegs;
- extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
- extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
+ NumParts = Size / NarrowSize;
- for (int i = 0; i < NumParts; ++i) {
- unsigned DstReg = MRI.createGenericVirtualRegister(NarrowTy);
- MIRBuilder.buildAdd(DstReg, Src1Regs[i], Src2Regs[i]);
- DstRegs.push_back(DstReg);
+ // Need to break down the condition type
+ if (CondTy.isVector()) {
+ if (CondTy.getNumElements() == NumParts)
+ NarrowTy1 = CondTy.getElementType();
+ else
+ NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
+ CondTy.getScalarSizeInBits());
+ }
+ } else {
+ NumParts = CondTy.getNumElements();
+    if (NarrowTy.isVector()) {
+      // TODO: Handle splitting the condition vector, including the uneven
+      // breakdown cases.
+      return UnableToLegalize;
+ } else {
+ NarrowTy0 = DstTy.getElementType();
+ NarrowTy1 = NarrowTy;
}
+ }
+
+ SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
+ if (CondTy.isVector())
+ extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
+
+ extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
+ extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
+ for (unsigned i = 0; i < NumParts; ++i) {
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
+ MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
+ Src1Regs[i], Src2Regs[i]);
+ DstRegs.push_back(DstReg);
+ }
+
+ if (NarrowTy0.isVector())
MIRBuilder.buildConcatVectors(DstReg, DstRegs);
- MI.eraseFromParent();
- return Legalized;
+ else
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ const Register DstReg = MI.getOperand(0).getReg();
+ LLT PhiTy = MRI.getType(DstReg);
+ LLT LeftoverTy;
+
+ // All of the operands need to have the same number of elements, so if we can
+ // determine a type breakdown for the result type, we can for all of the
+ // source types.
+ int NumParts, NumLeftover;
+ std::tie(NumParts, NumLeftover)
+ = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
+ if (NumParts < 0)
+ return UnableToLegalize;
+
+ SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
+ SmallVector<MachineInstrBuilder, 4> NewInsts;
+
+ const int TotalNumParts = NumParts + NumLeftover;
+
+ // Insert the new phis in the result block first.
+ for (int I = 0; I != TotalNumParts; ++I) {
+ LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
+ Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
+ NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
+ .addDef(PartDstReg));
+ if (I < NumParts)
+ DstRegs.push_back(PartDstReg);
+ else
+ LeftoverDstRegs.push_back(PartDstReg);
}
- case TargetOpcode::G_LOAD:
- case TargetOpcode::G_STORE: {
- bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
- unsigned ValReg = MI.getOperand(0).getReg();
- unsigned AddrReg = MI.getOperand(1).getReg();
- unsigned NarrowSize = NarrowTy.getSizeInBits();
- unsigned Size = MRI.getType(ValReg).getSizeInBits();
- unsigned NumParts = Size / NarrowSize;
-
- SmallVector<unsigned, 8> NarrowRegs;
- if (!IsLoad)
- extractParts(ValReg, NarrowTy, NumParts, NarrowRegs);
-
- const LLT OffsetTy =
- LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
- MachineFunction &MF = *MI.getMF();
- MachineMemOperand *MMO = *MI.memoperands_begin();
- for (unsigned Idx = 0; Idx < NumParts; ++Idx) {
- unsigned Adjustment = Idx * NarrowTy.getSizeInBits() / 8;
- unsigned Alignment = MinAlign(MMO->getAlignment(), Adjustment);
- unsigned NewAddrReg = 0;
- MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, Adjustment);
- MachineMemOperand &NewMMO = *MF.getMachineMemOperand(
- MMO->getPointerInfo().getWithOffset(Adjustment), MMO->getFlags(),
- NarrowTy.getSizeInBits() / 8, Alignment);
+
+ MachineBasicBlock *MBB = MI.getParent();
+ MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
+ insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
+
+ SmallVector<Register, 4> PartRegs, LeftoverRegs;
+
+ // Insert code to extract the incoming values in each predecessor block.
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ PartRegs.clear();
+ LeftoverRegs.clear();
+
+ Register SrcReg = MI.getOperand(I).getReg();
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+
+ LLT Unused;
+ if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
+ LeftoverRegs))
+ return UnableToLegalize;
+
+    // Add the newly created incoming values to the new G_PHIs. The odd-sized
+    // leftover pieces are ordered after the requested NarrowTy sized pieces.
+ for (int J = 0; J != TotalNumParts; ++J) {
+ MachineInstrBuilder MIB = NewInsts[J];
+ MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
+ MIB.addMBB(&OpMBB);
+ }
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ MachineMemOperand *MMO = *MI.memoperands_begin();
+
+ // This implementation doesn't work for atomics. Give up instead of doing
+ // something invalid.
+ if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
+ MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
+ return UnableToLegalize;
+
+ bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
+ Register ValReg = MI.getOperand(0).getReg();
+ Register AddrReg = MI.getOperand(1).getReg();
+ LLT ValTy = MRI.getType(ValReg);
+
+ int NumParts = -1;
+ int NumLeftover = -1;
+ LLT LeftoverTy;
+ SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
+ if (IsLoad) {
+    std::tie(NumParts, NumLeftover) =
+        getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
+ } else {
+ if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
+ NarrowLeftoverRegs)) {
+ NumParts = NarrowRegs.size();
+ NumLeftover = NarrowLeftoverRegs.size();
+ }
+ }
+
+ if (NumParts == -1)
+ return UnableToLegalize;
+
+ const LLT OffsetTy = LLT::scalar(MRI.getType(AddrReg).getScalarSizeInBits());
+
+ unsigned TotalSize = ValTy.getSizeInBits();
+
+ // Split the load/store into PartTy sized pieces starting at Offset. If this
+  // is a load, return the new registers in ValRegs. For a store, each element
+  // of ValRegs should be PartTy sized. Returns the next offset that needs to be
+ // handled.
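+  // e.g. an s96 access with NarrowTy = s32 becomes three s32 loads/stores at
+  // byte offsets 0, 4 and 8, each with a correspondingly offset MMO.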
+ auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
+ unsigned Offset) -> unsigned {
+ MachineFunction &MF = MIRBuilder.getMF();
+ unsigned PartSize = PartTy.getSizeInBits();
+ for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
+ Offset += PartSize, ++Idx) {
+ unsigned ByteSize = PartSize / 8;
+ unsigned ByteOffset = Offset / 8;
+ Register NewAddrReg;
+
+ MIRBuilder.materializeGEP(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
+
+ MachineMemOperand *NewMMO =
+ MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
+
if (IsLoad) {
- unsigned Dst = MRI.createGenericVirtualRegister(NarrowTy);
- NarrowRegs.push_back(Dst);
- MIRBuilder.buildLoad(Dst, NewAddrReg, NewMMO);
+ Register Dst = MRI.createGenericVirtualRegister(PartTy);
+ ValRegs.push_back(Dst);
+ MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
} else {
- MIRBuilder.buildStore(NarrowRegs[Idx], NewAddrReg, NewMMO);
+ MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
}
}
- if (IsLoad) {
- if (NarrowTy.isVector())
- MIRBuilder.buildConcatVectors(ValReg, NarrowRegs);
- else
- MIRBuilder.buildBuildVector(ValReg, NarrowRegs);
- }
+
+ return Offset;
+ };
+
+ unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
+
+ // Handle the rest of the register if this isn't an even type breakdown.
+ if (LeftoverTy.isValid())
+ splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
+
+ if (IsLoad) {
+ insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
+ LeftoverTy, NarrowLeftoverRegs);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ using namespace TargetOpcode;
+
+ MIRBuilder.setInstr(MI);
+ switch (MI.getOpcode()) {
+ case G_IMPLICIT_DEF:
+ return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
+ case G_AND:
+ case G_OR:
+ case G_XOR:
+ case G_ADD:
+ case G_SUB:
+ case G_MUL:
+ case G_SMULH:
+ case G_UMULH:
+ case G_FADD:
+ case G_FMUL:
+ case G_FSUB:
+ case G_FNEG:
+ case G_FABS:
+ case G_FCANONICALIZE:
+ case G_FDIV:
+ case G_FREM:
+ case G_FMA:
+ case G_FPOW:
+ case G_FEXP:
+ case G_FEXP2:
+ case G_FLOG:
+ case G_FLOG2:
+ case G_FLOG10:
+ case G_FNEARBYINT:
+ case G_FCEIL:
+ case G_FFLOOR:
+ case G_FRINT:
+ case G_INTRINSIC_ROUND:
+ case G_INTRINSIC_TRUNC:
+ case G_FCOS:
+ case G_FSIN:
+ case G_FSQRT:
+ case G_BSWAP:
+ case G_SDIV:
+ case G_SMIN:
+ case G_SMAX:
+ case G_UMIN:
+ case G_UMAX:
+ case G_FMINNUM:
+ case G_FMAXNUM:
+ case G_FMINNUM_IEEE:
+ case G_FMAXNUM_IEEE:
+ case G_FMINIMUM:
+ case G_FMAXIMUM:
+ return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy);
+ case G_SHL:
+ case G_LSHR:
+ case G_ASHR:
+ case G_CTLZ:
+ case G_CTLZ_ZERO_UNDEF:
+ case G_CTTZ:
+ case G_CTTZ_ZERO_UNDEF:
+ case G_CTPOP:
+ case G_FCOPYSIGN:
+ return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
+ case G_ZEXT:
+ case G_SEXT:
+ case G_ANYEXT:
+ case G_FPEXT:
+ case G_FPTRUNC:
+ case G_SITOFP:
+ case G_UITOFP:
+ case G_FPTOSI:
+ case G_FPTOUI:
+ case G_INTTOPTR:
+ case G_PTRTOINT:
+ case G_ADDRSPACE_CAST:
+ return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
+ case G_ICMP:
+ case G_FCMP:
+ return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
+ case G_SELECT:
+ return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
+ case G_PHI:
+ return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ case G_LOAD:
+ case G_STORE:
+ return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
+ const LLT HalfTy, const LLT AmtTy) {
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ if (Amt.isNullValue()) {
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {InL, InH});
MI.eraseFromParent();
return Legalized;
}
+
+ LLT NVT = HalfTy;
+ unsigned NVTBits = HalfTy.getSizeInBits();
+ unsigned VTBits = 2 * NVTBits;
+
+ SrcOp Lo(Register(0)), Hi(Register(0));
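+  // e.g. for an s64 G_SHL by a constant 40, split into s32 halves:
+  // Lo = 0, Hi = InL << 8 (the Amt > NVTBits case below).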
+ if (MI.getOpcode() == TargetOpcode::G_SHL) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = MIRBuilder.buildShl(NVT, InL,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ } else if (Amt == NVTBits) {
+ Lo = MIRBuilder.buildConstant(NVT, 0);
+ Hi = InL;
+ } else {
+ Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrLHS =
+ MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
+ auto OrRHS = MIRBuilder.buildLShr(
+ NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+ Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ }
+ } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ if (Amt.ugt(VTBits)) {
+ Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildLShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildConstant(NVT, 0);
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
+ }
+ } else {
+ if (Amt.ugt(VTBits)) {
+ Hi = Lo = MIRBuilder.buildAShr(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt.ugt(NVTBits)) {
+ Lo = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else if (Amt == NVTBits) {
+ Lo = InH;
+ Hi = MIRBuilder.buildAShr(NVT, InH,
+ MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
+ } else {
+ auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
+ auto OrRHS = MIRBuilder.buildShl(
+ NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
+
+ Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
+ Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
+ }
}
+
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {Lo.getReg(), Hi.getReg()});
+ MI.eraseFromParent();
+
+ return Legalized;
+}
+
+// TODO: Optimize if constant shift amount.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
+ LLT RequestedTy) {
+ if (TypeIdx == 1) {
+ Observer.changingInstr(MI);
+ narrowScalarSrc(MI, RequestedTy, 2);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ if (DstTy.isVector())
+ return UnableToLegalize;
+
+ Register Amt = MI.getOperand(2).getReg();
+ LLT ShiftAmtTy = MRI.getType(Amt);
+ const unsigned DstEltSize = DstTy.getScalarSizeInBits();
+ if (DstEltSize % 2 != 0)
+ return UnableToLegalize;
+
+ // Ignore the input type. We can only go to exactly half the size of the
+ // input. If that isn't small enough, the resulting pieces will be further
+ // legalized.
+ const unsigned NewBitSize = DstEltSize / 2;
+ const LLT HalfTy = LLT::scalar(NewBitSize);
+ const LLT CondTy = LLT::scalar(1);
+
+ if (const MachineInstr *KShiftAmt =
+ getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
+ return narrowScalarShiftByConstant(
+ MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
+ }
+
+ // TODO: Expand with known bits.
+
+ // Handle the fully general expansion by an unknown amount.
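+  // Each result half is selected between the 'short' (Amt < NewBitSize) and
+  // 'long' (Amt >= NewBitSize) expansions, with an extra select so a zero
+  // shift amount passes the input through unchanged.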
+ auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
+
+ Register InL = MRI.createGenericVirtualRegister(HalfTy);
+ Register InH = MRI.createGenericVirtualRegister(HalfTy);
+ MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1).getReg());
+
+ auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
+ auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
+
+ auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
+ auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
+ auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
+
+ Register ResultRegs[2];
+ switch (MI.getOpcode()) {
+ case TargetOpcode::G_SHL: {
+ // Short: ShAmt < NewBitSize
+    auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
+
+ auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
+ auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
+
+ auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
+ auto Hi = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_LSHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+ auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ case TargetOpcode::G_ASHR: {
+ // Short: ShAmt < NewBitSize
+ auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+
+ auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+    auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+
+ // Long: ShAmt >= NewBitSize
+
+ // Sign of Hi part.
+ auto HiL = MIRBuilder.buildAShr(
+ HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
+
+ auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+
+ auto Lo = MIRBuilder.buildSelect(
+ HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
+
+ auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
+
+ ResultRegs[0] = Lo.getReg(0);
+ ResultRegs[1] = Hi.getReg(0);
+ break;
+ }
+ default:
+ llvm_unreachable("not a shift");
+ }
+
+ MIRBuilder.buildMerge(DstReg, ResultRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ assert(TypeIdx == 0 && "Expecting only Idx 0");
+
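+  // Each incoming value has to be widened in its predecessor block, before
+  // that block's terminator; the widened PHI result is then narrowed back
+  // after the PHIs in this block.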
+ Observer.changingInstr(MI);
+ for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
+ MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
+ MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
+ moreElementsVectorSrc(MI, MoreTy, I);
+ }
+
+ MachineBasicBlock &MBB = *MI.getParent();
+ MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
+ LLT MoreTy) {
+ MIRBuilder.setInstr(MI);
+ unsigned Opc = MI.getOpcode();
+ switch (Opc) {
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ Observer.changingInstr(MI);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_AND:
+ case TargetOpcode::G_OR:
+ case TargetOpcode::G_XOR:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+ case TargetOpcode::G_EXTRACT:
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_INSERT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 1);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_SELECT:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ if (MRI.getType(MI.getOperand(1).getReg()).isVector())
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 2);
+ moreElementsVectorSrc(MI, MoreTy, 3);
+ moreElementsVectorDst(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
+ case TargetOpcode::G_PHI:
+ return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
+ default:
+ return UnableToLegalize;
+ }
+}
+
+void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
+ ArrayRef<Register> Src1Regs,
+ ArrayRef<Register> Src2Regs,
+ LLT NarrowTy) {
+ MachineIRBuilder &B = MIRBuilder;
+ unsigned SrcParts = Src1Regs.size();
+ unsigned DstParts = DstRegs.size();
+
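+  // Schoolbook multiplication, e.g. with two parts per source:
+  //   Dst0 = lo(a0*b0)
+  //   Dst1 = lo(a1*b0) + lo(a0*b1) + hi(a0*b0)  (+ carries if more parts follow)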
+ unsigned DstIdx = 0; // Low bits of the result.
+ Register FactorSum =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
+ DstRegs[DstIdx] = FactorSum;
+
+ unsigned CarrySumPrevDstIdx;
+ SmallVector<Register, 4> Factors;
+
+ for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
+ // Collect low parts of muls for DstIdx.
+ for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
+ i <= std::min(DstIdx, SrcParts - 1); ++i) {
+ MachineInstrBuilder Mul =
+ B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
+ Factors.push_back(Mul.getReg(0));
+ }
+ // Collect high parts of muls from previous DstIdx.
+ for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
+ i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
+ MachineInstrBuilder Umulh =
+ B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
+ Factors.push_back(Umulh.getReg(0));
+ }
+    // Add CarrySum from additions calculated for the previous DstIdx.
+ if (DstIdx != 1) {
+ Factors.push_back(CarrySumPrevDstIdx);
+ }
+
+ Register CarrySum;
+ // Add all factors and accumulate all carries into CarrySum.
+ if (DstIdx != DstParts - 1) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
+ FactorSum = Uaddo.getReg(0);
+ CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i) {
+ MachineInstrBuilder Uaddo =
+ B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
+ FactorSum = Uaddo.getReg(0);
+ MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
+ CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
+ }
+ } else {
+      // Since the value for the next index is not calculated, neither is
+      // CarrySum.
+ FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
+ for (unsigned i = 2; i < Factors.size(); ++i)
+ FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
+ }
+
+ CarrySumPrevDstIdx = CarrySum;
+ DstRegs[DstIdx] = FactorSum;
+ Factors.clear();
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src1 = MI.getOperand(1).getReg();
+ Register Src2 = MI.getOperand(2).getReg();
+
+ LLT Ty = MRI.getType(DstReg);
+ if (Ty.isVector())
+ return UnableToLegalize;
+
+ unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
+ unsigned DstSize = Ty.getSizeInBits();
+ unsigned NarrowSize = NarrowTy.getSizeInBits();
+ if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
+ return UnableToLegalize;
+
+ unsigned NumDstParts = DstSize / NarrowSize;
+ unsigned NumSrcParts = SrcSize / NarrowSize;
+ bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
+ unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
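+  // For G_UMULH the full double-width product is computed and only the high
+  // half of the parts is kept, e.g. narrowing s64 G_UMULH to s32 computes a
+  // four-part product and merges parts 2 and 3.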
+
+ SmallVector<Register, 2> Src1Parts, Src2Parts, DstTmpRegs;
+ extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
+ extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
+ DstTmpRegs.resize(DstTmpParts);
+ multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
+
+ // Take only high half of registers if this is high mul.
+ ArrayRef<Register> DstRegs(
+ IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ // FIXME: add support for when SizeOp1 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp1 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp1 / NarrowSize;
+
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ Register OpReg = MI.getOperand(0).getReg();
+ uint64_t OpStart = MI.getOperand(2).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned SrcStart = i * NarrowSize;
+
+ if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
+ // No part of the extract uses this subregister, ignore it.
+ continue;
+ } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is extracted, forward the value.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ }
+
+    // Compute which slice of this source subregister feeds the extracted
+    // value, and where that slice starts within the subregister.
+ int64_t ExtractOffset;
+ uint64_t SegSize;
+ if (OpStart < SrcStart) {
+ ExtractOffset = 0;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
+ } else {
+ ExtractOffset = OpStart - SrcStart;
+ SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
+ }
+
+ Register SegReg = SrcRegs[i];
+ if (ExtractOffset != 0 || SegSize != NarrowSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
+ }
+
+ DstRegs.push_back(SegReg);
+ }
+
+ Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ // FIXME: Don't know how to handle secondary types yet.
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ uint64_t NarrowSize = NarrowTy.getSizeInBits();
+
+ // FIXME: add support for when SizeOp0 isn't an exact multiple of
+ // NarrowSize.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+
+ int NumParts = SizeOp0 / NarrowSize;
+
+ SmallVector<Register, 2> SrcRegs, DstRegs;
+ SmallVector<uint64_t, 2> Indexes;
+ extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
+
+ Register OpReg = MI.getOperand(2).getReg();
+ uint64_t OpStart = MI.getOperand(3).getImm();
+ uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
+ for (int i = 0; i < NumParts; ++i) {
+ unsigned DstStart = i * NarrowSize;
+
+ if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
+ // No part of the insert affects this subregister, forward the original.
+ DstRegs.push_back(SrcRegs[i]);
+ continue;
+ } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
+ // The entire subregister is defined by this insert, forward the new
+ // value.
+ DstRegs.push_back(OpReg);
+ continue;
+ }
+
+    // Compute which slice of the inserted value lands in this destination
+    // subregister, and the offsets needed to extract and re-insert it.
+ int64_t ExtractOffset, InsertOffset;
+ uint64_t SegSize;
+ if (OpStart < DstStart) {
+ InsertOffset = 0;
+ ExtractOffset = DstStart - OpStart;
+ SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
+ } else {
+ InsertOffset = OpStart - DstStart;
+ ExtractOffset = 0;
+ SegSize =
+ std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
+ }
+
+ Register SegReg = OpReg;
+ if (ExtractOffset != 0 || SegSize != OpSize) {
+ // A genuine extract is needed.
+ SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
+ MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
+ }
+
+ Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
+ DstRegs.push_back(DstReg);
+ }
+
+ assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
+ Register DstReg = MI.getOperand(0).getReg();
+  if (MRI.getType(DstReg).isVector())
+ MIRBuilder.buildBuildVector(DstReg, DstRegs);
+ else
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
+ assert(MI.getNumOperands() == 3 && TypeIdx == 0);
+
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src0Regs, Src0LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
+ Src1Regs, Src1LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
+ {Src0Regs[I], Src1Regs[I]});
+ DstRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Inst = MIRBuilder.buildInstr(
+ MI.getOpcode(),
+ {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
+ DstLeftoverRegs.push_back(Inst->getOperand(0).getReg());
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register CondReg = MI.getOperand(1).getReg();
+ LLT CondTy = MRI.getType(CondReg);
+ if (CondTy.isVector()) // TODO: Handle vselect
+ return UnableToLegalize;
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+
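+  // Split both value operands into NarrowTy pieces (plus any leftover) and
+  // emit one select per piece, reusing the single scalar condition.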
+ SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
+ SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
+ SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
+ LLT LeftoverTy;
+ if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
+ Src1Regs, Src1LeftoverRegs))
+ return UnableToLegalize;
+
+ LLT Unused;
+ if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
+ Src2Regs, Src2LeftoverRegs))
+ llvm_unreachable("inconsistent extractParts result");
+
+ for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(NarrowTy,
+ CondReg, Src1Regs[I], Src2Regs[I]);
+ DstRegs.push_back(Select->getOperand(0).getReg());
+ }
+
+ for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
+ auto Select = MIRBuilder.buildSelect(
+ LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
+ DstLeftoverRegs.push_back(Select->getOperand(0).getReg());
+ }
+
+ insertParts(DstReg, DstTy, NarrowTy, DstRegs,
+ LeftoverTy, DstLeftoverRegs);
+
+ MI.eraseFromParent();
+ return Legalized;
}
LegalizerHelper::LegalizeResult
@@ -1288,9 +3099,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTLZ: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty}})) {
+ if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {Ty, Ty}})) {
// If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
auto MIBCtlzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTLZ_ZERO_UNDEF,
{Ty}, {SrcReg});
@@ -1314,7 +3125,7 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
// return Len - popcount(x);
//
// Ref: "Hacker's Delight" by Henry Warren
- unsigned Op = SrcReg;
+ Register Op = SrcReg;
unsigned NewLen = PowerOf2Ceil(Len);
for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
auto MIBShiftAmt = MIRBuilder.buildConstant(Ty, 1ULL << i);
@@ -1338,9 +3149,9 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return Legalized;
}
case TargetOpcode::G_CTTZ: {
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
unsigned Len = Ty.getSizeInBits();
- if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty}})) {
+ if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {Ty, Ty}})) {
// If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
// zero.
auto MIBCttzZU = MIRBuilder.buildInstr(TargetOpcode::G_CTTZ_ZERO_UNDEF,
@@ -1365,8 +3176,8 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
TargetOpcode::G_AND, {Ty},
{MIBNot, MIRBuilder.buildInstr(TargetOpcode::G_ADD, {Ty},
{SrcReg, MIBCstNeg1})});
- if (!isSupported({TargetOpcode::G_CTPOP, {Ty}}) &&
- isSupported({TargetOpcode::G_CTLZ, {Ty}})) {
+ if (!isSupported({TargetOpcode::G_CTPOP, {Ty, Ty}}) &&
+ isSupported({TargetOpcode::G_CTLZ, {Ty, Ty}})) {
auto MIBCstLen = MIRBuilder.buildConstant(Ty, Len);
MIRBuilder.buildInstr(
TargetOpcode::G_SUB, {MI.getOperand(0).getReg()},
@@ -1381,3 +3192,230 @@ LegalizerHelper::lowerBitCount(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
}
}
}
+
+// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
+// representation.
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
+
+ // unsigned cul2f(ulong u) {
+ // uint lz = clz(u);
+ // uint e = (u != 0) ? 127U + 63U - lz : 0;
+ // u = (u << lz) & 0x7fffffffffffffffUL;
+ // ulong t = u & 0xffffffffffUL;
+ // uint v = (e << 23) | (uint)(u >> 40);
+ // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
+ // return as_float(v + r);
+ // }
+
+ auto Zero32 = MIRBuilder.buildConstant(S32, 0);
+ auto Zero64 = MIRBuilder.buildConstant(S64, 0);
+
+ auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
+
+ auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
+ auto Sub = MIRBuilder.buildSub(S32, K, LZ);
+
+ auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
+ auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
+
+ auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
+ auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
+
+ auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
+
+ auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
+ auto T = MIRBuilder.buildAnd(S64, U, Mask1);
+
+ auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
+ auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
+ auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
+
+ auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
+ auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
+ auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
+ auto One = MIRBuilder.buildConstant(S32, 1);
+
+ auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
+ auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
+ auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
+  MIRBuilder.buildAdd(Dst, V, R);
+
+  MI.eraseFromParent();
+  return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (SrcTy != LLT::scalar(64))
+ return UnableToLegalize;
+
+ if (DstTy == LLT::scalar(32)) {
+ // TODO: SelectionDAG has several alternative expansions to port which may
+ // be more reasonble depending on the available instructions. If a target
+ // has sitofp, does not have CTLZ, or can efficiently use f64 as an
+ // intermediate type, this is probably worse.
+ return lowerU64ToF32BitOps(MI);
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+ const LLT S1 = LLT::scalar(1);
+
+ if (SrcTy != S64)
+ return UnableToLegalize;
+
+ if (DstTy == S32) {
+ // signed cl2f(long l) {
+ // long s = l >> 63;
+ // float r = cul2f((l + s) ^ s);
+ // return s ? -r : r;
+ // }
+ Register L = Src;
+ auto SignBit = MIRBuilder.buildConstant(S64, 63);
+ auto S = MIRBuilder.buildAShr(S64, L, SignBit);
+
+ auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
+ auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
+ auto R = MIRBuilder.buildUITOFP(S32, Xor);
+
+ auto RNeg = MIRBuilder.buildFNeg(S32, R);
+ auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
+ MIRBuilder.buildConstant(S64, 0));
+ MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
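The `(l + s) ^ s` step is the branch-free two's-complement absolute value: with `s = l >> 63` (all zeros or all ones), it leaves non-negative values alone and negates negative ones, and, done in wrapping unsigned arithmetic like G_ADD/G_XOR, it is well defined even for INT64_MIN. A minimal sketch:

    #include <cassert>
    #include <cstdint>

    static uint64_t absViaSignTrick(int64_t l) {
      // l >> 63 is an arithmetic shift on mainstream compilers: 0 or -1.
      uint64_t s = (uint64_t)(l >> 63);
      return ((uint64_t)l + s) ^ s;  // wrapping add + xor: conditional negate
    }

    int main() {
      assert(absViaSignTrick(5) == 5u);
      assert(absViaSignTrick(-5) == 5u);
      assert(absViaSignTrick(INT64_MIN) == 0x8000000000000000ULL);
    }
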
+static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
+ switch (Opc) {
+ case TargetOpcode::G_SMIN:
+ return CmpInst::ICMP_SLT;
+ case TargetOpcode::G_SMAX:
+ return CmpInst::ICMP_SGT;
+ case TargetOpcode::G_UMIN:
+ return CmpInst::ICMP_ULT;
+ case TargetOpcode::G_UMAX:
+ return CmpInst::ICMP_UGT;
+ default:
+ llvm_unreachable("not in integer min/max");
+ }
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerMinMax(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
+ LLT CmpType = MRI.getType(Dst).changeElementSize(1);
+
+ auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
+ MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
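lowerMinMax relies on the identity min/max(a, b) == select(cmp(a, b), a, b); a small scalar restatement of the G_SMAX case:

    #include <cassert>
    #include <cstdint>

    static int32_t smax(int32_t a, int32_t b) {
      bool cmp = a > b;    // G_ICMP with ICMP_SGT, per minMaxToCompare
      return cmp ? a : b;  // G_SELECT taking the first source when true
    }

    int main() {
      assert(smax(3, 7) == 7);
      assert(smax(-1, -9) == -1);
    }
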
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFCopySign(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+
+ const LLT Src0Ty = MRI.getType(Src0);
+ const LLT Src1Ty = MRI.getType(Src1);
+
+ const int Src0Size = Src0Ty.getScalarSizeInBits();
+ const int Src1Size = Src1Ty.getScalarSizeInBits();
+
+ auto SignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getSignMask(Src0Size));
+
+ auto NotSignBitMask = MIRBuilder.buildConstant(
+ Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
+
+ auto And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask);
+ MachineInstr *Or;
+
+ if (Src0Ty == Src1Ty) {
+ auto And1 = MIRBuilder.buildAnd(Src1Ty, Src0, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else if (Src0Size > Src1Size) {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
+ auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
+ auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
+ auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
+ auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
+ auto And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask);
+ Or = MIRBuilder.buildOr(Dst, And0, And1);
+ }
+
+ // Be careful about setting nsz/nnan/ninf on every instruction, since the
+ // constants are a NaN and -0.0, but the final result should preserve
+ // everything.
+ if (unsigned Flags = MI.getFlags())
+ Or->setFlags(Flags);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
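For the common case where both operands have the same width, the lowering above is the classic bit-level copysign; a minimal host sketch of the s32 case:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static float copysignBits(float Mag, float Sign) {
      uint32_t M, S;
      std::memcpy(&M, &Mag, sizeof M);
      std::memcpy(&S, &Sign, sizeof S);
      uint32_t R = (M & 0x7fffffffu)    // And0: magnitude, sign cleared
                 | (S & 0x80000000u);   // And1: just the sign bit
      float Out;
      std::memcpy(&Out, &R, sizeof Out);
      return Out;
    }

    int main() {
      assert(copysignBits(2.5f, -1.0f) == -2.5f);
      assert(copysignBits(-2.5f, 1.0f) == 2.5f);
    }
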
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
+ unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
+ TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
+
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src0 = MI.getOperand(1).getReg();
+ Register Src1 = MI.getOperand(2).getReg();
+ LLT Ty = MRI.getType(Dst);
+
+ if (!MI.getFlag(MachineInstr::FmNoNans)) {
+ // Insert canonicalizes if it's possible we need to quiet to get correct
+ // sNaN behavior.
+
+ // Note this must be done here, and not as an optimization combine in the
+ // absence of a dedicated quiet-sNaN instruction, as we're using an
+ // omni-purpose G_FCANONICALIZE.
+ if (!isKnownNeverSNaN(Src0, MRI))
+ Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
+
+ if (!isKnownNeverSNaN(Src1, MRI))
+ Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
+ }
+
+ // If there are no NaNs, it's safe to simply replace this with the non-IEEE
+ // version.
+ MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
+ MI.eraseFromParent();
+ return Legalized;
+}
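The canonicalize-before-min/max dance exists because IEEE arithmetic quiets signaling NaNs while a bare min/max selection might not. A host-side demonstration of the quieting (assumes an IEEE float, where the top mantissa bit distinguishes qNaN from sNaN):

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>
    #include <limits>

    static bool isQuietNaN(float f) {
      uint32_t bits;
      std::memcpy(&bits, &f, sizeof bits);
      return std::isnan(f) && (bits & 0x00400000u) != 0;
    }

    int main() {
      volatile float sn = std::numeric_limits<float>::signaling_NaN();
      assert(std::isnan(sn) && !isQuietNaN(sn));
      // Any real IEEE arithmetic op quiets an sNaN; x + 0.0f is one such op
      // (and, unlike x + -0.0f, is not folded away by the compiler).
      float quieted = sn + 0.0f;
      assert(isQuietNaN(quieted));
    }
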
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index fa36ede5b976..6e1de95b3277 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/GlobalISel/LegalizerInfo.cpp - Legalizer ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,6 +42,45 @@ cl::opt<bool> llvm::DisableGISelLegalityCheck(
cl::desc("Don't verify that MIR is fully legal between GlobalISel passes"),
cl::Hidden);
+raw_ostream &llvm::operator<<(raw_ostream &OS, LegalizeAction Action) {
+ switch (Action) {
+ case Legal:
+ OS << "Legal";
+ break;
+ case NarrowScalar:
+ OS << "NarrowScalar";
+ break;
+ case WidenScalar:
+ OS << "WidenScalar";
+ break;
+ case FewerElements:
+ OS << "FewerElements";
+ break;
+ case MoreElements:
+ OS << "MoreElements";
+ break;
+ case Lower:
+ OS << "Lower";
+ break;
+ case Libcall:
+ OS << "Libcall";
+ break;
+ case Custom:
+ OS << "Custom";
+ break;
+ case Unsupported:
+ OS << "Unsupported";
+ break;
+ case NotFound:
+ OS << "NotFound";
+ break;
+ case UseLegacyRules:
+ OS << "UseLegacyRules";
+ break;
+ }
+ return OS;
+}
+
raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
OS << Opcode << ", Tys={";
for (const auto &Type : Types) {
@@ -59,6 +97,86 @@ raw_ostream &LegalityQuery::print(raw_ostream &OS) const {
return OS;
}
+#ifndef NDEBUG
+// Make sure the rule won't (trivially) loop forever.
+static bool hasNoSimpleLoops(const LegalizeRule &Rule, const LegalityQuery &Q,
+ const std::pair<unsigned, LLT> &Mutation) {
+ switch (Rule.getAction()) {
+ case Custom:
+ case Lower:
+ case MoreElements:
+ case FewerElements:
+ break;
+ default:
+ return Q.Types[Mutation.first] != Mutation.second;
+ }
+ return true;
+}
+
+// Make sure the returned mutation makes sense for the match type.
+static bool mutationIsSane(const LegalizeRule &Rule,
+ const LegalityQuery &Q,
+ std::pair<unsigned, LLT> Mutation) {
+ // If the user wants a custom mutation, then we can't really say much about
+ // it. Return true, and trust that they're doing the right thing.
+ if (Rule.getAction() == Custom)
+ return true;
+
+ const unsigned TypeIdx = Mutation.first;
+ const LLT OldTy = Q.Types[TypeIdx];
+ const LLT NewTy = Mutation.second;
+
+ switch (Rule.getAction()) {
+ case FewerElements:
+ case MoreElements: {
+ if (!OldTy.isVector())
+ return false;
+
+ if (NewTy.isVector()) {
+ if (Rule.getAction() == FewerElements) {
+ // Make sure the element count really decreased.
+ if (NewTy.getNumElements() >= OldTy.getNumElements())
+ return false;
+ } else {
+ // Make sure the element count really increased.
+ if (NewTy.getNumElements() <= OldTy.getNumElements())
+ return false;
+ }
+ }
+
+ // Make sure the element type didn't change.
+ return NewTy.getScalarType() == OldTy.getElementType();
+ }
+ case NarrowScalar:
+ case WidenScalar: {
+ if (OldTy.isVector()) {
+ // Number of elements should not change.
+ if (!NewTy.isVector() || OldTy.getNumElements() != NewTy.getNumElements())
+ return false;
+ } else {
+ // Both types must be scalars.
+ if (NewTy.isVector())
+ return false;
+ }
+
+ if (Rule.getAction() == NarrowScalar) {
+ // Make sure the size really decreased.
+ if (NewTy.getScalarSizeInBits() >= OldTy.getScalarSizeInBits())
+ return false;
+ } else {
+ // Make sure the size really increased.
+ if (NewTy.getScalarSizeInBits() <= OldTy.getScalarSizeInBits())
+ return false;
+ }
+
+ return true;
+ }
+ default:
+ return true;
+ }
+}
+#endif
+
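A toy restatement of the scalar-size checks in mutationIsSane, using raw bit widths instead of LLT (hypothetical helper names, illustration only):

    #include <cassert>

    static bool widenIsSane(unsigned OldBits, unsigned NewBits) {
      return NewBits > OldBits;  // WidenScalar must strictly grow the type
    }
    static bool narrowIsSane(unsigned OldBits, unsigned NewBits) {
      return NewBits < OldBits;  // NarrowScalar must strictly shrink it
    }

    int main() {
      assert(widenIsSane(16, 32));
      assert(!widenIsSane(32, 32));  // same type would re-legalize forever
      assert(narrowIsSane(64, 32));
      assert(!narrowIsSane(32, 64));
    }
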
LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
LLVM_DEBUG(dbgs() << "Applying legalizer ruleset to: "; Query.print(dbgs());
dbgs() << "\n");
@@ -66,17 +184,15 @@ LegalizeActionStep LegalizeRuleSet::apply(const LegalityQuery &Query) const {
LLVM_DEBUG(dbgs() << ".. fallback to legacy rules (no rules defined)\n");
return {LegalizeAction::UseLegacyRules, 0, LLT{}};
}
- for (const auto &Rule : Rules) {
+ for (const LegalizeRule &Rule : Rules) {
if (Rule.match(Query)) {
LLVM_DEBUG(dbgs() << ".. match\n");
std::pair<unsigned, LLT> Mutation = Rule.determineMutation(Query);
- LLVM_DEBUG(dbgs() << ".. .. " << (unsigned)Rule.getAction() << ", "
+ LLVM_DEBUG(dbgs() << ".. .. " << Rule.getAction() << ", "
<< Mutation.first << ", " << Mutation.second << "\n");
- assert((Query.Types[Mutation.first] != Mutation.second ||
- Rule.getAction() == Lower ||
- Rule.getAction() == MoreElements ||
- Rule.getAction() == FewerElements) &&
- "Simple loop detected");
+ assert(mutationIsSane(Rule, Query, Mutation) &&
+ "legality mutation invalid for match");
+ assert(hasNoSimpleLoops(Rule, Query, Mutation) && "Simple loop detected");
return {Rule.getAction(), Mutation.first, Mutation.second};
} else
LLVM_DEBUG(dbgs() << ".. no match\n");
@@ -180,16 +296,14 @@ void LegalizerInfo::computeTables() {
if (TypeIdx < ScalarSizeChangeStrategies[OpcodeIdx].size() &&
ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx] != nullptr)
S = ScalarSizeChangeStrategies[OpcodeIdx][TypeIdx];
- llvm::sort(ScalarSpecifiedActions.begin(),
- ScalarSpecifiedActions.end());
+ llvm::sort(ScalarSpecifiedActions);
checkPartialSizeAndActionsVector(ScalarSpecifiedActions);
setScalarAction(Opcode, TypeIdx, S(ScalarSpecifiedActions));
}
// 2. Handle pointer types
for (auto PointerSpecifiedActions : AddressSpace2SpecifiedActions) {
- llvm::sort(PointerSpecifiedActions.second.begin(),
- PointerSpecifiedActions.second.end());
+ llvm::sort(PointerSpecifiedActions.second);
checkPartialSizeAndActionsVector(PointerSpecifiedActions.second);
// For pointer types, we assume that there isn't a meaningful way
// to change the number of bits used in the pointer.
@@ -201,8 +315,7 @@ void LegalizerInfo::computeTables() {
// 3. Handle vector types
SizeAndActionsVec ElementSizesSeen;
for (auto VectorSpecifiedActions : ElemSize2SpecifiedActions) {
- llvm::sort(VectorSpecifiedActions.second.begin(),
- VectorSpecifiedActions.second.end());
+ llvm::sort(VectorSpecifiedActions.second);
const uint16_t ElementSize = VectorSpecifiedActions.first;
ElementSizesSeen.push_back({ElementSize, Legal});
checkPartialSizeAndActionsVector(VectorSpecifiedActions.second);
@@ -328,9 +441,8 @@ LegalizerInfo::getAction(const LegalityQuery &Query) const {
for (unsigned i = 0; i < Query.Types.size(); ++i) {
auto Action = getAspectAction({Query.Opcode, i, Query.Types[i]});
if (Action.first != Legal) {
- LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i
- << " Action=" << (unsigned)Action.first << ", "
- << Action.second << "\n");
+ LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Action="
+ << Action.first << ", " << Action.second << "\n");
return {Action.first, i, Action.second};
} else
LLVM_DEBUG(dbgs() << ".. (legacy) Type " << i << " Legal\n");
@@ -364,8 +476,9 @@ LegalizerInfo::getAction(const MachineInstr &MI,
SmallVector<LegalityQuery::MemDesc, 2> MemDescrs;
for (const auto &MMO : MI.memoperands())
- MemDescrs.push_back(
- {MMO->getSize() /* in bytes */ * 8, MMO->getOrdering()});
+ MemDescrs.push_back({8 * MMO->getSize() /* in bits */,
+ 8 * MMO->getAlignment(),
+ MMO->getOrdering()});
return getAction({MI.getOpcode(), Types, MemDescrs});
}
@@ -375,6 +488,14 @@ bool LegalizerInfo::isLegal(const MachineInstr &MI,
return getAction(MI, MRI).Action == Legal;
}
+bool LegalizerInfo::isLegalOrCustom(const MachineInstr &MI,
+ const MachineRegisterInfo &MRI) const {
+ auto Action = getAction(MI, MRI).Action;
+ // If the action is custom, it may not necessarily modify the instruction,
+ // so we have to assume it's legal.
+ return Action == Legal || Action == Custom;
+}
+
bool LegalizerInfo::legalizeCustom(MachineInstr &MI, MachineRegisterInfo &MRI,
MachineIRBuilder &MIRBuilder,
GISelChangeObserver &Observer) const {
@@ -423,14 +544,10 @@ LegalizerInfo::findAction(const SizeAndActionsVec &Vec, const uint32_t Size) {
// Find the last element in Vec that has a bitsize equal to or smaller than
// the requested bit size.
// That is the element just before the first element that is bigger than Size.
- auto VecIt = std::upper_bound(
- Vec.begin(), Vec.end(), Size,
- [](const uint32_t Size, const SizeAndAction lhs) -> bool {
- return Size < lhs.first;
- });
- assert(VecIt != Vec.begin() && "Does Vec not start with size 1?");
- --VecIt;
- int VecIdx = VecIt - Vec.begin();
+ auto It = partition_point(
+ Vec, [=](const SizeAndAction &A) { return A.first <= Size; });
+ assert(It != Vec.begin() && "Does Vec not start with size 1?");
+ int VecIdx = It - Vec.begin() - 1;
LegalizeAction Action = Vec[VecIdx].second;
switch (Action) {
@@ -541,6 +658,12 @@ LegalizerInfo::findVectorLegalAction(const InstrAspect &Aspect) const {
IntermediateType.getScalarSizeInBits())};
}
+bool LegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
+ MachineRegisterInfo &MRI,
+ MachineIRBuilder &MIRBuilder) const {
+ return true;
+}
+
/// \pre Type indices of every opcode form a dense set starting from 0.
void LegalizerInfo::verify(const MCInstrInfo &MII) const {
#ifndef NDEBUG
@@ -584,7 +707,8 @@ const MachineInstr *llvm::machineFunctionIsIllegal(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (const MachineBasicBlock &MBB : MF)
for (const MachineInstr &MI : MBB)
- if (isPreISelGenericOpcode(MI.getOpcode()) && !MLI->isLegal(MI, MRI))
+ if (isPreISelGenericOpcode(MI.getOpcode()) &&
+ !MLI->isLegalOrCustom(MI, MRI))
return &MI;
}
return nullptr;
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 52b340753a50..3592409710a7 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -1,9 +1,8 @@
//===- Localizer.cpp ---------------------- Localize some instrs -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -11,8 +10,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/Localizer.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
@@ -21,17 +20,53 @@
using namespace llvm;
char Localizer::ID = 0;
-INITIALIZE_PASS(Localizer, DEBUG_TYPE,
- "Move/duplicate certain instructions close to their use", false,
- false)
+INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_END(Localizer, DEBUG_TYPE,
+ "Move/duplicate certain instructions close to their use",
+ false, false)
-Localizer::Localizer() : MachineFunctionPass(ID) {
- initializeLocalizerPass(*PassRegistry::getPassRegistry());
-}
+Localizer::Localizer() : MachineFunctionPass(ID) { }
-void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); }
+void Localizer::init(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(MF.getFunction());
+}
bool Localizer::shouldLocalize(const MachineInstr &MI) {
+ // Assuming a spill and reload of a value has a cost of 1 instruction each,
+ // this helper function computes the maximum number of uses we should consider
+ // for remat. E.g. on arm64 global addresses take 2 insts to materialize. We
+ // break even in terms of code size when the original MI has 2 users vs
+ // choosing to potentially spill. With any more than 2 users we have a net
+ // code size increase. This doesn't take register pressure into account.
+ auto maxUses = [](unsigned RematCost) {
+ // A cost of 1 means remats are basically free.
+ if (RematCost == 1)
+ return UINT_MAX;
+ if (RematCost == 2)
+ return 2U;
+
+ // Remat is too expensive, only sink if there's one user.
+ if (RematCost > 2)
+ return 1U;
+ llvm_unreachable("Unexpected remat cost");
+ };
+
+ // Helper to walk through uses and terminate if we've reached a limit. Saves
+ // us time when all we need is whether there are more than MaxUses users.
+ auto isUsesAtMost = [&](unsigned Reg, unsigned MaxUses) {
+ unsigned NumUses = 0;
+ auto UI = MRI->use_instr_nodbg_begin(Reg), UE = MRI->use_instr_nodbg_end();
+ for (; UI != UE && NumUses < MaxUses; ++UI) {
+ NumUses++;
+ }
+ // If we haven't reached the end yet then there are more than MaxUses users.
+ return UI == UE;
+ };
+
switch (MI.getOpcode()) {
default:
return false;
@@ -40,11 +75,22 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
case TargetOpcode::G_CONSTANT:
case TargetOpcode::G_FCONSTANT:
case TargetOpcode::G_FRAME_INDEX:
+ case TargetOpcode::G_INTTOPTR:
return true;
+ case TargetOpcode::G_GLOBAL_VALUE: {
+ unsigned RematCost = TTI->getGISelRematGlobalCost();
+ unsigned Reg = MI.getOperand(0).getReg();
+ unsigned MaxUses = maxUses(RematCost);
+ if (MaxUses == UINT_MAX)
+ return true; // Remats are "free" so always localize.
+ return isUsesAtMost(Reg, MaxUses);
+ }
}
}
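The cost-to-users policy in shouldLocalize boils down to a tiny table; restated stand-alone (hypothetical function name, illustration only):

    #include <cassert>
    #include <climits>

    static unsigned maxUsesForRematCost(unsigned RematCost) {
      if (RematCost == 1)
        return UINT_MAX;  // remat is free, always localize
      if (RematCost == 2)
        return 2u;        // the code-size break-even point
      return 1u;          // too expensive, only sink to a single user
    }

    int main() {
      assert(maxUsesForRematCost(1) == UINT_MAX);
      assert(maxUsesForRematCost(2) == 2u);
      assert(maxUsesForRematCost(3) == 1u);
    }
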
void Localizer::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<TargetTransformInfoWrapperPass>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -58,6 +104,107 @@ bool Localizer::isLocalUse(MachineOperand &MOUse, const MachineInstr &Def,
return InsertMBB == Def.getParent();
}
+bool Localizer::localizeInterBlock(MachineFunction &MF,
+ LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+ DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
+
+ // Since the IRTranslator only emits constants into the entry block, and the
+ // rest of the GISel pipeline generally emits constants close to their users,
+ // we only localize instructions in the entry block here. This might change if
+ // we start doing CSE across blocks.
+ auto &MBB = MF.front();
+ for (auto RI = MBB.rbegin(), RE = MBB.rend(); RI != RE; ++RI) {
+ MachineInstr &MI = *RI;
+ if (!shouldLocalize(MI))
+ continue;
+ LLVM_DEBUG(dbgs() << "Should localize: " << MI);
+ assert(MI.getDesc().getNumDefs() == 1 &&
+ "More than one definition not supported yet");
+ unsigned Reg = MI.getOperand(0).getReg();
+ // Check if all the users of MI are local.
+ // We are going to invalidate the list of use operands, so we
+ // can't use a range-based loop here.
+ for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
+ MOIt != MOItEnd;) {
+ MachineOperand &MOUse = *MOIt++;
+ // Check if the use is already local.
+ MachineBasicBlock *InsertMBB;
+ LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
+ dbgs() << "Checking use: " << MIUse
+ << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
+ if (isLocalUse(MOUse, MI, InsertMBB))
+ continue;
+ LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
+ Changed = true;
+ auto MBBAndReg = std::make_pair(InsertMBB, Reg);
+ auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
+ if (NewVRegIt == MBBWithLocalDef.end()) {
+ // Create the localized instruction.
+ MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
+ LocalizedInstrs.insert(LocalizedMI);
+ MachineInstr &UseMI = *MOUse.getParent();
+ if (MRI->hasOneUse(Reg) && !UseMI.isPHI())
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(UseMI), LocalizedMI);
+ else
+ InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
+ LocalizedMI);
+
+ // Set a new register for the definition.
+ unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
+ LocalizedMI->getOperand(0).setReg(NewReg);
+ NewVRegIt =
+ MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
+ LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
+ }
+ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
+ << '\n');
+ // Update the user reg.
+ MOUse.setReg(NewVRegIt->second);
+ }
+ }
+ return Changed;
+}
+
+bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
+ bool Changed = false;
+
+ // For each already-localized instruction with multiple users, scan the block
+ // top down from its current position until we hit one of those users.
+
+ // FIXME: Consider doing inst duplication if live ranges are very long due to
+ // many users, but this case may be better served by regalloc improvements.
+
+ for (MachineInstr *MI : LocalizedInstrs) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ MachineBasicBlock &MBB = *MI->getParent();
+ // All of the user MIs of this reg.
+ SmallPtrSet<MachineInstr *, 32> Users;
+ for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
+ if (!UseMI.isPHI())
+ Users.insert(&UseMI);
+ }
+ // If all the users were PHIs then they're not going to be in our block,
+ // don't try to move this instruction.
+ if (Users.empty())
+ continue;
+
+ MachineBasicBlock::iterator II(MI);
+ ++II;
+ while (II != MBB.end() && !Users.count(&*II))
+ ++II;
+
+ LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II
+ << "\n");
+ assert(II != MBB.end() && "Didn't find the user in the MBB");
+ MI->removeFromParent();
+ MBB.insert(II, MI);
+ Changed = true;
+ }
+ return Changed;
+}
+
bool Localizer::runOnMachineFunction(MachineFunction &MF) {
// If the ISel pipeline failed, do not bother running that pass.
if (MF.getProperties().hasProperty(
@@ -68,62 +215,10 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
init(MF);
- bool Changed = false;
- // Keep track of the instructions we localized.
- // We won't need to process them if we see them later in the CFG.
- SmallPtrSet<MachineInstr *, 16> LocalizedInstrs;
- DenseMap<std::pair<MachineBasicBlock *, unsigned>, unsigned> MBBWithLocalDef;
- // TODO: Do bottom up traversal.
- for (MachineBasicBlock &MBB : MF) {
- for (MachineInstr &MI : MBB) {
- if (LocalizedInstrs.count(&MI) || !shouldLocalize(MI))
- continue;
- LLVM_DEBUG(dbgs() << "Should localize: " << MI);
- assert(MI.getDesc().getNumDefs() == 1 &&
- "More than one definition not supported yet");
- unsigned Reg = MI.getOperand(0).getReg();
- // Check if all the users of MI are local.
- // We are going to invalidation the list of use operands, so we
- // can't use range iterator.
- for (auto MOIt = MRI->use_begin(Reg), MOItEnd = MRI->use_end();
- MOIt != MOItEnd;) {
- MachineOperand &MOUse = *MOIt++;
- // Check if the use is already local.
- MachineBasicBlock *InsertMBB;
- LLVM_DEBUG(MachineInstr &MIUse = *MOUse.getParent();
- dbgs() << "Checking use: " << MIUse
- << " #Opd: " << MIUse.getOperandNo(&MOUse) << '\n');
- if (isLocalUse(MOUse, MI, InsertMBB))
- continue;
- LLVM_DEBUG(dbgs() << "Fixing non-local use\n");
- Changed = true;
- auto MBBAndReg = std::make_pair(InsertMBB, Reg);
- auto NewVRegIt = MBBWithLocalDef.find(MBBAndReg);
- if (NewVRegIt == MBBWithLocalDef.end()) {
- // Create the localized instruction.
- MachineInstr *LocalizedMI = MF.CloneMachineInstr(&MI);
- LocalizedInstrs.insert(LocalizedMI);
- // Don't try to be smart for the insertion point.
- // There is no guarantee that the first seen use is the first
- // use in the block.
- InsertMBB->insert(InsertMBB->SkipPHIsAndLabels(InsertMBB->begin()),
- LocalizedMI);
+ // Keep track of the instructions we localized. We'll do a second pass of
+ // intra-block localization to further reduce live ranges.
+ LocalizedSetVecT LocalizedInstrs;
- // Set a new register for the definition.
- unsigned NewReg =
- MRI->createGenericVirtualRegister(MRI->getType(Reg));
- MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
- LocalizedMI->getOperand(0).setReg(NewReg);
- NewVRegIt =
- MBBWithLocalDef.insert(std::make_pair(MBBAndReg, NewReg)).first;
- LLVM_DEBUG(dbgs() << "Inserted: " << *LocalizedMI);
- }
- LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second)
- << '\n');
- // Update the user reg.
- MOUse.setReg(NewVRegIt->second);
- }
- }
- }
- return Changed;
+ bool Changed = localizeInterBlock(MF, LocalizedInstrs);
+ return Changed |= localizeIntraBlock(LocalizedInstrs);
}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index 1f5611061994..b7a73326b85c 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/GlobalISel/MachineIRBuilder.cpp - MIBuilder--*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -17,6 +16,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugInfo.h"
@@ -87,7 +87,7 @@ MachineInstrBuilder MachineIRBuilder::insertInstr(MachineInstrBuilder MIB) {
}
MachineInstrBuilder
-MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildDirectDbgValue(Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -100,7 +100,7 @@ MachineIRBuilder::buildDirectDbgValue(unsigned Reg, const MDNode *Variable,
}
MachineInstrBuilder
-MachineIRBuilder::buildIndirectDbgValue(unsigned Reg, const MDNode *Variable,
+MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -160,23 +160,32 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
return MIB.addMetadata(Label);
}
-MachineInstrBuilder MachineIRBuilder::buildFrameIndex(unsigned Res, int Idx) {
- assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
- return buildInstr(TargetOpcode::G_FRAME_INDEX)
- .addDef(Res)
- .addFrameIndex(Idx);
+MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
+ int Idx) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ auto MIB = buildInstr(TargetOpcode::G_FRAME_INDEX);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addFrameIndex(Idx);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildGlobalValue(unsigned Res,
+MachineInstrBuilder MachineIRBuilder::buildGlobalValue(const DstOp &Res,
const GlobalValue *GV) {
- assert(getMRI()->getType(Res).isPointer() && "invalid operand type");
- assert(getMRI()->getType(Res).getAddressSpace() ==
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
+ assert(Res.getLLTTy(*getMRI()).getAddressSpace() ==
GV->getType()->getAddressSpace() &&
"address space mismatch");
- return buildInstr(TargetOpcode::G_GLOBAL_VALUE)
- .addDef(Res)
- .addGlobalAddress(GV);
+ auto MIB = buildInstr(TargetOpcode::G_GLOBAL_VALUE);
+ Res.addDefToMIB(*getMRI(), MIB);
+ MIB.addGlobalAddress(GV);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildJumpTable(const LLT PtrTy,
+ unsigned JTI) {
+ return buildInstr(TargetOpcode::G_JUMP_TABLE, {PtrTy}, {})
+ .addJumpTableIndex(JTI);
}
void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
@@ -185,20 +194,28 @@ void MachineIRBuilder::validateBinaryOp(const LLT &Res, const LLT &Op0,
assert((Res == Op0 && Res == Op1) && "type mismatch");
}
-MachineInstrBuilder MachineIRBuilder::buildGEP(unsigned Res, unsigned Op0,
- unsigned Op1) {
- assert(getMRI()->getType(Res).isPointer() &&
- getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
- assert(getMRI()->getType(Op1).isScalar() && "invalid offset type");
+void MachineIRBuilder::validateShiftOp(const LLT &Res, const LLT &Op0,
+ const LLT &Op1) {
+ assert((Res.isScalar() || Res.isVector()) && "invalid operand type");
+ assert((Res == Op0) && "type mismatch");
+}
+
+MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() &&
+ Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
+ assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- return buildInstr(TargetOpcode::G_GEP)
- .addDef(Res)
- .addUse(Op0)
- .addUse(Op1);
+ auto MIB = buildInstr(TargetOpcode::G_GEP);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Op0.addSrcToMIB(MIB);
+ Op1.addSrcToMIB(MIB);
+ return MIB;
}
Optional<MachineInstrBuilder>
-MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
+MachineIRBuilder::materializeGEP(Register &Res, Register Op0,
const LLT &ValueTy, uint64_t Value) {
assert(Res == 0 && "Res is a result argument");
assert(ValueTy.isScalar() && "invalid offset type");
@@ -209,32 +226,43 @@ MachineIRBuilder::materializeGEP(unsigned &Res, unsigned Op0,
}
Res = getMRI()->createGenericVirtualRegister(getMRI()->getType(Op0));
- unsigned TmpReg = getMRI()->createGenericVirtualRegister(ValueTy);
-
- buildConstant(TmpReg, Value);
- return buildGEP(Res, Op0, TmpReg);
+ auto Cst = buildConstant(ValueTy, Value);
+ return buildGEP(Res, Op0, Cst.getReg(0));
}
-MachineInstrBuilder MachineIRBuilder::buildPtrMask(unsigned Res, unsigned Op0,
+MachineInstrBuilder MachineIRBuilder::buildPtrMask(const DstOp &Res,
+ const SrcOp &Op0,
uint32_t NumBits) {
- assert(getMRI()->getType(Res).isPointer() &&
- getMRI()->getType(Res) == getMRI()->getType(Op0) && "type mismatch");
+ assert(Res.getLLTTy(*getMRI()).isPointer() &&
+ Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
- return buildInstr(TargetOpcode::G_PTR_MASK)
- .addDef(Res)
- .addUse(Op0)
- .addImm(NumBits);
+ auto MIB = buildInstr(TargetOpcode::G_PTR_MASK);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Op0.addSrcToMIB(MIB);
+ MIB.addImm(NumBits);
+ return MIB;
}
MachineInstrBuilder MachineIRBuilder::buildBr(MachineBasicBlock &Dest) {
return buildInstr(TargetOpcode::G_BR).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildBrIndirect(unsigned Tgt) {
+MachineInstrBuilder MachineIRBuilder::buildBrIndirect(Register Tgt) {
assert(getMRI()->getType(Tgt).isPointer() && "invalid branch destination");
return buildInstr(TargetOpcode::G_BRINDIRECT).addUse(Tgt);
}
+MachineInstrBuilder MachineIRBuilder::buildBrJT(Register TablePtr,
+ unsigned JTI,
+ Register IndexReg) {
+ assert(getMRI()->getType(TablePtr).isPointer() &&
+ "Table reg must be a pointer");
+ return buildInstr(TargetOpcode::G_BRJT)
+ .addUse(TablePtr)
+ .addJumpTableIndex(JTI)
+ .addUse(IndexReg);
+}
+
MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
const SrcOp &Op) {
return buildInstr(TargetOpcode::COPY, Res, Op);
@@ -243,36 +271,60 @@ MachineInstrBuilder MachineIRBuilder::buildCopy(const DstOp &Res,
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
const ConstantInt &Val) {
LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
+ assert(EltTy.getScalarSizeInBits() == Val.getBitWidth() &&
+ "creating constant with the wrong size");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addCImm(&Val);
+ return buildSplatVector(Res, Const);
+ }
- assert((Ty.isScalar() || Ty.isPointer()) && "invalid operand type");
-
- const ConstantInt *NewVal = &Val;
- if (Ty.getSizeInBits() != Val.getBitWidth())
- NewVal = ConstantInt::get(getMF().getFunction().getContext(),
- Val.getValue().sextOrTrunc(Ty.getSizeInBits()));
-
- auto MIB = buildInstr(TargetOpcode::G_CONSTANT);
- Res.addDefToMIB(*getMRI(), MIB);
- MIB.addCImm(NewVal);
- return MIB;
+ auto Const = buildInstr(TargetOpcode::G_CONSTANT);
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addCImm(&Val);
+ return Const;
}
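Usage sketch for the new vector path (assumes an already-initialized MachineIRBuilder inside a pass; not from the patch itself): a vector destination type now yields one scalar G_CONSTANT splatted by a G_BUILD_VECTOR.

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    static void emitSplat42(MachineIRBuilder &B) {
      LLT V4S32 = LLT::vector(4, 32);
      // Emits: %c:_(s32) = G_CONSTANT i32 42
      //        %v:_(<4 x s32>) = G_BUILD_VECTOR %c, %c, %c, %c
      auto Splat = B.buildConstant(V4S32, 42);
      (void)Splat;
    }
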
MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
int64_t Val) {
auto IntN = IntegerType::get(getMF().getFunction().getContext(),
- Res.getLLTTy(*getMRI()).getSizeInBits());
+ Res.getLLTTy(*getMRI()).getScalarSizeInBits());
ConstantInt *CI = ConstantInt::get(IntN, Val, true);
return buildConstant(Res, *CI);
}
MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
const ConstantFP &Val) {
- assert(Res.getLLTTy(*getMRI()).isScalar() && "invalid operand type");
+ LLT Ty = Res.getLLTTy(*getMRI());
+ LLT EltTy = Ty.getScalarType();
- auto MIB = buildInstr(TargetOpcode::G_FCONSTANT);
- Res.addDefToMIB(*getMRI(), MIB);
- MIB.addFPImm(&Val);
- return MIB;
+ assert(APFloat::getSizeInBits(Val.getValueAPF().getSemantics())
+ == EltTy.getSizeInBits() &&
+ "creating fconstant with the wrong size");
+
+ assert(!Ty.isPointer() && "invalid operand type");
+
+ if (Ty.isVector()) {
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT)
+ .addDef(getMRI()->createGenericVirtualRegister(EltTy))
+ .addFPImm(&Val);
+
+ return buildSplatVector(Res, Const);
+ }
+
+ auto Const = buildInstr(TargetOpcode::G_FCONSTANT);
+ Res.addDefToMIB(*getMRI(), Const);
+ Const.addFPImm(&Val);
+ return Const;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildConstant(const DstOp &Res,
+ const APInt &Val) {
+ ConstantInt *CI = ConstantInt::get(getMF().getFunction().getContext(), Val);
+ return buildConstant(Res, *CI);
}
MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
@@ -280,44 +332,62 @@ MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
LLT DstTy = Res.getLLTTy(*getMRI());
auto &Ctx = getMF().getFunction().getContext();
auto *CFP =
- ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getSizeInBits()));
+ ConstantFP::get(Ctx, getAPFloatFromSize(Val, DstTy.getScalarSizeInBits()));
return buildFConstant(Res, *CFP);
}
-MachineInstrBuilder MachineIRBuilder::buildBrCond(unsigned Tst,
+MachineInstrBuilder MachineIRBuilder::buildFConstant(const DstOp &Res,
+ const APFloat &Val) {
+ auto &Ctx = getMF().getFunction().getContext();
+ auto *CFP = ConstantFP::get(Ctx, Val);
+ return buildFConstant(Res, *CFP);
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBrCond(Register Tst,
MachineBasicBlock &Dest) {
assert(getMRI()->getType(Tst).isScalar() && "invalid operand type");
return buildInstr(TargetOpcode::G_BRCOND).addUse(Tst).addMBB(&Dest);
}
-MachineInstrBuilder MachineIRBuilder::buildLoad(unsigned Res, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildLoad(const DstOp &Res,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
return buildLoadInstr(TargetOpcode::G_LOAD, Res, Addr, MMO);
}
MachineInstrBuilder MachineIRBuilder::buildLoadInstr(unsigned Opcode,
- unsigned Res,
- unsigned Addr,
+ const DstOp &Res,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+ assert(Res.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
- return buildInstr(Opcode)
- .addDef(Res)
- .addUse(Addr)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(Opcode);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
-MachineInstrBuilder MachineIRBuilder::buildStore(unsigned Val, unsigned Addr,
+MachineInstrBuilder MachineIRBuilder::buildStore(const SrcOp &Val,
+ const SrcOp &Addr,
MachineMemOperand &MMO) {
- assert(getMRI()->getType(Val).isValid() && "invalid operand type");
- assert(getMRI()->getType(Addr).isPointer() && "invalid operand type");
+ assert(Val.getLLTTy(*getMRI()).isValid() && "invalid operand type");
+ assert(Addr.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
- return buildInstr(TargetOpcode::G_STORE)
- .addUse(Val)
- .addUse(Addr)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(TargetOpcode::G_STORE);
+ Val.addSrcToMIB(MIB);
+ Addr.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildUAddo(const DstOp &Res,
+ const DstOp &CarryOut,
+ const SrcOp &Op0,
+ const SrcOp &Op1) {
+ return buildInstr(TargetOpcode::G_UADDO, {Res, CarryOut}, {Op0, Op1});
}
MachineInstrBuilder MachineIRBuilder::buildUAdde(const DstOp &Res,
@@ -344,6 +414,25 @@ MachineInstrBuilder MachineIRBuilder::buildZExt(const DstOp &Res,
return buildInstr(TargetOpcode::G_ZEXT, Res, Op);
}
+unsigned MachineIRBuilder::getBoolExtOp(bool IsVec, bool IsFP) const {
+ const auto *TLI = getMF().getSubtarget().getTargetLowering();
+ switch (TLI->getBooleanContents(IsVec, IsFP)) {
+ case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
+ return TargetOpcode::G_SEXT;
+ case TargetLoweringBase::ZeroOrOneBooleanContent:
+ return TargetOpcode::G_ZEXT;
+ default:
+ return TargetOpcode::G_ANYEXT;
+ }
+}
+
+MachineInstrBuilder MachineIRBuilder::buildBoolExt(const DstOp &Res,
+ const SrcOp &Op,
+ bool IsFP) {
+ unsigned ExtOp = getBoolExtOp(getMRI()->getType(Op.getReg()).isVector(), IsFP);
+ return buildInstr(ExtOp, Res, Op);
+}
+
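Usage sketch (assumes an initialized MachineIRBuilder and an s1 condition register): buildBoolExt picks G_SEXT, G_ZEXT, or G_ANYEXT from the target's getBooleanContents answer.

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    static void extendBool(MachineIRBuilder &B, Register Cond) {
      // ZeroOrNegativeOne targets get G_SEXT, ZeroOrOne targets G_ZEXT.
      B.buildBoolExt(LLT::scalar(32), Cond, /*IsFP=*/false);
    }
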
MachineInstrBuilder MachineIRBuilder::buildExtOrTrunc(unsigned ExtOpc,
const DstOp &Res,
const SrcOp &Op) {
@@ -403,29 +492,32 @@ MachineInstrBuilder MachineIRBuilder::buildCast(const DstOp &Dst,
return buildInstr(Opcode, Dst, Src);
}
-MachineInstrBuilder MachineIRBuilder::buildExtract(unsigned Res, unsigned Src,
+MachineInstrBuilder MachineIRBuilder::buildExtract(const DstOp &Dst,
+ const SrcOp &Src,
uint64_t Index) {
+ LLT SrcTy = Src.getLLTTy(*getMRI());
+ LLT DstTy = Dst.getLLTTy(*getMRI());
+
#ifndef NDEBUG
- assert(getMRI()->getType(Src).isValid() && "invalid operand type");
- assert(getMRI()->getType(Res).isValid() && "invalid operand type");
- assert(Index + getMRI()->getType(Res).getSizeInBits() <=
- getMRI()->getType(Src).getSizeInBits() &&
+ assert(SrcTy.isValid() && "invalid operand type");
+ assert(DstTy.isValid() && "invalid operand type");
+ assert(Index + DstTy.getSizeInBits() <= SrcTy.getSizeInBits() &&
"extracting off end of register");
#endif
- if (getMRI()->getType(Res).getSizeInBits() ==
- getMRI()->getType(Src).getSizeInBits()) {
+ if (DstTy.getSizeInBits() == SrcTy.getSizeInBits()) {
assert(Index == 0 && "insertion past the end of a register");
- return buildCast(Res, Src);
+ return buildCast(Dst, Src);
}
- return buildInstr(TargetOpcode::G_EXTRACT)
- .addDef(Res)
- .addUse(Src)
- .addImm(Index);
+ auto Extract = buildInstr(TargetOpcode::G_EXTRACT);
+ Dst.addDefToMIB(*getMRI(), Extract);
+ Src.addSrcToMIB(Extract);
+ Extract.addImm(Index);
+ return Extract;
}
-void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
+void MachineIRBuilder::buildSequence(Register Res, ArrayRef<Register> Ops,
ArrayRef<uint64_t> Indices) {
#ifndef NDEBUG
assert(Ops.size() == Indices.size() && "incompatible args");
@@ -454,11 +546,11 @@ void MachineIRBuilder::buildSequence(unsigned Res, ArrayRef<unsigned> Ops,
return;
}
- unsigned ResIn = getMRI()->createGenericVirtualRegister(ResTy);
+ Register ResIn = getMRI()->createGenericVirtualRegister(ResTy);
buildUndef(ResIn);
for (unsigned i = 0; i < Ops.size(); ++i) {
- unsigned ResOut = i + 1 == Ops.size()
+ Register ResOut = i + 1 == Ops.size()
? Res
: getMRI()->createGenericVirtualRegister(ResTy);
buildInsert(ResOut, ResIn, Ops[i], Indices[i]);
@@ -471,11 +563,12 @@ MachineInstrBuilder MachineIRBuilder::buildUndef(const DstOp &Res) {
}
MachineInstrBuilder MachineIRBuilder::buildMerge(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
+ ArrayRef<Register> Ops) {
// Unfortunately to convert from ArrayRef<LLT> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_MERGE_VALUES, Res, TmpVec);
}
@@ -485,31 +578,48 @@ MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<LLT> Res,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
-MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<unsigned> Res,
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(LLT Res,
+ const SrcOp &Op) {
+ unsigned NumReg = Op.getLLTTy(*getMRI()).getSizeInBits() / Res.getSizeInBits();
+ SmallVector<Register, 8> TmpVec;
+ for (unsigned I = 0; I != NumReg; ++I)
+ TmpVec.push_back(getMRI()->createGenericVirtualRegister(Res));
+ return buildUnmerge(TmpVec, Op);
+}
+
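Usage sketch for the new LLT overload (assumes an initialized MachineIRBuilder and an s64 source register): passing just the piece type creates the result registers for you.

    #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
    using namespace llvm;

    static void splitS64(MachineIRBuilder &B, Register Src) {
      // %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %src:_(s64)
      // (result 0 holds the low bits by GlobalISel convention).
      auto Unmerge = B.buildUnmerge(LLT::scalar(32), Src);
      Register Lo = Unmerge.getReg(0);
      Register Hi = Unmerge.getReg(1);
      (void)Lo;
      (void)Hi;
    }
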
+MachineInstrBuilder MachineIRBuilder::buildUnmerge(ArrayRef<Register> Res,
const SrcOp &Op) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<DstOp>,
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<DstOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<DstOp, 8> TmpVec(Res.begin(), Res.end());
+ assert(TmpVec.size() > 1);
return buildInstr(TargetOpcode::G_UNMERGE_VALUES, TmpVec, Op);
}
MachineInstrBuilder MachineIRBuilder::buildBuildVector(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
}
+MachineInstrBuilder MachineIRBuilder::buildSplatVector(const DstOp &Res,
+ const SrcOp &Src) {
+ SmallVector<SrcOp, 8> TmpVec(Res.getLLTTy(*getMRI()).getNumElements(), Src);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, Res, TmpVec);
+}
+
MachineInstrBuilder
MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
- ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+ ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
@@ -517,16 +627,16 @@ MachineIRBuilder::buildBuildVectorTrunc(const DstOp &Res,
}
MachineInstrBuilder
-MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<unsigned> Ops) {
- // Unfortunately to convert from ArrayRef<unsigned> to ArrayRef<SrcOp>,
+MachineIRBuilder::buildConcatVectors(const DstOp &Res, ArrayRef<Register> Ops) {
+ // Unfortunately to convert from ArrayRef<Register> to ArrayRef<SrcOp>,
// we need some temporary storage for the DstOp objects. Here we use a
// sufficiently large SmallVector to not go through the heap.
SmallVector<SrcOp, 8> TmpVec(Ops.begin(), Ops.end());
return buildInstr(TargetOpcode::G_CONCAT_VECTORS, Res, TmpVec);
}
-MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
- unsigned Op, unsigned Index) {
+MachineInstrBuilder MachineIRBuilder::buildInsert(Register Res, Register Src,
+ Register Op, unsigned Index) {
assert(Index + getMRI()->getType(Op).getSizeInBits() <=
getMRI()->getType(Res).getSizeInBits() &&
"insertion past the end of a register");
@@ -544,13 +654,25 @@ MachineInstrBuilder MachineIRBuilder::buildInsert(unsigned Res, unsigned Src,
}
MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
- unsigned Res,
+ ArrayRef<Register> ResultRegs,
bool HasSideEffects) {
auto MIB =
buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
: TargetOpcode::G_INTRINSIC);
- if (Res)
- MIB.addDef(Res);
+ for (unsigned ResultReg : ResultRegs)
+ MIB.addDef(ResultReg);
+ MIB.addIntrinsicID(ID);
+ return MIB;
+}
+
+MachineInstrBuilder MachineIRBuilder::buildIntrinsic(Intrinsic::ID ID,
+ ArrayRef<DstOp> Results,
+ bool HasSideEffects) {
+ auto MIB =
+ buildInstr(HasSideEffects ? TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS
+ : TargetOpcode::G_INTRINSIC);
+ for (DstOp Result : Results)
+ Result.addDefToMIB(*getMRI(), MIB);
MIB.addIntrinsicID(ID);
return MIB;
}
@@ -601,8 +723,8 @@ MachineIRBuilder::buildExtractVectorElement(const DstOp &Res, const SrcOp &Val,
}
MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
- unsigned OldValRes, unsigned SuccessRes, unsigned Addr, unsigned CmpVal,
- unsigned NewVal, MachineMemOperand &MMO) {
+ Register OldValRes, Register SuccessRes, Register Addr, Register CmpVal,
+ Register NewVal, MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
LLT SuccessResTy = getMRI()->getType(SuccessRes);
@@ -628,8 +750,8 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicCmpXchgWithSuccess(
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
- unsigned CmpVal, unsigned NewVal,
+MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
+ Register CmpVal, Register NewVal,
MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -653,9 +775,9 @@ MachineIRBuilder::buildAtomicCmpXchg(unsigned OldValRes, unsigned Addr,
}
MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
- unsigned OldValRes,
- unsigned Addr,
- unsigned Val,
+ Register OldValRes,
+ Register Addr,
+ Register Val,
MachineMemOperand &MMO) {
#ifndef NDEBUG
LLT OldValResTy = getMRI()->getType(OldValRes);
@@ -675,75 +797,82 @@ MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXchg(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXchg(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XCHG, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAdd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAdd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_ADD, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWSub(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWSub(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_SUB, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWAnd(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWAnd(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_AND, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWNand(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWNand(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_NAND, OldValRes, Addr, Val,
MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(unsigned OldValRes,
- unsigned Addr,
- unsigned Val,
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMWOr(Register OldValRes,
+ Register Addr,
+ Register Val,
MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_OR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWXor(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWXor(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_XOR, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWMin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWMin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_MIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmax(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmax(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMAX, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildAtomicRMWUmin(unsigned OldValRes, unsigned Addr,
- unsigned Val, MachineMemOperand &MMO) {
+MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
+ Register Val, MachineMemOperand &MMO) {
return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_UMIN, OldValRes, Addr, Val,
MMO);
}
MachineInstrBuilder
-MachineIRBuilder::buildBlockAddress(unsigned Res, const BlockAddress *BA) {
+MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
+ return buildInstr(TargetOpcode::G_FENCE)
+ .addImm(Ordering)
+ .addImm(Scope);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildBlockAddress(Register Res, const BlockAddress *BA) {
#ifndef NDEBUG
assert(getMRI()->getType(Res).isPointer() && "invalid res type");
#endif
@@ -803,17 +932,18 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
- case TargetOpcode::G_ASHR:
- case TargetOpcode::G_LSHR:
case TargetOpcode::G_MUL:
case TargetOpcode::G_OR:
- case TargetOpcode::G_SHL:
case TargetOpcode::G_SUB:
case TargetOpcode::G_XOR:
case TargetOpcode::G_UDIV:
case TargetOpcode::G_SDIV:
case TargetOpcode::G_UREM:
- case TargetOpcode::G_SREM: {
+ case TargetOpcode::G_SREM:
+ case TargetOpcode::G_SMIN:
+ case TargetOpcode::G_SMAX:
+ case TargetOpcode::G_UMIN:
+ case TargetOpcode::G_UMAX: {
// All these are binary ops.
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 2 && "Invalid Srcs");
@@ -821,6 +951,17 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()),
SrcOps[1].getLLTTy(*getMRI()));
break;
+ }
+ case TargetOpcode::G_SHL:
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR: {
+ assert(DstOps.size() == 1 && "Invalid Dst");
+ assert(SrcOps.size() == 2 && "Invalid Srcs");
+ validateShiftOp(DstOps[0].getLLTTy(*getMRI()),
+ SrcOps[0].getLLTTy(*getMRI()),
+ SrcOps[1].getLLTTy(*getMRI()));
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -830,7 +971,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
SrcOps[0].getLLTTy(*getMRI()), true);
break;
case TargetOpcode::G_TRUNC:
- case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FPTRUNC: {
assert(DstOps.size() == 1 && "Invalid Dst");
assert(SrcOps.size() == 1 && "Invalid Srcs");
validateTruncExt(DstOps[0].getLLTTy(*getMRI()),
@@ -839,10 +980,8 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
}
case TargetOpcode::COPY:
assert(DstOps.size() == 1 && "Invalid Dst");
- assert(SrcOps.size() == 1 && "Invalid Srcs");
- assert(DstOps[0].getLLTTy(*getMRI()) == LLT() ||
- SrcOps[0].getLLTTy(*getMRI()) == LLT() ||
- DstOps[0].getLLTTy(*getMRI()) == SrcOps[0].getLLTTy(*getMRI()));
+ // If the caller wants to add a subreg source it has to be done separately,
+ // so we may not have any SrcOps at this point yet.
break;
case TargetOpcode::G_FCMP:
case TargetOpcode::G_ICMP: {
@@ -943,7 +1082,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
- "input scalars do not exactly cover the outpur vector register");
+ "input scalars do not exactly cover the output vector register");
break;
}
case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -976,7 +1115,7 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"type mismatch in input list");
assert(SrcOps.size() * SrcOps[0].getLLTTy(*getMRI()).getSizeInBits() ==
DstOps[0].getLLTTy(*getMRI()).getSizeInBits() &&
- "input vectors do not exactly cover the outpur vector register");
+ "input vectors do not exactly cover the output vector register");
break;
}
case TargetOpcode::G_UADDE: {
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index dcc8b7cc23c5..42be88fcf947 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -72,7 +71,6 @@ INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
RegBankSelect::RegBankSelect(Mode RunningMode)
: MachineFunctionPass(ID), OptMode(RunningMode) {
- initializeRegBankSelectPass(*PassRegistry::getPassRegistry());
if (RegBankSelectMode.getNumOccurrences() != 0) {
OptMode = RegBankSelectMode;
if (RegBankSelectMode != RunningMode)
@@ -110,7 +108,7 @@ void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
}
bool RegBankSelect::assignmentMatch(
- unsigned Reg, const RegisterBankInfo::ValueMapping &ValMapping,
+ Register Reg, const RegisterBankInfo::ValueMapping &ValMapping,
bool &OnlyAssign) const {
// By default we assume we will have to repair something.
OnlyAssign = false;
@@ -135,34 +133,84 @@ bool RegBankSelect::assignmentMatch(
bool RegBankSelect::repairReg(
MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
RegBankSelect::RepairingPlacement &RepairPt,
- const iterator_range<SmallVectorImpl<unsigned>::const_iterator> &NewVRegs) {
- if (ValMapping.NumBreakDowns != 1 && !TPC->isGlobalISelAbortEnabled())
- return false;
- assert(ValMapping.NumBreakDowns == 1 && "Not yet implemented");
+ const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) {
+
+ assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) &&
+ "need new vreg for each breakdown");
+
// An empty range of new registers means no repairing.
assert(!empty(NewVRegs) && "We should not have to repair");
- // Assume we are repairing a use and thus, the original reg will be
- // the source of the repairing.
- unsigned Src = MO.getReg();
- unsigned Dst = *NewVRegs.begin();
-
- // If we repair a definition, swap the source and destination for
- // the repairing.
- if (MO.isDef())
- std::swap(Src, Dst);
-
- assert((RepairPt.getNumInsertPoints() == 1 ||
- TargetRegisterInfo::isPhysicalRegister(Dst)) &&
- "We are about to create several defs for Dst");
-
- // Build the instruction used to repair, then clone it at the right
- // places. Avoiding buildCopy bypasses the check that Src and Dst have the
- // same types because the type is a placeholder when this function is called.
- MachineInstr *MI =
- MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY).addDef(Dst).addUse(Src);
- LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
- << '\n');
+ MachineInstr *MI;
+ if (ValMapping.NumBreakDowns == 1) {
+ // Assume we are repairing a use and thus, the original reg will be
+ // the source of the repairing.
+ Register Src = MO.getReg();
+ Register Dst = *NewVRegs.begin();
+
+ // If we repair a definition, swap the source and destination for
+ // the repairing.
+ if (MO.isDef())
+ std::swap(Src, Dst);
+
+ assert((RepairPt.getNumInsertPoints() == 1 ||
+ TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ "We are about to create several defs for Dst");
+
+ // Build the instruction used to repair, then clone it at the right
+ // places. Avoiding buildCopy bypasses the check that Src and Dst have the
+ // same types because the type is a placeholder when this function is called.
+ MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
+ .addDef(Dst)
+ .addUse(Src);
+ LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
+ << '\n');
+ } else {
+ // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
+ // sequence.
+ assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported");
+
+ LLT RegTy = MRI->getType(MO.getReg());
+ if (MO.isDef()) {
+ unsigned MergeOp;
+ if (RegTy.isVector()) {
+ if (ValMapping.NumBreakDowns == RegTy.getNumElements())
+ MergeOp = TargetOpcode::G_BUILD_VECTOR;
+ else {
+ assert(
+ (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns ==
+ RegTy.getSizeInBits()) &&
+ (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() ==
+ 0) &&
+ "don't understand this value breakdown");
+
+ MergeOp = TargetOpcode::G_CONCAT_VECTORS;
+ }
+ } else
+ MergeOp = TargetOpcode::G_MERGE_VALUES;
+
+ auto MergeBuilder =
+ MIRBuilder.buildInstrNoInsert(MergeOp)
+ .addDef(MO.getReg());
+
+ for (Register SrcReg : NewVRegs)
+ MergeBuilder.addUse(SrcReg);
+
+ MI = MergeBuilder;
+ } else {
+ MachineInstrBuilder UnMergeBuilder =
+ MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
+ for (Register DefReg : NewVRegs)
+ UnMergeBuilder.addDef(DefReg);
+
+ UnMergeBuilder.addUse(MO.getReg());
+ MI = UnMergeBuilder;
+ }
+ }
+
+ if (RepairPt.getNumInsertPoints() != 1)
+ report_fatal_error("need testcase to support multiple insertion points");
+
// TODO:
  // Check if MI is legal. If not, we need to legalize all the
// instructions we are going to insert.
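The breakdown repair added above is easier to read with a concrete mapping in mind. A minimal sketch, assuming a 64-bit vreg mapped onto two uniform 32-bit parts (register names are illustrative, not from the patch):

    // Repairing a def: the value is produced in parts and reassembled.
    //   %orig:_(s64) = G_MERGE_VALUES %lo:_(s32), %hi:_(s32)
    // Repairing a use: the value is consumed in parts and split out.
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %orig:_(s64)
    // Vector defs use G_BUILD_VECTOR or G_CONCAT_VECTORS in place of
    // G_MERGE_VALUES, as selected by the code above.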
@@ -195,7 +243,8 @@ uint64_t RegBankSelect::getRepairCost(
const RegisterBank *CurRegBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
// If MO does not have a register bank, we should have just been
// able to set one unless we have to break the value down.
- assert((!IsSameNumOfValues || CurRegBank) && "We should not have to repair");
+ assert(CurRegBank || MO.isDef());
+
// Def: Val <- NewDefs
// Same number of values: copy
// Different number: Val = build_sequence Defs1, Defs2, ...
@@ -206,6 +255,9 @@ uint64_t RegBankSelect::getRepairCost(
// We should remember that this value is available somewhere else to
// coalesce the value.
+ if (ValMapping.NumBreakDowns != 1)
+ return RBI->getBreakDownCost(ValMapping, CurRegBank);
+
if (IsSameNumOfValues) {
const RegisterBank *DesiredRegBrank = ValMapping.BreakDown[0].RegBank;
// If we repair a definition, swap the source and destination for
@@ -345,7 +397,7 @@ void RegBankSelect::tryAvoidingSplit(
// repairing.
// Check if this is a physical or virtual register.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
// We are going to split every outgoing edges.
// Check that this is possible.
@@ -416,7 +468,7 @@ RegBankSelect::MappingCost RegBankSelect::computeMapping(
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
@@ -542,7 +594,7 @@ bool RegBankSelect::applyMapping(
MachineOperand &MO = MI.getOperand(OpIdx);
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
switch (RepairPt.getKind()) {
case RepairingPlacement::Reassign:
@@ -605,7 +657,7 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
const Function &F = MF.getFunction();
Mode SaveOptMode = OptMode;
- if (F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
OptMode = Mode::Fast;
init(MF);
@@ -644,8 +696,21 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
"unable to map instruction", MI);
return false;
}
+
+ // It's possible the mapping changed control flow, and moved the following
+ // instruction to a new block, so figure out the new parent.
+ if (MII != End) {
+ MachineBasicBlock *NextInstBB = MII->getParent();
+ if (NextInstBB != MBB) {
+ LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
+ MBB = NextInstBB;
+ MIRBuilder.setMBB(*MBB);
+ End = MBB->end();
+ }
+ }
}
}
+
OptMode = SaveOptMode;
return false;
}
@@ -692,7 +757,7 @@ RegBankSelect::RepairingPlacement::RepairingPlacement(
MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
// Check if we can move the insertion point prior to the
// terminators of the predecessor.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It)
if (It->modifiesRegister(Reg, &TRI)) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 16f67a217ce1..4e41f338934d 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/RegisterBank.cpp - Register Bank --*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 28404e52d6ea..159422e38878 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/RegisterBankInfo.cpp --------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -81,7 +80,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
}
const RegisterBank *
-RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
+RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg))
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
@@ -96,7 +95,7 @@ RegisterBankInfo::getRegBank(unsigned Reg, const MachineRegisterInfo &MRI,
}
const TargetRegisterClass &
-RegisterBankInfo::getMinimalPhysRegClass(unsigned Reg,
+RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
"Reg must be a physreg");
@@ -126,7 +125,7 @@ const RegisterBank *RegisterBankInfo::getRegBankFromConstraints(
}
const TargetRegisterClass *RegisterBankInfo::constrainGenericRegister(
- unsigned Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
+ Register Reg, const TargetRegisterClass &RC, MachineRegisterInfo &MRI) {
// If the register already has a class, fallback to MRI::constrainRegClass.
auto &RegClassOrBank = MRI.getRegClassOrRegBank(Reg);
@@ -181,7 +180,7 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
const MachineOperand &MO = MI.getOperand(OpIdx);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
// The register bank of Reg is just a side effect of the current
@@ -208,19 +207,49 @@ RegisterBankInfo::getInstrMappingImpl(const MachineInstr &MI) const {
continue;
}
}
- const ValueMapping *ValMapping =
- &getValueMapping(0, getSizeInBits(Reg, MRI, TRI), *CurRegBank);
+
+ unsigned Size = getSizeInBits(Reg, MRI, TRI);
+ const ValueMapping *ValMapping = &getValueMapping(0, Size, *CurRegBank);
if (IsCopyLike) {
- OperandsMapping[0] = ValMapping;
+ if (!OperandsMapping[0]) {
+ if (MI.isRegSequence()) {
+ // For reg_sequence, the result size does not match the input.
+ unsigned ResultSize = getSizeInBits(MI.getOperand(0).getReg(),
+ MRI, TRI);
+ OperandsMapping[0] = &getValueMapping(0, ResultSize, *CurRegBank);
+ } else {
+ OperandsMapping[0] = ValMapping;
+ }
+ }
+
+ // The default handling assumes any register bank can be copied to any
+ // other. If this isn't the case, the target should specially deal with
+ // reg_sequence/phi. There may also be unsatisfiable copies.
+ for (; OpIdx != EndIdx; ++OpIdx) {
+ const MachineOperand &MO = MI.getOperand(OpIdx);
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
+ if (!Reg)
+ continue;
+
+ const RegisterBank *AltRegBank = getRegBank(Reg, MRI, TRI);
+ if (AltRegBank &&
+ cannotCopy(*CurRegBank, *AltRegBank, getSizeInBits(Reg, MRI, TRI)))
+ return getInvalidInstructionMapping();
+ }
+
CompleteMapping = true;
break;
}
+
OperandsMapping[OpIdx] = ValMapping;
}
- if (IsCopyLike && !CompleteMapping)
+ if (IsCopyLike && !CompleteMapping) {
// No way to deduce the type from what we have.
return getInvalidInstructionMapping();
+ }
  assert(CompleteMapping && "Setting an incomplete mapping");
return getInstructionMapping(
@@ -363,11 +392,8 @@ RegisterBankInfo::getInstructionMappingImpl(
++NumInstructionMappingsCreated;
auto &InstrMapping = MapOfInstructionMappings[Hash];
- if (IsInvalid)
- InstrMapping = llvm::make_unique<InstructionMapping>();
- else
- InstrMapping = llvm::make_unique<InstructionMapping>(
- ID, Cost, OperandsMapping, NumOperands);
+ InstrMapping = llvm::make_unique<InstructionMapping>(
+ ID, Cost, OperandsMapping, NumOperands);
return *InstrMapping;
}
@@ -382,8 +408,12 @@ RegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
RegisterBankInfo::InstructionMappings
RegisterBankInfo::getInstrPossibleMappings(const MachineInstr &MI) const {
InstructionMappings PossibleMappings;
- // Put the default mapping first.
- PossibleMappings.push_back(&getInstrMapping(MI));
+ const auto &Mapping = getInstrMapping(MI);
+ if (Mapping.isValid()) {
+ // Put the default mapping first.
+ PossibleMappings.push_back(&Mapping);
+ }
+
// Then the alternative mapping, if any.
InstructionMappings AltMappings = getInstrAlternativeMappings(MI);
for (const InstructionMapping *AltMapping : AltMappings)
@@ -424,14 +454,14 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
assert(OpdMapper.getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns ==
1 &&
"This mapping is too complex for this function");
- iterator_range<SmallVectorImpl<unsigned>::const_iterator> NewRegs =
+ iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
if (empty(NewRegs)) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
- unsigned OrigReg = MO.getReg();
- unsigned NewReg = *NewRegs.begin();
+ Register OrigReg = MO.getReg();
+ Register NewReg = *NewRegs.begin();
LLVM_DEBUG(dbgs() << " changed, replace " << printReg(OrigReg, nullptr));
MO.setReg(NewReg);
LLVM_DEBUG(dbgs() << " with " << printReg(NewReg, nullptr));
@@ -456,7 +486,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
}
}
-unsigned RegisterBankInfo::getSizeInBits(unsigned Reg,
+unsigned RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -498,6 +528,19 @@ void RegisterBankInfo::PartialMapping::print(raw_ostream &OS) const {
OS << "nullptr";
}
+bool RegisterBankInfo::ValueMapping::partsAllUniform() const {
+ if (NumBreakDowns < 2)
+ return true;
+
+ const PartialMapping *First = begin();
+ for (const PartialMapping *Part = First + 1; Part != end(); ++Part) {
+ if (Part->Length != First->Length || Part->RegBank != First->RegBank)
+ return false;
+ }
+
+ return true;
+}
+
bool RegisterBankInfo::ValueMapping::verify(unsigned MeaningfulBitWidth) const {
assert(NumBreakDowns && "Value mapped nowhere?!");
unsigned OrigValueBitWidth = 0;
@@ -565,7 +608,7 @@ bool RegisterBankInfo::InstructionMapping::verify(
"We should not care about non-reg mapping");
continue;
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
assert(getOperandMapping(Idx).isValid() &&
@@ -610,7 +653,7 @@ RegisterBankInfo::OperandsMapper::OperandsMapper(
assert(InstrMapping.verify(MI) && "Invalid mapping for MI");
}
-iterator_range<SmallVectorImpl<unsigned>::iterator>
+iterator_range<SmallVectorImpl<Register>::iterator>
RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
unsigned NumPartialVal =
@@ -626,18 +669,18 @@ RegisterBankInfo::OperandsMapper::getVRegsMem(unsigned OpIdx) {
for (unsigned i = 0; i < NumPartialVal; ++i)
NewVRegs.push_back(0);
}
- SmallVectorImpl<unsigned>::iterator End =
+ SmallVectorImpl<Register>::iterator End =
getNewVRegsEnd(StartIdx, NumPartialVal);
return make_range(&NewVRegs[StartIdx], End);
}
-SmallVectorImpl<unsigned>::const_iterator
+SmallVectorImpl<Register>::const_iterator
RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
unsigned NumVal) const {
return const_cast<OperandsMapper *>(this)->getNewVRegsEnd(StartIdx, NumVal);
}
-SmallVectorImpl<unsigned>::iterator
+SmallVectorImpl<Register>::iterator
RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
unsigned NumVal) {
assert((NewVRegs.size() == StartIdx + NumVal ||
@@ -649,11 +692,11 @@ RegisterBankInfo::OperandsMapper::getNewVRegsEnd(unsigned StartIdx,
void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
- iterator_range<SmallVectorImpl<unsigned>::iterator> NewVRegsForOpIdx =
+ iterator_range<SmallVectorImpl<Register>::iterator> NewVRegsForOpIdx =
getVRegsMem(OpIdx);
const ValueMapping &ValMapping = getInstrMapping().getOperandMapping(OpIdx);
const PartialMapping *PartMap = ValMapping.begin();
- for (unsigned &NewVReg : NewVRegsForOpIdx) {
+ for (Register &NewVReg : NewVRegsForOpIdx) {
assert(PartMap != ValMapping.end() && "Out-of-bound access");
assert(NewVReg == 0 && "Register has already been created");
// The new registers are always bound to scalar with the right size.
@@ -669,7 +712,7 @@ void RegisterBankInfo::OperandsMapper::createVRegs(unsigned OpIdx) {
void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
unsigned PartialMapIdx,
- unsigned NewVReg) {
+ Register NewVReg) {
assert(OpIdx < getInstrMapping().getNumOperands() && "Out-of-bound access");
assert(getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns >
PartialMapIdx &&
@@ -681,7 +724,7 @@ void RegisterBankInfo::OperandsMapper::setVRegs(unsigned OpIdx,
NewVRegs[OpToNewVRegIdx[OpIdx] + PartialMapIdx] = NewVReg;
}
-iterator_range<SmallVectorImpl<unsigned>::const_iterator>
+iterator_range<SmallVectorImpl<Register>::const_iterator>
RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
bool ForDebug) const {
(void)ForDebug;
@@ -693,12 +736,12 @@ RegisterBankInfo::OperandsMapper::getVRegs(unsigned OpIdx,
unsigned PartMapSize =
getInstrMapping().getOperandMapping(OpIdx).NumBreakDowns;
- SmallVectorImpl<unsigned>::const_iterator End =
+ SmallVectorImpl<Register>::const_iterator End =
getNewVRegsEnd(StartIdx, PartMapSize);
- iterator_range<SmallVectorImpl<unsigned>::const_iterator> Res =
+ iterator_range<SmallVectorImpl<Register>::const_iterator> Res =
make_range(&NewVRegs[StartIdx], End);
#ifndef NDEBUG
- for (unsigned VReg : Res)
+ for (Register VReg : Res)
assert((VReg || ForDebug) && "Some registers are uninitialized");
#endif
return Res;
@@ -747,7 +790,7 @@ void RegisterBankInfo::OperandsMapper::print(raw_ostream &OS,
IsFirst = false;
OS << '(' << printReg(getMI().getOperand(Idx).getReg(), TRI) << ", [";
bool IsFirstNewVReg = true;
- for (unsigned VReg : getVRegs(Idx)) {
+ for (Register VReg : getVRegs(Idx)) {
if (!IsFirstNewVReg)
OS << ", ";
IsFirstNewVReg = false;
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 59cbf93e7cd1..766ea1d60bac 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/GlobalISel/Utils.cpp -------------------------*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file This file implements the utility functions used by the GlobalISel
@@ -30,16 +29,10 @@ using namespace llvm;
unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
const TargetInstrInfo &TII,
- const RegisterBankInfo &RBI,
- MachineInstr &InsertPt, unsigned Reg,
+ const RegisterBankInfo &RBI, unsigned Reg,
const TargetRegisterClass &RegClass) {
- if (!RBI.constrainGenericRegister(Reg, RegClass, MRI)) {
- unsigned NewReg = MRI.createVirtualRegister(&RegClass);
- BuildMI(*InsertPt.getParent(), InsertPt, InsertPt.getDebugLoc(),
- TII.get(TargetOpcode::COPY), NewReg)
- .addReg(Reg);
- return NewReg;
- }
+ if (!RBI.constrainGenericRegister(Reg, RegClass, MRI))
+ return MRI.createVirtualRegister(&RegClass);
return Reg;
}
@@ -47,6 +40,37 @@ unsigned llvm::constrainRegToClass(MachineRegisterInfo &MRI,
unsigned llvm::constrainOperandRegClass(
const MachineFunction &MF, const TargetRegisterInfo &TRI,
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
+ const RegisterBankInfo &RBI, MachineInstr &InsertPt,
+ const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
+ unsigned OpIdx) {
+ unsigned Reg = RegMO.getReg();
+ // Assume physical registers are properly constrained.
+ assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ "PhysReg not implemented");
+
+ unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
+ // If we created a new virtual register because the class is not compatible
+ // then create a copy between the new and the old register.
+ if (ConstrainedReg != Reg) {
+ MachineBasicBlock::iterator InsertIt(&InsertPt);
+ MachineBasicBlock &MBB = *InsertPt.getParent();
+ if (RegMO.isUse()) {
+ BuildMI(MBB, InsertIt, InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), ConstrainedReg)
+ .addReg(Reg);
+ } else {
+ assert(RegMO.isDef() && "Must be a definition");
+ BuildMI(MBB, std::next(InsertIt), InsertPt.getDebugLoc(),
+ TII.get(TargetOpcode::COPY), Reg)
+ .addReg(ConstrainedReg);
+ }
+ }
+ return ConstrainedReg;
+}
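The use/def asymmetry in the copy placement above is worth spelling out. A hedged sketch of the resulting MIR when the operand's register cannot be constrained to the required class (names are illustrative):

    // Use operand: the COPY is inserted before InsertPt and feeds it.
    //   %new:rc = COPY %old
    //   InsertPt ..., %new, ...
    // Def operand: the COPY is inserted after InsertPt and copies back.
    //   InsertPt %new:rc, ...
    //   %old = COPY %new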
+
+unsigned llvm::constrainOperandRegClass(
+ const MachineFunction &MF, const TargetRegisterInfo &TRI,
+ MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
const MachineOperand &RegMO, unsigned OpIdx) {
unsigned Reg = RegMO.getReg();
@@ -82,7 +106,8 @@ unsigned llvm::constrainOperandRegClass(
// and they never reach this function.
return Reg;
}
- return constrainRegToClass(MRI, TII, RBI, InsertPt, Reg, *RegClass);
+ return constrainOperandRegClass(MF, TRI, MRI, TII, RBI, InsertPt, *RegClass,
+ RegMO, OpIdx);
}
bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
@@ -184,18 +209,71 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
const MachineRegisterInfo &MRI) {
- MachineInstr *MI = MRI.getVRegDef(VReg);
- if (MI->getOpcode() != TargetOpcode::G_CONSTANT)
+ Optional<ValueAndVReg> ValAndVReg =
+ getConstantVRegValWithLookThrough(VReg, MRI, /*LookThroughInstrs*/ false);
+ assert((!ValAndVReg || ValAndVReg->VReg == VReg) &&
+ "Value found while looking through instrs");
+ if (!ValAndVReg)
+ return None;
+ return ValAndVReg->Value;
+}
+
+Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
+ unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
+ MachineInstr *MI;
+ while ((MI = MRI.getVRegDef(VReg)) &&
+ MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) {
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_SEXT:
+ case TargetOpcode::G_ZEXT:
+ SeenOpcodes.push_back(std::make_pair(
+ MI->getOpcode(),
+ MRI.getType(MI->getOperand(0).getReg()).getSizeInBits()));
+ VReg = MI->getOperand(1).getReg();
+ break;
+ case TargetOpcode::COPY:
+ VReg = MI->getOperand(1).getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(VReg))
+ return None;
+ break;
+ case TargetOpcode::G_INTTOPTR:
+ VReg = MI->getOperand(1).getReg();
+ break;
+ default:
+ return None;
+ }
+ }
+ if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT ||
+ (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm()))
return None;
- if (MI->getOperand(1).isImm())
- return MI->getOperand(1).getImm();
+ const MachineOperand &CstVal = MI->getOperand(1);
+ unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
+ APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
+ : CstVal.getCImm()->getValue();
+ assert(Val.getBitWidth() == BitWidth &&
+ "Value bitwidth doesn't match definition type");
+ while (!SeenOpcodes.empty()) {
+ std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
+ switch (OpcodeAndSize.first) {
+ case TargetOpcode::G_TRUNC:
+ Val = Val.trunc(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_SEXT:
+ Val = Val.sext(OpcodeAndSize.second);
+ break;
+ case TargetOpcode::G_ZEXT:
+ Val = Val.zext(OpcodeAndSize.second);
+ break;
+ }
+ }
- if (MI->getOperand(1).isCImm() &&
- MI->getOperand(1).getCImm()->getBitWidth() <= 64)
- return MI->getOperand(1).getCImm()->getSExtValue();
+ if (Val.getBitWidth() > 64)
+ return None;
- return None;
+ return ValueAndVReg{Val.getSExtValue(), VReg};
}
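A minimal usage sketch of the new look-through helper, assuming %c:_(s32) = G_CONSTANT i32 42 narrowed by %t:_(s16) = G_TRUNC %c, with TruncReg naming %t (all names illustrative):

    if (auto VV = getConstantVRegValWithLookThrough(TruncReg, MRI,
                                                    /*LookThroughInstrs=*/true)) {
      // VV->VReg is %c, the vreg defined by the G_CONSTANT, and VV->Value
      // is 42 after replaying the recorded G_TRUNC on the APInt.
      int64_t Imm = VV->Value;
      (void)Imm;
    }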
const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
@@ -206,8 +284,8 @@ const llvm::ConstantFP* llvm::getConstantFPVRegVal(unsigned VReg,
return MI->getOperand(1).getFPImm();
}
-llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
- const MachineRegisterInfo &MRI) {
+llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
+ const MachineRegisterInfo &MRI) {
auto *DefMI = MRI.getVRegDef(Reg);
auto DstTy = MRI.getType(DefMI->getOperand(0).getReg());
if (!DstTy.isValid())
@@ -219,7 +297,13 @@ llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, unsigned Reg,
break;
DefMI = MRI.getVRegDef(SrcReg);
}
- return DefMI->getOpcode() == Opcode ? DefMI : nullptr;
+ return DefMI;
+}
+
+llvm::MachineInstr *llvm::getOpcodeDef(unsigned Opcode, Register Reg,
+ const MachineRegisterInfo &MRI) {
+ MachineInstr *DefMI = getDefIgnoringCopies(Reg, MRI);
+ return DefMI && DefMI->getOpcode() == Opcode ? DefMI : nullptr;
}
APFloat llvm::getAPFloatFromSize(double Val, unsigned Size) {
@@ -286,6 +370,31 @@ Optional<APInt> llvm::ConstantFoldBinOp(unsigned Opcode, const unsigned Op1,
return None;
}
+bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
+ bool SNaN) {
+ const MachineInstr *DefMI = MRI.getVRegDef(Val);
+ if (!DefMI)
+ return false;
+
+ if (DefMI->getFlag(MachineInstr::FmNoNans))
+ return true;
+
+ if (SNaN) {
+ // FP operations quiet signaling NaNs. For now, just handle the ones
+ // inserted during legalization.
+ switch (DefMI->getOpcode()) {
+ case TargetOpcode::G_FPEXT:
+ case TargetOpcode::G_FPTRUNC:
+ case TargetOpcode::G_FCANONICALIZE:
+ return true;
+ default:
+ return false;
+ }
+ }
+
+ return false;
+}
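A short usage sketch of the new query (caller context assumed; only the defining instruction is inspected, with no recursion into its operands):

    // With SNaN, any quieting operation suffices: true if Val is, say, the
    // result of a G_FPEXT, even without fast-math flags.
    bool NotSNaN = isKnownNeverNaN(Val, MRI, /*SNaN=*/true);
    // Without SNaN, only the FmNoNans flag on the def is trusted here.
    bool NotNaN = isKnownNeverNaN(Val, MRI, /*SNaN=*/false);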
+
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index d3364952f244..09201c2e7bae 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -1,9 +1,8 @@
//===- GlobalMerge.cpp - Internal globals merging -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,11 +219,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Module &M, bool isConst, unsigned AddrSpace) const {
auto &DL = M.getDataLayout();
// FIXME: Find better heuristics
- std::stable_sort(Globals.begin(), Globals.end(),
- [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
- return DL.getTypeAllocSize(GV1->getValueType()) <
- DL.getTypeAllocSize(GV2->getValueType());
- });
+ llvm::stable_sort(
+ Globals, [&DL](const GlobalVariable *GV1, const GlobalVariable *GV2) {
+ return DL.getTypeAllocSize(GV1->getValueType()) <
+ DL.getTypeAllocSize(GV2->getValueType());
+ });
// If we want to just blindly group all globals together, do so.
if (!GlobalMergeGroupByUse) {
@@ -331,7 +330,7 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
Function *ParentFn = I->getParent()->getParent();
// If we're only optimizing for size, ignore non-minsize functions.
- if (OnlyOptimizeForSize && !ParentFn->optForMinSize())
+ if (OnlyOptimizeForSize && !ParentFn->hasMinSize())
continue;
size_t UGSIdx = GlobalUsesByFunction[ParentFn];
@@ -386,11 +385,11 @@ bool GlobalMerge::doMerge(SmallVectorImpl<GlobalVariable*> &Globals,
//
// Multiply that by the size of the set to give us a crude profitability
// metric.
- std::stable_sort(UsedGlobalSets.begin(), UsedGlobalSets.end(),
- [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
- return UGS1.Globals.count() * UGS1.UsageCount <
- UGS2.Globals.count() * UGS2.UsageCount;
- });
+ llvm::stable_sort(UsedGlobalSets,
+ [](const UsedGlobalSet &UGS1, const UsedGlobalSet &UGS2) {
+ return UGS1.Globals.count() * UGS1.UsageCount <
+ UGS2.Globals.count() * UGS2.UsageCount;
+ });
// We can choose to merge all globals together, but ignore globals never used
// with another global. This catches the obviously non-profitable cases of
diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp
new file mode 100644
index 000000000000..5f57cabbe865
--- /dev/null
+++ b/lib/CodeGen/HardwareLoops.cpp
@@ -0,0 +1,463 @@
+//===-- HardwareLoops.cpp - Target Independent Hardware Loops --*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// Insert hardware loop intrinsics into loops which are deemed profitable by
+/// the target, by querying TargetTransformInfo. A hardware loop consists of
+/// two intrinsics: one, outside the loop, to set the loop iteration count and
+/// another, in the exit block, to decrement the counter. The decremented value
+/// can either be carried through the loop via a phi or handled in some opaque
+/// way by the target.
+///
+//===----------------------------------------------------------------------===//
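A hedged sketch of the IR shape the pass produces for a simple counted loop in the non-PHI form (block and value names are illustrative):

    // preheader:
    //   call void @llvm.set.loop.iterations.i32(i32 %n)
    //   br label %header
    // header (also the latch here):
    //   ...loop body...
    //   %cont = call i1 @llvm.loop.decrement.i32(i32 1)
    //   br i1 %cont, label %header, label %exit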
+
+#include "llvm/Pass.h"
+#include "llvm/PassRegistry.h"
+#include "llvm/PassSupport.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionExpander.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+
+#define DEBUG_TYPE "hardware-loops"
+
+#define HW_LOOPS_NAME "Hardware Loop Insertion"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ForceHardwareLoops("force-hardware-loops", cl::Hidden, cl::init(false),
+ cl::desc("Force hardware loops intrinsics to be inserted"));
+
+static cl::opt<bool>
+ForceHardwareLoopPHI(
+ "force-hardware-loop-phi", cl::Hidden, cl::init(false),
+ cl::desc("Force hardware loop counter to be updated through a phi"));
+
+static cl::opt<bool>
+ForceNestedLoop("force-nested-hardware-loop", cl::Hidden, cl::init(false),
+ cl::desc("Force allowance of nested hardware loops"));
+
+static cl::opt<unsigned>
+LoopDecrement("hardware-loop-decrement", cl::Hidden, cl::init(1),
+ cl::desc("Set the loop decrement value"));
+
+static cl::opt<unsigned>
+CounterBitWidth("hardware-loop-counter-bitwidth", cl::Hidden, cl::init(32),
+ cl::desc("Set the loop counter bitwidth"));
+
+static cl::opt<bool>
+ForceGuardLoopEntry(
+ "force-hardware-loop-guard", cl::Hidden, cl::init(false),
+ cl::desc("Force generation of loop guard intrinsic"));
+
+STATISTIC(NumHWLoops, "Number of loops converted to hardware loops");
+
+namespace {
+
+ using TTI = TargetTransformInfo;
+
+ class HardwareLoops : public FunctionPass {
+ public:
+ static char ID;
+
+ HardwareLoops() : FunctionPass(ID) {
+ initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnFunction(Function &F) override;
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<LoopInfoWrapperPass>();
+ AU.addPreserved<LoopInfoWrapperPass>();
+ AU.addRequired<DominatorTreeWrapperPass>();
+ AU.addPreserved<DominatorTreeWrapperPass>();
+ AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addRequired<AssumptionCacheTracker>();
+ AU.addRequired<TargetTransformInfoWrapperPass>();
+ }
+
+ // Try to convert the given Loop into a hardware loop.
+ bool TryConvertLoop(Loop *L);
+
+ // Given that the target believes the loop to be profitable, try to
+ // convert it.
+ bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
+
+ private:
+ ScalarEvolution *SE = nullptr;
+ LoopInfo *LI = nullptr;
+ const DataLayout *DL = nullptr;
+ const TargetTransformInfo *TTI = nullptr;
+ DominatorTree *DT = nullptr;
+ bool PreserveLCSSA = false;
+ AssumptionCache *AC = nullptr;
+ TargetLibraryInfo *LibInfo = nullptr;
+ Module *M = nullptr;
+ bool MadeChange = false;
+ };
+
+ class HardwareLoop {
+ // Expand the trip count scev into a value that we can use.
+ Value *InitLoopCount();
+
+ // Insert the set_loop_iteration intrinsic.
+ void InsertIterationSetup(Value *LoopCountInit);
+
+ // Insert the loop_decrement intrinsic.
+ void InsertLoopDec();
+
+ // Insert the loop_decrement_reg intrinsic.
+ Instruction *InsertLoopRegDec(Value *EltsRem);
+
+ // If the target requires the counter value to be updated in the loop,
+ // insert a phi to hold the value. The intended purpose is for use by
+ // loop_decrement_reg.
+ PHINode *InsertPHICounter(Value *NumElts, Value *EltsRem);
+
+ // Create a new cmp that checks the returned value of loop_decrement*,
+ // and update the exit branch to use it.
+ void UpdateBranch(Value *EltsRem);
+
+ public:
+ HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
+ const DataLayout &DL) :
+ SE(SE), DL(DL), L(Info.L), M(L->getHeader()->getModule()),
+ ExitCount(Info.ExitCount),
+ CountType(Info.CountType),
+ ExitBranch(Info.ExitBranch),
+ LoopDecrement(Info.LoopDecrement),
+ UsePHICounter(Info.CounterInReg),
+ UseLoopGuard(Info.PerformEntryTest) { }
+
+ void Create();
+
+ private:
+ ScalarEvolution &SE;
+ const DataLayout &DL;
+ Loop *L = nullptr;
+ Module *M = nullptr;
+ const SCEV *ExitCount = nullptr;
+ Type *CountType = nullptr;
+ BranchInst *ExitBranch = nullptr;
+ Value *LoopDecrement = nullptr;
+ bool UsePHICounter = false;
+ bool UseLoopGuard = false;
+ BasicBlock *BeginBB = nullptr;
+ };
+}
+
+char HardwareLoops::ID = 0;
+
+bool HardwareLoops::runOnFunction(Function &F) {
+ if (skipFunction(F))
+ return false;
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
+
+ LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ DL = &F.getParent()->getDataLayout();
+ auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
+ LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+ AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ M = F.getParent();
+
+ for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I) {
+ Loop *L = *I;
+ if (!L->getParentLoop())
+ TryConvertLoop(L);
+ }
+
+ return MadeChange;
+}
+
+// Return true if the search should stop, which will be when an inner loop is
+// converted and the parent loop doesn't support containing a hardware loop.
+bool HardwareLoops::TryConvertLoop(Loop *L) {
+ // Process nested loops first.
+ for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+ if (TryConvertLoop(*I))
+ return true; // Stop search.
+
+ HardwareLoopInfo HWLoopInfo(L);
+ if (!HWLoopInfo.canAnalyze(*LI))
+ return false;
+
+ if (TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo) ||
+ ForceHardwareLoops) {
+
+ // Allow overriding of the counter width and loop decrement value.
+ if (CounterBitWidth.getNumOccurrences())
+ HWLoopInfo.CountType =
+ IntegerType::get(M->getContext(), CounterBitWidth);
+
+ if (LoopDecrement.getNumOccurrences())
+ HWLoopInfo.LoopDecrement =
+ ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+
+ MadeChange |= TryConvertLoop(HWLoopInfo);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+ }
+
+ return false;
+}
+
+bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+
+ Loop *L = HWLoopInfo.L;
+ LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
+
+ if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
+ ForceHardwareLoopPHI))
+ return false;
+
+ assert(
+ (HWLoopInfo.ExitBlock && HWLoopInfo.ExitBranch && HWLoopInfo.ExitCount) &&
+ "Hardware Loop must have set exit info.");
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+
+ // If we don't have a preheader, then insert one.
+ if (!Preheader)
+ Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+ if (!Preheader)
+ return false;
+
+ HardwareLoop HWLoop(HWLoopInfo, *SE, *DL);
+ HWLoop.Create();
+ ++NumHWLoops;
+ return true;
+}
+
+void HardwareLoop::Create() {
+ LLVM_DEBUG(dbgs() << "HWLoops: Converting loop..\n");
+
+ Value *LoopCountInit = InitLoopCount();
+ if (!LoopCountInit)
+ return;
+
+ InsertIterationSetup(LoopCountInit);
+
+ if (UsePHICounter || ForceHardwareLoopPHI) {
+ Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
+ Value *EltsRem = InsertPHICounter(LoopCountInit, LoopDec);
+ LoopDec->setOperand(0, EltsRem);
+ UpdateBranch(LoopDec);
+ } else
+ InsertLoopDec();
+
+ // Run through the basic blocks of the loop and see if any of them have dead
+ // PHIs that can be removed.
+ for (auto I : L->blocks())
+ DeleteDeadPHIs(I);
+}
+
+static bool CanGenerateTest(Loop *L, Value *Count) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader->getSinglePredecessor())
+ return false;
+
+ BasicBlock *Pred = Preheader->getSinglePredecessor();
+ if (!isa<BranchInst>(Pred->getTerminator()))
+ return false;
+
+ auto *BI = cast<BranchInst>(Pred->getTerminator());
+ if (BI->isUnconditional() || !isa<ICmpInst>(BI->getCondition()))
+ return false;
+
+ // Check that the icmp is checking for equality of Count and zero and that
+ // a non-zero value results in entering the loop.
+ auto ICmp = cast<ICmpInst>(BI->getCondition());
+ LLVM_DEBUG(dbgs() << " - Found condition: " << *ICmp << "\n");
+ if (!ICmp->isEquality())
+ return false;
+
+ auto IsCompareZero = [](ICmpInst *ICmp, Value *Count, unsigned OpIdx) {
+ if (auto *Const = dyn_cast<ConstantInt>(ICmp->getOperand(OpIdx)))
+ return Const->isZero() && ICmp->getOperand(OpIdx ^ 1) == Count;
+ return false;
+ };
+
+ if (!IsCompareZero(ICmp, Count, 0) && !IsCompareZero(ICmp, Count, 1))
+ return false;
+
+ unsigned SuccIdx = ICmp->getPredicate() == ICmpInst::ICMP_NE ? 0 : 1;
+ if (BI->getSuccessor(SuccIdx) != Preheader)
+ return false;
+
+ return true;
+}
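CanGenerateTest only accepts a narrow guard pattern; a minimal sketch of a matching entry, with illustrative names:

    // entry:
    //   %guard = icmp ne i32 %count, 0
    //   br i1 %guard, label %preheader, label %exit ; non-zero enters the loop
    // preheader:                                    ; single predecessor: entry
    //   br label %header
    // The test.set form later replaces %guard with the i1 returned by
    // @llvm.test.set.loop.iterations.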
+
+Value *HardwareLoop::InitLoopCount() {
+ LLVM_DEBUG(dbgs() << "HWLoops: Initialising loop counter value:\n");
+ // Can we replace a conditional branch with an intrinsic that sets the
+ // loop counter and tests that is not zero?
+
+ SCEVExpander SCEVE(SE, DL, "loopcnt");
+ if (!ExitCount->getType()->isPointerTy() &&
+ ExitCount->getType() != CountType)
+ ExitCount = SE.getZeroExtendExpr(ExitCount, CountType);
+
+ ExitCount = SE.getAddExpr(ExitCount, SE.getOne(CountType));
+
+ // If we're trying to use the 'test and set' form of the intrinsic, we need
+ // to replace a conditional branch that is controlling entry to the loop. It
+ // is likely (guaranteed?) that the preheader has an unconditional branch to
+ // the loop header, so also check if it has a single predecessor.
+ if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
+ SE.getZero(ExitCount->getType()))) {
+ LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
+ UseLoopGuard |= ForceGuardLoopEntry;
+ } else
+ UseLoopGuard = false;
+
+ BasicBlock *BB = L->getLoopPreheader();
+ if (UseLoopGuard && BB->getSinglePredecessor() &&
+ cast<BranchInst>(BB->getTerminator())->isUnconditional())
+ BB = BB->getSinglePredecessor();
+
+ if (!isSafeToExpandAt(ExitCount, BB->getTerminator(), SE)) {
+ LLVM_DEBUG(dbgs() << "- Bailing, unsafe to expand ExitCount "
+ << *ExitCount << "\n");
+ return nullptr;
+ }
+
+ Value *Count = SCEVE.expandCodeFor(ExitCount, CountType,
+ BB->getTerminator());
+
+ // FIXME: We've expanded Count where we hope to insert the counter setting
+ // intrinsic. But, in the case of the 'test and set' form, we may fall back
+ // to just the 'set' form, in which case the insertion block is most likely
+ // different. That means there will be instruction(s) in a block that may
+ // not be needed. isLoopEntryGuardedByCond tries to avoid this issue, but
+ // it doesn't appear to work in all cases.
+
+ UseLoopGuard = UseLoopGuard && CanGenerateTest(L, Count);
+ BeginBB = UseLoopGuard ? BB : L->getLoopPreheader();
+ LLVM_DEBUG(dbgs() << " - Loop Count: " << *Count << "\n"
+ << " - Expanded Count in " << BB->getName() << "\n"
+ << " - Will insert set counter intrinsic into: "
+ << BeginBB->getName() << "\n");
+ return Count;
+}
+
+void HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
+ IRBuilder<> Builder(BeginBB->getTerminator());
+ Type *Ty = LoopCountInit->getType();
+ Intrinsic::ID ID = UseLoopGuard ?
+ Intrinsic::test_set_loop_iterations : Intrinsic::set_loop_iterations;
+ Function *LoopIter = Intrinsic::getDeclaration(M, ID, Ty);
+ Value *SetCount = Builder.CreateCall(LoopIter, LoopCountInit);
+
+ // Use the return value of the intrinsic to control the entry of the loop.
+ if (UseLoopGuard) {
+ assert((isa<BranchInst>(BeginBB->getTerminator()) &&
+ cast<BranchInst>(BeginBB->getTerminator())->isConditional()) &&
+ "Expected conditional branch");
+ auto *LoopGuard = cast<BranchInst>(BeginBB->getTerminator());
+ LoopGuard->setCondition(SetCount);
+ if (LoopGuard->getSuccessor(0) != L->getLoopPreheader())
+ LoopGuard->swapSuccessors();
+ }
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop counter: "
+ << *SetCount << "\n");
+}
+
+void HardwareLoop::InsertLoopDec() {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement,
+ LoopDecrement->getType());
+ Value *Ops[] = { LoopDecrement };
+ Value *NewCond = CondBuilder.CreateCall(DecFunc, Ops);
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *NewCond << "\n");
+}
+
+Instruction* HardwareLoop::InsertLoopRegDec(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+
+ Function *DecFunc =
+ Intrinsic::getDeclaration(M, Intrinsic::loop_decrement_reg,
+ { EltsRem->getType(), EltsRem->getType(),
+ LoopDecrement->getType()
+ });
+ Value *Ops[] = { EltsRem, LoopDecrement };
+ Value *Call = CondBuilder.CreateCall(DecFunc, Ops);
+
+ LLVM_DEBUG(dbgs() << "HWLoops: Inserted loop dec: " << *Call << "\n");
+ return cast<Instruction>(Call);
+}
+
+PHINode* HardwareLoop::InsertPHICounter(Value *NumElts, Value *EltsRem) {
+ BasicBlock *Preheader = L->getLoopPreheader();
+ BasicBlock *Header = L->getHeader();
+ BasicBlock *Latch = ExitBranch->getParent();
+ IRBuilder<> Builder(Header->getFirstNonPHI());
+ PHINode *Index = Builder.CreatePHI(NumElts->getType(), 2);
+ Index->addIncoming(NumElts, Preheader);
+ Index->addIncoming(EltsRem, Latch);
+ LLVM_DEBUG(dbgs() << "HWLoops: PHI Counter: " << *Index << "\n");
+ return Index;
+}
+
+void HardwareLoop::UpdateBranch(Value *EltsRem) {
+ IRBuilder<> CondBuilder(ExitBranch);
+ Value *NewCond =
+ CondBuilder.CreateICmpNE(EltsRem, ConstantInt::get(EltsRem->getType(), 0));
+ Value *OldCond = ExitBranch->getCondition();
+ ExitBranch->setCondition(NewCond);
+
+ // The false branch must exit the loop.
+ if (!L->contains(ExitBranch->getSuccessor(0)))
+ ExitBranch->swapSuccessors();
+
+ // The old condition may be dead now, and may have even created a dead PHI
+ // (the original induction variable).
+ RecursivelyDeleteTriviallyDeadInstructions(OldCond);
+}
+
+INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
+INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+
+FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index ceeba639ee09..b17a253fe23f 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -1,9 +1,8 @@
//===- IfConversion.cpp - Machine code if conversion pass -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1317,7 +1316,7 @@ void IfConverter::AnalyzeBlocks(
AnalyzeBlock(MBB, Tokens);
// Sort to favor more complex ifcvt scheme.
- std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp);
+ llvm::stable_sort(Tokens, IfcvtTokenCmp);
}
/// Returns true either if ToMBB is the next block after MBB or that all the
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index f411ee6745d0..1e82ea659617 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -1,9 +1,8 @@
//===- ImplicitNullChecks.cpp - Fold null checks into memory accesses -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -181,7 +180,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
/// Returns AR_NoAlias if \p MI memory operation does not alias with
/// \p PrevMI, AR_MayAlias if they may alias and AR_WillAliasEverything if
/// they may alias and any further memory operation may alias with \p PrevMI.
- AliasResult areMemoryOpsAliased(MachineInstr &MI, MachineInstr *PrevMI);
+ AliasResult areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const;
enum SuitabilityResult {
SR_Suitable,
@@ -195,7 +195,8 @@ class ImplicitNullChecks : public MachineFunctionPass {
  /// no sense to continue the lookup, since no further instruction will be
  /// usable. \p PrevInsts is the set of instructions seen since
/// the explicit null check on \p PointerReg.
- SuitabilityResult isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ SuitabilityResult isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts);
/// Return true if \p FaultingMI can be hoisted from after the
@@ -228,7 +229,8 @@ public:
} // end anonymous namespace
bool ImplicitNullChecks::canHandle(const MachineInstr *MI) {
- if (MI->isCall() || MI->hasUnmodeledSideEffects())
+ if (MI->isCall() || MI->mayRaiseFPException() ||
+ MI->hasUnmodeledSideEffects())
return false;
auto IsRegMask = [](const MachineOperand &MO) { return MO.isRegMask(); };
(void)IsRegMask;
@@ -319,8 +321,8 @@ static bool AnyAliasLiveIn(const TargetRegisterInfo *TRI,
}
ImplicitNullChecks::AliasResult
-ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
- MachineInstr *PrevMI) {
+ImplicitNullChecks::areMemoryOpsAliased(const MachineInstr &MI,
+ const MachineInstr *PrevMI) const {
  // If it is not a memory access, skip the check.
if (!(PrevMI->mayStore() || PrevMI->mayLoad()))
return AR_NoAlias;
@@ -357,10 +359,11 @@ ImplicitNullChecks::areMemoryOpsAliased(MachineInstr &MI,
}
ImplicitNullChecks::SuitabilityResult
-ImplicitNullChecks::isSuitableMemoryOp(MachineInstr &MI, unsigned PointerReg,
+ImplicitNullChecks::isSuitableMemoryOp(const MachineInstr &MI,
+ unsigned PointerReg,
ArrayRef<MachineInstr *> PrevInsts) {
int64_t Offset;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI) ||
!BaseOp->isReg() || BaseOp->getReg() != PointerReg)
diff --git a/lib/CodeGen/IndirectBrExpandPass.cpp b/lib/CodeGen/IndirectBrExpandPass.cpp
index 7b05ebf820fd..7ac093ba4a71 100644
--- a/lib/CodeGen/IndirectBrExpandPass.cpp
+++ b/lib/CodeGen/IndirectBrExpandPass.cpp
@@ -1,9 +1,8 @@
//===- IndirectBrExpandPass.cpp - Expand indirectbr to switch -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -149,11 +148,9 @@ bool IndirectBrExpandPass::runOnFunction(Function &F) {
ConstantInt *BBIndexC = ConstantInt::get(ITy, BBIndex);
// Now rewrite the blockaddress to an integer constant based on the index.
- // FIXME: We could potentially preserve the uses as arguments to inline asm.
- // This would allow some uses such as diagnostic information in crashes to
- // have higher quality even when this transform is enabled, but would break
- // users that round-trip blockaddresses through inline assembly and then
- // back into an indirectbr.
+ // FIXME: This part doesn't properly recognize other uses of blockaddress
+ // expressions, for instance, where they are used to pass labels to
+ // asm-goto. This part of the pass needs a rework.
BA->replaceAllUsesWith(ConstantExpr::getIntToPtr(BBIndexC, BA->getType()));
}
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 007e9283d833..41ae8061a917 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -1,9 +1,8 @@
//===- InlineSpiller.cpp - Insert spills and restores inline --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -76,6 +75,10 @@ STATISTIC(NumRemats, "Number of rematerialized defs for spilling");
static cl::opt<bool> DisableHoisting("disable-spill-hoist", cl::Hidden,
cl::desc("Disable inline spill hoisting"));
+static cl::opt<bool>
+RestrictStatepointRemat("restrict-statepoint-remat",
+ cl::init(false), cl::Hidden,
+ cl::desc("Restrict remat for statepoint operands"));
namespace {
@@ -215,6 +218,7 @@ private:
void eliminateRedundantSpills(LiveInterval &LI, VNInfo *VNI);
void markValueUsed(LiveInterval*, VNInfo*);
+ bool canGuaranteeAssignmentAfterRemat(unsigned VReg, MachineInstr &MI);
bool reMaterializeFor(LiveInterval &, MachineInstr &MI);
void reMaterializeAll();
@@ -514,6 +518,28 @@ void InlineSpiller::markValueUsed(LiveInterval *LI, VNInfo *VNI) {
} while (!WorkList.empty());
}
+bool InlineSpiller::canGuaranteeAssignmentAfterRemat(unsigned VReg,
+ MachineInstr &MI) {
+ if (!RestrictStatepointRemat)
+ return true;
+ // Here's a quick explanation of the problem we're trying to handle here:
+ // * There are some pseudo instructions with more vreg uses than there are
+ // physical registers on the machine.
+ // * This is normally handled by spilling the vreg, and folding the reload
+ // into the user instruction. (Thus decreasing the number of used vregs
+ // until the remainder can be assigned to physregs.)
+ // * However, since we may try to spill vregs in any order, we can end up
+ // trying to spill each operand to the instruction, and then rematting it
+ // instead. When that happens, the new live intervals (for the remats) are
+ // expected to be trivially assignable (i.e. RS_Done). However, since we
+ // may have more remats than physregs, we're guaranteed to fail to assign
+ // one.
+ // At the moment, we only handle this for STATEPOINTs since they're the only
+ // pseudo op where we've seen this. If we start seeing other instructions
+ // with the same problem, we need to revisit this.
+ return (MI.getOpcode() != TargetOpcode::STATEPOINT);
+}
+
/// reMaterializeFor - Attempt to rematerialize before MI instead of reloading.
bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
// Analyze instruction
@@ -569,6 +595,14 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, MachineInstr &MI) {
return true;
}
+ // If we can't guarantee that we'll be able to actually assign the new vreg,
+ // we can't remat.
+ if (!canGuaranteeAssignmentAfterRemat(VirtReg.reg, MI)) {
+ markValueUsed(&VirtReg, ParentVNI);
+ LLVM_DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << MI);
+ return false;
+ }
+
// Allocate a new register for the remat.
unsigned NewVReg = Edit->createFrom(Original);
@@ -799,11 +833,11 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
if (FoldOps.empty())
return false;
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, MI->getParent());
MachineInstr *FoldMI =
LoadMI ? TII.foldMemoryOperand(*MI, FoldOps, *LoadMI, &LIS)
- : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS);
+ : TII.foldMemoryOperand(*MI, FoldOps, StackSlot, &LIS, &VRM);
if (!FoldMI)
return false;
@@ -834,6 +868,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
HSpiller.rmFromMergeableSpills(*MI, FI))
--NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
+ if (MI->isCall())
+ MI->getMF()->updateCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -871,7 +907,7 @@ void InlineSpiller::insertReload(unsigned NewVReg,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, &MBB);
TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot,
MRI.getRegClass(NewVReg), &TRI);
@@ -901,7 +937,7 @@ void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
- MachineInstrSpan MIS(MI);
+ MachineInstrSpan MIS(MI, &MBB);
bool IsRealSpill = true;
if (isFullUndefDef(*MI)) {
// Don't spill undef value.
diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp
index 82f6e8d8e234..7b50dac4cd1a 100644
--- a/lib/CodeGen/InterferenceCache.cpp
+++ b/lib/CodeGen/InterferenceCache.cpp
@@ -1,9 +1,8 @@
//===- InterferenceCache.cpp - Caching per-block interference -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h
index 160e2b16e294..50c6ac62d194 100644
--- a/lib/CodeGen/InterferenceCache.h
+++ b/lib/CodeGen/InterferenceCache.h
@@ -1,9 +1,8 @@
//===- InterferenceCache.h - Caching per-block interference ----*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/InterleavedAccessPass.cpp b/lib/CodeGen/InterleavedAccessPass.cpp
index fd2ff162630a..14bc560a561c 100644
--- a/lib/CodeGen/InterleavedAccessPass.cpp
+++ b/lib/CodeGen/InterleavedAccessPass.cpp
@@ -1,9 +1,8 @@
//===- InterleavedAccessPass.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -164,14 +163,19 @@ static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
/// <0, 2, 4, 6> (mask of index 0 to extract even elements)
/// <1, 3, 5, 7> (mask of index 1 to extract odd elements)
static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
- unsigned &Index, unsigned MaxFactor) {
+ unsigned &Index, unsigned MaxFactor,
+ unsigned NumLoadElements) {
if (Mask.size() < 2)
return false;
// Check potential Factors.
- for (Factor = 2; Factor <= MaxFactor; Factor++)
+ for (Factor = 2; Factor <= MaxFactor; Factor++) {
+ // Make sure we don't produce a load wider than the input load.
+ if (Mask.size() * Factor > NumLoadElements)
+ return false;
if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
return true;
+ }
return false;
}
@@ -303,9 +307,10 @@ bool InterleavedAccess::lowerInterleavedLoad(
unsigned Factor, Index;
+ unsigned NumLoadElements = LI->getType()->getVectorNumElements();
// Check if the first shufflevector is DE-interleave shuffle.
if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index,
- MaxFactor))
+ MaxFactor, NumLoadElements))
return false;
// Holds the corresponding index for each DE-interleave shuffle.
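
The new NumLoadElements parameter rejects factors whose de-interleave would read past the end of the loaded vector: a mask of N indices at factor F addresses N * F source lanes. A self-contained sketch of the combined check, simplified from the pass:

#include <cstdio>
#include <vector>

// Returns true if Mask is <Index, Index+Factor, Index+2*Factor, ...>.
static bool isDeInterleaveMaskOfFactor(const std::vector<int> &Mask,
                                       unsigned Factor, unsigned &Index) {
  for (Index = 0; Index < Factor; Index++) {
    bool Match = true;
    for (unsigned I = 0; I < Mask.size(); I++)
      if (Mask[I] >= 0 && Mask[I] != (int)(Index + I * Factor))
        Match = false;
    if (Match)
      return true;
  }
  return false;
}

int main() {
  std::vector<int> Mask = {0, 2, 4, 6}; // Even elements of an 8-wide load.
  unsigned Factor = 2, Index;
  // The guard from the hunk: 4 mask elements * factor 2 == 8 source lanes,
  // which must not exceed the number of elements the load provides.
  unsigned NumLoadElements = 8;
  if (Mask.size() * Factor <= NumLoadElements &&
      isDeInterleaveMaskOfFactor(Mask, Factor, Index))
    std::printf("factor %u, index %u\n", Factor, Index); // factor 2, index 0
}
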
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 989fa164ad2d..9525da849e2a 100644
--- a/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -1,9 +1,8 @@
//===- InterleavedLoadCombine.cpp - Combine Interleaved Loads ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -961,6 +960,7 @@ public:
if (!PtrTy) {
Result = Polynomial();
BasePtr = nullptr;
+ return;
}
unsigned PointerBits =
DL.getIndexSizeInBits(PtrTy->getPointerAddressSpace());
@@ -1219,7 +1219,7 @@ bool InterleavedLoadCombineImpl::combine(std::list<VectorInfo> &InterleavedLoad,
"interleaved.wide.ptrcast");
// Create the wide load and update the MemorySSA.
- auto LI = Builder.CreateAlignedLoad(CI, InsertionPoint->getAlignment(),
+ auto LI = Builder.CreateAlignedLoad(ILTy, CI, InsertionPoint->getAlignment(),
"interleaved.wide.load");
auto MSSAU = MemorySSAUpdater(&MSSA);
MemoryUse *MSSALoad = cast<MemoryUse>(MSSAU.createMemoryAccessBefore(
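
Two fixes land in this file. The first adds a missing early return: previously a non-pointer type would reset Result and BasePtr but then fall through to PtrTy->getPointerAddressSpace() on a null PtrTy. The second switches to the CreateAlignedLoad overload that takes the load's result type (ILTy) explicitly. The guard-clause shape, as a standalone sketch with illustrative types:

#include <cstdint>

struct PtrType { unsigned AddrSpace = 0; };

struct State {
  const PtrType *BasePtr = nullptr;
  uint64_t IndexBits = 0;
};

// Reset and bail out when the operand is not a pointer. The bug fixed above
// performed the reset but then fell through into the dereference below.
void computePointerInfo(const PtrType *PtrTy, State &S,
                        uint64_t (*IndexSizeFor)(unsigned AddrSpace)) {
  if (!PtrTy) {
    S = State(); // Invalidate previous results.
    return;      // The early return added by the hunk.
  }
  S.BasePtr = PtrTy;
  S.IndexBits = IndexSizeFor(PtrTy->AddrSpace); // Safe: PtrTy is non-null.
}
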
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 707113bd973b..8cbd8bcaeabb 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -1,9 +1,8 @@
//===-- IntrinsicLowering.cpp - Intrinsic Lowering default implementation -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -24,39 +23,6 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
-template <class ArgIt>
-static void EnsureFunctionExists(Module &M, const char *Name,
- ArgIt ArgBegin, ArgIt ArgEnd,
- Type *RetTy) {
- // Insert a correctly-typed definition now.
- std::vector<Type *> ParamTys;
- for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
- ParamTys.push_back(I->getType());
- M.getOrInsertFunction(Name, FunctionType::get(RetTy, ParamTys, false));
-}
-
-static void EnsureFPIntrinsicsExist(Module &M, Function &Fn,
- const char *FName,
- const char *DName, const char *LDName) {
- // Insert definitions for all the floating point types.
- switch((int)Fn.arg_begin()->getType()->getTypeID()) {
- case Type::FloatTyID:
- EnsureFunctionExists(M, FName, Fn.arg_begin(), Fn.arg_end(),
- Type::getFloatTy(M.getContext()));
- break;
- case Type::DoubleTyID:
- EnsureFunctionExists(M, DName, Fn.arg_begin(), Fn.arg_end(),
- Type::getDoubleTy(M.getContext()));
- break;
- case Type::X86_FP80TyID:
- case Type::FP128TyID:
- case Type::PPC_FP128TyID:
- EnsureFunctionExists(M, LDName, Fn.arg_begin(), Fn.arg_end(),
- Fn.arg_begin()->getType());
- break;
- }
-}
-
/// This function is used when we want to lower an intrinsic call to a call of
/// an external function. This handles hard cases such as when there was already
/// a prototype for the external function, but that prototype doesn't match the
@@ -72,8 +38,8 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
std::vector<Type *> ParamTys;
for (ArgIt I = ArgBegin; I != ArgEnd; ++I)
ParamTys.push_back((*I)->getType());
- Constant* FCache = M->getOrInsertFunction(NewFn,
- FunctionType::get(RetTy, ParamTys, false));
+ FunctionCallee FCache =
+ M->getOrInsertFunction(NewFn, FunctionType::get(RetTy, ParamTys, false));
IRBuilder<> Builder(CI->getParent(), CI->getIterator());
SmallVector<Value *, 8> Args(ArgBegin, ArgEnd);
@@ -92,75 +58,6 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
# define setjmp_undefined_for_msvc
#endif
-void IntrinsicLowering::AddPrototypes(Module &M) {
- LLVMContext &Context = M.getContext();
- for (auto &F : M)
- if (F.isDeclaration() && !F.use_empty())
- switch (F.getIntrinsicID()) {
- default: break;
- case Intrinsic::setjmp:
- EnsureFunctionExists(M, "setjmp", F.arg_begin(), F.arg_end(),
- Type::getInt32Ty(M.getContext()));
- break;
- case Intrinsic::longjmp:
- EnsureFunctionExists(M, "longjmp", F.arg_begin(), F.arg_end(),
- Type::getVoidTy(M.getContext()));
- break;
- case Intrinsic::siglongjmp:
- EnsureFunctionExists(M, "abort", F.arg_end(), F.arg_end(),
- Type::getVoidTy(M.getContext()));
- break;
- case Intrinsic::memcpy:
- M.getOrInsertFunction("memcpy",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::memmove:
- M.getOrInsertFunction("memmove",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::memset:
- M.getOrInsertFunction("memset",
- Type::getInt8PtrTy(Context),
- Type::getInt8PtrTy(Context),
- Type::getInt32Ty(M.getContext()),
- DL.getIntPtrType(Context));
- break;
- case Intrinsic::sqrt:
- EnsureFPIntrinsicsExist(M, F, "sqrtf", "sqrt", "sqrtl");
- break;
- case Intrinsic::sin:
- EnsureFPIntrinsicsExist(M, F, "sinf", "sin", "sinl");
- break;
- case Intrinsic::cos:
- EnsureFPIntrinsicsExist(M, F, "cosf", "cos", "cosl");
- break;
- case Intrinsic::pow:
- EnsureFPIntrinsicsExist(M, F, "powf", "pow", "powl");
- break;
- case Intrinsic::log:
- EnsureFPIntrinsicsExist(M, F, "logf", "log", "logl");
- break;
- case Intrinsic::log2:
- EnsureFPIntrinsicsExist(M, F, "log2f", "log2", "log2l");
- break;
- case Intrinsic::log10:
- EnsureFPIntrinsicsExist(M, F, "log10f", "log10", "log10l");
- break;
- case Intrinsic::exp:
- EnsureFPIntrinsicsExist(M, F, "expf", "exp", "expl");
- break;
- case Intrinsic::exp2:
- EnsureFPIntrinsicsExist(M, F, "exp2f", "exp2", "exp2l");
- break;
- }
-}
-
/// Emit the code to lower bswap of V before the specified instruction IP.
static Value *LowerBSWAP(LLVMContext &Context, Value *V, Instruction *IP) {
assert(V->getType()->isIntOrIntVectorTy() && "Can't bswap a non-integer type!");
@@ -601,7 +498,7 @@ bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
// Okay, we can do this xform, do so now.
Module *M = CI->getModule();
- Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
+ Function *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Ty);
Value *Op = CI->getArgOperand(0);
Op = CallInst::Create(Int, Op, CI->getName(), CI);
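
The FCache change tracks an LLVM API update (around LLVM 9): Module::getOrInsertFunction returns a FunctionCallee, which bundles the callee pointer with its FunctionType, rather than a bare Constant *. A sketch of the updated call sequence; it assumes the usual LLVM headers, an existing Module *M, an IRBuilder<> Builder, and values Dst, Src, and Len, and the function name is a placeholder, not a real runtime symbol:

// Look up or create a declaration with the desired type.
FunctionType *FTy =
    FunctionType::get(Builder.getInt8PtrTy(),
                      {Builder.getInt8PtrTy(), Builder.getInt8PtrTy(),
                       Builder.getInt64Ty()},
                      /*isVarArg=*/false);
FunctionCallee Callee = M->getOrInsertFunction("copy_impl_hypothetical", FTy);

// FunctionCallee is accepted directly by CreateCall, so the call is built
// against the declared type even if the module already contained a
// mismatched declaration (in which case the callee is not a Function).
CallInst *Call = Builder.CreateCall(Callee, {Dst, Src, Len});
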
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 52e832cc38c1..886ae7e94adb 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -1,9 +1,8 @@
//===-- LLVMTargetMachine.cpp - Implement the LLVMTargetMachine class -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -202,6 +201,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
return true;
if (!TargetPassConfig::willCompleteCodeGenPipeline()) {
+ if (this->getTargetTriple().isOSAIX()) {
+ // On AIX, we might manifest MCSymbols during SDAG lowering. For MIR
+ // testing to be meaningful, we need to ensure that the symbols created
+ // are MCSymbolXCOFF variants, which requires that
+ // the TargetLoweringObjectFile instance has been initialized.
+ MCContext &Ctx = MMI->getContext();
+ const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
+ .Initialize(Ctx, *this);
+ }
PM.add(createPrintMIRPass(Out));
} else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
return true;
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index f9f33a98a9d1..8a7a41d0f763 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -1,9 +1,8 @@
//===---- LatencyPriorityQueue.cpp - A latency-oriented priority queue ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 5b52cc66a297..200ac0ba15bf 100644
--- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
///===- LazyMachineBlockFrequencyInfo.cpp - Lazy Machine Block Frequency --===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index d06821bdfcce..503821537ed9 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -1,9 +1,8 @@
//===- LexicalScopes.cpp - Collecting lexical scope info ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index fc0ebea2d36c..a669e64692b9 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -1,9 +1,8 @@
//===- LiveDebugValues.cpp - Tracking Debug Value MIs ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -21,6 +20,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseBitVector.h"
#include "llvm/ADT/Statistic.h"
@@ -35,13 +35,15 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
@@ -57,6 +59,7 @@
#include <cstdint>
#include <functional>
#include <queue>
+#include <tuple>
#include <utility>
#include <vector>
@@ -68,12 +71,12 @@ STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
// If @MI is a DBG_VALUE whose debug value is described by a defined
// register, returns the number of that register; otherwise, returns 0.
-static unsigned isDbgValueDescribedByReg(const MachineInstr &MI) {
+static Register isDbgValueDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue() && "expected a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
// If location of variable is described using a register (directly
// or indirectly), this register is always a first operand.
- return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : 0;
+ return MI.getOperand(0).isReg() ? MI.getOperand(0).getReg() : Register();
}
namespace {
@@ -86,6 +89,8 @@ private:
BitVector CalleeSavedRegs;
LexicalScopes LS;
+ enum struct TransferKind { TransferCopy, TransferSpill, TransferRestore };
+
/// Keeps track of lexical scopes associated with a user value's source
/// location.
class UserValueScopes {
@@ -105,51 +110,134 @@ private:
}
};
- /// Based on std::pair so it can be used as an index into a DenseMap.
- using DebugVariableBase =
- std::pair<const DILocalVariable *, const DILocation *>;
- /// A potentially inlined instance of a variable.
- struct DebugVariable : public DebugVariableBase {
- DebugVariable(const DILocalVariable *Var, const DILocation *InlinedAt)
- : DebugVariableBase(Var, InlinedAt) {}
-
- const DILocalVariable *getVar() const { return this->first; }
- const DILocation *getInlinedAt() const { return this->second; }
-
- bool operator<(const DebugVariable &DV) const {
- if (getVar() == DV.getVar())
- return getInlinedAt() < DV.getInlinedAt();
- return getVar() < DV.getVar();
+ using FragmentInfo = DIExpression::FragmentInfo;
+ using OptFragmentInfo = Optional<DIExpression::FragmentInfo>;
+
+ /// Storage for identifying a potentially inlined instance of a variable,
+ /// or a fragment thereof.
+ class DebugVariable {
+ const DILocalVariable *Variable;
+ OptFragmentInfo Fragment;
+ const DILocation *InlinedAt;
+
+ /// Fragment that will overlap all other fragments. Used as default when
+ /// caller demands a fragment.
+ static const FragmentInfo DefaultFragment;
+
+ public:
+ DebugVariable(const DILocalVariable *Var, OptFragmentInfo &&FragmentInfo,
+ const DILocation *InlinedAt)
+ : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+ DebugVariable(const DILocalVariable *Var, OptFragmentInfo &FragmentInfo,
+ const DILocation *InlinedAt)
+ : Variable(Var), Fragment(FragmentInfo), InlinedAt(InlinedAt) {}
+
+ DebugVariable(const DILocalVariable *Var, const DIExpression *DIExpr,
+ const DILocation *InlinedAt)
+ : DebugVariable(Var, DIExpr->getFragmentInfo(), InlinedAt) {}
+
+ DebugVariable(const MachineInstr &MI)
+ : DebugVariable(MI.getDebugVariable(),
+ MI.getDebugExpression()->getFragmentInfo(),
+ MI.getDebugLoc()->getInlinedAt()) {}
+
+ const DILocalVariable *getVar() const { return Variable; }
+ const OptFragmentInfo &getFragment() const { return Fragment; }
+ const DILocation *getInlinedAt() const { return InlinedAt; }
+
+ const FragmentInfo getFragmentDefault() const {
+ return Fragment.getValueOr(DefaultFragment);
+ }
+
+ static bool isFragmentDefault(FragmentInfo &F) {
+ return F == DefaultFragment;
+ }
+
+ bool operator==(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) ==
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
+ }
+
+ bool operator<(const DebugVariable &Other) const {
+ return std::tie(Variable, Fragment, InlinedAt) <
+ std::tie(Other.Variable, Other.Fragment, Other.InlinedAt);
}
};
+ friend struct llvm::DenseMapInfo<DebugVariable>;
+
/// A pair of debug variable and value location.
struct VarLoc {
+ // The location at which a spilled variable resides. It consists of a
+ // register and an offset.
+ struct SpillLoc {
+ unsigned SpillBase;
+ int SpillOffset;
+ bool operator==(const SpillLoc &Other) const {
+ return SpillBase == Other.SpillBase && SpillOffset == Other.SpillOffset;
+ }
+ };
+
const DebugVariable Var;
const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
mutable UserValueScopes UVS;
- enum { InvalidKind = 0, RegisterKind } Kind = InvalidKind;
+ enum VarLocKind {
+ InvalidKind = 0,
+ RegisterKind,
+ SpillLocKind,
+ ImmediateKind,
+ EntryValueKind
+ } Kind = InvalidKind;
/// The value location. Stored separately to avoid repeatedly
/// extracting it from MI.
union {
uint64_t RegNo;
+ SpillLoc SpillLocation;
uint64_t Hash;
+ int64_t Immediate;
+ const ConstantFP *FPImm;
+ const ConstantInt *CImm;
} Loc;
- VarLoc(const MachineInstr &MI, LexicalScopes &LS)
- : Var(MI.getDebugVariable(), MI.getDebugLoc()->getInlinedAt()), MI(MI),
- UVS(MI.getDebugLoc(), LS) {
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS,
+ VarLocKind K = InvalidKind)
+ : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
if (int RegNo = isDbgValueDescribedByReg(MI)) {
- Kind = RegisterKind;
+ Kind = MI.isDebugEntryValue() ? EntryValueKind : RegisterKind;
Loc.RegNo = RegNo;
+ } else if (MI.getOperand(0).isImm()) {
+ Kind = ImmediateKind;
+ Loc.Immediate = MI.getOperand(0).getImm();
+ } else if (MI.getOperand(0).isFPImm()) {
+ Kind = ImmediateKind;
+ Loc.FPImm = MI.getOperand(0).getFPImm();
+ } else if (MI.getOperand(0).isCImm()) {
+ Kind = ImmediateKind;
+ Loc.CImm = MI.getOperand(0).getCImm();
}
+ assert((Kind != ImmediateKind || !MI.isDebugEntryValue()) &&
+ "entry values must be register locations");
+ }
+
+ /// The constructor for spill locations.
+ VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset,
+ LexicalScopes &LS)
+ : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
+ assert(MI.isDebugValue() && "not a DBG_VALUE");
+ assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
+ Kind = SpillLocKind;
+ Loc.SpillLocation = {SpillBase, SpillOffset};
}
+ // Is the Loc field a constant or constant object?
+ bool isConstant() const { return Kind == ImmediateKind; }
+
/// If this variable is described by a register, return it,
/// otherwise return 0.
unsigned isDescribedByReg() const {
@@ -167,17 +255,18 @@ private:
#endif
bool operator==(const VarLoc &Other) const {
- return Var == Other.Var && Loc.Hash == Other.Loc.Hash;
+ return Kind == Other.Kind && Var == Other.Var &&
+ Loc.Hash == Other.Loc.Hash;
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- if (Var == Other.Var)
- return Loc.Hash < Other.Loc.Hash;
- return Var < Other.Var;
+ return std::tie(Var, Kind, Loc.Hash) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash);
}
};
+ using DebugParamMap = SmallDenseMap<const DILocalVariable *, MachineInstr *>;
using VarLocMap = UniqueVector<VarLoc>;
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
@@ -187,26 +276,35 @@ private:
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
+ // Types for recording sets of variable fragments that overlap. For a given
+ // local variable, we record all other fragments of that variable that could
+ // overlap it, to reduce search time.
+ using FragmentOfVar =
+ std::pair<const DILocalVariable *, DIExpression::FragmentInfo>;
+ using OverlapMap =
+ DenseMap<FragmentOfVar, SmallVector<DIExpression::FragmentInfo, 1>>;
+
+ // Helper while building OverlapMap, a map of all fragments seen for a given
+ // DILocalVariable.
+ using VarToFragments =
+ DenseMap<const DILocalVariable *, SmallSet<FragmentInfo, 4>>;
+
/// This holds the working set of currently open ranges. For fast
/// access, this is done both as a set of VarLocIDs, and a map of
/// DebugVariable to recent VarLocID. Note that a DBG_VALUE ends all
/// previous open ranges for the same variable.
class OpenRangesSet {
VarLocSet VarLocs;
- SmallDenseMap<DebugVariableBase, unsigned, 8> Vars;
+ SmallDenseMap<DebugVariable, unsigned, 8> Vars;
+ OverlapMap &OverlappingFragments;
public:
+ OpenRangesSet(OverlapMap &OLapMap) : OverlappingFragments(OLapMap) {}
+
const VarLocSet &getVarLocs() const { return VarLocs; }
/// Terminate all open ranges for Var by removing it from the set.
- void erase(DebugVariable Var) {
- auto It = Vars.find(Var);
- if (It != Vars.end()) {
- unsigned ID = It->second;
- VarLocs.reset(ID);
- Vars.erase(It);
- }
- }
+ void erase(DebugVariable Var);
/// Terminate all open ranges listed in \c KillSet by removing
/// them from the set.
@@ -217,7 +315,7 @@ private:
}
/// Insert a new range into the set.
- void insert(unsigned VarLocID, DebugVariableBase Var) {
+ void insert(unsigned VarLocID, DebugVariable Var) {
VarLocs.set(VarLocID);
Vars.insert({Var, VarLocID});
}
@@ -237,24 +335,43 @@ private:
bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
unsigned &Reg);
- int extractSpillBaseRegAndOffset(const MachineInstr &MI, unsigned &Reg);
+ /// If a given instruction is identified as a restore, return the restore
+ /// location and set \p Reg to the restored register.
+ Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF,
+ unsigned &Reg);
+ /// Given a spill instruction, extract the register and offset used to
+ /// address the spill location in a target independent way.
+ VarLoc::SpillLoc extractSpillBaseRegAndOffset(const MachineInstr &MI);
void insertTransferDebugPair(MachineInstr &MI, OpenRangesSet &OpenRanges,
TransferMap &Transfers, VarLocMap &VarLocIDs,
- unsigned OldVarID, unsigned NewReg = 0);
+ unsigned OldVarID, TransferKind Kind,
+ unsigned NewReg = 0);
void transferDebugValue(const MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs);
- void transferSpillInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void transferSpillOrRestoreInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers);
+ void emitEntryValues(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
+ SparseBitVector<> &KillSet);
void transferRegisterCopy(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers);
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
- const VarLocMap &VarLocIDs);
+ VarLocMap &VarLocIDs, TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals);
bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+
bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, bool transferChanges);
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+ bool transferChanges, OverlapMap &OverlapFragments,
+ VarToFragments &SeenFragments);
+
+ void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
+ OverlapMap &OLapMap);
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
@@ -289,10 +406,46 @@ public:
} // end anonymous namespace
+namespace llvm {
+
+template <> struct DenseMapInfo<LiveDebugValues::DebugVariable> {
+ using DV = LiveDebugValues::DebugVariable;
+ using OptFragmentInfo = LiveDebugValues::OptFragmentInfo;
+ using FragmentInfo = LiveDebugValues::FragmentInfo;
+
+ // Empty key: no key should be generated that has no DILocalVariable.
+ static inline DV getEmptyKey() {
+ return DV(nullptr, OptFragmentInfo(), nullptr);
+ }
+
+ // Tombstone key: unlike the empty key, its Optional holds a value.
+ static inline DV getTombstoneKey() {
+ return DV(nullptr, OptFragmentInfo({0, 0}), nullptr);
+ }
+
+ static unsigned getHashValue(const DV &D) {
+ unsigned HV = 0;
+ const OptFragmentInfo &Fragment = D.getFragment();
+ if (Fragment)
+ HV = DenseMapInfo<FragmentInfo>::getHashValue(*Fragment);
+
+ return hash_combine(D.getVar(), HV, D.getInlinedAt());
+ }
+
+ static bool isEqual(const DV &A, const DV &B) { return A == B; }
+};
+
+} // namespace llvm
+
//===----------------------------------------------------------------------===//
// Implementation
//===----------------------------------------------------------------------===//
+const DIExpression::FragmentInfo
+ LiveDebugValues::DebugVariable::DefaultFragment = {
+ std::numeric_limits<uint64_t>::max(),
+ std::numeric_limits<uint64_t>::min()};
+
char LiveDebugValues::ID = 0;
char &llvm::LiveDebugValuesID = LiveDebugValues::ID;
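
The DenseMapInfo specialization above is the standard recipe for making a custom type usable as a DenseMap key: two reserved sentinel values no real key may take, a hash, and an equality predicate consistent with it. The general shape, for a hypothetical key type (LLVM ADT headers assumed):

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Hashing.h"

// A hypothetical two-field key; any DenseMap key needs a DenseMapInfo.
struct VarKey {
  const void *Var;
  unsigned Slot;
  bool operator==(const VarKey &O) const {
    return Var == O.Var && Slot == O.Slot;
  }
};

namespace llvm {
template <> struct DenseMapInfo<VarKey> {
  // Two states no real key can occupy: empty buckets and tombstones.
  static inline VarKey getEmptyKey() { return {nullptr, ~0u}; }
  static inline VarKey getTombstoneKey() { return {nullptr, ~0u - 1}; }
  static unsigned getHashValue(const VarKey &K) {
    return hash_combine(K.Var, K.Slot);
  }
  static bool isEqual(const VarKey &A, const VarKey &B) { return A == B; }
};
} // namespace llvm

llvm::DenseMap<VarKey, unsigned> Ids; // Now usable with VarKey keys.
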
@@ -312,6 +465,39 @@ void LiveDebugValues::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+/// Erase a variable from the set of open ranges, and additionally erase any
+/// fragments that may overlap it.
+void LiveDebugValues::OpenRangesSet::erase(DebugVariable Var) {
+ // Erasure helper.
+ auto DoErase = [this](DebugVariable VarToErase) {
+ auto It = Vars.find(VarToErase);
+ if (It != Vars.end()) {
+ unsigned ID = It->second;
+ VarLocs.reset(ID);
+ Vars.erase(It);
+ }
+ };
+
+ // Erase the variable/fragment that ends here.
+ DoErase(Var);
+
+ // Extract the fragment. Interpret an empty fragment as one that covers all
+ // possible bits.
+ FragmentInfo ThisFragment = Var.getFragmentDefault();
+
+ // There may be fragments that overlap the designated fragment. Look them up
+ // in the pre-computed overlap map, and erase them too.
+ auto MapIt = OverlappingFragments.find({Var.getVar(), ThisFragment});
+ if (MapIt != OverlappingFragments.end()) {
+ for (auto Fragment : MapIt->second) {
+ LiveDebugValues::OptFragmentInfo FragmentHolder;
+ if (!DebugVariable::isFragmentDefault(Fragment))
+ FragmentHolder = LiveDebugValues::OptFragmentInfo(Fragment);
+ DoErase({Var.getVar(), FragmentHolder, Var.getInlinedAt()});
+ }
+ }
+}
+
//===----------------------------------------------------------------------===//
// Debug Range Extension Implementation
//===----------------------------------------------------------------------===//
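
OpenRangesSet::erase below consults the precomputed overlap map so that closing one fragment's range also closes every fragment known to overlap it, without scanning all open ranges. A reduced, self-contained model of that lookup-and-erase step:

#include <map>
#include <set>
#include <utility>
#include <vector>

using Fragment = std::pair<unsigned, unsigned>; // (offset bits, size bits)
using VarFrag = std::pair<int, Fragment>;       // (variable id, fragment)

std::map<VarFrag, std::vector<Fragment>> Overlaps; // Precomputed map.
std::set<VarFrag> OpenRanges;

// End the range for VF and for every fragment recorded as overlapping it.
void eraseWithOverlaps(const VarFrag &VF) {
  OpenRanges.erase(VF);
  auto It = Overlaps.find(VF);
  if (It == Overlaps.end())
    return;
  for (const Fragment &F : It->second)
    OpenRanges.erase({VF.first, F}); // Same variable, overlapping fragment.
}
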
@@ -339,10 +525,8 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
}
#endif
-/// Given a spill instruction, extract the register and offset used to
-/// address the spill location in a target independent way.
-int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
- unsigned &Reg) {
+LiveDebugValues::VarLoc::SpillLoc
+LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI) {
assert(MI.hasOneMemOperand() &&
"Spill instruction does not have exactly one memory operand?");
auto MMOI = MI.memoperands_begin();
@@ -351,7 +535,9 @@ int LiveDebugValues::extractSpillBaseRegAndOffset(const MachineInstr &MI,
"Inconsistent memory operand in spill instruction");
int FI = cast<FixedStackPseudoSourceValue>(PVal)->getFrameIndex();
const MachineBasicBlock *MBB = MI.getParent();
- return TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ unsigned Reg;
+ int Offset = TFI->getFrameIndexReference(*MBB->getParent(), FI, Reg);
+ return {Reg, Offset};
}
/// End all previous ranges related to @MI and start a new range from @MI
@@ -362,21 +548,72 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
if (!MI.isDebugValue())
return;
const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
const DILocation *DebugLoc = MI.getDebugLoc();
const DILocation *InlinedAt = DebugLoc->getInlinedAt();
assert(Var->isValidLocationForIntrinsic(DebugLoc) &&
"Expected inlined-at fields to agree");
// End all previous ranges of Var.
- DebugVariable V(Var, InlinedAt);
+ DebugVariable V(Var, Expr, InlinedAt);
OpenRanges.erase(V);
// Add the VarLoc to OpenRanges from this DBG_VALUE.
- // TODO: Currently handles DBG_VALUE which has only reg as location.
- if (isDbgValueDescribedByReg(MI)) {
+ unsigned ID;
+ if (isDbgValueDescribedByReg(MI) || MI.getOperand(0).isImm() ||
+ MI.getOperand(0).isFPImm() || MI.getOperand(0).isCImm()) {
+ // Use normal VarLoc constructor for registers and immediates.
VarLoc VL(MI, LS);
- unsigned ID = VarLocIDs.insert(VL);
+ ID = VarLocIDs.insert(VL);
+ OpenRanges.insert(ID, VL.Var);
+ } else if (MI.hasOneMemOperand()) {
+ // It's a stack spill -- fetch spill base and offset.
+ VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+ VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
+ ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
+ } else {
+ // This must be an undefined location. We should leave OpenRanges closed.
+ assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
+ "Unexpected non-undef DBG_VALUE encountered");
+ }
+}
+
+void LiveDebugValues::emitEntryValues(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
+ SparseBitVector<> &KillSet) {
+ MachineFunction *MF = MI.getParent()->getParent();
+ for (unsigned ID : KillSet) {
+ if (!VarLocIDs[ID].Var.getVar()->isParameter())
+ continue;
+
+ const MachineInstr *CurrDebugInstr = &VarLocIDs[ID].MI;
+
+ // If the parameter's DBG_VALUE is not in the map, we cannot generate
+ // an entry value for it.
+ if (!DebugEntryVals.count(CurrDebugInstr->getDebugVariable()))
+ continue;
+
+ auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
+ DIExpression *NewExpr = DIExpression::prepend(
+ ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
+ MachineInstr *EntryValDbgMI =
+ BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(),
+ ParamDebugInstr->isIndirectDebugValue(),
+ ParamDebugInstr->getOperand(0).getReg(),
+ ParamDebugInstr->getDebugVariable(), NewExpr);
+
+ if (ParamDebugInstr->isIndirectDebugValue())
+ EntryValDbgMI->getOperand(1).setImm(
+ ParamDebugInstr->getOperand(1).getImm());
+
+ Transfers.push_back({&MI, EntryValDbgMI});
+ VarLoc VL(*EntryValDbgMI, LS);
+ unsigned EntryValLocID = VarLocIDs.insert(VL);
+ OpenRanges.insert(EntryValLocID, VL.Var);
}
}
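
emitEntryValues responds to a parameter's register being clobbered by re-describing the variable with a DW_OP_entry_value expression, i.e. the value the register held on entry to the function, which the debugger can recover from the caller. A conceptual sketch of the fallback decision (plain C++ modeling, not the MI-level code; the DWARF spellings in the strings are illustrative):

#include <optional>
#include <string>

struct ParamLoc {
  unsigned Reg;      // Register currently holding the parameter.
  bool IsUnmodified; // Parameter is never reassigned in the function.
};

// When Reg is clobbered, an unmodified parameter can still be described to
// the debugger via an entry-value expression over its entry register.
std::optional<std::string> locationAfterClobber(const ParamLoc &P,
                                                unsigned ClobberedReg) {
  if (P.Reg != ClobberedReg)
    return "DW_OP_regN";                    // Location unaffected.
  if (P.IsUnmodified)
    return "DW_OP_entry_value(DW_OP_regN)"; // Recoverable at entry.
  return std::nullopt;                      // Location is lost.
}
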
@@ -387,51 +624,92 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
/// otherwise it is variable's location on the stack.
void LiveDebugValues::insertTransferDebugPair(
MachineInstr &MI, OpenRangesSet &OpenRanges, TransferMap &Transfers,
- VarLocMap &VarLocIDs, unsigned OldVarID, unsigned NewReg) {
- const MachineInstr *DMI = &VarLocIDs[OldVarID].MI;
+ VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
+ unsigned NewReg) {
+ const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
MachineFunction *MF = MI.getParent()->getParent();
- MachineInstr *NewDMI;
- if (NewReg) {
+ MachineInstr *NewDebugInstr;
+
+ auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
+ &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
+ unsigned LocId = VarLocIDs.insert(VL);
+
+ // Close this variable's previous location range.
+ DebugVariable V(*DebugInstr);
+ OpenRanges.erase(V);
+
+ OpenRanges.insert(LocId, VL.Var);
+ // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
+ // after MI. Keep track of the pairing.
+ TransferDebugPair MIP = {&MI, NewDebugInstr};
+ Transfers.push_back(MIP);
+ };
+
+ // End all previous ranges of Var.
+ OpenRanges.erase(VarLocIDs[OldVarID].Var);
+ switch (Kind) {
+ case TransferKind::TransferCopy: {
+ assert(NewReg &&
+ "No register supplied when handling a copy of a debug value");
// Create a DBG_VALUE instruction to describe the Var in its new
// register location.
- NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), NewReg,
- DMI->getDebugVariable(), DMI->getDebugExpression());
- if (DMI->isIndirectDebugValue())
- NewDMI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ NewDebugInstr = BuildMI(
+ *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
+ DebugInstr->isIndirectDebugValue(), NewReg,
+ DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
+ if (DebugInstr->isIndirectDebugValue())
+ NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+ VarLoc VL(*NewDebugInstr, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
- NewDMI->print(dbgs(), false, false, false, TII));
- } else {
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
+ }
+ case TransferKind::TransferSpill: {
// Create a DBG_VALUE instruction to describe the Var in its spilled
// location.
- unsigned SpillBase;
- int SpillOffset = extractSpillBaseRegAndOffset(MI, SpillBase);
- auto *SpillExpr = DIExpression::prepend(DMI->getDebugExpression(),
- DIExpression::NoDeref, SpillOffset);
- NewDMI = BuildMI(*MF, DMI->getDebugLoc(), DMI->getDesc(), true, SpillBase,
- DMI->getDebugVariable(), SpillExpr);
+ VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
+ auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
+ DIExpression::ApplyOffset,
+ SpillLocation.SpillOffset);
+ NewDebugInstr = BuildMI(
+ *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
+ SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
+ VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
+ SpillLocation.SpillOffset, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
- NewDMI->print(dbgs(), false, false, false, TII));
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
}
-
- // The newly created DBG_VALUE instruction NewDMI must be inserted after
- // MI. Keep track of the pairing.
- TransferDebugPair MIP = {&MI, NewDMI};
- Transfers.push_back(MIP);
-
- // End all previous ranges of Var.
- OpenRanges.erase(VarLocIDs[OldVarID].Var);
-
- // Add the VarLoc to OpenRanges.
- VarLoc VL(*NewDMI, LS);
- unsigned LocID = VarLocIDs.insert(VL);
- OpenRanges.insert(LocID, VL.Var);
+ case TransferKind::TransferRestore: {
+ assert(NewReg &&
+ "No register supplied when handling a restore of a debug value");
+ MachineFunction *MF = MI.getMF();
+ DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
+ NewDebugInstr =
+ BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
+ NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
+ VarLoc VL(*NewDebugInstr, LS);
+ ProcessVarLoc(VL, NewDebugInstr);
+ LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
+ NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
+ /*SkipOpers*/false, /*SkipDebugLoc*/false,
+ /*AddNewLine*/true, TII));
+ return;
+ }
+ }
+ llvm_unreachable("Invalid transfer kind");
}
/// A definition of a register may mark the end of a range.
-void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- const VarLocMap &VarLocIDs) {
+void LiveDebugValues::transferRegisterDef(
+ MachineInstr &MI, OpenRangesSet &OpenRanges, VarLocMap &VarLocIDs,
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals) {
MachineFunction *MF = MI.getMF();
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
@@ -461,6 +739,13 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
}
}
OpenRanges.erase(KillSet, VarLocIDs);
+
+ if (auto *TPC = getAnalysisIfAvailable<TargetPassConfig>()) {
+ auto &TM = TPC->getTM<TargetMachine>();
+ if (TM.Options.EnableDebugEntryValues)
+ emitEntryValues(MI, OpenRanges, VarLocIDs, Transfers, DebugEntryVals,
+ KillSet);
+ }
}
/// Decide if @MI is a spill instruction and return true if it is. We use 2
@@ -471,24 +756,15 @@ void LiveDebugValues::transferRegisterDef(MachineInstr &MI,
/// other spills). We do not handle this yet (more than one memory operand).
bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
MachineFunction *MF, unsigned &Reg) {
- const MachineFrameInfo &FrameInfo = MF->getFrameInfo();
- int FI;
SmallVector<const MachineMemOperand*, 1> Accesses;
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
- // To identify a spill instruction, use the same criteria as in AsmPrinter.
- if (!((TII->isStoreToStackSlotPostFE(MI, FI) &&
- FrameInfo.isSpillSlotObjectIndex(FI)) ||
- (TII->hasStoreToStackSlot(MI, Accesses) &&
- llvm::any_of(Accesses, [&FrameInfo](const MachineMemOperand *MMO) {
- return FrameInfo.isSpillSlotObjectIndex(
- cast<FixedStackPseudoSourceValue>(MMO->getPseudoValue())
- ->getFrameIndex());
- }))))
- return false;
+ if (!MI.getSpillSize(TII) && !MI.getFoldedSpillSize(TII))
+ return false; // This is not a spill instruction, since no valid size was
+ // returned from either function.
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
if (!MO.isReg() || !MO.isUse()) {
@@ -525,29 +801,67 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return false;
}
+Optional<LiveDebugValues::VarLoc::SpillLoc>
+LiveDebugValues::isRestoreInstruction(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!MI.hasOneMemOperand())
+ return None;
+
+ // FIXME: Handle folded restore instructions with more than one memory
+ // operand.
+ if (MI.getRestoreSize(TII)) {
+ Reg = MI.getOperand(0).getReg();
+ return extractSpillBaseRegAndOffset(MI);
+ }
+ return None;
+}
+
/// A spilled register may indicate that we have to end the current range of
/// a variable and create a new one for the spill location.
+/// A restored register may indicate the reverse situation.
/// We don't want to insert any instructions in process(), so we just create
/// the DBG_VALUE without inserting it and keep track of it in \p Transfers.
/// It will be inserted into the BB when we're done iterating over the
/// instructions.
-void LiveDebugValues::transferSpillInst(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocMap &VarLocIDs,
- TransferMap &Transfers) {
- unsigned Reg;
+void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
+ OpenRangesSet &OpenRanges,
+ VarLocMap &VarLocIDs,
+ TransferMap &Transfers) {
MachineFunction *MF = MI.getMF();
- if (!isSpillInstruction(MI, MF, Reg))
- return;
+ TransferKind TKind;
+ unsigned Reg;
+ Optional<VarLoc::SpillLoc> Loc;
- // Check if the register is the location of a debug value.
+ LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
+
+ if (isSpillInstruction(MI, MF, Reg)) {
+ TKind = TransferKind::TransferSpill;
+ LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ } else {
+ if (!(Loc = isRestoreInstruction(MI, MF, Reg)))
+ return;
+ TKind = TransferKind::TransferRestore;
+ LLVM_DEBUG(dbgs() << "Recognized as restore: "; MI.dump(););
+ LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
+ << "\n");
+ }
+ // Check if the register or spill location is the location of a debug value.
for (unsigned ID : OpenRanges.getVarLocs()) {
- if (VarLocIDs[ID].isDescribedByReg() == Reg) {
+ if (TKind == TransferKind::TransferSpill &&
+ VarLocIDs[ID].isDescribedByReg() == Reg) {
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
- insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID);
- return;
- }
+ } else if (TKind == TransferKind::TransferRestore &&
+ VarLocIDs[ID].Loc.SpillLocation == *Loc) {
+ LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
+ << VarLocIDs[ID].Var.getVar()->getName() << ")\n");
+ } else
+ continue;
+ insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID, TKind,
+ Reg);
+ return;
}
}
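
transferSpillOrRestoreInst now follows a value both ways through memory: a spill moves an open range from a register location to a (base register, offset) stack location, and a restore moves a range whose spill location matches back into a register. A self-contained model of the two transitions:

#include <variant>

struct SpillLoc {
  unsigned Base;
  int Offset;
  bool operator==(const SpillLoc &O) const {
    return Base == O.Base && Offset == O.Offset;
  }
};

// A variable's location is either a register number or a stack slot.
using Location = std::variant<unsigned, SpillLoc>;

// Spill: a range described by Reg moves to the stack slot.
void applySpill(Location &L, unsigned Reg, SpillLoc Slot) {
  if (auto *R = std::get_if<unsigned>(&L); R && *R == Reg)
    L = Slot;
}

// Restore: a range described by the matching stack slot moves back to Reg.
void applyRestore(Location &L, SpillLoc Slot, unsigned Reg) {
  if (auto *S = std::get_if<SpillLoc>(&L); S && *S == Slot)
    L = Reg;
}
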
@@ -585,7 +899,7 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
for (unsigned ID : OpenRanges.getVarLocs()) {
if (VarLocIDs[ID].isDescribedByReg() == SrcReg) {
insertTransferDebugPair(MI, OpenRanges, Transfers, VarLocIDs, ID,
- DestReg);
+ TransferKind::TransferCopy, DestReg);
return;
}
}
@@ -612,20 +926,92 @@ bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
});
VarLocSet &VLS = OutLocs[CurMBB];
Changed = VLS |= OpenRanges.getVarLocs();
+ // The new OutLocs set may differ from the old one due to spill, restore,
+ // or register-copy instruction processing.
+ if (Changed)
+ VLS = OpenRanges.getVarLocs();
OpenRanges.clear();
return Changed;
}
+/// Accumulate a mapping between each DILocalVariable fragment and other
+/// fragments of that DILocalVariable which overlap. This reduces work during
+/// the data-flow stage from "Find any overlapping fragments" to "Check if the
+/// known-to-overlap fragments are present".
+/// \param MI A previously unprocessed DBG_VALUE instruction to analyze for
+/// fragment usage.
+/// \param SeenFragments Map from DILocalVariable to all fragments of that
+/// Variable which are known to exist.
+/// \param OverlappingFragments The overlap map being constructed, from one
+/// Var/Fragment pair to a vector of fragments known to overlap.
+void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
+ VarToFragments &SeenFragments,
+ OverlapMap &OverlappingFragments) {
+ DebugVariable MIVar(MI);
+ FragmentInfo ThisFragment = MIVar.getFragmentDefault();
+
+ // If this is the first sighting of this variable, then we are guaranteed
+ // there are currently no overlapping fragments either. Initialize the set
+ // of seen fragments, record no overlaps for the current one, and return.
+ auto SeenIt = SeenFragments.find(MIVar.getVar());
+ if (SeenIt == SeenFragments.end()) {
+ SmallSet<FragmentInfo, 4> OneFragment;
+ OneFragment.insert(ThisFragment);
+ SeenFragments.insert({MIVar.getVar(), OneFragment});
+
+ OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ return;
+ }
+
+ // If this particular Variable/Fragment pair already exists in the overlap
+ // map, it has already been accounted for.
+ auto IsInOLapMap =
+ OverlappingFragments.insert({{MIVar.getVar(), ThisFragment}, {}});
+ if (!IsInOLapMap.second)
+ return;
+
+ auto &ThisFragmentsOverlaps = IsInOLapMap.first->second;
+ auto &AllSeenFragments = SeenIt->second;
+
+ // Otherwise, examine all other seen fragments for this variable, with "this"
+ // fragment being a previously unseen fragment. Record any pair of
+ // overlapping fragments.
+ for (auto &ASeenFragment : AllSeenFragments) {
+ // Does this previously seen fragment overlap?
+ if (DIExpression::fragmentsOverlap(ThisFragment, ASeenFragment)) {
+ // Yes: Mark the current fragment as being overlapped.
+ ThisFragmentsOverlaps.push_back(ASeenFragment);
+ // Mark the previously seen fragment as being overlapped by the current
+ // one.
+ auto ASeenFragmentsOverlaps =
+ OverlappingFragments.find({MIVar.getVar(), ASeenFragment});
+ assert(ASeenFragmentsOverlaps != OverlappingFragments.end() &&
+ "Previously seen var fragment has no vector of overlaps");
+ ASeenFragmentsOverlaps->second.push_back(ThisFragment);
+ }
+ }
+
+ AllSeenFragments.insert(ThisFragment);
+}
+
/// This routine creates OpenRanges and OutLocs.
bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, bool transferChanges) {
+ TransferMap &Transfers, DebugParamMap &DebugEntryVals,
+ bool transferChanges,
+ OverlapMap &OverlapFragments,
+ VarToFragments &SeenFragments) {
bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
- transferRegisterDef(MI, OpenRanges, VarLocIDs);
+ transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
+ DebugEntryVals);
if (transferChanges) {
transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
- transferSpillInst(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
+ } else {
+ // Build up a map of overlapping fragments on the first run through.
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
}
Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
return Changed;
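
The overlap map is built once, during the initial pass over the function: fragments of a variable are bit ranges, and two fragments overlap exactly when the half-open ranges [Offset, Offset+Size) intersect. A self-contained sketch of the predicate and the symmetric recording step that accumulateFragmentMap performs:

#include <map>
#include <set>
#include <tuple>
#include <vector>

struct Frag {
  unsigned OffsetInBits, SizeInBits;
  bool operator<(const Frag &O) const {
    return std::tie(OffsetInBits, SizeInBits) <
           std::tie(O.OffsetInBits, O.SizeInBits);
  }
};

// Do the half-open bit ranges [Offset, Offset+Size) intersect?
bool fragmentsOverlap(const Frag &A, const Frag &B) {
  return A.OffsetInBits < B.OffsetInBits + B.SizeInBits &&
         B.OffsetInBits < A.OffsetInBits + A.SizeInBits;
}

// Record a newly seen fragment against all previously seen ones,
// symmetrically, mirroring accumulateFragmentMap above.
void accumulate(const Frag &New, std::set<Frag> &Seen,
                std::map<Frag, std::vector<Frag>> &Overlaps) {
  auto &NewList = Overlaps[New]; // Creates an empty entry if absent.
  for (const Frag &Old : Seen)
    if (fragmentsOverlap(New, Old)) {
      NewList.push_back(Old);
      Overlaps[Old].push_back(New);
    }
  Seen.insert(New);
}
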
@@ -713,13 +1099,23 @@ bool LiveDebugValues::join(
// new range is started for the var from the mbb's beginning by inserting
// a new DBG_VALUE. process() will end this range however appropriate.
const VarLoc &DiffIt = VarLocIDs[ID];
- const MachineInstr *DMI = &DiffIt.MI;
- MachineInstr *MI =
- BuildMI(MBB, MBB.instr_begin(), DMI->getDebugLoc(), DMI->getDesc(),
- DMI->isIndirectDebugValue(), DMI->getOperand(0).getReg(),
- DMI->getDebugVariable(), DMI->getDebugExpression());
- if (DMI->isIndirectDebugValue())
- MI->getOperand(1).setImm(DMI->getOperand(1).getImm());
+ const MachineInstr *DebugInstr = &DiffIt.MI;
+ MachineInstr *MI = nullptr;
+ if (DiffIt.isConstant()) {
+ MachineOperand MO(DebugInstr->getOperand(0));
+ MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+ DebugInstr->getDesc(), false, MO,
+ DebugInstr->getDebugVariable(),
+ DebugInstr->getDebugExpression());
+ } else {
+ MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
+ DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
+ DebugInstr->getOperand(0).getReg(),
+ DebugInstr->getDebugVariable(),
+ DebugInstr->getDebugExpression());
+ if (DebugInstr->isIndirectDebugValue())
+ MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
+ }
LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
ILS.set(ID);
++NumInserted;
@@ -737,11 +1133,15 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
bool OLChanged = false;
bool MBBJoined = false;
- VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
- OpenRangesSet OpenRanges; // Ranges that are open until end of bb.
- VarLocInMBB OutLocs; // Ranges that exist beyond bb.
- VarLocInMBB InLocs; // Ranges that are incoming after joining.
- TransferMap Transfers; // DBG_VALUEs associated with spills.
+ VarLocMap VarLocIDs; // Map VarLoc<>unique ID for use in bitvectors.
+ OverlapMap OverlapFragments; // Map of overlapping variable fragments
+ OpenRangesSet OpenRanges(OverlapFragments);
+ // Ranges that are open until end of bb.
+ VarLocInMBB OutLocs; // Ranges that exist beyond bb.
+ VarLocInMBB InLocs; // Ranges that are incoming after joining.
+ TransferMap Transfers; // DBG_VALUEs associated with spills.
+
+ VarToFragments SeenFragments;
// Blocks which are artificial, i.e. blocks which exclusively contain
// instructions without locations, or with line 0 locations.
@@ -758,15 +1158,61 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
enum : bool { dontTransferChanges = false, transferChanges = true };
+ // Besides checking whether the parameter is modified, check whether the
+ // DBG_VALUE is inlined: if it is, the variable it tracks comes from a
+ // different function, and we cannot track its entry value.
+ auto IsUnmodifiedFuncParam = [&](const MachineInstr &MI) {
+ auto *DIVar = MI.getDebugVariable();
+ return DIVar->isParameter() && DIVar->isNotModified() &&
+ !MI.getDebugLoc()->getInlinedAt();
+ };
+
+ const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ unsigned FP = TRI->getFrameRegister(MF);
+ auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
+ return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
+ };
+
+ // Working set of currently collected debug variables mapped to DBG_VALUEs
+ // representing candidates for production of debug entry values.
+ DebugParamMap DebugEntryVals;
+
+ MachineBasicBlock &First_MBB = *(MF.begin());
+ // Only in the case of entry MBB collect DBG_VALUEs representing
+ // function parameters in order to generate debug entry values for them.
+ // Currently, we generate debug entry values only for parameters that are
+ // unmodified throughout the function and located in a register.
+ // TODO: Add support for parameters that are described as fragments.
+ // TODO: Add support for modified arguments that can be expressed
+ // by using their entry values.
+ // TODO: Add support for local variables that are expressed in terms of
+ // parameters' entry values.
+ for (auto &MI : First_MBB)
+ if (MI.isDebugValue() && IsUnmodifiedFuncParam(MI) &&
+ !MI.isIndirectDebugValue() && IsRegOtherThanSPAndFP(MI.getOperand(0)) &&
+ !DebugEntryVals.count(MI.getDebugVariable()) &&
+ !MI.getDebugExpression()->isFragment())
+ DebugEntryVals[MI.getDebugVariable()] = &MI;
+
// Initialize every mbb with OutLocs.
// We are not looking at any spill instructions during the initial pass
// over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
// instructions for spills of registers that are known to be user variables
// within the BB in which the spill occurs.
- for (auto &MBB : MF)
- for (auto &MI : MBB)
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- dontTransferChanges);
+ for (auto &MBB : MF) {
+ for (auto &MI : MBB) {
+ process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals,
+ dontTransferChanges, OverlapFragments, SeenFragments);
+ }
+ // Add any entry DBG_VALUE instructions necessitated by parameter
+ // clobbering.
+ for (auto &TR : Transfers) {
+ MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
+ TR.DebugInst);
+ }
+ Transfers.clear();
+ }
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
if (const DebugLoc &DL = MI.getDebugLoc())
@@ -812,8 +1258,10 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
// examine spill instructions to see whether they spill registers that
// correspond to user variables.
for (auto &MI : *MBB)
- OLChanged |= process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- transferChanges);
+ OLChanged |=
+ process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
+ DebugEntryVals, transferChanges, OverlapFragments,
+ SeenFragments);
// Add any DBG_VALUE instructions necessitated by spills.
for (auto &TR : Transfers)
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index d0d889782a35..656ec7d4bdfd 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -1,9 +1,8 @@
//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -71,6 +71,7 @@ EnableLDV("live-debug-variables", cl::init(true),
cl::desc("Enable the live debug variables pass"), cl::Hidden);
STATISTIC(NumInsertedDebugValues, "Number of DBG_VALUEs inserted");
+STATISTIC(NumInsertedDebugLabels, "Number of DBG_LABELs inserted");
char LiveDebugVariables::ID = 0;
@@ -166,10 +167,6 @@ class UserValue {
/// Map of slot indices where this value is live.
LocMap locInts;
- /// Set of interval start indexes that have been trimmed to the
- /// lexical scope.
- SmallSet<SlotIndex, 2> trimmedDefs;
-
/// Insert a DBG_VALUE into MBB at Idx for LocNo.
void insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
SlotIndex StopIdx, DbgValueLocation Loc, bool Spilled,
@@ -339,6 +336,37 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
+/// A user label wraps a debug info label and the slot index where it occurs.
+class UserLabel {
+ const DILabel *Label; ///< The debug info label we are part of.
+ DebugLoc dl; ///< The debug location for the label. This is
+ ///< used by dwarf writer to find lexical scope.
+ SlotIndex loc; ///< Slot used by the debug label.
+
+ /// Insert a DBG_LABEL into MBB at Idx.
+ void insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+ /// Create a new UserLabel.
+ UserLabel(const DILabel *label, DebugLoc L, SlotIndex Idx)
+ : Label(label), dl(std::move(L)), loc(Idx) {}
+
+ /// Does this UserLabel match the parameters?
+ bool match(const DILabel *L, const DILocation *IA,
+ const SlotIndex Index) const {
+ return Label == L && dl->getInlinedAt() == IA && loc == Index;
+ }
+
+ /// Recreate DBG_LABEL instruction from data structures.
+ void emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// Return DebugLoc of this UserLabel.
+ DebugLoc getDebugLoc() { return dl; }
+
+ void print(raw_ostream &, const TargetRegisterInfo *);
+};
+
/// Implementation of the LiveDebugVariables pass.
class LDVImpl {
LiveDebugVariables &pass;
@@ -356,6 +384,9 @@ class LDVImpl {
/// All allocated UserValue instances.
SmallVector<std::unique_ptr<UserValue>, 8> userValues;
+ /// All allocated UserLabel instances.
+ SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
+
/// Map virtual register to eq class leader.
using VRMap = DenseMap<unsigned, UserValue *>;
VRMap virtRegToEqClass;
@@ -379,6 +410,14 @@ class LDVImpl {
/// \returns True if the DBG_VALUE instruction should be deleted.
bool handleDebugValue(MachineInstr &MI, SlotIndex Idx);
+ /// Add DBG_LABEL instruction to UserLabel.
+ ///
+ /// \param MI DBG_LABEL instruction
+ /// \param Idx Last valid SlotIndex before instruction.
+ ///
+ /// \returns True if the DBG_LABEL instruction should be deleted.
+ bool handleDebugLabel(MachineInstr &MI, SlotIndex Idx);
+
/// Collect and erase all DBG_VALUE instructions, adding a UserValue def
/// for each instruction.
///
@@ -400,6 +439,7 @@ public:
void clear() {
MF = nullptr;
userValues.clear();
+ userLabels.clear();
virtRegToEqClass.clear();
userVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
@@ -445,13 +485,23 @@ static void printDebugLoc(const DebugLoc &DL, raw_ostream &CommentOS,
CommentOS << " ]";
}
-static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
+static void printExtendedName(raw_ostream &OS, const DINode *Node,
const DILocation *DL) {
- const LLVMContext &Ctx = V->getContext();
- StringRef Res = V->getName();
+ const LLVMContext &Ctx = Node->getContext();
+ StringRef Res;
+ unsigned Line;
+ if (const auto *V = dyn_cast<const DILocalVariable>(Node)) {
+ Res = V->getName();
+ Line = V->getLine();
+ } else if (const auto *L = dyn_cast<const DILabel>(Node)) {
+ Res = L->getName();
+ Line = L->getLine();
+ }
+
if (!Res.empty())
- OS << Res << "," << V->getLine();
- if (auto *InlinedAt = DL->getInlinedAt()) {
+ OS << Res << "," << Line;
+ auto *InlinedAt = DL ? DL->getInlinedAt() : nullptr;
+ if (InlinedAt) {
if (DebugLoc InlinedAtDL = InlinedAt) {
OS << " @[";
printDebugLoc(InlinedAtDL, OS, Ctx);
@@ -461,9 +511,8 @@ static void printExtendedName(raw_ostream &OS, const DILocalVariable *V,
}
void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
- auto *DV = cast<DILocalVariable>(Variable);
OS << "!\"";
- printExtendedName(OS, DV, dl);
+ printExtendedName(OS, Variable, dl);
OS << "\"\t";
for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
@@ -483,10 +532,22 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << '\n';
}
+void UserLabel::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ OS << "!\"";
+ printExtendedName(OS, Label, dl);
+
+ OS << "\"\t";
+ OS << loc;
+ OS << '\n';
+}
+
void LDVImpl::print(raw_ostream &OS) {
OS << "********** DEBUG VARIABLES **********\n";
- for (unsigned i = 0, e = userValues.size(); i != e; ++i)
- userValues[i]->print(OS, TRI);
+ for (auto &userValue : userValues)
+ userValue->print(OS, TRI);
+ OS << "********** DEBUG LABELS **********\n";
+ for (auto &userLabel : userLabels)
+ userLabel->print(OS, TRI);
}
#endif
@@ -556,7 +617,7 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
} else {
// The DBG_VALUE is only valid if either Reg is live out from Idx, or Reg
// is defined dead at Idx (where Idx is the slot index for the instruction
- // preceeding the DBG_VALUE).
+ // preceding the DBG_VALUE).
const LiveInterval &LI = LIS->getInterval(Reg);
LiveQueryResult LRQ = LI.Query(Idx);
if (!LRQ.valueOutOrDead()) {
@@ -587,6 +648,29 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
return true;
}
+bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
+ // DBG_LABEL label
+ if (MI.getNumOperands() != 1 || !MI.getOperand(0).isMetadata()) {
+ LLVM_DEBUG(dbgs() << "Can't handle " << MI);
+ return false;
+ }
+
+ // Get or create the UserLabel for label here.
+ const DILabel *Label = MI.getDebugLabel();
+ const DebugLoc &DL = MI.getDebugLoc();
+ bool Found = false;
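+ // Reuse an existing UserLabel if it matches this label, inlined-at
+ // location, and slot index, so duplicate DBG_LABELs collapse to one entry.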
+ for (auto const &L : userLabels) {
+ if (L->match(Label, DL->getInlinedAt(), Idx)) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found)
+ userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+
+ return true;
+}
+
bool LDVImpl::collectDebugValues(MachineFunction &mf) {
bool Changed = false;
for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
@@ -610,7 +694,8 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) {
do {
- // Only handle DBG_VALUE in handleDebugValue(). Skip all other
- // kinds of debug instructions.
+ // Only handle DBG_VALUE in handleDebugValue() and DBG_LABEL in
+ // handleDebugLabel(). Skip all other kinds of debug instructions.
- if (MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) {
+ if ((MBBI->isDebugValue() && handleDebugValue(*MBBI, Idx)) ||
+ (MBBI->isDebugLabel() && handleDebugLabel(*MBBI, Idx))) {
MBBI = MBB->erase(MBBI);
Changed = true;
} else
@@ -655,10 +740,8 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
}
// Limited by the next def.
- if (I.valid() && I.start() < Stop) {
+ if (I.valid() && I.start() < Stop)
Stop = I.start();
- ToEnd = false;
- }
// Limited by VNI's live range.
else if (!ToEnd && Kills)
Kills->push_back(Stop);
@@ -826,8 +909,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
++I;
// If the interval also overlaps the start of the "next" (i.e.
- // current) range create a new interval for the remainder (which
- // may be further trimmed).
+ // current) range, create a new interval for the remainder.
if (RStart < IStop)
I.insert(RStart, IStop, Loc);
}
@@ -837,13 +919,6 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
if (!I.valid())
return;
- if (I.start() < RStart) {
- // Interval start overlaps range - trim to the scope range.
- I.setStartUnchecked(RStart);
- // Remember that this interval was trimmed.
- trimmedDefs.insert(RStart);
- }
-
// The end of a lexical scope range is the last instruction in the
// range. To convert to an interval we need the index of the
// instruction after it.
@@ -1227,11 +1302,13 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
+ uint8_t DIExprFlags = DIExpression::ApplyOffset;
bool IsIndirect = Loc.wasIndirect();
if (Spilled) {
- auto Deref = IsIndirect ? DIExpression::WithDeref : DIExpression::NoDeref;
+ if (IsIndirect)
+ DIExprFlags |= DIExpression::DerefAfter;
Expr =
- DIExpression::prepend(Expr, DIExpression::NoDeref, SpillOffset, Deref);
+ DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
IsIndirect = true;
}
@@ -1247,6 +1324,15 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
} while (I != MBB->end());
}
+void UserLabel::insertDebugLabel(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ ++NumInsertedDebugLabels;
+ BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_LABEL))
+ .addMetadata(Label);
+}
+
void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
const TargetInstrInfo &TII,
const TargetRegisterInfo &TRI,
@@ -1262,12 +1348,6 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
bool Spilled = SpillIt != SpillOffsets.end();
unsigned SpillOffset = Spilled ? SpillIt->second : 0;
- // If the interval start was trimmed to the lexical scope insert the
- // DBG_VALUE at the previous index (otherwise it appears after the
- // first instruction in the range).
- if (trimmedDefs.count(Start))
- Start = Start.getPrevIndex();
-
LLVM_DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << Loc.locNo());
MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start)->getIterator();
SlotIndex MBBEnd = LIS.getMBBEndIdx(&*MBB);
@@ -1295,16 +1375,31 @@ void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
}
}
+void UserLabel::emitDebugLabel(LiveIntervals &LIS, const TargetInstrInfo &TII) {
+ LLVM_DEBUG(dbgs() << "\t" << loc);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(loc)->getIterator();
+
+ LLVM_DEBUG(dbgs() << ' ' << printMBBReference(*MBB));
+ insertDebugLabel(&*MBB, loc, LIS, TII);
+
+ LLVM_DEBUG(dbgs() << '\n');
+}
+
void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
if (!MF)
return;
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
SpillOffsetMap SpillOffsets;
- for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
- LLVM_DEBUG(userValues[i]->print(dbgs(), TRI));
- userValues[i]->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
- userValues[i]->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+ for (auto &userValue : userValues) {
+ LLVM_DEBUG(userValue->print(dbgs(), TRI));
+ userValue->rewriteLocations(*VRM, *MF, *TII, *TRI, SpillOffsets);
+ userValue->emitDebugValues(VRM, *LIS, *TII, *TRI, SpillOffsets);
+ }
+ LLVM_DEBUG(dbgs() << "********** EMITTING LIVE DEBUG LABELS **********\n");
+ for (auto &userLabel : userLabels) {
+ LLVM_DEBUG(userLabel->print(dbgs(), TRI));
+ userLabel->emitDebugLabel(*LIS, *TII);
}
EmitDone = true;
}
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
index 0060399c2b04..0cbe10c6a422 100644
--- a/lib/CodeGen/LiveDebugVariables.h
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -1,9 +1,8 @@
//===- LiveDebugVariables.h - Tracking debug info variables -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 2340b6abd87c..70b2a77fe800 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -1,9 +1,8 @@
//===- LiveInterval.cpp - Live Interval Representation --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -297,9 +296,7 @@ private:
iterator find(SlotIndex Pos) { return LR->find(Pos); }
- iterator findInsertPos(Segment S) {
- return std::upper_bound(LR->begin(), LR->end(), S.start);
- }
+ iterator findInsertPos(Segment S) { return llvm::upper_bound(*LR, S.start); }
};
//===----------------------------------------------------------------------===//
@@ -880,8 +877,53 @@ void LiveInterval::clearSubRanges() {
SubRanges = nullptr;
}
-void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
- LaneBitmask LaneMask, std::function<void(LiveInterval::SubRange&)> Apply) {
+/// For each VNI in \p SR, check whether that value defines part of the
+/// mask described by \p LaneMask and, if not, remove that value from \p SR.
+static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
+ LaneBitmask LaneMask,
+ const SlotIndexes &Indexes,
+ const TargetRegisterInfo &TRI) {
+ // Phys reg should not be tracked at subreg level.
+ // Same for noreg (Reg == 0).
+ if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+ return;
+ // Remove the values that don't define those lanes.
+ SmallVector<VNInfo *, 8> ToBeRemoved;
+ for (VNInfo *VNI : SR.valnos) {
+ if (VNI->isUnused())
+ continue;
+ // PHI definitions don't have MI attached, so there is nothing
+ // we can use to strip the VNI.
+ if (VNI->isPHIDef())
+ continue;
+ const MachineInstr *MI = Indexes.getInstructionFromIndex(VNI->def);
+ assert(MI && "Cannot find the definition of a value");
+ bool hasDef = false;
+ for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ if (MOI->getReg() != Reg)
+ continue;
+ if ((TRI.getSubRegIndexLaneMask(MOI->getSubReg()) & LaneMask).none())
+ continue;
+ hasDef = true;
+ break;
+ }
+
+ if (!hasDef)
+ ToBeRemoved.push_back(VNI);
+ }
+ for (VNInfo *VNI : ToBeRemoved)
+ SR.removeValNo(VNI);
+
+ assert(!SR.empty() && "At least one value should be defined by this mask");
+}
+
+void LiveInterval::refineSubRanges(
+ BumpPtrAllocator &Allocator, LaneBitmask LaneMask,
+ std::function<void(LiveInterval::SubRange &)> Apply,
+ const SlotIndexes &Indexes, const TargetRegisterInfo &TRI) {
LaneBitmask ToApply = LaneMask;
for (SubRange &SR : subranges()) {
LaneBitmask SRMask = SR.LaneMask;
@@ -899,6 +941,10 @@ void LiveInterval::refineSubRanges(BumpPtrAllocator &Allocator,
SR.LaneMask = SRMask & ~Matching;
// Create a new subrange for the matching part
MatchingRange = createSubRangeFrom(Allocator, Matching, SR);
+ // Now that the subrange is split in two, make sure each half keeps only
+ // the VNIs that define its own lanes.
+ stripValuesNotDefiningMask(reg, *MatchingRange, Matching, Indexes, TRI);
+ stripValuesNotDefiningMask(reg, SR, SR.LaneMask, Indexes, TRI);
}
Apply(*MatchingRange);
ToApply &= ~Matching;
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
index 36428e0335f9..43fa8f2d7157 100644
--- a/lib/CodeGen/LiveIntervalUnion.cpp
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -1,9 +1,8 @@
//===- LiveIntervalUnion.cpp - Live interval union data structure ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index 471775f8706b..aa85569063b3 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -1,9 +1,8 @@
//===- LiveIntervals.cpp - Live Interval Analysis -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -901,8 +900,7 @@ bool LiveIntervals::checkRegMaskInterference(LiveInterval &LI,
// We are going to enumerate all the register mask slots contained in LI.
// Start with a binary search of RegMaskSlots to find a starting point.
- ArrayRef<SlotIndex>::iterator SlotI =
- std::lower_bound(Slots.begin(), Slots.end(), LiveI->start);
+ ArrayRef<SlotIndex>::iterator SlotI = llvm::lower_bound(Slots, LiveI->start);
ArrayRef<SlotIndex>::iterator SlotE = Slots.end();
// No slots in range, LI begins after the last call.
@@ -1371,8 +1369,7 @@ private:
void updateRegMaskSlots() {
SmallVectorImpl<SlotIndex>::iterator RI =
- std::lower_bound(LIS.RegMaskSlots.begin(), LIS.RegMaskSlots.end(),
- OldIdx);
+ llvm::lower_bound(LIS.RegMaskSlots, OldIdx);
assert(RI != LIS.RegMaskSlots.end() && *RI == OldIdx.getRegSlot() &&
"No RegMask at OldIdx.");
*RI = NewIdx.getRegSlot();
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index 619643acb6d3..cd3d248ac878 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -1,9 +1,8 @@
//===--- LivePhysRegs.cpp - Live Physical Register Set --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index 70e135ab1aff..d670f28df6ba 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -1,9 +1,8 @@
//===- LiveRangeCalc.cpp - Calculate live ranges --------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,10 +95,11 @@ void LiveRangeCalc::calculate(LiveInterval &LI, bool TrackSubRegs) {
}
LI.refineSubRanges(*Alloc, SubMask,
- [&MO, this](LiveInterval::SubRange &SR) {
- if (MO.isDef())
- createDeadDef(*Indexes, *Alloc, SR, MO);
- });
+ [&MO, this](LiveInterval::SubRange &SR) {
+ if (MO.isDef())
+ createDeadDef(*Indexes, *Alloc, SR, MO);
+ },
+ *Indexes, TRI);
}
// Create the def in the main liverange. We do not have to do this if
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
index 9f226b154a67..11aea5a3b016 100644
--- a/lib/CodeGen/LiveRangeCalc.h
+++ b/lib/CodeGen/LiveRangeCalc.h
@@ -1,9 +1,8 @@
//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 8dfe8b68c3af..882e562ba95c 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -1,9 +1,8 @@
//===-- LiveRangeEdit.cpp - Basic tools for editing a register live range -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -232,6 +231,8 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
return false;
LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
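+ // When a call is folded away, migrate its call site info to the folded
+ // instruction before erasing it.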
+ if (UseMI->isCall())
+ UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index f75d513c89f5..8818f1ce0ad9 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -1,9 +1,8 @@
//===- LiveRangeShrink.cpp - Move instructions to shrink live range -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
///===---------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/LiveRangeUtils.h b/lib/CodeGen/LiveRangeUtils.h
index bd57609c3d84..0e6bfeb0d4a5 100644
--- a/lib/CodeGen/LiveRangeUtils.h
+++ b/lib/CodeGen/LiveRangeUtils.h
@@ -1,9 +1,8 @@
//===-- LiveRangeUtils.h - Live Range modification utilities ----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index e72977b02675..ce99e5535c25 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -1,9 +1,8 @@
//===- LiveRegMatrix.cpp - Track register interference --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index c22681385492..6afb7fb7aa11 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -1,9 +1,8 @@
//===- LiveRegUnits.cpp - Register Unit Set -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -126,13 +125,15 @@ void LiveRegUnits::addPristines(const MachineFunction &MF) {
void LiveRegUnits::addLiveOuts(const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
- if (!MBB.succ_empty()) {
- addPristines(MF);
- // To get the live-outs we simply merge the live-ins of all successors.
- for (const MachineBasicBlock *Succ : MBB.successors())
- addBlockLiveIns(*this, *Succ);
- } else if (MBB.isReturnBlock()) {
- // For the return block: Add all callee saved registers.
+
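+ // Pristine registers are preserved through the whole function, so they
+ // are live-out of every block regardless of its successors.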
+ addPristines(MF);
+
+ // To get the live-outs we simply merge the live-ins of all successors.
+ for (const MachineBasicBlock *Succ : MBB.successors())
+ addBlockLiveIns(*this, *Succ);
+
+ // For the return block: Add all callee saved registers.
+ if (MBB.isReturnBlock()) {
const MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.isCalleeSavedInfoValid())
addCalleeSavedRegs(*this, MF);
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index 80ecfdb7a507..f55977d72723 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -1,9 +1,8 @@
//===-- LiveStacks.cpp - Live Stack Slot Analysis -------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 0b92eab83806..aaff982ef1b0 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -1,9 +1,8 @@
//===-- LiveVariables.cpp - Live Variable Analysis for Machine Code -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -401,7 +400,7 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) {
true/*IsImp*/, true/*IsKill*/));
else {
MachineOperand *MO =
- LastRefOrPartRef->findRegisterDefOperand(Reg, false, TRI);
+ LastRefOrPartRef->findRegisterDefOperand(Reg, false, false, TRI);
bool NeedEC = MO->isEarlyClobber() && MO->getReg() != Reg;
// If the last reference is the last def, then it's not used at all.
// That is, unless we are currently processing the last reference itself.
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 795028e97929..b14d76a585f7 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -1,9 +1,8 @@
//===- LocalStackSlotAllocation.cpp - Pre-allocate locals to stack slots --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -200,19 +199,27 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI.getStackProtectorIndex() >= 0) {
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
+
+ // The stack protector must not have been pre-allocated at this point:
+ // re-assigning a pre-allocated protector here would move it to a slot
+ // that is **not** covering the protected objects.
+ assert(!MFI.isObjectPreAllocated(StackProtectorFI) &&
+ "Stack protector pre-allocated in LocalStackSlotAllocation");
+
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), Offset,
- StackGrowsDown, MaxAlign);
+ AdjustStackOffset(MFI, StackProtectorFI, Offset, StackGrowsDown, MaxAlign);
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i)
+ if (StackProtectorFI == (int)i)
continue;
switch (MFI.getObjectSSPLayout(i)) {
diff --git a/lib/CodeGen/LoopTraversal.cpp b/lib/CodeGen/LoopTraversal.cpp
index a02d10e09d7d..9490dfc40a82 100644
--- a/lib/CodeGen/LoopTraversal.cpp
+++ b/lib/CodeGen/LoopTraversal.cpp
@@ -1,9 +1,8 @@
//===- LoopTraversal.cpp - Optimal basic block traversal order --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/LowLevelType.cpp b/lib/CodeGen/LowLevelType.cpp
index 1c682e72fa49..ca0daa14fedf 100644
--- a/lib/CodeGen/LowLevelType.cpp
+++ b/lib/CodeGen/LowLevelType.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/LowLevelType.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index 36c1d358a9bd..c8cf6abda4fc 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -1,9 +1,8 @@
//===- LowerEmuTLS.cpp - Add __emutls_[vt].* variables --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index f17c23619ed5..f49bc854e23f 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -1,9 +1,8 @@
//===-------------- MIRCanonicalizer.cpp - MIR Canonicalizer --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -105,6 +104,8 @@ INITIALIZE_PASS_END(MIRCanonicalizer, "mir-canonicalizer",
"Rename Register Operands Canonically", false, false)
static std::vector<MachineBasicBlock *> GetRPOList(MachineFunction &MF) {
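+ // An empty function has no entry block for the RPO traversal to start
+ // from, so bail out early.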
+ if (MF.empty())
+ return {};
ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
std::vector<MachineBasicBlock *> RPOList;
for (auto MBB : RPOT) {
@@ -179,6 +180,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
std::map<MachineInstr *, std::vector<MachineInstr *>> MultiUsers;
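+ // Record the users in insertion order so the later reschedule walk is
+ // deterministic instead of depending on MachineInstr pointer ordering.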
+ std::map<unsigned, MachineInstr *> MultiUserLookup;
+ unsigned UseToBringDefCloserToCount = 0;
std::vector<MachineInstr *> PseudoIdempotentInstructions;
std::vector<unsigned> PhysRegDefs;
for (auto *II : Instructions) {
@@ -254,6 +257,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (Delta < Distance) {
Distance = Delta;
UseToBringDefCloserTo = UseInst;
+ MultiUserLookup[UseToBringDefCloserToCount++] = UseToBringDefCloserTo;
}
}
@@ -293,11 +297,11 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
// Sort the defs for users of multiple defs lexicographically.
- for (const auto &E : MultiUsers) {
+ for (const auto &E : MultiUserLookup) {
auto UseI =
std::find_if(MBB->instr_begin(), MBB->instr_end(),
- [&](MachineInstr &MI) -> bool { return &MI == E.first; });
+ [&](MachineInstr &MI) -> bool { return &MI == E.second; });
if (UseI == MBB->instr_end())
continue;
@@ -305,7 +309,8 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
LLVM_DEBUG(
dbgs() << "Rescheduling Multi-Use Instructions Lexographically.";);
Changed |= rescheduleLexographically(
- E.second, MBB, [&]() -> MachineBasicBlock::iterator { return UseI; });
+ MultiUsers[E.second], MBB,
+ [&]() -> MachineBasicBlock::iterator { return UseI; });
}
PseudoIdempotentInstCount = PseudoIdempotentInstructions.size();
@@ -342,15 +347,23 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
continue;
if (!TargetRegisterInfo::isVirtualRegister(Src))
continue;
+ // Don't fold COPY instructions if regbankselect has not set the register
+ // classes yet. We only consider register classes (not types) because the
+ // verifier sometimes gets upset if the register classes don't match even
+ // when the types do. A future patch might add COPY folding for matching
+ // types in pre-regbankselect code.
+ if (!MRI.getRegClassOrNull(Dst))
+ continue;
if (MRI.getRegClass(Dst) != MRI.getRegClass(Src))
continue;
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- MachineOperand *MO = &*UI;
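+ // Snapshot the use list first: setReg() mutates the use-chain being
+ // walked, which would invalidate the use_iterator mid-loop.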
+ std::vector<MachineOperand *> Uses;
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI)
+ Uses.push_back(&*UI);
+ for (auto *MO : Uses)
MO->setReg(Src);
- Changed = true;
- }
+ Changed = true;
MI->eraseFromParent();
}
@@ -474,18 +487,14 @@ class NamedVRegCursor {
unsigned virtualVRegNumber;
public:
- NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI) {
- unsigned VRegGapIndex = 0;
- const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
- unsigned I = MRI.createIncompleteVirtualRegister();
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
- }
+ NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
void SkipVRegs() {
unsigned VRegGapIndex = 1;
+ if (!virtualVRegNumber) {
+ VRegGapIndex = 0;
+ virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+ }
const unsigned VR_GAP = (++VRegGapIndex * 1000);
unsigned I = virtualVRegNumber;
@@ -501,14 +510,17 @@ public:
return virtualVRegNumber;
}
- unsigned createVirtualRegister(const TargetRegisterClass *RC) {
+ unsigned createVirtualRegister(unsigned VReg) {
+ if (!virtualVRegNumber)
+ SkipVRegs();
std::string S;
raw_string_ostream OS(S);
OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
OS.flush();
virtualVRegNumber++;
-
- return MRI.createVirtualRegister(RC, OS.str());
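+ // Generic virtual registers (pre-regbankselect) have no register class;
+ // fall back to cloning the low-level type instead.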
+ if (auto RC = MRI.getRegClassOrNull(VReg))
+ return MRI.createVirtualRegister(RC, OS.str());
+ return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
}
};
} // namespace
@@ -558,7 +570,7 @@ GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
continue;
}
- auto Rename = NVC.createVirtualRegister(MRI.getRegClass(Reg));
+ auto Rename = NVC.createVirtualRegister(Reg);
if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
LLVM_DEBUG(dbgs() << "Mapping vreg ";);
@@ -735,14 +747,15 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
// of the MachineBasicBlock so that they are named in the order that we sorted
them alphabetically. Eventually we won't need SkipVRegs because we will use
// named vregs instead.
- NVC.SkipVRegs();
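+ // SkipVRegs() now creates a placeholder vreg on first use, so only call
+ // it when there are idempotent instructions to rename.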
+ if (IdempotentInstCount)
+ NVC.SkipVRegs();
auto MII = MBB->begin();
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
MachineInstr &MI = *MII++;
Changed = true;
unsigned vRegToRename = MI.getOperand(0).getReg();
- auto Rename = NVC.createVirtualRegister(MRI.getRegClass(vRegToRename));
+ auto Rename = NVC.createVirtualRegister(vRegToRename);
std::vector<MachineOperand *> RenameMOs;
for (auto &MO : MRI.reg_operands(vRegToRename)) {
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 265877c2f5b4..4899bd3f5811 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -1,9 +1,8 @@
//===- MILexer.cpp - Machine instructions lexer implementation ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -205,6 +204,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("nuw" , MIToken::kw_nuw)
.Case("nsw" , MIToken::kw_nsw)
.Case("exact" , MIToken::kw_exact)
+ .Case("fpexcept", MIToken::kw_fpexcept)
.Case("debug-location", MIToken::kw_debug_location)
.Case("same_value", MIToken::kw_cfi_same_value)
.Case("offset", MIToken::kw_cfi_offset)
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index ceff79087d81..0fe3f9f706db 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -1,9 +1,8 @@
//===- MILexer.h - Lexer for machine instructions ---------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,6 +73,7 @@ struct MIToken {
kw_nuw,
kw_nsw,
kw_exact,
+ kw_fpexcept,
kw_debug_location,
kw_cfi_same_value,
kw_cfi_offset,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index 6f2d8bb53ac8..c0b800a0b870 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -1,9 +1,8 @@
//===- MIParser.cpp - Machine instructions parser implementation ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -11,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "MIParser.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "MILexer.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/APSInt.h"
@@ -27,6 +26,8 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/AsmParser/Parser.h"
#include "llvm/AsmParser/SlotMapping.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
+#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -81,12 +82,242 @@
using namespace llvm;
+void PerTargetMIParsingState::setTarget(
+ const TargetSubtargetInfo &NewSubtarget) {
+
+ // If the subtarget changed, conservatively assume everything is invalid.
+ if (&Subtarget == &NewSubtarget)
+ return;
+
+ Names2InstrOpCodes.clear();
+ Names2Regs.clear();
+ Names2RegMasks.clear();
+ Names2SubRegIndices.clear();
+ Names2TargetIndices.clear();
+ Names2DirectTargetFlags.clear();
+ Names2BitmaskTargetFlags.clear();
+ Names2MMOTargetFlags.clear();
+
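+ // Unlike the other tables, the register class and register bank maps are
+ // not built lazily by their getters, so (re)initialize them here.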
+ initNames2RegClasses();
+ initNames2RegBanks();
+}
+
+void PerTargetMIParsingState::initNames2Regs() {
+ if (!Names2Regs.empty())
+ return;
+
+ // The '%noreg' register is the register 0.
+ Names2Regs.insert(std::make_pair("noreg", 0));
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+
+ for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
+ bool WasInserted =
+ Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
+ .second;
+ (void)WasInserted;
+ assert(WasInserted && "Expected registers to be unique case-insensitively");
+ }
+}
+
+bool PerTargetMIParsingState::getRegisterByName(StringRef RegName,
+ unsigned &Reg) {
+ initNames2Regs();
+ auto RegInfo = Names2Regs.find(RegName);
+ if (RegInfo == Names2Regs.end())
+ return true;
+ Reg = RegInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2InstrOpCodes() {
+ if (!Names2InstrOpCodes.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
+ Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
+}
+
+bool PerTargetMIParsingState::parseInstrName(StringRef InstrName,
+ unsigned &OpCode) {
+ initNames2InstrOpCodes();
+ auto InstrInfo = Names2InstrOpCodes.find(InstrName);
+ if (InstrInfo == Names2InstrOpCodes.end())
+ return true;
+ OpCode = InstrInfo->getValue();
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegMasks() {
+ if (!Names2RegMasks.empty())
+ return;
+ const auto *TRI = Subtarget.getRegisterInfo();
+ assert(TRI && "Expected target register info");
+ ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
+ ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
+ assert(RegMasks.size() == RegMaskNames.size());
+ for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
+ Names2RegMasks.insert(
+ std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
+}
+
+const uint32_t *PerTargetMIParsingState::getRegMask(StringRef Identifier) {
+ initNames2RegMasks();
+ auto RegMaskInfo = Names2RegMasks.find(Identifier);
+ if (RegMaskInfo == Names2RegMasks.end())
+ return nullptr;
+ return RegMaskInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2SubRegIndices() {
+ if (!Names2SubRegIndices.empty())
+ return;
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
+ Names2SubRegIndices.insert(
+ std::make_pair(TRI->getSubRegIndexName(I), I));
+}
+
+unsigned PerTargetMIParsingState::getSubRegIndex(StringRef Name) {
+ initNames2SubRegIndices();
+ auto SubRegInfo = Names2SubRegIndices.find(Name);
+ if (SubRegInfo == Names2SubRegIndices.end())
+ return 0;
+ return SubRegInfo->getValue();
+}
+
+void PerTargetMIParsingState::initNames2TargetIndices() {
+ if (!Names2TargetIndices.empty())
+ return;
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Indices = TII->getSerializableTargetIndices();
+ for (const auto &I : Indices)
+ Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getTargetIndex(StringRef Name, int &Index) {
+ initNames2TargetIndices();
+ auto IndexInfo = Names2TargetIndices.find(Name);
+ if (IndexInfo == Names2TargetIndices.end())
+ return true;
+ Index = IndexInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2DirectTargetFlags() {
+ if (!Names2DirectTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2DirectTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getDirectTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2DirectTargetFlags();
+ auto FlagInfo = Names2DirectTargetFlags.find(Name);
+ if (FlagInfo == Names2DirectTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2BitmaskTargetFlags() {
+ if (!Names2BitmaskTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2BitmaskTargetFlags.insert(
+ std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getBitmaskTargetFlag(StringRef Name,
+ unsigned &Flag) {
+ initNames2BitmaskTargetFlags();
+ auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
+ if (FlagInfo == Names2BitmaskTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2MMOTargetFlags() {
+ if (!Names2MMOTargetFlags.empty())
+ return;
+
+ const auto *TII = Subtarget.getInstrInfo();
+ assert(TII && "Expected target instruction info");
+ auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
+ for (const auto &I : Flags)
+ Names2MMOTargetFlags.insert(std::make_pair(StringRef(I.second), I.first));
+}
+
+bool PerTargetMIParsingState::getMMOTargetFlag(StringRef Name,
+ MachineMemOperand::Flags &Flag) {
+ initNames2MMOTargetFlags();
+ auto FlagInfo = Names2MMOTargetFlags.find(Name);
+ if (FlagInfo == Names2MMOTargetFlags.end())
+ return true;
+ Flag = FlagInfo->second;
+ return false;
+}
+
+void PerTargetMIParsingState::initNames2RegClasses() {
+ if (!Names2RegClasses.empty())
+ return;
+
+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
+ for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
+ const auto *RC = TRI->getRegClass(I);
+ Names2RegClasses.insert(
+ std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
+ }
+}
+
+void PerTargetMIParsingState::initNames2RegBanks() {
+ if (!Names2RegBanks.empty())
+ return;
+
+ const RegisterBankInfo *RBI = Subtarget.getRegBankInfo();
+ // If the target does not support GlobalISel, we may not have a
+ // register bank info.
+ if (!RBI)
+ return;
+
+ for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
+ const auto &RegBank = RBI->getRegBank(I);
+ Names2RegBanks.insert(
+ std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
+ }
+}
+
+const TargetRegisterClass *
+PerTargetMIParsingState::getRegClass(StringRef Name) {
+ auto RegClassInfo = Names2RegClasses.find(Name);
+ if (RegClassInfo == Names2RegClasses.end())
+ return nullptr;
+ return RegClassInfo->getValue();
+}
+
+const RegisterBank *PerTargetMIParsingState::getRegBank(StringRef Name) {
+ auto RegBankInfo = Names2RegBanks.find(Name);
+ if (RegBankInfo == Names2RegBanks.end())
+ return nullptr;
+ return RegBankInfo->getValue();
+}
+
PerFunctionMIParsingState::PerFunctionMIParsingState(MachineFunction &MF,
- SourceMgr &SM, const SlotMapping &IRSlots,
- const Name2RegClassMap &Names2RegClasses,
- const Name2RegBankMap &Names2RegBanks)
- : MF(MF), SM(&SM), IRSlots(IRSlots), Names2RegClasses(Names2RegClasses),
- Names2RegBanks(Names2RegBanks) {
+ SourceMgr &SM, const SlotMapping &IRSlots, PerTargetMIParsingState &T)
+ : MF(MF), SM(&SM), IRSlots(IRSlots), Target(T) {
}
VRegInfo &PerFunctionMIParsingState::getVRegInfo(unsigned Num) {
@@ -137,26 +368,10 @@ class MIParser {
StringRef Source, CurrentSource;
MIToken Token;
PerFunctionMIParsingState &PFS;
- /// Maps from instruction names to op codes.
- StringMap<unsigned> Names2InstrOpCodes;
- /// Maps from register names to registers.
- StringMap<unsigned> Names2Regs;
- /// Maps from register mask names to register masks.
- StringMap<const uint32_t *> Names2RegMasks;
- /// Maps from subregister names to subregister indices.
- StringMap<unsigned> Names2SubRegIndices;
/// Maps from slot numbers to function's unnamed basic blocks.
DenseMap<unsigned, const BasicBlock *> Slots2BasicBlocks;
/// Maps from slot numbers to function's unnamed values.
DenseMap<unsigned, const Value *> Slots2Values;
- /// Maps from target index names to target indices.
- StringMap<int> Names2TargetIndices;
- /// Maps from direct target flag names to the direct target flag values.
- StringMap<unsigned> Names2DirectTargetFlags;
- /// Maps from direct target flag names to the bitmask target flag values.
- StringMap<unsigned> Names2BitmaskTargetFlags;
- /// Maps from MMO target flag names to MMO target flag values.
- StringMap<MachineMemOperand::Flags> Names2MMOTargetFlags;
public:
MIParser(PerFunctionMIParsingState &PFS, SMDiagnostic &Error,
@@ -281,12 +496,6 @@ private:
/// Otherwise return false.
bool consumeIfPresent(MIToken::TokenKind TokenKind);
- void initNames2InstrOpCodes();
-
- /// Try to convert an instruction name to an opcode. Return true if the
- /// instruction name is invalid.
- bool parseInstrName(StringRef InstrName, unsigned &OpCode);
-
bool parseInstruction(unsigned &OpCode, unsigned &Flags);
bool assignRegisterTies(MachineInstr &MI,
@@ -295,62 +504,11 @@ private:
bool verifyImplicitOperands(ArrayRef<ParsedMachineOperand> Operands,
const MCInstrDesc &MCID);
- void initNames2Regs();
-
- /// Try to convert a register name to a register number. Return true if the
- /// register name is invalid.
- bool getRegisterByName(StringRef RegName, unsigned &Reg);
-
- void initNames2RegMasks();
-
- /// Check if the given identifier is a name of a register mask.
- ///
- /// Return null if the identifier isn't a register mask.
- const uint32_t *getRegMask(StringRef Identifier);
-
- void initNames2SubRegIndices();
-
- /// Check if the given identifier is a name of a subregister index.
- ///
- /// Return 0 if the name isn't a subregister index class.
- unsigned getSubRegIndex(StringRef Name);
-
const BasicBlock *getIRBlock(unsigned Slot);
const BasicBlock *getIRBlock(unsigned Slot, const Function &F);
const Value *getIRValue(unsigned Slot);
- void initNames2TargetIndices();
-
- /// Try to convert a name of target index to the corresponding target index.
- ///
- /// Return true if the name isn't a name of a target index.
- bool getTargetIndex(StringRef Name, int &Index);
-
- void initNames2DirectTargetFlags();
-
- /// Try to convert a name of a direct target flag to the corresponding
- /// target flag.
- ///
- /// Return true if the name isn't a name of a direct flag.
- bool getDirectTargetFlag(StringRef Name, unsigned &Flag);
-
- void initNames2BitmaskTargetFlags();
-
- /// Try to convert a name of a bitmask target flag to the corresponding
- /// target flag.
- ///
- /// Return true if the name isn't a name of a bitmask target flag.
- bool getBitmaskTargetFlag(StringRef Name, unsigned &Flag);
-
- void initNames2MMOTargetFlags();
-
- /// Try to convert a name of a MachineMemOperand target flag to the
- /// corresponding target flag.
- ///
- /// Return true if the name isn't a name of a target MMO flag.
- bool getMMOTargetFlag(StringRef Name, MachineMemOperand::Flags &Flag);
-
/// Get or create an MCSymbol for a given name.
MCSymbol *getOrCreateMCSymbol(StringRef Name);
@@ -978,7 +1136,8 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Token.is(MIToken::kw_reassoc) ||
Token.is(MIToken::kw_nuw) ||
Token.is(MIToken::kw_nsw) ||
- Token.is(MIToken::kw_exact)) {
+ Token.is(MIToken::kw_exact) ||
+ Token.is(MIToken::kw_fpexcept)) {
// Mine frame and fast math flags
if (Token.is(MIToken::kw_frame_setup))
Flags |= MachineInstr::FrameSetup;
@@ -1004,13 +1163,15 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
Flags |= MachineInstr::NoSWrap;
if (Token.is(MIToken::kw_exact))
Flags |= MachineInstr::IsExact;
+ if (Token.is(MIToken::kw_fpexcept))
+ Flags |= MachineInstr::FPExcept;
lex();
}
if (Token.isNot(MIToken::Identifier))
return error("expected a machine instruction");
StringRef InstrName = Token.stringValue();
- if (parseInstrName(InstrName, OpCode))
+ if (PFS.Target.parseInstrName(InstrName, OpCode))
return error(Twine("unknown machine instruction name '") + InstrName + "'");
lex();
return false;
@@ -1019,7 +1180,7 @@ bool MIParser::parseInstruction(unsigned &OpCode, unsigned &Flags) {
bool MIParser::parseNamedRegister(unsigned &Reg) {
assert(Token.is(MIToken::NamedRegister) && "Needs NamedRegister token");
StringRef Name = Token.stringValue();
- if (getRegisterByName(Name, Reg))
+ if (PFS.Target.getRegisterByName(Name, Reg))
return error(Twine("unknown register name '") + Name + "'");
return false;
}
@@ -1070,21 +1231,20 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
StringRef Name = Token.stringValue();
// Was it a register class?
- auto RCNameI = PFS.Names2RegClasses.find(Name);
- if (RCNameI != PFS.Names2RegClasses.end()) {
+ const TargetRegisterClass *RC = PFS.Target.getRegClass(Name);
+ if (RC) {
lex();
- const TargetRegisterClass &RC = *RCNameI->getValue();
switch (RegInfo.Kind) {
case VRegInfo::UNKNOWN:
case VRegInfo::NORMAL:
RegInfo.Kind = VRegInfo::NORMAL;
- if (RegInfo.Explicit && RegInfo.D.RC != &RC) {
+ if (RegInfo.Explicit && RegInfo.D.RC != RC) {
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
return error(Loc, Twine("conflicting register classes, previously: ") +
Twine(TRI.getRegClassName(RegInfo.D.RC)));
}
- RegInfo.D.RC = &RC;
+ RegInfo.D.RC = RC;
RegInfo.Explicit = true;
return false;
@@ -1098,10 +1258,9 @@ bool MIParser::parseRegisterClassOrBank(VRegInfo &RegInfo) {
// Should be a register bank or a generic register.
const RegisterBank *RegBank = nullptr;
if (Name != "_") {
- auto RBNameI = PFS.Names2RegBanks.find(Name);
- if (RBNameI == PFS.Names2RegBanks.end())
+ RegBank = PFS.Target.getRegBank(Name);
+ if (!RegBank)
return error(Loc, "expected '_', register class, or register bank name");
- RegBank = RBNameI->getValue();
}
lex();
@@ -1173,7 +1332,7 @@ bool MIParser::parseSubRegisterIndex(unsigned &SubReg) {
if (Token.isNot(MIToken::Identifier))
return error("expected a subregister index after '.'");
auto Name = Token.stringValue();
- SubReg = getSubRegIndex(Name);
+ SubReg = PFS.Target.getSubRegIndex(Name);
if (!SubReg)
return error(Twine("use of unknown subregister index '") + Name + "'");
lex();
@@ -1341,6 +1500,19 @@ bool MIParser::parseIRConstant(StringRef::iterator Loc, const Constant *&C) {
return false;
}
+// See the LLT implementation for bit size limits.
+static bool verifyScalarSize(uint64_t Size) {
+ return Size != 0 && isUInt<16>(Size);
+}
+
+static bool verifyVectorElementCount(uint64_t NumElts) {
+ return NumElts != 0 && isUInt<16>(NumElts);
+}
+
+static bool verifyAddrSpace(uint64_t AddrSpace) {
+ return isUInt<24>(AddrSpace);
+}
+
bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.range().front() == 's' || Token.range().front() == 'p') {
StringRef SizeStr = Token.range().drop_front();
@@ -1349,12 +1521,19 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
}
if (Token.range().front() == 's') {
- Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+
+ Ty = LLT::scalar(ScalarSize);
lex();
return false;
} else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
- unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
lex();
return false;
@@ -1369,6 +1548,9 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (Token.isNot(MIToken::IntegerLiteral))
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
uint64_t NumElements = Token.integerValue().getZExtValue();
+ if (!verifyVectorElementCount(NumElements))
+ return error("invalid number of vector elements");
+
lex();
if (Token.isNot(MIToken::Identifier) || Token.stringValue() != "x")
@@ -1381,11 +1563,17 @@ bool MIParser::parseLowLevelType(StringRef::iterator Loc, LLT &Ty) {
if (SizeStr.size() == 0 || !llvm::all_of(SizeStr, isdigit))
return error("expected integers after 's'/'p' type character");
- if (Token.range().front() == 's')
- Ty = LLT::scalar(APSInt(Token.range().drop_front()).getZExtValue());
- else if (Token.range().front() == 'p') {
+ if (Token.range().front() == 's') {
+ auto ScalarSize = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyScalarSize(ScalarSize))
+ return error("invalid size for scalar type");
+ Ty = LLT::scalar(ScalarSize);
+ } else if (Token.range().front() == 'p') {
const DataLayout &DL = MF.getDataLayout();
- unsigned AS = APSInt(Token.range().drop_front()).getZExtValue();
+ uint64_t AS = APSInt(Token.range().drop_front()).getZExtValue();
+ if (!verifyAddrSpace(AS))
+ return error("invalid address space number");
+
Ty = LLT::pointer(AS, DL.getPointerSizeInBits(AS));
} else
return error(Loc, "expected <M x sN> or <M x pA> for vector type");
@@ -1625,7 +1813,7 @@ bool MIParser::parseMCSymbolOperand(MachineOperand &Dest) {
bool MIParser::parseSubRegisterIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::SubRegisterIndex));
StringRef Name = Token.stringValue();
- unsigned SubRegIndex = getSubRegIndex(Token.stringValue());
+ unsigned SubRegIndex = PFS.Target.getSubRegIndex(Token.stringValue());
if (SubRegIndex == 0)
return error(Twine("unknown subregister index '") + Name + "'");
lex();
@@ -1669,6 +1857,11 @@ bool MIParser::parseDIExpression(MDNode *&Expr) {
Elements.push_back(Op);
continue;
}
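+ // Attribute encodings (e.g. DW_ATE_signed) are also valid operands here,
+ // as used by DW_OP_LLVM_convert.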
+ if (unsigned Enc = dwarf::getAttributeEncoding(Token.stringValue())) {
+ lex();
+ Elements.push_back(Enc);
+ continue;
+ }
return error(Twine("invalid DWARF op '") + Token.stringValue() + "'");
}
@@ -2100,7 +2293,7 @@ bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target index");
int Index = 0;
- if (getTargetIndex(Token.stringValue(), Index))
+ if (PFS.Target.getTargetIndex(Token.stringValue(), Index))
return error("use of undefined target index '" + Token.stringValue() + "'");
lex();
if (expectAndConsume(MIToken::rparen))
@@ -2242,7 +2435,7 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::Error:
return true;
case MIToken::Identifier:
- if (const auto *RegMask = getRegMask(Token.stringValue())) {
+ if (const auto *RegMask = PFS.Target.getRegMask(Token.stringValue())) {
Dest = MachineOperand::CreateRegMask(RegMask);
lex();
break;
@@ -2268,8 +2461,8 @@ bool MIParser::parseMachineOperandAndTargetFlags(
return true;
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target flag");
- if (getDirectTargetFlag(Token.stringValue(), TF)) {
- if (getBitmaskTargetFlag(Token.stringValue(), TF))
+ if (PFS.Target.getDirectTargetFlag(Token.stringValue(), TF)) {
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), TF))
return error("use of undefined target flag '" + Token.stringValue() +
"'");
}
@@ -2279,7 +2472,7 @@ bool MIParser::parseMachineOperandAndTargetFlags(
if (Token.isNot(MIToken::Identifier))
return error("expected the name of the target flag");
unsigned BitFlag = 0;
- if (getBitmaskTargetFlag(Token.stringValue(), BitFlag))
+ if (PFS.Target.getBitmaskTargetFlag(Token.stringValue(), BitFlag))
return error("use of undefined target flag '" + Token.stringValue() +
"'");
// TODO: Report an error when using a duplicate bit target flag.
@@ -2325,6 +2518,10 @@ bool MIParser::parseAlignment(unsigned &Alignment) {
if (getUnsigned(Alignment))
return true;
lex();
+
+ if (!isPowerOf2_32(Alignment))
+ return error("expected a power-of-2 literal after 'align'");
+
return false;
}
@@ -2436,7 +2633,7 @@ bool MIParser::parseMemoryOperandFlag(MachineMemOperand::Flags &Flags) {
break;
case MIToken::StringConstant: {
MachineMemOperand::Flags TF;
- if (getMMOTargetFlag(Token.stringValue(), TF))
+ if (PFS.Target.getMMOTargetFlag(Token.stringValue(), TF))
return error("use of undefined target MMO flag '" + Token.stringValue() +
"'");
Flags |= TF;
@@ -2711,87 +2908,6 @@ bool MIParser::parsePreOrPostInstrSymbol(MCSymbol *&Symbol) {
return false;
}
-void MIParser::initNames2InstrOpCodes() {
- if (!Names2InstrOpCodes.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- for (unsigned I = 0, E = TII->getNumOpcodes(); I < E; ++I)
- Names2InstrOpCodes.insert(std::make_pair(StringRef(TII->getName(I)), I));
-}
-
-bool MIParser::parseInstrName(StringRef InstrName, unsigned &OpCode) {
- initNames2InstrOpCodes();
- auto InstrInfo = Names2InstrOpCodes.find(InstrName);
- if (InstrInfo == Names2InstrOpCodes.end())
- return true;
- OpCode = InstrInfo->getValue();
- return false;
-}
-
-void MIParser::initNames2Regs() {
- if (!Names2Regs.empty())
- return;
- // The '%noreg' register is the register 0.
- Names2Regs.insert(std::make_pair("noreg", 0));
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- for (unsigned I = 0, E = TRI->getNumRegs(); I < E; ++I) {
- bool WasInserted =
- Names2Regs.insert(std::make_pair(StringRef(TRI->getName(I)).lower(), I))
- .second;
- (void)WasInserted;
- assert(WasInserted && "Expected registers to be unique case-insensitively");
- }
-}
-
-bool MIParser::getRegisterByName(StringRef RegName, unsigned &Reg) {
- initNames2Regs();
- auto RegInfo = Names2Regs.find(RegName);
- if (RegInfo == Names2Regs.end())
- return true;
- Reg = RegInfo->getValue();
- return false;
-}
-
-void MIParser::initNames2RegMasks() {
- if (!Names2RegMasks.empty())
- return;
- const auto *TRI = MF.getSubtarget().getRegisterInfo();
- assert(TRI && "Expected target register info");
- ArrayRef<const uint32_t *> RegMasks = TRI->getRegMasks();
- ArrayRef<const char *> RegMaskNames = TRI->getRegMaskNames();
- assert(RegMasks.size() == RegMaskNames.size());
- for (size_t I = 0, E = RegMasks.size(); I < E; ++I)
- Names2RegMasks.insert(
- std::make_pair(StringRef(RegMaskNames[I]).lower(), RegMasks[I]));
-}
-
-const uint32_t *MIParser::getRegMask(StringRef Identifier) {
- initNames2RegMasks();
- auto RegMaskInfo = Names2RegMasks.find(Identifier);
- if (RegMaskInfo == Names2RegMasks.end())
- return nullptr;
- return RegMaskInfo->getValue();
-}
-
-void MIParser::initNames2SubRegIndices() {
- if (!Names2SubRegIndices.empty())
- return;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (unsigned I = 1, E = TRI->getNumSubRegIndices(); I < E; ++I)
- Names2SubRegIndices.insert(
- std::make_pair(StringRef(TRI->getSubRegIndexName(I)).lower(), I));
-}
-
-unsigned MIParser::getSubRegIndex(StringRef Name) {
- initNames2SubRegIndices();
- auto SubRegInfo = Names2SubRegIndices.find(Name);
- if (SubRegInfo == Names2SubRegIndices.end())
- return 0;
- return SubRegInfo->getValue();
-}
-
static void initSlots2BasicBlocks(
const Function &F,
DenseMap<unsigned, const BasicBlock *> &Slots2BasicBlocks) {
@@ -2861,86 +2977,6 @@ const Value *MIParser::getIRValue(unsigned Slot) {
return ValueInfo->second;
}
-void MIParser::initNames2TargetIndices() {
- if (!Names2TargetIndices.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Indices = TII->getSerializableTargetIndices();
- for (const auto &I : Indices)
- Names2TargetIndices.insert(std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getTargetIndex(StringRef Name, int &Index) {
- initNames2TargetIndices();
- auto IndexInfo = Names2TargetIndices.find(Name);
- if (IndexInfo == Names2TargetIndices.end())
- return true;
- Index = IndexInfo->second;
- return false;
-}
-
-void MIParser::initNames2DirectTargetFlags() {
- if (!Names2DirectTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableDirectMachineOperandTargetFlags();
- for (const auto &I : Flags)
- Names2DirectTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getDirectTargetFlag(StringRef Name, unsigned &Flag) {
- initNames2DirectTargetFlags();
- auto FlagInfo = Names2DirectTargetFlags.find(Name);
- if (FlagInfo == Names2DirectTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
-void MIParser::initNames2BitmaskTargetFlags() {
- if (!Names2BitmaskTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableBitmaskMachineOperandTargetFlags();
- for (const auto &I : Flags)
- Names2BitmaskTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getBitmaskTargetFlag(StringRef Name, unsigned &Flag) {
- initNames2BitmaskTargetFlags();
- auto FlagInfo = Names2BitmaskTargetFlags.find(Name);
- if (FlagInfo == Names2BitmaskTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
-void MIParser::initNames2MMOTargetFlags() {
- if (!Names2MMOTargetFlags.empty())
- return;
- const auto *TII = MF.getSubtarget().getInstrInfo();
- assert(TII && "Expected target instruction info");
- auto Flags = TII->getSerializableMachineMemOperandTargetFlags();
- for (const auto &I : Flags)
- Names2MMOTargetFlags.insert(
- std::make_pair(StringRef(I.second), I.first));
-}
-
-bool MIParser::getMMOTargetFlag(StringRef Name,
- MachineMemOperand::Flags &Flag) {
- initNames2MMOTargetFlags();
- auto FlagInfo = Names2MMOTargetFlags.find(Name);
- if (FlagInfo == Names2MMOTargetFlags.end())
- return true;
- Flag = FlagInfo->second;
- return false;
-}
-
MCSymbol *MIParser::getOrCreateMCSymbol(StringRef Name) {
// FIXME: Currently we can't recognize temporary or local symbols and call all
// of the appropriate forms to create them. However, this handles basic cases
diff --git a/lib/CodeGen/MIRParser/MIParser.h b/lib/CodeGen/MIRParser/MIParser.h
deleted file mode 100644
index b06ceb21b740..000000000000
--- a/lib/CodeGen/MIRParser/MIParser.h
+++ /dev/null
@@ -1,125 +0,0 @@
-//===- MIParser.h - Machine Instructions Parser -----------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file declares the function that parses the machine instructions.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-#define LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringMap.h"
-#include "llvm/Support/Allocator.h"
-
-namespace llvm {
-
-class MachineBasicBlock;
-class MachineFunction;
-class MDNode;
-class RegisterBank;
-struct SlotMapping;
-class SMDiagnostic;
-class SourceMgr;
-class StringRef;
-class TargetRegisterClass;
-
-struct VRegInfo {
- enum uint8_t {
- UNKNOWN, NORMAL, GENERIC, REGBANK
- } Kind = UNKNOWN;
- bool Explicit = false; ///< VReg was explicitly specified in the .mir file.
- union {
- const TargetRegisterClass *RC;
- const RegisterBank *RegBank;
- } D;
- unsigned VReg;
- unsigned PreferredReg = 0;
-};
-
-using Name2RegClassMap = StringMap<const TargetRegisterClass *>;
-using Name2RegBankMap = StringMap<const RegisterBank *>;
-
-struct PerFunctionMIParsingState {
- BumpPtrAllocator Allocator;
- MachineFunction &MF;
- SourceMgr *SM;
- const SlotMapping &IRSlots;
- const Name2RegClassMap &Names2RegClasses;
- const Name2RegBankMap &Names2RegBanks;
-
- DenseMap<unsigned, MachineBasicBlock *> MBBSlots;
- DenseMap<unsigned, VRegInfo*> VRegInfos;
- StringMap<VRegInfo*> VRegInfosNamed;
- DenseMap<unsigned, int> FixedStackObjectSlots;
- DenseMap<unsigned, int> StackObjectSlots;
- DenseMap<unsigned, unsigned> ConstantPoolSlots;
- DenseMap<unsigned, unsigned> JumpTableSlots;
-
- PerFunctionMIParsingState(MachineFunction &MF, SourceMgr &SM,
- const SlotMapping &IRSlots,
- const Name2RegClassMap &Names2RegClasses,
- const Name2RegBankMap &Names2RegBanks);
-
- VRegInfo &getVRegInfo(unsigned Num);
- VRegInfo &getVRegInfoNamed(StringRef RegName);
-};
-
-/// Parse the machine basic block definitions, and skip the machine
-/// instructions.
-///
-/// This function runs the first parsing pass on the machine function's body.
-/// It parses only the machine basic block definitions and creates the machine
-/// basic blocks in the given machine function.
-///
-/// The machine instructions aren't parsed during the first pass because all
-/// the machine basic blocks aren't defined yet - this makes it impossible to
-/// resolve the machine basic block references.
-///
-/// Return true if an error occurred.
-bool parseMachineBasicBlockDefinitions(PerFunctionMIParsingState &PFS,
- StringRef Src, SMDiagnostic &Error);
-
-/// Parse the machine instructions.
-///
-/// This function runs the second parsing pass on the machine function's body.
-/// It skips the machine basic block definitions and parses only the machine
-/// instructions and basic block attributes like liveins and successors.
-///
-/// The second parsing pass assumes that the first parsing pass already ran
-/// on the given source string.
-///
-/// Return true if an error occurred.
-bool parseMachineInstructions(PerFunctionMIParsingState &PFS, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseMBBReference(PerFunctionMIParsingState &PFS,
- MachineBasicBlock *&MBB, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseRegisterReference(PerFunctionMIParsingState &PFS,
- unsigned &Reg, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseNamedRegisterReference(PerFunctionMIParsingState &PFS, unsigned &Reg,
- StringRef Src, SMDiagnostic &Error);
-
-bool parseVirtualRegisterReference(PerFunctionMIParsingState &PFS,
- VRegInfo *&Info, StringRef Src,
- SMDiagnostic &Error);
-
-bool parseStackObjectReference(PerFunctionMIParsingState &PFS, int &FI,
- StringRef Src, SMDiagnostic &Error);
-
-bool parseMDNode(PerFunctionMIParsingState &PFS, MDNode *&Node, StringRef Src,
- SMDiagnostic &Error);
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_MIRPARSER_MIPARSER_H
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index 00da92a92ec6..b242934def80 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -1,9 +1,8 @@
//===- MIRParser.cpp - MIR serialization format parser implementation -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -13,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MIRParser/MIRParser.h"
-#include "MIParser.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringMap.h"
@@ -22,12 +20,14 @@
#include "llvm/AsmParser/SlotMapping.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
+#include "llvm/CodeGen/MIRParser/MIParser.h"
#include "llvm/CodeGen/MIRYamlMapping.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DiagnosticInfo.h"
@@ -40,6 +40,7 @@
#include "llvm/Support/SMLoc.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/YAMLTraits.h"
+#include "llvm/Target/TargetMachine.h"
#include <memory>
using namespace llvm;
@@ -54,10 +55,8 @@ class MIRParserImpl {
StringRef Filename;
LLVMContext &Context;
SlotMapping IRSlots;
- /// Maps from register class names to register classes.
- Name2RegClassMap Names2RegClasses;
- /// Maps from register bank names to register banks.
- Name2RegBankMap Names2RegBanks;
+ std::unique_ptr<PerTargetMIParsingState> Target;
+
/// True when the MIR file doesn't have LLVM IR. Dummy IR functions are
/// created and inserted into the given module when this is true.
bool NoLLVMIR = false;
@@ -117,6 +116,9 @@ public:
bool initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF);
+ bool initializeCallSiteInfo(PerFunctionMIParsingState &PFS,
+ const yaml::MachineFunction &YamlMF);
+
bool parseCalleeSavedRegister(PerFunctionMIParsingState &PFS,
std::vector<CalleeSavedInfo> &CSIInfo,
const yaml::StringValue &RegisterSource,
@@ -151,20 +153,6 @@ private:
SMDiagnostic diagFromBlockStringDiag(const SMDiagnostic &Error,
SMRange SourceRange);
- void initNames2RegClasses(const MachineFunction &MF);
- void initNames2RegBanks(const MachineFunction &MF);
-
- /// Check if the given identifier is a name of a register class.
- ///
- /// Return null if the name isn't a register class.
- const TargetRegisterClass *getRegClass(const MachineFunction &MF,
- StringRef Name);
-
- /// Check if the given identifier is a name of a register bank.
- ///
- /// Return null if the name isn't a register bank.
- const RegisterBank *getRegBank(const MachineFunction &MF, StringRef Name);
-
void computeFunctionProperties(MachineFunction &MF);
};
@@ -271,8 +259,9 @@ bool MIRParserImpl::parseMachineFunctions(Module &M, MachineModuleInfo &MMI) {
/// Create an empty function with the given name.
static Function *createDummyFunction(StringRef Name, Module &M) {
auto &Context = M.getContext();
- Function *F = cast<Function>(M.getOrInsertFunction(
- Name, FunctionType::get(Type::getVoidTy(Context), false)));
+ Function *F =
+ Function::Create(FunctionType::get(Type::getVoidTy(Context), false),
+ Function::ExternalLinkage, Name, M);
BasicBlock *BB = BasicBlock::Create(Context, "entry", F);
new UnreachableInst(Context, BB);
return F;
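
One plausible reading of this change: getOrInsertFunction can return a bitcast constant when the module already contains a same-named global of a different type, in which case cast<Function> asserts, whereas Function::Create always yields a fresh Function and uniques the name on collision. A hedged sketch (the "dummy" name is illustrative):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/IR/Function.h"
  #include "llvm/IR/Module.h"

  static llvm::Function *makeDummy(llvm::Module *M) {
    llvm::LLVMContext &Ctx = M->getContext();
    // No cast is needed: Create unconditionally returns a Function.
    return llvm::Function::Create(
        llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/false),
        llvm::Function::ExternalLinkage, "dummy", M);
  }
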
@@ -282,6 +271,11 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
// Parse the yaml.
yaml::MachineFunction YamlMF;
yaml::EmptyContext Ctx;
+
+ const LLVMTargetMachine &TM = MMI.getTarget();
+ YamlMF.MachineFuncInfo = std::unique_ptr<yaml::MachineFunctionInfo>(
+ TM.createDefaultFuncInfoYAML());
+
yaml::yamlize(In, YamlMF, false, Ctx);
if (In.error())
return true;
@@ -346,12 +340,58 @@ void MIRParserImpl::computeFunctionProperties(MachineFunction &MF) {
Properties.set(MachineFunctionProperties::Property::NoVRegs);
}
+bool MIRParserImpl::initializeCallSiteInfo(
+ PerFunctionMIParsingState &PFS, const yaml::MachineFunction &YamlMF) {
+ MachineFunction &MF = PFS.MF;
+ SMDiagnostic Error;
+ const LLVMTargetMachine &TM = MF.getTarget();
+ for (auto YamlCSInfo : YamlMF.CallSitesInfo) {
+ yaml::CallSiteInfo::MachineInstrLoc MILoc = YamlCSInfo.CallLocation;
+ if (MILoc.BlockNum >= MF.size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction block out of range.") +
+ " Unable to reference bb:" + Twine(MILoc.BlockNum));
+ auto CallB = std::next(MF.begin(), MILoc.BlockNum);
+ if (MILoc.Offset >= CallB->size())
+ return error(Twine(MF.getName()) +
+ Twine(" call instruction offset out of range.") +
+ "Unable to reference instruction at bb: " +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
+ auto CallI = std::next(CallB->begin(), MILoc.Offset);
+ if (!CallI->isCall())
+ return error(Twine(MF.getName()) +
+ Twine(" call site info should reference call "
+ "instruction. Instruction at bb:") +
+ Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset) +
+ " is not a call instruction");
+ MachineFunction::CallSiteInfo CSInfo;
+ for (auto ArgRegPair : YamlCSInfo.ArgForwardingRegs) {
+ unsigned Reg = 0;
+ if (parseNamedRegisterReference(PFS, Reg, ArgRegPair.Reg.Value, Error))
+ return error(Error, ArgRegPair.Reg.SourceRange);
+ CSInfo.emplace_back(Reg, ArgRegPair.ArgNo);
+ }
+
+ if (TM.Options.EnableDebugEntryValues)
+ MF.addCallArgsForwardingRegs(&*CallI, std::move(CSInfo));
+ }
+
+ if (YamlMF.CallSitesInfo.size() && !TM.Options.EnableDebugEntryValues)
+ return error(Twine("Call site info provided but not used"));
+ return false;
+}
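
A hedged sketch of the (block, offset) lookup the loop above performs, returning null where the parser emits a diagnostic instead; MF, BlockNum, and Offset stand in for the parsed values:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineFunction.h"
  #include <iterator>

  static llvm::MachineInstr *resolveCallLoc(llvm::MachineFunction &MF,
                                            unsigned BlockNum, unsigned Offset) {
    if (BlockNum >= MF.size())
      return nullptr;                            // block number out of range
    auto MBB = std::next(MF.begin(), BlockNum);  // walk to the BlockNum-th block
    if (Offset >= MBB->size())
      return nullptr;                            // instruction offset out of range
    return &*std::next(MBB->begin(), Offset);    // the Offset-th instruction
  }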
+
bool
MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
MachineFunction &MF) {
// TODO: Recreate the machine function.
- initNames2RegClasses(MF);
- initNames2RegBanks(MF);
+ if (Target) {
+ // Avoid clearing state if we're using the same subtarget again.
+ Target->setTarget(MF.getSubtarget());
+ } else {
+ Target.reset(new PerTargetMIParsingState(MF.getSubtarget()));
+ }
+
if (YamlMF.Alignment)
MF.setAlignment(YamlMF.Alignment);
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
@@ -367,8 +407,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (YamlMF.FailedISel)
MF.getProperties().set(MachineFunctionProperties::Property::FailedISel);
- PerFunctionMIParsingState PFS(MF, SM, IRSlots, Names2RegClasses,
- Names2RegBanks);
+ PerFunctionMIParsingState PFS(MF, SM, IRSlots, *Target);
if (parseRegisterInfo(PFS, YamlMF))
return true;
if (!YamlMF.Constants.empty()) {
@@ -419,8 +458,32 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
if (setupRegisterInfo(PFS, YamlMF))
return true;
+ if (YamlMF.MachineFuncInfo) {
+ const LLVMTargetMachine &TM = MF.getTarget();
+ // Note that this runs after the MachineFunctionInfo was initially
+ // constructed from the MachineFunction, which may itself depend on the
+ // IR.
+
+ SMRange SrcRange;
+ if (TM.parseMachineFunctionInfo(*YamlMF.MachineFuncInfo, PFS, Error,
+ SrcRange)) {
+ return error(Error, SrcRange);
+ }
+ }
+
+ // Set the reserved registers after parsing MachineFuncInfo. The target may
+ // have recorded information there that is used to select the reserved
+ // registers.
+ // FIXME: This is a temporary workaround until the reserved registers can be
+ // serialized.
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ MRI.freezeReservedRegs(MF);
+
computeFunctionProperties(MF);
+ if (initializeCallSiteInfo(PFS, YamlMF))
+ return true;
+
MF.getSubtarget().mirFileLoaded(MF);
MF.verify();
@@ -449,12 +512,12 @@ bool MIRParserImpl::parseRegisterInfo(PerFunctionMIParsingState &PFS,
Info.Kind = VRegInfo::GENERIC;
Info.D.RegBank = nullptr;
} else {
- const auto *RC = getRegClass(MF, VReg.Class.Value);
+ const auto *RC = Target->getRegClass(VReg.Class.Value);
if (RC) {
Info.Kind = VRegInfo::NORMAL;
Info.D.RC = RC;
} else {
- const RegisterBank *RegBank = getRegBank(MF, VReg.Class.Value);
+ const RegisterBank *RegBank = Target->getRegBank(VReg.Class.Value);
if (!RegBank)
return error(
VReg.Class.SourceRange.Start,
@@ -557,9 +620,6 @@ bool MIRParserImpl::setupRegisterInfo(const PerFunctionMIParsingState &PFS,
}
}
- // FIXME: This is a temporary workaround until the reserved registers can be
- // serialized.
- MRI.freezeReservedRegs(MF);
return Error;
}
@@ -567,6 +627,7 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
const yaml::MachineFunction &YamlMF) {
MachineFunction &MF = PFS.MF;
MachineFrameInfo &MFI = MF.getFrameInfo();
+ const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();
const Function &F = MF.getFunction();
const yaml::MachineFrameInfo &YamlMFI = YamlMF.FrameInfo;
MFI.setFrameAddressIsTaken(YamlMFI.IsFrameAddressTaken);
@@ -608,8 +669,12 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
Object.IsImmutable, Object.IsAliased);
else
ObjectIdx = MFI.CreateFixedSpillStackObject(Object.Size, Object.Offset);
- MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
+
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
MFI.setStackID(ObjectIdx, Object.StackID);
+ MFI.setObjectAlignment(ObjectIdx, Object.Alignment);
if (!PFS.FixedStackObjectSlots.insert(std::make_pair(Object.ID.Value,
ObjectIdx))
.second)
@@ -637,14 +702,17 @@ bool MIRParserImpl::initializeFrameInfo(PerFunctionMIParsingState &PFS,
"' isn't defined in the function '" + F.getName() +
"'");
}
+ if (!TFI->isSupportedStackID(Object.StackID))
+ return error(Object.ID.SourceRange.Start,
+ Twine("StackID is not supported by target"));
if (Object.Type == yaml::MachineStackObject::VariableSized)
ObjectIdx = MFI.CreateVariableSizedObject(Object.Alignment, Alloca);
else
ObjectIdx = MFI.CreateStackObject(
Object.Size, Object.Alignment,
- Object.Type == yaml::MachineStackObject::SpillSlot, Alloca);
+ Object.Type == yaml::MachineStackObject::SpillSlot, Alloca,
+ Object.StackID);
MFI.setObjectOffset(ObjectIdx, Object.Offset);
- MFI.setStackID(ObjectIdx, Object.StackID);
if (!PFS.StackObjectSlots.insert(std::make_pair(Object.ID.Value, ObjectIdx))
.second)
@@ -844,48 +912,6 @@ SMDiagnostic MIRParserImpl::diagFromBlockStringDiag(const SMDiagnostic &Error,
Error.getFixIts());
}
-void MIRParserImpl::initNames2RegClasses(const MachineFunction &MF) {
- if (!Names2RegClasses.empty())
- return;
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; ++I) {
- const auto *RC = TRI->getRegClass(I);
- Names2RegClasses.insert(
- std::make_pair(StringRef(TRI->getRegClassName(RC)).lower(), RC));
- }
-}
-
-void MIRParserImpl::initNames2RegBanks(const MachineFunction &MF) {
- if (!Names2RegBanks.empty())
- return;
- const RegisterBankInfo *RBI = MF.getSubtarget().getRegBankInfo();
- // If the target does not support GlobalISel, we may not have a
- // register bank info.
- if (!RBI)
- return;
- for (unsigned I = 0, E = RBI->getNumRegBanks(); I < E; ++I) {
- const auto &RegBank = RBI->getRegBank(I);
- Names2RegBanks.insert(
- std::make_pair(StringRef(RegBank.getName()).lower(), &RegBank));
- }
-}
-
-const TargetRegisterClass *MIRParserImpl::getRegClass(const MachineFunction &MF,
- StringRef Name) {
- auto RegClassInfo = Names2RegClasses.find(Name);
- if (RegClassInfo == Names2RegClasses.end())
- return nullptr;
- return RegClassInfo->getValue();
-}
-
-const RegisterBank *MIRParserImpl::getRegBank(const MachineFunction &MF,
- StringRef Name) {
- auto RegBankInfo = Names2RegBanks.find(Name);
- if (RegBankInfo == Names2RegBanks.end())
- return nullptr;
- return RegBankInfo->getValue();
-}
-
MIRParser::MIRParser(std::unique_ptr<MIRParserImpl> Impl)
: Impl(std::move(Impl)) {}
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index d9dcc428943f..0a95a0ced0f5 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -1,9 +1,8 @@
//===- MIRPrinter.cpp - MIR serialization format printer ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -36,6 +35,7 @@
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@@ -129,6 +129,9 @@ public:
const MachineJumpTableInfo &JTI);
void convertStackObjects(yaml::MachineFunction &YMF,
const MachineFunction &MF, ModuleSlotTracker &MST);
+ void convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST);
private:
void initRegisterMaskIds(const MachineFunction &MF);
@@ -212,10 +215,16 @@ void MIRPrinter::print(const MachineFunction &MF) {
MST.incorporateFunction(MF.getFunction());
convert(MST, YamlMF.FrameInfo, MF.getFrameInfo());
convertStackObjects(YamlMF, MF, MST);
+ convertCallSiteObjects(YamlMF, MF, MST);
if (const auto *ConstantPool = MF.getConstantPool())
convert(YamlMF, *ConstantPool);
if (const auto *JumpTableInfo = MF.getJumpTableInfo())
convert(MST, YamlMF.JumpTableInfo, *JumpTableInfo);
+
+ const TargetMachine &TM = MF.getTarget();
+ YamlMF.MachineFuncInfo =
+ std::unique_ptr<yaml::MachineFunctionInfo>(TM.convertFuncInfoToYAML(MF));
+
raw_string_ostream StrOS(YamlMF.Body.Value.Value);
bool IsNewlineNeeded = false;
for (const auto &MBB : MF) {
@@ -352,7 +361,7 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
// Process fixed stack objects.
unsigned ID = 0;
- for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) {
+ for (int I = MFI.getObjectIndexBegin(); I < 0; ++I, ++ID) {
if (MFI.isDeadObjectIndex(I))
continue;
@@ -364,17 +373,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- YamlObject.StackID = MFI.getStackID(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YamlObject.IsImmutable = MFI.isImmutableObjectIndex(I);
YamlObject.IsAliased = MFI.isAliasedObjectIndex(I);
YMF.FixedStackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(
- std::make_pair(I, FrameIndexOperand::createFixed(ID++)));
+ std::make_pair(I, FrameIndexOperand::createFixed(ID)));
}
// Process ordinary stack objects.
ID = 0;
- for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I) {
+ for (int I = 0, E = MFI.getObjectIndexEnd(); I < E; ++I, ++ID) {
if (MFI.isDeadObjectIndex(I))
continue;
@@ -391,14 +400,17 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
YamlObject.Offset = MFI.getObjectOffset(I);
YamlObject.Size = MFI.getObjectSize(I);
YamlObject.Alignment = MFI.getObjectAlignment(I);
- YamlObject.StackID = MFI.getStackID(I);
+ YamlObject.StackID = (TargetStackID::Value)MFI.getStackID(I);
YMF.StackObjects.push_back(YamlObject);
StackObjectOperandMapping.insert(std::make_pair(
- I, FrameIndexOperand::create(YamlObject.Name.Value, ID++)));
+ I, FrameIndexOperand::create(YamlObject.Name.Value, ID)));
}
for (const auto &CSInfo : MFI.getCalleeSavedInfo()) {
+ if (!CSInfo.isSpilledToReg() && MFI.isDeadObjectIndex(CSInfo.getFrameIdx()))
+ continue;
+
yaml::StringValue Reg;
printRegMIR(CSInfo.getReg(), Reg, TRI);
if (!CSInfo.isSpilledToReg()) {
@@ -452,6 +464,39 @@ void MIRPrinter::convertStackObjects(yaml::MachineFunction &YMF,
}
}
+void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
+ const MachineFunction &MF,
+ ModuleSlotTracker &MST) {
+ const auto *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto CSInfo : MF.getCallSitesInfo()) {
+ yaml::CallSiteInfo YmlCS;
+ yaml::CallSiteInfo::MachineInstrLoc CallLocation;
+
+ // Prepare instruction position.
+ MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator();
+ CallLocation.BlockNum = CallI->getParent()->getNumber();
+ // Get the call instruction offset from the beginning of the block.
+ CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI);
+ YmlCS.CallLocation = CallLocation;
+ // Construct the call arguments and their forwarding register info.
+ for (auto ArgReg : CSInfo.second) {
+ yaml::CallSiteInfo::ArgRegPair YmlArgReg;
+ YmlArgReg.ArgNo = ArgReg.ArgNo;
+ printRegMIR(ArgReg.Reg, YmlArgReg.Reg, TRI);
+ YmlCS.ArgForwardingRegs.emplace_back(YmlArgReg);
+ }
+ YMF.CallSitesInfo.push_back(YmlCS);
+ }
+
+ // Sort call info by position of call instructions.
+ llvm::sort(YMF.CallSitesInfo.begin(), YMF.CallSitesInfo.end(),
+ [](yaml::CallSiteInfo A, yaml::CallSiteInfo B) {
+ if (A.CallLocation.BlockNum == B.CallLocation.BlockNum)
+ return A.CallLocation.Offset < B.CallLocation.Offset;
+ return A.CallLocation.BlockNum < B.CallLocation.BlockNum;
+ });
+}
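
A side note on the comparator: it orders call sites lexicographically by (block number, offset). An equivalent std::tie formulation, sketched under the assumption of the yaml::CallSiteInfo fields used above:

  #include "llvm/ADT/STLExtras.h"
  #include "llvm/CodeGen/MIRYamlMapping.h"
  #include <tuple>
  #include <vector>

  // Same ordering as the lambda above; llvm::sort also accepts a container
  // directly instead of an iterator pair.
  static void sortCallSites(std::vector<llvm::yaml::CallSiteInfo> &CSInfos) {
    llvm::sort(CSInfos, [](const llvm::yaml::CallSiteInfo &A,
                           const llvm::yaml::CallSiteInfo &B) {
      return std::tie(A.CallLocation.BlockNum, A.CallLocation.Offset) <
             std::tie(B.CallLocation.BlockNum, B.CallLocation.Offset);
    });
  }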
+
void MIRPrinter::convert(yaml::MachineFunction &MF,
const MachineConstantPool &ConstantPool) {
unsigned ID = 0;
@@ -706,6 +751,8 @@ void MIPrinter::print(const MachineInstr &MI) {
OS << "nsw ";
if (MI.getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (MI.getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
OS << TII->getName(MI.getOpcode());
if (I < E)
diff --git a/lib/CodeGen/MIRPrintingPass.cpp b/lib/CodeGen/MIRPrintingPass.cpp
index 1a8427430ea0..e032fffd658c 100644
--- a/lib/CodeGen/MIRPrintingPass.cpp
+++ b/lib/CodeGen/MIRPrintingPass.cpp
@@ -1,9 +1,8 @@
//===- MIRPrintingPass.cpp - Pass that prints out using the MIR format ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 03771bc5dae1..4d29e883d879 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineBasicBlock.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -133,8 +132,12 @@ void ilist_traits<MachineInstr>::transferNodesFromList(ilist_traits &FromList,
instr_iterator First,
instr_iterator Last) {
assert(Parent->getParent() == FromList.Parent->getParent() &&
- "MachineInstr parent mismatch!");
- assert(this != &FromList && "Called without a real transfer...");
+ "cannot transfer MachineInstrs between MachineFunctions");
+
+ // If it's within the same BB, there's nothing to do.
+ if (this == &FromList)
+ return;
+
assert(Parent != FromList.Parent && "Two lists have the same parent?");
// If splicing between two blocks within the same function, just update the
@@ -995,7 +998,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
while (!KilledRegs.empty()) {
unsigned Reg = KilledRegs.pop_back_val();
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
- if (!(--I)->addRegisterKilled(Reg, TRI, /* addIfNotFound= */ false))
+ if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
LV->getVarInfo(Reg).Kills.push_back(&*I);
diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
index 3459a9f71a73..53a35b7e89c2 100644
--- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineBlockFrequencyInfo.cpp - MBB Frequency Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 4fee9c4ea027..639b588766a1 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -1,9 +1,8 @@
//===- MachineBlockPlacement.cpp - Basic Block Code Layout optimization ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -452,15 +451,28 @@ class MachineBlockPlacement : public MachineFunctionPass {
void buildChain(const MachineBasicBlock *BB, BlockChain &Chain,
BlockFilterSet *BlockFilter = nullptr);
+ bool canMoveBottomBlockToTop(const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop);
+ bool hasViableTopFallthrough(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency TopFallThroughFreq(const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet);
+ BlockFrequency FallThroughGains(const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet);
+ MachineBasicBlock *findBestLoopTopHelper(MachineBasicBlock *OldTop,
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopTop(
const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
MachineBasicBlock *findBestLoopExit(
- const MachineLoop &L, const BlockFilterSet &LoopBlockSet);
+ const MachineLoop &L, const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq);
BlockFilterSet collectLoopBlockSet(const MachineLoop &L);
void buildLoopChains(const MachineLoop &L);
void rotateLoop(
BlockChain &LoopChain, const MachineBasicBlock *ExitingBB,
- const BlockFilterSet &LoopBlockSet);
+ BlockFrequency ExitFreq, const BlockFilterSet &LoopBlockSet);
void rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet);
@@ -938,8 +950,8 @@ MachineBlockPlacement::getBestNonConflictingEdges(
// Sort for highest frequency.
auto Cmp = [](WeightedEdge A, WeightedEdge B) { return A.Weight > B.Weight; };
- std::stable_sort(Edges[0].begin(), Edges[0].end(), Cmp);
- std::stable_sort(Edges[1].begin(), Edges[1].end(), Cmp);
+ llvm::stable_sort(Edges[0], Cmp);
+ llvm::stable_sort(Edges[1], Cmp);
auto BestA = Edges[0].begin();
auto BestB = Edges[1].begin();
// Arrange for the correct answer to be in BestA and BestB
@@ -1527,15 +1539,12 @@ MachineBlockPlacement::selectBestSuccessor(
// profitable than BestSucc. Position is important because we preserve it and
// prefer first best match. Here we aren't comparing in order, so we capture
// the position instead.
- if (DupCandidates.size() != 0) {
- auto cmp =
- [](const std::tuple<BranchProbability, MachineBasicBlock *> &a,
- const std::tuple<BranchProbability, MachineBasicBlock *> &b) {
- return std::get<0>(a) > std::get<0>(b);
- };
- std::stable_sort(DupCandidates.begin(), DupCandidates.end(), cmp);
- }
- for(auto &Tup : DupCandidates) {
+ llvm::stable_sort(DupCandidates,
+ [](std::tuple<BranchProbability, MachineBasicBlock *> L,
+ std::tuple<BranchProbability, MachineBasicBlock *> R) {
+ return std::get<0>(L) > std::get<0>(R);
+ });
+ for (auto &Tup : DupCandidates) {
BranchProbability DupProb;
MachineBasicBlock *Succ;
std::tie(DupProb, Succ) = Tup;
@@ -1757,63 +1766,238 @@ void MachineBlockPlacement::buildChain(
<< getBlockName(*Chain.begin()) << "\n");
}
-/// Find the best loop top block for layout.
+// If the bottom block BB has only one successor, OldTop, it is usually
+// profitable to move it before OldTop, except in the following case:
+//
+// -->OldTop<-
+// | . |
+// | . |
+// | . |
+// ---Pred |
+// | |
+// BB-----
+//
+// If BB is moved before OldTop, Pred needs a taken branch to BB, and it can't
+// lay out its other successor below itself, so no taken branch is saved.
+// In this case we keep the original layout.
+bool
+MachineBlockPlacement::canMoveBottomBlockToTop(
+ const MachineBasicBlock *BottomBlock,
+ const MachineBasicBlock *OldTop) {
+ if (BottomBlock->pred_size() != 1)
+ return true;
+ MachineBasicBlock *Pred = *BottomBlock->pred_begin();
+ if (Pred->succ_size() != 2)
+ return true;
+
+ MachineBasicBlock *OtherBB = *Pred->succ_begin();
+ if (OtherBB == BottomBlock)
+ OtherBB = *Pred->succ_rbegin();
+ if (OtherBB == OldTop)
+ return false;
+
+ return true;
+}
+
+// Compute the maximum possible fall-through frequency to the top of a loop.
+BlockFrequency
+MachineBlockPlacement::TopFallThroughFreq(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency MaxFreq = 0;
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block that can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it should be the head of its chain.
+ if (!LoopBlockSet.count(Succ) && (SuccProb > TopProb) &&
+ (!SuccChain || Succ == *SuccChain->begin())) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, Top);
+ if (EdgeFreq > MaxFreq)
+ MaxFreq = EdgeFreq;
+ }
+ }
+ }
+ return MaxFreq;
+}
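
The edge-frequency product above is the standard BlockFrequency arithmetic; a minimal sketch with made-up numbers:

  #include "llvm/Support/BlockFrequency.h"
  #include "llvm/Support/BranchProbability.h"

  // An edge's frequency is the source block's frequency scaled by the
  // branch probability of taking that edge.
  llvm::BlockFrequency exampleEdgeFreq() {
    llvm::BlockFrequency PredFreq(200);  // hypothetical frequency of Pred
    llvm::BranchProbability P(3, 4);     // Pred -> Top taken 75% of the time
    return PredFreq * P;                 // 200 * 3/4 == 150
  }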
+
+// Compute the fall-through gains from moving NewTop before OldTop.
+//
+// In the following diagram, edges marked "-" are reduced fallthroughs and
+// edges marked "+" are increased fallthroughs; this function computes
+//
+// SUM(increased fallthrough) - SUM(decreased fallthrough)
+//
+// |
+// | -
+// V
+// --->OldTop
+// | .
+// | .
+// +| . +
+// | Pred --->
+// | |-
+// | V
+// --- NewTop <---
+// |-
+// V
+//
+BlockFrequency
+MachineBlockPlacement::FallThroughGains(
+ const MachineBasicBlock *NewTop,
+ const MachineBasicBlock *OldTop,
+ const MachineBasicBlock *ExitBB,
+ const BlockFilterSet &LoopBlockSet) {
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(OldTop, LoopBlockSet);
+ BlockFrequency FallThrough2Exit = 0;
+ if (ExitBB)
+ FallThrough2Exit = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, ExitBB);
+ BlockFrequency BackEdgeFreq = MBFI->getBlockFreq(NewTop) *
+ MBPI->getEdgeProbability(NewTop, OldTop);
+
+ // Find the best Pred of NewTop.
+ MachineBasicBlock *BestPred = nullptr;
+ BlockFrequency FallThroughFromPred = 0;
+ for (MachineBasicBlock *Pred : NewTop->predecessors()) {
+ if (!LoopBlockSet.count(Pred))
+ continue;
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!PredChain || Pred == *std::prev(PredChain->end())) {
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, NewTop);
+ if (EdgeFreq > FallThroughFromPred) {
+ FallThroughFromPred = EdgeFreq;
+ BestPred = Pred;
+ }
+ }
+ }
+
+ // If NewTop is not placed after Pred, another successor can be placed
+ // after Pred.
+ BlockFrequency NewFreq = 0;
+ if (BestPred) {
+ for (MachineBasicBlock *Succ : BestPred->successors()) {
+ if ((Succ == NewTop) || (Succ == BestPred) || !LoopBlockSet.count(Succ))
+ continue;
+ if (ComputedEdges.find(Succ) != ComputedEdges.end())
+ continue;
+ BlockChain *SuccChain = BlockToChain[Succ];
+ if ((SuccChain && (Succ != *SuccChain->begin())) ||
+ (SuccChain == BlockToChain[BestPred]))
+ continue;
+ BlockFrequency EdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, Succ);
+ if (EdgeFreq > NewFreq)
+ NewFreq = EdgeFreq;
+ }
+ BlockFrequency OrigEdgeFreq = MBFI->getBlockFreq(BestPred) *
+ MBPI->getEdgeProbability(BestPred, NewTop);
+ if (NewFreq > OrigEdgeFreq) {
+ // If NewTop is not the best successor of Pred, then Pred doesn't
+ // fall through to NewTop, so there is neither FallThroughFromPred
+ // nor NewFreq.
+ NewFreq = 0;
+ FallThroughFromPred = 0;
+ }
+ }
+
+ BlockFrequency Result = 0;
+ BlockFrequency Gains = BackEdgeFreq + NewFreq;
+ BlockFrequency Lost = FallThrough2Top + FallThrough2Exit +
+ FallThroughFromPred;
+ if (Gains > Lost)
+ Result = Gains - Lost;
+ return Result;
+}
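
A hedged, integer-only rendering of the final profitability test, with hypothetical frequencies standing in for llvm::BlockFrequency values:

  // Gains = BackEdgeFreq + NewFreq; Lost = the three fallthroughs given up.
  // e.g. fallThroughGain(90, 0, 50, 10, 20) == 10, so the move pays off.
  static unsigned fallThroughGain(unsigned BackEdgeFreq, unsigned NewFreq,
                                  unsigned FallThrough2Top,
                                  unsigned FallThrough2Exit,
                                  unsigned FallThroughFromPred) {
    unsigned Gains = BackEdgeFreq + NewFreq;
    unsigned Lost = FallThrough2Top + FallThrough2Exit + FallThroughFromPred;
    return Gains > Lost ? Gains - Lost : 0; // clamp at zero, like the original
  }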
+
+/// Helper function of findBestLoopTop. Find the best loop top block
+/// from the predecessors of the old top.
+///
+/// Look for a block which is strictly better than the old top for laying
+/// out before the old top of the loop. This looks for only two patterns:
+///
+/// 1. a block has only one successor, the old loop top
+///
+/// Because such a block will always result in an unconditional jump,
+/// rotating it in front of the old top is always profitable.
+///
+/// 2. a block has two successors, one is the old top and the other is an
+/// exit, and it has more than one predecessor
///
-/// Look for a block which is strictly better than the loop header for laying
-/// out at the top of the loop. This looks for one and only one pattern:
-/// a latch block with no conditional exit. This block will cause a conditional
-/// jump around it or will be the bottom of the loop if we lay it out in place,
-/// but if it it doesn't end up at the bottom of the loop for any reason,
-/// rotation alone won't fix it. Because such a block will always result in an
-/// unconditional jump (for the backedge) rotating it in front of the loop
-/// header is always profitable.
+/// If it is laid out below one of its predecessors P, only P can fall
+/// through to it; all other predecessors need a jump to it and another
+/// conditional jump to the loop header. If it is moved before the loop
+/// header, all its predecessors jump to it and then fall through to the
+/// loop header, so every predecessor except P saves one taken branch.
+/// At the same time, moving it before the old top adds a taken branch to
+/// the loop exit block, so the saved taken branches are weighed against
+/// the added taken branch to the loop exit block.
MachineBasicBlock *
-MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
- // Placing the latch block before the header may introduce an extra branch
- // that skips this block the first time the loop is executed, which we want
- // to avoid when optimising for size.
- // FIXME: in theory there is a case that does not introduce a new branch,
- // i.e. when the layout predecessor does not fallthrough to the loop header.
- // In practice this never happens though: there always seems to be a preheader
- // that can fallthrough and that is also placed before the header.
- if (F->getFunction().optForSize())
- return L.getHeader();
-
+MachineBlockPlacement::findBestLoopTopHelper(
+ MachineBasicBlock *OldTop,
+ const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
// Check that the header hasn't been fused with a preheader block due to
// crazy branches. If it has, we need to start with the header at the top to
// prevent pulling the preheader into the loop body.
- BlockChain &HeaderChain = *BlockToChain[L.getHeader()];
+ BlockChain &HeaderChain = *BlockToChain[OldTop];
if (!LoopBlockSet.count(*HeaderChain.begin()))
- return L.getHeader();
+ return OldTop;
- LLVM_DEBUG(dbgs() << "Finding best loop top for: "
- << getBlockName(L.getHeader()) << "\n");
+ LLVM_DEBUG(dbgs() << "Finding best loop top for: " << getBlockName(OldTop)
+ << "\n");
- BlockFrequency BestPredFreq;
+ BlockFrequency BestGains = 0;
MachineBasicBlock *BestPred = nullptr;
- for (MachineBasicBlock *Pred : L.getHeader()->predecessors()) {
+ for (MachineBasicBlock *Pred : OldTop->predecessors()) {
if (!LoopBlockSet.count(Pred))
continue;
- LLVM_DEBUG(dbgs() << " header pred: " << getBlockName(Pred) << ", has "
+ if (Pred == L.getHeader())
+ continue;
+ LLVM_DEBUG(dbgs() << " old top pred: " << getBlockName(Pred) << ", has "
<< Pred->succ_size() << " successors, ";
MBFI->printBlockFreq(dbgs(), Pred) << " freq\n");
- if (Pred->succ_size() > 1)
+ if (Pred->succ_size() > 2)
continue;
- BlockFrequency PredFreq = MBFI->getBlockFreq(Pred);
- if (!BestPred || PredFreq > BestPredFreq ||
- (!(PredFreq < BestPredFreq) &&
- Pred->isLayoutSuccessor(L.getHeader()))) {
+ MachineBasicBlock *OtherBB = nullptr;
+ if (Pred->succ_size() == 2) {
+ OtherBB = *Pred->succ_begin();
+ if (OtherBB == OldTop)
+ OtherBB = *Pred->succ_rbegin();
+ }
+
+ if (!canMoveBottomBlockToTop(Pred, OldTop))
+ continue;
+
+ BlockFrequency Gains = FallThroughGains(Pred, OldTop, OtherBB,
+ LoopBlockSet);
+ if ((Gains > 0) && (Gains > BestGains ||
+ ((Gains == BestGains) && Pred->isLayoutSuccessor(OldTop)))) {
BestPred = Pred;
- BestPredFreq = PredFreq;
+ BestGains = Gains;
}
}
// If no direct predecessor is fine, just use the loop header.
if (!BestPred) {
LLVM_DEBUG(dbgs() << " final top unchanged\n");
- return L.getHeader();
+ return OldTop;
}
// Walk backwards through any straight line of predecessors.
@@ -1826,6 +2010,34 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
return BestPred;
}
+/// Find the best loop top block for layout.
+///
+/// This function iteratively calls findBestLoopTopHelper, until no new better
+/// BB can be found.
+MachineBasicBlock *
+MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
+ const BlockFilterSet &LoopBlockSet) {
+ // Placing the latch block before the header may introduce an extra branch
+ // that skips this block the first time the loop is executed, which we want
+ // to avoid when optimizing for size.
+ // FIXME: in theory there is a case that does not introduce a new branch,
+ // i.e. when the layout predecessor does not fall through to the loop header.
+ // In practice this never happens though: there always seems to be a preheader
+ // that can fall through and that is also placed before the header.
+ if (F->getFunction().hasOptSize())
+ return L.getHeader();
+
+ MachineBasicBlock *OldTop = nullptr;
+ MachineBasicBlock *NewTop = L.getHeader();
+ while (NewTop != OldTop) {
+ OldTop = NewTop;
+ NewTop = findBestLoopTopHelper(OldTop, L, LoopBlockSet);
+ if (NewTop != OldTop)
+ ComputedEdges[NewTop] = { OldTop, false };
+ }
+ return NewTop;
+}
+
/// Find the best loop exiting block for layout.
///
/// This routine implements the logic to analyze the loop looking for the best
@@ -1833,7 +2045,8 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
/// fallthrough opportunities.
MachineBasicBlock *
MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
- const BlockFilterSet &LoopBlockSet) {
+ const BlockFilterSet &LoopBlockSet,
+ BlockFrequency &ExitFreq) {
// We don't want to layout the loop linearly in all cases. If the loop header
// is just a normal basic block in the loop, we want to look for what block
// within the loop is the best one to layout at the top. However, if the loop
@@ -1944,9 +2157,43 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
LLVM_DEBUG(dbgs() << " Best exiting block: " << getBlockName(ExitingBB)
<< "\n");
+ ExitFreq = BestExitEdgeFreq;
return ExitingBB;
}
+/// Check if there is a fallthrough to the loop header Top.
+///
+/// 1. Look for a Pred that can be laid out before Top.
+/// 2. Check if Top is the most probable successor of Pred.
+bool
+MachineBlockPlacement::hasViableTopFallthrough(
+ const MachineBasicBlock *Top,
+ const BlockFilterSet &LoopBlockSet) {
+ for (MachineBasicBlock *Pred : Top->predecessors()) {
+ BlockChain *PredChain = BlockToChain[Pred];
+ if (!LoopBlockSet.count(Pred) &&
+ (!PredChain || Pred == *std::prev(PredChain->end()))) {
+ // Found a Pred block that can be placed before Top.
+ // Check if Top is the best successor of Pred.
+ auto TopProb = MBPI->getEdgeProbability(Pred, Top);
+ bool TopOK = true;
+ for (MachineBasicBlock *Succ : Pred->successors()) {
+ auto SuccProb = MBPI->getEdgeProbability(Pred, Succ);
+ BlockChain *SuccChain = BlockToChain[Succ];
+ // Check if Succ can be placed after Pred.
+ // Succ should not be in any chain, or it should be the head of its chain.
+ if ((!SuccChain || Succ == *SuccChain->begin()) && SuccProb > TopProb) {
+ TopOK = false;
+ break;
+ }
+ }
+ if (TopOK)
+ return true;
+ }
+ }
+ return false;
+}
+
/// Attempt to rotate an exiting block to the bottom of the loop.
///
/// Once we have built a chain, try to rotate it to line up the hot exit block
@@ -1955,6 +2202,7 @@ MachineBlockPlacement::findBestLoopExit(const MachineLoop &L,
/// of its bottom already, don't rotate it.
void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
const MachineBasicBlock *ExitingBB,
+ BlockFrequency ExitFreq,
const BlockFilterSet &LoopBlockSet) {
if (!ExitingBB)
return;
@@ -1966,15 +2214,7 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
if (Bottom == ExitingBB)
return;
- bool ViableTopFallthrough = false;
- for (MachineBasicBlock *Pred : Top->predecessors()) {
- BlockChain *PredChain = BlockToChain[Pred];
- if (!LoopBlockSet.count(Pred) &&
- (!PredChain || Pred == *std::prev(PredChain->end()))) {
- ViableTopFallthrough = true;
- break;
- }
- }
+ bool ViableTopFallthrough = hasViableTopFallthrough(Top, LoopBlockSet);
// If the header has viable fallthrough, check whether the current loop
// bottom is a viable exiting block. If so, bail out as rotating will
@@ -1986,6 +2226,12 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
(!SuccChain || Succ == *SuccChain->begin()))
return;
}
+
+ // Rotation would destroy the top fallthrough, so we need to ensure the
+ // new exit frequency is larger than the top fallthrough frequency.
+ BlockFrequency FallThrough2Top = TopFallThroughFreq(Top, LoopBlockSet);
+ if (FallThrough2Top >= ExitFreq)
+ return;
}
BlockChain::iterator ExitIt = llvm::find(LoopChain, ExitingBB);
@@ -2041,8 +2287,6 @@ void MachineBlockPlacement::rotateLoop(BlockChain &LoopChain,
void MachineBlockPlacement::rotateLoopWithProfile(
BlockChain &LoopChain, const MachineLoop &L,
const BlockFilterSet &LoopBlockSet) {
- auto HeaderBB = L.getHeader();
- auto HeaderIter = llvm::find(LoopChain, HeaderBB);
auto RotationPos = LoopChain.end();
BlockFrequency SmallestRotationCost = BlockFrequency::getMaxFrequency();
@@ -2062,12 +2306,13 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// chain head is not the loop header. As we only consider natural loops with
// single header, this computation can be done only once.
BlockFrequency HeaderFallThroughCost(0);
- for (auto *Pred : HeaderBB->predecessors()) {
+ MachineBasicBlock *ChainHeaderBB = *LoopChain.begin();
+ for (auto *Pred : ChainHeaderBB->predecessors()) {
BlockChain *PredChain = BlockToChain[Pred];
if (!LoopBlockSet.count(Pred) &&
(!PredChain || Pred == *std::prev(PredChain->end()))) {
- auto EdgeFreq =
- MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, HeaderBB);
+ auto EdgeFreq = MBFI->getBlockFreq(Pred) *
+ MBPI->getEdgeProbability(Pred, ChainHeaderBB);
auto FallThruCost = ScaleBlockFrequency(EdgeFreq, MisfetchCost);
// If the predecessor has only an unconditional jump to the header, we
// need to consider the cost of this jump.
@@ -2117,7 +2362,7 @@ void MachineBlockPlacement::rotateLoopWithProfile(
// If the current BB is the loop header, we need to take into account the
// cost of the missed fall through edge from outside of the loop to the
// header.
- if (Iter != HeaderIter)
+ if (Iter != LoopChain.begin())
Cost += HeaderFallThroughCost;
// Collect the loop exit cost by summing up frequencies of all exit edges
@@ -2238,9 +2483,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// loop. This will default to the header, but may end up as one of the
// predecessors to the header if there is one which will result in strictly
// fewer branches in the loop body.
- // When we use profile data to rotate the loop, this is unnecessary.
- MachineBasicBlock *LoopTop =
- RotateLoopWithProfile ? L.getHeader() : findBestLoopTop(L, LoopBlockSet);
+ MachineBasicBlock *LoopTop = findBestLoopTop(L, LoopBlockSet);
// If we selected just the header for the loop top, look for a potentially
// profitable exit block in the event that rotating the loop can eliminate
@@ -2249,8 +2492,9 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
// Loops are processed innermost to outermost; make sure we clear
// PreferredLoopExit before processing a new loop.
PreferredLoopExit = nullptr;
+ BlockFrequency ExitFreq;
if (!RotateLoopWithProfile && LoopTop == L.getHeader())
- PreferredLoopExit = findBestLoopExit(L, LoopBlockSet);
+ PreferredLoopExit = findBestLoopExit(L, LoopBlockSet, ExitFreq);
BlockChain &LoopChain = *BlockToChain[LoopTop];
@@ -2270,7 +2514,7 @@ void MachineBlockPlacement::buildLoopChains(const MachineLoop &L) {
if (RotateLoopWithProfile)
rotateLoopWithProfile(LoopChain, L, LoopBlockSet);
else
- rotateLoop(LoopChain, PreferredLoopExit, LoopBlockSet);
+ rotateLoop(LoopChain, PreferredLoopExit, ExitFreq, LoopBlockSet);
LLVM_DEBUG({
// Crash at the end so we get all of the debugging output first.
@@ -2497,8 +2741,8 @@ void MachineBlockPlacement::alignBlocks() {
// exclusively on the loop info here so that we can align backedges in
// unnatural CFGs and backedges that were introduced purely because of the
// loop rotations done during this layout pass.
- if (F->getFunction().optForMinSize() ||
- (F->getFunction().optForSize() && !TLI->alignLoopsWithOptSize()))
+ if (F->getFunction().hasMinSize() ||
+ (F->getFunction().hasOptSize() && !TLI->alignLoopsWithOptSize()))
return;
BlockChain &FunctionChain = *BlockToChain[&F->front()];
if (FunctionChain.begin() == FunctionChain.end())
@@ -2773,7 +3017,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (allowTailDupPlacement()) {
MPDT = &getAnalysis<MachinePostDominatorTree>();
- if (MF.getFunction().optForSize())
+ if (MF.getFunction().hasOptSize())
TailDupSize = 1;
bool PreRegAlloc = false;
TailDup.initMF(MF, PreRegAlloc, MBPI, /* LayoutMode */ true, TailDupSize);
@@ -2796,7 +3040,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
- /*AfterBlockPlacement=*/true)) {
+ /*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
ComputedEdges.clear();
diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
index e4952aaaba06..d2277ce51746 100644
--- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp
+++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineBranchProbabilityInfo.cpp - Machine Branch Probability Info -===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 6ee8571c28aa..2df6d40d9293 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -1,9 +1,8 @@
//===- MachineCSE.cpp - Machine Common Subexpression Elimination Pass -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -50,6 +50,8 @@ using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
+STATISTIC(NumPREs, "Number of partially redundant expressions"
+ " transformed to fully redundant");
STATISTIC(NumPhysCSEs,
"Number of physreg referencing common subexpr eliminated");
STATISTIC(NumCrossBBCSEs,
@@ -85,6 +87,7 @@ namespace {
void releaseMemory() override {
ScopeMap.clear();
+ PREMap.clear();
Exps.clear();
}
@@ -95,9 +98,12 @@ namespace {
ScopedHashTable<MachineInstr *, unsigned, MachineInstrExpressionTrait,
AllocatorTy>;
using ScopeType = ScopedHTType::ScopeTy;
+ using PhysDefVector = SmallVector<std::pair<unsigned, unsigned>, 2>;
unsigned LookAheadLimit = 0;
DenseMap<MachineBasicBlock *, ScopeType *> ScopeMap;
+ DenseMap<MachineInstr *, MachineBasicBlock *, MachineInstrExpressionTrait>
+ PREMap;
ScopedHTType VNT;
SmallVector<MachineInstr *, 64> Exps;
unsigned CurrVN = 0;
@@ -109,22 +115,24 @@ namespace {
MachineBasicBlock::const_iterator E) const;
bool hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &PhysUseDef) const;
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &PhysUseDef) const;
bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &NonLocal) const;
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs, bool &NonLocal) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI);
+ MachineBasicBlock *CSBB, MachineInstr *MI);
void EnterScope(MachineBasicBlock *MBB);
void ExitScope(MachineBasicBlock *MBB);
- bool ProcessBlock(MachineBasicBlock *MBB);
+ bool ProcessBlockCSE(MachineBasicBlock *MBB);
void ExitScopeIfDone(MachineDomTreeNode *Node,
DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
bool PerformCSE(MachineDomTreeNode *Node);
+
+ bool isPRECandidate(MachineInstr *MI);
+ bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
+ bool PerformSimplePRE(MachineDominatorTree *DT);
};
} // end anonymous namespace
@@ -256,9 +264,9 @@ static bool isCallerPreservedOrConstPhysReg(unsigned Reg,
/// instruction does not use a physical register.
bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineBasicBlock *MBB,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
- bool &PhysUseDef) const{
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
+ bool &PhysUseDef) const {
// First, add all uses to PhysRefs.
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
@@ -278,7 +286,8 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// (which currently contains only uses), set the PhysUseDef flag.
PhysUseDef = false;
MachineBasicBlock::const_iterator I = MI; I = std::next(I);
- for (const MachineOperand &MO : MI->operands()) {
+ for (const auto &MOP : llvm::enumerate(MI->operands())) {
+ const MachineOperand &MO = MOP.value();
if (!MO.isReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
@@ -293,20 +302,21 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
- PhysDefs.push_back(Reg);
+ PhysDefs.push_back(std::make_pair(MOP.index(), Reg));
}
// Finally, add all defs to PhysRefs as well.
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
- for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
+ for (MCRegAliasIterator AI(PhysDefs[i].second, TRI, true); AI.isValid();
+ ++AI)
PhysRefs.insert(*AI);
return !PhysRefs.empty();
}
bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
- SmallSet<unsigned,8> &PhysRefs,
- SmallVectorImpl<unsigned> &PhysDefs,
+ SmallSet<unsigned, 8> &PhysRefs,
+ PhysDefVector &PhysDefs,
bool &NonLocal) const {
// For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction. The only exception
@@ -320,7 +330,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
- if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
+ if (MRI->isAllocatable(PhysDefs[i].second) ||
+ MRI->isReserved(PhysDefs[i].second))
// Avoid extending the live range of physical registers if they are
// allocatable or reserved.
return false;
@@ -381,7 +392,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Ignore stuff that we obviously can't move.
if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
- MI->hasUnmodeledSideEffects())
+ MI->mayRaiseFPException() || MI->hasUnmodeledSideEffects())
return false;
if (MI->mayLoad()) {
@@ -404,9 +415,10 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
}
/// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
-/// common expression that defines Reg.
+/// common expression that defines Reg. CSBB is the basic block where CSReg
+/// is defined.
bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
- MachineInstr *CSMI, MachineInstr *MI) {
+ MachineBasicBlock *CSBB, MachineInstr *MI) {
// FIXME: Heuristic that works around the lack of live range splitting.
// If CSReg is used at all uses of Reg, CSE should not increase register
@@ -432,7 +444,6 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// an immediate predecessor. We don't want to increase register pressure and
// end up causing other computation to be spilled.
if (TII->isAsCheapAsAMove(*MI)) {
- MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
@@ -487,7 +498,7 @@ void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
ScopeMap.erase(SI);
}
-bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
+bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
bool Changed = false;
SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
@@ -536,7 +547,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// It's also not safe if the instruction uses physical registers.
bool CrossMBBPhysDef = false;
SmallSet<unsigned, 8> PhysRefs;
- SmallVector<unsigned, 2> PhysDefs;
+ PhysDefVector PhysDefs;
bool PhysUseDef = false;
if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
PhysDefs, PhysUseDef)) {
@@ -597,7 +608,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
TargetRegisterInfo::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
- if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
+ if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
LLVM_DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
DoCSE = false;
break;
@@ -635,6 +646,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// we should make sure it is not dead at CSMI.
for (unsigned ImplicitDefToUpdate : ImplicitDefsToUpdate)
CSMI->getOperand(ImplicitDefToUpdate).setIsDead(false);
+ for (auto PhysDef : PhysDefs)
+ if (!MI->getOperand(PhysDef.first).isDead())
+ CSMI->getOperand(PhysDef.first).setIsDead(false);
// Go through implicit defs of CSMI and MI, and clear the kill flags on
// their uses in all the instructions between CSMI and MI.
@@ -663,9 +677,9 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
// Add physical register defs now coming in from a predecessor to MBB
// livein list.
while (!PhysDefs.empty()) {
- unsigned LiveIn = PhysDefs.pop_back_val();
- if (!MBB->isLiveIn(LiveIn))
- MBB->addLiveIn(LiveIn);
+ auto LiveIn = PhysDefs.pop_back_val();
+ if (!MBB->isLiveIn(LiveIn.second))
+ MBB->addLiveIn(LiveIn.second);
}
++NumCrossBBCSEs;
}
@@ -734,7 +748,7 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
for (MachineDomTreeNode *Node : Scopes) {
MachineBasicBlock *MBB = Node->getBlock();
EnterScope(MBB);
- Changed |= ProcessBlock(MBB);
+ Changed |= ProcessBlockCSE(MBB);
// If it's a leaf node, it's done. Traverse upwards to pop ancestors.
ExitScopeIfDone(Node, OpenChildren);
}
@@ -742,6 +756,104 @@ bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
return Changed;
}
+// We use stronger checks for PRE candidates than for CSE ones so that they
+// subsume the checks inside ProcessBlockCSE(), not only those inside
+// isCSECandidate(). This helps to exclude instrs created by PRE that won't
+// be CSEed later.
+bool MachineCSE::isPRECandidate(MachineInstr *MI) {
+ if (!isCSECandidate(MI) ||
+ MI->isNotDuplicable() ||
+ MI->mayLoad() ||
+ MI->isAsCheapAsAMove() ||
+ MI->getNumDefs() != 1 ||
+ MI->getNumExplicitDefs() != 1)
+ return false;
+
+ for (auto def : MI->defs())
+ if (!TRI->isVirtualRegister(def.getReg()))
+ return false;
+
+ for (auto use : MI->uses())
+ if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
+ return false;
+
+ return true;
+}
+
+bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) {
+ MachineInstr *MI = &*I;
+ ++I;
+
+ if (!isPRECandidate(MI))
+ continue;
+
+ if (!PREMap.count(MI)) {
+ PREMap[MI] = MBB;
+ continue;
+ }
+
+ auto MBB1 = PREMap[MI];
+ assert(
+ !DT->properlyDominates(MBB, MBB1) &&
+ "MBB cannot properly dominate MBB1 during a DFS over the dominator tree!");
+ auto CMBB = DT->findNearestCommonDominator(MBB, MBB1);
+ if (!CMBB->isLegalToHoistInto())
+ continue;
+
+ // Two instrs are partially redundant if their basic blocks are reachable
+ // from one another but neither dominates the other.
+ if (CMBB != MBB1) {
+ auto BB = MBB->getBasicBlock(), BB1 = MBB1->getBasicBlock();
+ if (BB != nullptr && BB1 != nullptr &&
+ (isPotentiallyReachable(BB1, BB) ||
+ isPotentiallyReachable(BB, BB1))) {
+
+ assert(MI->getOperand(0).isDef() &&
+ "First operand of instr with one explicit def must be this def");
+ unsigned VReg = MI->getOperand(0).getReg();
+ unsigned NewReg = MRI->cloneVirtualRegister(VReg);
+ if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
+ continue;
+ MachineInstr &NewMI =
+ TII->duplicate(*CMBB, CMBB->getFirstTerminator(), *MI);
+ NewMI.getOperand(0).setReg(NewReg);
+
+ PREMap[MI] = CMBB;
+ ++NumPREs;
+ Changed = true;
+ }
+ }
+ }
+ return Changed;
+}
+
+// This simple PRE (partial redundancy elimination) pass doesn't actually
+// eliminate partial redundancy but transforms it to full redundancy,
+// anticipating that the next CSE step will eliminate this created redundancy.
+// If CSE doesn't eliminate this, then the created instruction will remain
+// dead and will be eliminated later by the Remove Dead Machine Instructions
+// pass.
+bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
+ SmallVector<MachineDomTreeNode *, 32> BBs;
+
+ PREMap.clear();
+ bool Changed = false;
+ BBs.push_back(DT->getRootNode());
+ do {
+ auto Node = BBs.pop_back_val();
+ const std::vector<MachineDomTreeNode *> &Children = Node->getChildren();
+ for (MachineDomTreeNode *Child : Children)
+ BBs.push_back(Child);
+
+ MachineBasicBlock *MBB = Node->getBlock();
+ Changed |= ProcessBlockPRE(DT, MBB);
+
+ } while (!BBs.empty());
+
+ return Changed;
+}
+
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -752,5 +864,8 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
- return PerformCSE(DT->getRootNode());
+ bool ChangedPRE, ChangedCSE;
+ ChangedPRE = PerformSimplePRE(DT);
+ ChangedCSE = PerformCSE(DT->getRootNode());
+ return ChangedPRE || ChangedCSE;
}
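A note on the traversal used by PerformSimplePRE above: it is a preorder walk of the dominator tree driven by an explicit worklist instead of recursion. The standalone C++ sketch below mirrors only that shape; DomNode, walkDomTree, and Visit are invented stand-ins for MachineDomTreeNode, the pass's loop, and ProcessBlockPRE, not LLVM APIs.

#include <cstdio>
#include <vector>

// Invented stand-in for MachineDomTreeNode: a block id plus children.
struct DomNode {
  int Block;
  std::vector<DomNode *> Children;
};

// Preorder worklist walk, the same shape as PerformSimplePRE's do/while loop;
// Visit plays the role of ProcessBlockPRE and reports whether it changed
// anything.
bool walkDomTree(DomNode *Root, bool (*Visit)(int)) {
  bool Changed = false;
  std::vector<DomNode *> Work{Root};
  while (!Work.empty()) {
    DomNode *N = Work.back();
    Work.pop_back();
    for (DomNode *C : N->Children)
      Work.push_back(C);
    Changed |= Visit(N->Block);
  }
  return Changed;
}

int main() {
  DomNode C{2, {}}, B{1, {&C}}, A{0, {&B}};
  walkDomTree(&A, [](int Block) {
    std::printf("visit %d\n", Block);
    return false;
  });
  return 0;
}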
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index f51b482e20e3..0584ec0bd2b3 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -1,9 +1,8 @@
//===---- MachineCombiner.cpp - Instcombining on SSA form machine code ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -559,16 +558,15 @@ bool MachineCombiner::combineInstructions(MachineBasicBlock *MBB) {
continue;
LLVM_DEBUG(if (dump_intrs) {
- dbgs() << "\tFor the Pattern (" << (int)P << ") these instructions could be removed\n";
- for (auto const *InstrPtr : DelInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
- InstrPtr->print(dbgs(), false, false, false, TII);
- }
+ dbgs() << "\tFor the Pattern (" << (int)P
+ << ") these instructions could be removed\n";
+ for (auto const *InstrPtr : DelInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
dbgs() << "\tThese instructions could replace the removed ones\n";
- for (auto const *InstrPtr : InsInstrs) {
- dbgs() << "\t\t" << STI->getSchedInfoStr(*InstrPtr) << ": ";
- InstrPtr->print(dbgs(), false, false, false, TII);
- }
+ for (auto const *InstrPtr : InsInstrs)
+ InstrPtr->print(dbgs(), /*IsStandalone*/false, /*SkipOpers*/false,
+ /*SkipDebugLoc*/false, /*AddNewLine*/true, TII);
});
bool SubstituteAlways = false;
@@ -641,7 +639,7 @@ bool MachineCombiner::runOnMachineFunction(MachineFunction &MF) {
MLI = &getAnalysis<MachineLoopInfo>();
Traces = &getAnalysis<MachineTraceMetrics>();
MinInstr = nullptr;
- OptSize = MF.getFunction().optForSize();
+ OptSize = MF.getFunction().hasOptSize();
LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n');
if (!TII->useMachineCombiner()) {
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 19879fe89007..9fc12ac89e12 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -1,9 +1,8 @@
//===- MachineCopyPropagation.cpp - Machine Copy Propagation Pass ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineDominanceFrontier.cpp b/lib/CodeGen/MachineDominanceFrontier.cpp
index b559e4e513a6..6704298c17d6 100644
--- a/lib/CodeGen/MachineDominanceFrontier.cpp
+++ b/lib/CodeGen/MachineDominanceFrontier.cpp
@@ -1,9 +1,8 @@
//===- MachineDominanceFrontier.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 6b2802626456..1dfba8638c22 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -1,9 +1,8 @@
//===- MachineDominators.cpp - Machine Dominator Calculation --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index 0b316871dbdf..bae3a4333bda 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -1,9 +1,8 @@
//===-- MachineFrameInfo.cpp ---------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -57,7 +56,8 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
!IsSpillSlot, StackID));
int Index = (int)Objects.size() - NumFixedObjects - 1;
assert(Index >= 0 && "Bad frame index!");
- ensureMaxAlignment(Alignment);
+ if (StackID == 0)
+ ensureMaxAlignment(Alignment);
return Index;
}
@@ -92,7 +92,7 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
- /*isSpillSlot=*/false, /*Alloca=*/nullptr,
+ /*IsSpillSlot=*/false, /*Alloca=*/nullptr,
IsAliased));
return -++NumFixedObjects;
}
@@ -142,11 +142,15 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
// should keep in mind that there's tight coupling between the two.
for (int i = getObjectIndexBegin(); i != 0; ++i) {
+ // Only estimate the stack size of the default stack.
+ if (getStackID(i) != TargetStackID::Default)
+ continue;
int FixedOff = -getObjectOffset(i);
if (FixedOff > Offset) Offset = FixedOff;
}
for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) {
- if (isDeadObjectIndex(i))
+ // Only estimate the size of live objects on the default stack.
+ if (isDeadObjectIndex(i) || getStackID(i) != TargetStackID::Default)
continue;
Offset += getObjectSize(i);
unsigned Align = getObjectAlignment(i);
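For context on what the filtered loop computes: estimateStackSize sums the live objects' sizes, rounding the running offset up to each object's alignment (the rounding itself sits just past this excerpt, so treat that detail as an assumption), and the hunk above now also skips anything not on the default stack. A standalone sketch under those assumptions; Obj, alignTo, and estimate are invented names.

#include <cstdint>
#include <cstdio>
#include <vector>

struct Obj { uint64_t Size; uint64_t Align; bool Dead; int StackID; };

// Round Off up to a multiple of Align; assumes Align is a power of two.
uint64_t alignTo(uint64_t Off, uint64_t Align) {
  return (Off + Align - 1) & ~(Align - 1);
}

uint64_t estimate(const std::vector<Obj> &Objects) {
  uint64_t Offset = 0;
  for (const Obj &O : Objects) {
    if (O.Dead || O.StackID != 0) // the new filter: default stack only
      continue;
    Offset = alignTo(Offset + O.Size, O.Align);
  }
  return Offset;
}

int main() {
  // 8 bytes @ align 8 -> 8; +1 byte rounded up to align 16 -> 16; the
  // StackID 1 object is skipped entirely. Prints 16.
  std::printf("%llu\n", (unsigned long long)estimate(
      {{8, 8, false, 0}, {1, 16, false, 0}, {64, 8, false, 1}}));
  return 0;
}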
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 3495319670a5..4df5ce2dcedc 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -1,9 +1,8 @@
//===- MachineFunction.cpp ------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -44,6 +43,7 @@
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
@@ -165,7 +165,7 @@ void MachineFunction::init() {
!F.hasFnAttribute("no-realign-stack");
FrameInfo = new (Allocator) MachineFrameInfo(
getFnStackAlignment(STI, F), /*StackRealignable=*/CanRealignSP,
- /*ForceRealign=*/CanRealignSP &&
+ /*ForcedRealign=*/CanRealignSP &&
F.hasFnAttribute(Attribute::StackAlignment));
if (F.hasFnAttribute(Attribute::StackAlignment))
@@ -175,7 +175,7 @@ void MachineFunction::init() {
Alignment = STI->getTargetLowering()->getMinFunctionAlignment();
// FIXME: Shouldn't use pref alignment if explicit alignment is set on F.
- // FIXME: Use Function::optForSize().
+ // FIXME: Use Function::hasOptSize().
if (!F.hasFnAttribute(Attribute::OptimizeForSize))
Alignment = std::max(Alignment,
STI->getTargetLowering()->getPrefFunctionAlignment());
@@ -274,6 +274,12 @@ bool MachineFunction::shouldSplitStack() const {
return getFunction().hasFnAttribute("split-stack");
}
+LLVM_NODISCARD unsigned
+MachineFunction::addFrameInst(const MCCFIInstruction &Inst) {
+ FrameInstructions.push_back(Inst);
+ return FrameInstructions.size() - 1;
+}
+
/// This discards all of the MachineBasicBlock numbers and recomputes them.
/// This guarantees that the MBB numbers are sequential, dense, and match the
/// ordering of the blocks within the function. If a specific MachineBasicBlock
@@ -357,6 +363,13 @@ MachineInstr &MachineFunction::CloneMachineInstrBundle(MachineBasicBlock &MBB,
/// ~MachineInstr() destructor must be empty.
void
MachineFunction::DeleteMachineInstr(MachineInstr *MI) {
+ // Verify that the call site info is in a valid state. This assertion is
+ // expected to trigger while implementing call site info support for a new
+ // architecture. If it triggers, the backtrace will tell where to insert a
+ // call to updateCallSiteInfo().
+ assert((!MI->isCall(MachineInstr::IgnoreBundle) ||
+ CallSitesInfo.find(MI) == CallSitesInfo.end()) &&
+ "Call site info was not updated!");
// Strip it for parts. The operand array and the MI object itself are
// independently recyclable.
if (MI->Operands)
@@ -396,19 +409,18 @@ MachineMemOperand *MachineFunction::getMachineMemOperand(
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
- if (MMO->getValue())
- return new (Allocator)
- MachineMemOperand(MachinePointerInfo(MMO->getValue(),
- MMO->getOffset()+Offset),
- MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ const MachinePointerInfo &PtrInfo = MMO->getPointerInfo();
+
+ // If there is no pointer value, the offset isn't tracked, so we need to
+ // adjust the base alignment.
+ unsigned Align = PtrInfo.V.isNull()
+ ? MinAlign(MMO->getBaseAlignment(), Offset)
+ : MMO->getBaseAlignment();
+
return new (Allocator)
- MachineMemOperand(MachinePointerInfo(MMO->getPseudoValue(),
- MMO->getOffset()+Offset),
- MMO->getFlags(), Size, MMO->getBaseAlignment(),
- AAMDNodes(), nullptr, MMO->getSyncScopeID(),
- MMO->getOrdering(), MMO->getFailureOrdering());
+ MachineMemOperand(PtrInfo.getWithOffset(Offset), MMO->getFlags(), Size,
+ Align, AAMDNodes(), nullptr, MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
}
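The alignment adjustment above relies on llvm::MinAlign, which yields the largest power of two dividing both arguments: a base known only to be BaseAlign-aligned, accessed at an untracked +Offset, is guaranteed no more than that. A standalone restatement for illustration; this minAlign is a local re-implementation, not the LLVM symbol.

#include <cstdint>
#include <cstdio>

// Largest power of two dividing both A and B: the lowest set bit of A|B.
uint64_t minAlign(uint64_t A, uint64_t B) {
  uint64_t X = A | B;
  return X & (~X + 1);
}

int main() {
  // A 16-byte-aligned base accessed at offset +4 is only 4-byte aligned.
  std::printf("%llu\n", (unsigned long long)minAlign(16, 4)); // prints 4
  return 0;
}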
MachineMemOperand *
@@ -425,6 +437,15 @@ MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
MMO->getOrdering(), MMO->getFailureOrdering());
}
+MachineMemOperand *
+MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
+ MachineMemOperand::Flags Flags) {
+ return new (Allocator) MachineMemOperand(
+ MMO->getPointerInfo(), Flags, MMO->getSize(), MMO->getBaseAlignment(),
+ MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
+ MMO->getOrdering(), MMO->getFailureOrdering());
+}
+
MachineInstr::ExtraInfo *
MachineFunction::createMIExtraInfo(ArrayRef<MachineMemOperand *> MMOs,
MCSymbol *PreInstrSymbol,
@@ -802,6 +823,32 @@ try_next:;
return FilterID;
}
+void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) {
+ MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
+ MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
+ I->setPreInstrSymbol(*this, BeginLabel);
+ I->setPostInstrSymbol(*this, EndLabel);
+
+ DIType *DI = dyn_cast<DIType>(MD);
+ CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
+}
+
+void MachineFunction::updateCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ if (!Target.Options.EnableDebugEntryValues || Old == New)
+ return;
+
+ assert(Old->isCall() && (!New || New->isCall()) &&
+ "Call site info referes only to call instructions!");
+ CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old);
+ if (CSIt == CallSitesInfo.end())
+ return;
+ CallSiteInfo CSInfo = std::move(CSIt->second);
+ CallSitesInfo.erase(CSIt);
+ if (New)
+ CallSitesInfo[New] = CSInfo;
+}
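The new updateCallSiteInfo is essentially a re-keying of one map entry: look up Old, remove it, and re-insert the payload under New unless New is null. A self-contained sketch of that shape, with std::map and void* keys standing in for CallSiteInfoMap and MachineInstr pointers.

#include <cstdio>
#include <map>

using CallSiteInfo = int; // toy stand-in for the real bookkeeping record

static std::map<const void *, CallSiteInfo> CallSitesInfo;

// Move the entry from key Old to key New; erase it when New is null.
static void updateCallSiteInfo(const void *Old, const void *New) {
  auto It = CallSitesInfo.find(Old);
  if (It == CallSitesInfo.end())
    return;
  CallSiteInfo Info = It->second;
  CallSitesInfo.erase(It);
  if (New)
    CallSitesInfo[New] = Info;
}

int main() {
  int A = 0, B = 0; // addresses are only used as map keys here
  CallSitesInfo[&A] = 42;
  updateCallSiteInfo(&A, &B);      // entry moved: &B now maps to 42
  updateCallSiteInfo(&B, nullptr); // entry erased
  std::printf("%zu\n", CallSitesInfo.size()); // prints 0
  return 0;
}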
+
/// \}
//===----------------------------------------------------------------------===//
@@ -888,9 +935,11 @@ void MachineJumpTableInfo::print(raw_ostream &OS) const {
OS << "Jump Tables:\n";
for (unsigned i = 0, e = JumpTables.size(); i != e; ++i) {
- OS << printJumpTableEntryReference(i) << ": ";
+ OS << printJumpTableEntryReference(i) << ':';
for (unsigned j = 0, f = JumpTables[i].MBBs.size(); j != f; ++j)
OS << ' ' << printMBBReference(*JumpTables[i].MBBs[j]);
+ if (i != e)
+ OS << '\n';
}
OS << '\n';
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 5db4e299fa70..0da4cf3fc90c 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -1,9 +1,8 @@
//===-- MachineFunctionPass.cpp -------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp
index 9c96ba748778..0ea8975cc74c 100644
--- a/lib/CodeGen/MachineFunctionPrinterPass.cpp
+++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp
@@ -1,9 +1,8 @@
//===-- MachineFunctionPrinterPass.cpp ------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 764a84c7e132..e5c398a2d10c 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineInstr.cpp ---------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -26,6 +25,7 @@
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -50,9 +50,9 @@
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
-#include "llvm/IR/Operator.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSymbol.h"
@@ -225,12 +225,13 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) {
}
#ifndef NDEBUG
- bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata;
+ bool isDebugOp = Op.getType() == MachineOperand::MO_Metadata ||
+ Op.getType() == MachineOperand::MO_MCSymbol;
// OpNo now points as the desired insertion point. Unless this is a variadic
// instruction, only implicit regs are allowed beyond MCID->getNumOperands().
// RegMask operands go between the explicit and implicit operands.
assert((isImpReg || Op.isRegMask() || MCID->isVariadic() ||
- OpNo < MCID->getNumOperands() || isMetaDataOp) &&
+ OpNo < MCID->getNumOperands() || isDebugOp) &&
"Trying to add an operand to a machine instr that is already done!");
#endif
@@ -512,45 +513,65 @@ void MachineInstr::setPostInstrSymbol(MachineFunction &MF, MCSymbol *Symbol) {
MF.createMIExtraInfo(memoperands(), getPreInstrSymbol(), Symbol));
}
+void MachineInstr::cloneInstrSymbols(MachineFunction &MF,
+ const MachineInstr &MI) {
+ if (this == &MI)
+ // Nothing to do for a self-clone!
+ return;
+
+ assert(&MF == MI.getMF() &&
+ "Invalid machine functions when cloning instruction symbols!");
+
+ setPreInstrSymbol(MF, MI.getPreInstrSymbol());
+ setPostInstrSymbol(MF, MI.getPostInstrSymbol());
+}
+
uint16_t MachineInstr::mergeFlagsWith(const MachineInstr &Other) const {
// For now, just return the union of the flags. If the flags get more
// complicated over time, we might need more logic here.
return getFlags() | Other.getFlags();
}
-void MachineInstr::copyIRFlags(const Instruction &I) {
+uint16_t MachineInstr::copyFlagsFromInstruction(const Instruction &I) {
+ uint16_t MIFlags = 0;
// Copy the wrapping flags.
if (const OverflowingBinaryOperator *OB =
dyn_cast<OverflowingBinaryOperator>(&I)) {
if (OB->hasNoSignedWrap())
- setFlag(MachineInstr::MIFlag::NoSWrap);
+ MIFlags |= MachineInstr::MIFlag::NoSWrap;
if (OB->hasNoUnsignedWrap())
- setFlag(MachineInstr::MIFlag::NoUWrap);
+ MIFlags |= MachineInstr::MIFlag::NoUWrap;
}
// Copy the exact flag.
if (const PossiblyExactOperator *PE = dyn_cast<PossiblyExactOperator>(&I))
if (PE->isExact())
- setFlag(MachineInstr::MIFlag::IsExact);
+ MIFlags |= MachineInstr::MIFlag::IsExact;
// Copy the fast-math flags.
if (const FPMathOperator *FP = dyn_cast<FPMathOperator>(&I)) {
const FastMathFlags Flags = FP->getFastMathFlags();
if (Flags.noNaNs())
- setFlag(MachineInstr::MIFlag::FmNoNans);
+ MIFlags |= MachineInstr::MIFlag::FmNoNans;
if (Flags.noInfs())
- setFlag(MachineInstr::MIFlag::FmNoInfs);
+ MIFlags |= MachineInstr::MIFlag::FmNoInfs;
if (Flags.noSignedZeros())
- setFlag(MachineInstr::MIFlag::FmNsz);
+ MIFlags |= MachineInstr::MIFlag::FmNsz;
if (Flags.allowReciprocal())
- setFlag(MachineInstr::MIFlag::FmArcp);
+ MIFlags |= MachineInstr::MIFlag::FmArcp;
if (Flags.allowContract())
- setFlag(MachineInstr::MIFlag::FmContract);
+ MIFlags |= MachineInstr::MIFlag::FmContract;
if (Flags.approxFunc())
- setFlag(MachineInstr::MIFlag::FmAfn);
+ MIFlags |= MachineInstr::MIFlag::FmAfn;
if (Flags.allowReassoc())
- setFlag(MachineInstr::MIFlag::FmReassoc);
+ MIFlags |= MachineInstr::MIFlag::FmReassoc;
}
+
+ return MIFlags;
+}
+
+void MachineInstr::copyIRFlags(const Instruction &I) {
+ Flags = copyFlagsFromInstruction(I);
}
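The refactor above separates computing the flag mask from applying it: copyFlagsFromInstruction accumulates bits into a local value and copyIRFlags stores them in one assignment instead of repeated setFlag() calls. A minimal sketch of the pattern; the enum values below are invented and do not match the real MIFlag encoding.

#include <cstdint>
#include <cstdio>

// Invented flag bits standing in for MachineInstr::MIFlag values.
enum ToyFlag : uint16_t { NoSWrap = 1 << 0, NoUWrap = 1 << 1, IsExact = 1 << 2 };

// Compute the full mask from the source properties...
uint16_t flagsFrom(bool NSW, bool NUW, bool Exact) {
  uint16_t F = 0;
  if (NSW)
    F |= NoSWrap;
  if (NUW)
    F |= NoUWrap;
  if (Exact)
    F |= IsExact;
  return F;
}

int main() {
  // ...and apply it with a single store, as copyIRFlags now does.
  uint16_t Flags = flagsFrom(/*NSW=*/true, /*NUW=*/false, /*Exact=*/true);
  std::printf("0x%x\n", (unsigned)Flags); // prints 0x5
  return 0;
}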
bool MachineInstr::hasPropertyInBundle(uint64_t Mask, QueryType Type) const {
@@ -1157,7 +1178,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
}
if (isPosition() || isDebugInstr() || isTerminator() ||
- hasUnmodeledSideEffects())
+ mayRaiseFPException() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1173,8 +1194,8 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AliasAnalysis *AA, MachineInstr &Other,
- bool UseTBAA) {
+bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
+ bool UseTBAA) const {
const MachineFunction *MF = getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
@@ -1304,7 +1325,11 @@ bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
const MachineFrameInfo &MFI = getParent()->getParent()->getFrameInfo();
for (MachineMemOperand *MMO : memoperands()) {
- if (MMO->isVolatile()) return false;
+ if (!MMO->isUnordered())
+ // If the memory operand has ordering side effects, we can't move the
+ // instruction. Such an instruction is technically an invariant load,
+ // but the caller code would need to be updated to expect that.
+ return false;
if (MMO->isStore()) return false;
if (MMO->isInvariant() && MMO->isDereferenceable())
continue;
@@ -1447,7 +1472,7 @@ void MachineInstr::print(raw_ostream &OS, bool IsStandalone, bool SkipOpers,
ModuleSlotTracker MST(M);
if (F)
MST.incorporateFunction(*F);
- print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, TII);
+ print(OS, MST, IsStandalone, SkipOpers, SkipDebugLoc, AddNewLine, TII);
}
void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
@@ -1519,6 +1544,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "nsw ";
if (getFlag(MachineInstr::IsExact))
OS << "exact ";
+ if (getFlag(MachineInstr::FPExcept))
+ OS << "fpexcept ";
// Print the opcode name.
if (TII)
@@ -1905,7 +1932,7 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
void MachineInstr::addRegisterDefined(unsigned Reg,
const TargetRegisterInfo *RegInfo) {
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo);
+ MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
if (MO)
return;
} else {
@@ -2050,7 +2077,7 @@ static const DIExpression *computeExprForSpill(const MachineInstr &MI) {
const DIExpression *Expr = MI.getDebugExpression();
if (MI.isIndirectDebugValue()) {
assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
- Expr = DIExpression::prepend(Expr, DIExpression::WithDeref);
+ Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
}
return Expr;
}
@@ -2100,3 +2127,54 @@ void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
for (auto *DBI : DbgValues)
DBI->getOperand(0).setReg(Reg);
}
+
+using MMOList = SmallVector<const MachineMemOperand *, 2>;
+
+static unsigned getSpillSlotSize(MMOList &Accesses,
+ const MachineFrameInfo &MFI) {
+ unsigned Size = 0;
+ for (auto A : Accesses)
+ if (MFI.isSpillSlotObjectIndex(
+ cast<FixedStackPseudoSourceValue>(A->getPseudoValue())
+ ->getFrameIndex()))
+ Size += A->getSize();
+ return Size;
+}
+
+Optional<unsigned>
+MachineInstr::getSpillSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isStoreToStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedSpillSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasStoreToStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getRestoreSize(const TargetInstrInfo *TII) const {
+ int FI;
+ if (TII->isLoadFromStackSlotPostFE(*this, FI)) {
+ const MachineFrameInfo &MFI = getMF()->getFrameInfo();
+ if (MFI.isSpillSlotObjectIndex(FI))
+ return (*memoperands_begin())->getSize();
+ }
+ return None;
+}
+
+Optional<unsigned>
+MachineInstr::getFoldedRestoreSize(const TargetInstrInfo *TII) const {
+ MMOList Accesses;
+ if (TII->hasLoadFromStackSlot(*this, Accesses))
+ return getSpillSlotSize(Accesses, getMF()->getFrameInfo());
+ return None;
+}
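A toy version of the getSpillSlotSize helper above: sum the sizes of the accesses whose frame index refers to a spill slot. Access, spillSlotSize, and the SpillSlots set are invented stand-ins for MachineMemOperand, the helper, and MachineFrameInfo::isSpillSlotObjectIndex.

#include <cstdio>
#include <set>
#include <vector>

struct Access { int FrameIndex; unsigned Size; };

static unsigned spillSlotSize(const std::vector<Access> &Accesses,
                              const std::set<int> &SpillSlots) {
  unsigned Size = 0;
  for (const Access &A : Accesses)
    if (SpillSlots.count(A.FrameIndex)) // isSpillSlotObjectIndex stand-in
      Size += A.Size;
  return Size;
}

int main() {
  // Frame indexes 0 and 2 are spill slots; the access to index 1 is ignored.
  std::printf("%u\n", spillSlotSize({{0, 8}, {1, 4}, {2, 8}}, {0, 2})); // 16
  return 0;
}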
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index ae378cc8c464..32e266e9401e 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -1,9 +1,8 @@
//===-- lib/CodeGen/MachineInstrBundle.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 58fd1f238420..1107e609c258 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -1,9 +1,8 @@
//===- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index 2bce59235057..3b8b430d1b0f 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -1,9 +1,8 @@
//===- MachineLoopInfo.cpp - Natural Loop Calculator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index 6ef8de88f8b1..aadcd7319799 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/MachineModuleInfo.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -206,11 +205,11 @@ MachineModuleInfo::~MachineModuleInfo() = default;
bool MachineModuleInfo::doInitialization(Module &M) {
ObjFileMMI = nullptr;
CurCallSite = 0;
- UsesVAFloatArgument = UsesMorestackAddr = false;
+ UsesMSVCFloatingPoint = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
TheModule = &M;
- DbgInfoAvailable = !empty(M.debug_compile_units());
+ DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
return false;
}
@@ -328,22 +327,3 @@ char FreeMachineFunction::ID;
FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
-
-//===- MMI building helpers -----------------------------------------------===//
-
-void llvm::computeUsesVAFloatArgument(const CallInst &I,
- MachineModuleInfo &MMI) {
- FunctionType *FT =
- cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
- if (FT->isVarArg() && !MMI.usesVAFloatArgument()) {
- for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- Type *T = I.getArgOperand(i)->getType();
- for (auto i : post_order(T)) {
- if (i->isFloatingPointTy()) {
- MMI.setUsesVAFloatArgument(true);
- return;
- }
- }
- }
- }
-}
diff --git a/lib/CodeGen/MachineModuleInfoImpls.cpp b/lib/CodeGen/MachineModuleInfoImpls.cpp
index 7b4f64bfe60d..16d24880ebe4 100644
--- a/lib/CodeGen/MachineModuleInfoImpls.cpp
+++ b/lib/CodeGen/MachineModuleInfoImpls.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/MachineModuleInfoImpls.cpp ----------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 05e51e1873cf..4fa4ea7f6cf5 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineOperand.cpp -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -25,6 +24,7 @@
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRPrintingPasses.h"
#include "llvm/IR/ModuleSlotTracker.h"
+#include "llvm/MC/MCDwarf.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
@@ -181,6 +181,19 @@ void MachineOperand::ChangeToES(const char *SymName,
setTargetFlags(TargetFlags);
}
+void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
+ unsigned char TargetFlags) {
+ assert((!isReg() || !isTied()) &&
+ "Cannot change a tied operand into a global address");
+
+ removeRegFromUses();
+
+ OpKind = MO_GlobalAddress;
+ Contents.OffsetedInfo.Val.GV = GV;
+ setOffset(Offset);
+ setTargetFlags(TargetFlags);
+}
+
void MachineOperand::ChangeToMCSymbol(MCSymbol *Sym) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an MCSymbol");
@@ -329,7 +342,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
switch (MO.getType()) {
case MachineOperand::MO_Register:
// Register operands don't have target flags.
- return hash_combine(MO.getType(), MO.getReg(), MO.getSubReg(), MO.isDef());
+ return hash_combine(MO.getType(), (unsigned)MO.getReg(), MO.getSubReg(), MO.isDef());
case MachineOperand::MO_Immediate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getImm());
case MachineOperand::MO_CImmediate:
@@ -348,7 +361,7 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIndex());
case MachineOperand::MO_ExternalSymbol:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getOffset(),
- MO.getSymbolName());
+ StringRef(MO.getSymbolName()));
case MachineOperand::MO_GlobalAddress:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getGlobal(),
MO.getOffset());
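On the ExternalSymbol case above: hashing the raw const char* from getSymbolName() mixes the pointer value, so identical names stored at different addresses could hash differently; wrapping it in StringRef hashes the characters instead. A standalone analogy using std::hash<std::string>:

#include <cstdio>
#include <functional>
#include <string>

int main() {
  char A[] = "sym", B[] = "sym"; // same contents, different storage
  const char *PA = A, *PB = B;
  std::printf("ptr-equal:  %d\n", PA == PB ? 1 : 0); // 0: distinct addresses
  std::printf("hash-equal: %d\n",
              std::hash<std::string>{}(PA) == std::hash<std::string>{}(PB)
                  ? 1 : 0);                          // 1: content-based hash
  return 0;
}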
@@ -994,7 +1007,7 @@ MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, Flags f,
assert((PtrInfo.V.isNull() || PtrInfo.V.is<const PseudoSourceValue *>() ||
isa<PointerType>(PtrInfo.V.get<const Value *>()->getType())) &&
"invalid pointer value");
- assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
+ assert(getBaseAlignment() == a && a != 0 && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
AtomicInfo.SSID = static_cast<unsigned>(SSID);
@@ -1125,7 +1138,7 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printLLVMNameWithoutPrefix(
OS, cast<ExternalSymbolPseudoSourceValue>(PVal)->getSymbol());
break;
- case PseudoSourceValue::TargetCustom:
+ default:
// FIXME: This is not necessarily the correct MIR serialization format for
// a custom pseudo source value, but at least it allows
// -print-machineinstrs to work on a target with custom pseudo source
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 906d5560d568..27db9106b337 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -1,9 +1,8 @@
///===- MachineOptimizationRemarkEmitter.cpp - Opt Diagnostic -*- C++ -*---===//
///
-/// The LLVM Compiler Infrastructure
-///
-/// This file is distributed under the University of Illinois Open Source
-/// License. See LICENSE.TXT for details.
+/// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+/// See https://llvm.org/LICENSE.txt for license information.
+/// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
///
///===---------------------------------------------------------------------===//
/// \file
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index ad96c0e579e4..80a235aeaa5c 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -1,9 +1,8 @@
//===---- MachineOutliner.cpp - Outline instructions -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -74,8 +73,6 @@
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <functional>
-#include <map>
-#include <sstream>
#include <tuple>
#include <vector>
@@ -1095,19 +1092,15 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
InstructionMapper &Mapper,
unsigned Name) {
- // Create the function name. This should be unique. For now, just hash the
- // module name and include it in the function name plus the number of this
- // function.
- std::ostringstream NameStream;
+ // Create the function name. This should be unique.
// FIXME: We should have a better naming scheme. This should be stable,
// regardless of changes to the outliner's cost model/traversal order.
- NameStream << "OUTLINED_FUNCTION_" << Name;
+ std::string FunctionName = ("OUTLINED_FUNCTION_" + Twine(Name)).str();
// Create the function using an IR-level function.
LLVMContext &C = M.getContext();
- Function *F = dyn_cast<Function>(
- M.getOrInsertFunction(NameStream.str(), Type::getVoidTy(C)));
- assert(F && "Function was null!");
+ Function *F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
+ Function::ExternalLinkage, FunctionName, M);
// NOTE: If this is linkonceodr, then we can take advantage of linker deduping
// which gives us better results when we outline from linkonceodr functions.
@@ -1205,11 +1198,10 @@ bool MachineOutliner::outline(Module &M,
unsigned OutlinedFunctionNum = 0;
// Sort by benefit. The most beneficial functions should be outlined first.
- std::stable_sort(
- FunctionList.begin(), FunctionList.end(),
- [](const OutlinedFunction &LHS, const OutlinedFunction &RHS) {
- return LHS.getBenefit() > RHS.getBenefit();
- });
+ llvm::stable_sort(FunctionList, [](const OutlinedFunction &LHS,
+ const OutlinedFunction &RHS) {
+ return LHS.getBenefit() > RHS.getBenefit();
+ });
// Walk over each function, outlining them as we go along. Functions are
// outlined greedily, based off the sort above.
@@ -1253,8 +1245,9 @@ bool MachineOutliner::outline(Module &M,
if (MBB.getParent()->getProperties().hasProperty(
MachineFunctionProperties::Property::TracksLiveness)) {
// Helper lambda for adding implicit def operands to the call
- // instruction.
- auto CopyDefs = [&CallInst](MachineInstr &MI) {
+ // instruction. It also updates call site information for moved
+ // code.
+ auto CopyDefsAndUpdateCalls = [&CallInst](MachineInstr &MI) {
for (MachineOperand &MOP : MI.operands()) {
// Skip over anything that isn't a register.
if (!MOP.isReg())
@@ -1266,13 +1259,16 @@ bool MachineOutliner::outline(Module &M,
MOP.getReg(), true, /* isDef = true */
true /* isImp = true */));
}
+ if (MI.isCall())
+ MI.getMF()->updateCallSiteInfo(&MI);
};
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
// ... .. range has to be added as an
// implicit Last inst in outlined range <-- def to the call
- // instruction.
- std::for_each(CallInst, std::next(EndIt), CopyDefs);
+ // instruction. Also remove call site information for outlined block
+ // of code.
+ std::for_each(CallInst, std::next(EndIt), CopyDefsAndUpdateCalls);
}
// Erase from the point after where the call was inserted up to, and
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 4d451bdd7f69..54df522d371a 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -1,9 +1,8 @@
//===- MachinePipeliner.cpp - Machine Software Pipeliner Pass -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -97,6 +96,14 @@ using namespace llvm;
STATISTIC(NumTrytoPipeline, "Number of loops that we attempt to pipeline");
STATISTIC(NumPipelined, "Number of loops software pipelined");
STATISTIC(NumNodeOrderIssues, "Number of node order issues found");
+STATISTIC(NumFailBranch, "Pipeliner abort due to unknown branch");
+STATISTIC(NumFailLoop, "Pipeliner abort due to unsupported loop");
+STATISTIC(NumFailPreheader, "Pipeliner abort due to missing preheader");
+STATISTIC(NumFailLargeMaxMII, "Pipeliner abort due to MaxMII too large");
+STATISTIC(NumFailZeroMII, "Pipeliner abort due to zero MII");
+STATISTIC(NumFailNoSchedule, "Pipeliner abort due to no schedule found");
+STATISTIC(NumFailZeroStage, "Pipeliner abort due to zero stage");
+STATISTIC(NumFailLargeMaxStage, "Pipeliner abort due to too many stages");
/// A command line option to turn software pipelining on or off.
static cl::opt<bool> EnableSWP("enable-pipeliner", cl::Hidden, cl::init(true),
@@ -141,6 +148,11 @@ static cl::opt<bool> SwpIgnoreRecMII("pipeliner-ignore-recmii",
cl::ReallyHidden, cl::init(false),
cl::ZeroOrMore, cl::desc("Ignore RecMII"));
+static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
+ cl::init(false));
+static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
+ cl::init(false));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -180,6 +192,16 @@ bool MachinePipeliner::runOnMachineFunction(MachineFunction &mf) {
!EnableSWPOptSize.getPosition())
return false;
+ if (!mf.getSubtarget().enableMachinePipeliner())
+ return false;
+
+ // Cannot pipeline loops without instruction itineraries if we are using
+ // DFA for the pipeliner.
+ if (mf.getSubtarget().useDFAforSMS() &&
+ (!mf.getSubtarget().getInstrItineraryData() ||
+ mf.getSubtarget().getInstrItineraryData()->isEmpty()))
+ return false;
+
MF = &mf;
MLI = &getAnalysis<MachineLoopInfo>();
MDT = &getAnalysis<MachineDominatorTree>();
@@ -211,8 +233,11 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
}
#endif
- if (!canPipelineLoop(L))
+ setPragmaPipelineOptions(L);
+ if (!canPipelineLoop(L)) {
+ LLVM_DEBUG(dbgs() << "\n!!! Can not pipeline loop.\n");
return Changed;
+ }
++NumTrytoPipeline;
@@ -221,6 +246,50 @@ bool MachinePipeliner::scheduleLoop(MachineLoop &L) {
return Changed;
}
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+ MachineBasicBlock *LBLK = L.getTopBlock();
+
+ if (LBLK == nullptr)
+ return;
+
+ const BasicBlock *BBLK = LBLK->getBasicBlock();
+ if (BBLK == nullptr)
+ return;
+
+ const Instruction *TI = BBLK->getTerminator();
+ if (TI == nullptr)
+ return;
+
+ MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+ if (LoopID == nullptr)
+ return;
+
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+ for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+ MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+
+ if (MD == nullptr)
+ continue;
+
+ MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+
+ if (S == nullptr)
+ continue;
+
+ if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+ assert(MD->getNumOperands() == 2 &&
+ "Pipeline initiation interval hint metadata should have two operands.");
+ II_setByPragma =
+ mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+ assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+ } else if (S->getString() == "llvm.loop.pipeline.disable") {
+ disabledByPragma = true;
+ }
+ }
+}
+
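The hook above walks the loop's !llvm.loop metadata list for the two pipeline hints named in its string literals. A self-contained sketch of that key/value scan; MDTuple and applyPragmas are invented simplifications of the MDNode walk, not LLVM APIs.

#include <cstdio>
#include <string>
#include <vector>

// One metadata entry: a string key plus an optional integer operand
// (-1 when absent, as for the "disable" hint).
struct MDTuple {
  std::string Key;
  long Value = -1;
};

static void applyPragmas(const std::vector<MDTuple> &LoopID, unsigned &II,
                         bool &Disabled) {
  for (const MDTuple &MD : LoopID) {
    if (MD.Key == "llvm.loop.pipeline.initiationinterval")
      II = (unsigned)MD.Value;
    else if (MD.Key == "llvm.loop.pipeline.disable")
      Disabled = true;
  }
}

int main() {
  unsigned II = 0;
  bool Disabled = false;
  applyPragmas({{"llvm.loop.pipeline.initiationinterval", 4}}, II, Disabled);
  std::printf("II=%u disabled=%d\n", II, Disabled); // II=4 disabled=0
  return 0;
}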
/// Return true if the loop can be software pipelined. The algorithm is
/// restricted to loops with a single basic block. Make sure that the
/// branch in the loop can be analyzed.
@@ -228,21 +297,36 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
if (L.getNumBlocks() != 1)
return false;
+ if (disabledByPragma)
+ return false;
+
// Check if the branch can't be understood because we can't do pipelining
// if that's the case.
LI.TBB = nullptr;
LI.FBB = nullptr;
LI.BrCond.clear();
- if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond))
+ if (TII->analyzeBranch(*L.getHeader(), LI.TBB, LI.FBB, LI.BrCond)) {
+ LLVM_DEBUG(
+ dbgs() << "Unable to analyzeBranch, can NOT pipeline current Loop\n");
+ NumFailBranch++;
return false;
+ }
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare))
+ if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+ LLVM_DEBUG(
+ dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
+ NumFailLoop++;
return false;
+ }
- if (!L.getLoopPreheader())
+ if (!L.getLoopPreheader()) {
+ LLVM_DEBUG(
+ dbgs() << "Preheader not found, can NOT pipeline current Loop\n");
+ NumFailPreheader++;
return false;
+ }
// Remove any subregisters from inputs to phi nodes.
preprocessPhiNodes(*L.getHeader());
@@ -286,7 +370,8 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
- SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+ SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+ II_setByPragma);
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
@@ -309,6 +394,20 @@ bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
return SMS.hasNewSchedule();
}
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+ if (II_setByPragma > 0)
+ MII = II_setByPragma;
+ else
+ MII = std::max(ResMII, RecMII);
+}
+
+void SwingSchedulerDAG::setMAX_II() {
+ if (II_setByPragma > 0)
+ MAX_II = II_setByPragma;
+ else
+ MAX_II = MII + 10;
+}
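A worked instance of the selection above: with ResMII = 3 (resource limited) and RecMII = 5 (recurrence limited), MII becomes 5 and the search window [MII, MII + 10] becomes [5, 15]; a pragma-supplied II pins both ends. A one-function sketch, pickMII being an invented name.

#include <algorithm>
#include <cstdio>
#include <utility>

// Returns {MII, MAX_II} following setMII/setMAX_II above; a nonzero pragma
// value overrides both ends of the II search range.
static std::pair<unsigned, unsigned> pickMII(unsigned ResMII, unsigned RecMII,
                                             unsigned Pragma) {
  unsigned MII = Pragma ? Pragma : std::max(ResMII, RecMII);
  unsigned MaxII = Pragma ? Pragma : MII + 10;
  return {MII, MaxII};
}

int main() {
  auto R = pickMII(3, 5, 0);
  std::printf("MII=%u MAX_II=%u\n", R.first, R.second); // MII=5 MAX_II=15
  return 0;
}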
+
/// We override the schedule function in ScheduleDAGInstrs to implement the
/// scheduling part of the Swing Modulo Scheduling algorithm.
void SwingSchedulerDAG::schedule() {
@@ -335,17 +434,28 @@ void SwingSchedulerDAG::schedule() {
if (SwpIgnoreRecMII)
RecMII = 0;
- MII = std::max(ResMII, RecMII);
- LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
- << ", res=" << ResMII << ")\n");
+ setMII(ResMII, RecMII);
+ setMAX_II();
+
+ LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+ << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
// Can't schedule a loop without a valid MII.
- if (MII == 0)
+ if (MII == 0) {
+ LLVM_DEBUG(
+ dbgs()
+ << "0 is not a valid Minimal Initiation Interval, can NOT schedule\n");
+ NumFailZeroMII++;
return;
+ }
// Don't pipeline large loops.
- if (SwpMaxMii != -1 && (int)MII > SwpMaxMii)
+ if (SwpMaxMii != -1 && (int)MII > SwpMaxMii) {
+ LLVM_DEBUG(dbgs() << "MII > " << SwpMaxMii
+ << ", we don't pipleline large loops\n");
+ NumFailLargeMaxMII++;
return;
+ }
computeNodeFunctions(NodeSets);
@@ -362,7 +472,7 @@ void SwingSchedulerDAG::schedule() {
}
});
- std::stable_sort(NodeSets.begin(), NodeSets.end(), std::greater<NodeSet>());
+ llvm::stable_sort(NodeSets, std::greater<NodeSet>());
groupRemainingNodes(NodeSets);
@@ -383,17 +493,27 @@ void SwingSchedulerDAG::schedule() {
SMSchedule Schedule(Pass.MF);
Scheduled = schedulePipeline(Schedule);
- if (!Scheduled)
+ if (!Scheduled) {
+ LLVM_DEBUG(dbgs() << "No schedule found, return\n");
+ NumFailNoSchedule++;
return;
+ }
unsigned numStages = Schedule.getMaxStageCount();
// No need to generate pipeline if there are no overlapped iterations.
- if (numStages == 0)
+ if (numStages == 0) {
+ LLVM_DEBUG(
+ dbgs() << "No overlapped iterations, no need to generate pipeline\n");
+ NumFailZeroStage++;
return;
-
+ }
// Check that the maximum stage count is less than user-defined limit.
- if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages)
+ if (SwpMaxStages > -1 && (int)numStages > SwpMaxStages) {
+ LLVM_DEBUG(dbgs() << "numStages:" << numStages << ">" << SwpMaxStages
+ << " : too many stages, abort\n");
+ NumFailLargeMaxStage++;
return;
+ }
generatePipelinedLoop(Schedule);
++NumPipelined;
@@ -467,7 +587,8 @@ static bool isSuccOrder(SUnit *SUa, SUnit *SUb) {
/// Return true if the instruction causes a chain between memory
/// references before and after it.
static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
- return MI.isCall() || MI.hasUnmodeledSideEffects() ||
+ return MI.isCall() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects() ||
(MI.hasOrderedMemoryRef() &&
(!MI.mayLoad() || !MI.isDereferenceableInvariantLoad(AA)));
}
@@ -475,16 +596,16 @@ static bool isDependenceBarrier(MachineInstr &MI, AliasAnalysis *AA) {
/// Return the underlying objects for the memory references of an instruction.
/// This function calls the code in ValueTracking, but first checks that the
/// instruction has a memory operand.
-static void getUnderlyingObjects(MachineInstr *MI,
- SmallVectorImpl<Value *> &Objs,
+static void getUnderlyingObjects(const MachineInstr *MI,
+ SmallVectorImpl<const Value *> &Objs,
const DataLayout &DL) {
if (!MI->hasOneMemOperand())
return;
MachineMemOperand *MM = *MI->memoperands_begin();
if (!MM->getValue())
return;
- GetUnderlyingObjects(const_cast<Value *>(MM->getValue()), Objs, DL);
- for (Value *V : Objs) {
+ GetUnderlyingObjects(MM->getValue(), Objs, DL);
+ for (const Value *V : Objs) {
if (!isIdentifiedObject(V)) {
Objs.clear();
return;
@@ -498,7 +619,7 @@ static void getUnderlyingObjects(MachineInstr *MI,
/// dependence. This code is very similar to the code in ScheduleDAGInstrs
/// but that code doesn't create loop carried dependences.
void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
- MapVector<Value *, SmallVector<SUnit *, 4>> PendingLoads;
+ MapVector<const Value *, SmallVector<SUnit *, 4>> PendingLoads;
Value *UnknownValue =
UndefValue::get(Type::getVoidTy(MF.getFunction().getContext()));
for (auto &SU : SUnits) {
@@ -506,7 +627,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
if (isDependenceBarrier(MI, AA))
PendingLoads.clear();
else if (MI.mayLoad()) {
- SmallVector<Value *, 4> Objs;
+ SmallVector<const Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
if (Objs.empty())
Objs.push_back(UnknownValue);
@@ -515,12 +636,12 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
SUs.push_back(&SU);
}
} else if (MI.mayStore()) {
- SmallVector<Value *, 4> Objs;
+ SmallVector<const Value *, 4> Objs;
getUnderlyingObjects(&MI, Objs, MF.getDataLayout());
if (Objs.empty())
Objs.push_back(UnknownValue);
for (auto V : Objs) {
- MapVector<Value *, SmallVector<SUnit *, 4>>::iterator I =
+ MapVector<const Value *, SmallVector<SUnit *, 4>>::iterator I =
PendingLoads.find(V);
if (I == PendingLoads.end())
continue;
@@ -531,7 +652,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
// First, perform the cheaper check that compares the base register.
// If they are the same and the load offset is less than the store
// offset, then mark the dependence as loop carried potentially.
- MachineOperand *BaseOp1, *BaseOp2;
+ const MachineOperand *BaseOp1, *BaseOp2;
int64_t Offset1, Offset2;
if (TII->getMemOperandWithOffset(LdMI, BaseOp1, Offset1, TRI) &&
TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
@@ -744,27 +865,55 @@ namespace {
// the number of functional unit choices.
struct FuncUnitSorter {
const InstrItineraryData *InstrItins;
+ const MCSubtargetInfo *STI;
DenseMap<unsigned, unsigned> Resources;
- FuncUnitSorter(const InstrItineraryData *IID) : InstrItins(IID) {}
+ FuncUnitSorter(const TargetSubtargetInfo &TSI)
+ : InstrItins(TSI.getInstrItineraryData()), STI(&TSI) {}
// Compute the number of functional unit alternatives needed
// at each stage, and take the minimum value. We prioritize the
// instructions by the least number of choices first.
unsigned minFuncUnits(const MachineInstr *Inst, unsigned &F) const {
- unsigned schedClass = Inst->getDesc().getSchedClass();
+ unsigned SchedClass = Inst->getDesc().getSchedClass();
unsigned min = UINT_MAX;
- for (const InstrStage *IS = InstrItins->beginStage(schedClass),
- *IE = InstrItins->endStage(schedClass);
- IS != IE; ++IS) {
- unsigned funcUnits = IS->getUnits();
- unsigned numAlternatives = countPopulation(funcUnits);
- if (numAlternatives < min) {
- min = numAlternatives;
- F = funcUnits;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ unsigned funcUnits = IS.getUnits();
+ unsigned numAlternatives = countPopulation(funcUnits);
+ if (numAlternatives < min) {
+ min = numAlternatives;
+ F = funcUnits;
+ }
}
+ return min;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for SchedClass, should be
+ // Pseudo/PostRAPseudo
+ return min;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ const MCProcResourceDesc *ProcResource =
+ STI->getSchedModel().getProcResource(PRE.ProcResourceIdx);
+ unsigned NumUnits = ProcResource->NumUnits;
+ if (NumUnits < min) {
+ min = NumUnits;
+ F = PRE.ProcResourceIdx;
+ }
+ }
+ return min;
}
- return min;
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
}
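
In the itinerary branch above, each InstrStage encodes its candidate functional units as a bitmask, so the number of scheduling alternatives at a stage is the mask's population count, and the sorter keeps the minimum across stages. A minimal sketch of that counting (popcount64 stands in for llvm::countPopulation):

#include <cstdint>

// Kernighan's trick: clear the lowest set bit until none remain.
inline unsigned popcount64(uint64_t X) {
  unsigned N = 0;
  for (; X; X &= X - 1)
    ++N;
  return N;
}
// A stage whose unit mask is 0b0110 offers popcount64(0b0110) == 2
// alternatives; an instruction restricted to 0b0100 offers only 1 and
// is therefore prioritized for scheduling.
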
// Compute the critical resources needed by the instruction. This
@@ -774,13 +923,34 @@ struct FuncUnitSorter {
// the same, highly used, functional unit have high priority.
void calcCriticalResources(MachineInstr &MI) {
unsigned SchedClass = MI.getDesc().getSchedClass();
- for (const InstrStage *IS = InstrItins->beginStage(SchedClass),
- *IE = InstrItins->endStage(SchedClass);
- IS != IE; ++IS) {
- unsigned FuncUnits = IS->getUnits();
- if (countPopulation(FuncUnits) == 1)
- Resources[FuncUnits]++;
+ if (InstrItins && !InstrItins->isEmpty()) {
+ for (const InstrStage &IS :
+ make_range(InstrItins->beginStage(SchedClass),
+ InstrItins->endStage(SchedClass))) {
+ unsigned FuncUnits = IS.getUnits();
+ if (countPopulation(FuncUnits) == 1)
+ Resources[FuncUnits]++;
+ }
+ return;
+ }
+ if (STI && STI->getSchedModel().hasInstrSchedModel()) {
+ const MCSchedClassDesc *SCDesc =
+ STI->getSchedModel().getSchedClassDesc(SchedClass);
+ if (!SCDesc->isValid())
+ // No valid Schedule Class Desc for SchedClass, should be
+ // Pseudo/PostRAPseudo
+ return;
+
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ Resources[PRE.ProcResourceIdx]++;
+ }
+ return;
}
+ llvm_unreachable("Should have non-empty InstrItins or hasInstrSchedModel!");
}
/// Return true if IS1 has less priority than IS2.
@@ -803,14 +973,15 @@ struct FuncUnitSorter {
/// to add it to each existing DFA, until a legal space is found. If the
/// instruction cannot be reserved in an existing DFA, we create a new one.
unsigned SwingSchedulerDAG::calculateResMII() {
- SmallVector<DFAPacketizer *, 8> Resources;
+
+ LLVM_DEBUG(dbgs() << "calculateResMII:\n");
+ SmallVector<ResourceManager*, 8> Resources;
MachineBasicBlock *MBB = Loop.getHeader();
- Resources.push_back(TII->CreateTargetScheduleState(MF.getSubtarget()));
+ Resources.push_back(new ResourceManager(&MF.getSubtarget()));
// Sort the instructions by the number of available choices for scheduling,
// least to most. Use the number of critical resources as the tie breaker.
- FuncUnitSorter FUS =
- FuncUnitSorter(MF.getSubtarget().getInstrItineraryData());
+ FuncUnitSorter FUS = FuncUnitSorter(MF.getSubtarget());
for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
E = MBB->getFirstTerminator();
I != E; ++I)
@@ -832,33 +1003,40 @@ unsigned SwingSchedulerDAG::calculateResMII() {
// DFA is needed for each cycle.
unsigned NumCycles = getSUnit(MI)->Latency;
unsigned ReservedCycles = 0;
- SmallVectorImpl<DFAPacketizer *>::iterator RI = Resources.begin();
- SmallVectorImpl<DFAPacketizer *>::iterator RE = Resources.end();
+ SmallVectorImpl<ResourceManager *>::iterator RI = Resources.begin();
+ SmallVectorImpl<ResourceManager *>::iterator RE = Resources.end();
+ LLVM_DEBUG({
+ dbgs() << "Trying to reserve resource for " << NumCycles
+ << " cycles for \n";
+ MI->dump();
+ });
for (unsigned C = 0; C < NumCycles; ++C)
while (RI != RE) {
- if ((*RI++)->canReserveResources(*MI)) {
+ if ((*RI)->canReserveResources(*MI)) {
+ (*RI)->reserveResources(*MI);
++ReservedCycles;
break;
}
+ RI++;
}
- // Start reserving resources using existing DFAs.
- for (unsigned C = 0; C < ReservedCycles; ++C) {
- --RI;
- (*RI)->reserveResources(*MI);
- }
+ LLVM_DEBUG(dbgs() << "ReservedCycles:" << ReservedCycles
+ << ", NumCycles:" << NumCycles << "\n");
// Add new DFAs, if needed, to reserve resources.
for (unsigned C = ReservedCycles; C < NumCycles; ++C) {
- DFAPacketizer *NewResource =
- TII->CreateTargetScheduleState(MF.getSubtarget());
+ LLVM_DEBUG(if (SwpDebugResource) dbgs()
+ << "NewResource created to reserve resources\n");
+ ResourceManager *NewResource = new ResourceManager(&MF.getSubtarget());
assert(NewResource->canReserveResources(*MI) && "Reserve error.");
NewResource->reserveResources(*MI);
Resources.push_back(NewResource);
}
}
int Resmii = Resources.size();
+ LLVM_DEBUG(dbgs() << "Retrun Res MII:" << Resmii << "\n");
// Delete the memory for each of the DFAs that were created earlier.
- for (DFAPacketizer *RI : Resources) {
- DFAPacketizer *D = RI;
+ for (ResourceManager *RI : Resources) {
+ ResourceManager *D = RI;
delete D;
}
Resources.clear();
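
The loop above is effectively first-fit bin packing: each ResourceManager models one kernel cycle, and an instruction that fits in no existing cycle forces a new one, so the final vector length is ResMII. A compilable miniature under the simplifying assumption of a single resource kind with a fixed unit count:

#include <memory>
#include <vector>

// Hypothetical stand-in for one cycle's worth of a single resource.
struct CycleRes {
  unsigned Used = 0, Units;
  explicit CycleRes(unsigned U) : Units(U) {}
  bool canReserve() const { return Used < Units; }
  void reserve() { ++Used; }
};

unsigned resMII(unsigned NumOps, unsigned UnitsPerCycle) {
  std::vector<std::unique_ptr<CycleRes>> Cycles;
  for (unsigned I = 0; I < NumOps; ++I) {
    bool Placed = false;
    for (auto &C : Cycles)
      if (C->canReserve()) { C->reserve(); Placed = true; break; }
    if (!Placed) { // no cycle has a free unit: the kernel grows by one
      Cycles.push_back(std::make_unique<CycleRes>(UnitsPerCycle));
      Cycles.back()->reserve();
    }
  }
  return Cycles.size(); // e.g. resMII(5, 2) == 3
}
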
@@ -1517,7 +1695,7 @@ void SwingSchedulerDAG::groupRemainingNodes(NodeSetType &NodeSets) {
}
}
-/// Add the node to the set, and add all is its connected nodes to the set.
+/// Add the node to the set, and add all of its connected nodes to the set.
void SwingSchedulerDAG::addConnectedNodes(SUnit *SU, NodeSet &NewSet,
SetVector<SUnit *> &NodesAdded) {
NewSet.insert(SU);
@@ -1741,12 +1919,16 @@ void SwingSchedulerDAG::computeNodeOrder(NodeSetType &NodeSets) {
/// Process the nodes in the computed order and create the pipelined schedule
/// of the instructions, if possible. Return true if a schedule is found.
bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
- if (NodeOrder.empty())
+
+ if (NodeOrder.empty()) {
+ LLVM_DEBUG(dbgs() << "NodeOrder is empty! abort scheduling\n");
return false;
+ }
bool scheduleFound = false;
+ unsigned II = 0;
// Keep increasing II until a valid schedule is found.
- for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+ for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
Schedule.reset();
Schedule.setInitiationInterval(II);
LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -1767,13 +1949,14 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
Schedule.computeStart(SU, &EarlyStart, &LateStart, &SchedEnd, &SchedStart,
II, this);
LLVM_DEBUG({
+ dbgs() << "\n";
dbgs() << "Inst (" << SU->NodeNum << ") ";
SU->getInstr()->dump();
dbgs() << "\n";
});
LLVM_DEBUG({
- dbgs() << "\tes: " << EarlyStart << " ls: " << LateStart
- << " me: " << SchedEnd << " ms: " << SchedStart << "\n";
+ dbgs() << format("\tes: %8x ls: %8x me: %8x ms: %8x\n", EarlyStart,
+ LateStart, SchedEnd, SchedStart);
});
if (EarlyStart > LateStart || SchedEnd < EarlyStart ||
@@ -1818,7 +2001,8 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
scheduleFound = Schedule.isValidSchedule(this);
}
- LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+ LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+ << ")\n");
if (scheduleFound)
Schedule.finalizeSchedule(this);
@@ -1847,6 +2031,10 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
InstrMapTy InstrMap;
SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+ MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
+ assert(PreheaderBB != nullptr &&
+ "Need to add code to handle loops w/o preheader");
// Generate the prolog instructions that set up the pipeline.
generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
MF.insert(BB->getIterator(), KernelBB);
@@ -1903,7 +2091,7 @@ void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
removeDeadInstructions(KernelBB, EpilogBBs);
// Add branches between prolog and epilog blocks.
- addBranches(PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
+ addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
// Remove the original loop since it's no longer referenced.
for (auto &I : *BB)
@@ -2242,7 +2430,7 @@ void SwingSchedulerDAG::generateExistingPhis(
// Use the value defined by the Phi, unless we're generating the first
// epilog and the Phi refers to a Phi in a different stage.
else if (VRMap[PrevStage - np].count(Def) &&
- (!LoopDefIsPhi || PrevStage != LastStageNum))
+ (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
PhiOp2 = VRMap[PrevStage - np][Def];
}
@@ -2588,7 +2776,8 @@ static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
/// Create branches from each prolog basic block to the appropriate epilog
/// block. These edges are needed if the loop ends before reaching the
/// kernel.
-void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
+void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
+ MBBVectorTy &PrologBBs,
MachineBasicBlock *KernelBB,
MBBVectorTy &EpilogBBs,
SMSchedule &Schedule, ValueMapTy *VRMap) {
@@ -2615,8 +2804,8 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
// Check if the LOOP0 has already been removed. If so, then there is no need
// to reduce the trip count.
if (LC != 0)
- LC = TII->reduceLoopCount(*Prolog, IndVar, *Cmp, Cond, PrevInsts, j,
- MaxIter);
+ LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
+ PrevInsts, j, MaxIter);
// Record the value of the first trip count, which is used to determine if
// branches and blocks can be removed for constant trip counts.
@@ -2657,7 +2846,7 @@ void SwingSchedulerDAG::addBranches(MBBVectorTy &PrologBBs,
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
@@ -2698,7 +2887,9 @@ void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
return;
SmallVector<MachineMemOperand *, 2> NewMMOs;
for (MachineMemOperand *MMO : NewMI.memoperands()) {
- if (MMO->isVolatile() || (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic() ||
+ (MMO->isInvariant() && MMO->isDereferenceable()) ||
(!MMO->getValue())) {
NewMMOs.push_back(MMO);
continue;
@@ -3058,6 +3249,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// Assume ordered loads and stores may have a loop carried dependence.
if (SI->hasUnmodeledSideEffects() || DI->hasUnmodeledSideEffects() ||
+ SI->mayRaiseFPException() || DI->mayRaiseFPException() ||
SI->hasOrderedMemoryRef() || DI->hasOrderedMemoryRef())
return true;
@@ -3069,7 +3261,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
if (!computeDelta(*SI, DeltaS) || !computeDelta(*DI, DeltaD))
return true;
- MachineOperand *BaseOpS, *BaseOpD;
+ const MachineOperand *BaseOpS, *BaseOpD;
int64_t OffsetS, OffsetD;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TII->getMemOperandWithOffset(*SI, BaseOpS, OffsetS, TRI) ||
@@ -3097,12 +3289,14 @@ bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
// This is the main test, which checks the offset values and the loop
// increment value to determine if the accesses may be loop carried.
- if (OffsetS >= OffsetD)
- return OffsetS + AccessSizeS > DeltaS;
- else
- return OffsetD + AccessSizeD > DeltaD;
+ if (AccessSizeS == MemoryLocation::UnknownSize ||
+ AccessSizeD == MemoryLocation::UnknownSize)
+ return true;
- return true;
+ if (DeltaS != DeltaD || DeltaS < AccessSizeS || DeltaD < AccessSizeD)
+ return true;
+
+ return (OffsetS + (int64_t)AccessSizeS < OffsetD + (int64_t)AccessSizeD);
}
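
The rewritten tail of isLoopCarriedDep is deliberately conservative: unknown access sizes, mismatched strides, or a stride smaller than an access all report a carried dependence, and otherwise the final comparison decides. A worked restatement with concrete numbers (the helper name is illustrative):

#include <cassert>
#include <cstdint>

// Returns true when a loop-carried dependence must be assumed.
bool mayBeLoopCarried(int64_t OffS, uint64_t SizeS, uint64_t DeltaS,
                      int64_t OffD, uint64_t SizeD, uint64_t DeltaD) {
  if (DeltaS != DeltaD || DeltaS < SizeS || DeltaD < SizeD)
    return true; // strides differ or are too small: be conservative
  return OffS + (int64_t)SizeS < OffD + (int64_t)SizeD;
}

int main() {
  // Two 4-byte accesses, both advancing 8 bytes per iteration:
  assert(!mayBeLoopCarried(4, 4, 8, 0, 4, 8)); // no overlap next iteration
  assert(mayBeLoopCarried(0, 4, 8, 4, 4, 8));  // store reaches next load
}
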
void SwingSchedulerDAG::postprocessDAG() {
@@ -3117,6 +3311,10 @@ void SwingSchedulerDAG::postprocessDAG() {
/// the relative values of StartCycle and EndCycle.
bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
bool forward = true;
+ LLVM_DEBUG({
+ dbgs() << "Trying to insert node between " << StartCycle << " and "
+ << EndCycle << " II: " << II << "\n";
+ });
if (StartCycle > EndCycle)
forward = false;
@@ -3125,8 +3323,9 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
for (int curCycle = StartCycle; curCycle != termCycle;
forward ? ++curCycle : --curCycle) {
- // Add the already scheduled instructions at the specified cycle to the DFA.
- Resources->clearResources();
+ // Add the already scheduled instructions at the specified cycle to the
+ // DFA.
+ ProcItinResources.clearResources();
for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
checkCycle <= LastCycle; checkCycle += II) {
std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
@@ -3136,13 +3335,13 @@ bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
I != E; ++I) {
if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
continue;
- assert(Resources->canReserveResources(*(*I)->getInstr()) &&
+ assert(ProcItinResources.canReserveResources(*(*I)->getInstr()) &&
"These instructions have already been scheduled.");
- Resources->reserveResources(*(*I)->getInstr());
+ ProcItinResources.reserveResources(*(*I)->getInstr());
}
}
if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
- Resources->canReserveResources(*SU->getInstr())) {
+ ProcItinResources.canReserveResources(*SU->getInstr())) {
LLVM_DEBUG({
dbgs() << "\tinsert at cycle " << curCycle << " ";
SU->getInstr()->dump();
@@ -3360,6 +3559,14 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
if (Pos < MoveUse)
MoveUse = Pos;
}
+ // We did not handle HW dependences in the previous loop,
+ // and we normally set Latency = 0 for Anti deps,
+ // so there may be nodes in the same cycle with an Anti dependence on HW regs.
+ else if (S.getKind() == SDep::Anti && stageScheduled(*I) == StageInst1) {
+ OrderBeforeUse = true;
+ if ((MoveUse == 0) || (Pos < MoveUse))
+ MoveUse = Pos;
+ }
}
for (auto &P : SU->Preds) {
if (P.getSUnit() != *I)
@@ -3523,9 +3730,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
for (SDep &PredEdge : SU->Preds) {
SUnit *PredSU = PredEdge.getSUnit();
- unsigned PredIndex =
- std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
- std::make_pair(PredSU, 0), CompareKey));
+ unsigned PredIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(PredSU, 0), CompareKey));
if (!PredSU->getInstr()->isPHI() && PredIndex < Index) {
PredBefore = true;
Pred = PredSU;
@@ -3535,9 +3741,13 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
for (SDep &SuccEdge : SU->Succs) {
SUnit *SuccSU = SuccEdge.getSUnit();
- unsigned SuccIndex =
- std::get<1>(*std::lower_bound(Indices.begin(), Indices.end(),
- std::make_pair(SuccSU, 0), CompareKey));
+ // Do not process a boundary node, it was not included in NodeOrder,
+ // hence not in Indices either, call to std::lower_bound() below will
+ // return Indices.end().
+ if (SuccSU->isBoundaryNode())
+ continue;
+ unsigned SuccIndex = std::get<1>(
+ *llvm::lower_bound(Indices, std::make_pair(SuccSU, 0), CompareKey));
if (!SuccSU->getInstr()->isPHI() && SuccIndex < Index) {
SuccBefore = true;
Succ = SuccSU;
@@ -3548,9 +3758,8 @@ void SwingSchedulerDAG::checkValidNodeOrder(const NodeSetType &Circuits) const {
if (PredBefore && SuccBefore && !SU->getInstr()->isPHI()) {
// instructions in circuits are allowed to be scheduled
// after both a successor and predecessor.
- bool InCircuit = std::any_of(
- Circuits.begin(), Circuits.end(),
- [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
+ bool InCircuit = llvm::any_of(
+ Circuits, [SU](const NodeSet &Circuit) { return Circuit.count(SU); });
if (InCircuit)
LLVM_DEBUG(dbgs() << "In a circuit, predecessor ";);
else {
@@ -3740,5 +3949,140 @@ LLVM_DUMP_METHOD void NodeSet::dump() const { print(dbgs()); }
#endif
+void ResourceManager::initProcResourceVectors(
+ const MCSchedModel &SM, SmallVectorImpl<uint64_t> &Masks) {
+ unsigned ProcResourceID = 0;
+
+ // We currently limit the number of resource kinds to below 64 so that we
+ // can use uint64_t for Masks.
+ assert(SM.getNumProcResourceKinds() < 64 &&
+ "Too many kinds of resources, unsupported");
+ // Create a unique bitmask for every processor resource unit.
+ // Skip resource at index 0, since it always references 'InvalidUnit'.
+ Masks.resize(SM.getNumProcResourceKinds());
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ ProcResourceID++;
+ }
+ // Create a unique bitmask for every processor resource group.
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc &Desc = *SM.getProcResource(I);
+ if (!Desc.SubUnitsIdxBegin)
+ continue;
+ Masks[I] = 1ULL << ProcResourceID;
+ for (unsigned U = 0; U < Desc.NumUnits; ++U)
+ Masks[I] |= Masks[Desc.SubUnitsIdxBegin[U]];
+ ProcResourceID++;
+ }
+ LLVM_DEBUG({
+ if (SwpShowResMask) {
+ dbgs() << "ProcResourceDesc:\n";
+ for (unsigned I = 1, E = SM.getNumProcResourceKinds(); I < E; ++I) {
+ const MCProcResourceDesc *ProcResource = SM.getProcResource(I);
+ dbgs() << format(" %16s(%2d): Mask: 0x%08x, NumUnits:%2d\n",
+ ProcResource->Name, I, Masks[I],
+ ProcResource->NumUnits);
+ }
+ dbgs() << " -----------------\n";
+ }
+ });
+}
+
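
initProcResourceVectors gives every stand-alone unit its own bit and builds each group's mask as its own bit ORed with its members' masks, which is why the assert above caps the kind count below 64. A tiny worked instance with a hypothetical two-ALU model:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t ALU0 = 1ULL << 0;                 // stand-alone unit
  uint64_t ALU1 = 1ULL << 1;                 // stand-alone unit
  uint64_t ALUs = (1ULL << 2) | ALU0 | ALU1; // group: own bit + members
  assert(ALUs == 0x7);
  assert((ALUs & ALU1) != 0); // membership is a mask intersection
}
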
+bool ResourceManager::canReserveResources(const MCInstrDesc *MID) const {
+
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "canReserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources->canReserveResources(MID);
+
+ unsigned InsnClass = MID->getSchedClass();
+ const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+ dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ });
+ return true;
+ }
+
+ const MCWriteProcResEntry *I = STI->getWriteProcResBegin(SCDesc);
+ const MCWriteProcResEntry *E = STI->getWriteProcResEnd(SCDesc);
+ for (; I != E; ++I) {
+ if (!I->Cycles)
+ continue;
+ const MCProcResourceDesc *ProcResource =
+ SM.getProcResource(I->ProcResourceIdx);
+ unsigned NumUnits = ProcResource->NumUnits;
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+ ProcResource->Name, I->ProcResourceIdx,
+ ProcResourceCount[I->ProcResourceIdx], NumUnits,
+ I->Cycles);
+ });
+ if (ProcResourceCount[I->ProcResourceIdx] >= NumUnits)
+ return false;
+ }
+ LLVM_DEBUG(if (SwpDebugResource) dbgs() << "return true\n\n";);
+ return true;
+}
+
+void ResourceManager::reserveResources(const MCInstrDesc *MID) {
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "reserveResources:\n";
+ });
+ if (UseDFA)
+ return DFAResources->reserveResources(MID);
+ unsigned InsnClass = MID->getSchedClass();
+ const MCSchedClassDesc *SCDesc = SM.getSchedClassDesc(InsnClass);
+ if (!SCDesc->isValid()) {
+ LLVM_DEBUG({
+ dbgs() << "No valid Schedule Class Desc for schedClass!\n";
+ dbgs() << "isPseduo:" << MID->isPseudo() << "\n";
+ });
+ return;
+ }
+ for (const MCWriteProcResEntry &PRE :
+ make_range(STI->getWriteProcResBegin(SCDesc),
+ STI->getWriteProcResEnd(SCDesc))) {
+ if (!PRE.Cycles)
+ continue;
+ ++ProcResourceCount[PRE.ProcResourceIdx];
+ LLVM_DEBUG({
+ if (SwpDebugResource) {
+ const MCProcResourceDesc *ProcResource =
+ SM.getProcResource(PRE.ProcResourceIdx);
+ dbgs() << format(" %16s(%2d): Count: %2d, NumUnits:%2d, Cycles:%2d\n",
+ ProcResource->Name, PRE.ProcResourceIdx,
+ ProcResourceCount[PRE.ProcResourceIdx],
+ ProcResource->NumUnits, PRE.Cycles);
+ }
+ });
+ }
+ LLVM_DEBUG({
+ if (SwpDebugResource)
+ dbgs() << "reserveResources: done!\n\n";
+ });
+}
+
+bool ResourceManager::canReserveResources(const MachineInstr &MI) const {
+ return canReserveResources(&MI.getDesc());
+}
+
+void ResourceManager::reserveResources(const MachineInstr &MI) {
+ return reserveResources(&MI.getDesc());
+}
+
+void ResourceManager::clearResources() {
+ if (UseDFA)
+ return DFAResources->clearResources();
+ std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
+}
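
When no DFA is available, the ResourceManager above degrades to simple per-resource counting bounded by each resource's unit count, reset per cycle by clearResources. A compilable miniature of just that counting path (types and limits are assumptions for illustration):

#include <vector>

struct MiniResourceManager {
  std::vector<unsigned> Count, Units; // indexed by resource kind
  explicit MiniResourceManager(std::vector<unsigned> U)
      : Count(U.size(), 0), Units(std::move(U)) {}
  bool canReserve(unsigned Idx) const { return Count[Idx] < Units[Idx]; }
  void reserve(unsigned Idx) { ++Count[Idx]; }
  void clear() { Count.assign(Count.size(), 0); } // one call per cycle
};
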
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 488377998cb3..7f220ed1fd8f 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -1,9 +1,8 @@
//===- MachinePostDominators.cpp -Machine Post Dominator Calculation ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineRegionInfo.cpp b/lib/CodeGen/MachineRegionInfo.cpp
index 2619d8f78276..2961d456be0d 100644
--- a/lib/CodeGen/MachineRegionInfo.cpp
+++ b/lib/CodeGen/MachineRegionInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/Codegen/MachineRegionInfo.cpp ----------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 6e5ca45d5e5e..f0fd0405d69d 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -1,9 +1,8 @@
//===- lib/Codegen/MachineRegisterInfo.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -155,7 +154,7 @@ unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
/// createVirtualRegister - Create and return a new virtual register in the
/// function with the specified register class.
///
-unsigned
+Register
MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
StringRef Name) {
assert(RegClass && "Cannot create register without RegClass!");
@@ -170,7 +169,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
return Reg;
}
-unsigned MachineRegisterInfo::cloneVirtualRegister(unsigned VReg,
+Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
StringRef Name) {
unsigned Reg = createIncompleteVirtualRegister(Name);
VRegInfo[Reg].first = VRegInfo[VReg].first;
@@ -185,7 +184,7 @@ void MachineRegisterInfo::setType(unsigned VReg, LLT Ty) {
VRegToType[VReg] = Ty;
}
-unsigned
+Register
MachineRegisterInfo::createGenericVirtualRegister(LLT Ty, StringRef Name) {
// New virtual register number.
unsigned Reg = createIncompleteVirtualRegister(Name);
@@ -424,6 +423,13 @@ bool MachineRegisterInfo::hasOneNonDBGUse(unsigned RegNo) const {
return ++UI == use_nodbg_end();
}
+bool MachineRegisterInfo::hasOneNonDBGUser(unsigned RegNo) const {
+ use_instr_nodbg_iterator UI = use_instr_nodbg_begin(RegNo);
+ if (UI == use_instr_nodbg_end())
+ return false;
+ return ++UI == use_instr_nodbg_end();
+}
+
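
The new hasOneNonDBGUser mirrors the existing hasOneNonDBGUse but counts using instructions rather than operands, so one instruction reading the register in two operands still qualifies. The underlying idiom generalizes to any forward range (illustrative template):

// "Exactly one element" without walking the whole range: advance once
// from the first element and check we are now at the end.
template <typename Iter>
bool hasSingleElement(Iter I, Iter E) {
  if (I == E)
    return false;
  return ++I == E;
}
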
/// clearKillFlags - Iterate over all the uses of the given register and
/// clear the kill flag from the MachineOperand. This function is used by
/// optimization passes which extend register lifetimes and need only
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index 542491eabbf2..e8b42047b49f 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -1,9 +1,8 @@
//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index 90dad9d399fe..ae1170ad1be6 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -1,9 +1,8 @@
//===- MachineScheduler.cpp - Machine Instruction Scheduler ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -487,13 +486,17 @@ getSchedRegions(MachineBasicBlock *MBB,
MachineInstr &MI = *std::prev(I);
if (isSchedBoundary(&MI, &*MBB, MF, TII))
break;
- if (!MI.isDebugInstr())
+ if (!MI.isDebugInstr()) {
// MBB::size() uses instr_iterator to count. Here we need a bundle to
// count as a single instruction.
++NumRegionInstrs;
+ }
}
- Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
+ // It's possible we found a scheduling region that only has debug
+ // instructions. Don't bother scheduling these.
+ if (NumRegionInstrs != 0)
+ Regions.push_back(SchedRegion(I, RegionEnd, NumRegionInstrs));
}
if (RegionsTopDown)
@@ -605,23 +608,6 @@ LLVM_DUMP_METHOD void ReadyQueue::dump() const {
// Provide a vtable anchor.
ScheduleDAGMI::~ScheduleDAGMI() = default;
-bool ScheduleDAGMI::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
- return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
-}
-
-bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) {
- if (SuccSU != &ExitSU) {
- // Do not use WillCreateCycle, it assumes SD scheduling.
- // If Pred is reachable from Succ, then the edge creates a cycle.
- if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
- return false;
- Topo.AddPred(SuccSU, PredDep.getSUnit());
- }
- SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
- // Return true regardless of whether a new edge needed to be inserted.
- return true;
-}
-
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
/// NumPredsLeft reaches zero, release the successor node.
///
@@ -762,8 +748,6 @@ void ScheduleDAGMI::schedule() {
// Build the DAG.
buildSchedGraph(AA);
- Topo.InitDAGTopologicalSorting();
-
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1212,8 +1196,6 @@ void ScheduleDAGMILive::schedule() {
LLVM_DEBUG(SchedImpl->dumpPolicy());
buildDAGWithRegPressure();
- Topo.InitDAGTopologicalSorting();
-
postprocessDAG();
SmallVector<SUnit*, 8> TopRoots, BotRoots;
@@ -1484,10 +1466,10 @@ namespace {
class BaseMemOpClusterMutation : public ScheduleDAGMutation {
struct MemOpInfo {
SUnit *SU;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
- MemOpInfo(SUnit *su, MachineOperand *Op, int64_t ofs)
+ MemOpInfo(SUnit *su, const MachineOperand *Op, int64_t ofs)
: SU(su), BaseOp(Op), Offset(ofs) {}
bool operator<(const MemOpInfo &RHS) const {
@@ -1533,7 +1515,7 @@ public:
void apply(ScheduleDAGInstrs *DAGInstrs) override;
protected:
- void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG);
+ void clusterNeighboringMemOps(ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG);
};
class StoreClusterMutation : public BaseMemOpClusterMutation {
@@ -1570,10 +1552,10 @@ createStoreClusterDAGMutation(const TargetInstrInfo *TII,
} // end namespace llvm
void BaseMemOpClusterMutation::clusterNeighboringMemOps(
- ArrayRef<SUnit *> MemOps, ScheduleDAGMI *DAG) {
+ ArrayRef<SUnit *> MemOps, ScheduleDAGInstrs *DAG) {
SmallVector<MemOpInfo, 32> MemOpRecords;
for (SUnit *SU : MemOps) {
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (TII->getMemOperandWithOffset(*SU->getInstr(), BaseOp, Offset, TRI))
MemOpRecords.push_back(MemOpInfo(SU, BaseOp, Offset));
@@ -1610,9 +1592,7 @@ void BaseMemOpClusterMutation::clusterNeighboringMemOps(
}
/// Callback from DAG postProcessing to create cluster edges for loads.
-void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void BaseMemOpClusterMutation::apply(ScheduleDAGInstrs *DAG) {
// Map DAG NodeNum to store chain ID.
DenseMap<unsigned, unsigned> StoreChainIDs;
// Map each store chain to a set of dependent MemOps.
@@ -1857,9 +1837,15 @@ SchedBoundary::~SchedBoundary() { delete HazardRec; }
/// Given a Count of resource usage and a Latency value, return true if a
/// SchedBoundary becomes resource limited.
+/// If we are checking after scheduling a node, we should return true when
+/// we just reach the resource limit.
static bool checkResourceLimit(unsigned LFactor, unsigned Count,
- unsigned Latency) {
- return (int)(Count - (Latency * LFactor)) > (int)LFactor;
+ unsigned Latency, bool AfterSchedNode) {
+ int ResCntFactor = (int)(Count - (Latency * LFactor));
+ if (AfterSchedNode)
+ return ResCntFactor >= (int)LFactor;
+ else
+ return ResCntFactor > (int)LFactor;
}
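
The new AfterSchedNode flag makes the boundary case explicit: once a node has been scheduled its cost is already in Count, so reaching the limit exactly should report resource-limited, while the speculative pre-scheduling check still requires strictly exceeding it. The predicate in isolation (illustrative name):

// Count and Latency * LFactor are in the same normalized units.
bool resourceLimited(unsigned LFactor, unsigned Count, unsigned Latency,
                     bool AfterSchedNode) {
  int ResCntFactor = (int)(Count - Latency * LFactor);
  return AfterSchedNode ? ResCntFactor >= (int)LFactor
                        : ResCntFactor > (int)LFactor;
}
// With LFactor = 2, Latency = 3, Count = 8: ResCntFactor == 2 == LFactor,
// so the zone counts as resource limited only after scheduling a node.
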
void SchedBoundary::reset() {
@@ -1883,6 +1869,7 @@ void SchedBoundary::reset() {
ZoneCritResIdx = 0;
IsResourceLimited = false;
ReservedCycles.clear();
+ ReservedCyclesIndex.clear();
#ifndef NDEBUG
// Track the maximum number of stall cycles that could arise either from the
// latency of a DAG edge or the number of cycles that a processor resource is
@@ -1921,8 +1908,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
SchedModel = smodel;
Rem = rem;
if (SchedModel->hasInstrSchedModel()) {
- ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
- ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+ unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+ ReservedCyclesIndex.resize(ResourceCount);
+ ExecutedResCounts.resize(ResourceCount);
+ unsigned NumUnits = 0;
+
+ for (unsigned i = 0; i < ResourceCount; ++i) {
+ ReservedCyclesIndex[i] = NumUnits;
+ NumUnits += SchedModel->getProcResource(i)->NumUnits;
+ }
+
+ ReservedCycles.resize(NumUnits, InvalidCycle);
}
}
@@ -1943,11 +1939,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
return 0;
}
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
-unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
- unsigned NextUnreserved = ReservedCycles[PIdx];
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
+ unsigned Cycles) {
+ unsigned NextUnreserved = ReservedCycles[InstanceIdx];
// If this resource has never been used, always return cycle zero.
if (NextUnreserved == InvalidCycle)
return 0;
@@ -1957,6 +1953,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
return NextUnreserved;
}
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled. Returns the next cycle and the index of the processor resource
+/// instance in the reserved cycles vector.
+std::pair<unsigned, unsigned>
+SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+ unsigned MinNextUnreserved = InvalidCycle;
+ unsigned InstanceIdx = 0;
+ unsigned StartIndex = ReservedCyclesIndex[PIdx];
+ unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+ assert(NumberOfInstances > 0 &&
+ "Cannot have zero instances of a ProcResource");
+
+ for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+ ++I) {
+ unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+ if (MinNextUnreserved > NextUnreserved) {
+ InstanceIdx = I;
+ MinNextUnreserved = NextUnreserved;
+ }
+ }
+ return std::make_pair(MinNextUnreserved, InstanceIdx);
+}
+
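
With ReservedCycles now flattened to one slot per resource unit, ReservedCyclesIndex maps a resource kind to its first unit and getNextResourceCycle scans that kind's contiguous slots for the earliest free cycle. The scan in isolation (all names illustrative):

#include <utility>
#include <vector>

std::pair<unsigned, unsigned>
nextFreeCycle(const std::vector<unsigned> &ReservedCycles,
              const std::vector<unsigned> &StartIdx,
              const std::vector<unsigned> &NumUnits, unsigned PIdx) {
  unsigned Best = ~0u, BestIdx = StartIdx[PIdx];
  // Resource PIdx owns slots [StartIdx[PIdx], StartIdx[PIdx] + NumUnits[PIdx]).
  for (unsigned I = StartIdx[PIdx], E = I + NumUnits[PIdx]; I < E; ++I)
    if (ReservedCycles[I] < Best) {
      Best = ReservedCycles[I]; // earliest-available instance wins
      BestIdx = I;
    }
  return {Best, BestIdx};
}
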
/// Does this SU have a hazard within the current instruction group.
///
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -1998,14 +2017,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
SchedModel->getWriteProcResEnd(SC))) {
unsigned ResIdx = PE.ProcResourceIdx;
unsigned Cycles = PE.Cycles;
- unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+ unsigned NRCycle, InstanceIdx;
+ std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
if (NRCycle > CurrCycle) {
#ifndef NDEBUG
MaxObservedStall = std::max(Cycles, MaxObservedStall);
#endif
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
- << SchedModel->getResourceName(ResIdx) << "="
- << NRCycle << "c\n");
+ << SchedModel->getResourceName(ResIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
+ << "=" << NRCycle << "c\n");
return true;
}
}
@@ -2119,7 +2140,7 @@ void SchedBoundary::bumpCycle(unsigned NextCycle) {
CheckPending = true;
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
LLVM_DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName()
<< '\n');
@@ -2160,10 +2181,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
<< "c\n");
}
// For reserved resources, record the highest cycle using the resource.
- unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+ unsigned NextAvailable, InstanceIdx;
+ std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
if (NextAvailable > CurrCycle) {
LLVM_DEBUG(dbgs() << " Resource conflict: "
- << SchedModel->getProcResource(PIdx)->Name
+ << SchedModel->getResourceName(PIdx)
+ << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
<< " reserved until @" << NextAvailable << "\n");
}
return NextAvailable;
@@ -2179,6 +2202,8 @@ void SchedBoundary::bumpNode(SUnit *SU) {
HazardRec->Reset();
}
HazardRec->EmitInstruction(SU);
+ // Scheduling an instruction may have made pending instructions available.
+ CheckPending = true;
}
// checkHazard should prevent scheduling multiple instructions per cycle that
// exceed the issue width.
@@ -2251,12 +2276,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
unsigned PIdx = PI->ProcResourceIdx;
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+ unsigned ReservedUntil, InstanceIdx;
+ std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
if (isTop()) {
- ReservedCycles[PIdx] =
- std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
- }
- else
- ReservedCycles[PIdx] = NextCycle;
+ ReservedCycles[InstanceIdx] =
+ std::max(ReservedUntil, NextCycle + PI->Cycles);
+ } else
+ ReservedCycles[InstanceIdx] = NextCycle;
}
}
}
@@ -2282,7 +2308,7 @@ void SchedBoundary::bumpNode(SUnit *SU) {
// resource limited. If a stall occurred, bumpCycle does this.
IsResourceLimited =
checkResourceLimit(SchedModel->getLatencyFactor(), getCriticalCount(),
- getScheduledLatency());
+ getScheduledLatency(), true);
// Update CurrMOps after calling bumpCycle to handle stalls, since bumpCycle
// resets CurrMOps. Loop to handle instructions with more MOps than issue in
@@ -2501,7 +2527,7 @@ void GenericSchedulerBase::setPolicy(CandPolicy &Policy, bool IsPostRA,
RemLatency = computeRemLatency(CurrZone);
RemLatencyComputed = true;
OtherResLimited = checkResourceLimit(SchedModel->getLatencyFactor(),
- OtherCount, RemLatency);
+ OtherCount, RemLatency, false);
}
// Schedule aggressively for latency in PostRA mode. We don't check for
@@ -2741,8 +2767,10 @@ void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin,
MF.getSubtarget().overrideSchedPolicy(RegionPolicy, NumRegionInstrs);
// After subtarget overrides, apply command line options.
- if (!EnableRegPressure)
+ if (!EnableRegPressure) {
RegionPolicy.ShouldTrackPressure = false;
+ RegionPolicy.ShouldTrackLaneMasks = false;
+ }
// Check -misched-topdown/bottomup can force or unforce scheduling direction.
// e.g. -misched-bottomup=false allows scheduling in both directions.
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index cdc597db6401..41db2c88ce50 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -1,9 +1,8 @@
//===- MachineSink.cpp - Sinking for machine instructions -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -585,9 +584,8 @@ MachineSinking::GetAllSortedSuccessors(MachineInstr &MI, MachineBasicBlock *MBB,
AllSuccs.push_back(DTChild->getBlock());
// Sort Successors according to their loop depth or block frequency info.
- std::stable_sort(
- AllSuccs.begin(), AllSuccs.end(),
- [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+ llvm::stable_sort(
+ AllSuccs, [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
@@ -716,7 +714,7 @@ static bool SinkingPreventsImplicitNullCheck(MachineInstr &MI,
!PredBB->getTerminator()->getMetadata(LLVMContext::MD_make_implicit))
return false;
- MachineOperand *BaseOp;
+ const MachineOperand *BaseOp;
int64_t Offset;
if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
return false;
@@ -1203,6 +1201,9 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
}
bool PostRAMachineSinking::runOnMachineFunction(MachineFunction &MF) {
+ if (skipFunction(MF.getFunction()))
+ return false;
+
bool Changed = false;
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index e62ed3094651..f9505df4e7f4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -1,9 +1,8 @@
//===- lib/CodeGen/MachineTraceMetrics.cpp --------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 534d3699db29..0ad792ac62cf 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1,9 +1,8 @@
//===- MachineVerifier.cpp - Machine Code Verifier ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,7 +218,7 @@ namespace {
bool isAllocatable(unsigned Reg) const {
return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
- !regsReserved.test(Reg);
+ !regsReserved.test(Reg);
}
// Analysis information if available
@@ -231,6 +230,9 @@ namespace {
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
void visitMachineBundleBefore(const MachineInstr *MI);
+
+ bool verifyVectorElementMatch(LLT Ty0, LLT Ty1, const MachineInstr *MI);
+ void verifyPreISelGenericInstruction(const MachineInstr *MI);
void visitMachineInstrBefore(const MachineInstr *MI);
void visitMachineOperand(const MachineOperand *MO, unsigned MONum);
void visitMachineInstrAfter(const MachineInstr *MI);
@@ -838,7 +840,7 @@ void MachineVerifier::visitMachineBundleBefore(const MachineInstr *MI) {
if (MI->isTerminator() && !TII->isPredicated(*MI)) {
if (!FirstTerminator)
FirstTerminator = MI;
- } else if (FirstTerminator) {
+ } else if (FirstTerminator && !MI->isDebugEntryValue()) {
report("Non-terminator instruction after the first terminator", MI);
errs() << "First terminator was:\t" << *FirstTerminator;
}
@@ -889,109 +891,150 @@ void MachineVerifier::verifyInlineAsm(const MachineInstr *MI) {
}
}
-void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
- const MCInstrDesc &MCID = MI->getDesc();
- if (MI->getNumOperands() < MCID.getNumOperands()) {
- report("Too few operands", MI);
- errs() << MCID.getNumOperands() << " operands expected, but "
- << MI->getNumOperands() << " given.\n";
+/// Check that types are consistent when two operands need to have the same
+/// number of vector elements.
+/// \return true if the types are valid.
+bool MachineVerifier::verifyVectorElementMatch(LLT Ty0, LLT Ty1,
+ const MachineInstr *MI) {
+ if (Ty0.isVector() != Ty1.isVector()) {
+ report("operand types must be all-vector or all-scalar", MI);
+ // Generally we try to report as many issues as possible at once, but in
+ // this case it's not clear what we should be comparing the size of the
+ // scalar with: the size of the whole vector or its lane. Instead of
+ // making an arbitrary choice and emitting a not-so-helpful message, let's
+ // avoid the extra noise and stop here.
+ return false;
}
- if (MI->isPHI()) {
- if (MF->getProperties().hasProperty(
- MachineFunctionProperties::Property::NoPHIs))
- report("Found PHI instruction with NoPHIs property set", MI);
+ if (Ty0.isVector() && Ty0.getNumElements() != Ty1.getNumElements()) {
+ report("operand types must preserve number of vector elements", MI);
+ return false;
+ }
- if (FirstNonPHI)
- report("Found PHI instruction after non-PHI", MI);
- } else if (FirstNonPHI == nullptr)
- FirstNonPHI = MI;
+ return true;
+}
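
The extracted helper is now shared by the cast, ext/trunc, and select checks below; it accepts operand pairs that are both scalar or both vectors with equal lane counts. The rule in isolation, with the two LLT properties it consults modeled directly (illustrative):

struct Ty { bool IsVector; unsigned NumElts; };

bool elementsMatch(Ty A, Ty B) {
  if (A.IsVector != B.IsVector)
    return false; // must be all-vector or all-scalar
  return !A.IsVector || A.NumElts == B.NumElts;
}
// <4 x s32> -> <4 x s16> passes; <4 x s32> -> <2 x s64> and
// <4 x s32> -> s128 both fail.
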
- // Check the tied operands.
- if (MI->isInlineAsm())
- verifyInlineAsm(MI);
+void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
+ if (isFunctionSelected)
+ report("Unexpected generic instruction in a Selected function", MI);
- // Check the MachineMemOperands for basic consistency.
- for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
- E = MI->memoperands_end();
+ const MCInstrDesc &MCID = MI->getDesc();
+ unsigned NumOps = MI->getNumOperands();
+
+ // Check types.
+ SmallVector<LLT, 4> Types;
+ for (unsigned I = 0, E = std::min(MCID.getNumOperands(), NumOps);
I != E; ++I) {
- if ((*I)->isLoad() && !MI->mayLoad())
- report("Missing mayLoad flag", MI);
- if ((*I)->isStore() && !MI->mayStore())
- report("Missing mayStore flag", MI);
- }
+ if (!MCID.OpInfo[I].isGenericType())
+ continue;
+ // Generic instructions specify type equality constraints between some of
+ // their operands. Make sure these are consistent.
+ size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
+ Types.resize(std::max(TypeIdx + 1, Types.size()));
+
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (!MO->isReg()) {
+ report("generic instruction must use register operands", MI);
+ continue;
+ }
- // Debug values must not have a slot index.
- // Other instructions must have one, unless they are inside a bundle.
- if (LiveInts) {
- bool mapped = !LiveInts->isNotInMIMap(*MI);
- if (MI->isDebugInstr()) {
- if (mapped)
- report("Debug instruction has a slot index", MI);
- } else if (MI->isInsideBundle()) {
- if (mapped)
- report("Instruction inside bundle has a slot index", MI);
+ LLT OpTy = MRI->getType(MO->getReg());
+ // Don't report a type mismatch if there is no actual mismatch, only a
+ // type missing, to reduce noise:
+ if (OpTy.isValid()) {
+ // Only the first valid type for a type index will be printed: don't
+ // overwrite it later so it's always clear which type was expected:
+ if (!Types[TypeIdx].isValid())
+ Types[TypeIdx] = OpTy;
+ else if (Types[TypeIdx] != OpTy)
+ report("Type mismatch in generic instruction", MO, I, OpTy);
} else {
- if (!mapped)
- report("Missing slot index", MI);
+ // Generic instructions must have types attached to their operands.
+ report("Generic instruction is missing a virtual register type", MO, I);
}
}
- if (isPreISelGenericOpcode(MCID.getOpcode())) {
- if (isFunctionSelected)
- report("Unexpected generic instruction in a Selected function", MI);
-
- // Check types.
- SmallVector<LLT, 4> Types;
- for (unsigned I = 0; I < MCID.getNumOperands(); ++I) {
- if (!MCID.OpInfo[I].isGenericType())
- continue;
- // Generic instructions specify type equality constraints between some of
- // their operands. Make sure these are consistent.
- size_t TypeIdx = MCID.OpInfo[I].getGenericTypeIndex();
- Types.resize(std::max(TypeIdx + 1, Types.size()));
-
- const MachineOperand *MO = &MI->getOperand(I);
- LLT OpTy = MRI->getType(MO->getReg());
- // Don't report a type mismatch if there is no actual mismatch, only a
- // type missing, to reduce noise:
- if (OpTy.isValid()) {
- // Only the first valid type for a type index will be printed: don't
- // overwrite it later so it's always clear which type was expected:
- if (!Types[TypeIdx].isValid())
- Types[TypeIdx] = OpTy;
- else if (Types[TypeIdx] != OpTy)
- report("Type mismatch in generic instruction", MO, I, OpTy);
- } else {
- // Generic instructions must have types attached to their operands.
- report("Generic instruction is missing a virtual register type", MO, I);
- }
- }
-
- // Generic opcodes must not have physical register operands.
- for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
- const MachineOperand *MO = &MI->getOperand(I);
- if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
- report("Generic instruction cannot have physical register", MO, I);
- }
+ // Generic opcodes must not have physical register operands.
+ for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
+ const MachineOperand *MO = &MI->getOperand(I);
+ if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+ report("Generic instruction cannot have physical register", MO, I);
}
+ // Avoid out of bounds in checks below. This was already reported earlier.
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ return;
+
StringRef ErrorInfo;
if (!TII->verifyInstruction(*MI, ErrorInfo))
report(ErrorInfo.data(), MI);
// Verify properties of various specific instruction types
- switch(MI->getOpcode()) {
- default:
+ switch (MI->getOpcode()) {
+ case TargetOpcode::G_CONSTANT:
+ case TargetOpcode::G_FCONSTANT: {
+ if (MI->getNumOperands() < MCID.getNumOperands())
+ break;
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (DstTy.isVector())
+ report("Instruction cannot use a vector result type", MI);
+
+ if (MI->getOpcode() == TargetOpcode::G_CONSTANT) {
+ if (!MI->getOperand(1).isCImm()) {
+ report("G_CONSTANT operand must be cimm", MI);
+ break;
+ }
+
+ const ConstantInt *CI = MI->getOperand(1).getCImm();
+ if (CI->getBitWidth() != DstTy.getSizeInBits())
+ report("inconsistent constant size", MI);
+ } else {
+ if (!MI->getOperand(1).isFPImm()) {
+ report("G_FCONSTANT operand must be fpimm", MI);
+ break;
+ }
+ const ConstantFP *CF = MI->getOperand(1).getFPImm();
+
+ if (APFloat::getSizeInBits(CF->getValueAPF().getSemantics()) !=
+ DstTy.getSizeInBits()) {
+ report("inconsistent constant size", MI);
+ }
+ }
+
break;
+ }
case TargetOpcode::G_LOAD:
case TargetOpcode::G_STORE:
+ case TargetOpcode::G_ZEXTLOAD:
+ case TargetOpcode::G_SEXTLOAD: {
+ LLT ValTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!PtrTy.isPointer())
+ report("Generic memory instruction must access a pointer", MI);
+
// Generic loads and stores must have a single MachineMemOperand
// describing that access.
- if (!MI->hasOneMemOperand())
+ if (!MI->hasOneMemOperand()) {
report("Generic instruction accessing memory must have one mem operand",
MI);
+ } else {
+ const MachineMemOperand &MMO = **MI->memoperands_begin();
+ if (MI->getOpcode() == TargetOpcode::G_ZEXTLOAD ||
+ MI->getOpcode() == TargetOpcode::G_SEXTLOAD) {
+ if (MMO.getSizeInBits() >= ValTy.getSizeInBits())
+ report("Generic extload must have a narrower memory type", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_LOAD) {
+ if (MMO.getSize() > ValTy.getSizeInBytes())
+ report("load memory size cannot exceed result size", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_STORE) {
+ if (ValTy.getSizeInBytes() < MMO.getSize())
+ report("store memory size cannot exceed value size", MI);
+ }
+ }
+
break;
+ }
case TargetOpcode::G_PHI: {
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
if (!DstTy.isValid() ||
@@ -1009,6 +1052,70 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
MI);
break;
}
+ case TargetOpcode::G_BITCAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ if (SrcTy.isPointer() != DstTy.isPointer())
+ report("bitcast cannot convert between pointers and other types", MI);
+
+ if (SrcTy.getSizeInBits() != DstTy.getSizeInBits())
+ report("bitcast sizes must match", MI);
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ case TargetOpcode::G_ADDRSPACE_CAST: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!DstTy.isValid() || !SrcTy.isValid())
+ break;
+
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ DstTy = DstTy.getScalarType();
+ SrcTy = SrcTy.getScalarType();
+
+ if (MI->getOpcode() == TargetOpcode::G_INTTOPTR) {
+ if (!DstTy.isPointer())
+ report("inttoptr result type must be a pointer", MI);
+ if (SrcTy.isPointer())
+ report("inttoptr source type must not be a pointer", MI);
+ } else if (MI->getOpcode() == TargetOpcode::G_PTRTOINT) {
+ if (!SrcTy.isPointer())
+ report("ptrtoint source type must be a pointer", MI);
+ if (DstTy.isPointer())
+ report("ptrtoint result type must not be a pointer", MI);
+ } else {
+ assert(MI->getOpcode() == TargetOpcode::G_ADDRSPACE_CAST);
+ if (!SrcTy.isPointer() || !DstTy.isPointer())
+ report("addrspacecast types must be pointers", MI);
+ else {
+ if (SrcTy.getAddressSpace() == DstTy.getAddressSpace())
+ report("addrspacecast must convert different address spaces", MI);
+ }
+ }
+
+ break;
+ }
+ case TargetOpcode::G_GEP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+ LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
+ if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
+ break;
+
+ if (!PtrTy.getScalarType().isPointer())
+ report("gep first operand must be a pointer", MI);
+
+ if (OffsetTy.getScalarType().isPointer())
+ report("gep offset operand must not be a pointer", MI);
+
+ // TODO: Is the offset allowed to be a scalar with a vector?
+ break;
+ }
case TargetOpcode::G_SEXT:
case TargetOpcode::G_ZEXT:
case TargetOpcode::G_ANYEXT:
@@ -1021,30 +1128,18 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// instructions aren't guaranteed to have the right number of operands or
// types attached to them at this point
assert(MCID.getNumOperands() == 2 && "Expected 2 operands G_*{EXT,TRUNC}");
- if (MI->getNumOperands() < MCID.getNumOperands())
- break;
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (!DstTy.isValid() || !SrcTy.isValid())
break;
- LLT DstElTy = DstTy.isVector() ? DstTy.getElementType() : DstTy;
- LLT SrcElTy = SrcTy.isVector() ? SrcTy.getElementType() : SrcTy;
+ LLT DstElTy = DstTy.getScalarType();
+ LLT SrcElTy = SrcTy.getScalarType();
if (DstElTy.isPointer() || SrcElTy.isPointer())
report("Generic extend/truncate can not operate on pointers", MI);
- if (DstTy.isVector() != SrcTy.isVector()) {
- report("Generic extend/truncate must be all-vector or all-scalar", MI);
- // Generally we try to report as many issues as possible at once, but in
- // this case it's not clear what should we be comparing the size of the
- // scalar with: the size of the whole vector or its lane. Instead of
- // making an arbitrary choice and emitting not so helpful message, let's
- // avoid the extra noise and stop here.
- break;
- }
- if (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements())
- report("Generic vector extend/truncate must preserve number of lanes",
- MI);
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
unsigned DstSize = DstElTy.getSizeInBits();
unsigned SrcSize = SrcElTy.getSizeInBits();
switch (MI->getOpcode()) {
@@ -1061,6 +1156,17 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
}
break;
}
+ case TargetOpcode::G_SELECT: {
+ LLT SelTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT CondTy = MRI->getType(MI->getOperand(1).getReg());
+ if (!SelTy.isValid() || !CondTy.isValid())
+ break;
+
+ // A scalar condition may select between vector values, so only check the
+ // lane counts when the condition itself is a vector.
+ if (CondTy.isVector())
+ verifyVectorElementMatch(SelTy, CondTy, MI);
+ break;
+ }
case TargetOpcode::G_MERGE_VALUES: {
// G_MERGE_VALUES should only be used to merge scalars into a larger scalar,
// e.g. s2N = MERGE sN, sN
@@ -1070,6 +1176,16 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
if (DstTy.isVector() || SrcTy.isVector())
report("G_MERGE_VALUES cannot operate on vectors", MI);
+
+ const unsigned NumOps = MI->getNumOperands();
+ if (DstTy.getSizeInBits() != SrcTy.getSizeInBits() * (NumOps - 1))
+ report("G_MERGE_VALUES result size is inconsistent", MI);
+
+ for (unsigned I = 2; I != NumOps; ++I) {
+ if (MRI->getType(MI->getOperand(I).getReg()) != SrcTy)
+ report("G_MERGE_VALUES source types do not match", MI);
+ }
+
break;
}
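
The new size identity is worth spelling out: with one def and NumOps - 1 homogeneous scalar sources, the def must be exactly NumOps - 1 times as wide; for example, four s16 sources merge into an s64. A sketch under those assumptions (illustrative helper, not the verifier's code):

#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

// All sources share SrcTy; e.g. s64 = G_MERGE_VALUES s16, s16, s16, s16
// satisfies 64 == 16 * 4.
static bool mergeSizesAreConsistent(LLT DstTy, LLT SrcTy, unsigned NumSrcs) {
  return !DstTy.isVector() && !SrcTy.isVector() &&
         DstTy.getSizeInBits() == SrcTy.getSizeInBits() * NumSrcs;
}
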
case TargetOpcode::G_UNMERGE_VALUES: {
@@ -1092,18 +1208,23 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
// must match the dest vector size.
LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
LLT SrcEltTy = MRI->getType(MI->getOperand(1).getReg());
- if (!DstTy.isVector() || SrcEltTy.isVector())
+ if (!DstTy.isVector() || SrcEltTy.isVector()) {
report("G_BUILD_VECTOR must produce a vector from scalar operands", MI);
+ break;
+ }
+
+ if (DstTy.getElementType() != SrcEltTy)
+ report("G_BUILD_VECTOR result element type must match source type", MI);
+
+ if (DstTy.getNumElements() != MI->getNumOperands() - 1)
+ report("G_BUILD_VECTOR must have an operand for each elemement", MI);
+
for (unsigned i = 2; i < MI->getNumOperands(); ++i) {
if (MRI->getType(MI->getOperand(1).getReg()) !=
MRI->getType(MI->getOperand(i).getReg()))
report("G_BUILD_VECTOR source operand types are not homogeneous", MI);
}
- if (DstTy.getSizeInBits() !=
- SrcEltTy.getSizeInBits() * (MI->getNumOperands() - 1))
- report("G_BUILD_VECTOR src operands total size don't match dest "
- "size.",
- MI);
+
break;
}
case TargetOpcode::G_BUILD_VECTOR_TRUNC: {
@@ -1144,6 +1265,176 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
report("G_CONCAT_VECTOR num dest and source elements should match", MI);
break;
}
+ case TargetOpcode::G_ICMP:
+ case TargetOpcode::G_FCMP: {
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(2).getReg());
+
+ if ((DstTy.isVector() != SrcTy.isVector()) ||
+ (DstTy.isVector() && DstTy.getNumElements() != SrcTy.getNumElements()))
+ report("Generic vector icmp/fcmp must preserve number of lanes", MI);
+
+ break;
+ }
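
The icmp/fcmp condition above encodes the same lane-count rule that verifyVectorElementMatch applies elsewhere in this patch. Written out as a standalone predicate (illustrative form):

#include "llvm/Support/LowLevelTypeImpl.h"
using namespace llvm;

// Both types must agree on vector-ness, and vectors must agree on lanes.
static bool elementCountsMatch(LLT A, LLT B) {
  if (A.isVector() != B.isVector())
    return false;
  return !A.isVector() || A.getNumElements() == B.getNumElements();
}
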
+ case TargetOpcode::G_EXTRACT: {
+ const MachineOperand &SrcOp = MI->getOperand(1);
+ if (!SrcOp.isReg()) {
+ report("extract source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(2);
+ if (!OffsetOp.isImm()) {
+ report("extract offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+ if (SrcSize == DstSize)
+ report("extract source must be larger than result", MI);
+
+ if (DstSize + OffsetOp.getImm() > SrcSize)
+ report("extract reads past end of register", MI);
+ break;
+ }
+ case TargetOpcode::G_INSERT: {
+ const MachineOperand &SrcOp = MI->getOperand(2);
+ if (!SrcOp.isReg()) {
+ report("insert source must be a register", MI);
+ break;
+ }
+
+ const MachineOperand &OffsetOp = MI->getOperand(3);
+ if (!OffsetOp.isImm()) {
+ report("insert offset must be a constant", MI);
+ break;
+ }
+
+ unsigned DstSize = MRI->getType(MI->getOperand(0).getReg()).getSizeInBits();
+ unsigned SrcSize = MRI->getType(SrcOp.getReg()).getSizeInBits();
+
+ if (DstSize <= SrcSize)
+ report("inserted size must be smaller than total register", MI);
+
+ if (SrcSize + OffsetOp.getImm() > DstSize)
+ report("insert writes past end of register", MI);
+
+ break;
+ }
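
The G_EXTRACT and G_INSERT checks are mirror images: an extract reads the result's width at a bit offset out of the source, an insert writes the source's width at a bit offset into the result, and in both cases the accessed piece must be strictly smaller than the whole and must not run past its end. A condensed sketch of that shared range check (assumed helper, not the verifier's code):

#include <cstdint>

// PieceBits is the extracted/inserted width, WholeBits the width of the
// enclosing register; the piece must fit entirely inside the whole.
static bool bitRangeFits(uint64_t PieceBits, uint64_t Offset,
                         uint64_t WholeBits) {
  return PieceBits < WholeBits && PieceBits + Offset <= WholeBits;
}
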
+ case TargetOpcode::G_JUMP_TABLE: {
+ if (!MI->getOperand(1).isJTI())
+ report("G_JUMP_TABLE source operand must be a jump table index", MI);
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ if (!DstTy.isPointer())
+ report("G_JUMP_TABLE dest operand must have a pointer type", MI);
+ break;
+ }
+ case TargetOpcode::G_BRJT: {
+ if (!MRI->getType(MI->getOperand(0).getReg()).isPointer())
+ report("G_BRJT src operand 0 must be a pointer type", MI);
+
+ if (!MI->getOperand(1).isJTI())
+ report("G_BRJT src operand 1 must be a jump table index", MI);
+
+ const auto &IdxOp = MI->getOperand(2);
+ if (!IdxOp.isReg() || MRI->getType(IdxOp.getReg()).isPointer())
+ report("G_BRJT src operand 2 must be a scalar reg type", MI);
+ break;
+ }
+ case TargetOpcode::G_INTRINSIC:
+ case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: {
+ // TODO: Should verify number of def and use operands, but the current
+ // interface requires passing in IR types for mangling.
+ const MachineOperand &IntrIDOp = MI->getOperand(MI->getNumExplicitDefs());
+ if (!IntrIDOp.isIntrinsicID()) {
+ report("G_INTRINSIC first src operand must be an intrinsic ID", MI);
+ break;
+ }
+
+ bool NoSideEffects = MI->getOpcode() == TargetOpcode::G_INTRINSIC;
+ unsigned IntrID = IntrIDOp.getIntrinsicID();
+ if (IntrID != 0 && IntrID < Intrinsic::num_intrinsics) {
+ AttributeList Attrs
+ = Intrinsic::getAttributes(MF->getFunction().getContext(),
+ static_cast<Intrinsic::ID>(IntrID));
+ bool DeclHasSideEffects = !Attrs.hasFnAttribute(Attribute::ReadNone);
+ if (NoSideEffects && DeclHasSideEffects) {
+ report("G_INTRINSIC used with intrinsic that accesses memory", MI);
+ break;
+ }
+ if (!NoSideEffects && !DeclHasSideEffects) {
+ report("G_INTRINSIC_W_SIDE_EFFECTS used with readnone intrinsic", MI);
+ break;
+ }
+ }
+
+ break;
+ }
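
The opcode/attribute consistency check derives "has side effects" purely from the intrinsic declaration lacking readnone. Inverted into a builder-side helper, the same rule picks the opcode from the declaration; this is a sketch only, since the verifier merely reports mismatches:

#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
using namespace llvm;

// An intrinsic declared readnone belongs under G_INTRINSIC; anything that
// may access memory must use G_INTRINSIC_W_SIDE_EFFECTS.
static unsigned expectedIntrinsicOpcode(LLVMContext &Ctx, Intrinsic::ID ID) {
  AttributeList Attrs = Intrinsic::getAttributes(Ctx, ID);
  return Attrs.hasFnAttribute(Attribute::ReadNone)
             ? TargetOpcode::G_INTRINSIC
             : TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS;
}
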
+ default:
+ break;
+ }
+}
+
+void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
+ const MCInstrDesc &MCID = MI->getDesc();
+ if (MI->getNumOperands() < MCID.getNumOperands()) {
+ report("Too few operands", MI);
+ errs() << MCID.getNumOperands() << " operands expected, but "
+ << MI->getNumOperands() << " given.\n";
+ }
+
+ if (MI->isPHI()) {
+ if (MF->getProperties().hasProperty(
+ MachineFunctionProperties::Property::NoPHIs))
+ report("Found PHI instruction with NoPHIs property set", MI);
+
+ if (FirstNonPHI)
+ report("Found PHI instruction after non-PHI", MI);
+ } else if (FirstNonPHI == nullptr)
+ FirstNonPHI = MI;
+
+ // Check inline asm constraints, including the tied operands.
+ if (MI->isInlineAsm())
+ verifyInlineAsm(MI);
+
+ // Check the MachineMemOperands for basic consistency.
+ for (MachineInstr::mmo_iterator I = MI->memoperands_begin(),
+ E = MI->memoperands_end();
+ I != E; ++I) {
+ if ((*I)->isLoad() && !MI->mayLoad())
+ report("Missing mayLoad flag", MI);
+ if ((*I)->isStore() && !MI->mayStore())
+ report("Missing mayStore flag", MI);
+ }
+
+ // Debug values must not have a slot index.
+ // Other instructions must have one, unless they are inside a bundle.
+ if (LiveInts) {
+ bool mapped = !LiveInts->isNotInMIMap(*MI);
+ if (MI->isDebugInstr()) {
+ if (mapped)
+ report("Debug instruction has a slot index", MI);
+ } else if (MI->isInsideBundle()) {
+ if (mapped)
+ report("Instruction inside bundle has a slot index", MI);
+ } else {
+ if (!mapped)
+ report("Missing slot index", MI);
+ }
+ }
+
+ if (isPreISelGenericOpcode(MCID.getOpcode())) {
+ verifyPreISelGenericInstruction(MI);
+ return;
+ }
+
+ StringRef ErrorInfo;
+ if (!TII->verifyInstruction(*MI, ErrorInfo))
+ report(ErrorInfo.data(), MI);
+
+ // Verify properties of various specific instruction types
+ switch (MI->getOpcode()) {
case TargetOpcode::COPY: {
if (foundErrors)
break;
@@ -1193,7 +1484,8 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
VerifyStackMapConstant(VarStart + StatepointOpers::NumDeoptOperandsOffset);
// TODO: verify we have properly encoded deopt arguments
- };
+ break;
+ }
}
void
@@ -1356,7 +1648,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
}
if (SubIdx) {
- report("Generic virtual register does not subregister index", MO,
+ report("Generic virtual register does not allow subregister index", MO,
MONum);
return;
}
@@ -1911,6 +2203,10 @@ void MachineVerifier::visitMachineFunctionAfter() {
verifyLiveVariables();
if (LiveInts)
verifyLiveIntervals();
+
+ for (auto CSInfo : MF->getCallSitesInfo())
+ if (!CSInfo.first->isCall())
+ report("Call site info referencing instruction that is not call", MF);
}
void MachineVerifier::verifyLiveVariables() {
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 82b6d642c73b..2db1e86905a4 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -1,9 +1,8 @@
//===- MacroFusion.cpp - Macro Fusion -------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -37,7 +36,7 @@ static bool isHazard(const SDep &Dep) {
return Dep.getKind() == SDep::Anti || Dep.getKind() == SDep::Output;
}
-static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
+static bool fuseInstructionPair(ScheduleDAGInstrs &DAG, SUnit &FirstSU,
SUnit &SecondSU) {
// Check that neither instr is already paired with another along the edge
// between them.
@@ -49,7 +48,7 @@ static bool fuseInstructionPair(ScheduleDAGMI &DAG, SUnit &FirstSU,
if (SI.isCluster())
return false;
// Though the reachability checks above could be made more generic,
- // perhaps as part of ScheduleDAGMI::addEdge(), since such edges are valid,
+ // perhaps as part of ScheduleDAGInstrs::addEdge(), since such edges are valid,
// the extra computation cost makes it less interesting in general cases.
// Create a single weak edge between the adjacent instrs. The only effect is
@@ -118,7 +117,7 @@ namespace {
class MacroFusion : public ScheduleDAGMutation {
ShouldSchedulePredTy shouldScheduleAdjacent;
bool FuseBlock;
- bool scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU);
+ bool scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU);
public:
MacroFusion(ShouldSchedulePredTy shouldScheduleAdjacent, bool FuseBlock)
@@ -129,9 +128,7 @@ public:
} // end anonymous namespace
-void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
- ScheduleDAGMI *DAG = static_cast<ScheduleDAGMI*>(DAGInstrs);
-
+void MacroFusion::apply(ScheduleDAGInstrs *DAG) {
if (FuseBlock)
// For each of the SUnits in the scheduling block, try to fuse the instr in
// it with one in its predecessors.
@@ -145,7 +142,7 @@ void MacroFusion::apply(ScheduleDAGInstrs *DAGInstrs) {
/// Implement the fusion of instr pairs in the scheduling DAG,
/// anchored at the instr in AnchorSU.
-bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGMI &DAG, SUnit &AnchorSU) {
+bool MacroFusion::scheduleAdjacentImpl(ScheduleDAGInstrs &DAG, SUnit &AnchorSU) {
const MachineInstr &AnchorMI = *AnchorSU.getInstr();
const TargetInstrInfo &TII = *DAG.TII;
const TargetSubtargetInfo &ST = DAG.MF.getSubtarget();
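
Widening fuseInstructionPair and scheduleAdjacentImpl from ScheduleDAGMI to ScheduleDAGInstrs lets the same mutation serve any ScheduleDAGInstrs-based scheduler, not just the MI scheduler. For context, a hedged example of how a target typically hooks a fusion predicate in; the predicate body and its name are illustrative only:

#include "llvm/CodeGen/MacroFusion.h"
using namespace llvm;

// Toy predicate: fuse a compare with the conditional branch consuming it.
// A null FirstMI asks "would fusing SecondMI with any predecessor help?".
static bool shouldScheduleCmpBccAdjacent(const TargetInstrInfo &TII,
                                         const TargetSubtargetInfo &STI,
                                         const MachineInstr *FirstMI,
                                         const MachineInstr &SecondMI) {
  return SecondMI.isConditionalBranch() && (!FirstMI || FirstMI->isCompare());
}

// Registered from a target's createMachineScheduler hook, e.g.:
//   DAG->addMutation(createMacroFusionDAGMutation(shouldScheduleCmpBccAdjacent));
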
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index 770f6c5b0403..c70b62252139 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -1,9 +1,8 @@
//===- OptimizePHIs.cpp - Optimize machine instruction PHIs ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -182,11 +181,12 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
- // for the case SingleValReg taken from copy instr
- MRI->clearKillFlags(SingleValReg);
-
MRI->replaceRegWith(OldReg, SingleValReg);
MI->eraseFromParent();
+
+ // The kill flags on OldReg and SingleValReg may no longer be correct.
+ MRI->clearKillFlags(SingleValReg);
+
++NumPHICycles;
Changed = true;
continue;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b9801c6fd97b..948a5835438c 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -1,9 +1,8 @@
//===- PhiElimination.cpp - Eliminate PHI nodes by inserting copies -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
index 4e67ff2e5088..3a2cdaf3bd3c 100644
--- a/lib/CodeGen/PHIEliminationUtils.cpp
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -1,9 +1,8 @@
//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
index b997d7ac5f4f..0ff3a41f47d3 100644
--- a/lib/CodeGen/PHIEliminationUtils.h
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -1,9 +1,8 @@
//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ParallelCG.cpp b/lib/CodeGen/ParallelCG.cpp
index bc3f2a6e9b5a..e4c73658cb4f 100644
--- a/lib/CodeGen/ParallelCG.cpp
+++ b/lib/CodeGen/ParallelCG.cpp
@@ -1,9 +1,8 @@
//===-- ParallelCG.cpp ----------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index afb4b0a7e174..a3fa1b0ad8ed 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -1,9 +1,8 @@
//===-- PatchableFunction.cpp - Patchable prologues for LLVM -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 1d058ccfb633..b918396aa8c5 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1,9 +1,8 @@
//===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1307,7 +1306,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
/// Check whether MI is a candidate for folding into a later instruction.
/// We only fold loads to virtual registers and the virtual register defined
-/// has a single use.
+/// has a single user.
bool PeepholeOptimizer::isLoadFoldable(
MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
if (!MI.canFoldAsLoad() || !MI.mayLoad())
@@ -1317,12 +1316,12 @@ bool PeepholeOptimizer::isLoadFoldable(
return false;
unsigned Reg = MI.getOperand(0).getReg();
- // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
+ // To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
if (!MI.getOperand(0).getSubReg() &&
TargetRegisterInfo::isVirtualRegister(Reg) &&
- MRI->hasOneNonDBGUse(Reg)) {
+ MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
}
@@ -1778,6 +1777,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.erase(MI);
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
+ if (MI->isCall())
+ MI->getMF()->updateCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
DefMI->eraseFromParent();
MRI->markUsesInDebugValueAsUndef(FoldedReg);
@@ -1826,7 +1827,7 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
assert(Def->isBitcast() && "Invalid definition");
// Bail if there are effects that a plain copy will not expose.
- if (Def->hasUnmodeledSideEffects())
+ if (Def->mayRaiseFPException() || Def->hasUnmodeledSideEffects())
return ValueTrackerResult();
// Bitcasts with more than one def are not supported.
@@ -1901,13 +1902,8 @@ ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
// Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
// Check if one of the operand defines the subreg we are interested in.
for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
- if (RegSeqInput.SubIdx == DefSubReg) {
- if (RegSeqInput.SubReg)
- // Bail if we have to compose sub registers.
- return ValueTrackerResult();
-
+ if (RegSeqInput.SubIdx == DefSubReg)
return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
- }
}
// If the subreg we are tracking is super-defined by another subreg,
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp
index f9d4a9746e41..0a3838617bc5 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/PostRAHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===----- PostRAHazardRecognizer.cpp - hazard recognizer -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index dd0a5fe1b39d..5bea9f2893c9 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -1,9 +1,8 @@
//===----- SchedulePostRAList.cpp - list scheduler ------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index b0e9ac03612d..2752e186875c 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -1,9 +1,8 @@
//===- PreISelIntrinsicLowering.cpp - Pre-ISel intrinsic lowering pass ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -45,7 +44,7 @@ static bool lowerLoadRelative(Function &F) {
Value *OffsetPtr =
B.CreateGEP(Int8Ty, CI->getArgOperand(0), CI->getArgOperand(1));
Value *OffsetPtrI32 = B.CreateBitCast(OffsetPtr, Int32PtrTy);
- Value *OffsetI32 = B.CreateAlignedLoad(OffsetPtrI32, 4);
+ Value *OffsetI32 = B.CreateAlignedLoad(Int32Ty, OffsetPtrI32, 4);
Value *ResultPtr = B.CreateGEP(Int8Ty, CI->getArgOperand(0), OffsetI32);
@@ -65,9 +64,9 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
// If we haven't already looked up this function, check to see if the
// program already contains a function with this name.
Module *M = F.getParent();
- Constant* FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
+ FunctionCallee FCache = M->getOrInsertFunction(NewFn, F.getFunctionType());
- if (Function* Fn = dyn_cast<Function>(FCache)) {
+ if (Function *Fn = dyn_cast<Function>(FCache.getCallee())) {
Fn->setLinkage(F.getLinkage());
if (setNonLazyBind && !Fn->isWeakForLinker()) {
// If we have Native ARC, set nonlazybind attribute for these APIs for
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 7e9b4af12ee9..b38987ad1c90 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -1,9 +1,8 @@
//===---------------------- ProcessImplicitDefs.cpp -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 23754e487a18..d463bee67595 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -1,9 +1,8 @@
//===- PrologEpilogInserter.cpp - Insert Prolog/Epilog code in function ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -32,6 +31,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
@@ -169,6 +169,46 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// StackObjSet - A set of stack object indexes
using StackObjSet = SmallSetVector<int, 8>;
+using SavedDbgValuesMap =
+ SmallDenseMap<MachineBasicBlock *, SmallVector<MachineInstr *, 4>, 4>;
+
+/// Stash DBG_VALUEs that describe parameters and which are placed at the start
+/// of the block. Later on, after the prologue code has been emitted, the
+/// stashed DBG_VALUEs will be reinserted at the start of the block.
+static void stashEntryDbgValues(MachineBasicBlock &MBB,
+ SavedDbgValuesMap &EntryDbgValues) {
+ SmallVector<const MachineInstr *, 4> FrameIndexValues;
+
+ for (auto &MI : MBB) {
+ if (!MI.isDebugInstr())
+ break;
+ if (!MI.isDebugValue() || !MI.getDebugVariable()->isParameter())
+ continue;
+ if (MI.getOperand(0).isFI()) {
+ // We can only emit valid locations for frame indices after the frame
+ // setup, so do not stash them away.
+ FrameIndexValues.push_back(&MI);
+ continue;
+ }
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *Expr = MI.getDebugExpression();
+ auto Overlaps = [Var, Expr](const MachineInstr *DV) {
+ return Var == DV->getDebugVariable() &&
+ Expr->fragmentsOverlap(DV->getDebugExpression());
+ };
+ // See if the debug value overlaps with any preceding debug value that will
+ // not be stashed. If that is the case, then we can't stash this value, as
+ // we would then reorder the values at reinsertion.
+ if (llvm::none_of(FrameIndexValues, Overlaps))
+ EntryDbgValues[&MBB].push_back(&MI);
+ }
+
+ // Remove stashed debug values from the block.
+ if (EntryDbgValues.count(&MBB))
+ for (auto *MI : EntryDbgValues[&MBB])
+ MI->removeFromParent();
+}
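
The Overlaps lambda leans on DIExpression::fragmentsOverlap; conceptually, two fragments of the same variable overlap exactly when their half-open bit ranges intersect, and a fragment-less expression covers the whole variable. A self-contained sketch of that predicate (types and name are illustrative):

#include <cstdint>

struct FragInfo {
  uint64_t OffsetInBits;
  uint64_t SizeInBits;
};

// Null means "no fragment", i.e. the expression describes the entire
// variable and therefore overlaps any fragment of it.
static bool fragmentsOverlapSketch(const FragInfo *A, const FragInfo *B) {
  if (!A || !B)
    return true;
  return A->OffsetInBits < B->OffsetInBits + B->SizeInBits &&
         B->OffsetInBits < A->OffsetInBits + A->SizeInBits;
}
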
+
/// runOnMachineFunction - Insert prolog/epilog code and replace abstract
/// frame indexes with appropriate references.
bool PEI::runOnMachineFunction(MachineFunction &MF) {
@@ -179,8 +219,6 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
RS = TRI->requiresRegisterScavenging(MF) ? new RegScavenger() : nullptr;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(MF);
- FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
- TRI->requiresFrameIndexReplacementScavenging(MF);
ORE = &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE();
// Calculate the MaxCallFrameSize and AdjustsStack variables for the
@@ -192,6 +230,11 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
// place all spills in the entry block, all restores in return blocks.
calculateSaveRestoreBlocks(MF);
+ // Stash away DBG_VALUEs that should not be moved by insertion of prolog code.
+ SavedDbgValuesMap EntryDbgValues;
+ for (MachineBasicBlock *SaveBlock : SaveBlocks)
+ stashEntryDbgValues(*SaveBlock, EntryDbgValues);
+
// Handle CSR spilling and restoring, for targets that need it.
if (MF.getTarget().usesPhysRegsForPEI())
spillCalleeSavedRegs(MF);
@@ -211,6 +254,10 @@ bool PEI::runOnMachineFunction(MachineFunction &MF) {
if (!F.hasFnAttribute(Attribute::Naked))
insertPrologEpilogCode(MF);
+ // Reinsert stashed debug values at the start of the entry blocks.
+ for (auto &I : EntryDbgValues)
+ I.first->insert(I.first->begin(), I.second.begin(), I.second.end());
+
// Replace all MO_FrameIndex operands with physical register references
// and actual offsets.
//
@@ -495,9 +542,16 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock,
for (const CalleeSavedInfo &CS : CSI) {
// Insert the spill to the stack frame.
unsigned Reg = CS.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
- TRI);
+
+ if (CS.isSpilledToReg()) {
+ BuildMI(SaveBlock, I, DebugLoc(),
+ TII.get(TargetOpcode::COPY), CS.getDstReg())
+ .addReg(Reg, getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC,
+ TRI);
+ }
}
}
}
@@ -517,12 +571,17 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock,
if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) {
for (const CalleeSavedInfo &CI : reverse(CSI)) {
unsigned Reg = CI.getReg();
- const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
- TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
- assert(I != RestoreBlock.begin() &&
- "loadRegFromStackSlot didn't insert any code!");
- // Insert in reverse order. loadRegFromStackSlot can insert
- // multiple instructions.
+ if (CI.isSpilledToReg()) {
+ BuildMI(RestoreBlock, I, DebugLoc(), TII.get(TargetOpcode::COPY), Reg)
+ .addReg(CI.getDstReg(), getKillRegState(true));
+ } else {
+ const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
+ TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI);
+ assert(I != RestoreBlock.begin() &&
+ "loadRegFromStackSlot didn't insert any code!");
+ // Insert in reverse order. loadRegFromStackSlot can insert
+ // multiple instructions.
+ }
}
}
}
@@ -615,10 +674,13 @@ computeFreeStackSlots(MachineFrameInfo &MFI, bool StackGrowsDown,
SmallVector<int, 16> AllocatedFrameSlots;
// Add fixed objects.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i)
- AllocatedFrameSlots.push_back(i);
+ // Stack slot scavenging is only implemented for the default stack.
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
// Add callee-save objects.
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
- AllocatedFrameSlots.push_back(i);
+ if (MFI.getStackID(i) == TargetStackID::Default)
+ AllocatedFrameSlots.push_back(i);
for (int i : AllocatedFrameSlots) {
// These are converted from int64_t, but they should always fit in int
@@ -740,11 +802,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Skew to be applied to alignment.
unsigned Skew = TFI.getStackAlignmentSkew(MF);
+#ifdef EXPENSIVE_CHECKS
+ for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i)
+ if (!MFI.isDeadObjectIndex(i) &&
+ MFI.getStackID(i) == TargetStackID::Default)
+ assert(MFI.getObjectAlignment(i) <= MFI.getMaxAlignment() &&
+ "MaxAlignment is invalid");
+#endif
+
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI.getObjectIndexBegin(); i != 0; ++i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
int64_t FixedOff;
if (StackGrowsDown) {
// The maximum distance from the stack pointer is at lower address of
@@ -763,6 +837,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// callee saved registers.
if (StackGrowsDown) {
for (unsigned i = MinCSFrameIndex; i <= MaxCSFrameIndex; ++i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
// If the stack grows down, we need to add the size to find the lowest
// address of the object.
Offset += MFI.getObjectSize(i);
@@ -777,6 +855,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
} else if (MaxCSFrameIndex >= MinCSFrameIndex) {
// Be careful about underflow in comparisons against MinCSFrameIndex.
for (unsigned i = MaxCSFrameIndex; i != MinCSFrameIndex - 1; --i) {
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
+
if (MFI.isDeadObjectIndex(i))
continue;
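
The same stack-ID guard is inserted at every offset-assignment site in this file. Naming it makes the intent clearer; a hypothetical helper capturing the repeated filter (PEI lays out only default-stack objects, leaving other stack IDs to target-specific code):

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
using namespace llvm;

// True for live objects that PEI itself should assign an offset to.
static bool isAllocatedByPEI(const MachineFrameInfo &MFI, int FI) {
  return !MFI.isDeadObjectIndex(FI) &&
         MFI.getStackID(FI) == TargetStackID::Default;
}
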
@@ -845,18 +927,26 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// Make sure that the stack protector comes before the local variables on the
// stack.
SmallSet<int, 16> ProtectedObjs;
- if (MFI.getStackProtectorIndex() >= 0) {
+ if (MFI.hasStackProtectorIndex()) {
+ int StackProtectorFI = MFI.getStackProtectorIndex();
StackObjSet LargeArrayObjs;
StackObjSet SmallArrayObjs;
StackObjSet AddrOfObjs;
- AdjustStackOffset(MFI, MFI.getStackProtectorIndex(), StackGrowsDown,
- Offset, MaxAlign, Skew);
+ // If we need a stack protector, we need to make sure that
+ // LocalStackSlotPass didn't already allocate a slot for it.
+ // If we are told to use the LocalStackAllocationBlock, the stack protector
+ // is expected to be already pre-allocated.
+ if (!MFI.getUseLocalStackAllocationBlock())
+ AdjustStackOffset(MFI, StackProtectorFI, StackGrowsDown, Offset, MaxAlign,
+ Skew);
+ else if (!MFI.isObjectPreAllocated(MFI.getStackProtectorIndex()))
+ llvm_unreachable(
+ "Stack protector not pre-allocated by LocalStackSlotPass.");
// Assign large stack objects first.
for (unsigned i = 0, e = MFI.getObjectIndexEnd(); i != e; ++i) {
- if (MFI.isObjectPreAllocated(i) &&
- MFI.getUseLocalStackAllocationBlock())
+ if (MFI.isObjectPreAllocated(i) && MFI.getUseLocalStackAllocationBlock())
continue;
if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex)
continue;
@@ -864,8 +954,10 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i ||
- EHRegNodeFrameIndex == (int)i)
+ if (StackProtectorFI == (int)i || EHRegNodeFrameIndex == (int)i)
+ continue;
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
continue;
switch (MFI.getObjectSSPLayout(i)) {
@@ -884,6 +976,15 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
llvm_unreachable("Unexpected SSPLayoutKind.");
}
+ // We expect **all** the protected stack objects to be pre-allocated by
+ // LocalStackSlotPass. If it turns out that PEI still has to allocate some
+ // of them, we may end up messing up the expected order of the objects.
+ if (MFI.getUseLocalStackAllocationBlock() &&
+ !(LargeArrayObjs.empty() && SmallArrayObjs.empty() &&
+ AddrOfObjs.empty()))
+ llvm_unreachable("Found protected stack objects not pre-allocated by "
+ "LocalStackSlotPass.");
+
AssignProtectedObjSet(LargeArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
Offset, MaxAlign, Skew);
AssignProtectedObjSet(SmallArrayObjs, ProtectedObjs, MFI, StackGrowsDown,
@@ -905,11 +1006,13 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
continue;
if (MFI.isDeadObjectIndex(i))
continue;
- if (MFI.getStackProtectorIndex() == (int)i ||
- EHRegNodeFrameIndex == (int)i)
+ if (MFI.getStackProtectorIndex() == (int)i || EHRegNodeFrameIndex == (int)i)
continue;
if (ProtectedObjs.count(i))
continue;
+ if (MFI.getStackID(i) !=
+ TargetStackID::Default) // Only allocate objects on the default stack.
+ continue;
// Add the objects that we need to allocate to our working set.
ObjectsToAllocate.push_back(i);
@@ -1026,8 +1129,16 @@ void PEI::insertPrologEpilogCode(MachineFunction &MF) {
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
/// register references and actual offsets.
void PEI::replaceFrameIndices(MachineFunction &MF) {
- const TargetFrameLowering &TFI = *MF.getSubtarget().getFrameLowering();
- if (!TFI.needsFrameIndexResolution(MF)) return;
+ const auto &ST = MF.getSubtarget();
+ const TargetFrameLowering &TFI = *ST.getFrameLowering();
+ if (!TFI.needsFrameIndexResolution(MF))
+ return;
+
+ const TargetRegisterInfo *TRI = ST.getRegisterInfo();
+
+ // Allow the target to determine this after knowing the frame size.
+ FrameIndexEliminationScavenging = (RS && !FrameIndexVirtualScavenging) ||
+ TRI->requiresFrameIndexReplacementScavenging(MF);
// Store SPAdj at exit of a basic block.
SmallVector<int, 8> SPState;
@@ -1095,12 +1206,37 @@ void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &MF,
assert(i == 0 && "Frame indices can only appear as the first "
"operand of a DBG_VALUE machine instruction");
unsigned Reg;
+ unsigned FrameIdx = MI.getOperand(0).getIndex();
+ unsigned Size = MF.getFrameInfo().getObjectSize(FrameIdx);
+
int64_t Offset =
- TFI->getFrameIndexReference(MF, MI.getOperand(0).getIndex(), Reg);
+ TFI->getFrameIndexReference(MF, FrameIdx, Reg);
MI.getOperand(0).ChangeToRegister(Reg, false /*isDef*/);
MI.getOperand(0).setIsDebug();
- auto *DIExpr = DIExpression::prepend(MI.getDebugExpression(),
- DIExpression::NoDeref, Offset);
+
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ // If we have a direct DBG_VALUE, and its location expression isn't
+ // currently complex, then adding an offset will morph it into a
+ // complex location that is interpreted as being a memory address.
+ // This changes a pointer-valued variable to dereference that pointer,
+ // which is incorrect. Fix by adding DW_OP_stack_value.
+ unsigned PrependFlags = DIExpression::ApplyOffset;
+ if (!MI.isIndirectDebugValue() && !DIExpr->isComplex())
+ PrependFlags |= DIExpression::StackValue;
+
+ // If we have a DBG_VALUE that is indirect and has an implicit location
+ // expression, we need to insert a deref before prepending a memory
+ // location expression. After doing this we also change the DBG_VALUE
+ // to be direct.
+ if (MI.isIndirectDebugValue() && DIExpr->isImplicit()) {
+ SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, Size};
+ bool WithStackValue = true;
+ DIExpr = DIExpression::prependOpcodes(DIExpr, Ops, WithStackValue);
+ // Make the DBG_VALUE direct.
+ MI.getOperand(1).ChangeToRegister(0, false);
+ }
+ DIExpr = DIExpression::prepend(DIExpr, PrependFlags, Offset);
MI.getOperand(3).setMetadata(DIExpr);
continue;
}
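
Condensed, the DBG_VALUE rewrite above performs two expression edits using the same DIExpression entry points the hunk itself relies on. A sketch under the hunk's assumptions (direct simple locations gain DW_OP_stack_value so the variable stays a value; indirect implicit ones gain a sized deref first):

#include "llvm/ADT/SmallVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/IR/DebugInfoMetadata.h"
using namespace llvm;

// Fold a frame-register offset into Expr without changing whether the
// variable is a value or a memory location.
static const DIExpression *rewriteForFrameOffset(const DIExpression *Expr,
                                                 bool IsIndirect,
                                                 unsigned ByteSize,
                                                 int64_t Offset) {
  unsigned Flags = DIExpression::ApplyOffset;
  if (!IsIndirect && !Expr->isComplex())
    Flags |= DIExpression::StackValue; // keep it a value, not a memory loc
  if (IsIndirect && Expr->isImplicit()) {
    SmallVector<uint64_t, 2> Ops = {dwarf::DW_OP_deref_size, ByteSize};
    Expr = DIExpression::prependOpcodes(Expr, Ops, /*StackValue=*/true);
  }
  return DIExpression::prepend(Expr, Flags, Offset);
}
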
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 6ca8d86e3f8e..da3ef4b771f3 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/PseudoSourceValue.cpp ----------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index a9f0a9387297..f05c97ad621e 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -1,9 +1,8 @@
//===---- ReachingDefAnalysis.cpp - Reaching Def Analysis ---*- C++ -*-----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index bc28a054c680..1cbe75c27d13 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -1,9 +1,8 @@
//===- RegAllocBase.cpp - Register Allocator Base Class -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -20,6 +19,7 @@
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegMatrix.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/VirtRegMap.h"
@@ -119,16 +119,19 @@ void RegAllocBase::allocatePhysRegs() {
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(VirtReg->reg), E = MRI->reg_instr_end();
I != E; ) {
- MachineInstr *TmpMI = &*(I++);
- if (TmpMI->isInlineAsm()) {
- MI = TmpMI;
+ MI = &*(I++);
+ if (MI->isInlineAsm())
break;
- }
}
- if (MI)
+ if (MI && MI->isInlineAsm()) {
MI->emitError("inline assembly requires more registers than available");
- else
+ } else if (MI) {
+ LLVMContext &Context =
+ MI->getParent()->getParent()->getMMI().getModule()->getContext();
+ Context.emitError("ran out of registers during register allocation");
+ } else {
report_fatal_error("ran out of registers during register allocation");
+ }
// Keep going after reporting the error.
VRM->assignVirt2Phys(VirtReg->reg,
RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front());
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
index 686ffc36e049..6a7cc5ba4308 100644
--- a/lib/CodeGen/RegAllocBase.h
+++ b/lib/CodeGen/RegAllocBase.h
@@ -1,9 +1,8 @@
//===- RegAllocBase.h - basic regalloc interface and driver -----*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
index daeff3fc3963..46f6946f7003 100644
--- a/lib/CodeGen/RegAllocBasic.cpp
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -1,9 +1,8 @@
//===-- RegAllocBasic.cpp - Basic Register Allocator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index eb3a4e481f5d..2ffa5e389f89 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -1,9 +1,8 @@
//===- RegAllocFast.cpp - A fast register allocator for debug code --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -102,6 +101,10 @@ namespace {
DenseMap<unsigned, SmallVector<MachineInstr *, 2>> LiveDbgValueMap;
+ /// Has a bit set for every virtual register for which it was determined
+ /// that it is alive across blocks.
+ BitVector MayLiveAcrossBlocks;
+
/// State of a physical register.
enum RegState {
/// A disabled register is not available for allocation, but an alias may
@@ -152,6 +155,7 @@ namespace {
enum : unsigned {
spillClean = 50,
spillDirty = 100,
+ spillPrefBonus = 20,
spillImpossible = ~0u
};
@@ -204,19 +208,26 @@ namespace {
}
void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
+ void allocVirtRegUndef(MachineOperand &MO);
MCPhysReg defineVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
unsigned Hint);
LiveReg &reloadVirtReg(MachineInstr &MI, unsigned OpNum, unsigned VirtReg,
unsigned Hint);
- void spillAll(MachineBasicBlock::iterator MI);
+ void spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut);
bool setPhysReg(MachineInstr &MI, MachineOperand &MO, MCPhysReg PhysReg);
+ unsigned traceCopies(unsigned VirtReg) const;
+ unsigned traceCopyChain(unsigned Reg) const;
+
int getStackSpaceFor(unsigned VirtReg);
void spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
MCPhysReg AssignedReg, bool Kill);
void reload(MachineBasicBlock::iterator Before, unsigned VirtReg,
MCPhysReg PhysReg);
+ bool mayLiveOut(unsigned VirtReg);
+ bool mayLiveIn(unsigned VirtReg);
+
void dumpState();
};
@@ -251,6 +262,53 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
return FrameIdx;
}
+/// Returns false if \p VirtReg is known to not live out of the current block.
+bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
+ if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) {
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+
+ // If this block loops back to itself, it would be necessary to check whether
+ // the use comes after the def.
+ if (MBB->isSuccessor(MBB)) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return true;
+ }
+
+ // See if the first \p Limit uses of the register are all in the current
+ // block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
+ if (UseInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ // Cannot be live-out if there are no successors.
+ return !MBB->succ_empty();
+ }
+ }
+
+ return false;
+}
+
+/// Returns false if \p VirtReg is known to not be live into the current block.
+bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
+ if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+ return !MBB->pred_empty();
+
+ // See if the first \p Limit defs of the register are all in the current block.
+ static const unsigned Limit = 8;
+ unsigned C = 0;
+ for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
+ if (DefInst.getParent() != MBB || ++C >= Limit) {
+ MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return !MBB->pred_empty();
+ }
+ }
+
+ return false;
+}
+
/// Insert spill instruction for \p AssignedReg before \p Before. Update
/// DBG_VALUEs with \p VirtReg operands with the stack slot.
void RegAllocFast::spill(MachineBasicBlock::iterator Before, unsigned VirtReg,
@@ -374,7 +432,7 @@ void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI, LiveReg &LR) {
}
/// Spill all dirty virtregs without killing them.
-void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
+void RegAllocFast::spillAll(MachineBasicBlock::iterator MI, bool OnlyLiveOut) {
if (LiveVirtRegs.empty())
return;
// The LiveRegMap is keyed by an unsigned (the virtreg number), so the order
@@ -382,6 +440,8 @@ void RegAllocFast::spillAll(MachineBasicBlock::iterator MI) {
for (LiveReg &LR : LiveVirtRegs) {
if (!LR.PhysReg)
continue;
+ if (OnlyLiveOut && !mayLiveOut(LR.VirtReg))
+ continue;
spillVirtReg(MI, LR);
}
LiveVirtRegs.clear();
@@ -558,8 +618,48 @@ void RegAllocFast::assignVirtToPhysReg(LiveReg &LR, MCPhysReg PhysReg) {
setPhysRegState(PhysReg, VirtReg);
}
+static bool isCoalescable(const MachineInstr &MI) {
+ return MI.isFullCopy();
+}
+
+unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
+ static const unsigned ChainLengthLimit = 3;
+ unsigned C = 0;
+ do {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ return Reg;
+ assert(TargetRegisterInfo::isVirtualRegister(Reg));
+
+ MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
+ if (!VRegDef || !isCoalescable(*VRegDef))
+ return 0;
+ Reg = VRegDef->getOperand(1).getReg();
+ } while (++C <= ChainLengthLimit);
+ return 0;
+}
+
+/// Check if any of \p VirtReg's definitions is a copy. If so, follow the
+/// chain of copies to check whether we reach a physical register we can
+/// coalesce with.
+unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
+ static const unsigned DefLimit = 3;
+ unsigned C = 0;
+ for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
+ if (isCoalescable(MI)) {
+ unsigned Reg = MI.getOperand(1).getReg();
+ Reg = traceCopyChain(Reg);
+ if (Reg != 0)
+ return Reg;
+ }
+
+ if (++C >= DefLimit)
+ break;
+ }
+ return 0;
+}
+
/// Allocates a physical register for VirtReg.
-void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
+void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
const unsigned VirtReg = LR.VirtReg;
assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
@@ -567,32 +667,54 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
LLVM_DEBUG(dbgs() << "Search register for " << printReg(VirtReg)
- << " in class " << TRI->getRegClassName(&RC) << '\n');
+ << " in class " << TRI->getRegClassName(&RC)
+ << " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (TargetRegisterInfo::isPhysicalRegister(Hint) &&
- MRI->isAllocatable(Hint) && RC.contains(Hint)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Hint0) &&
+ MRI->isAllocatable(Hint0) && RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
- unsigned Cost = calcSpillCost(Hint);
+ unsigned Cost = calcSpillCost(Hint0);
if (Cost < spillDirty) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << '\n');
if (Cost)
- definePhysReg(MI, Hint, regFree);
- assignVirtToPhysReg(LR, Hint);
+ definePhysReg(MI, Hint0, regFree);
+ assignVirtToPhysReg(LR, Hint0);
return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 1: " << printReg(Hint0, TRI)
+ << "occupied\n");
}
+ } else {
+ Hint0 = 0;
}
- // First try to find a completely free register.
- ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
- for (MCPhysReg PhysReg : AllocationOrder) {
- if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) {
- assignVirtToPhysReg(LR, PhysReg);
+ // Try other hint.
+ unsigned Hint1 = traceCopies(VirtReg);
+ if (TargetRegisterInfo::isPhysicalRegister(Hint1) &&
+ MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
+ !isRegUsedInInstr(Hint1)) {
+ // Ignore the hint if we would have to spill a dirty register.
+ unsigned Cost = calcSpillCost(Hint1);
+ if (Cost < spillDirty) {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << '\n');
+ if (Cost)
+ definePhysReg(MI, Hint1, regFree);
+ assignVirtToPhysReg(LR, Hint1);
return;
+ } else {
+ LLVM_DEBUG(dbgs() << "\tPreferred Register 0: " << printReg(Hint1, TRI)
+ << "occupied\n");
}
+ } else {
+ Hint1 = 0;
}
MCPhysReg BestReg = 0;
unsigned BestCost = spillImpossible;
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
for (MCPhysReg PhysReg : AllocationOrder) {
LLVM_DEBUG(dbgs() << "\tRegister: " << printReg(PhysReg, TRI) << ' ');
unsigned Cost = calcSpillCost(PhysReg);
@@ -602,6 +724,10 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
assignVirtToPhysReg(LR, PhysReg);
return;
}
+
+ if (PhysReg == Hint1 || PhysReg == Hint0)
+ Cost -= spillPrefBonus;
+
if (Cost < BestCost) {
BestReg = PhysReg;
BestCost = Cost;
@@ -624,6 +750,31 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint) {
assignVirtToPhysReg(LR, BestReg);
}
+void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
+ assert(MO.isUndef() && "expected undef use");
+ unsigned VirtReg = MO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg");
+
+ LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
+ MCPhysReg PhysReg;
+ if (LRI != LiveVirtRegs.end() && LRI->PhysReg) {
+ PhysReg = LRI->PhysReg;
+ } else {
+ const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
+ ArrayRef<MCPhysReg> AllocationOrder = RegClassInfo.getOrder(&RC);
+ assert(!AllocationOrder.empty() && "Allocation order must not be empty");
+ PhysReg = AllocationOrder[0];
+ }
+
+ unsigned SubRegIdx = MO.getSubReg();
+ if (SubRegIdx != 0) {
+ PhysReg = TRI->getSubReg(PhysReg, SubRegIdx);
+ MO.setSubReg(0);
+ }
+ MO.setReg(PhysReg);
+ MO.setIsRenamable(true);
+}
+
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint) {
@@ -941,12 +1092,23 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// Second scan.
// Allocate virtreg uses.
+ bool HasUndefUse = false;
for (unsigned I = 0; I != VirtOpEnd; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
+ if (MO.isUndef()) {
+ HasUndefUse = true;
+ // There is no need to allocate a register for an undef use.
+ continue;
+ }
+
+ // Populate MayLiveAcrossBlocks in case the use block is allocated before
+ // the def block (removing the vreg uses).
+ mayLiveIn(Reg);
+
LiveReg &LR = reloadVirtReg(MI, I, Reg, CopyDstReg);
MCPhysReg PhysReg = LR.PhysReg;
CopySrcReg = (CopySrcReg == Reg || CopySrcReg == PhysReg) ? PhysReg : 0;
@@ -955,6 +1117,22 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
}
}
+ // Allocate undef operands. This is a separate step because in a situation
+ // like ` = OP undef %X, %X` both operands need the same register assigned,
+ // so we should perform the normal assignment first.
+ if (HasUndefUse) {
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ assert(MO.isUndef() && "Should only have undef virtreg uses left");
+ allocVirtRegUndef(MO);
+ }
+ }
+
// Track registers defined by instruction - early clobbers and tied uses at
// this point.
UsedInInstr.clear();
@@ -979,10 +1157,24 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
// definitions may be used later on and we do not want to reuse
// those for virtual registers in between.
LLVM_DEBUG(dbgs() << " Spilling remaining registers before call.\n");
- spillAll(MI);
+ spillAll(MI, /*OnlyLiveOut*/ false);
}
// Third scan.
+ // Mark all physreg defs as used before allocating virtreg defs.
+ for (unsigned I = 0; I != DefOpEnd; ++I) {
+ const MachineOperand &MO = MI.getOperand(I);
+ if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
+ continue;
+ unsigned Reg = MO.getReg();
+
+ if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ !MRI->isAllocatable(Reg))
+ continue;
+ definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ }
+
+ // Fourth scan.
// Allocate defs and collect dead defs.
for (unsigned I = 0; I != DefOpEnd; ++I) {
const MachineOperand &MO = MI.getOperand(I);
@@ -990,11 +1182,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (!MRI->isAllocatable(Reg)) continue;
- definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
+ // We have already dealt with phys regs in the previous scan.
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
continue;
- }
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
VirtDead.push_back(Reg);
@@ -1089,7 +1279,7 @@ void RegAllocFast::allocateBasicBlock(MachineBasicBlock &MBB) {
// Spill all physical registers holding virtual registers now.
LLVM_DEBUG(dbgs() << "Spilling live registers at end of block.\n");
- spillAll(MBB.getFirstTerminator());
+ spillAll(MBB.getFirstTerminator(), /*OnlyLiveOut*/ true);
// Erase all the coalesced copies. We are delaying it until now because
// LiveVirtRegs might refer to the instrs.
@@ -1118,6 +1308,8 @@ bool RegAllocFast::runOnMachineFunction(MachineFunction &MF) {
unsigned NumVirtRegs = MRI->getNumVirtRegs();
StackSlotForVirtReg.resize(NumVirtRegs);
LiveVirtRegs.setUniverse(NumVirtRegs);
+ MayLiveAcrossBlocks.clear();
+ MayLiveAcrossBlocks.resize(NumVirtRegs);
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineBasicBlock &MBB : MF)
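
MayLiveAcrossBlocks, cleared and resized above, is one bit per virtual register. Its maintenance lives outside these hunks, so the following is an assumed minimal model of the bookkeeping, not the pass's actual code:

  #include "llvm/ADT/BitVector.h"

  // One sticky bit per vreg index: set once a vreg is observed to be used in
  // a block other than its defining block, so it must be treated as live
  // across block boundaries when deciding what spillAll may skip.
  llvm::BitVector MayLiveAcrossBlocks;

  void noteMayLiveAcrossBlocks(unsigned VirtRegIndex) {
    MayLiveAcrossBlocks.set(VirtRegIndex);
  }

  bool mayLiveAcrossBlocks(unsigned VirtRegIndex) {
    return MayLiveAcrossBlocks.test(VirtRegIndex);
  }
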
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 81b21b442437..771fc46415db 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -1,9 +1,8 @@
//===- RegAllocGreedy.cpp - greedy register allocator ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -138,7 +137,7 @@ CSRFirstTimeCost("regalloc-csr-first-time-cost",
cl::init(0), cl::Hidden);
static cl::opt<bool> ConsiderLocalIntervalCost(
- "condsider-local-interval-cost", cl::Hidden,
+ "consider-local-interval-cost", cl::Hidden,
cl::desc("Consider the cost of local intervals created by a split "
"candidate when choosing the best split candidate."),
cl::init(false));
@@ -465,7 +464,8 @@ private:
void calcGapWeights(unsigned, SmallVectorImpl<float>&);
unsigned canReassign(LiveInterval &VirtReg, unsigned PrevReg);
bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool);
- bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&);
+ bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&,
+ const SmallVirtRegSet&);
bool canEvictInterferenceInRange(LiveInterval &VirtReg, unsigned PhysReg,
SlotIndex Start, SlotIndex End,
EvictionCost &MaxCost);
@@ -479,9 +479,11 @@ private:
const SmallVirtRegSet &FixedRegisters);
unsigned tryAssign(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<unsigned>&,
+ const SmallVirtRegSet&);
unsigned tryEvict(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&, unsigned = ~0u);
+ SmallVectorImpl<unsigned>&, unsigned,
+ const SmallVirtRegSet&);
unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned isSplitBenefitWorthCost(LiveInterval &VirtReg);
@@ -508,7 +510,8 @@ private:
unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
SmallVectorImpl<unsigned>&);
unsigned trySplit(LiveInterval&, AllocationOrder&,
- SmallVectorImpl<unsigned>&);
+ SmallVectorImpl<unsigned>&,
+ const SmallVirtRegSet&);
unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &,
SmallVectorImpl<unsigned> &,
SmallVirtRegSet &, unsigned);
@@ -758,7 +761,8 @@ LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) {
/// tryAssign - Try to assign VirtReg to an available register.
unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
AllocationOrder &Order,
- SmallVectorImpl<unsigned> &NewVRegs) {
+ SmallVectorImpl<unsigned> &NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
Order.rewind();
unsigned PhysReg;
while ((PhysReg = Order.next()))
@@ -776,7 +780,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << "missed hint " << printReg(Hint, TRI) << '\n');
EvictionCost MaxCost;
MaxCost.setBrokenHints(1);
- if (canEvictInterference(VirtReg, Hint, true, MaxCost)) {
+ if (canEvictInterference(VirtReg, Hint, true, MaxCost, FixedRegisters)) {
evictInterference(VirtReg, Hint, NewVRegs);
return Hint;
}
@@ -794,7 +798,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg,
LLVM_DEBUG(dbgs() << printReg(PhysReg, TRI) << " is available at cost "
<< Cost << '\n');
- unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost);
+ unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost, FixedRegisters);
return CheapReg ? CheapReg : PhysReg;
}
@@ -866,7 +870,8 @@ bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint,
/// when returning true.
/// @returns True when interference can be evicted cheaper than MaxCost.
bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
- bool IsHint, EvictionCost &MaxCost) {
+ bool IsHint, EvictionCost &MaxCost,
+ const SmallVirtRegSet &FixedRegisters) {
// It is only possible to evict virtual register interference.
if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg)
return false;
@@ -896,6 +901,13 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
"Only expecting virtual register interference from query");
+
+ // Do not allow eviction of a virtual register if we are in the middle
+ // of last-chance recoloring and this virtual register is one that we
+ // have scavenged a physical register for.
+ if (FixedRegisters.count(Intf->reg))
+ return false;
+
// Never evict spill products. They cannot be split or spilled.
if (getStage(*Intf) == RS_Done)
return false;
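
The new FixedRegisters check guards last-chance recoloring: a virtual register that the current recoloring session has already pinned to a physical register must not be evicted again, or the session could undo its own work. A toy model of the invariant (plain C++; nothing here is LLVM API):

  #include <set>

  struct RecoloringSession {
    std::set<unsigned> FixedRegisters; // vregs pinned during this session

    // Eviction is only legal for interference the session has not fixed.
    bool mayEvict(unsigned InterferingVReg) const {
      return FixedRegisters.count(InterferingVReg) == 0;
    }
  };
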
@@ -1094,7 +1106,8 @@ bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const {
unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
AllocationOrder &Order,
SmallVectorImpl<unsigned> &NewVRegs,
- unsigned CostPerUseLimit) {
+ unsigned CostPerUseLimit,
+ const SmallVirtRegSet &FixedRegisters) {
NamedRegionTimer T("evict", "Evict", TimerGroupName, TimerGroupDescription,
TimePassesIsEnabled);
@@ -1142,7 +1155,8 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg,
continue;
}
- if (!canEvictInterference(VirtReg, PhysReg, false, BestCost))
+ if (!canEvictInterference(VirtReg, PhysReg, false, BestCost,
+ FixedRegisters))
continue;
// Best so far.
@@ -2248,8 +2262,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber());
LLVM_DEBUG(dbgs() << RMS.size() << " regmasks in block:");
// Constrain to VirtReg's live range.
- unsigned ri = std::lower_bound(RMS.begin(), RMS.end(),
- Uses.front().getRegSlot()) - RMS.begin();
+ unsigned ri =
+ llvm::lower_bound(RMS, Uses.front().getRegSlot()) - RMS.begin();
unsigned re = RMS.size();
for (unsigned i = 0; i != NumGaps && ri != re; ++i) {
// Look for Uses[i] <= RMS <= Uses[i+1].
@@ -2444,7 +2458,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
/// assignable.
/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
- SmallVectorImpl<unsigned>&NewVRegs) {
+ SmallVectorImpl<unsigned>&NewVRegs,
+ const SmallVirtRegSet &FixedRegisters) {
// Ranges must be Split2 or less.
if (getStage(VirtReg) >= RS_Spill)
return 0;
@@ -2472,7 +2487,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
if (SA->didRepairRange()) {
// VirtReg has changed, so all cached queries are invalid.
Matrix->invalidateVirtRegs();
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs))
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters))
return PhysReg;
}
@@ -2611,6 +2626,7 @@ unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg,
DenseMap<unsigned, unsigned> VirtRegToPhysReg;
// Mark VirtReg as fixed, i.e., it will not be recolored past this point in
// this recoloring "session".
+ assert(!FixedRegisters.count(VirtReg.reg));
FixedRegisters.insert(VirtReg.reg);
SmallVector<unsigned, 4> CurrentNewVRegs;
@@ -2858,14 +2874,14 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
if (!Instr.isFullCopy())
continue;
// Look for the other end of the copy.
- unsigned OtherReg = Instr.getOperand(0).getReg();
+ Register OtherReg = Instr.getOperand(0).getReg();
if (OtherReg == Reg) {
OtherReg = Instr.getOperand(1).getReg();
if (OtherReg == Reg)
continue;
}
// Get the current assignment.
- unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
? OtherReg
: VRM->getPhys(OtherReg);
// Push the collected information.
@@ -3022,7 +3038,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
unsigned CostPerUseLimit = ~0u;
// First try assigning a free register.
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix);
- if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) {
+ if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs, FixedRegisters)) {
// If VirtReg got an assignment, the eviction info is no longer relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
// When NewVRegs is not empty, we may have made decisions such as evicting
@@ -3049,7 +3065,8 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
// get a second chance until they have been split.
if (Stage != RS_Split)
if (unsigned PhysReg =
- tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) {
+ tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit,
+ FixedRegisters)) {
unsigned Hint = MRI->getSimpleHint(VirtReg.reg);
// If VirtReg has a hint and that hint is broken, record this
// virtual register as a recoloring candidate for broken hint.
@@ -3079,7 +3096,7 @@ unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg,
if (Stage < RS_Spill) {
// Try splitting VirtReg or interferences.
unsigned NewVRegSizeBefore = NewVRegs.size();
- unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs, FixedRegisters);
if (PhysReg || (NewVRegs.size() - NewVRegSizeBefore)) {
// If VirtReg got split, the eviction info is no longer relevant.
LastEvicted.clearEvicteeInfo(VirtReg.reg);
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c19001c8403d..7a5a6c148ed4 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -1,9 +1,8 @@
//===- RegAllocPBQP.cpp ---- PBQP Register Allocator ----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index 66c7c5cd7dbf..b37dfada7101 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -1,9 +1,8 @@
//===-- RegUsageInfoCollector.cpp - Register Usage Information Collector --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
@@ -78,14 +77,48 @@ FunctionPass *llvm::createRegUsageInfoCollector() {
return new RegUsageInfoCollector();
}
+// TODO: Move to a hook somewhere?
+
+// Return true if it is useful to track the used registers for IPRA / no CSR
+// optimizations. This is not useful for entry points, and computing the
+// register usage information is expensive.
+static bool isCallableFunction(const MachineFunction &MF) {
+ switch (MF.getFunction().getCallingConv()) {
+ case CallingConv::AMDGPU_VS:
+ case CallingConv::AMDGPU_GS:
+ case CallingConv::AMDGPU_PS:
+ case CallingConv::AMDGPU_CS:
+ case CallingConv::AMDGPU_HS:
+ case CallingConv::AMDGPU_ES:
+ case CallingConv::AMDGPU_LS:
+ case CallingConv::AMDGPU_KERNEL:
+ return false;
+ default:
+ return true;
+ }
+}
+
bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo *MRI = &MF.getRegInfo();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
const LLVMTargetMachine &TM = MF.getTarget();
LLVM_DEBUG(dbgs() << " -------------------- " << getPassName()
- << " -------------------- \n");
- LLVM_DEBUG(dbgs() << "Function Name : " << MF.getName() << "\n");
+ << " -------------------- \nFunction Name : "
+ << MF.getName() << '\n');
+
+ // Analyzing the register usage may be expensive on some targets.
+ if (!isCallableFunction(MF)) {
+ LLVM_DEBUG(dbgs() << "Not analyzing non-callable function\n");
+ return false;
+ }
+
+ // If there are no callers, there's no point in computing more precise
+ // register usage here.
+ if (MF.getFunction().use_empty()) {
+ LLVM_DEBUG(dbgs() << "Not analyzing function with no callers\n");
+ return false;
+ }
std::vector<uint32_t> RegMask;
@@ -111,6 +144,7 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
};
// Scan all the physical registers. When a register is defined in the current
// function, set it and all the aliasing registers as defined in the regmask.
+ // FIXME: Rewrite to use regunits.
for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
// Don't count registers that are saved and restored.
if (SavedRegs.test(PReg))
@@ -136,11 +170,14 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
<< " function optimized for not having CSR.\n");
}
- for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg)
- if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
- LLVM_DEBUG(dbgs() << printReg(PReg, TRI) << " ");
+ LLVM_DEBUG(
+ for (unsigned PReg = 1, PRegE = TRI->getNumRegs(); PReg < PRegE; ++PReg) {
+ if (MachineOperand::clobbersPhysReg(&(RegMask[0]), PReg))
+ dbgs() << printReg(PReg, TRI) << " ";
+ }
- LLVM_DEBUG(dbgs() << " \n----------------------------------------\n");
+ dbgs() << " \n----------------------------------------\n";
+ );
PRUI.storeUpdateRegUsageInfo(F, RegMask);
@@ -155,38 +192,17 @@ computeCalleeSavedRegs(BitVector &SavedRegs, MachineFunction &MF) {
// Target will return the set of registers that it saves/restores as needed.
SavedRegs.clear();
TFI.determineCalleeSaves(MF, SavedRegs);
+ if (SavedRegs.none())
+ return;
// Insert subregs.
const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF);
for (unsigned i = 0; CSRegs[i]; ++i) {
- unsigned Reg = CSRegs[i];
- if (SavedRegs.test(Reg))
- for (MCSubRegIterator SR(Reg, &TRI, false); SR.isValid(); ++SR)
+ MCPhysReg Reg = CSRegs[i];
+ if (SavedRegs.test(Reg)) {
+ // Save subregisters
+ for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
SavedRegs.set(*SR);
- }
-
- // Insert any register fully saved via subregisters.
- for (const TargetRegisterClass *RC : TRI.regclasses()) {
- if (!RC->CoveredBySubRegs)
- continue;
-
- for (unsigned PReg = 1, PRegE = TRI.getNumRegs(); PReg < PRegE; ++PReg) {
- if (SavedRegs.test(PReg))
- continue;
-
- // Check if PReg is fully covered by its subregs.
- if (!RC->contains(PReg))
- continue;
-
- // Add PReg to SavedRegs if all subregs are saved.
- bool AllSubRegsSaved = true;
- for (MCSubRegIterator SR(PReg, &TRI, false); SR.isValid(); ++SR)
- if (!SavedRegs.test(*SR)) {
- AllSubRegsSaved = false;
- break;
- }
- if (AllSubRegsSaved)
- SavedRegs.set(PReg);
}
}
}
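
One detail worth noting in the rewrite above: MCSubRegIterator's third parameter, IncludeSelf, already defaults to false, so dropping the explicit false does not change which registers are visited. The two spellings below, mirroring the removed and added lines, are equivalent:

  // Both loops visit only the strict subregisters of Reg, not Reg itself.
  for (MCSubRegIterator SR(Reg, &TRI, /*IncludeSelf=*/false); SR.isValid(); ++SR)
    SavedRegs.set(*SR);
  for (MCSubRegIterator SR(Reg, &TRI); SR.isValid(); ++SR)
    SavedRegs.set(*SR);
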
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index 256de295821d..fc4be82d215e 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -1,9 +1,8 @@
//=--- RegUsageInfoPropagate.cpp - Register Usage Information Propagation --=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp
index add8faec97d4..530e0cccf1d4 100644
--- a/lib/CodeGen/RegisterClassInfo.cpp
+++ b/lib/CodeGen/RegisterClassInfo.cpp
@@ -1,9 +1,8 @@
//===- RegisterClassInfo.cpp - Dynamic Register Class Info ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -91,6 +90,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) {
void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
assert(RC && "no register class given");
RCInfo &RCI = RegClass[RC->getID()];
+ auto &STI = MF->getSubtarget();
// Raw register count, including all reserved regs.
unsigned NumRegs = RC->getNumRegs();
@@ -115,7 +115,8 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const {
unsigned Cost = TRI->getCostPerUse(PhysReg);
MinCost = std::min(MinCost, Cost);
- if (CalleeSavedAliases[PhysReg])
+ if (CalleeSavedAliases[PhysReg] &&
+ !STI.ignoreCSRForAllocationOrder(*MF, PhysReg))
// PhysReg aliases a CSR, save it for later.
CSRAlias.push_back(PhysReg);
else {
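
ignoreCSRForAllocationOrder is a subtarget hook that lets a target keep chosen CSR-aliasing registers in the cheap part of the allocation order. A hypothetical override, with the target name and register invented for illustration (the hook's signature is inferred from the call site above):

  // In some target's Subtarget class (target "Foo" and register X18 invented):
  bool FooSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
                                                 unsigned PhysReg) const {
    // Treat X18 as if it were not callee-saved when ordering registers.
    return PhysReg == Foo::X18;
  }
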
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2a06d5e95fbb..2db6ab454cea 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -1,9 +1,8 @@
//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -105,6 +104,19 @@ static cl::opt<unsigned> LateRematUpdateThreshold(
"repeated work. "),
cl::init(100));
+static cl::opt<unsigned> LargeIntervalSizeThreshold(
+ "large-interval-size-threshold", cl::Hidden,
+ cl::desc("If the number of valnos of an interval is larger than the "
+ "threshold, it is regarded as a large interval. "),
+ cl::init(100));
+
+static cl::opt<unsigned> LargeIntervalFreqThreshold(
+ "large-interval-freq-threshold", cl::Hidden,
+ cl::desc("For a large interval, if it has been coalesced with other live "
+ "intervals more times than the threshold, stop coalescing it to "
+ "limit compile time. "),
+ cl::init(100));
+
namespace {
class RegisterCoalescer : public MachineFunctionPass,
@@ -153,6 +165,10 @@ namespace {
/// lateLiveIntervalUpdate is called.
DenseSet<unsigned> ToBeUpdated;
+ /// Record how many times a large live interval with many valnos
+ /// has been tried to join with other live intervals.
+ DenseMap<unsigned, unsigned long> LargeLIVisitCounter;
+
/// Recursively eliminate dead defs in DeadDefs.
void eliminateDeadDefs();
@@ -195,6 +211,11 @@ namespace {
/// Attempt joining two virtual registers. Return true on success.
bool joinVirtRegs(CoalescerPair &CP);
+ /// If a live interval has many valnos and has been coalesced with other
+ /// live intervals many times, we regard such a live interval as having
+ /// a high compile-time cost.
+ bool isHighCostLiveInterval(LiveInterval &LI);
+
/// Attempt joining with a reserved physreg.
bool joinReservedPhysReg(CoalescerPair &CP);
@@ -337,9 +358,10 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
"Simple Register Coalescing", false, false)
-static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
- unsigned &Src, unsigned &Dst,
- unsigned &SrcSub, unsigned &DstSub) {
+LLVM_NODISCARD static bool isMoveInstr(const TargetRegisterInfo &tri,
+ const MachineInstr *MI, unsigned &Src,
+ unsigned &Dst, unsigned &SrcSub,
+ unsigned &DstSub) {
if (MI->isCopy()) {
Dst = MI->getOperand(0).getReg();
DstSub = MI->getOperand(0).getSubReg();
@@ -672,8 +694,7 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
for (LiveRange::Segment &ASeg : IntA.segments) {
if (ASeg.valno != AValNo) continue;
- LiveInterval::iterator BI =
- std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
+ LiveInterval::iterator BI = llvm::upper_bound(IntB, ASeg.start);
if (BI != IntB.begin())
--BI;
for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
@@ -903,23 +924,32 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
}
SlotIndex AIdx = CopyIdx.getRegSlot(true);
LaneBitmask MaskA;
+ const SlotIndexes &Indexes = *LIS->getSlotIndexes();
for (LiveInterval::SubRange &SA : IntA.subranges()) {
VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
- assert(ASubValNo != nullptr);
+ // Even if we are dealing with a full copy, some lanes can
+ // still be undefined.
+ // E.g.,
+ // undef A.subLow = ...
+ // B = COPY A <== A.subHigh is undefined here and does
+ // not have a value number.
+ if (!ASubValNo)
+ continue;
MaskA |= SA.LaneMask;
- IntB.refineSubRanges(Allocator, SA.LaneMask,
- [&Allocator,&SA,CopyIdx,ASubValNo,&ShrinkB]
- (LiveInterval::SubRange &SR) {
- VNInfo *BSubValNo = SR.empty()
- ? SR.getNextValue(CopyIdx, Allocator)
- : SR.getVNInfoAt(CopyIdx);
- assert(BSubValNo != nullptr);
- auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
- ShrinkB |= P.second;
- if (P.first)
- BSubValNo->def = ASubValNo->def;
- });
+ IntB.refineSubRanges(
+ Allocator, SA.LaneMask,
+ [&Allocator, &SA, CopyIdx, ASubValNo,
+ &ShrinkB](LiveInterval::SubRange &SR) {
+ VNInfo *BSubValNo = SR.empty() ? SR.getNextValue(CopyIdx, Allocator)
+ : SR.getVNInfoAt(CopyIdx);
+ assert(BSubValNo != nullptr);
+ auto P = addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
+ ShrinkB |= P.second;
+ if (P.first)
+ BSubValNo->def = ASubValNo->def;
+ },
+ Indexes, *TRI);
}
// Go over all subranges of IntB that have not been covered by IntA,
// and delete the segments starting at CopyIdx. This can happen if
@@ -947,7 +977,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
/// predecessor of BB2, and if B is not redefined on the way from A = B
-/// in BB2 to B = A in BB2, B = A in BB2 is partially redundant if the
+/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
/// execution goes through the path from BB0 to BB2. We may move B = A
/// to the predecessor without such reversed copy.
/// So we will transform the program from:
@@ -1494,7 +1524,8 @@ MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
// CoalescerPair may have a new register class with adjusted subreg indices
// at this point.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
- isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
+ if (!isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ return nullptr;
SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
@@ -1994,19 +2025,19 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
if (CP.isFlipped()) {
// Physreg is copied into vreg
// %y = COPY %physreg_x
- // ... //< no other def of %x here
+ // ... //< no other def of %physreg_x here
// use %y
// =>
// ...
- // use %x
+ // use %physreg_x
CopyMI = MRI->getVRegDef(SrcReg);
} else {
// VReg is copied into physreg:
// %y = def
- // ... //< no other def or use of %y here
- // %y = COPY %physreg_x
+ // ... //< no other def or use of %physreg_x here
+ // %physreg_x = COPY %y
// =>
- // %y = def
+ // %physreg_x = def
// ...
if (!MRI->hasOneNonDBGUse(SrcReg)) {
LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
@@ -3010,7 +3041,9 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// If a subrange starts at the copy then an undefined value has been
// copied and we must remove that subrange value as well.
VNInfo *ValueOut = Q.valueOutOrDead();
- if (ValueOut != nullptr && Q.valueIn() == nullptr) {
+ if (ValueOut != nullptr && (Q.valueIn() == nullptr ||
+ (V.Identical && V.Resolution == CR_Erase &&
+ ValueOut->def == Def))) {
LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
<< " at " << Def << "\n");
SmallVector<SlotIndex,8> EndPoints;
@@ -3019,7 +3052,7 @@ void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
// Mark value number as unused.
ValueOut->markUnused();
- if (V.Identical && S.Query(OtherDef).valueOut()) {
+ if (V.Identical && S.Query(OtherDef).valueOutOrDead()) {
// If V is identical to V.OtherVNI (and S was live at OtherDef),
// then we can't simply prune V from S. V needs to be replaced
// with V.OtherVNI.
@@ -3241,16 +3274,29 @@ void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
LaneBitmask LaneMask,
CoalescerPair &CP) {
BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
- LI.refineSubRanges(Allocator, LaneMask,
- [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
- if (SR.empty()) {
- SR.assign(ToMerge, Allocator);
- } else {
- // joinSubRegRange() destroys the merged range, so we need a copy.
- LiveRange RangeCopy(ToMerge, Allocator);
- joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
- }
- });
+ LI.refineSubRanges(
+ Allocator, LaneMask,
+ [this, &Allocator, &ToMerge, &CP](LiveInterval::SubRange &SR) {
+ if (SR.empty()) {
+ SR.assign(ToMerge, Allocator);
+ } else {
+ // joinSubRegRange() destroys the merged range, so we need a copy.
+ LiveRange RangeCopy(ToMerge, Allocator);
+ joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
+ }
+ },
+ *LIS->getSlotIndexes(), *TRI);
+}
+
+bool RegisterCoalescer::isHighCostLiveInterval(LiveInterval &LI) {
+ if (LI.valnos.size() < LargeIntervalSizeThreshold)
+ return false;
+ auto &Counter = LargeLIVisitCounter[LI.reg];
+ if (Counter < LargeIntervalFreqThreshold) {
+ Counter++;
+ return false;
+ }
+ return true;
}
bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
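
Taken together, the two thresholds and the counter map implement a simple throttle: once an interval with at least LargeIntervalSizeThreshold value numbers has been retried LargeIntervalFreqThreshold times, joinVirtRegs gives up on it, trading a little coalescing quality for bounded compile time. A self-contained model of the policy (plain C++, defaults matching the flags introduced at the top of this file):

  #include <cstddef>
  #include <unordered_map>

  struct CoalesceThrottle {
    std::unordered_map<unsigned, unsigned long> VisitCount;
    std::size_t SizeThreshold = 100;   // -large-interval-size-threshold
    unsigned long FreqThreshold = 100; // -large-interval-freq-threshold

    // True once the interval (identified by Reg, with NumValNos value
    // numbers) has been retried often enough that further joins are skipped.
    bool isHighCost(unsigned Reg, std::size_t NumValNos) {
      if (NumValNos < SizeThreshold)
        return false;
      unsigned long &Counter = VisitCount[Reg];
      if (Counter < FreqThreshold) {
        ++Counter;
        return false;
      }
      return true;
    }
  };
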
@@ -3265,6 +3311,9 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
+ if (isHighCostLiveInterval(LHS) || isHighCostLiveInterval(RHS))
+ return false;
+
// First compute NewVNInfo and the simple value mappings.
// Detect impossible conflicts early.
if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
@@ -3474,7 +3523,8 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (!UseTerminalRule)
return false;
unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
- isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
+ if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ return false;
// Check if the destination of this copy has any other affinity.
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
// If SrcReg is a physical register, the copy won't be coalesced.
@@ -3498,8 +3548,9 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
continue;
unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
- isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
- OtherSubReg);
+ if (!isMoveInstr(*TRI, &Copy, OtherSrcReg, OtherReg, OtherSrcSubReg,
+ OtherSubReg))
+ return false;
if (OtherReg == SrcReg)
OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
@@ -3620,6 +3671,7 @@ void RegisterCoalescer::releaseMemory() {
WorkList.clear();
DeadDefs.clear();
InflateRegs.clear();
+ LargeLIVisitCounter.clear();
}
bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
diff --git a/lib/CodeGen/RegisterCoalescer.h b/lib/CodeGen/RegisterCoalescer.h
index 1a46f6d053e6..f505d46cd338 100644
--- a/lib/CodeGen/RegisterCoalescer.h
+++ b/lib/CodeGen/RegisterCoalescer.h
@@ -1,9 +1,8 @@
//===- RegisterCoalescer.h - Register Coalescing Interface ------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 1099e468e885..7d9b3aa9b2d7 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -1,9 +1,8 @@
//===- RegisterPressure.cpp - Dynamic Register Pressure -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -846,7 +845,7 @@ void RegPressureTracker::recedeSkipDebugValues() {
CurrPos = skipDebugInstructionsBackward(std::prev(CurrPos), MBB->begin());
SlotIndex SlotIdx;
- if (RequireIntervals)
+ if (RequireIntervals && !CurrPos->isDebugInstr())
SlotIdx = LIS->getInstructionIndex(*CurrPos).getRegSlot();
// Open the top of the region using slot indexes.
@@ -856,6 +855,12 @@ void RegPressureTracker::recedeSkipDebugValues() {
void RegPressureTracker::recede(SmallVectorImpl<RegisterMaskPair> *LiveUses) {
recedeSkipDebugValues();
+ if (CurrPos->isDebugValue()) {
+ // It's possible to only have debug_value instructions and hit the start of
+ // the block.
+ assert(CurrPos == MBB->begin());
+ return;
+ }
const MachineInstr &MI = *CurrPos;
RegisterOperands RegOpers;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 3660586c1358..bb19110e6d70 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -1,9 +1,8 @@
//===- RegisterScavenging.cpp - Machine register scavenging ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -534,7 +533,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
- int SPAdj) {
+ int SPAdj, bool AllowSpill) {
MachineInstr &MI = *I;
const MachineFunction &MF = *MI.getMF();
// Consider all allocatable registers in the register class initially
@@ -565,6 +564,9 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
+ if (!AllowSpill)
+ return 0;
+
ScavengedInfo &Scavenged = spill(SReg, *RC, SPAdj, I, UseMI);
Scavenged.Restore = &*std::prev(UseMI);
@@ -576,7 +578,8 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
- bool RestoreAfter, int SPAdj) {
+ bool RestoreAfter, int SPAdj,
+ bool AllowSpill) {
const MachineBasicBlock &MBB = *To->getParent();
const MachineFunction &MF = *MBB.getParent();
@@ -590,21 +593,25 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator SpillBefore = P.second;
assert(Reg != 0 && "No register left to scavenge!");
// Found an available register?
- if (SpillBefore != MBB.end()) {
- MachineBasicBlock::iterator ReloadAfter =
- RestoreAfter ? std::next(MBBI) : MBBI;
- MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
- if (ReloadBefore != MBB.end())
- LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
- ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
- Scavenged.Restore = &*std::prev(SpillBefore);
- LiveUnits.removeReg(Reg);
- LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
- << " until " << *SpillBefore);
- } else {
+ if (SpillBefore == MBB.end()) {
LLVM_DEBUG(dbgs() << "Scavenged free register: " << printReg(Reg, TRI)
- << '\n');
+ << '\n');
+ return Reg;
}
+
+ if (!AllowSpill)
+ return 0;
+
+ MachineBasicBlock::iterator ReloadAfter =
+ RestoreAfter ? std::next(MBBI) : MBBI;
+ MachineBasicBlock::iterator ReloadBefore = std::next(ReloadAfter);
+ if (ReloadBefore != MBB.end())
+ LLVM_DEBUG(dbgs() << "Reload before: " << *ReloadBefore << '\n');
+ ScavengedInfo &Scavenged = spill(Reg, RC, SPAdj, SpillBefore, ReloadBefore);
+ Scavenged.Restore = &*std::prev(SpillBefore);
+ LiveUnits.removeReg(Reg);
+ LLVM_DEBUG(dbgs() << "Scavenged register with spill: " << printReg(Reg, TRI)
+ << " until " << *SpillBefore);
return Reg;
}
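
The new AllowSpill parameter (presumably defaulted to true in the header, so existing callers are unchanged) lets a caller probe for a free register without committing to an emergency spill. A hypothetical two-phase caller, using the signature from the hunk above; RS is a RegScavenger and the fallback policy is illustrative, not taken from this patch:

  // First try to scavenge without spilling; only permit a spill on failure.
  unsigned Reg = RS.scavengeRegisterBackwards(RC, I, /*RestoreAfter=*/false,
                                              /*SPAdj=*/0, /*AllowSpill=*/false);
  if (Reg == 0)
    Reg = RS.scavengeRegisterBackwards(RC, I, /*RestoreAfter=*/false,
                                       /*SPAdj=*/0, /*AllowSpill=*/true);
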
diff --git a/lib/CodeGen/RegisterUsageInfo.cpp b/lib/CodeGen/RegisterUsageInfo.cpp
index 6b9880a8913f..6858d7233bc5 100644
--- a/lib/CodeGen/RegisterUsageInfo.cpp
+++ b/lib/CodeGen/RegisterUsageInfo.cpp
@@ -1,9 +1,8 @@
//===- RegisterUsageInfo.cpp - Register Usage Information Storage ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 156d1c81c238..22cff48c3051 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -1,9 +1,8 @@
//===-- RenameIndependentSubregs.cpp - Live Interval Analysis -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ResetMachineFunctionPass.cpp b/lib/CodeGen/ResetMachineFunctionPass.cpp
index a02302e6ff99..019de6554d2a 100644
--- a/lib/CodeGen/ResetMachineFunctionPass.cpp
+++ b/lib/CodeGen/ResetMachineFunctionPass.cpp
@@ -1,9 +1,8 @@
//===-- ResetMachineFunctionPass.cpp - Reset Machine Function ----*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
@@ -27,6 +26,7 @@ using namespace llvm;
#define DEBUG_TYPE "reset-machine-function"
STATISTIC(NumFunctionsReset, "Number of functions reset");
+STATISTIC(NumFunctionsVisited, "Number of functions visited");
namespace {
class ResetMachineFunction : public MachineFunctionPass {
@@ -51,6 +51,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) override {
+ ++NumFunctionsVisited;
// No matter what happened, whether we successfully selected the function
// or not, nothing is going to use the vreg types after us. Make sure they
// disappear.
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index c356fb57ac6d..a6bc7330e2cc 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -1,9 +1,8 @@
//===- SafeStack.cpp - Safe Stack Insertion -------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -372,7 +371,7 @@ Value *SafeStack::getStackGuard(IRBuilder<> &IRB, Function &F) {
if (!StackGuardVar)
StackGuardVar =
F.getParent()->getOrInsertGlobal("__stack_chk_guard", StackPtrTy);
- return IRB.CreateLoad(StackGuardVar, "StackGuard");
+ return IRB.CreateLoad(StackPtrTy, StackGuardVar, "StackGuard");
}
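
This and the following SafeStack hunks are one mechanical migration: IRBuilder creation of loads and GEPs now takes the pointee type explicitly instead of deriving it from the pointer operand's type, in preparation for opaque pointers. Schematically (the pattern, not a verbatim excerpt from this file):

  // Before: element type inferred from the pointer's pointee type.
  Value *V = IRB.CreateLoad(Ptr);
  Value *Off = IRB.CreateGEP(BasePointer, Idx);
  // After: element type spelled out, so it survives opaque pointer types.
  Value *V2 = IRB.CreateLoad(StackPtrTy, Ptr);
  Value *Off2 = IRB.CreateGEP(Int8Ty, BasePointer, Idx);
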
void SafeStack::findInsts(Function &F,
@@ -453,7 +452,8 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
++NumUnsafeStackRestorePoints;
IRB.SetInsertPoint(I->getNextNode());
- Value *CurrentTop = DynamicTop ? IRB.CreateLoad(DynamicTop) : StaticTop;
+ Value *CurrentTop =
+ DynamicTop ? IRB.CreateLoad(StackPtrTy, DynamicTop) : StaticTop;
IRB.CreateStore(CurrentTop, UnsafeStackPtr);
}
@@ -462,7 +462,7 @@ SafeStack::createStackRestorePoints(IRBuilder<> &IRB, Function &F,
void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
AllocaInst *StackGuardSlot, Value *StackGuard) {
- Value *V = IRB.CreateLoad(StackGuardSlot);
+ Value *V = IRB.CreateLoad(StackPtrTy, StackGuardSlot);
Value *Cmp = IRB.CreateICmpNE(StackGuard, V);
auto SuccessProb = BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -475,8 +475,8 @@ void SafeStack::checkStackGuard(IRBuilder<> &IRB, Function &F, ReturnInst &RI,
/* Unreachable */ true, Weights);
IRBuilder<> IRBFail(CheckTerm);
// FIXME: respect -fsanitize-trap / -ftrap-function here?
- Constant *StackChkFail = F.getParent()->getOrInsertFunction(
- "__stack_chk_fail", IRB.getVoidTy());
+ FunctionCallee StackChkFail =
+ F.getParent()->getOrInsertFunction("__stack_chk_fail", IRB.getVoidTy());
IRBFail.CreateCall(StackChkFail, {});
}
@@ -550,7 +550,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (StackGuardSlot) {
unsigned Offset = SSL.getObjectOffset(StackGuardSlot);
- Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *NewAI =
IRB.CreateBitCast(Off, StackGuardSlot->getType(), "StackGuardSlot");
@@ -569,14 +569,14 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
if (Size == 0)
Size = 1; // Don't create zero-sized stack objects.
- Value *Off = IRB.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRB.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *NewArg = IRB.CreateBitCast(Off, Arg->getType(),
Arg->getName() + ".unsafe-byval");
// Replace alloc with the new location.
replaceDbgDeclare(Arg, BasePointer, BasePointer->getNextNode(), DIB,
- DIExpression::NoDeref, -Offset, DIExpression::NoDeref);
+ DIExpression::ApplyOffset, -Offset);
Arg->replaceAllUsesWith(NewArg);
IRB.SetInsertPoint(cast<Instruction>(NewArg)->getNextNode());
IRB.CreateMemCpy(Off, Align, Arg, Arg->getParamAlignment(), Size);
@@ -587,12 +587,8 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRB.SetInsertPoint(AI);
unsigned Offset = SSL.getObjectOffset(AI);
- uint64_t Size = getStaticAllocaAllocationSize(AI);
- if (Size == 0)
- Size = 1; // Don't create zero-sized stack objects.
-
- replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::NoDeref,
- -Offset, DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, BasePointer, DIB, DIExpression::ApplyOffset,
+ -Offset);
replaceDbgValueForAlloca(AI, BasePointer, DIB, -Offset);
// Replace uses of the alloca with the new location.
@@ -609,20 +605,16 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
InsertBefore = User;
IRBuilder<> IRBUser(InsertBefore);
- Value *Off = IRBUser.CreateGEP(BasePointer, // BasePointer is i8*
+ Value *Off = IRBUser.CreateGEP(Int8Ty, BasePointer, // BasePointer is i8*
ConstantInt::get(Int32Ty, -Offset));
Value *Replacement = IRBUser.CreateBitCast(Off, AI->getType(), Name);
- if (auto *PHI = dyn_cast<PHINode>(User)) {
+ if (auto *PHI = dyn_cast<PHINode>(User))
// PHI nodes may have multiple incoming edges from the same BB (why??),
// all must be updated at once with the same incoming value.
- auto *BB = PHI->getIncomingBlock(U);
- for (unsigned I = 0; I < PHI->getNumIncomingValues(); ++I)
- if (PHI->getIncomingBlock(I) == BB)
- PHI->setIncomingValue(I, Replacement);
- } else {
+ PHI->setIncomingValueForBlock(PHI->getIncomingBlock(U), Replacement);
+ else
U.set(Replacement);
- }
}
AI->eraseFromParent();
@@ -637,7 +629,7 @@ Value *SafeStack::moveStaticAllocasToUnsafeStack(
IRB.SetInsertPoint(BasePointer->getNextNode());
Value *StaticTop =
- IRB.CreateGEP(BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
+ IRB.CreateGEP(Int8Ty, BasePointer, ConstantInt::get(Int32Ty, -FrameSize),
"unsafe_stack_static_top");
IRB.CreateStore(StaticTop, UnsafeStackPtr);
return StaticTop;
@@ -660,7 +652,8 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
uint64_t TySize = DL.getTypeAllocSize(Ty);
Value *Size = IRB.CreateMul(ArraySize, ConstantInt::get(IntPtrTy, TySize));
- Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(UnsafeStackPtr), IntPtrTy);
+ Value *SP = IRB.CreatePtrToInt(IRB.CreateLoad(StackPtrTy, UnsafeStackPtr),
+ IntPtrTy);
SP = IRB.CreateSub(SP, Size);
// Align the SP value to satisfy the AllocaInst, type and stack alignments.
@@ -682,8 +675,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (AI->hasName() && isa<Instruction>(NewAI))
NewAI->takeName(AI);
- replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::NoDeref, 0,
- DIExpression::NoDeref);
+ replaceDbgDeclareForAlloca(AI, NewAI, DIB, DIExpression::ApplyOffset, 0);
AI->replaceAllUsesWith(NewAI);
AI->eraseFromParent();
}
@@ -698,7 +690,7 @@ void SafeStack::moveDynamicAllocasToUnsafeStack(
if (II->getIntrinsicID() == Intrinsic::stacksave) {
IRBuilder<> IRB(II);
- Instruction *LI = IRB.CreateLoad(UnsafeStackPtr);
+ Instruction *LI = IRB.CreateLoad(StackPtrTy, UnsafeStackPtr);
LI->takeName(II);
II->replaceAllUsesWith(LI);
II->eraseFromParent();
@@ -727,7 +719,7 @@ void SafeStack::TryInlinePointerAddress() {
if (!isa<CallInst>(UnsafeStackPtr))
return;
- if(F.hasFnAttribute(Attribute::OptimizeNone))
+ if (F.hasOptNone())
return;
CallSite CS(UnsafeStackPtr);
@@ -783,7 +775,7 @@ bool SafeStack::run() {
if (DISubprogram *SP = F.getSubprogram())
IRB.SetCurrentDebugLocation(DebugLoc::get(SP->getScopeLine(), 0, SP));
if (SafeStackUsePointerAddress) {
- Value *Fn = F.getParent()->getOrInsertFunction(
+ FunctionCallee Fn = F.getParent()->getOrInsertFunction(
"__safestack_pointer_address", StackPtrTy->getPointerTo(0));
UnsafeStackPtr = IRB.CreateCall(Fn);
} else {
@@ -793,7 +785,7 @@ bool SafeStack::run() {
// Load the current stack pointer (we'll also use it as a base pointer).
// FIXME: use a dedicated register for it ?
Instruction *BasePointer =
- IRB.CreateLoad(UnsafeStackPtr, false, "unsafe_stack_ptr");
+ IRB.CreateLoad(StackPtrTy, UnsafeStackPtr, false, "unsafe_stack_ptr");
assert(BasePointer->getType() == StackPtrTy);
AllocaInst *StackGuardSlot = nullptr;
diff --git a/lib/CodeGen/SafeStackColoring.cpp b/lib/CodeGen/SafeStackColoring.cpp
index 726c38002817..04a5c4b6d892 100644
--- a/lib/CodeGen/SafeStackColoring.cpp
+++ b/lib/CodeGen/SafeStackColoring.cpp
@@ -1,9 +1,8 @@
//===- SafeStackColoring.cpp - SafeStack frame coloring -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackColoring.h b/lib/CodeGen/SafeStackColoring.h
index 902e63ebeb7e..b696b1b6baed 100644
--- a/lib/CodeGen/SafeStackColoring.h
+++ b/lib/CodeGen/SafeStackColoring.h
@@ -1,9 +1,8 @@
//===- SafeStackColoring.h - SafeStack frame coloring ----------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackLayout.cpp b/lib/CodeGen/SafeStackLayout.cpp
index 07b6a5d1883b..09964866e4d3 100644
--- a/lib/CodeGen/SafeStackLayout.cpp
+++ b/lib/CodeGen/SafeStackLayout.cpp
@@ -1,9 +1,8 @@
//===- SafeStackLayout.cpp - SafeStack frame layout -----------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SafeStackLayout.h b/lib/CodeGen/SafeStackLayout.h
index ac531d800f6e..349d9a8b595c 100644
--- a/lib/CodeGen/SafeStackLayout.h
+++ b/lib/CodeGen/SafeStackLayout.h
@@ -1,9 +1,8 @@
//===- SafeStackLayout.h - SafeStack frame layout --------------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 2684f92b3a93..7776dffb4e9c 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -1,10 +1,9 @@
//===- ScalarizeMaskedMemIntrin.cpp - Scalarize unsupported masked mem ----===//
// intrinsics
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -124,7 +123,7 @@ static bool isConstantIntVector(Value *Mask) {
// %10 = extractelement <16 x i1> %mask, i32 2
// br i1 %10, label %cond.load4, label %else5
//
-static void scalarizeMaskedLoad(CallInst *CI) {
+static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
Value *Ptr = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
@@ -144,7 +143,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// Short-cut if the mask is all-true.
if (isa<Constant>(Mask) && cast<Constant>(Mask)->isAllOnesValue()) {
- Value *NewI = Builder.CreateAlignedLoad(Ptr, AlignVal);
+ Value *NewI = Builder.CreateAlignedLoad(VecType, Ptr, AlignVal);
CI->replaceAllUsesWith(NewI);
CI->eraseFromParent();
return;
@@ -152,9 +151,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// Adjust alignment for the scalar instruction.
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr fron i8* to EltTy*
+ // Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
@@ -165,11 +164,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- VResult =
- Builder.CreateInsertElement(VResult, Load, Builder.getInt32(Idx));
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ VResult = Builder.CreateInsertElement(VResult, Load, Idx);
}
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
@@ -184,8 +181,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// br i1 %mask_1, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
// Create "cond" block
//
@@ -197,11 +193,9 @@ static void scalarizeMaskedLoad(CallInst *CI) {
"cond.load");
Builder.SetInsertPoint(InsertPt);
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
- LoadInst *Load = Builder.CreateAlignedLoad(Gep, AlignVal);
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx));
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Gep, AlignVal);
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock =
@@ -222,6 +216,8 @@ static void scalarizeMaskedLoad(CallInst *CI) {
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
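
Each scalarize* helper now reports through ModifiedDT when it has split basic blocks. The driver loop is outside these hunks; presumably it restarts its scan when the flag is set, since iterators into the old block are stale. A sketch of that assumed pattern (isMaskedLoadIntrinsic is invented for illustration):

  bool ModifiedDT = false;
  for (Instruction &I : llvm::make_early_inc_range(BB)) {
    auto *CI = dyn_cast<CallInst>(&I);
    if (CI && isMaskedLoadIntrinsic(CI)) { // hypothetical predicate
      scalarizeMaskedLoad(CI, ModifiedDT);
      if (ModifiedDT)
        break; // BB was split; re-scan from a fresh block list
    }
  }
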
// Translate a masked store intrinsic, like
@@ -250,7 +246,7 @@ static void scalarizeMaskedLoad(CallInst *CI) {
// store i32 %6, i32* %7
// br label %else2
// . . .
-static void scalarizeMaskedStore(CallInst *CI) {
+static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptr = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -276,9 +272,9 @@ static void scalarizeMaskedStore(CallInst *CI) {
// Adjust alignment for the scalar instruction.
AlignVal = MinAlign(AlignVal, EltTy->getPrimitiveSizeInBits() / 8);
- // Bitcast %addr fron i8* to EltTy*
+ // Bitcast %addr from i8* to EltTy*
Type *NewPtrType =
- EltTy->getPointerTo(cast<PointerType>(Ptr->getType())->getAddressSpace());
+ EltTy->getPointerTo(Ptr->getType()->getPointerAddressSpace());
Value *FirstEltPtr = Builder.CreateBitCast(Ptr, NewPtrType);
unsigned VectorWidth = VecType->getNumElements();
@@ -286,9 +282,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
}
CI->eraseFromParent();
@@ -301,8 +296,7 @@ static void scalarizeMaskedStore(CallInst *CI) {
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.store, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Builder.getInt32(Idx));
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
// Create "cond" block
//
@@ -314,9 +308,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
Builder.SetInsertPoint(InsertPt);
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx));
- Value *Gep =
- Builder.CreateInBoundsGEP(EltTy, FirstEltPtr, Builder.getInt32(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Value *Gep = Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
Builder.CreateAlignedStore(OneElt, Gep, AlignVal);
// Create "else" block, fill it in the next iteration
@@ -329,6 +322,8 @@ static void scalarizeMaskedStore(CallInst *CI) {
IfBlock = NewIfBlock;
}
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
// Translate a masked gather intrinsic like
@@ -360,13 +355,14 @@ static void scalarizeMaskedStore(CallInst *CI) {
// . . .
// %Result = select <16 x i1> %Mask, <16 x i32> %res.phi.select, <16 x i32> %Src
// ret <16 x i32> %Result
-static void scalarizeMaskedGather(CallInst *CI) {
+static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
Value *Ptrs = CI->getArgOperand(0);
Value *Alignment = CI->getArgOperand(1);
Value *Mask = CI->getArgOperand(2);
Value *Src0 = CI->getArgOperand(3);
VectorType *VecType = cast<VectorType>(CI->getType());
+ Type *EltTy = VecType->getElementType();
IRBuilder<> Builder(CI->getContext());
Instruction *InsertPt = CI;
@@ -385,12 +381,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- VResult = Builder.CreateInsertElement(
- VResult, Load, Builder.getInt32(Idx), "Res" + Twine(Idx));
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
}
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
@@ -404,8 +399,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
// br i1 %Mask1, label %cond.load, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
// Create "cond" block
//
@@ -416,13 +411,11 @@ static void scalarizeMaskedGather(CallInst *CI) {
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.load");
Builder.SetInsertPoint(InsertPt);
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
LoadInst *Load =
- Builder.CreateAlignedLoad(Ptr, AlignVal, "Load" + Twine(Idx));
- Value *NewVResult = Builder.CreateInsertElement(VResult, Load,
- Builder.getInt32(Idx),
- "Res" + Twine(Idx));
+ Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal, "Load" + Twine(Idx));
+ Value *NewVResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
// Create "else" block, fill it in the next iteration
BasicBlock *NewIfBlock = CondBlock->splitBasicBlock(InsertPt, "else");
@@ -441,6 +434,8 @@ static void scalarizeMaskedGather(CallInst *CI) {
CI->replaceAllUsesWith(VResult);
CI->eraseFromParent();
+
+ ModifiedDT = true;
}
// Translate a masked scatter intrinsic, like
@@ -469,7 +464,7 @@ static void scalarizeMaskedGather(CallInst *CI) {
// store i32 %Elt1, i32* %Ptr1, align 4
// br label %else2
// . . .
-static void scalarizeMaskedScatter(CallInst *CI) {
+static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
Value *Src = CI->getArgOperand(0);
Value *Ptrs = CI->getArgOperand(1);
Value *Alignment = CI->getArgOperand(2);
@@ -493,12 +488,11 @@ static void scalarizeMaskedScatter(CallInst *CI) {
// Shorten the way if the mask is a vector of constants.
if (isConstantIntVector(Mask)) {
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
- if (cast<ConstantVector>(Mask)->getAggregateElement(Idx)->isNullValue())
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
continue;
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
}
CI->eraseFromParent();
@@ -511,8 +505,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
// %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
// br i1 %Mask1, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Builder.getInt32(Idx),
- "Mask" + Twine(Idx));
+ Value *Predicate =
+ Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
// Create "cond" block
//
@@ -523,10 +517,8 @@ static void scalarizeMaskedScatter(CallInst *CI) {
BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt, "cond.store");
Builder.SetInsertPoint(InsertPt);
- Value *OneElt = Builder.CreateExtractElement(Src, Builder.getInt32(Idx),
- "Elt" + Twine(Idx));
- Value *Ptr = Builder.CreateExtractElement(Ptrs, Builder.getInt32(Idx),
- "Ptr" + Twine(Idx));
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *Ptr = Builder.CreateExtractElement(Ptrs, Idx, "Ptr" + Twine(Idx));
Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
// Create "else" block, fill it in the next iteration
@@ -538,6 +530,156 @@ static void scalarizeMaskedScatter(CallInst *CI) {
IfBlock = NewIfBlock;
}
CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
+
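+// Translate a masked expandload intrinsic, such as
+//   <16 x i32> @llvm.masked.expandload.v16i32(i32* %ptr, <16 x i1> %mask,
+//                                             <16 x i32> %passthru)
+// into a chain of basic blocks, where each block conditionally loads the next
+// consecutive element from %ptr when the corresponding mask bit is set, and
+// the pointer advances only past elements that were actually loaded; lanes
+// with a clear mask bit keep the matching %passthru element. (Illustrative
+// sketch only; the vector width and element type depend on the call site.)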
+static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
+ Value *Ptr = CI->getArgOperand(0);
+ Value *Mask = CI->getArgOperand(1);
+ Value *PassThru = CI->getArgOperand(2);
+
+ VectorType *VecType = cast<VectorType>(CI->getType());
+
+ Type *EltTy = VecType->getElementType();
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ // The result vector
+ Value *VResult = PassThru;
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.load, label %else
+ //
+
+    Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+
+ // Create "cond" block
+ //
+ // %EltAddr = getelementptr i32* %1, i32 0
+ // %Elt = load i32* %EltAddr
+ // VResult = insertelement <16 x i32> VResult, i32 %Elt, i32 Idx
+ //
+ BasicBlock *CondBlock = IfBlock->splitBasicBlock(InsertPt->getIterator(),
+ "cond.load");
+ Builder.SetInsertPoint(InsertPt);
+
+ LoadInst *Load = Builder.CreateAlignedLoad(EltTy, Ptr, 1);
+ Value *NewVResult = Builder.CreateInsertElement(VResult, Load, Idx);
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Create the phi to join the new and previous value.
+ PHINode *ResultPhi = Builder.CreatePHI(VecType, 2, "res.phi.else");
+ ResultPhi->addIncoming(NewVResult, CondBlock);
+ ResultPhi->addIncoming(VResult, PrevIfBlock);
+ VResult = ResultPhi;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
+}
+
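+// Translate a masked compressstore intrinsic, such as
+//   void @llvm.masked.compressstore.v16i32(<16 x i32> %value, i32* %ptr,
+//                                          <16 x i1> %mask)
+// into a chain of basic blocks, where each block conditionally stores the
+// corresponding element of %value to %ptr when its mask bit is set, and the
+// pointer advances only past elements that were actually stored. (Illustrative
+// sketch only; the vector width and element type depend on the call site.)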
+static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
+ Value *Src = CI->getArgOperand(0);
+ Value *Ptr = CI->getArgOperand(1);
+ Value *Mask = CI->getArgOperand(2);
+
+ VectorType *VecType = cast<VectorType>(Src->getType());
+
+ IRBuilder<> Builder(CI->getContext());
+ Instruction *InsertPt = CI;
+ BasicBlock *IfBlock = CI->getParent();
+
+ Builder.SetInsertPoint(InsertPt);
+ Builder.SetCurrentDebugLocation(CI->getDebugLoc());
+
+  Type *EltTy = VecType->getElementType();
+
+ unsigned VectorWidth = VecType->getNumElements();
+
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ // Fill the "else" block, created in the previous iteration
+ //
+ // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // br i1 %mask_1, label %cond.store, label %else
+ //
+ Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+
+ // Create "cond" block
+ //
+ // %OneElt = extractelement <16 x i32> %Src, i32 Idx
+ // %EltAddr = getelementptr i32* %1, i32 0
+    // store i32 %OneElt, i32* %EltAddr
+ //
+ BasicBlock *CondBlock =
+ IfBlock->splitBasicBlock(InsertPt->getIterator(), "cond.store");
+ Builder.SetInsertPoint(InsertPt);
+
+ Value *OneElt = Builder.CreateExtractElement(Src, Idx);
+ Builder.CreateAlignedStore(OneElt, Ptr, 1);
+
+ // Move the pointer if there are more blocks to come.
+ Value *NewPtr;
+ if ((Idx + 1) != VectorWidth)
+ NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
+
+ // Create "else" block, fill it in the next iteration
+ BasicBlock *NewIfBlock =
+ CondBlock->splitBasicBlock(InsertPt->getIterator(), "else");
+ Builder.SetInsertPoint(InsertPt);
+ Instruction *OldBr = IfBlock->getTerminator();
+ BranchInst::Create(CondBlock, NewIfBlock, Predicate, OldBr);
+ OldBr->eraseFromParent();
+ BasicBlock *PrevIfBlock = IfBlock;
+ IfBlock = NewIfBlock;
+
+ // Add a PHI for the pointer if this isn't the last iteration.
+ if ((Idx + 1) != VectorWidth) {
+ PHINode *PtrPhi = Builder.CreatePHI(Ptr->getType(), 2, "ptr.phi.else");
+ PtrPhi->addIncoming(NewPtr, CondBlock);
+ PtrPhi->addIncoming(Ptr, PrevIfBlock);
+ Ptr = PtrPhi;
+ }
+ }
+ CI->eraseFromParent();
+
+ ModifiedDT = true;
}
bool ScalarizeMaskedMemIntrin::runOnFunction(Function &F) {
@@ -587,33 +729,35 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
break;
case Intrinsic::masked_load:
// Scalarize unsupported vector masked load
- if (!TTI->isLegalMaskedLoad(CI->getType())) {
- scalarizeMaskedLoad(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedLoad(CI->getType()))
+ return false;
+ scalarizeMaskedLoad(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_store:
- if (!TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedStore(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedStore(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_gather:
- if (!TTI->isLegalMaskedGather(CI->getType())) {
- scalarizeMaskedGather(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedGather(CI->getType()))
+ return false;
+ scalarizeMaskedGather(CI, ModifiedDT);
+ return true;
case Intrinsic::masked_scatter:
- if (!TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType())) {
- scalarizeMaskedScatter(CI);
- ModifiedDT = true;
- return true;
- }
- return false;
+ if (TTI->isLegalMaskedScatter(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedScatter(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_expandload:
+ if (TTI->isLegalMaskedExpandLoad(CI->getType()))
+ return false;
+ scalarizeMaskedExpandLoad(CI, ModifiedDT);
+ return true;
+ case Intrinsic::masked_compressstore:
+ if (TTI->isLegalMaskedCompressStore(CI->getArgOperand(0)->getType()))
+ return false;
+ scalarizeMaskedCompressStore(CI, ModifiedDT);
+ return true;
}
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 6c135b3d69d6..dc3a11670a16 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAG.cpp - Implement the ScheduleDAG class ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -15,6 +14,7 @@
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
@@ -38,6 +38,10 @@ using namespace llvm;
#define DEBUG_TYPE "pre-RA-sched"
+STATISTIC(NumNewPredsAdded, "Number of times a single predecessor was added");
+STATISTIC(NumTopoInits,
+ "Number of times the topological order has been recomputed");
+
#ifndef NDEBUG
static cl::opt<bool> StressSchedOpt(
"stress-sched", cl::Hidden, cl::init(false),
@@ -458,6 +462,11 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
  // On insertion of the edge X->Y, the algorithm first marks the nodes
  // reachable from Y by calling DFS, and then shifts them using Shift to lie
// immediately after X in Index2Node.
+
+  // Cancel any pending updates and mark the order as valid.
+ Dirty = false;
+ Updates.clear();
+
unsigned DAGSize = SUnits.size();
std::vector<SUnit*> WorkList;
WorkList.reserve(DAGSize);
@@ -498,6 +507,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
}
Visited.resize(DAGSize);
+ NumTopoInits++;
#ifndef NDEBUG
// Check correctness of the ordering
@@ -510,6 +520,31 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() {
#endif
}
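+/// Bring the topological order up to date, either by recomputing it from
+/// scratch (if marked dirty) or by applying any queued single-edge updates.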
+void ScheduleDAGTopologicalSort::FixOrder() {
+ // Recompute from scratch after new nodes have been added.
+ if (Dirty) {
+ InitDAGTopologicalSorting();
+ return;
+ }
+
+ // Otherwise apply updates one-by-one.
+ for (auto &U : Updates)
+ AddPred(U.first, U.second);
+ Updates.clear();
+}
+
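+/// Queue an AddPred(Y, X) update to be applied lazily the next time the
+/// topological order is queried.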
+void ScheduleDAGTopologicalSort::AddPredQueued(SUnit *Y, SUnit *X) {
+  // Once too many updates are pending, recomputing the order from scratch is
+  // likely more efficient than applying the updates one-by-one. The current
+  // cut-off is chosen arbitrarily.
+ Dirty = Dirty || Updates.size() > 10;
+
+ if (Dirty)
+ return;
+
+ Updates.emplace_back(Y, X);
+}
+
void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
int UpperBound, LowerBound;
LowerBound = Node2Index[Y->NodeNum];
@@ -524,6 +559,8 @@ void ScheduleDAGTopologicalSort::AddPred(SUnit *Y, SUnit *X) {
// Recompute topological indexes.
Shift(Visited, LowerBound, UpperBound);
}
+
+ NumNewPredsAdded++;
}
void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
@@ -665,6 +702,7 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound,
}
bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
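+  // Flush any queued topology updates before querying reachability.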
+ FixOrder();
// Is SU reachable from TargetSU via successor edges?
if (IsReachable(SU, TargetSU))
return true;
@@ -677,6 +715,7 @@ bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) {
bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU,
const SUnit *TargetSU) {
+ FixOrder();
// If insertion of the edge SU->TargetSU would create a cycle
// then there is a path from TargetSU to SU.
int UpperBound, LowerBound;
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 99406ed1496a..d5ad7e92299d 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -1,9 +1,8 @@
//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -115,7 +114,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
: ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
RemoveKillFlags(RemoveKillFlags),
UnknownValue(UndefValue::get(
- Type::getVoidTy(mf.getFunction().getContext()))) {
+ Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
DbgValues.clear();
const TargetSubtargetInfo &ST = mf.getSubtarget();
@@ -132,7 +131,8 @@ static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
const DataLayout &DL) {
auto allMMOsOkay = [&]() {
for (const MachineMemOperand *MMO : MI->memoperands()) {
- if (MMO->isVolatile())
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic())
return false;
if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
@@ -743,6 +743,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// done.
Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
+ // Track all instructions that may raise floating-point exceptions.
+  // These do not depend on one another (or on normal loads or stores), but
+ // must not be rescheduled across global barriers. Note that we don't
+ // really need a "map" here since we don't track those MIs by value;
+ // using the same Value2SUsMap data type here is simply a matter of
+ // convenience.
+ Value2SUsMap FPExceptions;
+
// Remove any stale debug info; sometimes BuildSchedGraph is called again
// without emitting the info from the previous call.
DbgValues.clear();
@@ -870,10 +878,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
addBarrierChain(Loads);
addBarrierChain(NonAliasStores);
addBarrierChain(NonAliasLoads);
+ addBarrierChain(FPExceptions);
continue;
}
+ // Instructions that may raise FP exceptions may not be moved
+ // across any global barriers.
+ if (MI.mayRaiseFPException()) {
+ if (BarrierChain)
+ BarrierChain->addPredBarrier(SU);
+
+ FPExceptions.insert(SU, UnknownValue);
+
+ if (FPExceptions.size() >= HugeRegion) {
+        LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n");
+ Value2SUsMap empty;
+ reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
+ }
+ }
+
// If it's not a store or a variant load, we're done.
if (!MI.mayStore() &&
!(MI.mayLoad() && !MI.isDereferenceableInvariantLoad(AA)))
@@ -968,6 +992,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
Uses.clear();
CurrentVRegDefs.clear();
CurrentVRegUses.clear();
+
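+  // The DAG was rebuilt, so any cached topological order is stale; mark it
+  // dirty so that FixOrder() recomputes it before the next reachability query.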
+ Topo.MarkDirty();
}
raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
@@ -1089,22 +1115,21 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
if (!MI.isBundled()) {
toggleKills(MRI, LiveRegs, MI, true);
} else {
- MachineBasicBlock::instr_iterator First = MI.getIterator();
- if (MI.isBundle()) {
+ MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
+ if (MI.isBundle())
toggleKills(MRI, LiveRegs, MI, false);
- ++First;
- }
+
      // Some targets make the (questionable) assumption that the instructions
// inside the bundle are ordered and consequently only the last use of
// a register inside the bundle can kill it.
- MachineBasicBlock::instr_iterator I = std::next(First);
+ MachineBasicBlock::instr_iterator I = std::next(Bundle);
while (I->isBundledWithSucc())
++I;
do {
if (!I->isDebugInstr())
toggleKills(MRI, LiveRegs, *I, true);
--I;
- } while(I != First);
+ } while (I != Bundle);
}
}
}
@@ -1146,6 +1171,23 @@ std::string ScheduleDAGInstrs::getDAGName() const {
return "dag." + BB->getFullName();
}
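+/// Returns true if an edge from \p PredSU to \p SuccSU can be added without
+/// creating a cycle; edges into ExitSU are always acceptable.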
+bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
+ return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
+}
+
+bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
+ if (SuccSU != &ExitSU) {
+    // Do not use WillCreateCycle; it assumes SD scheduling.
+ // If Pred is reachable from Succ, then the edge creates a cycle.
+ if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
+ return false;
+ Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
+ }
+ SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
+ // Return true regardless of whether a new edge needed to be inserted.
+ return true;
+}
+
//===----------------------------------------------------------------------===//
// SchedDFSResult Implementation
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp
index ff2085aae865..8d04711f07c6 100644
--- a/lib/CodeGen/ScheduleDAGPrinter.cpp
+++ b/lib/CodeGen/ScheduleDAGPrinter.cpp
@@ -1,9 +1,8 @@
//===-- ScheduleDAGPrinter.cpp - Implement ScheduleDAG::viewGraph() -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index 4301372179b8..a9fda56f2dac 100644
--- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,9 +1,8 @@
//===- ScoreboardHazardRecognizer.cpp - Scheduler Support -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ff5505c97721..49c922f560fa 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1,9 +1,8 @@
//===- DAGCombiner.cpp - Implement a DAG node combiner --------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -112,6 +111,10 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<unsigned> TokenFactorInlineLimit(
+ "combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
+ cl::desc("Limit the number of operands to inline for Token Factors"));
+
namespace {
class DAGCombiner {
@@ -138,6 +141,10 @@ namespace {
/// them) when they are deleted from the underlying DAG. It relies on
/// stable indices of nodes within the worklist.
DenseMap<SDNode *, unsigned> WorklistMap;
+    /// This records all nodes that we have attempted to add to the worklist
+    /// since we last considered a new worklist entry. Because duplicate nodes
+    /// are never added to the worklist, this can differ from the tail of the
+    /// worklist.
+ SmallSetVector<SDNode *, 32> PruningList;
/// Set of nodes which have been combined (at least once).
///
@@ -155,6 +162,37 @@ namespace {
AddToWorklist(Node);
}
+ // Prune potentially dangling nodes. This is called after
+ // any visit to a node, but should also be called during a visit after any
+ // failed combine which may have created a DAG node.
+ void clearAddedDanglingWorklistEntries() {
+ // Check any nodes added to the worklist to see if they are prunable.
+ while (!PruningList.empty()) {
+ auto *N = PruningList.pop_back_val();
+ if (N->use_empty())
+ recursivelyDeleteUnusedNodes(N);
+ }
+ }
+
+ SDNode *getNextWorklistEntry() {
+ // Before we do any work, remove nodes that are not in use.
+ clearAddedDanglingWorklistEntries();
+ SDNode *N = nullptr;
+ // The Worklist holds the SDNodes in order, but it may contain null
+ // entries.
+ while (!N && !Worklist.empty()) {
+ N = Worklist.pop_back_val();
+ }
+
+ if (N) {
+ bool GoodWorklistEntry = WorklistMap.erase(N);
+ (void)GoodWorklistEntry;
+ assert(GoodWorklistEntry &&
+ "Found a worklist entry without a corresponding map entry!");
+ }
+ return N;
+ }
+
/// Call the node-specific routine that folds each particular type of node.
SDValue visit(SDNode *N);
@@ -162,7 +200,7 @@ namespace {
DAGCombiner(SelectionDAG &D, AliasAnalysis *AA, CodeGenOpt::Level OL)
: DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
OptLevel(OL), AA(AA) {
- ForCodeSize = DAG.getMachineFunction().getFunction().optForSize();
+ ForCodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
MaximumLegalStoreInBits = 0;
for (MVT VT : MVT::all_valuetypes())
@@ -172,6 +210,11 @@ namespace {
MaximumLegalStoreInBits = VT.getSizeInBits();
}
+ void ConsiderForPruning(SDNode *N) {
+ // Mark this for potential pruning.
+ PruningList.insert(N);
+ }
+
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed).
void AddToWorklist(SDNode *N) {
@@ -183,6 +226,8 @@ namespace {
if (N->getOpcode() == ISD::HANDLENODE)
return;
+ ConsiderForPruning(N);
+
if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
Worklist.push_back(N);
}
@@ -190,6 +235,7 @@ namespace {
/// Remove all instances of N from the worklist.
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
+ PruningList.remove(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -229,8 +275,15 @@ namespace {
/// If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
unsigned BitWidth = Op.getScalarValueSizeInBits();
- APInt Demanded = APInt::getAllOnesValue(BitWidth);
- return SimplifyDemandedBits(Op, Demanded);
+ APInt DemandedBits = APInt::getAllOnesValue(BitWidth);
+ return SimplifyDemandedBits(Op, DemandedBits);
+ }
+
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits) {
+ EVT VT = Op.getValueType();
+ unsigned NumElts = VT.isVector() ? VT.getVectorNumElements() : 1;
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedBits(Op, DemandedBits, DemandedElts);
}
/// Check the specified vector node value to see if it can be simplified or
@@ -238,12 +291,13 @@ namespace {
/// elements. If so, return true.
bool SimplifyDemandedVectorElts(SDValue Op) {
unsigned NumElts = Op.getValueType().getVectorNumElements();
- APInt Demanded = APInt::getAllOnesValue(NumElts);
- return SimplifyDemandedVectorElts(Op, Demanded);
+ APInt DemandedElts = APInt::getAllOnesValue(NumElts);
+ return SimplifyDemandedVectorElts(Op, DemandedElts);
}
- bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
- bool SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+ bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts);
+ bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts,
bool AssumeSingleUse = false);
bool CombineToPreIndexedLoadStore(SDNode *N);
@@ -291,15 +345,16 @@ namespace {
SDValue visitTokenFactor(SDNode *N);
SDValue visitMERGE_VALUES(SDNode *N);
SDValue visitADD(SDNode *N);
- SDValue visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference);
+ SDValue visitADDLike(SDNode *N);
+ SDValue visitADDLikeCommutative(SDValue N0, SDValue N1, SDNode *LocReference);
SDValue visitSUB(SDNode *N);
SDValue visitADDSAT(SDNode *N);
SDValue visitSUBSAT(SDNode *N);
SDValue visitADDC(SDNode *N);
- SDValue visitUADDO(SDNode *N);
+ SDValue visitADDO(SDNode *N);
SDValue visitUADDOLike(SDValue N0, SDValue N1, SDNode *N);
SDValue visitSUBC(SDNode *N);
- SDValue visitUSUBO(SDNode *N);
+ SDValue visitSUBO(SDNode *N);
SDValue visitADDE(SDNode *N);
SDValue visitADDCARRY(SDNode *N);
SDValue visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn, SDNode *N);
@@ -316,8 +371,7 @@ namespace {
SDValue visitMULHS(SDNode *N);
SDValue visitSMUL_LOHI(SDNode *N);
SDValue visitUMUL_LOHI(SDNode *N);
- SDValue visitSMULO(SDNode *N);
- SDValue visitUMULO(SDNode *N);
+ SDValue visitMULO(SDNode *N);
SDValue visitIMINMAX(SDNode *N);
SDValue visitAND(SDNode *N);
SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *N);
@@ -386,6 +440,7 @@ namespace {
SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
SDValue visitSTORE(SDNode *N);
+ SDValue visitLIFETIME_END(SDNode *N);
SDValue visitINSERT_VECTOR_ELT(SDNode *N);
SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
SDValue visitBUILD_VECTOR(SDNode *N);
@@ -400,13 +455,19 @@ namespace {
SDValue visitMSCATTER(SDNode *N);
SDValue visitFP_TO_FP16(SDNode *N);
SDValue visitFP16_TO_FP(SDNode *N);
+ SDValue visitVECREDUCE(SDNode *N);
SDValue visitFADDForFMACombine(SDNode *N);
SDValue visitFSUBForFMACombine(SDNode *N);
SDValue visitFMULForFMADistributiveCombine(SDNode *N);
SDValue XformToShuffleWithZero(SDNode *N);
- SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ bool reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1);
+ SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
@@ -466,6 +527,7 @@ namespace {
const SDLoc &DL);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
+ SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue splitMergedValStore(StoreSDNode *ST);
@@ -475,7 +537,8 @@ namespace {
SDValue reduceBuildVecToShuffle(SDNode *N);
SDValue createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask, SDValue VecIn1,
- SDValue VecIn2, unsigned LeftIdx);
+ SDValue VecIn2, unsigned LeftIdx,
+ bool DidSplitVec);
SDValue matchVSelectOpSizesWithSetCC(SDNode *Cast);
/// Walk up chain skipping non-aliasing memory nodes,
@@ -484,7 +547,7 @@ namespace {
SmallVectorImpl<SDValue> &Aliases);
/// Return true if there is any possibility that the two addresses overlap.
- bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
+ bool isAlias(SDNode *Op0, SDNode *Op1) const;
/// Walk up chain skipping non-aliasing memory nodes, looking for a better
/// chain (aliasing node.)
@@ -642,6 +705,18 @@ public:
}
};
+class WorklistInserter : public SelectionDAG::DAGUpdateListener {
+ DAGCombiner &DC;
+
+public:
+ explicit WorklistInserter(DAGCombiner &dc)
+ : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
+
+ // FIXME: Ideally we could add N to the worklist, but this causes exponential
+ // compile time costs in large DAGs, e.g. Halide.
+ void NodeInserted(SDNode *N) override { DC.ConsiderForPruning(N); }
+};
+
} // end anonymous namespace
//===----------------------------------------------------------------------===//
@@ -697,20 +772,23 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
static char isNegatibleForFree(SDValue Op, bool LegalOperations,
const TargetLowering &TLI,
const TargetOptions *Options,
+ bool ForCodeSize,
unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return 2;
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
// Don't allow anything with multiple uses unless we know it is free.
EVT VT = Op.getValueType();
const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse())
- if (!(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
+ if (!Op.hasOneUse() &&
+ !(Op.getOpcode() == ISD::FP_EXTEND &&
+ TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
// Don't recurse exponentially.
- if (Depth > 6) return 0;
+ if (Depth > 6)
+ return 0;
switch (Op.getOpcode()) {
default: return false;
@@ -721,7 +799,25 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// Don't invert constant FP values after legalization unless the target says
// the negated constant is legal.
return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT);
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
+ TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
}
case ISD::FADD:
if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
@@ -733,15 +829,14 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FSUB:
// We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath &&
- !Flags.hasNoSignedZeros())
+ if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
return 0;
// fold (fneg (fsub A, B)) -> (fsub B, A)
@@ -751,30 +846,31 @@ static char isNegatibleForFree(SDValue Op, bool LegalOperations,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, Depth + 1))
+ Options, ForCodeSize, Depth + 1))
return V;
return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
case ISD::FP_EXTEND:
case ISD::FP_ROUND:
case ISD::FSIN:
return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- Depth + 1);
+ ForCodeSize, Depth + 1);
}
}
/// If isNegatibleForFree returns true, return the newly negated expression.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, unsigned Depth = 0) {
- const TargetOptions &Options = DAG.getTarget().Options;
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth = 0) {
// fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
-
- const SDNodeFlags Flags = Op.getNode()->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ const SDNodeFlags Flags = Op->getFlags();
switch (Op.getOpcode()) {
default: llvm_unreachable("Unknown code");
@@ -783,24 +879,41 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
V.changeSign();
return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
}
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
case ISD::FADD:
assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
// fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(0), Flags);
case ISD::FSUB:
// fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
if (N0CFP->isZero())
return Op.getOperand(1);
@@ -812,28 +925,33 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
case ISD::FDIV:
// fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, Depth+1))
+ DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
+ Depth + 1))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
+ LegalOperations, ForCodeSize,
+ Depth + 1),
Op.getOperand(1), Flags);
// fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
Op.getOperand(0),
GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, Depth+1), Flags);
+ LegalOperations, ForCodeSize,
+ Depth + 1), Flags);
case ISD::FP_EXTEND:
case ISD::FSIN:
return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1));
+ LegalOperations, ForCodeSize,
+ Depth + 1));
case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, Depth+1),
- Op.getOperand(1));
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ GetNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
}
}
@@ -924,53 +1042,113 @@ static bool isAnyConstantBuildVector(SDValue V, bool NoOpaques = false) {
ISD::isBuildVectorOfConstantFPSDNodes(V.getNode());
}
-SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
- SDValue N1, SDNodeFlags Flags) {
- // Don't reassociate reductions.
- if (Flags.hasVectorReduction())
- return SDValue();
+bool DAGCombiner::reassociationCanBreakAddressingModePattern(unsigned Opc,
+ const SDLoc &DL,
+ SDValue N0,
+ SDValue N1) {
+ // Currently this only tries to ensure we don't undo the GEP splits done by
+ // CodeGenPrepare when shouldConsiderGEPOffsetSplit is true. To ensure this,
+ // we check if the following transformation would be problematic:
+ // (load/store (add, (add, x, offset1), offset2)) ->
+ // (load/store (add, x, offset1+offset2)).
- EVT VT = N0.getValueType();
- if (N0.getOpcode() == Opc && !N0->getFlags().hasVectorReduction()) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
- // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
- return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
- return SDValue();
- }
- if (N0.hasOneUse()) {
- // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
- }
+ if (Opc != ISD::ADD || N0.getOpcode() != ISD::ADD)
+ return false;
+
+ if (N0.hasOneUse())
+ return false;
+
+ auto *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
+ auto *C2 = dyn_cast<ConstantSDNode>(N1);
+ if (!C1 || !C2)
+ return false;
+
+ const APInt &C1APIntVal = C1->getAPIntValue();
+ const APInt &C2APIntVal = C2->getAPIntValue();
+ if (C1APIntVal.getBitWidth() > 64 || C2APIntVal.getBitWidth() > 64)
+ return false;
+
+ const APInt CombinedValueIntVal = C1APIntVal + C2APIntVal;
+ if (CombinedValueIntVal.getBitWidth() > 64)
+ return false;
+ const int64_t CombinedValue = CombinedValueIntVal.getSExtValue();
+
+ for (SDNode *Node : N0->uses()) {
+    if (auto *LoadStore = dyn_cast<MemSDNode>(Node)) {
+ // Is x[offset2] already not a legal addressing mode? If so then
+ // reassociating the constants breaks nothing (we test offset2 because
+ // that's the one we hope to fold into the load or store).
+ TargetLoweringBase::AddrMode AM;
+ AM.HasBaseReg = true;
+ AM.BaseOffs = C2APIntVal.getSExtValue();
+ EVT VT = LoadStore->getMemoryVT();
+ unsigned AS = LoadStore->getAddressSpace();
+ Type *AccessTy = VT.getTypeForEVT(*DAG.getContext());
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ continue;
+
+ // Would x[offset1+offset2] still be a legal addressing mode?
+ AM.BaseOffs = CombinedValue;
+ if (!TLI.isLegalAddressingMode(DAG.getDataLayout(), AM, AccessTy, AS))
+ return true;
}
}
- if (N1.getOpcode() == Opc && !N1->getFlags().hasVectorReduction()) {
- if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
- if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
- // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
- if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
- return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
+ return false;
+}
+
+// Helper for DAGCombiner::reassociateOps. Try to reassociate an expression
+// such as (Opc N0, N1), if \p N0 is the same kind of operation as \p Opc.
+SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
+ SDValue N0, SDValue N1) {
+ EVT VT = N0.getValueType();
+
+ if (N0.getOpcode() != Opc)
+ return SDValue();
+
+ // Don't reassociate reductions.
+ if (N0->getFlags().hasVectorReduction())
+ return SDValue();
+
+ if (SDNode *C1 = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
+ if (SDNode *C2 = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
+ // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2))
+ if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, C1, C2))
+ return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
+ return SDValue();
+ }
+ if (N0.hasOneUse()) {
+ // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1)
+ // iff (op x, c1) has one use
+ SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
+ if (!OpNode.getNode())
return SDValue();
- }
- if (N1.hasOneUse()) {
- // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
- // use
- SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
- if (!OpNode.getNode())
- return SDValue();
- AddToWorklist(OpNode.getNode());
- return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
- }
+ AddToWorklist(OpNode.getNode());
+ return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
+ return SDValue();
+}
+// Try to reassociate commutative binops.
+SDValue DAGCombiner::reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
+ SDValue N1, SDNodeFlags Flags) {
+ assert(TLI.isCommutativeBinOp(Opc) && "Operation not commutative.");
+ // Don't reassociate reductions.
+ if (Flags.hasVectorReduction())
+ return SDValue();
+
+ // Floating-point reassociation is not allowed without loose FP math.
+ if (N0.getValueType().isFloatingPoint() ||
+ N1.getValueType().isFloatingPoint())
+ if (!Flags.hasAllowReassociation() || !Flags.hasNoSignedZeros())
+ return SDValue();
+
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N0, N1))
+ return Combined;
+ if (SDValue Combined = reassociateOpsCommutative(Opc, DL, N1, N0))
+ return Combined;
return SDValue();
}
@@ -1026,10 +1204,11 @@ CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
-bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
+bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
KnownBits Known;
- if (!TLI.SimplifyDemandedBits(Op, Demanded, Known, TLO))
+ if (!TLI.SimplifyDemandedBits(Op, DemandedBits, DemandedElts, Known, TLO))
return false;
// Revisit the node.
@@ -1048,12 +1227,13 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
/// Check the specified vector node value to see if it can be simplified or
/// if things it uses can be simplified as it only uses some of the elements.
/// If so, return true.
-bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op, const APInt &Demanded,
+bool DAGCombiner::SimplifyDemandedVectorElts(SDValue Op,
+ const APInt &DemandedElts,
bool AssumeSingleUse) {
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownUndef, KnownZero;
- if (!TLI.SimplifyDemandedVectorElts(Op, Demanded, KnownUndef, KnownZero, TLO,
- 0, AssumeSingleUse))
+ if (!TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
+ TLO, 0, AssumeSingleUse))
return false;
// Revisit the node.
@@ -1383,6 +1563,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
LegalOperations = Level >= AfterLegalizeVectorOps;
LegalTypes = Level >= AfterLegalizeTypes;
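+  // Install a DAG update listener so that nodes created during combining are
+  // considered for pruning if they end up unused (see WorklistInserter).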
+ WorklistInserter AddNodes(*this);
+
// Add all the dag nodes to the worklist.
for (SDNode &Node : DAG.allnodes())
AddToWorklist(&Node);
@@ -1392,19 +1574,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
// changes of the root.
HandleSDNode Dummy(DAG.getRoot());
- // While the worklist isn't empty, find a node and try to combine it.
- while (!WorklistMap.empty()) {
- SDNode *N;
- // The Worklist holds the SDNodes in order, but it may contain null entries.
- do {
- N = Worklist.pop_back_val();
- } while (!N);
-
- bool GoodWorklistEntry = WorklistMap.erase(N);
- (void)GoodWorklistEntry;
- assert(GoodWorklistEntry &&
- "Found a worklist entry without a corresponding map entry!");
-
+ // While we have a valid worklist entry node, try to combine it.
+ while (SDNode *N = getNextWorklistEntry()) {
// If N has no uses, it is dead. Make sure to revisit all N's operands once
// N is deleted from the DAG, since they too may now be dead or may have a
// reduced number of uses, allowing other xforms.
@@ -1493,9 +1664,11 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::SSUBSAT:
case ISD::USUBSAT: return visitSUBSAT(N);
case ISD::ADDC: return visitADDC(N);
- case ISD::UADDO: return visitUADDO(N);
+ case ISD::SADDO:
+ case ISD::UADDO: return visitADDO(N);
case ISD::SUBC: return visitSUBC(N);
- case ISD::USUBO: return visitUSUBO(N);
+ case ISD::SSUBO:
+ case ISD::USUBO: return visitSUBO(N);
case ISD::ADDE: return visitADDE(N);
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
@@ -1509,8 +1682,8 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MULHS: return visitMULHS(N);
case ISD::SMUL_LOHI: return visitSMUL_LOHI(N);
case ISD::UMUL_LOHI: return visitUMUL_LOHI(N);
- case ISD::SMULO: return visitSMULO(N);
- case ISD::UMULO: return visitUMULO(N);
+ case ISD::SMULO:
+ case ISD::UMULO: return visitMULO(N);
case ISD::SMIN:
case ISD::SMAX:
case ISD::UMIN:
@@ -1590,8 +1763,22 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::MLOAD: return visitMLOAD(N);
case ISD::MSCATTER: return visitMSCATTER(N);
case ISD::MSTORE: return visitMSTORE(N);
+ case ISD::LIFETIME_END: return visitLIFETIME_END(N);
case ISD::FP_TO_FP16: return visitFP_TO_FP16(N);
case ISD::FP16_TO_FP: return visitFP16_TO_FP(N);
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN: return visitVECREDUCE(N);
}
return SDValue();
}
@@ -1644,7 +1831,7 @@ SDValue DAGCombiner::combine(SDNode *N) {
}
}
- // If N is a commutative binary node, try eliminate it if the commuted
+ // If N is a commutative binary node, try to eliminate it if the commuted
// version is already present in the DAG.
if (!RV.getNode() && TLI.isCommutativeBinOp(N->getOpcode()) &&
N->getNumValues() == 1) {
@@ -1693,6 +1880,12 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // If the sole user is a token factor, we should make sure we have a
+ // chance to merge them together. This prevents TF chains from inhibiting
+ // optimizations.
+ if (N->hasOneUse() && N->use_begin()->getOpcode() == ISD::TokenFactor)
+ AddToWorklist(*(N->use_begin()));
+
SmallVector<SDNode *, 8> TFs; // List of token factors to visit.
SmallVector<SDValue, 8> Ops; // Ops for replacing token factor.
SmallPtrSet<SDNode*, 16> SeenOps;
@@ -1704,8 +1897,19 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
// Iterate through token factors. The TFs grows when new token factors are
// encountered.
for (unsigned i = 0; i < TFs.size(); ++i) {
- SDNode *TF = TFs[i];
+    // Limit the number of nodes to inline, to avoid quadratic compile times.
+ // We have to add the outstanding Token Factors to Ops, otherwise we might
+ // drop Ops from the resulting Token Factors.
+ if (Ops.size() > TokenFactorInlineLimit) {
+ for (unsigned j = i; j < TFs.size(); j++)
+ Ops.emplace_back(TFs[j], 0);
+ // Drop unprocessed Token Factors from TFs, so we do not add them to the
+ // combiner worklist later.
+ TFs.resize(i);
+ break;
+ }
+ SDNode *TF = TFs[i];
// Check each of the operands.
for (const SDValue &Op : TF->op_values()) {
switch (Op.getOpcode()) {
@@ -1719,8 +1923,6 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (Op.hasOneUse() && !is_contained(TFs, Op.getNode())) {
// Queue up for processing.
TFs.push_back(Op.getNode());
- // Clean up in case the token factor is removed.
- AddToWorklist(Op.getNode());
Changed = true;
break;
}
@@ -1737,6 +1939,11 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
+ // Re-visit inlined Token Factors, to clean them up in case they have been
+ // removed. Skip the first Token Factor, as this is the current node.
+ for (unsigned i = 1, e = TFs.size(); i < e; i++)
+ AddToWorklist(TFs[i]);
+
// Remove Nodes that are chained to another node in the list. Do so
  // by walking up chains breadth-first, stopping when we've seen
// another operand. In general we must climb to the EntryNode, but we can exit
@@ -1803,6 +2010,8 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
for (const SDValue &Op : CurNode->op_values())
AddToWorklist(i, Op.getNode(), CurOpNumber);
break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
case ISD::CopyFromReg:
case ISD::CopyToReg:
AddToWorklist(i, CurNode->getOperand(0).getNode(), CurOpNumber);
@@ -1831,9 +2040,9 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
if (SeenChains.count(Op.getNode()) == 0)
PrunedOps.push_back(Op);
}
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, PrunedOps);
+ Result = DAG.getTokenFactor(SDLoc(N), PrunedOps);
} else {
- Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
+ Result = DAG.getTokenFactor(SDLoc(N), Ops);
}
}
return Result;
@@ -1869,7 +2078,8 @@ static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
}
SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
- assert(ISD::isBinaryOp(BO) && "Unexpected binary operator");
+ assert(TLI.isBinOp(BO->getOpcode()) && BO->getNumValues() == 1 &&
+ "Unexpected binary operator");
// Don't do this unless the old select is going away. We want to eliminate the
// binary operator, not replace a binop with a select.
@@ -1940,7 +2150,9 @@ SDValue DAGCombiner::foldBinOpIntoSelect(SDNode *BO) {
!isConstantFPBuildVectorOrConstantFP(NewCF))
return SDValue();
- return DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SDValue SelectOp = DAG.getSelect(DL, VT, Sel.getOperand(0), NewCT, NewCF);
+ SelectOp->setFlags(BO->getFlags());
+ return SelectOp;
}
static SDValue foldAddSubBoolOfMaskedVal(SDNode *N, SelectionDAG &DAG) {
@@ -1990,6 +2202,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
// We need a constant operand for the add/sub, and the other operand is a
// logical shift right: add (srl), C or sub C, (srl).
+ // TODO - support non-uniform vector amounts.
bool IsAdd = N->getOpcode() == ISD::ADD;
SDValue ConstantOp = IsAdd ? N->getOperand(1) : N->getOperand(0);
SDValue ShiftOp = IsAdd ? N->getOperand(0) : N->getOperand(1);
@@ -2006,7 +2219,7 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
EVT VT = ShiftOp.getValueType();
SDValue ShAmt = ShiftOp.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (!ShAmtC || ShAmtC->getZExtValue() != VT.getScalarSizeInBits() - 1)
+ if (!ShAmtC || ShAmtC->getAPIntValue() != (VT.getScalarSizeInBits() - 1))
return SDValue();
// Eliminate the 'not' by adjusting the shift and add/sub constant:
@@ -2019,7 +2232,10 @@ static SDValue foldAddSubOfSignBit(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::ADD, DL, VT, NewShift, DAG.getConstant(NewC, DL, VT));
}
-SDValue DAGCombiner::visitADD(SDNode *N) {
+/// Try to fold a node that behaves like an ADD (note that N isn't necessarily
+/// an ISD::ADD here; for example, it could be an ISD::OR if we know that there
+/// are no common bits set in the operands).
+SDValue DAGCombiner::visitADDLike(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
@@ -2058,13 +2274,22 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return N0;
if (isConstantOrConstantVector(N1, /* NoOpaque */ true)) {
+ // fold ((A-c1)+c2) -> (A+(c2-c1))
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaque */ true)) {
+ SDValue Sub = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N1.getNode(),
+ N0.getOperand(1).getNode());
+ assert(Sub && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), Sub);
+ }
+
// fold ((c1-A)+c2) -> (c1+c2)-A
if (N0.getOpcode() == ISD::SUB &&
isConstantOrConstantVector(N0.getOperand(0), /* NoOpaque */ true)) {
- // FIXME: Adding 2 constants should be handled by FoldConstantArithmetic.
- return DAG.getNode(ISD::SUB, DL, VT,
- DAG.getNode(ISD::ADD, DL, VT, N1, N0.getOperand(0)),
- N0.getOperand(1));
+ SDValue Add = DAG.FoldConstantArithmetic(ISD::ADD, DL, VT, N1.getNode(),
+ N0.getOperand(0).getNode());
+ assert(Add && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
}
// add (sext i1 X), 1 -> zext (not i1 X)
@@ -2097,9 +2322,10 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
return NewSel;
// reassociate add
- if (SDValue RADD = ReassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
- return RADD;
-
+ if (!reassociationCanBreakAddressingModePattern(ISD::ADD, DL, N0, N1)) {
+ if (SDValue RADD = reassociateOps(ISD::ADD, DL, N0, N1, N->getFlags()))
+ return RADD;
+ }
// fold ((0-A) + B) -> B-A
if (N0.getOpcode() == ISD::SUB && isNullOrNullSplat(N0.getOperand(0)))
return DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
@@ -2116,6 +2342,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
return N0.getOperand(0);
+ // fold ((A-B)+(C-A)) -> (C-B)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(0) == N1.getOperand(1))
+ return DAG.getNode(ISD::SUB, DL, VT, N1.getOperand(0),
+ N0.getOperand(1));
+
+ // fold ((A-B)+(B-C)) -> (A-C)
+ if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB &&
+ N0.getOperand(1) == N1.getOperand(0))
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0),
+ N1.getOperand(1));
+
// fold (A+(B-(A+C))) to (B-C)
if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
N0 == N1.getOperand(1).getOperand(0))
@@ -2148,31 +2386,93 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
}
+ // fold (add (umax X, C), -C) --> (usubsat X, C)
+ if (N0.getOpcode() == ISD::UMAX && hasOperation(ISD::USUBSAT, VT)) {
+ auto MatchUSUBSAT = [](ConstantSDNode *Max, ConstantSDNode *Op) {
+ return (!Max && !Op) ||
+ (Max && Op && Max->getAPIntValue() == (-Op->getAPIntValue()));
+ };
+ if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchUSUBSAT,
+ /*AllowUndefs*/ true))
+ return DAG.getNode(ISD::USUBSAT, DL, VT, N0.getOperand(0),
+ N0.getOperand(1));
+ }
+
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
+ if (isOneOrOneSplat(N1)) {
+ // fold (add (xor a, -1), 1) -> (sub 0, a)
+ if (isBitwiseNot(N0))
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
+ N0.getOperand(0));
+
+ // fold (add (add (xor a, -1), b), 1) -> (sub b, a)
+ if (N0.getOpcode() == ISD::ADD ||
+ N0.getOpcode() == ISD::UADDO ||
+ N0.getOpcode() == ISD::SADDO) {
+ SDValue A, Xor;
+
+ if (isBitwiseNot(N0.getOperand(0))) {
+ A = N0.getOperand(1);
+ Xor = N0.getOperand(0);
+ } else if (isBitwiseNot(N0.getOperand(1))) {
+ A = N0.getOperand(0);
+ Xor = N0.getOperand(1);
+ }
+
+ if (Xor)
+ return DAG.getNode(ISD::SUB, DL, VT, A, Xor.getOperand(0));
+ }
+
+ // Look for:
+ // add (add x, y), 1
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(1), Not);
+ }
+ }
+
+ // (x - y) + -1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isAllOnesOrAllOnesSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ if (SDValue Combined = visitADDLikeCommutative(N0, N1, N))
+ return Combined;
+
+ if (SDValue Combined = visitADDLikeCommutative(N1, N0, N))
+ return Combined;
+
+ return SDValue();
+}
+
+SDValue DAGCombiner::visitADD(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
+ SDLoc DL(N);
+
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
if (SDValue V = foldAddSubBoolOfMaskedVal(N, DAG))
return V;
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
- if (SimplifyDemandedBits(SDValue(N, 0)))
- return SDValue(N, 0);
-
// fold (a+b) -> (a|b) iff a and b share no bits.
if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
DAG.haveNoCommonBitsSet(N0, N1))
return DAG.getNode(ISD::OR, DL, VT, N0, N1);
- // fold (add (xor a, -1), 1) -> (sub 0, a)
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1))
- return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
-
- if (SDValue Combined = visitADDLike(N0, N1, N))
- return Combined;
-
- if (SDValue Combined = visitADDLike(N1, N0, N))
- return Combined;
-
return SDValue();
}
@@ -2246,6 +2546,10 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
V.getOpcode() != ISD::UADDO && V.getOpcode() != ISD::USUBO)
return SDValue();
+ EVT VT = V.getNode()->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(V.getOpcode(), VT))
+ return SDValue();
+
// If the result is masked, then no matter what kind of bool it is we can
// return. If it isn't, then we need to make sure the bool type is either 0 or
// 1 and not other values.
@@ -2257,7 +2561,26 @@ static SDValue getAsCarry(const TargetLowering &TLI, SDValue V) {
return SDValue();
}
-SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference) {
+/// Given the operands of an add/sub operation, see if the 2nd operand is a
+/// masked 0/1 whose source operand is actually known to be 0/-1. If so, invert
+/// the opcode and bypass the mask operation.
+static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1,
+ SelectionDAG &DAG, const SDLoc &DL) {
+ if (N1.getOpcode() != ISD::AND || !isOneOrOneSplat(N1->getOperand(1)))
+ return SDValue();
+
+ EVT VT = N0.getValueType();
+ if (DAG.ComputeNumSignBits(N1.getOperand(0)) != VT.getScalarSizeInBits())
+ return SDValue();
+
+ // add N0, (and (AssertSext X, i1), 1) --> sub N0, X
+ // sub N0, (and (AssertSext X, i1), 1) --> add N0, X
+ return DAG.getNode(IsAdd ? ISD::SUB : ISD::ADD, DL, VT, N0, N1.getOperand(0));
+}
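(Illustrative sketch, not from the patch: when every bit of Y is a sign bit,
Y is either 0 or -1, so the mask can be bypassed by flipping the opcode.)

#include <cstdint>

// Assumes Y is known to be 0 or -1, e.g. the result of an i1 sign extension.
static uint32_t addMasked(uint32_t X, int32_t Y) {
  return X + (static_cast<uint32_t>(Y) & 1u); // add N0, (and Y, 1)
}
static uint32_t subDirect(uint32_t X, int32_t Y) {
  return X - static_cast<uint32_t>(Y);        // sub N0, Y
}
// addMasked(X, 0) == subDirect(X, 0) and addMasked(X, -1) == subDirect(X, -1).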
+
+/// Helper for doing combines based on N0 and N1 being added to each other.
+SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1,
+ SDNode *LocReference) {
EVT VT = N0.getValueType();
SDLoc DL(LocReference);
@@ -2269,21 +2592,42 @@ SDValue DAGCombiner::visitADDLike(SDValue N0, SDValue N1, SDNode *LocReference)
N1.getOperand(0).getOperand(1),
N1.getOperand(1)));
- if (N1.getOpcode() == ISD::AND) {
- SDValue AndOp0 = N1.getOperand(0);
- unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
- unsigned DestBits = VT.getScalarSizeInBits();
-
- // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
- // and similar xforms where the inner op is either ~0 or 0.
- if (NumSignBits == DestBits && isOneOrOneSplat(N1->getOperand(1)))
- return DAG.getNode(ISD::SUB, DL, VT, N0, AndOp0);
- }
+ if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL))
+ return V;
- // add (sext i1), X -> sub X, (zext i1)
+ // Look for:
+ // add (add x, 1), y
+ // And if the target does not like this form then turn into:
+ // sub y, (xor x, -1)
+ if (!TLI.preferIncOfAddToSubOfNot(VT) && N0.hasOneUse() &&
+ N0.getOpcode() == ISD::ADD && isOneOrOneSplat(N0.getOperand(1))) {
+ SDValue Not = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, Not);
+ }
+
+ // Hoist one-use subtraction by non-opaque constant:
+ // (x - C) + y -> (x + y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Add, N0.getOperand(1));
+ }
+ // Hoist one-use subtraction from non-opaque constant:
+ // (C - x) + y -> (y - x) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N1, N0.getOperand(1));
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(0));
+ }
+
+ // If the target's bool is represented as 0/1, prefer to make this 'sub 0/1'
+ // rather than 'add 0/-1' (the zext should get folded).
+ // add (sext i1 Y), X --> sub X, (zext i1 Y)
if (N0.getOpcode() == ISD::SIGN_EXTEND &&
- N0.getOperand(0).getValueType() == MVT::i1 &&
- !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ N0.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) == TargetLowering::ZeroOrOneBooleanContent) {
SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
}
@@ -2344,8 +2688,10 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
return SDValue();
}
-static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
+static SDValue flipBoolean(SDValue V, const SDLoc &DL,
SelectionDAG &DAG, const TargetLowering &TLI) {
+ EVT VT = V.getValueType();
+
SDValue Cst;
switch (TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
@@ -2353,35 +2699,60 @@ static SDValue flipBoolean(SDValue V, const SDLoc &DL, EVT VT,
Cst = DAG.getConstant(1, DL, VT);
break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- Cst = DAG.getConstant(-1, DL, VT);
+ Cst = DAG.getAllOnesConstant(DL, VT);
break;
}
return DAG.getNode(ISD::XOR, DL, VT, V, Cst);
}
-static bool isBooleanFlip(SDValue V, EVT VT, const TargetLowering &TLI) {
- if (V.getOpcode() != ISD::XOR) return false;
- ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V.getOperand(1));
- if (!Const) return false;
+/**
+ * Flips a boolean if it is cheaper to compute. If the Force parameter is set,
+ * then the flip also occurs if computing the inverse is the same cost.
+ * This function returns an empty SDValue in case it cannot flip the boolean
+ * without increasing the cost of the computation. If you want to flip a boolean
+ * no matter what, use flipBoolean.
+ */
+static SDValue extractBooleanFlip(SDValue V, SelectionDAG &DAG,
+ const TargetLowering &TLI,
+ bool Force) {
+ if (Force && isa<ConstantSDNode>(V))
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+
+ if (V.getOpcode() != ISD::XOR)
+ return SDValue();
+
+ ConstantSDNode *Const = isConstOrConstSplat(V.getOperand(1), false);
+ if (!Const)
+ return SDValue();
+ EVT VT = V.getValueType();
+
+ bool IsFlip = false;
switch(TLI.getBooleanContents(VT)) {
case TargetLowering::ZeroOrOneBooleanContent:
- return Const->isOne();
+ IsFlip = Const->isOne();
+ break;
case TargetLowering::ZeroOrNegativeOneBooleanContent:
- return Const->isAllOnesValue();
+ IsFlip = Const->isAllOnesValue();
+ break;
case TargetLowering::UndefinedBooleanContent:
- return (Const->getAPIntValue() & 0x01) == 1;
+ IsFlip = (Const->getAPIntValue() & 0x01) == 1;
+ break;
}
- llvm_unreachable("Unsupported boolean content");
+
+ if (IsFlip)
+ return V.getOperand(0);
+ if (Force)
+ return flipBoolean(V, SDLoc(V), DAG, TLI);
+ return SDValue();
}
-SDValue DAGCombiner::visitUADDO(SDNode *N) {
+SDValue DAGCombiner::visitADDO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SADDO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2392,40 +2763,42 @@ SDValue DAGCombiner::visitUADDO(SDNode *N) {
DAG.getUNDEF(CarryVT));
// canonicalize constant to RHS.
- ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- if (N0C && !N1C)
- return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N1, N0);
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
- // fold (uaddo x, 0) -> x + no carry out
- if (isNullConstant(N1))
+ // fold (addo x, 0) -> x + no carry out
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
- // If it cannot overflow, transform into an add.
- if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
- return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
- DAG.getConstant(0, DL, CarryVT));
+ if (!IsSigned) {
+ // If it cannot overflow, transform into an add.
+ if (DAG.computeOverflowKind(N0, N1) == SelectionDAG::OFK_Never)
+ return CombineTo(N, DAG.getNode(ISD::ADD, DL, VT, N0, N1),
+ DAG.getConstant(0, DL, CarryVT));
- // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
- if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
- SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
- DAG.getConstant(0, DL, VT),
- N0.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
+ // fold (uaddo (xor a, -1), 1) -> (usub 0, a) and flip carry.
+ if (isBitwiseNot(N0) && isOneOrOneSplat(N1)) {
+ SDValue Sub = DAG.getNode(ISD::USUBO, DL, N->getVTList(),
+ DAG.getConstant(0, DL, VT), N0.getOperand(0));
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
- if (SDValue Combined = visitUADDOLike(N0, N1, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N0, N1, N))
+ return Combined;
- if (SDValue Combined = visitUADDOLike(N1, N0, N))
- return Combined;
+ if (SDValue Combined = visitUADDOLike(N1, N0, N))
+ return Combined;
+ }
return SDValue();
}
SDValue DAGCombiner::visitUADDOLike(SDValue N0, SDValue N1, SDNode *N) {
- auto VT = N0.getValueType();
+ EVT VT = N0.getValueType();
+ if (VT.isVector())
+ return SDValue();
// (uaddo X, (addcarry Y, 0, Carry)) -> (addcarry X, Y, Carry)
// If Y + 1 cannot overflow.
@@ -2484,11 +2857,10 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return DAG.getNode(ISD::UADDO, DL, N->getVTList(), N0, N1);
}
- EVT CarryVT = CarryIn.getValueType();
-
// fold (addcarry 0, 0, X) -> (and (ext/trunc X), 1) and no carry.
if (isNullConstant(N0) && isNullConstant(N1)) {
EVT VT = N0.getValueType();
+ EVT CarryVT = CarryIn.getValueType();
SDValue CarryExt = DAG.getBoolExtOrTrunc(CarryIn, DL, VT, CarryVT);
AddToWorklist(CarryExt.getNode());
return CombineTo(N, DAG.getNode(ISD::AND, DL, VT, CarryExt,
@@ -2496,16 +2868,6 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
DAG.getConstant(0, DL, CarryVT));
}
- // fold (addcarry (xor a, -1), 0, !b) -> (subcarry 0, a, b) and flip carry.
- if (isBitwiseNot(N0) && isNullConstant(N1) &&
- isBooleanFlip(CarryIn, CarryVT, TLI)) {
- SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(),
- DAG.getConstant(0, DL, N0.getValueType()),
- N0.getOperand(0), CarryIn.getOperand(0));
- return CombineTo(N, Sub,
- flipBoolean(Sub.getValue(1), DL, CarryVT, DAG, TLI));
- }
-
if (SDValue Combined = visitADDCARRYLike(N0, N1, CarryIn, N))
return Combined;
@@ -2515,12 +2877,112 @@ SDValue DAGCombiner::visitADDCARRY(SDNode *N) {
return SDValue();
}
+/**
+ * If we are facing some sort of diamond carry propagation pattern, try to
+ * break it up to generate something like:
+ * (addcarry X, 0, (addcarry A, B, Z):Carry)
+ *
+ * The end result is usually an increase in the number of operations required,
+ * but because the carry is now linearized, other transforms can kick in and
+ * optimize the DAG.
+ *
+ * Patterns typically look something like
+ * (uaddo A, B)
+ * / \
+ * Carry Sum
+ * | \
+ * | (addcarry *, 0, Z)
+ * | /
+ * \ Carry
+ * | /
+ * (addcarry X, *, *)
+ *
+ * But numerous variations exist. Our goal is to identify A, B, X and Z and
+ * produce a combine with a single path for carry propagation.
+ */
+static SDValue combineADDCARRYDiamond(DAGCombiner &Combiner, SelectionDAG &DAG,
+ SDValue X, SDValue Carry0, SDValue Carry1,
+ SDNode *N) {
+ if (Carry1.getResNo() != 1 || Carry0.getResNo() != 1)
+ return SDValue();
+ if (Carry1.getOpcode() != ISD::UADDO)
+ return SDValue();
+
+ SDValue Z;
+
+ /**
+ * First look for a suitable Z. It will present itself in the form of
+ * (addcarry Y, 0, Z) or its equivalent (uaddo Y, 1) for Z=true
+ */
+ if (Carry0.getOpcode() == ISD::ADDCARRY &&
+ isNullConstant(Carry0.getOperand(1))) {
+ Z = Carry0.getOperand(2);
+ } else if (Carry0.getOpcode() == ISD::UADDO &&
+ isOneConstant(Carry0.getOperand(1))) {
+ EVT VT = Combiner.getSetCCResultType(Carry0.getValueType());
+ Z = DAG.getConstant(1, SDLoc(Carry0.getOperand(1)), VT);
+ } else {
+ // We couldn't find a suitable Z.
+ return SDValue();
+ }
+
+ auto cancelDiamond = [&](SDValue A, SDValue B) {
+ SDLoc DL(N);
+ SDValue NewY = DAG.getNode(ISD::ADDCARRY, DL, Carry0->getVTList(), A, B, Z);
+ Combiner.AddToWorklist(NewY.getNode());
+ return DAG.getNode(ISD::ADDCARRY, DL, N->getVTList(), X,
+ DAG.getConstant(0, DL, X.getValueType()),
+ NewY.getValue(1));
+ };
+
+ /**
+ * (uaddo A, B)
+ * |
+ * Sum
+ * |
+ * (addcarry *, 0, Z)
+ */
+ if (Carry0.getOperand(0) == Carry1.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry1.getOperand(1));
+ }
+
+ /**
+ * (addcarry A, 0, Z)
+ * |
+ * Sum
+ * |
+ * (uaddo *, B)
+ */
+ if (Carry1.getOperand(0) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry0.getOperand(0), Carry1.getOperand(1));
+ }
+
+ if (Carry1.getOperand(1) == Carry0.getValue(0)) {
+ return cancelDiamond(Carry1.getOperand(0), Carry0.getOperand(0));
+ }
+
+ return SDValue();
+}
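(Illustrative sketch, not from the patch: the linearized carry chain this
combine steers a two-word addition toward; U128 and add128 are hypothetical.)

#include <cstdint>

struct U128 { uint64_t Lo, Hi; };

static U128 add128(U128 A, U128 B) {
  U128 R;
  R.Lo = A.Lo + B.Lo;           // uaddo A.Lo, B.Lo
  uint64_t Carry = R.Lo < A.Lo; // ... and its carry output
  R.Hi = A.Hi + B.Hi + Carry;   // addcarry A.Hi, B.Hi, Carry
  return R;
}
// With a single carry path like this, the addcarry nodes form a chain rather
// than a diamond, which later combines handle much better.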
+
SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
SDNode *N) {
+ // fold (addcarry (xor a, -1), b, c) -> (subcarry b, a, !c) and flip carry.
+ if (isBitwiseNot(N0))
+ if (SDValue NotC = extractBooleanFlip(CarryIn, DAG, TLI, true)) {
+ SDLoc DL(N);
+ SDValue Sub = DAG.getNode(ISD::SUBCARRY, DL, N->getVTList(), N1,
+ N0.getOperand(0), NotC);
+ return CombineTo(N, Sub,
+ flipBoolean(Sub.getValue(1), DL, DAG, TLI));
+ }
+
// Iff the flag result is dead:
// (addcarry (add|uaddo X, Y), 0, Carry) -> (addcarry X, Y, Carry)
+ // Don't do this if the Carry comes from the uaddo. It won't remove the uaddo
+ // or the dependency between the instructions.
if ((N0.getOpcode() == ISD::ADD ||
- (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0)) &&
+ (N0.getOpcode() == ISD::UADDO && N0.getResNo() == 0 &&
+ N0.getValue(1) != CarryIn)) &&
isNullConstant(N1) && !N->hasAnyUseOfValue(1))
return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(),
N0.getOperand(0), N0.getOperand(1), CarryIn);
@@ -2529,35 +2991,13 @@ SDValue DAGCombiner::visitADDCARRYLike(SDValue N0, SDValue N1, SDValue CarryIn,
* When one of the addcarry arguments is itself a carry, we may be facing
* a diamond carry propagation. In which case we try to transform the DAG
* to ensure linear carry propagation if that is possible.
- *
- * We are trying to get:
- * (addcarry X, 0, (addcarry A, B, Z):Carry)
*/
if (auto Y = getAsCarry(TLI, N1)) {
- /**
- * (uaddo A, B)
- * / \
- * Carry Sum
- * | \
- * | (addcarry *, 0, Z)
- * | /
- * \ Carry
- * | /
- * (addcarry X, *, *)
- */
- if (Y.getOpcode() == ISD::UADDO &&
- CarryIn.getResNo() == 1 &&
- CarryIn.getOpcode() == ISD::ADDCARRY &&
- isNullConstant(CarryIn.getOperand(1)) &&
- CarryIn.getOperand(0) == Y.getValue(0)) {
- auto NewY = DAG.getNode(ISD::ADDCARRY, SDLoc(N), Y->getVTList(),
- Y.getOperand(0), Y.getOperand(1),
- CarryIn.getOperand(2));
- AddToWorklist(NewY.getNode());
- return DAG.getNode(ISD::ADDCARRY, SDLoc(N), N->getVTList(), N0,
- DAG.getConstant(0, SDLoc(N), N0.getValueType()),
- NewY.getValue(1));
- }
+ // Because both are carries, Y and Z can be swapped.
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, Y, CarryIn, N))
+ return R;
+ if (auto R = combineADDCARRYDiamond(*this, DAG, N0, CarryIn, Y, N))
+ return R;
}
return SDValue();
@@ -2620,7 +3060,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// -(X >>s 31) -> (X >>u 31)
if (N1->getOpcode() == ISD::SRA || N1->getOpcode() == ISD::SRL) {
ConstantSDNode *ShiftAmt = isConstOrConstSplat(N1.getOperand(1));
- if (ShiftAmt && ShiftAmt->getZExtValue() == BitWidth - 1) {
+ if (ShiftAmt && ShiftAmt->getAPIntValue() == (BitWidth - 1)) {
auto NewSh = N1->getOpcode() == ISD::SRA ? ISD::SRL : ISD::SRA;
if (!LegalOperations || TLI.isOperationLegal(NewSh, VT))
return DAG.getNode(NewSh, DL, VT, N1.getOperand(0), N1.getOperand(1));
@@ -2662,16 +3102,48 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
return N0.getOperand(0);
+ // fold (A+C1)-C2 -> A+(C1-C2)
+ if (N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(0), NewC);
+ }
+
// fold C2-(A+C1) -> (C2-C1)-A
if (N1.getOpcode() == ISD::ADD) {
SDValue N11 = N1.getOperand(1);
if (isConstantOrConstantVector(N0, /* NoOpaques */ true) &&
isConstantOrConstantVector(N11, /* NoOpaques */ true)) {
- SDValue NewC = DAG.getNode(ISD::SUB, DL, VT, N0, N11);
+ SDValue NewC = DAG.FoldConstantArithmetic(ISD::SUB, DL, VT, N0.getNode(),
+ N11.getNode());
+ assert(NewC && "Constant folding failed");
return DAG.getNode(ISD::SUB, DL, VT, NewC, N1.getOperand(0));
}
}
+ // fold (A-C1)-C2 -> A-(C1+C2)
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(1), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::ADD, DL, VT, N0.getOperand(1).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), NewC);
+ }
+
+ // fold (c1-A)-c2 -> (c1-c2)-A
+ if (N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N1, /* NoOpaques */ true) &&
+ isConstantOrConstantVector(N0.getOperand(0), /* NoOpaques */ true)) {
+ SDValue NewC = DAG.FoldConstantArithmetic(
+ ISD::SUB, DL, VT, N0.getOperand(0).getNode(), N1.getNode());
+ assert(NewC && "Constant folding failed");
+ return DAG.getNode(ISD::SUB, DL, VT, NewC, N0.getOperand(1));
+ }
+
// fold ((A+(B+or-C))-B) -> A+or-C
if (N0.getOpcode() == ISD::ADD &&
(N0.getOperand(1).getOpcode() == ISD::SUB ||
@@ -2728,6 +3200,63 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (SDValue V = foldAddSubOfSignBit(N, DAG))
return V;
+ if (SDValue V = foldAddSubMasked1(false, N0, N1, DAG, SDLoc(N)))
+ return V;
+
+ // (x - y) - 1 -> add (xor y, -1), x
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB && isOneOrOneSplat(N1)) {
+ SDValue Xor = DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ return DAG.getNode(ISD::ADD, DL, VT, Xor, N0.getOperand(0));
+ }
+
+ // Look for:
+ // sub y, (xor x, -1)
+ // And if the target does not like this form then turn into:
+ // add (add x, y), 1
+ if (TLI.preferIncOfAddToSubOfNot(VT) && N1.hasOneUse() && isBitwiseNot(N1)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, Add, DAG.getConstant(1, DL, VT));
+ }
+
+ // Hoist one-use addition by non-opaque constant:
+ // (x + C) - y -> (x - y) + C
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::ADD, DL, VT, Sub, N0.getOperand(1));
+ }
+ // y - (x + C) -> (y - x) - C
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::ADD &&
+ isConstantOrConstantVector(N1.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, N1.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N1.getOperand(1));
+ }
+ // (x - C) - y -> (x - y) - C
+ // This is necessary because SUB(X,C) -> ADD(X,-C) doesn't work for vectors.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(1), /*NoOpaques=*/true)) {
+ SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, Sub, N0.getOperand(1));
+ }
+ // (C - x) - y -> C - (x + y)
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::SUB &&
+ isConstantOrConstantVector(N0.getOperand(0), /*NoOpaques=*/true)) {
+ SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1), N1);
+ return DAG.getNode(ISD::SUB, DL, VT, N0.getOperand(0), Add);
+ }
+
+ // If the target's bool is represented as 0/-1, prefer to make this 'add 0/-1'
+ // rather than 'sub 0/1' (the sext should get folded).
+ // sub X, (zext i1 Y) --> add X, (sext i1 Y)
+ if (N1.getOpcode() == ISD::ZERO_EXTEND &&
+ N1.getOperand(0).getScalarValueSizeInBits() == 1 &&
+ TLI.getBooleanContents(VT) ==
+ TargetLowering::ZeroOrNegativeOneBooleanContent) {
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, N1.getOperand(0));
+ return DAG.getNode(ISD::ADD, DL, VT, N0, SExt);
+ }
+
// fold Y = sra (X, size(X)-1); sub (xor (X, Y), Y) -> (abs X)
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT)) {
if (N0.getOpcode() == ISD::XOR && N1.getOpcode() == ISD::SRA) {
@@ -2772,7 +3301,8 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
if (!LegalOperations && N1.getOpcode() == ISD::SRL && N1.hasOneUse()) {
SDValue ShAmt = N1.getOperand(1);
ConstantSDNode *ShAmtC = isConstOrConstSplat(ShAmt);
- if (ShAmtC && ShAmtC->getZExtValue() == N1.getScalarValueSizeInBits() - 1) {
+ if (ShAmtC &&
+ ShAmtC->getAPIntValue() == (N1.getScalarValueSizeInBits() - 1)) {
SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, N1.getOperand(0), ShAmt);
return DAG.getNode(ISD::ADD, DL, VT, N0, SRA);
}
@@ -2846,12 +3376,11 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitUSUBO(SDNode *N) {
+SDValue DAGCombiner::visitSUBO(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
EVT VT = N0.getValueType();
- if (VT.isVector())
- return SDValue();
+ bool IsSigned = (ISD::SSUBO == N->getOpcode());
EVT CarryVT = N->getValueType(1);
SDLoc DL(N);
@@ -2861,17 +3390,25 @@ SDValue DAGCombiner::visitUSUBO(SDNode *N) {
return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
DAG.getUNDEF(CarryVT));
- // fold (usubo x, x) -> 0 + no borrow
+ // fold (subo x, x) -> 0 + no borrow
if (N0 == N1)
return CombineTo(N, DAG.getConstant(0, DL, VT),
DAG.getConstant(0, DL, CarryVT));
- // fold (usubo x, 0) -> x + no borrow
- if (isNullConstant(N1))
+ ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
+
+ // fold (subo x, c) -> (addo x, -c)
+ if (IsSigned && N1C && !N1C->getAPIntValue().isMinSignedValue()) {
+ return DAG.getNode(ISD::SADDO, DL, N->getVTList(), N0,
+ DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
+ }
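(Illustrative sketch, not from the patch: why C may not be the minimum signed
value; uses the GCC/Clang overflow builtins.)

#include <cstdint>

static bool rewriteWouldFlipTheFlag(int32_t X) {
  int32_t R;
  // ssubo X, INT32_MIN overflows exactly when X >= 0 ...
  bool SubOvf = __builtin_sub_overflow(X, INT32_MIN, &R);
  // ... while saddo X, -INT32_MIN (-INT32_MIN wraps back to INT32_MIN)
  // overflows exactly when X < 0.
  bool AddOvf = __builtin_add_overflow(X, INT32_MIN, &R);
  return SubOvf != AddOvf; // true for every X, so the fold must bail out
}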
+
+ // fold (subo x, 0) -> x + no borrow
+ if (isNullOrNullSplat(N1))
return CombineTo(N, N0, DAG.getConstant(0, DL, CarryVT));
// Canonicalize (usubo -1, x) -> ~x, i.e. (xor x, -1) + no borrow
- if (isAllOnesConstant(N0))
+ if (!IsSigned && isAllOnesOrAllOnesSplat(N0))
return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
DAG.getConstant(0, DL, CarryVT));
@@ -3012,13 +3549,13 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
MathOp = ISD::SUB;
if (MathOp != ISD::DELETED_NODE) {
- unsigned ShAmt = MathOp == ISD::ADD ? (MulC - 1).logBase2()
- : (MulC + 1).logBase2();
- assert(ShAmt > 0 && ShAmt < VT.getScalarSizeInBits() &&
- "Not expecting multiply-by-constant that could have simplified");
+ unsigned ShAmt =
+ MathOp == ISD::ADD ? (MulC - 1).logBase2() : (MulC + 1).logBase2();
+ assert(ShAmt < VT.getScalarSizeInBits() &&
+ "multiply-by-constant generated out of bounds shift");
SDLoc DL(N);
- SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, N0,
- DAG.getConstant(ShAmt, DL, VT));
+ SDValue Shl =
+ DAG.getNode(ISD::SHL, DL, VT, N0, DAG.getConstant(ShAmt, DL, VT));
SDValue R = DAG.getNode(MathOp, DL, VT, Shl, N0);
if (ConstValue1.isNegative())
R = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), R);
@@ -3069,7 +3606,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
N0.getOperand(1), N1));
// reassociate mul
- if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RMUL = reassociateOps(ISD::MUL, SDLoc(N), N0, N1, N->getFlags()))
return RMUL;
return SDValue();
@@ -3612,7 +4149,6 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
// fold (mulhu x, (1 << c)) -> x >> (bitwidth - c)
if (isConstantOrConstantVector(N1, /*NoOpaques*/ true) &&
DAG.isKnownToBeAPowerOfTwo(N1) && hasOperation(ISD::SRL, VT)) {
- SDLoc DL(N);
unsigned NumEltBits = VT.getScalarSizeInBits();
SDValue LogBase2 = BuildLogBase2(N1, DL);
SDValue SRLAmt = DAG.getNode(
@@ -3753,22 +4289,14 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitSMULO(SDNode *N) {
- // (smulo x, 2) -> (saddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
- if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
-
- return SDValue();
-}
+SDValue DAGCombiner::visitMULO(SDNode *N) {
+ bool IsSigned = (ISD::SMULO == N->getOpcode());
-SDValue DAGCombiner::visitUMULO(SDNode *N) {
- // (umulo x, 2) -> (uaddo x, x)
- if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
+ // (mulo x, 2) -> (addo x, x)
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
- N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
+ N->getVTList(), N->getOperand(0), N->getOperand(0));
return SDValue();
}
@@ -4075,6 +4603,33 @@ SDValue DAGCombiner::foldLogicOfSetCCs(bool IsAnd, SDValue N0, SDValue N1,
SDValue Zero = DAG.getConstant(0, DL, OpVT);
return DAG.getSetCC(DL, VT, Or, Zero, CC1);
}
+
+ // Turn compare of constants whose difference is 1 bit into add+and+setcc.
+ // TODO - support non-uniform vector amounts.
+ if ((IsAnd && CC1 == ISD::SETNE) || (!IsAnd && CC1 == ISD::SETEQ)) {
+ // Match a shared variable operand and 2 non-opaque constant operands.
+ ConstantSDNode *C0 = isConstOrConstSplat(LR);
+ ConstantSDNode *C1 = isConstOrConstSplat(RR);
+ if (LL == RL && C0 && C1 && !C0->isOpaque() && !C1->isOpaque()) {
+ // Canonicalize larger constant as C0.
+ if (C1->getAPIntValue().ugt(C0->getAPIntValue()))
+ std::swap(C0, C1);
+
+ // The difference of the constants must be a single bit.
+ const APInt &C0Val = C0->getAPIntValue();
+ const APInt &C1Val = C1->getAPIntValue();
+ if ((C0Val - C1Val).isPowerOf2()) {
+ // and/or (setcc X, C0, ne), (setcc X, C1, ne/eq) -->
+ // setcc ((add X, -C1), ~(C0 - C1)), 0, ne/eq
+ SDValue OffsetC = DAG.getConstant(-C1Val, DL, OpVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LL, OffsetC);
+ SDValue MaskC = DAG.getConstant(~(C0Val - C1Val), DL, OpVT);
+ SDValue And = DAG.getNode(ISD::AND, DL, OpVT, Add, MaskC);
+ SDValue Zero = DAG.getConstant(0, DL, OpVT);
+ return DAG.getSetCC(DL, VT, And, Zero, CC0);
+ }
+ }
+ }
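(Illustrative instance, not from the patch: C0 = 5, C1 = 1, so C0 - C1 = 4 is
a power of two.)

#include <cstdint>

static bool viaTwoCompares(uint32_t X) { return X != 5u && X != 1u; }
static bool viaAddMask(uint32_t X) { return ((X - 1u) & ~4u) != 0u; }
// Both agree for every X: subtracting C1 moves {1, 5} onto {0, 4}, and
// masking out bit 2 collapses that pair onto 0.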
}
// Canonicalize equivalent operands to LL == RL.
@@ -4259,7 +4814,8 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
// Ensure that this isn't going to produce an unsupported unaligned access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
- LDST->getAddressSpace(), ShAmt / 8))
+ LDST->getAddressSpace(), ShAmt / 8,
+ LDST->getMemOperand()->getFlags()))
return false;
// It's not possible to generate a constant of extended or untyped type.
@@ -4316,9 +4872,7 @@ bool DAGCombiner::SearchForAndLoads(SDNode *N,
SDNode *&NodeToMask) {
// Recursively search for the operands, looking for loads which can be
// narrowed.
- for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) {
- SDValue Op = N->getOperand(i);
-
+ for (SDValue Op : N->op_values()) {
if (Op.getValueType().isVector())
return false;
@@ -4480,7 +5034,7 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
SDValue N1 = N->getOperand(1);
// Do we actually prefer shifts over mask?
- if (!TLI.preferShiftsToClearExtremeBits(N0))
+ if (!TLI.shouldFoldMaskToVariableShiftPair(N0))
return SDValue();
// Try to match (-1 '[outer] logical shift' y)
@@ -4575,7 +5129,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return NewSel;
// reassociate and
- if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue RAND = reassociateOps(ISD::AND, SDLoc(N), N0, N1, N->getFlags()))
return RAND;
// Try to convert a constant mask AND into a shuffle clear mask.
@@ -4644,24 +5198,22 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
// the first vector value and FF for the rest, repeating. We need a mask
// that will apply equally to all members of the vector, so AND all the
// lanes of the constant together.
- EVT VT = Vector->getValueType(0);
- unsigned BitWidth = VT.getScalarSizeInBits();
+ unsigned EltBitWidth = Vector->getValueType(0).getScalarSizeInBits();
// If the splat value has been compressed to a bitlength lower
// than the size of the vector lane, we need to re-expand it to
// the lane size.
- if (BitWidth > SplatBitSize)
- for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
- SplatBitSize < BitWidth;
- SplatBitSize = SplatBitSize * 2)
+ if (EltBitWidth > SplatBitSize)
+ for (SplatValue = SplatValue.zextOrTrunc(EltBitWidth);
+ SplatBitSize < EltBitWidth; SplatBitSize = SplatBitSize * 2)
SplatValue |= SplatValue.shl(SplatBitSize);
// Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
// multiple of 'EltBitWidth'. Otherwise, we could propagate a wrong value.
- if (SplatBitSize % BitWidth == 0) {
- Constant = APInt::getAllOnesValue(BitWidth);
- for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
- Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
+ if ((SplatBitSize % EltBitWidth) == 0) {
+ Constant = APInt::getAllOnesValue(EltBitWidth);
+ for (unsigned i = 0, n = (SplatBitSize / EltBitWidth); i < n; ++i)
+ Constant &= SplatValue.extractBits(EltBitWidth, i * EltBitWidth);
}
}
}
@@ -4773,44 +5325,29 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0);
// fold (zext_inreg (extload x)) -> (zextload x)
- if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT MemVT = LN0->getMemoryVT();
- // If we zero all the possible extended bits, then we can turn this into
- // a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
- TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
- AddToWorklist(N);
- CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
- }
- }
// fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
- if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
- N0.hasOneUse()) {
+ if (ISD::isUNINDEXEDLoad(N0.getNode()) &&
+ (ISD::isEXTLoad(N0.getNode()) ||
+ (ISD::isSEXTLoad(N0.getNode()) && N0.hasOneUse()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
// If we zero all the possible extended bits, then we can turn this into
// a zextload if we are running before legalize or the operation is legal.
- unsigned BitWidth = N1.getScalarValueSizeInBits();
- if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
- BitWidth - MemVT.getScalarSizeInBits())) &&
+ unsigned ExtBitSize = N1.getScalarValueSizeInBits();
+ unsigned MemBitSize = MemVT.getScalarSizeInBits();
+ APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
+ if (DAG.MaskedValueIsZero(N1, ExtBits) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
- LN0->getChain(), LN0->getBasePtr(),
- MemVT, LN0->getMemOperand());
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
+ LN0->getBasePtr(), MemVT, LN0->getMemOperand());
AddToWorklist(N);
CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
- return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
@@ -5155,6 +5692,23 @@ SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *N) {
return SDValue();
}
+/// OR combines for which the commuted variant will be tried as well.
+static SDValue visitORCommutative(
+ SelectionDAG &DAG, SDValue N0, SDValue N1, SDNode *N) {
+ EVT VT = N0.getValueType();
+ if (N0.getOpcode() == ISD::AND) {
+ // fold (or (and X, (xor Y, -1)), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(1)) && N0.getOperand(1).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(0), N1);
+
+ // fold (or (and (xor Y, -1), X), Y) -> (or X, Y)
+ if (isBitwiseNot(N0.getOperand(0)) && N0.getOperand(0).getOperand(0) == N1)
+ return DAG.getNode(ISD::OR, SDLoc(N), VT, N0.getOperand(1), N1);
+ }
+
+ return SDValue();
+}
+
SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5284,7 +5838,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return BSwap;
// reassociate or
- if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
+ if (SDValue ROR = reassociateOps(ISD::OR, SDLoc(N), N0, N1, N->getFlags()))
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
@@ -5302,6 +5856,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
}
+ if (SDValue Combined = visitORCommutative(DAG, N0, N1, N))
+ return Combined;
+ if (SDValue Combined = visitORCommutative(DAG, N1, N0, N))
+ return Combined;
+
// Simplify: (or (op x...), (op y...)) -> (op (or x, y))
if (N0.getOpcode() == N1.getOpcode())
if (SDValue V = hoistLogicOpWithSameOpcodeHands(N))
@@ -5318,6 +5877,12 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
+ // If OR can be rewritten into ADD, try combines based on ADD.
+ if ((!LegalOperations || TLI.isOperationLegal(ISD::ADD, VT)) &&
+ DAG.haveNoCommonBitsSet(N0, N1))
+ if (SDValue Combined = visitADDLike(N))
+ return Combined;
+
return SDValue();
}
@@ -5869,6 +6434,213 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
return None;
}
+static unsigned LittleEndianByteAt(unsigned BW, unsigned i) {
+ return i;
+}
+
+static unsigned BigEndianByteAt(unsigned BW, unsigned i) {
+ return BW - i - 1;
+}
+
+// Check if the byte offsets we are looking at match either a big or little
+// endian value load. Return true for big endian, false for little endian,
+// and None if the match failed.
+static Optional<bool> isBigEndian(const SmallVector<int64_t, 4> &ByteOffsets,
+ int64_t FirstOffset) {
+ // The endianness can be decided only when there are at least 2 bytes.
+ unsigned Width = ByteOffsets.size();
+ if (Width < 2)
+ return None;
+
+ bool BigEndian = true, LittleEndian = true;
+ for (unsigned i = 0; i < Width; i++) {
+ int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
+ LittleEndian &= CurrentByteOffset == LittleEndianByteAt(Width, i);
+ BigEndian &= CurrentByteOffset == BigEndianByteAt(Width, i);
+ if (!BigEndian && !LittleEndian)
+ return None;
+ }
+
+ assert((BigEndian != LittleEndian) && "It should be either big endian or "
+ "little endian");
+ return BigEndian;
+}
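(Illustrative sketch, not from the patch: the offsets the matcher compares
against. Storing an i32 on a little-endian host places byte i of the value at
offset i, i.e. {0,1,2,3}; a big-endian layout yields {3,2,1,0}.)

#include <cstdint>
#include <cstring>

static bool hostIsLittleEndian() {
  uint32_t V = 0x03020100u; // byte i of V holds the value i
  uint8_t B[4];
  std::memcpy(B, &V, sizeof(V));
  return B[0] == 0 && B[1] == 1 && B[2] == 2 && B[3] == 3;
}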
+
+static SDValue stripTruncAndExt(SDValue Value) {
+ switch (Value.getOpcode()) {
+ case ISD::TRUNCATE:
+ case ISD::ZERO_EXTEND:
+ case ISD::SIGN_EXTEND:
+ case ISD::ANY_EXTEND:
+ return stripTruncAndExt(Value.getOperand(0));
+ }
+ return Value;
+}
+
+/// Match a pattern where a wide type scalar value is stored by several narrow
/// stores. Fold it into a single store or a BSWAP and a store if the target
+/// supports it.
+///
+/// Assuming little endian target:
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 0) & 0xFF;
+/// p[1] = (val >> 8) & 0xFF;
+/// p[2] = (val >> 16) & 0xFF;
+/// p[3] = (val >> 24) & 0xFF;
+/// =>
+/// *((i32)p) = val;
+///
+/// i8 *p = ...
+/// i32 val = ...
+/// p[0] = (val >> 24) & 0xFF;
+/// p[1] = (val >> 16) & 0xFF;
+/// p[2] = (val >> 8) & 0xFF;
+/// p[3] = (val >> 0) & 0xFF;
+/// =>
+/// *((i32)p) = BSWAP(val);
+SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
+ // Collect all the stores in the chain.
+ SDValue Chain;
+ SmallVector<StoreSDNode *, 8> Stores;
+ for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ if (Store->getMemoryVT() != MVT::i8 ||
+ Store->isVolatile() || Store->isIndexed())
+ return SDValue();
+ Stores.push_back(Store);
+ Chain = Store->getChain();
+ }
+ // Handle simple types only.
+ unsigned Width = Stores.size();
+ EVT VT = EVT::getIntegerVT(
+ *DAG.getContext(), Width * N->getMemoryVT().getSizeInBits());
+ if (VT != MVT::i16 && VT != MVT::i32 && VT != MVT::i64)
+ return SDValue();
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (LegalOperations && !TLI.isOperationLegal(ISD::STORE, VT))
+ return SDValue();
+
+ // Check if all the bytes of the combined value we are looking at are stored
+ // to the same base address. Collect bytes offsets from Base address into
+ // ByteOffsets.
+ SDValue CombinedValue;
+ SmallVector<int64_t, 4> ByteOffsets(Width, INT64_MAX);
+ int64_t FirstOffset = INT64_MAX;
+ StoreSDNode *FirstStore = nullptr;
+ Optional<BaseIndexOffset> Base;
+ for (auto Store : Stores) {
+ // All the stores store a different byte of the CombinedValue. A truncate is
+ // required to get that byte value.
+ SDValue Trunc = Store->getValue();
+ if (Trunc.getOpcode() != ISD::TRUNCATE)
+ return SDValue();
+ // A shift operation is required to get the right byte offset, except for
+ // the first byte.
+ int64_t Offset = 0;
+ SDValue Value = Trunc.getOperand(0);
+ if (Value.getOpcode() == ISD::SRL ||
+ Value.getOpcode() == ISD::SRA) {
+ ConstantSDNode *ShiftOffset =
+ dyn_cast<ConstantSDNode>(Value.getOperand(1));
+ // Trying to match the following pattern. The shift offset must be
+ // a constant and a multiple of 8. It is the byte offset in "y".
+ //
+ // x = srl y, offset
+ // i8 z = trunc x
+ // store z, ...
+ if (!ShiftOffset || (ShiftOffset->getSExtValue() % 8))
+ return SDValue();
+
+ Offset = ShiftOffset->getSExtValue()/8;
+ Value = Value.getOperand(0);
+ }
+
+ // Stores must share the same combined value with different offsets.
+ if (!CombinedValue)
+ CombinedValue = Value;
+ else if (stripTruncAndExt(CombinedValue) != stripTruncAndExt(Value))
+ return SDValue();
+
+ // The trunc and all the extend operations should be stripped to get the
+ // real value being stored.
+ else if (CombinedValue.getValueType() != VT) {
+ if (Value.getValueType() == VT ||
+ Value.getValueSizeInBits() > CombinedValue.getValueSizeInBits())
+ CombinedValue = Value;
+ // Give up if the combined value type is smaller than the store size.
+ if (CombinedValue.getValueSizeInBits() < VT.getSizeInBits())
+ return SDValue();
+ }
+
+ // Stores must share the same base address
+ BaseIndexOffset Ptr = BaseIndexOffset::match(Store, DAG);
+ int64_t ByteOffsetFromBase = 0;
+ if (!Base)
+ Base = Ptr;
+ else if (!Base->equalBaseIndex(Ptr, DAG, ByteOffsetFromBase))
+ return SDValue();
+
+ // Remember the first byte store
+ if (ByteOffsetFromBase < FirstOffset) {
+ FirstStore = Store;
+ FirstOffset = ByteOffsetFromBase;
+ }
+ // Map the offset in the store and the offset in the combined value, and
+ // early return if it has been set before.
+ if (Offset < 0 || Offset >= Width || ByteOffsets[Offset] != INT64_MAX)
+ return SDValue();
+ ByteOffsets[Offset] = ByteOffsetFromBase;
+ }
+
+ assert(FirstOffset != INT64_MAX && "First byte offset must be set");
+ assert(FirstStore && "First store must be set");
+
+ // Check if the bytes of the combined value we are looking at match either
+ // a big or little endian value store.
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
+ // The node we are looking at matches the pattern, check if we can replace
+ // it with a single store, plus a bswap if needed.
+
+ // If the store needs byte swap check if the target supports it
+ bool NeedsBswap = DAG.getDataLayout().isBigEndian() != *IsBigEndian;
+
+ // Before legalize we can introduce illegal bswaps which will be later
+ // converted to an explicit bswap sequence. This way we end up with a single
+ // store and byte shuffling instead of several stores and byte shuffling.
+ if (NeedsBswap && LegalOperations && !TLI.isOperationLegal(ISD::BSWAP, VT))
+ return SDValue();
+
+ // Check that a store of the wide type is both allowed and fast on the target
+ bool Fast = false;
+ bool Allowed =
+ TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *FirstStore->getMemOperand(), &Fast);
+ if (!Allowed || !Fast)
+ return SDValue();
+
+ if (VT != CombinedValue.getValueType()) {
+ assert(CombinedValue.getValueType().getSizeInBits() > VT.getSizeInBits() &&
+ "Get unexpected store value to combine");
+ CombinedValue = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT,
+ CombinedValue);
+ }
+
+ if (NeedsBswap)
+ CombinedValue = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, CombinedValue);
+
+ SDValue NewStore =
+ DAG.getStore(Chain, SDLoc(N), CombinedValue, FirstStore->getBasePtr(),
+ FirstStore->getPointerInfo(), FirstStore->getAlignment());
+
+ // Rely on other DAG combine rules to remove the other individual stores.
+ DAG.ReplaceAllUsesWith(N, NewStore.getNode());
+ return NewStore;
+}
+
/// Match a pattern where a wide type scalar value is loaded by several narrow
/// loads and combined by shifts and ors. Fold it into a single load or a load
/// and a BSWAP if the target supports it.
@@ -5916,11 +6688,6 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
if (LegalOperations && !TLI.isOperationLegal(ISD::LOAD, VT))
return SDValue();
- std::function<unsigned(unsigned, unsigned)> LittleEndianByteAt = [](
- unsigned BW, unsigned i) { return i; };
- std::function<unsigned(unsigned, unsigned)> BigEndianByteAt = [](
- unsigned BW, unsigned i) { return BW - i - 1; };
-
bool IsBigEndianTarget = DAG.getDataLayout().isBigEndian();
auto MemoryByteOffset = [&] (ByteProvider P) {
assert(P.isMemory() && "Must be a memory byte provider");
@@ -5987,15 +6754,10 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check if the bytes of the OR we are looking at match with either big or
// little endian value load
- bool BigEndian = true, LittleEndian = true;
- for (unsigned i = 0; i < ByteWidth; i++) {
- int64_t CurrentByteOffset = ByteOffsets[i] - FirstOffset;
- LittleEndian &= CurrentByteOffset == LittleEndianByteAt(ByteWidth, i);
- BigEndian &= CurrentByteOffset == BigEndianByteAt(ByteWidth, i);
- if (!BigEndian && !LittleEndian)
- return SDValue();
- }
- assert((BigEndian != LittleEndian) && "should be either or");
+ Optional<bool> IsBigEndian = isBigEndian(ByteOffsets, FirstOffset);
+ if (!IsBigEndian.hasValue())
+ return SDValue();
+
assert(FirstByteProvider && "must be set");
// Ensure that the first byte is loaded from zero offset of the first load.
@@ -6008,7 +6770,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// replace it with a single load and bswap if needed.
// If the load needs byte swap check if the target supports it
- bool NeedsBswap = IsBigEndianTarget != BigEndian;
+ bool NeedsBswap = IsBigEndianTarget != *IsBigEndian;
// Before legalize we can introduce illegal bswaps which will be later
// converted to an explicit bswap sequence. This way we end up with a single
@@ -6019,8 +6781,7 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
// Check that a load of the wide type is both allowed and fast on the target
bool Fast = false;
bool Allowed = TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(),
- VT, FirstLoad->getAddressSpace(),
- FirstLoad->getAlignment(), &Fast);
+ VT, *FirstLoad->getMemOperand(), &Fast);
if (!Allowed || !Fast)
return SDValue();
@@ -6160,7 +6921,7 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return NewSel;
// reassociate xor
- if (SDValue RXOR = ReassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
+ if (SDValue RXOR = reassociateOps(ISD::XOR, DL, N0, N1, N->getFlags()))
return RXOR;
// fold !(x cc y) -> (x !cc y)
@@ -6218,6 +6979,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
}
}
+
+ // fold (not (neg x)) -> (add X, -1)
+ // FIXME: This can be generalized to (not (sub Y, X)) -> (add X, ~Y) if
+ // Y is a constant or the subtract has a single use.
+ if (isAllOnesConstant(N1) && N0.getOpcode() == ISD::SUB &&
+ isNullConstant(N0.getOperand(0))) {
+ return DAG.getNode(ISD::ADD, DL, VT, N0.getOperand(1),
+ DAG.getAllOnesConstant(DL, VT));
+ }
+
// fold (xor (and x, y), y) -> (and (not x), y)
if (N0Opcode == ISD::AND && N0.hasOneUse() && N0->getOperand(1) == N1) {
SDValue X = N0.getOperand(0);
@@ -6310,11 +7081,16 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
+/// We are looking for: (shift being one of shl/sra/srl)
+/// shift (binop X, C0), C1
+/// And want to transform into:
+/// binop (shift X, C1), (shift C0, C1)
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
+ // The inner binop must be one-use, since we want to replace it.
SDNode *LHS = N->getOperand(0).getNode();
if (!LHS->hasOneUse()) return SDValue();
@@ -6322,56 +7098,43 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- bool HighBitSet = false; // Can we transform this if the high bit is set?
-
switch (LHS->getOpcode()) {
- default: return SDValue();
+ default:
+ return SDValue();
case ISD::OR:
case ISD::XOR:
- HighBitSet = false; // We can only transform sra if the high bit is clear.
- break;
case ISD::AND:
- HighBitSet = true; // We can only transform sra if the high bit is set.
break;
case ISD::ADD:
if (N->getOpcode() != ISD::SHL)
return SDValue(); // only shl(add) not sr[al](add).
- HighBitSet = false; // We can only transform sra if the high bit is clear.
break;
}
// We require the RHS of the binop to be a constant and not opaque as well.
ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
- if (!BinOpCst) return SDValue();
+ if (!BinOpCst)
+ return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
- // or is copy/select.Enable this in other cases when figure out it's exactly profitable.
- SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
- bool isShift = BinOpLHSVal->getOpcode() == ISD::SHL ||
- BinOpLHSVal->getOpcode() == ISD::SRA ||
- BinOpLHSVal->getOpcode() == ISD::SRL;
- bool isCopyOrSelect = BinOpLHSVal->getOpcode() == ISD::CopyFromReg ||
- BinOpLHSVal->getOpcode() == ISD::SELECT;
-
- if ((!isShift || !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) &&
- !isCopyOrSelect)
+ // or is copy/select. Enable this in other cases once we figure out when it
+ // is exactly profitable.
+ SDValue BinOpLHSVal = LHS->getOperand(0);
+ bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
+ BinOpLHSVal.getOpcode() == ISD::SRA ||
+ BinOpLHSVal.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(BinOpLHSVal.getOperand(1));
+ bool IsCopyOrSelect = BinOpLHSVal.getOpcode() == ISD::CopyFromReg ||
+ BinOpLHSVal.getOpcode() == ISD::SELECT;
+
+ if (!IsShiftByConstant && !IsCopyOrSelect)
return SDValue();
- if (isCopyOrSelect && N->hasOneUse())
+ if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
EVT VT = N->getValueType(0);
- // If this is a signed shift right, and the high bit is modified by the
- // logical operation, do not perform the transformation. The highBitSet
- // boolean indicates the value of the high bit of the constant which would
- // cause it to be modified for this operation.
- if (N->getOpcode() == ISD::SRA) {
- bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
- if (BinOpRHSSignSet != HighBitSet)
- return SDValue();
- }
-
if (!TLI.isDesirableToCommuteWithShift(N, Level))
return SDValue();
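(Illustrative instance, not from the patch: the shl-of-add case of the
transform documented above.)

#include <cstdint>

static uint32_t beforeFold(uint32_t X) { return (X + 3u) << 2; }       // shl (add x, 3), 2
static uint32_t afterFold(uint32_t X) { return (X << 2) + (3u << 2); } // add (shl x, 2), 12
// Both compute the same value; hoisting the shift exposes the shifted
// constant to further folding, e.g. into addressing modes.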
@@ -6395,11 +7158,12 @@ SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
assert(N->getOperand(0).getOpcode() == ISD::AND);
// (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
- if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
+ EVT TruncVT = N->getValueType(0);
+ if (N->hasOneUse() && N->getOperand(0).hasOneUse() &&
+ TLI.isTypeDesirableForOp(ISD::AND, TruncVT)) {
SDValue N01 = N->getOperand(0).getOperand(1);
if (isConstantOrConstantVector(N01, /* NoOpaques */ true)) {
SDLoc DL(N);
- EVT TruncVT = N->getValueType(0);
SDValue N00 = N->getOperand(0).getOperand(0);
SDValue Trunc00 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00);
SDValue Trunc01 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N01);
@@ -6431,6 +7195,7 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
}
// fold (rot x, c) -> (rot x, c % BitSize)
+ // TODO - support non-uniform vector amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N1)) {
if (Cst->getAPIntValue().uge(Bitsize)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(Bitsize);
@@ -6476,6 +7241,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return V;
EVT VT = N0.getValueType();
+ EVT ShiftVT = N1.getValueType();
unsigned OpSizeInBits = VT.getScalarSizeInBits();
// fold vector ops
@@ -6506,6 +7272,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (shl c1, c2) -> c1<<c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
@@ -6517,6 +7284,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
if (DAG.MaskedValueIsZero(SDValue(N, 0),
APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, SDLoc(N), VT);
+
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -6524,6 +7292,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
}
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6548,69 +7317,86 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
};
if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchInRange)) {
SDLoc DL(N);
- EVT ShiftVT = N1.getValueType();
SDValue Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, N1, N0.getOperand(1));
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Sum);
}
}
- // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // fold (shl (ext (shl x, c1)), c2) -> (shl (ext x), (add c1, c2))
// For this to be valid, the second form must not preserve any of the bits
// that are shifted out by the inner shift in the first form. This means
// the outer shift size must be >= the number of bits added by the ext.
// As a corollary, we don't care what kind of ext it is.
- if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
- N0.getOpcode() == ISD::ANY_EXTEND ||
- N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
N0.getOperand(0).getOpcode() == ISD::SHL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- APInt c1 = N0Op0C1->getAPIntValue();
- APInt c2 = N1C->getAPIntValue();
- zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+ EVT InnerVT = N0Op0.getValueType();
+ uint64_t InnerBitwidth = InnerVT.getScalarSizeInBits();
- EVT InnerShiftVT = N0Op0.getValueType();
- uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
- if (c2.uge(OpSizeInBits - InnerShiftSize)) {
- SDLoc DL(N0);
- APInt Sum = c1 + c2;
- if (Sum.uge(OpSizeInBits))
- return DAG.getConstant(0, DL, VT);
+ auto MatchOutOfRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).uge(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchOutOfRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true))
+ return DAG.getConstant(0, SDLoc(N), VT);
- return DAG.getNode(
- ISD::SHL, DL, VT,
- DAG.getNode(N0.getOpcode(), DL, VT, N0Op0->getOperand(0)),
- DAG.getConstant(Sum.getZExtValue(), DL, N1.getValueType()));
- }
+ auto MatchInRange = [OpSizeInBits, InnerBitwidth](ConstantSDNode *LHS,
+ ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2, 1 /* Overflow Bit */);
+ return c2.uge(OpSizeInBits - InnerBitwidth) &&
+ (c1 + c2).ult(OpSizeInBits);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchInRange,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ SDValue Ext = DAG.getNode(N0.getOpcode(), DL, VT, N0Op0.getOperand(0));
+ SDValue Sum = DAG.getZExtOrTrunc(InnerShiftAmt, DL, ShiftVT);
+ Sum = DAG.getNode(ISD::ADD, DL, ShiftVT, Sum, N1);
+ return DAG.getNode(ISD::SHL, DL, VT, Ext, Sum);
}
}
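(Illustrative instance, not from the patch: the in-range case with an i8
value zero-extended to i32, c1 = 1, c2 = 24. The outer shift amount is >= the
24 bits added by the extension, so the bits discarded by the inner shift
could not have survived anyway.)

#include <cstdint>

static uint32_t beforeExtShl(uint8_t X) {
  return static_cast<uint32_t>(static_cast<uint8_t>(X << 1)) << 24; // shl (zext (shl x, 1)), 24
}
static uint32_t afterExtShl(uint8_t X) {
  return static_cast<uint32_t>(X) << 25;                            // shl (zext x), 25
}
// beforeExtShl(X) == afterExtShl(X) for every X.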
// fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
// Only fold this if the inner zext has no other uses to avoid increasing
// the total number of instructions.
- if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
+ if (N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
SDValue N0Op0 = N0.getOperand(0);
- if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
- if (N0Op0C1->getAPIntValue().ult(VT.getScalarSizeInBits())) {
- uint64_t c1 = N0Op0C1->getZExtValue();
- uint64_t c2 = N1C->getZExtValue();
- if (c1 == c2) {
- SDValue NewOp0 = N0.getOperand(0);
- EVT CountVT = NewOp0.getOperand(1).getValueType();
- SDLoc DL(N);
- SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
- NewOp0,
- DAG.getConstant(c2, DL, CountVT));
- AddToWorklist(NewSHL.getNode());
- return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
- }
- }
+ SDValue InnerShiftAmt = N0Op0.getOperand(1);
+
+ auto MatchEqual = [VT](ConstantSDNode *LHS, ConstantSDNode *RHS) {
+ APInt c1 = LHS->getAPIntValue();
+ APInt c2 = RHS->getAPIntValue();
+ zeroExtendToMatch(c1, c2);
+ return c1.ult(VT.getScalarSizeInBits()) && (c1 == c2);
+ };
+ if (ISD::matchBinaryPredicate(InnerShiftAmt, N1, MatchEqual,
+ /*AllowUndefs*/ false,
+ /*AllowTypeMismatch*/ true)) {
+ SDLoc DL(N);
+ EVT InnerShiftAmtVT = N0Op0.getOperand(1).getValueType();
+ SDValue NewSHL = DAG.getZExtOrTrunc(N1, DL, InnerShiftAmtVT);
+ NewSHL = DAG.getNode(ISD::SHL, DL, N0Op0.getValueType(), N0Op0, NewSHL);
+ AddToWorklist(NewSHL.getNode());
+ return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
}
}
// fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2
// fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2
+ // TODO - support non-uniform vector shift amounts.
if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
N0->getFlags().hasExact()) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -6619,9 +7405,9 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
SDLoc DL(N);
if (C1 <= C2)
return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(C2 - C1, DL, N1.getValueType()));
+ DAG.getConstant(C2 - C1, DL, ShiftVT));
return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
- DAG.getConstant(C1 - C2, DL, N1.getValueType()));
+ DAG.getConstant(C1 - C2, DL, ShiftVT));
}
}
@@ -6629,11 +7415,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
// (and (srl x, (sub c1, c2), MASK)
// Only fold this if the inner shift has no other uses -- if it does, folding
// this will increase the total number of instructions.
+ // TODO - drop hasOneUse requirement if c1 == c2?
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
- TLI.shouldFoldShiftPairToMask(N, Level)) {
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
- uint64_t c1 = N0C1->getZExtValue();
- if (c1 < OpSizeInBits) {
+ if (N0C1->getAPIntValue().ult(OpSizeInBits)) {
+ uint64_t c1 = N0C1->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
SDValue Shift;
@@ -6641,12 +7429,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
Mask <<= c2 - c1;
SDLoc DL(N);
Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c2 - c1, DL, N1.getValueType()));
+ DAG.getConstant(c2 - c1, DL, ShiftVT));
} else {
Mask.lshrInPlace(c1 - c2);
SDLoc DL(N);
Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
- DAG.getConstant(c1 - c2, DL, N1.getValueType()));
+ DAG.getConstant(c1 - c2, DL, ShiftVT));
}
SDLoc DL(N0);
return DAG.getNode(ISD::AND, DL, VT, Shift,
@@ -6719,6 +7507,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (sra c1, c2) -> (sra c1, c2)
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
@@ -6815,32 +7604,32 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
}
+ // fold (sra (trunc (sra x, c1)), c2) -> (trunc (sra x, c1 + c2))
// fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
// if c1 is equal to the number of bits the trunc removes
+ // TODO - support non-uniform vector shift amounts.
if (N0.getOpcode() == ISD::TRUNCATE &&
(N0.getOperand(0).getOpcode() == ISD::SRL ||
N0.getOperand(0).getOpcode() == ISD::SRA) &&
N0.getOperand(0).hasOneUse() &&
- N0.getOperand(0).getOperand(1).hasOneUse() &&
- N1C) {
+ N0.getOperand(0).getOperand(1).hasOneUse() && N1C) {
SDValue N0Op0 = N0.getOperand(0);
if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
- unsigned LargeShiftVal = LargeShift->getZExtValue();
EVT LargeVT = N0Op0.getValueType();
-
- if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
+ unsigned TruncBits = LargeVT.getScalarSizeInBits() - OpSizeInBits;
+ if (LargeShift->getAPIntValue() == TruncBits) {
SDLoc DL(N);
- SDValue Amt =
- DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
- getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
- SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
- N0Op0.getOperand(0), Amt);
+ SDValue Amt = DAG.getConstant(N1C->getZExtValue() + TruncBits, DL,
+ getShiftAmountTy(LargeVT));
+ SDValue SRA =
+ DAG.getNode(ISD::SRA, DL, LargeVT, N0Op0.getOperand(0), Amt);
return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
}
}
}
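The fold is legal because when c1 equals the number of truncated bits, the lane the truncate keeps is exactly the lane the wide shift produced, so logical vs. arithmetic inner shift makes no difference. A standalone check for the i64 -> i32 case (assumes >> of a negative signed value is an arithmetic shift, as on all mainstream compilers):

#include <cassert>
#include <cstdint>

int main() {
  const int64_t vals[] = {INT64_MIN, 0x7FEEDDCCBBAA9988LL, -1, 42};
  for (int64_t x : vals) {
    // trunc i64 -> i32 removes 32 bits, matching the inner shift c1 = 32:
    // (sra (trunc (srl x, 32)), 5) == (trunc (sra x, 32 + 5)).
    int32_t lhs = (int32_t)((uint64_t)x >> 32) >> 5;
    int32_t rhs = (int32_t)(x >> 37);
    assert(lhs == rhs);
  }
  return 0;
}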
// Simplify, based on bits shifted out of the LHS.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -6872,6 +7661,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N1C = isConstOrConstSplat(N1);
// fold (srl c1, c2) -> c1 >>u c2
+ // TODO - support non-uniform vector shift amounts.
ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
if (N0C && N1C && !N1C->isOpaque())
return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
@@ -6912,6 +7702,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
N0.getOperand(0).getOpcode() == ISD::SRL) {
if (auto N001C = isConstOrConstSplat(N0.getOperand(0).getOperand(1))) {
@@ -6935,6 +7726,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (shl x, c), c) -> (and x, cst2)
+ // TODO - (srl (shl x, c1), c2).
if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
SDLoc DL(N);
@@ -6945,11 +7737,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
// fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
+ // TODO - support non-uniform vector shift amounts.
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
unsigned BitSize = SmallVT.getScalarSizeInBits();
- if (N1C->getZExtValue() >= BitSize)
+ if (N1C->getAPIntValue().uge(BitSize))
return DAG.getUNDEF(VT);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
@@ -6970,7 +7763,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
// bit, which is unmodified by sra.
- if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
+ if (N1C && N1C->getAPIntValue() == (OpSizeInBits - 1)) {
if (N0.getOpcode() == ISD::SRA)
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
}
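A quick exhaustive check of this sign-bit identity for 32-bit lanes (again assuming arithmetic >> for negative signed values):

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t vals[] = {0x0u, 0x7FFFFFFFu, 0x80000000u, 0xDEADBEEFu};
  for (uint32_t x : vals)
    for (unsigned y = 0; y < 32; ++y)
      // (srl (sra x, y), 31) == (srl x, 31): sra never changes the sign
      // bit, and bit 31 is the only bit this srl keeps.
      assert((uint32_t)((int32_t)x >> y) >> 31 == x >> 31);
  return 0;
}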
@@ -7021,6 +7814,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// fold operands of srl based on knowledge that the low bits are not
// demanded.
+ // TODO - support non-uniform vector shift amounts.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
@@ -7079,13 +7873,49 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1)))
return IsFSHL ? N0 : N1;
- // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
+ auto IsUndefOrZero = [](SDValue V) {
+ return V.isUndef() || isNullOrNullSplat(V, /*AllowUndefs*/ true);
+ };
+
+ // TODO - support non-uniform vector shift amounts.
if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) {
+ EVT ShAmtTy = N2.getValueType();
+
+ // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth)
if (Cst->getAPIntValue().uge(BitWidth)) {
uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth);
return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1,
- DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType()));
+ DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy));
}
+
+ unsigned ShAmt = Cst->getZExtValue();
+ if (ShAmt == 0)
+ return IsFSHL ? N0 : N1;
+
+ // fold fshl(undef_or_zero, N1, C) -> lshr(N1, BW-C)
+ // fold fshr(undef_or_zero, N1, C) -> lshr(N1, C)
+ // fold fshl(N0, undef_or_zero, C) -> shl(N0, C)
+ // fold fshr(N0, undef_or_zero, C) -> shl(N0, BW-C)
+ if (IsUndefOrZero(N0))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1,
+ DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt,
+ SDLoc(N), ShAmtTy));
+ if (IsUndefOrZero(N1))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0,
+ DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt,
+ SDLoc(N), ShAmtTy));
+ }
+
+ // fold fshr(undef_or_zero, N1, N2) -> lshr(N1, N2)
+ // fold fshl(N0, undef_or_zero, N2) -> shl(N0, N2)
+ // iff we know the shift amount is in range.
+ // TODO: when is it worth doing SUB(BW, N2) as well?
+ if (isPowerOf2_32(BitWidth)) {
+ APInt ModuloBits(N2.getScalarValueSizeInBits(), BitWidth - 1);
+ if (IsUndefOrZero(N0) && !IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2);
+ if (IsUndefOrZero(N1) && IsFSHL && DAG.MaskedValueIsZero(N2, ~ModuloBits))
+ return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2);
}
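These undef_or_zero folds follow directly from the funnel-shift definition: one half of the concatenated value contributes nothing when it is zero. A standalone reference model for a 32-bit lane (helper names are ad hoc; c is kept in [1, 31] so no shift amount reaches the bit width):

#include <cassert>
#include <cstdint>

// Reference funnel shifts for one 32-bit lane.
uint32_t fshl(uint32_t x, uint32_t y, unsigned c) { return (x << c) | (y >> (32 - c)); }
uint32_t fshr(uint32_t x, uint32_t y, unsigned c) { return (x << (32 - c)) | (y >> c); }

int main() {
  uint32_t y = 0xDEADBEEFu;
  for (unsigned c = 1; c < 32; ++c) {
    assert(fshl(0, y, c) == (y >> (32 - c))); // fshl(zero, N1, C) -> lshr(N1, BW-C)
    assert(fshr(0, y, c) == (y >> c));        // fshr(zero, N1, C) -> lshr(N1, C)
    assert(fshl(y, 0, c) == (y << c));        // fshl(N0, zero, C) -> shl(N0, C)
    assert(fshr(y, 0, c) == (y << (32 - c))); // fshr(N0, zero, C) -> shl(N0, BW-C)
  }
  return 0;
}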
// fold (fshl N0, N0, N2) -> (rotl N0, N2)
@@ -7096,6 +7926,10 @@ SDValue DAGCombiner::visitFunnelShift(SDNode *N) {
if (N0 == N1 && hasOperation(RotOpc, VT))
return DAG.getNode(RotOpc, SDLoc(N), VT, N0, N2);
+ // Simplify, based on bits shifted out of N0/N1.
+ if (SimplifyDemandedBits(SDValue(N, 0)))
+ return SDValue(N, 0);
+
return SDValue();
}
@@ -7207,11 +8041,14 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) {
// FIXME: This should be checking for no signed zeros on individual operands, as
// well as no nans.
-static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS, SDValue RHS) {
+static bool isLegalToCombineMinNumMaxNum(SelectionDAG &DAG, SDValue LHS,
+ SDValue RHS,
+ const TargetLowering &TLI) {
const TargetOptions &Options = DAG.getTarget().Options;
EVT VT = LHS.getValueType();
return Options.NoSignedZerosFPMath && VT.isFloatingPoint() &&
+ TLI.isProfitableToCombineMinNumMaxNum(VT) &&
DAG.isKnownNeverNaN(LHS) && DAG.isKnownNeverNaN(RHS);
}
@@ -7364,6 +8201,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
EVT VT = N->getValueType(0);
EVT VT0 = N0.getValueType();
SDLoc DL(N);
+ SDNodeFlags Flags = N->getFlags();
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
@@ -7414,20 +8252,26 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0,
- InnerSelect, N2);
+ InnerSelect, N2, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
SDValue Cond0 = N0->getOperand(0);
SDValue Cond1 = N0->getOperand(1);
- SDValue InnerSelect =
- DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond1, N1, N2);
+ SDValue InnerSelect = DAG.getNode(ISD::SELECT, DL, N1.getValueType(),
+ Cond1, N1, N2, Flags);
if (normalizeToSequence || !InnerSelect.use_empty())
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Cond0, N1,
- InnerSelect);
+ InnerSelect, Flags);
+ // Cleanup on failure.
+ if (InnerSelect.use_empty())
+ recursivelyDeleteUnusedNodes(InnerSelect.getNode());
}
// select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
@@ -7439,12 +8283,14 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual and node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue And = DAG.getNode(ISD::AND, DL, N0.getValueType(), N0, N1_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1, N2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), And, N1_1,
+ N2, Flags);
}
// Otherwise see if we can optimize the "and" to a better pattern.
- if (SDValue Combined = visitANDLike(N0, N1_0, N))
+ if (SDValue Combined = visitANDLike(N0, N1_0, N)) {
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1_1,
- N2);
+ N2, Flags);
+ }
}
}
// select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
@@ -7456,20 +8302,22 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// Create the actual or node if we can generate good code for it.
if (!normalizeToSequence) {
SDValue Or = DAG.getNode(ISD::OR, DL, N0.getValueType(), N0, N2_0);
- return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1, N2_2);
+ return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Or, N1,
+ N2_2, Flags);
}
// Otherwise see if we can optimize to a better pattern.
if (SDValue Combined = visitORLike(N0, N2_0, N))
return DAG.getNode(ISD::SELECT, DL, N1.getValueType(), Combined, N1,
- N2_2);
+ N2_2, Flags);
}
}
}
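Both normalizations above are plain boolean identities; a four-case truth-table check (values 7 and 9 are arbitrary placeholders):

#include <cassert>

int main() {
  for (int c0 = 0; c0 <= 1; ++c0)
    for (int c1 = 0; c1 <= 1; ++c1) {
      int X = 7, Y = 9;
      // select Cond0, (select Cond1, X, Y), Y == select (and Cond0, Cond1), X, Y
      assert((c0 ? (c1 ? X : Y) : Y) == ((c0 & c1) ? X : Y));
      // select Cond0, X, (select Cond1, X, Y) == select (or Cond0, Cond1), X, Y
      assert((c0 ? X : (c1 ? X : Y)) == ((c0 | c1) ? X : Y));
    }
  return 0;
}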
- if (VT0 == MVT::i1) {
- // select (not Cond), N1, N2 -> select Cond, N2, N1
- if (isBitwiseNot(N0))
- return DAG.getNode(ISD::SELECT, DL, VT, N0->getOperand(0), N2, N1);
+ // select (not Cond), N1, N2 -> select Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false)) {
+ SDValue SelectOp = DAG.getSelect(DL, VT, F, N2, N1);
+ SelectOp->setFlags(Flags);
+ return SelectOp;
}
// Fold selects based on a setcc into other things, such as min/max/abs.
@@ -7481,7 +8329,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
// select (fcmp gt x, y), x, y -> fmaxnum x, y
//
// This is OK if we don't care what happens if either operand is a NaN.
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N1, N2, TLI))
if (SDValue FMinMax = combineMinNumMaxNum(DL, VT, Cond0, Cond1, N1, N2,
CC, TLI, DAG))
return FMinMax;
@@ -7516,9 +8364,16 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
}
if (TLI.isOperationLegal(ISD::SELECT_CC, VT) ||
- (!LegalOperations && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)))
- return DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1, N2,
- N0.getOperand(2));
+ (!LegalOperations &&
+ TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT))) {
+ // Any flags available in a select/setcc fold will be on the setcc as they
+ // migrated from fcmp.

+ Flags = N0.getNode()->getFlags();
+ SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, VT, Cond0, Cond1, N1,
+ N2, N0.getOperand(2));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
+ }
return SimplifySelect(DL, N0, N1, N2);
}
@@ -7599,14 +8454,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
}
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
+ SDValue Data = MSC->getValue();
+ SDValue Chain = MSC->getChain();
SDLoc DL(N);
+ // Zap scatters with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
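Zapping an all-zero mask is justified because no lane is written, so only the chain result remains. A scalar model of masked-store semantics (the model itself is an illustrative assumption, not the patch's code):

#include <cassert>

// Scalar model of a masked store: lanes with a false mask bit are untouched.
void maskedStore(int *mem, const int *data, const bool *mask, int n) {
  for (int i = 0; i < n; ++i)
    if (mask[i])
      mem[i] = data[i];
}

int main() {
  int mem[4] = {1, 2, 3, 4};
  int data[4] = {9, 9, 9, 9};
  bool zeroMask[4] = {false, false, false, false};
  maskedStore(mem, data, zeroMask, 4);
  // An all-zero mask writes nothing, so the store node folds to its chain.
  assert(mem[0] == 1 && mem[1] == 2 && mem[2] == 3 && mem[3] == 4);
  return 0;
}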
// If the MSCATTER data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7624,8 +8484,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
EVT LoVT, HiVT;
std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
- SDValue Chain = MSC->getChain();
-
EVT MemoryVT = MSC->getMemoryVT();
unsigned Alignment = MSC->getOriginalAlignment();
@@ -7658,15 +8516,20 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
+ MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
+ SDValue Data = MST->getValue();
+ SDValue Chain = MST->getChain();
EVT VT = Data.getValueType();
SDLoc DL(N);
+ // Zap masked stores with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return Chain;
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MSTORE data type requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7680,17 +8543,11 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
SDValue MaskLo, MaskHi, Lo, Hi;
std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
- SDValue Chain = MST->getChain();
SDValue Ptr = MST->getBasePtr();
EVT MemoryVT = MST->getMemoryVT();
unsigned Alignment = MST->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == VT.getSizeInBits() / 8) ? Alignment / 2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7712,7 +8569,7 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
MST->getAAInfo(), MST->getRanges());
Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -7728,13 +8585,17 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
}
SDValue DAGCombiner::visitMGATHER(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
MaskedGatherSDNode *MGT = cast<MaskedGatherSDNode>(N);
SDValue Mask = MGT->getMask();
SDLoc DL(N);
+ // Zap gathers with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MGT->getPassThru(), MGT->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
// If the MGATHER result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7805,13 +8666,17 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
+ MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
SDValue Mask = MLD->getMask();
SDLoc DL(N);
+ // Zap masked loads with a zero mask.
+ if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+ return CombineTo(N, MLD->getPassThru(), MLD->getChain());
+
+ if (Level >= AfterLegalizeTypes)
+ return SDValue();
+
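The load-side zap is the dual of the store case: with no lane enabled, every result lane is the pass-through operand. A one-lane scalar model (illustrative sketch):

#include <cassert>

// Scalar model of one masked-load/gather lane: a false mask bit yields the
// pass-through value instead of touching memory.
int maskedLane(const int *mem, int passThru, bool maskBit, int i) {
  return maskBit ? mem[i] : passThru;
}

int main() {
  int mem[2] = {10, 20};
  // With an all-zero mask every lane is the pass-through, so the whole node
  // folds to (PassThru, Chain).
  assert(maskedLane(mem, -1, false, 0) == -1);
  assert(maskedLane(mem, -1, false, 1) == -1);
  return 0;
}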
// If the MLOAD result requires splitting and the mask is provided by a
// SETCC, then split both nodes and its operands before legalization. This
// prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -7839,12 +8704,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
EVT MemoryVT = MLD->getMemoryVT();
unsigned Alignment = MLD->getOriginalAlignment();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
EVT LoMemVT, HiMemVT;
std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
@@ -7862,7 +8721,7 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), SecondHalfAlignment,
+ MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
MLD->getAAInfo(), MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
@@ -7943,11 +8802,16 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
+ EVT VT = N->getValueType(0);
SDLoc DL(N);
if (SDValue V = DAG.simplifySelect(N0, N1, N2))
return V;
+ // vselect (not Cond), N1, N2 -> vselect Cond, N2, N1
+ if (SDValue F = extractBooleanFlip(N0, DAG, TLI, false))
+ return DAG.getSelect(DL, VT, F, N2, N1);
+
// Canonicalize integer abs.
// vselect (setg[te] X, 0), X, -X ->
// vselect (setgt X, -1), X, -X ->
@@ -7987,11 +8851,10 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- EVT VT = N->getValueType(0);
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0), N0.getOperand(1))) {
- ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
+ N0.getOperand(1), TLI)) {
if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -8080,9 +8943,11 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
return N2;
} else if (SCC.getOpcode() == ISD::SETCC) {
// Fold to a simpler select_cc
- return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
- SCC.getOperand(0), SCC.getOperand(1), N2, N3,
- SCC.getOperand(2));
+ SDValue SelectOp = DAG.getNode(
+ ISD::SELECT_CC, SDLoc(N), N2.getValueType(), SCC.getOperand(0),
+ SCC.getOperand(1), N2, N3, SCC.getOperand(2));
+ SelectOp->setFlags(SCC->getFlags());
+ return SelectOp;
}
}
@@ -8148,6 +9013,7 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned Opcode = N->getOpcode();
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ SDLoc DL(N);
assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
@@ -8158,7 +9024,33 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
// fold (zext c1) -> c1
// fold (aext c1) -> c1
if (isa<ConstantSDNode>(N0))
- return DAG.getNode(Opcode, SDLoc(N), VT, N0);
+ return DAG.getNode(Opcode, DL, VT, N0);
+
+ // fold (sext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ // fold (zext (select cond, c1, c2)) -> (select cond, zext c1, zext c2)
+ // fold (aext (select cond, c1, c2)) -> (select cond, sext c1, sext c2)
+ if (N0->getOpcode() == ISD::SELECT) {
+ SDValue Op1 = N0->getOperand(1);
+ SDValue Op2 = N0->getOperand(2);
+ if (isa<ConstantSDNode>(Op1) && isa<ConstantSDNode>(Op2) &&
+ (Opcode != ISD::ZERO_EXTEND || !TLI.isZExtFree(N0.getValueType(), VT))) {
+ // For any_extend, choose sign extension of the constants to allow a
+ // possible further transform to sign_extend_inreg, i.e.:
+ //
+ // t1: i8 = select t0, Constant:i8<-1>, Constant:i8<0>
+ // t2: i64 = any_extend t1
+ // -->
+ // t3: i64 = select t0, Constant:i64<-1>, Constant:i64<0>
+ // -->
+ // t4: i64 = sign_extend_inreg t3
+ unsigned FoldOpc = Opcode;
+ if (FoldOpc == ISD::ANY_EXTEND)
+ FoldOpc = ISD::SIGN_EXTEND;
+ return DAG.getSelect(DL, VT, N0->getOperand(0),
+ DAG.getNode(FoldOpc, DL, VT, Op1),
+ DAG.getNode(FoldOpc, DL, VT, Op2));
+ }
+ }
// fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
// fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
@@ -8173,7 +9065,6 @@ static SDValue tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
unsigned EVTBits = N0->getValueType(0).getScalarSizeInBits();
SmallVector<SDValue, 8> Elts;
unsigned NumElts = VT.getVectorNumElements();
- SDLoc DL(N);
// For zero-extensions, UNDEF elements are still guaranteed to have their
// upper bits set to zero.
@@ -8387,6 +9278,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
assert(N->getOpcode() == ISD::ZERO_EXTEND);
EVT VT = N->getValueType(0);
+ EVT OrigVT = N->getOperand(0).getValueType();
+ if (TLI.isZExtFree(OrigVT, VT))
+ return SDValue();
// and/or/xor
SDValue N0 = N->getOperand(0);
@@ -8450,6 +9344,10 @@ SDValue DAGCombiner::CombineZExtLogicopShiftLoad(SDNode *N) {
Load->getValueType(0), ExtLoad);
CombineTo(Load, Trunc, ExtLoad.getValue(1));
}
+
+ // N0 is dead at this point.
+ recursivelyDeleteUnusedNodes(N0.getNode());
+
return SDValue(N,0); // Return N so it doesn't get rechecked!
}
@@ -8509,19 +9407,21 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
: ISD::isZEXTLoad(N0Node);
if ((!isAExtLoad && !ISD::isEXTLoad(N0Node)) ||
!ISD::isUNINDEXEDLoad(N0Node) || !N0.hasOneUse())
- return {};
+ return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
- return {};
+ return SDValue();
SDValue ExtLoad =
DAG.getExtLoad(ExtLoadType, SDLoc(LN0), VT, LN0->getChain(),
LN0->getBasePtr(), MemVT, LN0->getMemOperand());
Combiner.CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ if (LN0->use_empty())
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
@@ -8559,6 +9459,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
Combiner.CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ Combiner.recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc =
DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad);
@@ -8804,6 +9705,25 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SDValue NewVSel = matchVSelectOpSizesWithSetCC(N))
return NewVSel;
+ // Eliminate this sign extend by doing a negation in the destination type:
+ // sext i32 (0 - (zext i8 X to i32)) to i64 --> 0 - (zext i8 X to i64)
+ if (N0.getOpcode() == ISD::SUB && N0.hasOneUse() &&
+ isNullOrNullSplat(N0.getOperand(0)) &&
+ N0.getOperand(1).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::SUB, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(1).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Zext);
+ }
+ // Eliminate this sign extend by doing a decrement in the destination type:
+ // sext i32 ((zext i8 X to i32) + (-1)) to i64 --> (zext i8 X to i64) + (-1)
+ if (N0.getOpcode() == ISD::ADD && N0.hasOneUse() &&
+ isAllOnesOrAllOnesSplat(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ TLI.isOperationLegalOrCustom(ISD::ADD, VT)) {
+ SDValue Zext = DAG.getZExtOrTrunc(N0.getOperand(0).getOperand(0), DL, VT);
+ return DAG.getNode(ISD::ADD, DL, VT, Zext, DAG.getAllOnesConstant(DL, VT));
+ }
+
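Both folds rely on the zext result being too small for the negation or decrement to overflow the wider type, so the operation commutes with the extension. An exhaustive i8 check (assumes two's-complement wrap-around on the unsigned-to-signed conversion, as all mainstream compilers implement):

#include <cassert>
#include <cstdint>

int main() {
  for (unsigned v = 0; v < 256; ++v) {
    uint8_t x = (uint8_t)v;
    // sext i32 (0 - (zext i8 x to i32)) to i64 == 0 - (zext i8 x to i64)
    assert((int64_t)(int32_t)(0u - (uint32_t)x) == 0 - (int64_t)(uint64_t)x);
    // sext i32 ((zext i8 x to i32) + (-1)) to i64 == (zext i8 x to i64) + (-1)
    assert((int64_t)(int32_t)((uint32_t)x + 0xFFFFFFFFu) ==
           (int64_t)(uint64_t)x - 1);
  }
  return 0;
}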
return SDValue();
}
@@ -9061,14 +9981,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
SDValue ShAmt = N0.getOperand(1);
- unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
unsigned KnownZeroBits = InnerZExt.getValueSizeInBits() -
InnerZExt.getOperand(0).getValueSizeInBits();
- if (ShAmtVal > KnownZeroBits)
+ if (cast<ConstantSDNode>(ShAmt)->getAPIntValue().ugt(KnownZeroBits))
return SDValue();
}
@@ -9162,6 +10081,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
CombineTo(N, ExtLoad);
if (NoReplaceTrunc) {
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
} else {
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
N0.getValueType(), ExtLoad);
@@ -9185,6 +10105,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
MemVT, LN0->getMemOperand());
CombineTo(N, ExtLoad);
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), ExtLoad.getValue(1));
+ recursivelyDeleteUnusedNodes(LN0);
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
@@ -9574,14 +10495,14 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
- if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
+ if (auto *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
+ if (ShAmt->getAPIntValue().ule(VTBits - EVTBits)) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
- return DAG.getNode(ISD::SRA, SDLoc(N), VT,
- N0.getOperand(0), N0.getOperand(1));
+ if (((VTBits - EVTBits) - ShAmt->getZExtValue()) < InSignBits)
+ return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
+ N0.getOperand(1));
}
}
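When the srl already brings the value's sign bit into the kept field, the sign-extend-in-reg is just an arithmetic shift. A standalone check for the easy case where the shift moves the top byte all the way down (assumes arithmetic >> on negative signed values):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x : {0x00000000u, 0x7FFFFFFFu, 0x80000001u, 0xDEADBEEFu}) {
    // sext_in_reg (srl x, 24), i8 == sra x, 24: the srl leaves x's original
    // sign bit as the i8 sign bit, so arithmetic shifting is equivalent.
    int32_t sextInReg = (int32_t)(int8_t)(uint8_t)(x >> 24);
    int32_t sra = (int32_t)x >> 24;
    assert(sextInReg == sra);
  }
  return 0;
}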
@@ -9667,10 +10588,11 @@ SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ EVT SrcVT = N0.getValueType();
bool isLE = DAG.getDataLayout().isLittleEndian();
// noop truncate
- if (N0.getValueType() == N->getValueType(0))
+ if (SrcVT == VT)
return N0;
// fold (truncate (truncate x)) -> (truncate x)
@@ -9740,7 +10662,6 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (select c, a, b) -> select c, (trunc a), (trunc b)
if (N0.getOpcode() == ISD::SELECT && N0.hasOneUse()) {
- EVT SrcVT = N0.getValueType();
if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
TLI.isTruncateFree(SrcVT, VT)) {
SDLoc SL(N0);
@@ -9753,7 +10674,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// trunc (shl x, K) -> shl (trunc x), K => K < VT.getScalarSizeInBits()
if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
- (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
+ (!LegalOperations || TLI.isOperationLegal(ISD::SHL, VT)) &&
TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
SDValue Amt = N0.getOperand(1);
KnownBits Known = DAG.computeKnownBits(Amt);
@@ -9771,6 +10692,19 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
}
}
+ // Attempt to pre-truncate BUILD_VECTOR sources.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && !LegalOperations &&
+ TLI.isTruncateFree(SrcVT.getScalarType(), VT.getScalarType())) {
+ SDLoc DL(N);
+ EVT SVT = VT.getScalarType();
+ SmallVector<SDValue, 8> TruncOps;
+ for (const SDValue &Op : N0->op_values()) {
+ SDValue TruncOp = DAG.getNode(ISD::TRUNCATE, DL, SVT, Op);
+ TruncOps.push_back(TruncOp);
+ }
+ return DAG.getBuildVector(VT, DL, TruncOps);
+ }
+
// Fold a series of buildvector, bitcast, and truncate if possible.
// For example fold
// (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
@@ -9906,7 +10840,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// When the adde's carry is not used.
if ((N0.getOpcode() == ISD::ADDE || N0.getOpcode() == ISD::ADDCARRY) &&
N0.hasOneUse() && !N0.getNode()->hasAnyUseOfValue(1) &&
- (!LegalOperations || TLI.isOperationLegal(N0.getOpcode(), VT))) {
+ // We only do this for ADDCARRY before operation legalization.
+ ((!LegalOperations && N0.getOpcode() == ISD::ADDCARRY) ||
+ TLI.isOperationLegal(N0.getOpcode(), VT))) {
SDLoc SL(N);
auto X = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
auto Y = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
@@ -10070,14 +11006,17 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
return DAG.getUNDEF(VT);
// If the input is a BUILD_VECTOR with all constant elements, fold this now.
- // Only do this before legalize types, since we might create an illegal
- // scalar type. Even if we knew we wouldn't create an illegal scalar type
- // we can only do this before legalize ops, since the target maybe
- // depending on the bitcast.
+ // Only do this before legalize types, unless both types are integer and the
+ // scalar type is legal. Only do this before legalize ops, since the target
+ // maybe depending on the bitcast.
// First check to see if this is all constant.
- if (!LegalTypes &&
+ // TODO: Support FP bitcasts after legalize types.
+ if (VT.isVector() &&
+ (!LegalTypes ||
+ (!LegalOperations && VT.isInteger() && N0.getValueType().isInteger() &&
+ TLI.isTypeLegal(VT.getVectorElementType()))) &&
N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
- VT.isVector() && cast<BuildVectorSDNode>(N0)->isConstant())
+ cast<BuildVectorSDNode>(N0)->isConstant())
return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(),
VT.getVectorElementType());
@@ -10113,18 +11052,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
- TLI.isOperationLegal(ISD::LOAD, VT)) &&
- TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
+ TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- unsigned OrigAlign = LN0->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
- LN0->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
+ if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG,
+ *LN0->getMemOperand())) {
SDValue Load =
DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(),
- LN0->getPointerInfo(), OrigAlign,
+ LN0->getPointerInfo(), LN0->getAlignment(),
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
return Load;
@@ -11071,15 +12006,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
+ isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -11105,8 +12042,8 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// Selection pass has a hard time dealing with FP constants.
bool AllowNewConst = (Level < AfterLegalizeDAG);
- // If 'unsafe math' or nnan is enabled, fold lots of things.
- if ((Options.UnsafeFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
+ // If nnan is enabled, fold lots of things.
+ if ((Options.NoNaNsFPMath || Flags.hasNoNaNs()) && AllowNewConst) {
// If allowed, fold (fadd (fneg x), x) -> 0.0
if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
return DAG.getConstantFP(0.0, DL, VT);
@@ -11246,16 +12183,20 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0 == N1) {
// (fsub x, x) -> 0.0
- if (Options.UnsafeFPMath || Flags.hasNoNaNs())
+ if (Options.NoNaNsFPMath || Flags.hasNoNaNs())
return DAG.getConstantFP(0.0f, DL, VT);
}
// (fsub -0.0, N1) -> -N1
+ // NOTE: It is safe to transform an FSUB(-0.0,X) into an FNEG(X), since the
+ // FSUB does not specify the sign bit of a NaN. Also note that for
+ // the same reason, the inverse transform is not safe, unless fast math
+ // flags are in play.
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
- return GetNegatedExpression(N1, DAG, LegalOperations);
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
+ return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
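The signed-zero distinction in the guard above can be seen directly: negating a zero flips its sign, but subtracting it from +0.0 does not. A standalone illustration:

#include <cassert>
#include <cmath>

int main() {
  double x = 0.0;
  // fsub(-0.0, x) == fneg(x) even when x is a zero:
  assert(std::signbit(-0.0 - x) == std::signbit(-x));
  // but fsub(+0.0, x) is not fneg(x) without nsz: +0.0 - +0.0 is +0.0,
  // while fneg(+0.0) is -0.0. Hence a positive zero is only accepted under
  // NoSignedZerosFPMath / hasNoSignedZeros.
  assert(!std::signbit(0.0 - x) && std::signbit(-x));
  return 0;
}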
@@ -11273,9 +12214,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
+ if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations), Flags);
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -11319,7 +12261,7 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
- if (Options.UnsafeFPMath ||
+ if ((Options.NoNaNsFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasNoNaNs() && Flags.hasNoSignedZeros())) {
// fold (fmul A, 0) -> 0
if (N1CFP && N1CFP->isZero())
@@ -11361,14 +12303,18 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
return DAG.getNode(ISD::FNEG, DL, VT, N0);
// fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
@@ -11506,7 +12452,8 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// fma (fneg x), K, y -> fma x -K, y
if (N0.getOpcode() == ISD::FNEG &&
(TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT)))) {
+ (N1.hasOneUse() && !TLI.isFPImmLegal(N1CFP->getValueAPF(), VT,
+ ForCodeSize)))) {
return DAG.getNode(ISD::FMA, DL, VT, N0.getOperand(0),
DAG.getNode(ISD::FNEG, DL, VT, N1, Flags), N2);
}
@@ -11541,22 +12488,33 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
+ // TODO: Limit this transform based on optsize/minsize - it always creates at
+ // least 1 extra instruction. But the perf win may be substantial enough
+ // that only minsize should restrict this.
bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
const SDNodeFlags Flags = N->getFlags();
if (!UnsafeMath && !Flags.hasAllowReciprocal())
return SDValue();
- // Skip if current node is a reciprocal.
+ // Skip if current node is a reciprocal/fneg-reciprocal.
SDValue N0 = N->getOperand(0);
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
- if (N0CFP && N0CFP->isExactlyValue(1.0))
+ ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0, /* AllowUndefs */ true);
+ if (N0CFP && (N0CFP->isExactlyValue(1.0) || N0CFP->isExactlyValue(-1.0)))
return SDValue();
// Exit early if the target does not want this transform or if there can't
// possibly be enough uses of the divisor to make the transform worthwhile.
SDValue N1 = N->getOperand(1);
unsigned MinUses = TLI.combineRepeatedFPDivisors();
- if (!MinUses || N1->use_size() < MinUses)
+
+ // For splat vectors, scale the number of uses by the splat factor. If we can
+ // convert the division into a scalar op, that will likely be much faster.
+ unsigned NumElts = 1;
+ EVT VT = N->getValueType(0);
+ if (VT.isVector() && DAG.isSplatValue(N1))
+ NumElts = VT.getVectorNumElements();
+
+ if (!MinUses || (N1->use_size() * NumElts) < MinUses)
return SDValue();
// Find all FDIV users of the same divisor.
@@ -11573,10 +12531,9 @@ SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
// Now that we have the actual number of divisor uses, make sure it meets
// the minimum threshold specified by the target.
- if (Users.size() < MinUses)
+ if ((Users.size() * NumElts) < MinUses)
return SDValue();
- EVT VT = N->getValueType(0);
SDLoc DL(N);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
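The payoff of the transform is turning N divisions by a common divisor into one division plus N multiplies; the results may differ in the last ulp, which is why it is gated on UnsafeFPMath or the allow-reciprocal flag. A host-side sketch (values arbitrary; prints hex floats for comparison rather than asserting bit-equality):

#include <cstdio>

int main() {
  float a = 1.0f, b = 2.0f, c = 3.0f, d = 7.0f;
  // Three divisions...
  float q0 = a / d, q1 = b / d, q2 = c / d;
  // ...become one division plus three multiplies under reciprocal math.
  float r = 1.0f / d;
  float m0 = a * r, m1 = b * r, m2 = c * r;
  printf("%a %a\n%a %a\n%a %a\n", q0, m0, q1, m1, q2, m2);
  return 0;
}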
@@ -11619,6 +12576,9 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
+ if (SDValue V = combineRepeatedFPDivisors(N))
+ return V;
+
if (Options.UnsafeFPMath || Flags.hasAllowReciprocal()) {
// fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
if (N1CFP) {
@@ -11634,7 +12594,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// backend)... we should handle this gracefully after Legalize.
// TLI.isOperationLegalOrCustom(ISD::ConstantFP, VT) ||
TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(Recip, VT)))
+ TLI.isFPImmLegal(Recip, VT, ForCodeSize)))
return DAG.getNode(ISD::FMUL, DL, VT, N0,
DAG.getConstantFP(Recip, DL, VT), Flags);
}
@@ -11692,21 +12652,22 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
+ if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
+ if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
+ ForCodeSize)) {
// Both can be negated for free, check to see if at least one is cheaper
// negated.
if (LHSNeg == 2 || RHSNeg == 2)
return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations),
- GetNegatedExpression(N1, DAG, LegalOperations),
+ GetNegatedExpression(N0, DAG, LegalOperations,
+ ForCodeSize),
+ GetNegatedExpression(N1, DAG, LegalOperations,
+ ForCodeSize),
Flags);
}
}
- if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
- return CombineRepeatedDivisors;
-
return SDValue();
}
@@ -11838,18 +12799,24 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
return DAG.getNode(ISD::FCBRT, SDLoc(N), VT, N->getOperand(0), Flags);
}
- // Try to convert x ** (1/4) into square roots.
+ // Try to convert x ** (1/4) and x ** (3/4) into square roots.
// x ** (1/2) is canonicalized to sqrt, so we do not bother with that case.
// TODO: This could be extended (using a target hook) to handle smaller
// power-of-2 fractional exponents.
- if (ExponentC->getValueAPF().isExactlyValue(0.25)) {
+ bool ExponentIs025 = ExponentC->getValueAPF().isExactlyValue(0.25);
+ bool ExponentIs075 = ExponentC->getValueAPF().isExactlyValue(0.75);
+ if (ExponentIs025 || ExponentIs075) {
// pow(-0.0, 0.25) = +0.0; sqrt(sqrt(-0.0)) = -0.0.
// pow(-inf, 0.25) = +inf; sqrt(sqrt(-inf)) = NaN.
+ // pow(-0.0, 0.75) = +0.0; sqrt(-0.0) * sqrt(sqrt(-0.0)) = +0.0.
+ // pow(-inf, 0.75) = +inf; sqrt(-inf) * sqrt(sqrt(-inf)) = NaN.
// For regular numbers, rounding may cause the results to differ.
// Therefore, we require { nsz ninf afn } for this transform.
// TODO: We could select out the special cases if we don't have nsz/ninf.
SDNodeFlags Flags = N->getFlags();
- if (!Flags.hasNoSignedZeros() || !Flags.hasNoInfs() ||
+
+ // We only need no signed zeros for the 0.25 case.
+ if ((!Flags.hasNoSignedZeros() && ExponentIs025) || !Flags.hasNoInfs() ||
!Flags.hasApproximateFuncs())
return SDValue();
@@ -11859,13 +12826,17 @@ SDValue DAGCombiner::visitFPOW(SDNode *N) {
// Assume that libcalls are the smallest code.
// TODO: This restriction should probably be lifted for vectors.
- if (DAG.getMachineFunction().getFunction().optForSize())
+ if (DAG.getMachineFunction().getFunction().hasOptSize())
return SDValue();
// pow(X, 0.25) --> sqrt(sqrt(X))
SDLoc DL(N);
SDValue Sqrt = DAG.getNode(ISD::FSQRT, DL, VT, N->getOperand(0), Flags);
- return DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ SDValue SqrtSqrt = DAG.getNode(ISD::FSQRT, DL, VT, Sqrt, Flags);
+ if (ExponentIs025)
+ return SqrtSqrt;
+ // pow(X, 0.75) --> sqrt(X) * sqrt(sqrt(X))
+ return DAG.getNode(ISD::FMUL, DL, VT, Sqrt, SqrtSqrt, Flags);
}
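A standalone numeric check of both expansions (tolerance rather than bit-equality, since sqrt-based results may round differently from libm's pow, which is exactly why the afn flag is required):

#include <cassert>
#include <cmath>

int main() {
  double x = 16.0;
  double s = std::sqrt(x);  // 4.0
  double ss = std::sqrt(s); // 2.0, i.e. pow(x, 0.25)
  double p075 = s * ss;     // 8.0, i.e. pow(x, 0.75)
  assert(std::fabs(ss - std::pow(x, 0.25)) < 1e-12);
  assert(std::fabs(p075 - std::pow(x, 0.75)) < 1e-12);
  return 0;
}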
return SDValue();
@@ -11911,6 +12882,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (sint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -11968,6 +12943,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
EVT VT = N->getValueType(0);
EVT OpVT = N0.getValueType();
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (N0.isUndef())
+ return DAG.getConstantFP(0.0, SDLoc(N), VT);
+
// fold (uint_to_fp c1) -> c1fp
if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
// ...but only if the target supports immediate floating-point values
@@ -12051,6 +13030,10 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_sint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_sint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
@@ -12062,6 +13045,10 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ // fold (fp_to_uint undef) -> undef
+ if (N0.isUndef())
+ return DAG.getUNDEF(VT);
+
// fold (fp_to_uint c1fp) -> c1
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
@@ -12250,8 +13237,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options))
- return GetNegatedExpression(N0, DAG, LegalOperations);
+ &DAG.getTarget().Options, ForCodeSize))
+ return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -12287,7 +13274,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
APFloat CVal = CFP1->getValueAPF();
CVal.changeSign();
if (Level >= AfterLegalizeDAG &&
- (TLI.isFPImmLegal(CVal, VT) ||
+ (TLI.isFPImmLegal(CVal, VT, ForCodeSize) ||
TLI.isOperationLegal(ISD::ConstantFP, VT)))
return DAG.getNode(
ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
@@ -12556,6 +13543,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
TargetLowering::AddrMode AM;
if (N->getOpcode() == ISD::ADD) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12564,6 +13552,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
// [reg +/- reg]
AM.Scale = 1;
} else if (N->getOpcode() == ISD::SUB) {
+ AM.HasBaseReg = true;
ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (Offset)
// [reg +/- imm]
@@ -12653,7 +13642,13 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
// Check #2.
if (!isLoad) {
SDValue Val = cast<StoreSDNode>(N)->getValue();
- if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
+
+ // Would require a copy.
+ if (Val == BasePtr)
+ return false;
+
+ // Would create a cycle.
+ if (Val == Ptr || Ptr->isPredecessorOf(Val.getNode()))
return false;
}
@@ -13190,7 +14185,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (LD->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes.
- SDValue BetterChain = FindBetterChain(N, Chain);
+ SDValue BetterChain = FindBetterChain(LD, Chain);
// If there is a better chain.
if (Chain != BetterChain) {
@@ -13378,7 +14373,7 @@ struct LoadedSlice {
/// Get the alignment of the load used for this slice.
unsigned getAlignment() const {
unsigned Alignment = Origin->getAlignment();
- unsigned Offset = getOffsetFromBase();
+ uint64_t Offset = getOffsetFromBase();
if (Offset != 0)
Alignment = MinAlign(Alignment, Alignment + Offset);
return Alignment;
@@ -13500,9 +14495,11 @@ struct LoadedSlice {
assert(DAG && "Missing context");
const TargetLowering &TLI = DAG->getTargetLoweringInfo();
EVT ResVT = Use->getValueType(0);
- const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
+ const TargetRegisterClass *ResRC =
+ TLI.getRegClassFor(ResVT.getSimpleVT(), Use->isDivergent());
const TargetRegisterClass *ArgRC =
- TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
+ TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT(),
+ Use->getOperand(0)->isDivergent());
if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
return false;
@@ -13826,7 +14823,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
// For narrowing to be valid, it must be the case that the load is the
- // immediately preceeding memory operation before the store.
+ // immediately preceding memory operation before the store.
if (LD == Chain.getNode())
; // ok.
else if (Chain->getOpcode() == ISD::TokenFactor &&
@@ -14039,11 +15036,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- SDValue Chain = ST->getChain();
SDValue Value = ST->getValue();
if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
- Value.hasOneUse() &&
- Chain == SDValue(Value.getNode(), 1)) {
+ Value.hasOneUse()) {
LoadSDNode *LD = cast<LoadSDNode>(Value);
EVT VT = LD->getMemoryVT();
if (!VT.isFloatingPoint() ||
@@ -14073,7 +15068,7 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
LD->getPointerInfo(), LDAlign);
SDValue NewST =
- DAG.getStore(NewLD.getValue(1), SDLoc(N), NewLD, ST->getBasePtr(),
+ DAG.getStore(ST->getChain(), SDLoc(N), NewLD, ST->getBasePtr(),
ST->getPointerInfo(), STAlign);
AddToWorklist(NewLD.getNode());
@@ -14171,14 +15166,14 @@ SDValue DAGCombiner::getMergeStoreChains(SmallVectorImpl<MemOpLink> &StoreNodes,
Visited.insert(StoreNodes[i].MemNode);
}
- // don't include nodes that are children
+ // don't include nodes that are children or repeated nodes.
for (unsigned i = 0; i < NumStores; ++i) {
- if (Visited.count(StoreNodes[i].MemNode->getChain().getNode()) == 0)
+ if (Visited.insert(StoreNodes[i].MemNode->getChain().getNode()).second)
Chains.push_back(StoreNodes[i].MemNode->getChain());
}
assert(Chains.size() > 0 && "Chain should have generated a chain");
- return DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, Chains);
+ return DAG.getTokenFactor(StoreDL, Chains);
}
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
@@ -14372,15 +15367,19 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (Ld->isVolatile() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
+ // The memory operands must not be volatile/indexed.
if (Other->isVolatile() || Other->isIndexed())
return false;
- SDValue Val = peekThroughBitcasts(Other->getValue());
+ // Don't mix temporal stores with non-temporal stores.
+ if (St->isNonTemporal() != Other->isNonTemporal())
+ return false;
+ SDValue OtherBC = peekThroughBitcasts(Other->getValue());
// Allow merging constants of different types as integers.
bool NoTypeMatch = (MemVT.isInteger()) ? !MemVT.bitsEq(Other->getMemoryVT())
: Other->getMemoryVT() != MemVT;
@@ -14388,16 +15387,19 @@ void DAGCombiner::getStoreMergeCandidates(
if (NoTypeMatch)
return false;
// The Load's Base Ptr must also match
- if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(Val)) {
- auto LPtr = BaseIndexOffset::match(OtherLd, DAG);
+ if (LoadSDNode *OtherLd = dyn_cast<LoadSDNode>(OtherBC)) {
+ BaseIndexOffset LPtr = BaseIndexOffset::match(OtherLd, DAG);
if (LoadVT != OtherLd->getMemoryVT())
return false;
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile.
+ // The memory operands must not be volatile/indexed.
if (OtherLd->isVolatile() || OtherLd->isIndexed())
return false;
+ // Don't mix temporal loads with non-temporal loads.
+ if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
+ return false;
if (!(LBasePtr.equalBaseIndex(LPtr, DAG)))
return false;
} else
@@ -14406,17 +15408,17 @@ void DAGCombiner::getStoreMergeCandidates(
if (IsConstantSrc) {
if (NoTypeMatch)
return false;
- if (!(isa<ConstantSDNode>(Val) || isa<ConstantFPSDNode>(Val)))
+ if (!(isa<ConstantSDNode>(OtherBC) || isa<ConstantFPSDNode>(OtherBC)))
return false;
}
if (IsExtractVecSrc) {
// Do not merge truncated stores here.
if (Other->isTruncatingStore())
return false;
- if (!MemVT.bitsEq(Val.getValueType()))
+ if (!MemVT.bitsEq(OtherBC.getValueType()))
return false;
- if (Val.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
- Val.getOpcode() != ISD::EXTRACT_SUBVECTOR)
+ if (OtherBC.getOpcode() != ISD::EXTRACT_VECTOR_ELT &&
+ OtherBC.getOpcode() != ISD::EXTRACT_SUBVECTOR)
return false;
}
Ptr = BaseIndexOffset::match(Other, DAG);
@@ -14441,9 +15443,11 @@ void DAGCombiner::getStoreMergeCandidates(
RootNode = St->getChain().getNode();
+ unsigned NumNodesExplored = 0;
if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(RootNode)) {
RootNode = Ldn->getChain().getNode();
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0 && isa<LoadSDNode>(*I)) // walk down chain
for (auto I2 = (*I)->use_begin(), E2 = (*I)->use_end(); I2 != E2; ++I2)
if (I2.getOperandNo() == 0)
@@ -14454,7 +15458,8 @@ void DAGCombiner::getStoreMergeCandidates(
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
- for (auto I = RootNode->use_begin(), E = RootNode->use_end(); I != E; ++I)
+ for (auto I = RootNode->use_begin(), E = RootNode->use_end();
+ I != E && NumNodesExplored < 1024; ++I, ++NumNodesExplored)
if (I.getOperandNo() == 0)
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
@@ -14551,6 +15556,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
isa<ConstantFPSDNode>(StoredVal);
bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
+ bool IsNonTemporalStore = St->isNonTemporal();
+ bool IsNonTemporalLoad =
+ IsLoadSrc && cast<LoadSDNode>(StoredVal)->isNonTemporal();
if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
return false;
@@ -14652,8 +15660,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast) {
LastIntegerTrunc = false;
LastLegalType = i + 1;
@@ -14664,8 +15672,9 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
if (TLI.isTruncStoreLegal(LegalizedStoredValTy, StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, LegalizedStoredValTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFast) &&
IsFast) {
LastIntegerTrunc = true;
LastLegalType = i + 1;
@@ -14683,8 +15692,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
EVT Ty = EVT::getVectorVT(Context, MemVT.getScalarType(), Elts);
if (TLI.isTypeLegal(Ty) && TLI.isTypeLegal(MemVT) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(
+ Context, DL, Ty, *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
LastLegalVectorType = i + 1;
}
@@ -14755,8 +15764,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
if (TLI.isTypeLegal(Ty) &&
TLI.canMergeStoresTo(FirstStoreAS, Ty, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
- FirstStoreAlign, &IsFast) &&
+ TLI.allowsMemoryAccess(Context, DL, Ty,
+ *FirstInChain->getMemOperand(), &IsFast) &&
IsFast)
NumStoresToMerge = i + 1;
}
@@ -14847,7 +15856,6 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
unsigned FirstStoreAS = FirstInChain->getAddressSpace();
unsigned FirstStoreAlign = FirstInChain->getAlignment();
LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
- unsigned FirstLoadAS = FirstLoad->getAddressSpace();
unsigned FirstLoadAlign = FirstLoad->getAlignment();
// Scan the memory operations on the chain and find the first
@@ -14887,11 +15895,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool IsFastSt, IsFastLd;
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalVectorType = i + 1;
}
@@ -14901,11 +15909,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
StoreTy = EVT::getIntegerVT(Context, SizeInBits);
if (TLI.isTypeLegal(StoreTy) &&
TLI.canMergeStoresTo(FirstStoreAS, StoreTy, DAG) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(), &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = false;
@@ -14920,11 +15928,12 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValTy,
StoreTy) &&
TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValTy, StoreTy) &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
- FirstStoreAlign, &IsFastSt) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstInChain->getMemOperand(),
+ &IsFastSt) &&
IsFastSt &&
- TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
- FirstLoadAlign, &IsFastLd) &&
+ TLI.allowsMemoryAccess(Context, DL, StoreTy,
+ *FirstLoad->getMemOperand(), &IsFastLd) &&
IsFastLd) {
LastLegalIntegerType = i + 1;
DoIntegerTruncate = true;
@@ -14994,26 +16003,32 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
SDValue NewStoreChain = getMergeStoreChains(StoreNodes, NumElem);
AddToWorklist(NewStoreChain.getNode());
- MachineMemOperand::Flags MMOFlags =
+ MachineMemOperand::Flags LdMMOFlags =
isDereferenceable ? MachineMemOperand::MODereferenceable
: MachineMemOperand::MONone;
+ if (IsNonTemporalLoad)
+ LdMMOFlags |= MachineMemOperand::MONonTemporal;
+
+ MachineMemOperand::Flags StMMOFlags =
+ IsNonTemporalStore ? MachineMemOperand::MONonTemporal
+ : MachineMemOperand::MONone;
SDValue NewLoad, NewStore;
if (UseVectorTy || !DoIntegerTruncate) {
NewLoad =
DAG.getLoad(JointMemOpVT, LoadDL, FirstLoad->getChain(),
FirstLoad->getBasePtr(), FirstLoad->getPointerInfo(),
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getStore(
NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
- FirstInChain->getPointerInfo(), FirstStoreAlign);
+ FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
} else { // This must be the truncstore/extload case
EVT ExtendedTy =
TLI.getTypeToTransformTo(*DAG.getContext(), JointMemOpVT);
NewLoad = DAG.getExtLoad(ISD::EXTLOAD, LoadDL, ExtendedTy,
FirstLoad->getChain(), FirstLoad->getBasePtr(),
FirstLoad->getPointerInfo(), JointMemOpVT,
- FirstLoadAlign, MMOFlags);
+ FirstLoadAlign, LdMMOFlags);
NewStore = DAG.getTruncStore(NewStoreChain, StoreDL, NewLoad,
FirstInChain->getBasePtr(),
FirstInChain->getPointerInfo(),
@@ -15168,16 +16183,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// illegal type.
if (((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
- TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
- unsigned OrigAlign = ST->getAlignment();
- bool Fast = false;
- if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
- ST->getAddressSpace(), OrigAlign, &Fast) &&
- Fast) {
- return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
- ST->getPointerInfo(), OrigAlign,
- ST->getMemOperand()->getFlags(), ST->getAAInfo());
- }
+ TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
+ DAG, *ST->getMemOperand())) {
+ return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr,
+ ST->getPointerInfo(), ST->getAlignment(),
+ ST->getMemOperand()->getFlags(), ST->getAAInfo());
}
}
@@ -15205,6 +16215,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (SDValue NewST = TransformFPLoadStorePair(N))
return NewST;
+ // Try transforming several stores into STORE (BSWAP).
+ if (SDValue Store = MatchStoreCombine(ST))
+ return Store;
+
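A minimal sketch (illustrative only, not part of the patch) of the source-level
pattern a bswap-store combine like MatchStoreCombine targets; the helper name
storeBE32 is hypothetical:

    // Storing a 32-bit value one byte at a time, most-significant byte first.
    // On a little-endian target the four i8 stores are equivalent to a single
    // i32 store of the byte-swapped value, i.e. STORE (BSWAP v).
    void storeBE32(unsigned char *p, unsigned v) {
      p[0] = (unsigned char)(v >> 24);
      p[1] = (unsigned char)(v >> 16);
      p[2] = (unsigned char)(v >> 8);
      p[3] = (unsigned char)(v);
    }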
if (ST->isUnindexed()) {
// Walk up chain skipping non-aliasing memory nodes, on this store and any
// adjacent stores.
@@ -15221,23 +16235,22 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Value.getValueType().isInteger() &&
(!isa<ConstantSDNode>(Value) ||
!cast<ConstantSDNode>(Value)->isOpaque())) {
+ APInt TruncDemandedBits =
+ APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
+ ST->getMemoryVT().getScalarSizeInBits());
+
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(
- Value, APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()));
+ SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter.getNode())
- return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
- Ptr, ST->getMemoryVT(), ST->getMemOperand());
+ if (Shorter)
+ return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
+ ST->getMemOperand());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
- if (SimplifyDemandedBits(
- Value,
- APInt::getLowBitsSet(Value.getScalarValueSizeInBits(),
- ST->getMemoryVT().getScalarSizeInBits()))) {
+ if (SimplifyDemandedBits(Value, TruncDemandedBits)) {
// Re-visit the store if anything changed and the store hasn't been merged
// with another node (N is deleted) SimplifyDemandedBits will add Value's
// node back to the worklist if necessary, but we also need to re-visit
@@ -15263,25 +16276,55 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile() && ST1->getBasePtr() == Ptr &&
- ST->getMemoryVT() == ST1->getMemoryVT()) {
- // If this is a store followed by a store with the same value to the same
- // location, then the store is dead/noop.
- if (ST1->getValue() == Value) {
- // The store is dead, remove it.
+ !ST1->isVolatile()) {
+ if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
+ ST->getMemoryVT() == ST1->getMemoryVT()) {
+ // If this is a store followed by a store with the same value to the
+ // same location, then the store is dead/noop.
return Chain;
}
- // If this is a store who's preceeding store to the same location
- // and no one other node is chained to that store we can effectively
- // drop the store. Do not remove stores to undef as they may be used as
- // data sinks.
if (OptLevel != CodeGenOpt::None && ST1->hasOneUse() &&
!ST1->getBasePtr().isUndef()) {
- // ST1 is fully overwritten and can be elided. Combine with it's chain
- // value.
- CombineTo(ST1, ST1->getChain());
- return SDValue();
+ const BaseIndexOffset STBase = BaseIndexOffset::match(ST, DAG);
+ const BaseIndexOffset ChainBase = BaseIndexOffset::match(ST1, DAG);
+ unsigned STBitSize = ST->getMemoryVT().getSizeInBits();
+ unsigned ChainBitSize = ST1->getMemoryVT().getSizeInBits();
+ // If the preceding store writes to a subset of the current store's
+ // location and no other node is chained to it, the preceding store is
+ // fully overwritten and can effectively be dropped. Do not remove stores
+ // to undef as they may be used as data sinks.
+ if (STBase.contains(DAG, STBitSize, ChainBase, ChainBitSize)) {
+ CombineTo(ST1, ST1->getChain());
+ return SDValue();
+ }
+
+ // If ST stores to a subset of the preceding store's write set, we may be
+ // able to fold ST's value into the preceding stored value. Because ST1
+ // has only one use (checked above), no other node depends on its chain,
+ // so the folding cannot affect other nodes.
+ int64_t BitOffset;
+ if (ChainBase.contains(DAG, ChainBitSize, STBase, STBitSize,
+ BitOffset)) {
+ SDValue ChainValue = ST1->getValue();
+ if (auto *C1 = dyn_cast<ConstantSDNode>(ChainValue)) {
+ if (auto *C = dyn_cast<ConstantSDNode>(Value)) {
+ APInt Val = C1->getAPIntValue();
+ APInt InsertVal = C->getAPIntValue().zextOrTrunc(STBitSize);
+ // FIXME: Handle Big-endian mode.
+ if (!DAG.getDataLayout().isBigEndian()) {
+ Val.insertBits(InsertVal, BitOffset);
+ SDValue NewSDVal =
+ DAG.getConstant(Val, SDLoc(C), ChainValue.getValueType(),
+ C1->isTargetOpcode(), C1->isOpaque());
+ SDNode *NewST1 = DAG.UpdateNodeOperands(
+ ST1, ST1->getChain(), NewSDVal, ST1->getOperand(2),
+ ST1->getOperand(3));
+ return CombineTo(ST, SDValue(NewST1, 0));
+ }
+ }
+ }
+ } // End ST subset of ST1 case.
}
}
}
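A worked example of the insertBits folding above (little-endian, matching the
guard in the code): an i8 store of 0xAA at BitOffset 8 folded into a preceding
i32 store of 0x11223344.

    APInt Val(32, 0x11223344);      // ST1's constant (C1)
    APInt InsertVal(8, 0xAA);       // ST's constant, truncated to STBitSize
    Val.insertBits(InsertVal, 8);   // Val == 0x1122AA44
    // ST1 is then rewritten to store the merged constant and ST is removed.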
@@ -15299,7 +16342,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
- if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
@@ -15333,6 +16376,54 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
return ReduceLoadOpStoreWidth(N);
}
+SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
+ const auto *LifetimeEnd = cast<LifetimeSDNode>(N);
+ if (!LifetimeEnd->hasOffset())
+ return SDValue();
+
+ const BaseIndexOffset LifetimeEndBase(N->getOperand(1), SDValue(),
+ LifetimeEnd->getOffset(), false);
+
+ // We walk up the chains to find stores.
+ SmallVector<SDValue, 8> Chains = {N->getOperand(0)};
+ while (!Chains.empty()) {
+ SDValue Chain = Chains.back();
+ Chains.pop_back();
+ if (!Chain.hasOneUse())
+ continue;
+ switch (Chain.getOpcode()) {
+ case ISD::TokenFactor:
+ for (unsigned Nops = Chain.getNumOperands(); Nops;)
+ Chains.push_back(Chain.getOperand(--Nops));
+ break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the node.
+ if (!isAlias(Chain.getNode(), N))
+ Chains.push_back(Chain.getOperand(0));
+ break;
+ case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Chain);
+ if (ST->isVolatile() || ST->isIndexed())
+ continue;
+ const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
+ // If we store purely within object bounds just before its lifetime ends,
+ // we can remove the store.
+ if (LifetimeEndBase.contains(DAG, LifetimeEnd->getSize() * 8, StoreBase,
+ ST->getMemoryVT().getStoreSizeInBits())) {
+ LLVM_DEBUG(dbgs() << "\nRemoving store:"; StoreBase.dump();
+ dbgs() << "\nwithin LIFETIME_END of : ";
+ LifetimeEndBase.dump(); dbgs() << "\n");
+ CombineTo(ST, ST->getChain());
+ return SDValue(N, 0);
+ }
+ }
+ }
+ }
+ return SDValue();
+}
+
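The containment query used above reduces to interval arithmetic on bit ranges;
a self-contained sketch, assuming both ranges were already resolved against a
common base pointer:

    // Does [SubOff, SubOff + SubBits) lie within [BaseOff, BaseOff + BaseBits)?
    // If a store's range lies entirely within the LIFETIME_END'ed object, the
    // stored value can never be observed, so the store is dead.
    bool contains(int64_t BaseOff, int64_t BaseBits, int64_t SubOff,
                  int64_t SubBits) {
      return BaseOff <= SubOff && SubOff + SubBits <= BaseOff + BaseBits;
    }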
/// For the store instruction sequence below, the F and I values
/// are bundled together as an i64 value before being stored into memory.
/// Sometimes it is more efficient to generate separate stores for F and I,
@@ -15616,7 +16707,9 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
Offset = DAG.getNode(
ISD::MUL, DL, PtrType, Offset,
DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
- MPI = OriginalLoad->getPointerInfo();
+ // Discard the pointer info except the address space because the memory
+ // operand can't represent this new access since the offset is variable.
+ MPI = MachinePointerInfo(OriginalLoad->getPointerInfo().getAddrSpace());
}
NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
@@ -15668,14 +16761,15 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
/// the math/logic after an extract element of a vector.
static SDValue scalarizeExtractedBinop(SDNode *ExtElt, SelectionDAG &DAG,
bool LegalOperations) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Vec = ExtElt->getOperand(0);
SDValue Index = ExtElt->getOperand(1);
auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (!IndexC || !ISD::isBinaryOp(Vec.getNode()) || !Vec.hasOneUse())
+ if (!IndexC || !TLI.isBinOp(Vec.getOpcode()) || !Vec.hasOneUse() ||
+ Vec.getNode()->getNumValues() != 1)
return SDValue();
// Targets may want to avoid this to prevent an expensive register transfer.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.shouldScalarizeBinop(Vec))
return SDValue();
@@ -16073,7 +17167,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
ArrayRef<int> VectorMask,
SDValue VecIn1, SDValue VecIn2,
- unsigned LeftIdx) {
+ unsigned LeftIdx, bool DidSplitVec) {
MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
SDValue ZeroIdx = DAG.getConstant(0, DL, IdxTy);
@@ -16081,17 +17175,12 @@ SDValue DAGCombiner::createBuildVecShuffle(const SDLoc &DL, SDNode *N,
EVT InVT1 = VecIn1.getValueType();
EVT InVT2 = VecIn2.getNode() ? VecIn2.getValueType() : InVT1;
- unsigned Vec2Offset = 0;
unsigned NumElems = VT.getVectorNumElements();
unsigned ShuffleNumElems = NumElems;
- // In case both the input vectors are extracted from same base
- // vector we do not need extra addend (Vec2Offset) while
- // computing shuffle mask.
- if (!VecIn2 || !(VecIn1.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn2.getOpcode() == ISD::EXTRACT_SUBVECTOR) ||
- !(VecIn1.getOperand(0) == VecIn2.getOperand(0)))
- Vec2Offset = InVT1.getVectorNumElements();
+ // If we artificially split a vector in two already, then the offsets in the
+ // operands will all be based off of VecIn1, even those in VecIn2.
+ unsigned Vec2Offset = DidSplitVec ? 0 : InVT1.getVectorNumElements();
// We can't generate a shuffle node with mismatched input and output types.
// Try to make the types match the type of the output.
@@ -16214,23 +17303,29 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// The build vector contains some number of undef elements and exactly
// one other element. That other element must be a zero-extended scalar
// extracted from a vector at a constant index to turn this into a shuffle.
+ // Also, require that the build vector does not implicitly truncate/extend
+ // its elements.
// TODO: This could be enhanced to allow ANY_EXTEND as well as ZERO_EXTEND.
+ EVT VT = BV->getValueType(0);
SDValue Zext = BV->getOperand(ZextElt);
if (Zext.getOpcode() != ISD::ZERO_EXTEND || !Zext.hasOneUse() ||
Zext.getOperand(0).getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
- !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)))
+ !isa<ConstantSDNode>(Zext.getOperand(0).getOperand(1)) ||
+ Zext.getValueSizeInBits() != VT.getScalarSizeInBits())
return SDValue();
- // The zero-extend must be a multiple of the source size.
+ // The zero-extended size must be a multiple of the source size, and we
+ // must be building a vector of the same size as the source of the
+ // extract element.
SDValue Extract = Zext.getOperand(0);
unsigned DestSize = Zext.getValueSizeInBits();
unsigned SrcSize = Extract.getValueSizeInBits();
- if (DestSize % SrcSize != 0)
+ if (DestSize % SrcSize != 0 ||
+ Extract.getOperand(0).getValueSizeInBits() != VT.getSizeInBits())
return SDValue();
// Create a shuffle mask that will combine the extracted element with zeros
// and undefs.
- int ZextRatio = DestSize / SrcSize;
+ int ZextRatio = DestSize / SrcSize;
int NumMaskElts = NumBVOps * ZextRatio;
SmallVector<int, 32> ShufMask(NumMaskElts, -1);
for (int i = 0; i != NumMaskElts; ++i) {
@@ -16260,7 +17355,7 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
ShufMask);
- return DAG.getBitcast(BV->getValueType(0), Shuf);
+ return DAG.getBitcast(VT, Shuf);
}
// Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
@@ -16316,7 +17411,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
return SDValue();
SDValue ExtractedFromVec = Op.getOperand(0);
- APInt ExtractIdx = cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue();
+ const APInt &ExtractIdx = Op.getConstantOperandAPInt(1);
if (ExtractIdx.uge(ExtractedFromVec.getValueType().getVectorNumElements()))
return SDValue();
@@ -16344,6 +17439,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// vector, then split the vector efficiently based on the maximum
// vector access index and adjust the VectorMask and
// VecIn accordingly.
+ bool DidSplitVec = false;
if (VecIn.size() == 2) {
unsigned MaxIndex = 0;
unsigned NearestPow2 = 0;
@@ -16374,6 +17470,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
VecIn.pop_back();
VecIn.push_back(VecIn1);
VecIn.push_back(VecIn2);
+ DidSplitVec = true;
for (unsigned i = 0; i < NumElems; i++) {
if (VectorMask[i] <= 0)
@@ -16411,7 +17508,7 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
(LeftIdx + 1) < VecIn.size() ? VecIn[LeftIdx + 1] : SDValue();
if (SDValue Shuffle = createBuildVecShuffle(DL, N, VectorMask, VecLeft,
- VecRight, LeftIdx))
+ VecRight, LeftIdx, DidSplitVec))
Shuffles.push_back(Shuffle);
else
return SDValue();
@@ -16477,18 +17574,20 @@ SDValue DAGCombiner::reduceBuildVecToShuffle(SDNode *N) {
// Try to turn a build vector of zero extends of extract vector elts into a
// a vector zero extend and possibly an extract subvector.
-// TODO: Support sign extend or any extend?
+// TODO: Support sign extend?
// TODO: Allow undef elements?
-// TODO: Don't require the extracts to start at element 0.
SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
if (LegalOperations)
return SDValue();
EVT VT = N->getValueType(0);
+ bool FoundZeroExtend = false;
SDValue Op0 = N->getOperand(0);
auto checkElem = [&](SDValue Op) -> int64_t {
- if (Op.getOpcode() == ISD::ZERO_EXTEND &&
+ unsigned Opc = Op.getOpcode();
+ FoundZeroExtend |= (Opc == ISD::ZERO_EXTEND);
+ if ((Opc == ISD::ZERO_EXTEND || Opc == ISD::ANY_EXTEND) &&
Op.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
Op0.getOperand(0).getOperand(0) == Op.getOperand(0).getOperand(0))
if (auto *C = dyn_cast<ConstantSDNode>(Op.getOperand(0).getOperand(1)))
@@ -16520,7 +17619,8 @@ SDValue DAGCombiner::convertBuildVecZextToZext(SDNode *N) {
SDLoc DL(N);
In = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InVT, In,
Op0.getOperand(0).getOperand(1));
- return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, In);
+ return DAG.getNode(FoundZeroExtend ? ISD::ZERO_EXTEND : ISD::ANY_EXTEND, DL,
+ VT, In);
}
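An illustrative instance of the extended fold (DAG notation as used in the
comments in this file; types assumed): every element is a zero- or any-extend
of consecutive extracts from one source, and ZERO_EXTEND is kept only because
at least one element demanded zeroed high bits:

    build_vector (zext (extelt X, 4)), (aext (extelt X, 5)),
                 (aext (extelt X, 6)), (zext (extelt X, 7))
      --> zero_extend (extract_subvector X, 4)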
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
@@ -16885,14 +17985,14 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
- unsigned IdentityIndex = i * PartNumElem;
- ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ auto *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
// The extract index must be constant.
if (!CS)
return SDValue();
// Check that we are reading from the identity index.
- if (CS->getZExtValue() != IdentityIndex)
+ unsigned IdentityIndex = i * PartNumElem;
+ if (CS->getAPIntValue() != IdentityIndex)
return SDValue();
}
@@ -16902,12 +18002,59 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
+ SelectionDAG &DAG) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue BinOp = Extract->getOperand(0);
+ unsigned BinOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
+ return SDValue();
+
+ SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
+ SDValue Index = Extract->getOperand(1);
+ EVT VT = Extract->getValueType(0);
+
+ // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find a
+ // source subvector with the same type as the one being extracted.
+ auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == VT &&
+ (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+ };
+ SDValue Sub0 = GetSubVector(Bop0);
+ SDValue Sub1 = GetSubVector(Bop1);
+
+ // TODO: We could handle the case where only 1 operand is being inserted by
+ // creating an extract of the other operand, but that requires checking
+ // number of uses and/or costs.
+ if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ return SDValue();
+
+ // We are inserting both operands of the wide binop only to extract back
+ // to the narrow vector size. Eliminate all of the insert/extract:
+ // ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
+ return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ BinOp->getFlags());
+}
+
/// If we are extracting a subvector produced by a wide binary operator try
/// to use a narrow binary operator and/or avoid concatenation and extraction.
static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// TODO: Refactor with the caller (visitEXTRACT_SUBVECTOR), so we can share
// some of these bailouts with other transforms.
+ if (SDValue V = narrowInsertExtractVectorBinOp(Extract, DAG))
+ return V;
+
// The extract index must be a constant, so we can map it to a concat operand.
auto *ExtractIndexC = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
if (!ExtractIndexC)
@@ -16915,8 +18062,10 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We are looking for an optionally bitcasted wide vector binary operator
// feeding an extract subvector.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue BinOp = peekThroughBitcasts(Extract->getOperand(0));
- if (!ISD::isBinaryOp(BinOp.getNode()))
+ unsigned BOpcode = BinOp.getOpcode();
+ if (!TLI.isBinOp(BOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
// The binop must be a vector type, so we can extract some fraction of it.
@@ -16945,8 +18094,6 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// Bail out if the target does not support a narrower version of the binop.
EVT NarrowBVT = EVT::getVectorVT(*DAG.getContext(), WideBVT.getScalarType(),
WideNumElts / NarrowingRatio);
- unsigned BOpcode = BinOp.getOpcode();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!TLI.isOperationLegalOrCustomOrPromote(BOpcode, NarrowBVT))
return SDValue();
@@ -16986,35 +18133,35 @@ static SDValue narrowExtractedVectorBinOp(SDNode *Extract, SelectionDAG &DAG) {
// We need at least one concatenation operation of a binop operand to make
// this transform worthwhile. The concat must double the input vector sizes.
- // TODO: Should we also handle INSERT_SUBVECTOR patterns?
- SDValue LHS = peekThroughBitcasts(BinOp.getOperand(0));
- SDValue RHS = peekThroughBitcasts(BinOp.getOperand(1));
- bool ConcatL =
- LHS.getOpcode() == ISD::CONCAT_VECTORS && LHS.getNumOperands() == 2;
- bool ConcatR =
- RHS.getOpcode() == ISD::CONCAT_VECTORS && RHS.getNumOperands() == 2;
- if (!ConcatL && !ConcatR)
+ auto GetSubVector = [ConcatOpNum](SDValue V) -> SDValue {
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && V.getNumOperands() == 2)
+ return V.getOperand(ConcatOpNum);
return SDValue();
+ };
+ SDValue SubVecL = GetSubVector(peekThroughBitcasts(BinOp.getOperand(0)));
+ SDValue SubVecR = GetSubVector(peekThroughBitcasts(BinOp.getOperand(1)));
+
+ if (SubVecL || SubVecR) {
+ // If a binop operand was not the result of a concat, we must extract a
+ // half-sized operand for our new narrow binop:
+ // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
+ // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, IndexC)
+ // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, IndexC), YN
+ SDLoc DL(Extract);
+ SDValue IndexC = DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT);
+ SDValue X = SubVecL ? DAG.getBitcast(NarrowBVT, SubVecL)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(0), IndexC);
- // If one of the binop operands was not the result of a concat, we must
- // extract a half-sized operand for our new narrow binop.
- SDLoc DL(Extract);
-
- // extract (binop (concat X1, X2), (concat Y1, Y2)), N --> binop XN, YN
- // extract (binop (concat X1, X2), Y), N --> binop XN, (extract Y, N)
- // extract (binop X, (concat Y1, Y2)), N --> binop (extract X, N), YN
- SDValue X = ConcatL ? DAG.getBitcast(NarrowBVT, LHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(0),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue Y = SubVecR ? DAG.getBitcast(NarrowBVT, SubVecR)
+ : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
+ BinOp.getOperand(1), IndexC);
- SDValue Y = ConcatR ? DAG.getBitcast(NarrowBVT, RHS.getOperand(ConcatOpNum))
- : DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NarrowBVT,
- BinOp.getOperand(1),
- DAG.getConstant(ExtBOIdx, DL, ExtBOIdxVT));
+ SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
+ return DAG.getBitcast(VT, NarrowBinOp);
+ }
- SDValue NarrowBinOp = DAG.getNode(BOpcode, DL, NarrowBVT, X, Y);
- return DAG.getBitcast(VT, NarrowBinOp);
+ return SDValue();
}
/// If we are extracting a subvector from a wide vector load, convert to a
@@ -17052,7 +18199,7 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
return NewLd;
}
-SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
+SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode *N) {
EVT NVT = N->getValueType(0);
SDValue V = N->getOperand(0);
@@ -17064,14 +18211,51 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (SDValue NarrowLoad = narrowExtractedVectorLoad(N, DAG))
return NarrowLoad;
+ // Combine an extract of an extract into a single extract_subvector.
+ // ext (ext X, C), 0 --> ext X, C
+ SDValue Index = N->getOperand(1);
+ if (isNullConstant(Index) && V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ V.hasOneUse() && isa<ConstantSDNode>(V.getOperand(1))) {
+ if (TLI.isExtractSubvectorCheap(NVT, V.getOperand(0).getValueType(),
+ V.getConstantOperandVal(1)) &&
+ TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NVT)) {
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT, V.getOperand(0),
+ V.getOperand(1));
+ }
+ }
+
+ // Try to move vector bitcast after extract_subv by scaling extraction index:
+ // extract_subv (bitcast X), Index --> bitcast (extract_subv X, Index')
+ if (isa<ConstantSDNode>(Index) && V.getOpcode() == ISD::BITCAST &&
+ V.getOperand(0).getValueType().isVector()) {
+ SDValue SrcOp = V.getOperand(0);
+ EVT SrcVT = SrcOp.getValueType();
+ unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ unsigned DestNumElts = V.getValueType().getVectorNumElements();
+ if ((SrcNumElts % DestNumElts) == 0) {
+ unsigned SrcDestRatio = SrcNumElts / DestNumElts;
+ unsigned NewExtNumElts = NVT.getVectorNumElements() * SrcDestRatio;
+ EVT NewExtVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NewExtNumElts);
+ if (TLI.isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, NewExtVT)) {
+ unsigned IndexValScaled = N->getConstantOperandVal(1) * SrcDestRatio;
+ SDLoc DL(N);
+ SDValue NewIndex = DAG.getIntPtrConstant(IndexValScaled, DL);
+ SDValue NewExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, NewExtVT,
+ V.getOperand(0), NewIndex);
+ return DAG.getBitcast(NVT, NewExtract);
+ }
+ }
+ // TODO - handle (DestNumElts % SrcNumElts) == 0
+ }
+
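A worked example of the index scaling (types chosen for illustration): with
NVT = v2i32, V = (v4i32 bitcast of X:v8i16), and Index = 2, SrcDestRatio =
8 / 4 = 2, so NewExtVT = v4i16 and the index scales to 2 * 2 = 4:

    extract_subv (bitcast X:v8i16 to v4i32), 2
      --> bitcast (extract_subv X, 4) to v2i32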
// Combine:
// (extract_subvec (concat V1, V2, ...), i)
// Into:
// Vi if possible
// Only operand 0 is checked as 'concat' assumes all inputs of the same
// type.
- if (V.getOpcode() == ISD::CONCAT_VECTORS &&
- isa<ConstantSDNode>(N->getOperand(1)) &&
+ if (V.getOpcode() == ISD::CONCAT_VECTORS && isa<ConstantSDNode>(Index) &&
V.getOperand(0).getValueType() == NVT) {
unsigned Idx = N->getConstantOperandVal(1);
unsigned NumElems = NVT.getVectorNumElements();
@@ -17084,7 +18268,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
// If the input is a build vector. Try to make a smaller build vector.
if (V.getOpcode() == ISD::BUILD_VECTOR) {
- if (auto *Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Index)) {
EVT InVT = V.getValueType();
unsigned ExtractSize = NVT.getSizeInBits();
unsigned EltSize = InVT.getScalarSizeInBits();
@@ -17092,26 +18276,27 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
if (ExtractSize % EltSize == 0) {
unsigned NumElems = ExtractSize / EltSize;
EVT EltVT = InVT.getVectorElementType();
- EVT ExtractVT = NumElems == 1 ? EltVT :
- EVT::getVectorVT(*DAG.getContext(), EltVT, NumElems);
+ EVT ExtractVT = NumElems == 1 ? EltVT
+ : EVT::getVectorVT(*DAG.getContext(),
+ EltVT, NumElems);
if ((Level < AfterLegalizeDAG ||
(NumElems == 1 ||
TLI.isOperationLegal(ISD::BUILD_VECTOR, ExtractVT))) &&
(!LegalTypes || TLI.isTypeLegal(ExtractVT))) {
- unsigned IdxVal = (Idx->getZExtValue() * NVT.getScalarSizeInBits()) /
- EltSize;
+ unsigned IdxVal = IdxC->getZExtValue();
+ IdxVal *= NVT.getScalarSizeInBits();
+ IdxVal /= EltSize;
+
if (NumElems == 1) {
SDValue Src = V->getOperand(IdxVal);
if (EltVT != Src.getValueType())
Src = DAG.getNode(ISD::TRUNCATE, SDLoc(N), InVT, Src);
-
return DAG.getBitcast(NVT, Src);
}
// Extract the pieces from the original build_vector.
- SDValue BuildVec = DAG.getBuildVector(ExtractVT, SDLoc(N),
- makeArrayRef(V->op_begin() + IdxVal,
- NumElems));
+ SDValue BuildVec = DAG.getBuildVector(
+ ExtractVT, SDLoc(N), V->ops().slice(IdxVal, NumElems));
return DAG.getBitcast(NVT, BuildVec);
}
}
@@ -17126,9 +18311,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
// Only handle cases where both indexes are constants.
- auto *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ auto *ExtIdx = dyn_cast<ConstantSDNode>(Index);
auto *InsIdx = dyn_cast<ConstantSDNode>(V.getOperand(2));
-
if (InsIdx && ExtIdx) {
// Combine:
// (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
@@ -17141,7 +18325,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, SDLoc(N), NVT,
DAG.getBitcast(N->getOperand(0).getValueType(), V.getOperand(0)),
- N->getOperand(1));
+ Index);
}
}
@@ -17154,6 +18338,53 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
return SDValue();
}
+/// Try to convert a wide shuffle of concatenated vectors into 2 narrow shuffles
+/// followed by concatenation. Narrow vector ops may have better performance
+/// than wide ops, and this can unlock further narrowing of other vector ops.
+/// Targets can invert this transform later if it is not profitable.
+static SDValue foldShuffleOfConcatUndefs(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ SDValue N0 = Shuf->getOperand(0), N1 = Shuf->getOperand(1);
+ if (N0.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 ||
+ N1.getOpcode() != ISD::CONCAT_VECTORS || N1.getNumOperands() != 2 ||
+ !N0.getOperand(1).isUndef() || !N1.getOperand(1).isUndef())
+ return SDValue();
+
+ // Split the wide shuffle mask into halves. Any mask element that accesses
+ // operand 1 is offset down to account for the narrowing of the vectors.
+ ArrayRef<int> Mask = Shuf->getMask();
+ EVT VT = Shuf->getValueType(0);
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned HalfNumElts = NumElts / 2;
+ SmallVector<int, 16> Mask0(HalfNumElts, -1);
+ SmallVector<int, 16> Mask1(HalfNumElts, -1);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (Mask[i] == -1)
+ continue;
+ int M = Mask[i] < (int)NumElts ? Mask[i] : Mask[i] - (int)HalfNumElts;
+ if (i < HalfNumElts)
+ Mask0[i] = M;
+ else
+ Mask1[i - HalfNumElts] = M;
+ }
+
+ // Ask the target if this is a valid transform.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
+ HalfNumElts);
+ if (!TLI.isShuffleMaskLegal(Mask0, HalfVT) ||
+ !TLI.isShuffleMaskLegal(Mask1, HalfVT))
+ return SDValue();
+
+ // shuffle (concat X, undef), (concat Y, undef), Mask -->
+ // concat (shuffle X, Y, Mask0), (shuffle X, Y, Mask1)
+ SDValue X = N0.getOperand(0), Y = N1.getOperand(0);
+ SDLoc DL(Shuf);
+ SDValue Shuf0 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask0);
+ SDValue Shuf1 = DAG.getVectorShuffle(HalfVT, DL, X, Y, Mask1);
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Shuf0, Shuf1);
+}
+
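A self-contained sketch of the mask split, assuming NumElts = 8 (so
HalfNumElts = 4) and no undef lanes; mask values >= NumElts select from the
second concat and are shifted down by HalfNumElts so they address Y directly
in the narrow shuffles:

    #include <cstdio>
    int main() {
      const int NumElts = 8, HalfNumElts = 4;
      int Mask[NumElts] = {0, 9, 1, 8, 3, 10, 2, 11};
      int Mask0[HalfNumElts], Mask1[HalfNumElts];
      for (int i = 0; i != NumElts; ++i) {
        int M = Mask[i] < NumElts ? Mask[i] : Mask[i] - HalfNumElts;
        (i < HalfNumElts ? Mask0[i] : Mask1[i - HalfNumElts]) = M;
      }
      // Mask0 == {0, 5, 1, 4}, Mask1 == {3, 6, 2, 7}.
      for (int i = 0; i != HalfNumElts; ++i)
        printf("%d %d\n", Mask0[i], Mask1[i]);
      return 0;
    }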
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into a simpler shuffle, then a concat.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
@@ -17163,20 +18394,24 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ ArrayRef<int> Mask = SVN->getMask();
SmallVector<SDValue, 4> Ops;
EVT ConcatVT = N0.getOperand(0).getValueType();
unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
unsigned NumConcats = NumElts / NumElemsPerConcat;
+ auto IsUndefMaskElt = [](int i) { return i == -1; };
+
// Special case: shuffle(concat(A,B)) can be more efficiently represented
// as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
// half vector elements.
if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
- std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
- SVN->getMask().end(), [](int i) { return i == -1; })) {
- N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
- makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
+ llvm::all_of(Mask.slice(NumElemsPerConcat, NumElemsPerConcat),
+ IsUndefMaskElt)) {
+ N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0),
+ N0.getOperand(1),
+ Mask.slice(0, NumElemsPerConcat));
N1 = DAG.getUNDEF(ConcatVT);
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
}
@@ -17184,35 +18419,32 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
// Look at every vector that's inserted. We're looking for exact
// subvector-sized copies from a concatenated vector
for (unsigned I = 0; I != NumConcats; ++I) {
- // Make sure we're dealing with a copy.
unsigned Begin = I * NumElemsPerConcat;
- bool AllUndef = true, NoUndef = true;
- for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
- if (SVN->getMaskElt(J) >= 0)
- AllUndef = false;
- else
- NoUndef = false;
+ ArrayRef<int> SubMask = Mask.slice(Begin, NumElemsPerConcat);
+
+ // Make sure we're dealing with a copy.
+ if (llvm::all_of(SubMask, IsUndefMaskElt)) {
+ Ops.push_back(DAG.getUNDEF(ConcatVT));
+ continue;
}
- if (NoUndef) {
- if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
+ int OpIdx = -1;
+ for (int i = 0; i != (int)NumElemsPerConcat; ++i) {
+ if (IsUndefMaskElt(SubMask[i]))
+ continue;
+ if ((SubMask[i] % (int)NumElemsPerConcat) != i)
return SDValue();
-
- for (unsigned J = 1; J != NumElemsPerConcat; ++J)
- if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
- return SDValue();
-
- unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
- if (FirstElt < N0.getNumOperands())
- Ops.push_back(N0.getOperand(FirstElt));
- else
- Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
-
- } else if (AllUndef) {
- Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
- } else { // Mixed with general masks and undefs, can't do optimization.
- return SDValue();
+ int EltOpIdx = SubMask[i] / NumElemsPerConcat;
+ if (0 <= OpIdx && EltOpIdx != OpIdx)
+ return SDValue();
+ OpIdx = EltOpIdx;
}
+ assert(0 <= OpIdx && "Unknown concat_vectors op");
+
+ if (OpIdx < (int)N0.getNumOperands())
+ Ops.push_back(N0.getOperand(OpIdx));
+ else
+ Ops.push_back(N1.getOperand(OpIdx - N0.getNumOperands()));
}
return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
@@ -17278,8 +18510,8 @@ static SDValue combineShuffleOfScalars(ShuffleVectorSDNode *SVN,
if (S.getOpcode() == ISD::BUILD_VECTOR) {
Op = S.getOperand(Idx);
} else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- assert(Idx == 0 && "Unexpected SCALAR_TO_VECTOR operand index.");
- Op = S.getOperand(0);
+ SDValue Op0 = S.getOperand(0);
+ Op = Idx == 0 ? Op0 : DAG.getUNDEF(Op0.getValueType());
} else {
// Operand can't be combined - bail out.
return SDValue();
@@ -17433,11 +18665,17 @@ static SDValue combineTruncationShuffle(ShuffleVectorSDNode *SVN,
// If splat-mask contains undef elements, we need to be careful about
// introducing undefs in the folded mask that are not the result of composing
// the masks of the shuffles.
-static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
- ShuffleVectorSDNode *Splat,
- SelectionDAG &DAG) {
+static SDValue combineShuffleOfSplatVal(ShuffleVectorSDNode *Shuf,
+ SelectionDAG &DAG) {
+ if (!Shuf->getOperand(1).isUndef())
+ return SDValue();
+ auto *Splat = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Splat || !Splat->isSplat())
+ return SDValue();
+
+ ArrayRef<int> ShufMask = Shuf->getMask();
ArrayRef<int> SplatMask = Splat->getMask();
- assert(UserMask.size() == SplatMask.size() && "Mask length mismatch");
+ assert(ShufMask.size() == SplatMask.size() && "Mask length mismatch");
// Prefer simplifying to the splat-shuffle, if possible. This is legal if
// every undef mask element in the splat-shuffle has a corresponding undef
@@ -17463,13 +18701,13 @@ static SDValue combineShuffleOfSplat(ArrayRef<int> UserMask,
return false;
return true;
};
- if (CanSimplifyToExistingSplat(UserMask, SplatMask))
- return SDValue(Splat, 0);
+ if (CanSimplifyToExistingSplat(ShufMask, SplatMask))
+ return Shuf->getOperand(0);
// Create a new shuffle with a mask that is composed of the two shuffles'
// masks.
SmallVector<int, 32> NewMask;
- for (int Idx : UserMask)
+ for (int Idx : ShufMask)
NewMask.push_back(Idx == -1 ? -1 : SplatMask[Idx]);
return DAG.getVectorShuffle(Splat->getValueType(0), SDLoc(Splat),
@@ -17555,6 +18793,34 @@ static SDValue replaceShuffleOfInsert(ShuffleVectorSDNode *Shuf,
Op1, Op0.getOperand(1), NewInsIndex);
}
+/// If we have a unary shuffle of a shuffle, see if it can be folded away
+/// completely. This has the potential to lose undef knowledge because the first
+/// shuffle may not have an undef mask element where the second one does. So
+/// only call this after doing simplifications based on demanded elements.
+static SDValue simplifyShuffleOfShuffle(ShuffleVectorSDNode *Shuf) {
+ // shuf (shuf0 X, Y, Mask0), undef, Mask
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(Shuf->getOperand(0));
+ if (!Shuf0 || !Shuf->getOperand(1).isUndef())
+ return SDValue();
+
+ ArrayRef<int> Mask = Shuf->getMask();
+ ArrayRef<int> Mask0 = Shuf0->getMask();
+ for (int i = 0, e = (int)Mask.size(); i != e; ++i) {
+ // Ignore undef elements.
+ if (Mask[i] == -1)
+ continue;
+ assert(Mask[i] >= 0 && Mask[i] < e && "Unexpected shuffle mask value");
+
+ // Does the composed mask pick the same element of the inner shuffle's
+ // operand at lane i (Mask0[Mask[i]]) as the inner shuffle itself (Mask0[i])?
+ if (Mask0[Mask[i]] != Mask0[i])
+ return SDValue();
+ }
+ // Every element of this shuffle is identical to the result of the previous
+ // shuffle, so we can replace this value.
+ return Shuf->getOperand(0);
+}
+
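A concrete case where the fold fires (masks chosen for illustration): with
inner mask Mask0 = {0, 0, 1, 1} the inner shuffle produces {X0, X0, X1, X1};
an outer mask Mask = {1, 0, 3, 2} only permutes within the equal pairs, so
Mask0[Mask[i]] == Mask0[i] holds at every lane and the outer shuffle is
replaced by its operand.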
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
EVT VT = N->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
@@ -17604,19 +18870,35 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SDValue InsElt = replaceShuffleOfInsert(SVN, DAG))
return InsElt;
- // A shuffle of a single vector that is a splat can always be folded.
- if (auto *N0Shuf = dyn_cast<ShuffleVectorSDNode>(N0))
- if (N1->isUndef() && N0Shuf->isSplat())
- return combineShuffleOfSplat(SVN->getMask(), N0Shuf, DAG);
+ // A shuffle of a single vector that is a splatted value can always be folded.
+ if (SDValue V = combineShuffleOfSplatVal(SVN, DAG))
+ return V;
// If it is a splat, check if the argument vector is another splat or a
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
- SDNode *V = N0.getNode();
+ int SplatIndex = SVN->getSplatIndex();
+ if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
+ // splat (vector_bo L, R), Index -->
+ // splat (scalar_bo (extelt L, Index), (extelt R, Index))
+ SDValue L = N0.getOperand(0), R = N0.getOperand(1);
+ SDLoc DL(N);
+ EVT EltVT = VT.getScalarType();
+ SDValue Index = DAG.getIntPtrConstant(SplatIndex, DL);
+ SDValue ExtL = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, L, Index);
+ SDValue ExtR = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, R, Index);
+ SDValue NewBO = DAG.getNode(N0.getOpcode(), DL, EltVT, ExtL, ExtR,
+ N0.getNode()->getFlags());
+ SDValue Insert = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, NewBO);
+ SmallVector<int, 16> ZeroMask(VT.getVectorNumElements(), 0);
+ return DAG.getVectorShuffle(VT, DL, Insert, DAG.getUNDEF(VT), ZeroMask);
+ }
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
+ SDNode *V = N0.getNode();
if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
@@ -17649,7 +18931,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return N0;
// Canonicalize any other splat as a build_vector.
- const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
+ SDValue Splatted = V->getOperand(SplatIndex);
SmallVector<SDValue, 8> Ops(NumElts, Splatted);
SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
@@ -17665,6 +18947,11 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
if (SimplifyDemandedVectorElts(SDValue(N, 0)))
return SDValue(N, 0);
+ // This is intentionally placed after demanded elements simplification because
+ // it could eliminate knowledge of undef elements created by this shuffle.
+ if (SDValue ShufOp = simplifyShuffleOfShuffle(SVN))
+ return ShufOp;
+
// Match shuffles that can be converted to any_vector_extend_in_reg.
if (SDValue V = combineShuffleToVectorExtend(SVN, DAG, TLI, LegalOperations))
return V;
@@ -17704,7 +18991,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
NewMask.push_back(M < 0 ? -1 : Scale * M + s);
return NewMask;
};
-
+
SDValue BC0 = peekThroughOneUseBitcasts(N0);
if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
EVT SVT = VT.getScalarType();
@@ -17884,6 +19171,9 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
}
+ if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
+ return V;
+
return SDValue();
}
@@ -18006,7 +19296,44 @@ SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
if (!isa<ConstantSDNode>(N2))
return SDValue();
- unsigned InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+ uint64_t InsIdx = cast<ConstantSDNode>(N2)->getZExtValue();
+
+ // Push subvector bitcasts to the output, adjusting the index as we go.
+ // insert_subvector(bitcast(v), bitcast(s), c1)
+ // -> bitcast(insert_subvector(v, s, c2))
+ if ((N0.isUndef() || N0.getOpcode() == ISD::BITCAST) &&
+ N1.getOpcode() == ISD::BITCAST) {
+ SDValue N0Src = peekThroughBitcasts(N0);
+ SDValue N1Src = peekThroughBitcasts(N1);
+ EVT N0SrcSVT = N0Src.getValueType().getScalarType();
+ EVT N1SrcSVT = N1Src.getValueType().getScalarType();
+ if ((N0.isUndef() || N0SrcSVT == N1SrcSVT) &&
+ N0Src.getValueType().isVector() && N1Src.getValueType().isVector()) {
+ EVT NewVT;
+ SDLoc DL(N);
+ SDValue NewIdx;
+ MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned NumElts = VT.getVectorNumElements();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+ if ((EltSizeInBits % N1SrcSVT.getSizeInBits()) == 0) {
+ unsigned Scale = EltSizeInBits / N1SrcSVT.getSizeInBits();
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts * Scale);
+ NewIdx = DAG.getConstant(InsIdx * Scale, DL, IdxVT);
+ } else if ((N1SrcSVT.getSizeInBits() % EltSizeInBits) == 0) {
+ unsigned Scale = N1SrcSVT.getSizeInBits() / EltSizeInBits;
+ if ((NumElts % Scale) == 0 && (InsIdx % Scale) == 0) {
+ NewVT = EVT::getVectorVT(Ctx, N1SrcSVT, NumElts / Scale);
+ NewIdx = DAG.getConstant(InsIdx / Scale, DL, IdxVT);
+ }
+ }
+ if (NewIdx && hasOperation(ISD::INSERT_SUBVECTOR, NewVT)) {
+ SDValue Res = DAG.getBitcast(NewVT, N0Src);
+ Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, NewVT, Res, N1Src, NewIdx);
+ return DAG.getBitcast(VT, Res);
+ }
+ }
+ }
// Canonicalize insert_subvector dag nodes.
// Example:
@@ -18070,6 +19397,36 @@ SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
return SDValue();
}
+SDValue DAGCombiner::visitVECREDUCE(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N0.getValueType();
+ unsigned Opcode = N->getOpcode();
+
+ // VECREDUCE over 1-element vector is just an extract.
+ if (VT.getVectorNumElements() == 1) {
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, VT.getVectorElementType(), N0,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Res);
+ return Res;
+ }
+
+ // On a boolean vector an and/or reduction is the same as a umin/umax
+ // reduction. Convert them if the latter is legal while the former isn't.
+ if (Opcode == ISD::VECREDUCE_AND || Opcode == ISD::VECREDUCE_OR) {
+ unsigned NewOpcode = Opcode == ISD::VECREDUCE_AND
+ ? ISD::VECREDUCE_UMIN : ISD::VECREDUCE_UMAX;
+ if (!TLI.isOperationLegalOrCustom(Opcode, VT) &&
+ TLI.isOperationLegalOrCustom(NewOpcode, VT) &&
+ DAG.ComputeNumSignBits(N0) == VT.getScalarSizeInBits())
+ return DAG.getNode(NewOpcode, SDLoc(N), N->getValueType(0), N0);
+ }
+
+ return SDValue();
+}
+
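Why the swap is sound, sketched in plain C++: the ComputeNumSignBits guard
ensures every lane is 0 or all-ones, and on that two-value domain bitwise
and/or coincide with unsigned min/max:

    #include <algorithm>
    #include <cassert>
    int main() {
      const unsigned Lanes[2] = {0u, ~0u}; // only possible boolean lane values
      for (unsigned X : Lanes)
        for (unsigned Y : Lanes) {
          assert((X & Y) == std::min(X, Y)); // VECREDUCE_AND == VECREDUCE_UMIN
          assert((X | Y) == std::max(X, Y)); // VECREDUCE_OR  == VECREDUCE_UMAX
        }
      return 0;
    }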
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
@@ -18161,6 +19518,53 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
return SDValue();
}
+/// If a vector binop is performed on splat values, it may be profitable to
+/// extract, scalarize, and insert/splat.
+static SDValue scalarizeBinOpOfSplats(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ unsigned Opcode = N->getOpcode();
+ EVT VT = N->getValueType(0);
+ EVT EltVT = VT.getVectorElementType();
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // TODO: Remove/replace the extract cost check? If the elements are available
+ // as scalars, then there may be no extract cost. Should we ask if
+ // inserting a scalar back into a vector is cheap instead?
+ int Index0, Index1;
+ SDValue Src0 = DAG.getSplatSourceVector(N0, Index0);
+ SDValue Src1 = DAG.getSplatSourceVector(N1, Index1);
+ if (!Src0 || !Src1 || Index0 != Index1 ||
+ Src0.getValueType().getVectorElementType() != EltVT ||
+ Src1.getValueType().getVectorElementType() != EltVT ||
+ !TLI.isExtractVecEltCheap(VT, Index0) ||
+ !TLI.isOperationLegalOrCustom(Opcode, EltVT))
+ return SDValue();
+
+ SDLoc DL(N);
+ SDValue IndexC =
+ DAG.getConstant(Index0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue X = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N0, IndexC);
+ SDValue Y = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, N1, IndexC);
+ SDValue ScalarBO = DAG.getNode(Opcode, DL, EltVT, X, Y, N->getFlags());
+
+ // If all lanes but 1 are undefined, no need to splat the scalar result.
+ // TODO: Keep track of undefs and use that info in the general case.
+ if (N0.getOpcode() == ISD::BUILD_VECTOR && N0.getOpcode() == N1.getOpcode() &&
+ count_if(N0->ops(), [](SDValue V) { return !V.isUndef(); }) == 1 &&
+ count_if(N1->ops(), [](SDValue V) { return !V.isUndef(); }) == 1) {
+ // bo (build_vec ..undef, X, undef...), (build_vec ..undef, Y, undef...) -->
+ // build_vec ..undef, (bo X, Y), undef...
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), DAG.getUNDEF(EltVT));
+ Ops[Index0] = ScalarBO;
+ return DAG.getBuildVector(VT, DL, Ops);
+ }
+
+ // bo (splat X, Index), (splat Y, Index) --> splat (bo X, Y), Index
+ SmallVector<SDValue, 8> Ops(VT.getVectorNumElements(), ScalarBO);
+ return DAG.getBuildVector(VT, DL, Ops);
+}
+
/// Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
assert(N->getValueType(0).isVector() &&
@@ -18169,34 +19573,63 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Ops[] = {LHS, RHS};
+ EVT VT = N->getValueType(0);
+ unsigned Opcode = N->getOpcode();
// See if we can constant fold the vector operation.
if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
- N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
+ Opcode, SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
return Fold;
- // Type legalization might introduce new shuffles in the DAG.
- // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
- // -> (shuffle (VBinOp (A, B)), Undef, Mask).
- if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
- isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
- LHS.getOperand(1).isUndef() &&
- RHS.getOperand(1).isUndef()) {
- ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
- ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
-
- if (SVN0->getMask().equals(SVN1->getMask())) {
- EVT VT = N->getValueType(0);
- SDValue UndefVector = LHS.getOperand(1);
- SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
- LHS.getOperand(0), RHS.getOperand(0),
- N->getFlags());
- AddUsersToWorklist(N);
- return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
- SVN0->getMask());
+ // Move unary shuffles with identical masks after a vector binop:
+ // VBinOp (shuffle A, Undef, Mask), (shuffle B, Undef, Mask))
+ // --> shuffle (VBinOp A, B), Undef, Mask
+ // This does not require type legality checks because we are creating the
+ // same types of operations that are in the original sequence. We do have to
+ // restrict ops like integer div that have immediate UB (e.g., div-by-zero)
+ // though. This code is adapted from the identical transform in instcombine.
+ if (Opcode != ISD::UDIV && Opcode != ISD::SDIV &&
+ Opcode != ISD::UREM && Opcode != ISD::SREM &&
+ Opcode != ISD::UDIVREM && Opcode != ISD::SDIVREM) {
+ auto *Shuf0 = dyn_cast<ShuffleVectorSDNode>(LHS);
+ auto *Shuf1 = dyn_cast<ShuffleVectorSDNode>(RHS);
+ if (Shuf0 && Shuf1 && Shuf0->getMask().equals(Shuf1->getMask()) &&
+ LHS.getOperand(1).isUndef() && RHS.getOperand(1).isUndef() &&
+ (LHS.hasOneUse() || RHS.hasOneUse() || LHS == RHS)) {
+ SDLoc DL(N);
+ SDValue NewBinOp = DAG.getNode(Opcode, DL, VT, LHS.getOperand(0),
+ RHS.getOperand(0), N->getFlags());
+ SDValue UndefV = LHS.getOperand(1);
+ return DAG.getVectorShuffle(VT, DL, NewBinOp, UndefV, Shuf0->getMask());
+ }
+ }
+
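An illustrative case of why the div/rem family is excluded: the shuffles can
discard a lane whose value would trap once the binop is hoisted ahead of them.

    // With Mask = {0, 0}, lane 1 of A and B never reaches the result, but
    // after the transform A[1] / B[1] is computed first and may divide by
    // zero:
    //   udiv (shuffle A, undef, {0,0}), (shuffle B, undef, {0,0})
    //   -/-> shuffle (udiv A, B), undef, {0,0}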
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of insertion may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (ins undef, X, Z), (ins undef, Y, Z) --> ins VecC, (VBinOp X, Y), Z
+ if (LHS.getOpcode() == ISD::INSERT_SUBVECTOR && LHS.getOperand(0).isUndef() &&
+ RHS.getOpcode() == ISD::INSERT_SUBVECTOR && RHS.getOperand(0).isUndef() &&
+ LHS.getOperand(2) == RHS.getOperand(2) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ SDValue X = LHS.getOperand(1);
+ SDValue Y = RHS.getOperand(1);
+ SDValue Z = LHS.getOperand(2);
+ EVT NarrowVT = X.getValueType();
+ if (NarrowVT == Y.getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ // (binop undef, undef) may not return undef, so compute that result.
+ SDLoc DL(N);
+ SDValue VecC =
+ DAG.getNode(Opcode, DL, VT, DAG.getUNDEF(VT), DAG.getUNDEF(VT));
+ SDValue NarrowBO = DAG.getNode(Opcode, DL, NarrowVT, X, Y);
+ return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, VecC, NarrowBO, Z);
}
}
+ if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
+ return V;
+
return SDValue();
}
@@ -18214,13 +19647,16 @@ SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
// Check to see if we got a select_cc back (to turn into setcc/select).
// Otherwise, just return whatever node we got back, like fabs.
if (SCC.getOpcode() == ISD::SELECT_CC) {
+ const SDNodeFlags Flags = N0.getNode()->getFlags();
SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
N0.getValueType(),
SCC.getOperand(0), SCC.getOperand(1),
- SCC.getOperand(4));
+ SCC.getOperand(4), Flags);
AddToWorklist(SETCC.getNode());
- return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
- SCC.getOperand(2), SCC.getOperand(3));
+ SDValue SelectNode = DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
+ SCC.getOperand(2), SCC.getOperand(3));
+ SelectNode->setFlags(Flags);
+ return SelectNode;
}
return SCC;
@@ -18305,6 +19741,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// locations are not in the default address space.
LLD->getPointerInfo().getAddrSpace() != 0 ||
RLD->getPointerInfo().getAddrSpace() != 0 ||
+ // We can't produce a CMOV of a TargetFrameIndex since we won't be able
+ // to generate the required address computation.
+ LLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
+ RLD->getBasePtr().getOpcode() == ISD::TargetFrameIndex ||
!TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
LLD->getBasePtr().getValueType()))
return false;
@@ -18501,8 +19941,8 @@ SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
// If a constant can be materialized without loads, this does not make sense.
if (TLI.getOperationAction(ISD::ConstantFP, VT) == TargetLowering::Legal ||
- TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) ||
- TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0)))
+ TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0), ForCodeSize) ||
+ TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0), ForCodeSize))
return SDValue();
// If both constants have multiple uses, then we won't need to do an extra
@@ -18547,20 +19987,20 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
if (N2 == N3) return N2;
EVT CmpOpVT = N0.getValueType();
+ EVT CmpResVT = getSetCCResultType(CmpOpVT);
EVT VT = N2.getValueType();
auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
auto *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
auto *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
// Determine if the condition we're dealing with is constant.
- SDValue SCC = SimplifySetCC(getSetCCResultType(CmpOpVT), N0, N1, CC, DL,
- false);
- if (SCC.getNode()) AddToWorklist(SCC.getNode());
-
- if (auto *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
- // fold select_cc true, x, y -> x
- // fold select_cc false, x, y -> y
- return !SCCC->isNullValue() ? N2 : N3;
+ if (SDValue SCC = DAG.FoldSetCC(CmpResVT, N0, N1, CC, DL)) {
+ AddToWorklist(SCC.getNode());
+ if (auto *SCCC = dyn_cast<ConstantSDNode>(SCC)) {
+ // fold select_cc true, x, y -> x
+ // fold select_cc false, x, y -> y
+ return !(SCCC->isNullValue()) ? N2 : N3;
+ }
}
if (SDValue V =
@@ -18621,7 +20061,7 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
SDValue Temp, SCC;
// zext (setcc n0, n1)
if (LegalTypes) {
- SCC = DAG.getSetCC(DL, getSetCCResultType(CmpOpVT), N0, N1, CC);
+ SCC = DAG.getSetCC(DL, CmpResVT, N0, N1, CC);
if (VT.bitsLT(SCC.getValueType()))
Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), VT);
else
@@ -18644,36 +20084,6 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
getShiftAmountTy(Temp.getValueType())));
}
- // Check to see if this is an integer abs.
- // select_cc setg[te] X, 0, X, -X ->
- // select_cc setgt X, -1, X, -X ->
- // select_cc setl[te] X, 0, -X, X ->
- // select_cc setlt X, 1, -X, X ->
- // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
- if (N1C) {
- ConstantSDNode *SubC = nullptr;
- if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
- (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
- N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
- else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
- (N1C->isOne() && CC == ISD::SETLT)) &&
- N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
- SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
-
- if (SubC && SubC->isNullValue() && CmpOpVT.isInteger()) {
- SDLoc DL(N0);
- SDValue Shift = DAG.getNode(ISD::SRA, DL, CmpOpVT, N0,
- DAG.getConstant(CmpOpVT.getSizeInBits() - 1,
- DL,
- getShiftAmountTy(CmpOpVT)));
- SDValue Add = DAG.getNode(ISD::ADD, DL, CmpOpVT, N0, Shift);
- AddToWorklist(Shift.getNode());
- AddToWorklist(Add.getNode());
- return DAG.getNode(ISD::XOR, DL, CmpOpVT, Add, Shift);
- }
- }
-
// select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
// select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
@@ -18728,7 +20138,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SDValue DAGCombiner::BuildSDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18769,7 +20179,7 @@ SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
SDValue DAGCombiner::BuildUDIV(SDNode *N) {
// when optimising for minimum size, we don't want to expand a div to a mul
// and a shift.
- if (DAG.getMachineFunction().getFunction().optForMinSize())
+ if (DAG.getMachineFunction().getFunction().hasMinSize())
return SDValue();
SmallVector<SDNode *, 8> Built;
@@ -18821,7 +20231,6 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
AddToWorklist(Est.getNode());
if (Iterations) {
- EVT VT = Op.getValueType();
SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
@@ -18977,7 +20386,6 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
if (!Reciprocal) {
// The estimate is now completely wrong if the input was exactly 0.0 or
// possibly a denormal. Force the answer to 0.0 for those cases.
- EVT VT = Op.getValueType();
SDLoc DL(Op);
EVT CCVT = getSetCCResultType(VT);
ISD::NodeType SelOpcode = VT.isVector() ? ISD::VSELECT : ISD::SELECT;
@@ -19020,79 +20428,95 @@ SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags Flags) {
}
/// Return true if there is any possibility that the two addresses overlap.
-bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
- // If they are the same then they must be aliases.
- if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
+bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
- // If they are both volatile then they cannot be reordered.
- if (Op0->isVolatile() && Op1->isVolatile()) return true;
+ struct MemUseCharacteristics {
+ bool IsVolatile;
+ SDValue BasePtr;
+ int64_t Offset;
+ Optional<int64_t> NumBytes;
+ MachineMemOperand *MMO;
+ };
- // If one operation reads from invariant memory, and the other may store, they
- // cannot alias. These should really be checking the equivalent of mayWrite,
- // but it only matters for memory nodes other than load /store.
- if (Op0->isInvariant() && Op1->writeMem())
- return false;
+ auto getCharacteristics = [](SDNode *N) -> MemUseCharacteristics {
+ if (const auto *LSN = dyn_cast<LSBaseSDNode>(N)) {
+ int64_t Offset = 0;
+ if (auto *C = dyn_cast<ConstantSDNode>(LSN->getOffset()))
+ Offset = (LSN->getAddressingMode() == ISD::PRE_INC)
+ ? C->getSExtValue()
+ : (LSN->getAddressingMode() == ISD::PRE_DEC)
+ ? -1 * C->getSExtValue()
+ : 0;
+ return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
+ LSN->getMemOperand()};
+ }
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N))
+ return {false /*isVolatile*/, LN->getOperand(1),
+ (LN->hasOffset()) ? LN->getOffset() : 0,
+ (LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
+ : Optional<int64_t>(),
+ (MachineMemOperand *)nullptr};
+ // Default.
+ return {false /*isVolatile*/, SDValue(), (int64_t)0 /*offset*/,
+ Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
+ };
- if (Op1->isInvariant() && Op0->writeMem())
- return false;
+ MemUseCharacteristics MUC0 = getCharacteristics(Op0),
+ MUC1 = getCharacteristics(Op1);
- unsigned NumBytes0 = Op0->getMemoryVT().getStoreSize();
- unsigned NumBytes1 = Op1->getMemoryVT().getStoreSize();
-
- // Check for BaseIndexOffset matching.
- BaseIndexOffset BasePtr0 = BaseIndexOffset::match(Op0, DAG);
- BaseIndexOffset BasePtr1 = BaseIndexOffset::match(Op1, DAG);
- int64_t PtrDiff;
- if (BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()) {
- if (BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff))
- return !((NumBytes0 <= PtrDiff) || (PtrDiff + NumBytes1 <= 0));
-
- // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
- // able to calculate their relative offset if at least one arises
- // from an alloca. However, these allocas cannot overlap and we
- // can infer there is no alias.
- if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
- if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
- MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // If the base are the same frame index but the we couldn't find a
- // constant offset, (indices are different) be conservative.
- if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
- !MFI.isFixedObjectIndex(B->getIndex())))
- return false;
- }
+ // If they access the same address (same base pointer and offset), then
+ // they must be aliases.
+ if (MUC0.BasePtr.getNode() && MUC0.BasePtr == MUC1.BasePtr &&
+ MUC0.Offset == MUC1.Offset)
+ return true;
- bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
- bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
- bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
- bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
- bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
- bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
-
- // If of mismatched base types or checkable indices we can check
- // they do not alias.
- if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
- (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
- (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1))
+ // If they are both volatile then they cannot be reordered.
+ if (MUC0.IsVolatile && MUC1.IsVolatile)
+ return true;
+
+ if (MUC0.MMO && MUC1.MMO) {
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
return false;
}
+ // Try to prove that there is aliasing, or that there is no aliasing. Either
+ // way, we can return now. If nothing can be proved, proceed with more tests.
+ bool IsAlias;
+ if (BaseIndexOffset::computeAliasing(Op0, MUC0.NumBytes, Op1, MUC1.NumBytes,
+ DAG, IsAlias))
+ return IsAlias;
+
+ // The following all rely on MUC0.MMO and MUC1.MMO being valid. Fail
+ // conservatively if either is not known.
+ if (!MUC0.MMO || !MUC1.MMO)
+ return true;
+
+ // If one operation reads from invariant memory, and the other may store, they
+ // cannot alias. These should really be checking the equivalent of mayWrite,
+ // but it only matters for memory nodes other than load/store.
+ if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
+ (MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
+ return false;
+
// If we know required SrcValue1 and SrcValue2 have relatively large
// alignment compared to the size and offset of the access, we may be able
// to prove they do not alias. This check is conservative for now to catch
// cases created by splitting vector types.
- int64_t SrcValOffset0 = Op0->getSrcValueOffset();
- int64_t SrcValOffset1 = Op1->getSrcValueOffset();
- unsigned OrigAlignment0 = Op0->getOriginalAlignment();
- unsigned OrigAlignment1 = Op1->getOriginalAlignment();
+ int64_t SrcValOffset0 = MUC0.MMO->getOffset();
+ int64_t SrcValOffset1 = MUC1.MMO->getOffset();
+ unsigned OrigAlignment0 = MUC0.MMO->getBaseAlignment();
+ unsigned OrigAlignment1 = MUC1.MMO->getBaseAlignment();
if (OrigAlignment0 == OrigAlignment1 && SrcValOffset0 != SrcValOffset1 &&
- NumBytes0 == NumBytes1 && OrigAlignment0 > NumBytes0) {
+ MUC0.NumBytes.hasValue() && MUC1.NumBytes.hasValue() &&
+ *MUC0.NumBytes == *MUC1.NumBytes && OrigAlignment0 > *MUC0.NumBytes) {
int64_t OffAlign0 = SrcValOffset0 % OrigAlignment0;
int64_t OffAlign1 = SrcValOffset1 % OrigAlignment1;
// There is no overlap between these relatively aligned accesses of
// similar size. Return no alias.
- if ((OffAlign0 + NumBytes0) <= OffAlign1 ||
- (OffAlign1 + NumBytes1) <= OffAlign0)
+ if ((OffAlign0 + *MUC0.NumBytes) <= OffAlign1 ||
+ (OffAlign1 + *MUC1.NumBytes) <= OffAlign0)
return false;
}
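The alignment test above can be read as a small standalone predicate. A hypothetical helper (names are illustrative; the code path requires equal access sizes and OrigAlignment > NumBytes):

    // Accesses of NumBytes bytes from bases sharing alignment Align cannot
    // overlap if the byte ranges they occupy inside an Align-sized window
    // are disjoint.
    bool provablyDisjoint(int64_t Off0, int64_t Off1, int64_t Align,
                          int64_t NumBytes) {
      int64_t OffAlign0 = Off0 % Align;
      int64_t OffAlign1 = Off1 % Align;
      return (OffAlign0 + NumBytes) <= OffAlign1 ||
             (OffAlign1 + NumBytes) <= OffAlign0;
    }
    // e.g. Align = 16, NumBytes = 4, Off0 = 0, Off1 = 8: the windows [0,4)
    // and [8,12) are disjoint, so the accesses cannot alias.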
@@ -19105,17 +20529,16 @@ bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
UseAA = false;
#endif
- if (UseAA && AA &&
- Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
+ if (UseAA && AA && MUC0.MMO->getValue() && MUC1.MMO->getValue()) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValOffset0, SrcValOffset1);
- int64_t Overlap0 = NumBytes0 + SrcValOffset0 - MinOffset;
- int64_t Overlap1 = NumBytes1 + SrcValOffset1 - MinOffset;
- AliasResult AAResult =
- AA->alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap0,
- UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
- MemoryLocation(Op1->getMemOperand()->getValue(), Overlap1,
- UseTBAA ? Op1->getAAInfo() : AAMDNodes()) );
+ int64_t Overlap0 = *MUC0.NumBytes + SrcValOffset0 - MinOffset;
+ int64_t Overlap1 = *MUC1.NumBytes + SrcValOffset1 - MinOffset;
+ AliasResult AAResult = AA->alias(
+ MemoryLocation(MUC0.MMO->getValue(), Overlap0,
+ UseTBAA ? MUC0.MMO->getAAInfo() : AAMDNodes()),
+ MemoryLocation(MUC1.MMO->getValue(), Overlap1,
+ UseTBAA ? MUC1.MMO->getAAInfo() : AAMDNodes()));
if (AAResult == NoAlias)
return false;
}
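The Overlap extents handed to AA are conservative spans measured from the smaller of the two offsets. Worked through with concrete numbers (illustrative):

    // Say SrcValOffset0 = 8, *MUC0.NumBytes = 4, SrcValOffset1 = 0,
    // *MUC1.NumBytes = 4:
    int64_t MinOffset = 0;          // std::min(8, 0)
    int64_t Overlap0  = 4 + 8 - 0;  // = 12, covers access 0 from MinOffset on
    int64_t Overlap1  = 4 + 0 - 0;  // = 4,  covers access 1 from MinOffset on
    // AA then compares two locations of sizes 12 and 4 measured from the
    // same baseline, which safely encloses both accesses.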
@@ -19132,18 +20555,64 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
+ const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
+ // Attempt to improve the chain by a single step.
+ std::function<bool(SDValue &)> ImproveChain = [&](SDValue &C) -> bool {
+ switch (C.getOpcode()) {
+ case ISD::EntryToken:
+ // No need to mark EntryToken.
+ C = SDValue();
+ return true;
+ case ISD::LOAD:
+ case ISD::STORE: {
+ // Get alias information for C.
+ bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
+ !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ // Alias, so stop here.
+ return false;
+ }
+
+ case ISD::CopyFromReg:
+ // Always forward past CopyFromReg.
+ C = C.getOperand(0);
+ return true;
+
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END: {
+ // We can forward past any lifetime start/end that can be proven not to
+ // alias the memory access.
+ if (!isAlias(N, C.getNode())) {
+ // Look further up the chain.
+ C = C.getOperand(0);
+ return true;
+ }
+ return false;
+ }
+ default:
+ return false;
+ }
+ };
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.pop_back_val();
+ // Don't bother if we've seen Chain before.
+ if (!Visited.insert(Chain.getNode()).second)
+ continue;
+
// For TokenFactor nodes, look at each operand and only continue up the
// chain until we reach the depth limit.
//
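ImproveChain acts as a single-step transfer function: on success it advances C one link up the chain (with C becoming a null SDValue once EntryToken is reached); on failure C must be treated as a potential alias. A simplified model of the contract, ignoring TokenFactor handling and the depth limit:

    SDValue C = SomeChain;
    while (C.getNode() && ImproveChain(C))
      ;                      // keep stepping upward while progress is made
    if (C.getNode())         // blocked: C may alias N
      Aliases.push_back(C);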
@@ -19156,58 +20625,30 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
return;
}
- // Don't bother if we've been before.
- if (!Visited.insert(Chain.getNode()).second)
- continue;
-
- switch (Chain.getOpcode()) {
- case ISD::EntryToken:
- // Entry token is ideal chain operand, but handled in FindBetterChain.
- break;
-
- case ISD::LOAD:
- case ISD::STORE: {
- // Get alias information for Chain.
- bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
- !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
-
- // If chain is alias then stop here.
- if (!(IsLoad && IsOpLoad) &&
- isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
- Aliases.push_back(Chain);
- } else {
- // Look further up the chain.
- Chains.push_back(Chain.getOperand(0));
- ++Depth;
- }
- break;
- }
-
- case ISD::TokenFactor:
+ if (Chain.getOpcode() == ISD::TokenFactor) {
// We have to check each of the operands of the token factor for "small"
// token factors, so we queue them up. Adding the operands to the queue
// (stack) in reverse order maintains the original order and increases the
// likelihood that getNode will find a matching token factor (CSE.)
if (Chain.getNumOperands() > 16) {
Aliases.push_back(Chain);
- break;
+ continue;
}
for (unsigned n = Chain.getNumOperands(); n;)
Chains.push_back(Chain.getOperand(--n));
++Depth;
- break;
-
- case ISD::CopyFromReg:
- // Forward past CopyFromReg.
- Chains.push_back(Chain.getOperand(0));
+ continue;
+ }
+ // Everything else.
+ if (ImproveChain(Chain)) {
+ // An improved chain was found; consider the new chain if one exists.
+ if (Chain.getNode())
+ Chains.push_back(Chain);
++Depth;
- break;
-
- default:
- // For all other instructions we will just have to take what we can get.
- Aliases.push_back(Chain);
- break;
+ continue;
}
+ // No improved chain is possible; treat this chain as an alias.
+ Aliases.push_back(Chain);
}
}
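After the rewrite the loop has one uniform shape: deduplicate, fan out TokenFactors, otherwise take a single ImproveChain step. In outline (simplified; the depth cutoff is omitted):

    while (!Chains.empty()) {
      SDValue Chain = Chains.pop_back_val();
      if (!Visited.insert(Chain.getNode()).second)
        continue;                                   // seen before
      if (Chain.getOpcode() == ISD::TokenFactor) {
        for (unsigned n = Chain.getNumOperands(); n;)
          Chains.push_back(Chain.getOperand(--n));  // fan out, reversed
        continue;
      }
      if (ImproveChain(Chain)) {
        if (Chain.getNode())
          Chains.push_back(Chain);                  // keep climbing
        continue;
      }
      Aliases.push_back(Chain);                     // blocked: record alias
    }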
@@ -19232,13 +20673,15 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
return Aliases[0];
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
+ return DAG.getTokenFactor(SDLoc(N), Aliases);
}
+namespace {
// TODO: Replace with std::monostate when we move to C++17.
struct UnitT { } Unit;
bool operator==(const UnitT &, const UnitT &) { return true; }
bool operator!=(const UnitT &, const UnitT &) { return false; }
+} // namespace
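Once the codebase moves to C++17 the placeholder can be replaced wholesale; a sketch of the intended end state:

    #include <variant>

    using UnitT = std::monostate;  // empty, equality-comparable unit type
    constexpr UnitT Unit{};        // operator== and operator!= come for free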
// This function tries to collect a bunch of potentially interesting
// nodes to improve the chains of, all at once. This might seem
@@ -19349,7 +20792,7 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
if (AddNewChain)
TFOps.insert(TFOps.begin(), NewChain);
- SDValue TF = DAG.getNode(ISD::TokenFactor, SDLoc(STChain), MVT::Other, TFOps);
+ SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
AddToWorklist(STChain);
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a9a3c44ea0c9..22c23ba877e8 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1,9 +1,8 @@
//===- FastISel.cpp - Implementation of the FastISel class ----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -782,7 +781,7 @@ bool FastISel::addStackMapLiveVars(SmallVectorImpl<MachineOperand> &Ops,
unsigned Reg = getRegForValue(Val);
if (!Reg)
return false;
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
}
}
return true;
@@ -831,8 +830,8 @@ bool FastISel::selectStackmap(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
// Issue CALLSEQ_START
unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
@@ -942,7 +941,7 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
assert(CLI.NumResultRegs == 0 && "Unexpected result register.");
CLI.ResultReg = createResultReg(TLI.getRegClassFor(MVT::i64));
CLI.NumResultRegs = 1;
- Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*IsDef=*/true));
+ Ops.push_back(MachineOperand::CreateReg(CLI.ResultReg, /*isDef=*/true));
}
// Add the <id> and <numBytes> constants.
@@ -991,13 +990,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
unsigned Reg = getRegForValue(I->getArgOperand(i));
if (!Reg)
return false;
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
}
}
// Push the arguments from the call instruction.
for (auto Reg : CLI.OutRegs)
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/false));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/false));
// Push live variables for the stack map.
if (!addStackMapLiveVars(Ops, I, NumMetaOpers + NumArgs))
@@ -1011,13 +1010,13 @@ bool FastISel::selectPatchpoint(const CallInst *I) {
const MCPhysReg *ScratchRegs = TLI.getScratchRegisters(CC);
for (unsigned i = 0; ScratchRegs[i]; ++i)
Ops.push_back(MachineOperand::CreateReg(
- ScratchRegs[i], /*IsDef=*/true, /*IsImp=*/true, /*IsKill=*/false,
- /*IsDead=*/false, /*IsUndef=*/false, /*IsEarlyClobber=*/true));
+ ScratchRegs[i], /*isDef=*/true, /*isImp=*/true, /*isKill=*/false,
+ /*isDead=*/false, /*isUndef=*/false, /*isEarlyClobber=*/true));
// Add implicit defs (return values).
for (auto Reg : CLI.InRegs)
- Ops.push_back(MachineOperand::CreateReg(Reg, /*IsDef=*/true,
- /*IsImpl=*/true));
+ Ops.push_back(MachineOperand::CreateReg(Reg, /*isDef=*/true,
+ /*isImp=*/true));
// Insert the patchpoint instruction before the call generated by the target.
MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, CLI.Call, DbgLoc,
@@ -1045,9 +1044,9 @@ bool FastISel::selectXRayCustomEvent(const CallInst *I) {
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
- /*IsDef=*/false));
+ /*isDef=*/false));
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::PATCHABLE_EVENT_CALL));
@@ -1064,11 +1063,11 @@ bool FastISel::selectXRayTypedEvent(const CallInst *I) {
return true; // don't do anything to this instruction.
SmallVector<MachineOperand, 8> Ops;
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(0)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(1)),
- /*IsDef=*/false));
+ /*isDef=*/false));
Ops.push_back(MachineOperand::CreateReg(getRegForValue(I->getArgOperand(2)),
- /*IsDef=*/false));
+ /*isDef=*/false));
MachineInstrBuilder MIB =
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::PATCHABLE_TYPED_EVENT_CALL));
@@ -1205,9 +1204,11 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (Arg.IsByVal || Arg.IsInAlloca) {
PointerType *Ty = cast<PointerType>(Arg.Ty);
Type *ElementTy = Ty->getElementType();
- unsigned FrameSize = DL.getTypeAllocSize(ElementTy);
- // For ByVal, alignment should come from FE. BE will guess if this info is
- // not there, but there are cases it cannot get right.
+ unsigned FrameSize =
+ DL.getTypeAllocSize(Arg.ByValType ? Arg.ByValType : ElementTy);
+
+ // For ByVal, alignment should come from FE. BE will guess if this info
+ // is not there, but there are cases it cannot get right.
unsigned FrameAlign = Arg.Alignment;
if (!FrameAlign)
FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
@@ -1235,6 +1236,12 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (CLI.NumResultRegs && CLI.CS)
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
+ // Set labels for heapallocsite call.
+ if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
+ MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ MF->addCodeViewHeapAllocSite(CLI.Call, MD);
+ }
+
return true;
}
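For context, a sketch of the input this block handles (illustrative IR in the comments; the metadata payload describes the allocated type, which the CodeView writer later emits as a heap-alloc-site record):

    // Front ends tag allocation calls roughly like:
    //   %p = call i8* @malloc(i64 16), !heapallocsite !5
    //   !5 = ... ; type of the allocated object
    // FastISel forwards the (call instruction, metadata) pair to the
    // MachineFunction so labels can be placed around the call during
    // CodeView emission.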
@@ -1304,9 +1311,6 @@ bool FastISel::selectCall(const User *I) {
return true;
}
- MachineModuleInfo &MMI = FuncInfo.MF->getMMI();
- computeUsesVAFloatArgument(*Call, MMI);
-
// Handle intrinsic function calls.
if (const auto *II = dyn_cast<IntrinsicInst>(Call))
return selectIntrinsicCall(II);
@@ -1710,14 +1714,11 @@ void FastISel::finishCondBranch(const BasicBlock *BranchBB,
}
/// Emit an FNeg operation.
-bool FastISel::selectFNeg(const User *I) {
- Value *X;
- if (!match(I, m_FNeg(m_Value(X))))
- return false;
- unsigned OpReg = getRegForValue(X);
+bool FastISel::selectFNeg(const User *I, const Value *In) {
+ unsigned OpReg = getRegForValue(In);
if (!OpReg)
return false;
- bool OpRegIsKill = hasTrivialKill(I);
+ bool OpRegIsKill = hasTrivialKill(In);
// If the target has ISD::FNEG, use it.
EVT VT = TLI.getValueType(DL, I->getType());
@@ -1804,9 +1805,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return selectBinaryOp(I, ISD::FADD);
case Instruction::Sub:
return selectBinaryOp(I, ISD::SUB);
- case Instruction::FSub:
+ case Instruction::FSub: {
// FNeg is currently represented in LLVM IR as a special case of FSub.
- return selectFNeg(I) || selectBinaryOp(I, ISD::FSUB);
+ Value *X;
+ if (match(I, m_FNeg(m_Value(X))))
+ return selectFNeg(I, X);
+ return selectBinaryOp(I, ISD::FSUB);
+ }
case Instruction::Mul:
return selectBinaryOp(I, ISD::MUL);
case Instruction::FMul:
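A minimal sketch of the matcher usage (assumes the PatternMatch header; X is bound to the negated operand):

    #include "llvm/IR/PatternMatch.h"
    using namespace llvm::PatternMatch;

    Value *X;
    if (match(I, m_FNeg(m_Value(X))))
      // I negates X (the 'fsub -0.0, X' idiom), so lower it as a true
      // negation rather than a generic FSUB.
      return selectFNeg(I, X);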
@@ -1836,6 +1841,9 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
case Instruction::Xor:
return selectBinaryOp(I, ISD::XOR);
+ case Instruction::FNeg:
+ return selectFNeg(I, I->getOperand(0));
+
case Instruction::GetElementPtr:
return selectGetElementPtr(I);
@@ -1869,6 +1877,13 @@ bool FastISel::selectOperator(const User *I, unsigned Opcode) {
return false;
case Instruction::Call:
+ // On AIX, call lowering uses the DAG-ISEL path currently so that the
+ // callee of the direct function call instruction will be mapped to the
+ // symbol for the function's entry point, which is distinct from the
+ // function descriptor symbol. The latter is the symbol whose XCOFF symbol
+ // name is the C-linkage name of the source level function.
+ if (TM.getTargetTriple().isOSAIX())
+ return false;
return selectCall(I);
case Instruction::BitCast:
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index fba728625b07..8b1759246b76 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -1,9 +1,8 @@
//===-- FunctionLoweringInfo.cpp ------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
RegInfo = &MF->getRegInfo();
const TargetFrameLowering *TFI = MF->getSubtarget().getFrameLowering();
unsigned StackAlign = TFI->getStackAlignment();
+ DA = DAG->getDivergenceAnalysis();
// Check whether the function can return without sret-demotion.
SmallVector<ISD::OutputArg, 4> Outs;
@@ -151,7 +151,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
auto Iter = CatchObjects.find(AI);
if (Iter != CatchObjects.end() && TLI->needsFixedCatchObjects()) {
FrameIndex = MF->getFrameInfo().CreateFixedObject(
- TySize, 0, /*Immutable=*/false, /*isAliased=*/true);
+ TySize, 0, /*IsImmutable=*/false, /*isAliased=*/true);
MF->getFrameInfo().setObjectAlignment(FrameIndex, Align);
} else {
FrameIndex =
@@ -322,13 +322,6 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf,
NewMap[MBBMap[Src]] = MBBMap[Dst];
}
EHInfo.EHPadUnwindMap = std::move(NewMap);
- NewMap.clear();
- for (auto &KV : EHInfo.ThrowUnwindMap) {
- const auto *Src = KV.first.get<const BasicBlock *>();
- const auto *Dst = KV.second.get<const BasicBlock *>();
- NewMap[MBBMap[Src]] = MBBMap[Dst];
- }
- EHInfo.ThrowUnwindMap = std::move(NewMap);
}
}
@@ -343,6 +336,7 @@ void FunctionLoweringInfo::clear() {
LiveOutRegInfo.clear();
VisitedBBs.clear();
ArgDbgValues.clear();
+ DescribedArgs.clear();
ByValArgFrameIndexMap.clear();
RegFixups.clear();
RegsWithFixups.clear();
@@ -352,9 +346,9 @@ void FunctionLoweringInfo::clear() {
}
/// CreateReg - Allocate a single virtual register for the given type.
-unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
+unsigned FunctionLoweringInfo::CreateReg(MVT VT, bool isDivergent) {
return RegInfo->createVirtualRegister(
- MF->getSubtarget().getTargetLowering()->getRegClassFor(VT));
+ MF->getSubtarget().getTargetLowering()->getRegClassFor(VT, isDivergent));
}
/// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -364,7 +358,7 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
/// In the case that the given value has struct or array type, this function
/// will assign registers for each member or element.
///
-unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+unsigned FunctionLoweringInfo::CreateRegs(Type *Ty, bool isDivergent) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
SmallVector<EVT, 4> ValueVTs;
@@ -377,13 +371,18 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
for (unsigned i = 0; i != NumRegs; ++i) {
- unsigned R = CreateReg(RegisterVT);
+ unsigned R = CreateReg(RegisterVT, isDivergent);
if (!FirstReg) FirstReg = R;
}
}
return FirstReg;
}
+unsigned FunctionLoweringInfo::CreateRegs(const Value *V) {
+ return CreateRegs(V->getType(), DA && !TLI->requiresUniformRegister(*MF, V) &&
+ DA->isDivergent(V));
+}
+
/// GetLiveOutRegInfo - Gets LiveOutInfo for a register, returning NULL if the
/// register is a PHI destination and the PHI's LiveOutInfo is not valid. If
/// the register's LiveOutInfo is for a smaller bit width, it is extended to
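Targets with divergence-aware register files can key the class off the new flag. A hypothetical override (target and class names are illustrative, not from this patch):

    // GPU-like target: divergent values need per-lane (vector) registers,
    // uniform values can share one scalar register per wavefront.
    const TargetRegisterClass *
    MyGPUTargetLowering::getRegClassFor(MVT VT, bool isDivergent) const {
      return isDivergent ? &MyGPU::VGPR32RegClass : &MyGPU::SGPR32RegClass;
    }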
@@ -400,7 +399,7 @@ FunctionLoweringInfo::GetLiveOutRegInfo(unsigned Reg, unsigned BitWidth) {
if (BitWidth > LOI->Known.getBitWidth()) {
LOI->NumSignBits = 1;
- LOI->Known = LOI->Known.zextOrTrunc(BitWidth);
+ LOI->Known = LOI->Known.zext(BitWidth, false /* => any extend */);
}
return LOI;
@@ -526,56 +525,6 @@ unsigned FunctionLoweringInfo::getCatchPadExceptionPointerVReg(
return VReg;
}
-unsigned
-FunctionLoweringInfo::getOrCreateSwiftErrorVReg(const MachineBasicBlock *MBB,
- const Value *Val) {
- auto Key = std::make_pair(MBB, Val);
- auto It = SwiftErrorVRegDefMap.find(Key);
- // If this is the first use of this swifterror value in this basic block,
- // create a new virtual register.
- // After we processed all basic blocks we will satisfy this "upwards exposed
- // use" by inserting a copy or phi at the beginning of this block.
- if (It == SwiftErrorVRegDefMap.end()) {
- auto &DL = MF->getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- auto VReg = MF->getRegInfo().createVirtualRegister(RC);
- SwiftErrorVRegDefMap[Key] = VReg;
- SwiftErrorVRegUpwardsUse[Key] = VReg;
- return VReg;
- } else return It->second;
-}
-
-void FunctionLoweringInfo::setCurrentSwiftErrorVReg(
- const MachineBasicBlock *MBB, const Value *Val, unsigned VReg) {
- SwiftErrorVRegDefMap[std::make_pair(MBB, Val)] = VReg;
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegDefAt(const Instruction *I) {
- auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
- auto It = SwiftErrorVRegDefUses.find(Key);
- if (It == SwiftErrorVRegDefUses.end()) {
- auto &DL = MF->getDataLayout();
- const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- unsigned VReg = MF->getRegInfo().createVirtualRegister(RC);
- SwiftErrorVRegDefUses[Key] = VReg;
- return std::make_pair(VReg, true);
- }
- return std::make_pair(It->second, false);
-}
-
-std::pair<unsigned, bool>
-FunctionLoweringInfo::getOrCreateSwiftErrorVRegUseAt(const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
- auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
- auto It = SwiftErrorVRegDefUses.find(Key);
- if (It == SwiftErrorVRegDefUses.end()) {
- unsigned VReg = getOrCreateSwiftErrorVReg(MBB, Val);
- SwiftErrorVRegDefUses[Key] = VReg;
- return std::make_pair(VReg, true);
- }
- return std::make_pair(It->second, false);
-}
-
const Value *
FunctionLoweringInfo::getValueFromVirtualReg(unsigned Vreg) {
if (VirtReg2Value.empty()) {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 6a6114677cc2..9bc07d35dfc5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -1,9 +1,8 @@
//==--- InstrEmitter.cpp - Emit MachineInstrs for the SelectionDAG class ---==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -106,7 +105,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
// Stick to the preferred register classes for legal types.
if (TLI->isTypeLegal(VT))
- UseRC = TLI->getRegClassFor(VT);
+ UseRC = TLI->getRegClassFor(VT, Node->isDivergent());
if (!IsClone && !IsCloned)
for (SDNode *User : Node->uses()) {
@@ -165,7 +164,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
"Incompatible phys register def and uses!");
DstRC = UseRC;
} else {
- DstRC = TLI->getRegClassFor(VT);
+ DstRC = TLI->getRegClassFor(VT, Node->isDivergent());
}
// If all uses are reading from the src physical register and copying the
@@ -187,24 +186,6 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
assert(isNew && "Node emitted out of order - early");
}
-/// getDstOfCopyToRegUse - If the only use of the specified result number of
-/// node is a CopyToReg, return its destination register. Return 0 otherwise.
-unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const {
- if (!Node->hasOneUse())
- return 0;
-
- SDNode *User = *Node->use_begin();
- if (User->getOpcode() == ISD::CopyToReg &&
- User->getOperand(2).getNode() == Node &&
- User->getOperand(2).getResNo() == ResNo) {
- unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
- return Reg;
- }
- return 0;
-}
-
void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
@@ -226,8 +207,9 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
// type correctly. For example, a 64-bit float (X86::FR64) can't live in
// the 32-bit float super-class (X86::FR32).
if (i < NumResults && TLI->isTypeLegal(Node->getSimpleValueType(i))) {
- const TargetRegisterClass *VTRC =
- TLI->getRegClassFor(Node->getSimpleValueType(i));
+ const TargetRegisterClass *VTRC = TLI->getRegClassFor(
+ Node->getSimpleValueType(i),
+ (Node->isDivergent() || (RC && TRI->isDivergentRegClass(RC))));
if (RC)
VTRC = TRI->getCommonSubClass(RC, VTRC);
if (VTRC)
@@ -286,14 +268,11 @@ unsigned InstrEmitter::getVR(SDValue Op,
if (Op.isMachineOpcode() &&
Op.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Add an IMPLICIT_DEF instruction before every use.
- unsigned VReg = getDstOfOnlyCopyToRegUse(Op.getNode(), Op.getResNo());
// IMPLICIT_DEF can produce any type of result so its MCInstrDesc
// does not include operand register class info.
- if (!VReg) {
- const TargetRegisterClass *RC =
- TLI->getRegClassFor(Op.getSimpleValueType());
- VReg = MRI->createVirtualRegister(RC);
- }
+ const TargetRegisterClass *RC = TLI->getRegClassFor(
+ Op.getSimpleValueType(), Op.getNode()->isDivergent());
+ unsigned VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -396,11 +375,15 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
} else if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(Op)) {
unsigned VReg = R->getReg();
MVT OpVT = Op.getSimpleValueType();
- const TargetRegisterClass *OpRC =
- TLI->isTypeLegal(OpVT) ? TLI->getRegClassFor(OpVT) : nullptr;
const TargetRegisterClass *IIRC =
II ? TRI->getAllocatableClass(TII->getRegClass(*II, IIOpNum, TRI, *MF))
: nullptr;
+ const TargetRegisterClass *OpRC =
+ TLI->isTypeLegal(OpVT)
+ ? TLI->getRegClassFor(OpVT,
+ Op.getNode()->isDivergent() ||
+ (IIRC && TRI->isDivergentRegClass(IIRC)))
+ : nullptr;
if (OpRC && IIRC && OpRC != IIRC &&
TargetRegisterInfo::isVirtualRegister(VReg)) {
@@ -465,7 +448,7 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
}
unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
- MVT VT, const DebugLoc &DL) {
+ MVT VT, bool isDivergent, const DebugLoc &DL) {
const TargetRegisterClass *VRC = MRI->getRegClass(VReg);
const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx);
@@ -480,7 +463,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
// VReg couldn't be reasonably constrained. Emit a COPY to a new virtual
// register instead.
- RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT), SubIdx);
+ RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
unsigned NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
@@ -515,7 +498,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// classes.
unsigned SubIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *TRC =
- TLI->getRegClassFor(Node->getSimpleValueType(0));
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
unsigned Reg;
MachineInstr *DefMI;
@@ -549,8 +532,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
if (TargetRegisterInfo::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
- Node->getDebugLoc());
-
+ Node->isDivergent(), Node->getDebugLoc());
// Create the destreg if it is missing.
if (VRBase == 0)
VRBase = MRI->createVirtualRegister(TRC);
@@ -585,7 +567,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
//
// There is no constraint on the %src register class.
//
- const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0));
+ const TargetRegisterClass *SRC =
+ TLI->getRegClassFor(Node->getSimpleValueType(0), Node->isDivergent());
SRC = TRI->getSubClassWithSubReg(SRC, SubIdx);
assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG");
@@ -900,6 +883,9 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
if (Flags.hasExact())
MI->setFlag(MachineInstr::MIFlag::IsExact);
+
+ if (Flags.hasFPExcept())
+ MI->setFlag(MachineInstr::MIFlag::FPExcept);
}
// Emit all of the actual operands of this instruction, adding them to the
@@ -1007,14 +993,23 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::TokenFactor: // fall thru
break;
case ISD::CopyToReg: {
- unsigned SrcReg;
+ unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
+ SrcVal.isMachineOpcode() &&
+ SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
+ // Instead of building a COPY to that vreg destination, build an
+ // IMPLICIT_DEF instruction.
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
+ break;
+ }
+ unsigned SrcReg;
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(SrcVal))
SrcReg = R->getReg();
else
SrcReg = getVR(SrcVal, VRBaseMap);
- unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
if (SrcReg == DestReg) // Coalesced away the copy? Ignore.
break;
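The effect on the emitted MachineInstrs, sketched as before/after pseudo-MIR:

    // Before: a COPY of an undefined value.
    //   %0:gr32   = IMPLICIT_DEF
    //   %dst:gr32 = COPY %0
    // After: the undefinedness attaches to the destination directly.
    //   %dst:gr32 = IMPLICIT_DEF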
@@ -1049,14 +1044,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
break;
}
- case ISD::INLINEASM: {
+ case ISD::INLINEASM:
+ case ISD::INLINEASM_BR: {
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
--NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
- MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::INLINEASM));
+ unsigned TgtOpc = Node->getOpcode() == ISD::INLINEASM_BR
+ ? TargetOpcode::INLINEASM_BR
+ : TargetOpcode::INLINEASM;
+ MachineInstrBuilder MIB =
+ BuildMI(*MF, Node->getDebugLoc(), TII->get(TgtOpc));
// Add the asm string as an external symbol operand.
SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString);
@@ -1137,7 +1136,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// then remove the early-clobber flag.
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
- MachineOperand *MO = MIB->findRegisterDefOperand(Reg, false, TRI);
+ MachineOperand *MO =
+ MIB->findRegisterDefOperand(Reg, false, false, TRI);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index 701b6368690b..cfe99dd977b5 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -1,9 +1,8 @@
//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -43,11 +42,6 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
unsigned SrcReg,
DenseMap<SDValue, unsigned> &VRBaseMap);
- /// getDstOfCopyToRegUse - If the only use of the specified result number of
- /// node is a CopyToReg, return its destination register. Return 0 otherwise.
- unsigned getDstOfOnlyCopyToRegUse(SDNode *Node,
- unsigned ResNo) const;
-
void CreateVirtualRegisters(SDNode *Node,
MachineInstrBuilder &MIB,
const MCInstrDesc &II,
@@ -84,7 +78,7 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter {
/// supports SubIdx sub-registers. Emit a copy if that isn't possible.
/// Return the virtual register to use.
unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, MVT VT,
- const DebugLoc &DL);
+ bool isDivergent, const DebugLoc &DL);
/// EmitSubregNode - Generate machine code for subreg nodes.
///
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index d3aea37f944d..bf817f00f83d 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1,9 +1,8 @@
//===- LegalizeDAG.cpp - Implement SelectionDAG::Legalize -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -137,8 +136,6 @@ private:
bool &NeedInvert, const SDLoc &dl);
SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
- SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
- unsigned NumOps, bool isSigned, const SDLoc &dl);
std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node, bool isSigned);
@@ -152,11 +149,17 @@ private:
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
+ SDValue ExpandArgFPLibCall(SDNode *Node,
+ RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80, RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128);
void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl);
+ SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
+ const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
@@ -489,10 +492,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// If this is an unaligned store and the target doesn't support it,
// expand it.
EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
@@ -542,7 +544,9 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
- unsigned RoundWidth = 1 << Log2_32(StWidth);
+ unsigned LogStWidth = Log2_32(StWidth);
+ assert(LogStWidth < 32);
+ unsigned RoundWidth = 1 << LogStWidth;
assert(RoundWidth < StWidth);
unsigned ExtraWidth = StWidth - RoundWidth;
assert(ExtraWidth < RoundWidth);
@@ -602,11 +606,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
EVT MemVT = ST->getMemoryVT();
- unsigned AS = ST->getAddressSpace();
- unsigned Align = ST->getAlignment();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
}
@@ -663,13 +666,12 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
default: llvm_unreachable("This action is not supported yet!");
case TargetLowering::Legal: {
EVT MemVT = LD->getMemoryVT();
- unsigned AS = LD->getAddressSpace();
- unsigned Align = LD->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
- std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
+ std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
}
break;
}
@@ -756,7 +758,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
} else if (SrcWidth & (SrcWidth - 1)) {
// If not loading a power-of-2 number of bits, expand as two loads.
assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ unsigned LogSrcWidth = Log2_32(SrcWidth);
+ assert(LogSrcWidth < 32);
+ unsigned RoundWidth = 1 << LogSrcWidth;
assert(RoundWidth < SrcWidth);
unsigned ExtraWidth = SrcWidth - RoundWidth;
assert(ExtraWidth < RoundWidth);
@@ -853,10 +857,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
// If this is an unaligned load and the target doesn't support it,
// expand it.
EVT MemVT = LD->getMemoryVT();
- unsigned AS = LD->getAddressSpace();
- unsigned Align = LD->getAlignment();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT, AS, Align)) {
+ if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
std::tie(Value, Chain) = TLI.expandUnalignedLoad(LD, DAG);
}
}
@@ -994,6 +997,10 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::EXTRACT_VECTOR_ELT:
+ case ISD::LROUND:
+ case ISD::LLROUND:
+ case ISD::LRINT:
+ case ISD::LLRINT:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -1114,6 +1121,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -1128,7 +1137,9 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
}
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1142,6 +1153,22 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
cast<MaskedStoreSDNode>(Node)->getValue().getValueType());
break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Action = TLI.getOperationAction(
+ Node->getOpcode(), Node->getOperand(0).getValueType());
+ break;
default:
if (Node->getOpcode() >= ISD::BUILTIN_OP_END) {
Action = TargetLowering::Legal;
@@ -1386,6 +1413,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
unsigned TypeByteSize = EltVT.getSizeInBits() / 8;
+ assert(TypeByteSize > 0 && "Vector element type too small for stack store!");
// Store (in the right endianness) the elements to memory.
for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) {
// Ignore undef elements.
@@ -1723,6 +1751,12 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
/// The resultant code need not be legal.
SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
EVT DestVT, const SDLoc &dl) {
+ return EmitStackConvert(SrcOp, SlotVT, DestVT, dl, DAG.getEntryNode());
+}
+
+SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
+ EVT DestVT, const SDLoc &dl,
+ SDValue Chain) {
// Create the stack frame object.
unsigned SrcAlign = DAG.getDataLayout().getPrefTypeAlignment(
SrcOp.getValueType().getTypeForEVT(*DAG.getContext()));
@@ -1743,19 +1777,19 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, EVT SlotVT,
// later than DestVT.
SDValue Store;
- if (SrcSize > SlotSize)
- Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo,
+ if (SrcSize > SlotSize)
+ Store = DAG.getTruncStore(Chain, dl, SrcOp, FIPtr, PtrInfo,
SlotVT, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store =
- DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
+ DAG.getStore(Chain, dl, SrcOp, FIPtr, PtrInfo, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, PtrInfo, SlotVT,
DestAlign);
@@ -2049,41 +2083,6 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
return CallInfo.first;
}
-/// Generate a libcall taking the given operands as arguments
-/// and returning a result of type RetVT.
-SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
- const SDValue *Ops, unsigned NumOps,
- bool isSigned, const SDLoc &dl) {
- TargetLowering::ArgListTy Args;
- Args.reserve(NumOps);
-
- TargetLowering::ArgListEntry Entry;
- for (unsigned i = 0; i != NumOps; ++i) {
- Entry.Node = Ops[i];
- Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsSExt = isSigned;
- Entry.IsZExt = !isSigned;
- Args.push_back(Entry);
- }
- SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
- TLI.getPointerTy(DAG.getDataLayout()));
-
- Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl)
- .setChain(DAG.getEntryNode())
- .setLibCallee(TLI.getLibcallCallingConv(LC), RetTy, Callee,
- std::move(Args))
- .setSExtResult(isSigned)
- .setZExtResult(!isSigned)
- .setIsPostTypeLegalization(true);
-
- std::pair<SDValue,SDValue> CallInfo = TLI.LowerCallTo(CLI);
-
- return CallInfo.first;
-}
-
// Expand a node into a call to a libcall. Similar to
// ExpandLibCall except that the first operand is the in-chain.
std::pair<SDValue, SDValue>
@@ -2160,6 +2159,27 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
return ExpandLibCall(LC, Node, isSigned);
}
+/// Expand the node to a libcall based on the type of its first argument (for
+/// instance lround and its variants).
+SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
+ RTLIB::Libcall Call_F32,
+ RTLIB::Libcall Call_F64,
+ RTLIB::Libcall Call_F80,
+ RTLIB::Libcall Call_F128,
+ RTLIB::Libcall Call_PPCF128) {
+ RTLIB::Libcall LC;
+ switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
+ default: llvm_unreachable("Unexpected request for libcall!");
+ case MVT::f32: LC = Call_F32; break;
+ case MVT::f64: LC = Call_F64; break;
+ case MVT::f80: LC = Call_F80; break;
+ case MVT::f128: LC = Call_F128; break;
+ case MVT::ppcf128: LC = Call_PPCF128; break;
+ }
+
+ return ExpandLibCall(LC, Node, false);
+}
+
/// Issue libcalls to __{u}divmod to compute div / rem pairs.
void
SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
@@ -2530,16 +2550,12 @@ SDValue SelectionDAGLegalize::ExpandBITREVERSE(SDValue Op, const SDLoc &dl) {
// TODO: We can easily support i4/i2 legal types if any target ever does.
if (Sz >= 8 && isPowerOf2_32(Sz)) {
// Create the masks - repeating the pattern every byte.
- APInt MaskHi4(Sz, 0), MaskHi2(Sz, 0), MaskHi1(Sz, 0);
- APInt MaskLo4(Sz, 0), MaskLo2(Sz, 0), MaskLo1(Sz, 0);
- for (unsigned J = 0; J != Sz; J += 8) {
- MaskHi4 = MaskHi4 | (0xF0ull << J);
- MaskLo4 = MaskLo4 | (0x0Full << J);
- MaskHi2 = MaskHi2 | (0xCCull << J);
- MaskLo2 = MaskLo2 | (0x33ull << J);
- MaskHi1 = MaskHi1 | (0xAAull << J);
- MaskLo1 = MaskLo1 | (0x55ull << J);
- }
+ APInt MaskHi4 = APInt::getSplat(Sz, APInt(8, 0xF0));
+ APInt MaskHi2 = APInt::getSplat(Sz, APInt(8, 0xCC));
+ APInt MaskHi1 = APInt::getSplat(Sz, APInt(8, 0xAA));
+ APInt MaskLo4 = APInt::getSplat(Sz, APInt(8, 0x0F));
+ APInt MaskLo2 = APInt::getSplat(Sz, APInt(8, 0x33));
+ APInt MaskLo1 = APInt::getSplat(Sz, APInt(8, 0x55));
// BSWAP if the type is wider than a single byte.
Tmp = (Sz > 8 ? DAG.getNode(ISD::BSWAP, dl, VT, Op) : Op);
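APInt::getSplat tiles the 8-bit pattern across the full width, replacing the hand-rolled loop. For example:

    #include "llvm/ADT/APInt.h"
    #include <cassert>
    using llvm::APInt;

    APInt MaskHi1 = APInt::getSplat(32, APInt(8, 0xAA));
    assert(MaskHi1 == APInt(32, 0xAAAAAAAAULL) && "0xAA repeated 4 times");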
@@ -2593,9 +2609,8 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, const SDLoc &dl) {
switch (VT.getSimpleVT().getScalarType().SimpleTy) {
default: llvm_unreachable("Unhandled Expand type in BSWAP!");
case MVT::i16:
- Tmp2 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- Tmp1 = DAG.getNode(ISD::SRL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
- return DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
+ // Use a rotate by 8. This can be further expanded if necessary.
+ return DAG.getNode(ISD::ROTL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
case MVT::i32:
Tmp4 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(24, dl, SHVT));
Tmp3 = DAG.getNode(ISD::SHL, dl, VT, Op, DAG.getConstant(8, dl, SHVT));
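For an i16 value the rotate is exactly a byte swap; a quick standalone check in plain C++:

    #include <cstdint>

    uint16_t bswap16(uint16_t X) {
      return uint16_t((X << 8) | (X >> 8));  // == rotl(X, 8) at 16 bits
    }
    // bswap16(0xABCD) == 0xCDAB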
@@ -2799,12 +2814,27 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
}
break;
}
+ case ISD::STRICT_FP_ROUND:
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getValueType(0),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_ROUND node\n");
+ return true;
case ISD::FP_ROUND:
case ISD::BITCAST:
- Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
+ Tmp1 = EmitStackConvert(Node->getOperand(0),
+ Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
break;
+ case ISD::STRICT_FP_EXTEND:
+ Tmp1 = EmitStackConvert(Node->getOperand(1),
+ Node->getOperand(1).getValueType(),
+ Node->getValueType(0), dl, Node->getOperand(0));
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_EXTEND node\n");
+ return true;
case ISD::FP_EXTEND:
Tmp1 = EmitStackConvert(Node->getOperand(0),
Node->getOperand(0).getValueType(),
@@ -2875,6 +2905,30 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
+ case ISD::LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
Results.push_back(Results[0].getValue(1));
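Each LROUND/LLROUND/LRINT/LLRINT case above maps the operand's float type to the matching runtime routine. Rough correspondence, assuming the default libcall names:

    //   ISD::LROUND + MVT::f32 -> RTLIB::LROUND_F32 -> long lroundf(float)
    //   ISD::LLRINT + MVT::f64 -> RTLIB::LLRINT_F64 -> long long llrint(double)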
@@ -3117,7 +3171,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Node);
// Check to see if this FP immediate is already legal.
// If this is a legal constant, turn it into a TargetConstantFP node.
- if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0)))
+ if (!TLI.isFPImmLegal(CFP->getValueAPF(), Node->getValueType(0),
+ DAG.getMachineFunction().getFunction().hasOptSize()))
Results.push_back(ExpandConstantFP(CFP, true));
break;
}
@@ -3291,176 +3346,75 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(TLI.expandAddSubSat(Node, DAG));
break;
case ISD::SMULFIX:
- Results.push_back(TLI.getExpandedFixedPointMultiplication(Node, DAG));
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
- case ISD::SADDO:
- case ISD::SSUBO: {
+ case ISD::ADDCARRY:
+ case ISD::SUBCARRY: {
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ?
- ISD::ADD : ISD::SUB, dl, LHS.getValueType(),
- LHS, RHS);
- Results.push_back(Sum);
- EVT ResultType = Node->getValueType(1);
- EVT OType = getSetCCResultType(Node->getValueType(0));
-
- SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
-
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Sum >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- Node->getOpcode() == ISD::SADDO ?
- ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Results.push_back(DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType));
- break;
- }
- case ISD::UADDO:
- case ISD::USUBO: {
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- bool IsAdd = Node->getOpcode() == ISD::UADDO;
- // If ADD/SUBCARRY is legal, use that instead.
- unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
- if (TLI.isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
- SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
- SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
- { LHS, RHS, CarryIn });
- Results.push_back(SDValue(NodeCarry.getNode(), 0));
- Results.push_back(SDValue(NodeCarry.getNode(), 1));
- break;
- }
+ SDValue Carry = Node->getOperand(2);
+
+ bool IsAdd = Node->getOpcode() == ISD::ADDCARRY;
- SDValue Sum = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
- LHS.getValueType(), LHS, RHS);
- Results.push_back(Sum);
+ // Initial add of the 2 operands.
+ unsigned Op = IsAdd ? ISD::ADD : ISD::SUB;
+ EVT VT = LHS.getValueType();
+ SDValue Sum = DAG.getNode(Op, dl, VT, LHS, RHS);
- EVT ResultType = Node->getValueType(1);
+ // Initial check for overflow.
+ EVT CarryType = Node->getValueType(1);
EVT SetCCType = getSetCCResultType(Node->getValueType(0));
ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
- SDValue SetCC = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
-
- Results.push_back(DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType));
+ SDValue Overflow = DAG.getSetCC(dl, SetCCType, Sum, LHS, CC);
+
+ // Add of the sum and the carry.
+ SDValue CarryExt =
+ DAG.getZeroExtendInReg(DAG.getZExtOrTrunc(Carry, dl, VT), dl, MVT::i1);
+ SDValue Sum2 = DAG.getNode(Op, dl, VT, Sum, CarryExt);
+
+ // Second check for overflow. If we are adding, we can only overflow if the
+ // initial sum is all 1s and the carry is set, resulting in a new sum of 0.
+ // If we are subtracting, we can only overflow if the initial sum is 0 and
+ // the carry is set, resulting in a new sum of all 1s.
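+ // For example, with 8-bit values and carry-in 1: 0xFF + 0x00 yields
+ // Sum = 0xFF (first check clear) and Sum2 = 0x00, so only the second
+ // check reports the carry-out.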
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue Overflow2 =
+ IsAdd ? DAG.getSetCC(dl, SetCCType, Sum2, Zero, ISD::SETEQ)
+ : DAG.getSetCC(dl, SetCCType, Sum, Zero, ISD::SETEQ);
+ Overflow2 = DAG.getNode(ISD::AND, dl, SetCCType, Overflow2,
+ DAG.getZExtOrTrunc(Carry, dl, SetCCType));
+
+ SDValue ResultCarry =
+ DAG.getNode(ISD::OR, dl, SetCCType, Overflow, Overflow2);
+
+ Results.push_back(Sum2);
+ Results.push_back(DAG.getBoolExtOrTrunc(ResultCarry, dl, CarryType, VT));
+ break;
+ }
+ case ISD::SADDO:
+ case ISD::SSUBO: {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
+ break;
+ }
+ case ISD::UADDO:
+ case ISD::USUBO: {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Node, Result, Overflow, DAG);
+ Results.push_back(Result);
+ Results.push_back(Overflow);
break;
}
case ISD::UMULO:
case ISD::SMULO: {
- EVT VT = Node->getValueType(0);
- EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
- SDValue LHS = Node->getOperand(0);
- SDValue RHS = Node->getOperand(1);
- SDValue BottomHalf;
- SDValue TopHalf;
- static const unsigned Ops[2][3] =
- { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
- { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
- bool isSigned = Node->getOpcode() == ISD::SMULO;
- if (TLI.isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
- BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
- } else if (TLI.isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
- BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
- RHS);
- TopHalf = BottomHalf.getValue(1);
- } else if (TLI.isTypeLegal(WideVT)) {
- LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
- RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
- Tmp1 = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
- BottomHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(0, dl));
- TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
- DAG.getIntPtrConstant(1, dl));
- } else {
- // We can fall back to a libcall with an illegal type for the MUL if we
- // have a libcall big enough.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
- if (WideVT == MVT::i16)
- LC = RTLIB::MUL_I16;
- else if (WideVT == MVT::i32)
- LC = RTLIB::MUL_I32;
- else if (WideVT == MVT::i64)
- LC = RTLIB::MUL_I64;
- else if (WideVT == MVT::i128)
- LC = RTLIB::MUL_I128;
- assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
-
- SDValue HiLHS;
- SDValue HiRHS;
- if (isSigned) {
- // The high part is obtained by SRA'ing all but one of the bits of low
- // part.
- unsigned LoSize = VT.getSizeInBits();
- HiLHS =
- DAG.getNode(ISD::SRA, dl, VT, LHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- HiRHS =
- DAG.getNode(ISD::SRA, dl, VT, RHS,
- DAG.getConstant(LoSize - 1, dl,
- TLI.getPointerTy(DAG.getDataLayout())));
- } else {
- HiLHS = DAG.getConstant(0, dl, VT);
- HiRHS = DAG.getConstant(0, dl, VT);
- }
-
- // Here we're passing the 2 arguments explicitly as 4 arguments that are
- // pre-lowered to the correct types. This all depends upon WideVT not
- // being a legal type for the architecture and thus has to be split to
- // two arguments.
- SDValue Ret;
- if(DAG.getDataLayout().isLittleEndian()) {
- // Halves of WideVT are packed into registers in different order
- // depending on platform endianness. This is usually handled by
- // the C calling convention, but we can't defer to it in
- // the legalizer.
- SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
- } else {
- SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = ExpandLibCall(LC, WideVT, Args, 4, isSigned, dl);
- }
- assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
- "Ret value is a collection of constituent nodes holding result.");
- BottomHalf = Ret.getOperand(0);
- TopHalf = Ret.getOperand(1);
+ SDValue Result, Overflow;
+ if (TLI.expandMULO(Node, Result, Overflow, DAG)) {
+ Results.push_back(Result);
+ Results.push_back(Overflow);
}
-
- if (isSigned) {
- Tmp1 = DAG.getConstant(
- VT.getSizeInBits() - 1, dl,
- TLI.getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
- Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
- TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
- ISD::SETNE);
- } else {
- TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
- DAG.getConstant(0, dl, VT), ISD::SETNE);
- }
-
- // Truncate the result if SetCC returns a larger type than needed.
- EVT RType = Node->getValueType(1);
- if (RType.getSizeInBits() < TopHalf.getValueSizeInBits())
- TopHalf = DAG.getNode(ISD::TRUNCATE, dl, RType, TopHalf);
-
- assert(RType.getSizeInBits() == TopHalf.getValueSizeInBits() &&
- "Unexpected result type for S/UMULO legalization");
-
- Results.push_back(BottomHalf);
- Results.push_back(TopHalf);
break;
}
case ISD::BUILD_PAIR: {
@@ -3487,6 +3441,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getConstant(0, dl, Tmp1.getValueType()),
Tmp2, Tmp3, ISD::SETNE);
}
+ Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
case ISD::BR_JT: {
@@ -3570,7 +3525,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// condition code, create a new SETCC node.
if (Tmp3.getNode())
Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
- Tmp1, Tmp2, Tmp3);
+ Tmp1, Tmp2, Tmp3, Node->getFlags());
// If we expanded the SETCC by inverting the condition code, then wrap
// the existing SETCC in a NOT to restore the intended condition.
@@ -3598,6 +3553,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
DAG.getConstant(TrueValue, dl, VT),
DAG.getConstant(0, dl, VT),
Tmp3);
+ Tmp1->setFlags(Node->getFlags());
Results.push_back(Tmp1);
break;
}
@@ -3617,9 +3573,8 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
assert(!TLI.isOperationExpand(ISD::SELECT, VT) &&
"Cannot expand ISD::SELECT_CC when ISD::SELECT also needs to be "
"expanded.");
- EVT CCVT =
- TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
- SDValue Cond = DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC);
+ EVT CCVT = getSetCCResultType(CmpVT);
+ SDValue Cond =
+ DAG.getNode(ISD::SETCC, dl, CCVT, Tmp1, Tmp2, CC, Node->getFlags());
Results.push_back(DAG.getSelect(dl, VT, Cond, Tmp3, Tmp4));
break;
}
@@ -3635,6 +3590,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// Use the new condition code and swap true and false
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+ Tmp1->setFlags(Node->getFlags());
} else {
// If The inverse is not legal, then try to swap the arguments using
// the inverse condition code.
@@ -3644,6 +3600,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
// lhs and rhs.
Legalized = true;
Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+ Tmp1->setFlags(Node->getFlags());
}
}
@@ -3670,6 +3627,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1,
Tmp2, Tmp3, Tmp4, CC);
}
+ Tmp1->setFlags(Node->getFlags());
}
Results.push_back(Tmp1);
break;
@@ -3729,6 +3687,21 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
ReplaceNode(SDValue(Node, 0), Result);
break;
}
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Results.push_back(TLI.expandVecReduce(Node, DAG));
+ break;
case ISD::GLOBAL_OFFSET_TABLE:
case ISD::GlobalAddress:
case ISD::GlobalTLSAddress:
@@ -4273,6 +4246,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2));
// Perform the larger operation, then round down.
Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3);
+ Tmp1->setFlags(Node->getFlags());
if (TruncOp != ISD::FP_ROUND)
Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1);
else
@@ -4303,8 +4277,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
}
Tmp1 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(0));
Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1));
- Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
- Tmp1, Tmp2, Node->getOperand(2)));
+ Results.push_back(DAG.getNode(ISD::SETCC, dl, Node->getValueType(0), Tmp1,
+ Tmp2, Node->getOperand(2), Node->getFlags()));
break;
}
case ISD::BR_CC: {
@@ -4532,6 +4506,24 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
Results.push_back(CvtVec);
break;
}
+ case ISD::ATOMIC_SWAP: {
+ AtomicSDNode *AM = cast<AtomicSDNode>(Node);
+ SDLoc SL(Node);
+ SDValue CastVal = DAG.getNode(ISD::BITCAST, SL, NVT, AM->getVal());
+ assert(NVT.getSizeInBits() == OVT.getSizeInBits() &&
+ "unexpected promotion type");
+ assert(AM->getMemoryVT().getSizeInBits() == NVT.getSizeInBits() &&
+ "unexpected atomic_swap with illegal type");
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, NVT,
+ DAG.getVTList(NVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+ Results.push_back(DAG.getNode(ISD::BITCAST, SL, OVT, NewAtomic));
+ Results.push_back(NewAtomic.getValue(1));
+ break;
+ }
}
// Replace the original node with the legalized result.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 4644e9588e7b..b4849b2881e6 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -1,9 +1,8 @@
//===-------- LegalizeFloatTypes.cpp - Legalization of float types --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -104,6 +103,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
case ISD::SINT_TO_FP:
@@ -440,6 +440,15 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
return SDValue(N, ResNo);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
+
+ EVT FloatVT = N->getValueType(ResNo);
+ if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
+ // Expand Y = FNEG(X) -> Y = X ^ sign mask
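+ // (for a softened f32 this XORs the i32 bits with 0x80000000, flipping
+ // only the sign bit)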
+ APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
+ return DAG.getNode(ISD::XOR, dl, NVT, GetSoftenedFloat(N->getOperand(0)),
+ DAG.getConstant(SignMask, dl, NVT));
+ }
+
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
@@ -763,6 +772,10 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_XINT(N); break;
+ case ISD::LROUND: Res = SoftenFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
@@ -1029,6 +1042,61 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+
+ SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ NVT, Op, false, SDLoc(N)).first;
+}
//===----------------------------------------------------------------------===//
// Float Result Expansion
@@ -1562,6 +1630,10 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break;
case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break;
+ case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break;
+ case ISD::LLROUND: Res = ExpandFloatOp_LLROUND(N); break;
+ case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break;
+ case ISD::LLRINT: Res = ExpandFloatOp_LLRINT(N); break;
case ISD::SELECT_CC: Res = ExpandFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = ExpandFloatOp_SETCC(N); break;
case ISD::STORE: Res = ExpandFloatOp_STORE(cast<StoreSDNode>(N),
@@ -1732,6 +1804,54 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
ST->getMemoryVT(), ST->getMemOperand());
}
+SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64,
+ RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64,
+ RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64,
+ RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
+SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
+ EVT RVT = N->getValueType(0);
+ EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
+ RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64,
+ RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128),
+ RVT, N->getOperand(0), false, SDLoc(N)).first;
+}
+
//===----------------------------------------------------------------------===//
// Float Operand Promotion
//===----------------------------------------------------------------------===//
@@ -1748,6 +1868,8 @@ static ISD::NodeType GetPromotionOpcode(EVT OpVT, EVT RetVT) {
}
bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
+ LLVM_DEBUG(dbgs() << "Promote float operand " << OpNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) {
@@ -1762,6 +1884,10 @@ bool DAGTypeLegalizer::PromoteFloatOperand(SDNode *N, unsigned OpNo) {
// a part of PromoteFloatResult.
switch (N->getOpcode()) {
default:
+ #ifndef NDEBUG
+ dbgs() << "PromoteFloatOperand Op #" << OpNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+ #endif
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::BITCAST: R = PromoteFloatOp_BITCAST(N, OpNo); break;
@@ -1872,6 +1998,8 @@ SDValue DAGTypeLegalizer::PromoteFloatOp_STORE(SDNode *N, unsigned OpNo) {
//===----------------------------------------------------------------------===//
void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
+ LLVM_DEBUG(dbgs() << "Promote float result " << ResNo << ": "; N->dump(&DAG);
+ dbgs() << "\n");
SDValue R = SDValue();
switch (N->getOpcode()) {
@@ -1880,6 +2008,10 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FP16_TO_FP:
case ISD::FP_TO_FP16:
default:
+#ifndef NDEBUG
+ dbgs() << "PromoteFloatResult #" << ResNo << ": ";
+ N->dump(&DAG); dbgs() << "\n";
+#endif
llvm_unreachable("Do not know how to promote this operator's result!");
case ISD::BITCAST: R = PromoteFloatRes_BITCAST(N); break;
@@ -1932,7 +2064,7 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = PromoteFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = PromoteFloatRes_UNDEF(N); break;
-
+ case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
}
if (R.getNode())
@@ -2166,3 +2298,29 @@ SDValue DAGTypeLegalizer::PromoteFloatRes_UNDEF(SDNode *N) {
N->getValueType(0)));
}
+SDValue DAGTypeLegalizer::BitcastToInt_ATOMIC_SWAP(SDNode *N) {
+ EVT VT = N->getValueType(0);
+ EVT NFPVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+
+ AtomicSDNode *AM = cast<AtomicSDNode>(N);
+ SDLoc SL(N);
+
+ SDValue CastVal = BitConvertToInteger(AM->getVal());
+ EVT CastVT = CastVal.getValueType();
+
+ SDValue NewAtomic
+ = DAG.getAtomic(ISD::ATOMIC_SWAP, SL, CastVT,
+ DAG.getVTList(CastVT, MVT::Other),
+ { AM->getChain(), AM->getBasePtr(), CastVal },
+ AM->getMemOperand());
+
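+ // For f16 the swap is performed as an i16 ATOMIC_SWAP, and
+ // GetPromotionOpcode yields FP16_TO_FP to rebuild the promoted float from
+ // the swapped bits.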
+ SDValue ResultCast = DAG.getNode(GetPromotionOpcode(VT, NFPVT), SL, NFPVT,
+ NewAtomic);
+ // Legalize the chain result by replacing uses of the old value chain with
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), NewAtomic.getValue(1));
+
+ return ResultCast;
+}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 5fbc70fce60d..15ac45c37c66 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1,9 +1,8 @@
//===----- LegalizeIntegerTypes.cpp - Legalization of integer types -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -149,7 +148,10 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
- case ISD::SMULFIX: Res = PromoteIntRes_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
Res = PromoteIntRes_Atomic0(cast<AtomicSDNode>(N)); break;
@@ -172,6 +174,18 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
Res = PromoteIntRes_AtomicCmpSwap(cast<AtomicSDNode>(N), ResNo);
break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Res = PromoteIntRes_VECREDUCE(N);
+ break;
}
// If the result is null then the sub-method took care of registering it.
@@ -293,21 +307,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
BitConvertToInteger(GetScalarizedVector(InOp)));
break;
case TargetLowering::TypeSplitVector: {
- // For example, i32 = BITCAST v2i16 on alpha. Convert the split
- // pieces of the input into integers and reassemble in the final type.
- SDValue Lo, Hi;
- GetSplitVector(N->getOperand(0), Lo, Hi);
- Lo = BitConvertToInteger(Lo);
- Hi = BitConvertToInteger(Hi);
-
- if (DAG.getDataLayout().isBigEndian())
- std::swap(Lo, Hi);
-
- InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
- EVT::getIntegerVT(*DAG.getContext(),
- NOutVT.getSizeInBits()),
- JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ if (!NOutVT.isVector()) {
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
+ // pieces of the input into integers and reassemble in the final type.
+ SDValue Lo, Hi;
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ Lo = BitConvertToInteger(Lo);
+ Hi = BitConvertToInteger(Hi);
+
+ if (DAG.getDataLayout().isBigEndian())
+ std::swap(Lo, Hi);
+
+ InOp = DAG.getNode(ISD::ANY_EXTEND, dl,
+ EVT::getIntegerVT(*DAG.getContext(),
+ NOutVT.getSizeInBits()),
+ JoinIntegers(Lo, Hi));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
+ }
+ break;
}
case TargetLowering::TypeWidenVector:
// The input is widened to the same size. Convert to the widened value.
@@ -555,7 +572,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MLOAD(MaskedLoadSDNode *N) {
SDLoc dl(N);
SDValue Res = DAG.getMaskedLoad(NVT, dl, N->getChain(), N->getBasePtr(),
N->getMask(), ExtPassThru, N->getMemoryVT(),
- N->getMemOperand(), ISD::SEXTLOAD);
+ N->getMemOperand(), ISD::EXTLOAD);
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -582,23 +599,27 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
/// Promote the overflow flag of an overflowing arithmetic node.
SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
- // Simply change the return type of the boolean result.
+ // Change the return type of the boolean result while obeying
+ // getSetCCResultType.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1));
- EVT ValueVTs[] = { N->getValueType(0), NVT };
+ EVT VT = N->getValueType(0);
+ EVT SVT = getSetCCResultType(VT);
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1) };
unsigned NumOps = N->getNumOperands();
assert(NumOps <= 3 && "Too many operands");
if (NumOps == 3)
Ops[2] = N->getOperand(2);
- SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
- DAG.getVTList(ValueVTs), makeArrayRef(Ops, NumOps));
+ SDLoc dl(N);
+ SDValue Res = DAG.getNode(N->getOpcode(), dl, DAG.getVTList(VT, SVT),
+ makeArrayRef(Ops, NumOps));
// Modified the sum result - switch anything that used the old sum to use
// the new one.
ReplaceValueWith(SDValue(N, 0), Res);
- return SDValue(Res.getNode(), 1);
+ // Convert to the expected type.
+ return DAG.getBoolExtOrTrunc(Res.getValue(1), dl, NVT, VT);
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
@@ -646,12 +667,39 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
}
-SDValue DAGTypeLegalizer::PromoteIntRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
// Can just promote the operands then continue with operation.
SDLoc dl(N);
- SDValue Op1Promoted = SExtPromotedInteger(N->getOperand(0));
- SDValue Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ SDValue Op1Promoted, Op2Promoted;
+ bool Signed =
+ N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ if (Signed) {
+ Op1Promoted = SExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = SExtPromotedInteger(N->getOperand(1));
+ } else {
+ Op1Promoted = ZExtPromotedInteger(N->getOperand(0));
+ Op2Promoted = ZExtPromotedInteger(N->getOperand(1));
+ }
+ EVT OldType = N->getOperand(0).getValueType();
EVT PromotedType = Op1Promoted.getValueType();
+ unsigned DiffSize =
+ PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
+
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ if (Saturating) {
+ // Promoting the operand and result values changes the saturation width,
+ // which extends the values that we clamp to on saturation. This is
+ // resolved by shifting one of the operands left by the same amount, which
+ // also shifts the result we compare against, and then shifting back.
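+ // For example, promoting an i8 smul.fix.sat with scale 3 to i32: shifting
+ // one operand left by 24 scales the product by 2^24, so the i32 saturation
+ // bounds line up with the original i8 ones, and shifting the result right
+ // by 24 afterwards restores the intended scale.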
+ EVT ShiftTy = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ Op1Promoted = DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ SDValue Result = DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted,
+ Op2Promoted, N->getOperand(2));
+ unsigned ShiftOp = Signed ? ISD::SRA : ISD::SRL;
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result,
+ DAG.getConstant(DiffSize, dl, ShiftTy));
+ }
return DAG.getNode(N->getOpcode(), dl, PromotedType, Op1Promoted, Op2Promoted,
N->getOperand(2));
}
@@ -875,7 +923,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
// Calculate the overflow flag: zero extend the arithmetic result from
// the original type.
- SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT);
+ SDValue Ofl = DAG.getZeroExtendInReg(Res, dl, OVT.getScalarType());
// Overflowed if and only if this is not equal to Res.
Ofl = DAG.getSetCC(dl, N->getValueType(1), Ofl, Res, ISD::SETNE);
@@ -917,6 +965,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBCARRY(SDNode *N, unsigned ResNo) {
return SDValue(Res.getNode(), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_ABS(SDNode *N) {
+ SDValue Op0 = SExtPromotedInteger(N->getOperand(0));
+ return DAG.getNode(ISD::ABS, SDLoc(N), Op0.getValueType(), Op0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
// Promote the overflow bit trivially.
if (ResNo == 1)
@@ -946,9 +999,11 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
SDValue Overflow;
if (N->getOpcode() == ISD::UMULO) {
// Unsigned overflow occurred if the high part is non-zero.
+ unsigned Shift = SmallVT.getScalarSizeInBits();
+ EVT ShiftTy = getShiftAmountTyForConstant(Shift, Mul.getValueType(),
+ TLI, DAG);
SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
- DAG.getIntPtrConstant(SmallVT.getSizeInBits(),
- DL));
+ DAG.getConstant(Shift, DL, ShiftTy));
Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
DAG.getConstant(0, DL, Hi.getValueType()),
ISD::SETNE);
@@ -1091,7 +1146,21 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::PREFETCH: Res = PromoteIntOp_PREFETCH(N, OpNo); break;
- case ISD::SMULFIX: Res = PromoteIntOp_SMULFIX(N); break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+
+ case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: Res = PromoteIntOp_VECREDUCE(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -1434,24 +1503,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo) {
SDValue Carry = N->getOperand(2);
SDLoc DL(N);
- auto VT = getSetCCResultType(LHS.getValueType());
- TargetLoweringBase::BooleanContent BoolType = TLI.getBooleanContents(VT);
- switch (BoolType) {
- case TargetLoweringBase::UndefinedBooleanContent:
- Carry = DAG.getAnyExtOrTrunc(Carry, DL, VT);
- break;
- case TargetLoweringBase::ZeroOrOneBooleanContent:
- Carry = DAG.getZExtOrTrunc(Carry, DL, VT);
- break;
- case TargetLoweringBase::ZeroOrNegativeOneBooleanContent:
- Carry = DAG.getSExtOrTrunc(Carry, DL, VT);
- break;
- }
+ Carry = PromoteTargetBoolean(Carry, LHS.getValueType());
return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, Carry), 0);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_MULFIX(SDNode *N) {
SDValue Op2 = ZExtPromotedInteger(N->getOperand(2));
return SDValue(
DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Op2), 0);
@@ -1475,6 +1532,44 @@ SDValue DAGTypeLegalizer::PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo) {
0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_FPOWI(SDNode *N) {
+ SDValue Op = SExtPromotedInteger(N->getOperand(1));
+ return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op), 0);
+}
+
+SDValue DAGTypeLegalizer::PromoteIntOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op;
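+ // The bitwise and wrapping-arithmetic reductions are insensitive to how the
+ // elements were extended, so any extension works; SMIN/SMAX need sign
+ // extension and UMIN/UMAX zero extension so the promoted comparisons order
+ // the elements the same way.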
+ switch (N->getOpcode()) {
+ default: llvm_unreachable("Expected integer vector reduction");
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ Op = GetPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ Op = SExtPromotedInteger(N->getOperand(0));
+ break;
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ Op = ZExtPromotedInteger(N->getOperand(0));
+ break;
+ }
+
+ EVT EltVT = Op.getValueType().getVectorElementType();
+ EVT VT = N->getValueType(0);
+ if (VT.bitsGE(EltVT))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, Op);
+
+ // Result size must be >= element size. If this is not the case after
+ // promotion, also promote the result type and then truncate.
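+ // e.g. a VECREDUCE_ADD of v4i8 returning i8 when i8 promotes to i32:
+ // reduce in i32, then truncate the scalar result back down to i8.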
+ SDValue Reduce = DAG.getNode(N->getOpcode(), dl, EltVT, Op);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, Reduce);
+}
+
//===----------------------------------------------------------------------===//
// Integer Result Expansion
//===----------------------------------------------------------------------===//
@@ -1499,7 +1594,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
dbgs() << "ExpandIntegerResult #" << ResNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand the result of this operator!");
+ report_fatal_error("Do not know how to expand the result of this "
+ "operator!");
case ISD::MERGE_VALUES: SplitRes_MERGE_VALUES(N, ResNo, Lo, Hi); break;
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
@@ -1518,6 +1614,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::BITREVERSE: ExpandIntRes_BITREVERSE(N, Lo, Hi); break;
case ISD::BSWAP: ExpandIntRes_BSWAP(N, Lo, Hi); break;
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
+ case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
@@ -1526,6 +1623,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::FLT_ROUNDS_: ExpandIntRes_FLT_ROUNDS(N, Lo, Hi); break;
case ISD::FP_TO_SINT: ExpandIntRes_FP_TO_SINT(N, Lo, Hi); break;
case ISD::FP_TO_UINT: ExpandIntRes_FP_TO_UINT(N, Lo, Hi); break;
+ case ISD::LLROUND: ExpandIntRes_LLROUND(N, Lo, Hi); break;
+ case ISD::LLRINT: ExpandIntRes_LLRINT(N, Lo, Hi); break;
case ISD::LOAD: ExpandIntRes_LOAD(cast<LoadSDNode>(N), Lo, Hi); break;
case ISD::MUL: ExpandIntRes_MUL(N, Lo, Hi); break;
case ISD::READCYCLECOUNTER: ExpandIntRes_READCYCLECOUNTER(N, Lo, Hi); break;
@@ -1613,7 +1712,20 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: ExpandIntRes_ADDSUBSAT(N, Lo, Hi); break;
- case ISD::SMULFIX: ExpandIntRes_SMULFIX(N, Lo, Hi); break;
+
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN: ExpandIntRes_VECREDUCE(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -2267,6 +2379,25 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
IsOpaque);
}
+void DAGTypeLegalizer::ExpandIntRes_ABS(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+
+ // abs(HiLo) -> (Hi < 0 ? -HiLo : HiLo)
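+ // e.g. for an i64 expanded into two i32 halves: negate the whole value
+ // once, split both versions, and use the sign of the original Hi half to
+ // select between the negated and original (Lo, Hi) pairs.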
+ EVT VT = N->getValueType(0);
+ SDValue N0 = N->getOperand(0);
+ SDValue Neg = DAG.getNode(ISD::SUB, dl, VT,
+ DAG.getConstant(0, dl, VT), N0);
+ SDValue NegLo, NegHi;
+ SplitInteger(Neg, NegLo, NegHi);
+
+ GetExpandedInteger(N0, Lo, Hi);
+ EVT NVT = Lo.getValueType();
+ SDValue HiIsNeg = DAG.getSetCC(dl, getSetCCResultType(NVT),
+ DAG.getConstant(0, dl, NVT), Hi, ISD::SETGT);
+ Lo = DAG.getSelect(dl, NVT, HiIsNeg, NegLo, Lo);
+ Hi = DAG.getSelect(dl, NVT, HiIsNeg, NegHi, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
SDValue &Lo, SDValue &Hi) {
SDLoc dl(N);
@@ -2361,6 +2492,58 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
Lo, Hi);
}
+void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLROUND_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLROUND_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLROUND_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLROUND_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLROUND_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llround input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
+void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ EVT VT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ if (VT == MVT::f32)
+ LC = RTLIB::LLRINT_F32;
+ else if (VT == MVT::f64)
+ LC = RTLIB::LLRINT_F64;
+ else if (VT == MVT::f80)
+ LC = RTLIB::LLRINT_F80;
+ else if (VT == MVT::f128)
+ LC = RTLIB::LLRINT_F128;
+ else if (VT == MVT::ppcf128)
+ LC = RTLIB::LLRINT_PPCF128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected llrint input type!");
+
+ SDValue Op = N->getOperand(0);
+ if (getTypeAction(Op.getValueType()) == TargetLowering::TypePromoteFloat)
+ Op = GetPromotedFloat(Op);
+
+ SDLoc dl(N);
+ EVT RetVT = N->getValueType(0);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ Lo, Hi);
+}
+
void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue &Lo, SDValue &Hi) {
if (ISD::isNormalLoad(N)) {
@@ -2581,15 +2764,39 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBSAT(SDNode *N, SDValue &Lo,
SplitInteger(Result, Lo, Hi);
}
-void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+/// This performs an expansion of the integer result for a fixed point
+/// multiplication. The default expansion rounds down towards negative
+/// infinity; targets that care about rounding should specify a target hook
+/// for rounding and provide their own expansion or lowering of fixed point
+/// multiplication to stay consistent with that rounding.
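+/// For example, with scale 2 a raw -1 (value -0.25) times a raw 1 (value
+/// 0.25) has exact product -1/16; the arithmetic shift right by the scale
+/// yields raw -1 again, i.e. the result rounds down to -0.25 rather than
+/// towards zero.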
+void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
SDLoc dl(N);
EVT VT = N->getValueType(0);
+ unsigned VTSize = VT.getScalarSizeInBits();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
+ bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT = getSetCCResultType(VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
if (!Scale) {
- SDValue Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ SDValue Result;
+ if (!Saturating) {
+ Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else {
+ Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
SplitInteger(Result, Lo, Hi);
return;
}
@@ -2600,15 +2807,19 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- if (!TLI.expandMUL_LOHI(ISD::SMUL_LOHI, VT, dl, LHS, RHS, Result, NVT, DAG,
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
LL, LH, RL, RH)) {
- report_fatal_error("Unable to expand SMUL_FIX using SMUL_LOHI.");
+ report_fatal_error("Unable to expand MUL_FIX using MUL_LOHI.");
return;
}
- unsigned VTSize = VT.getScalarSizeInBits();
unsigned NVTSize = NVT.getScalarSizeInBits();
+ assert((VTSize == NVTSize * 2) && "Expected the new value type to be half "
+ "the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
// Shift whole amount by scale.
@@ -2617,6 +2828,11 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
SDValue ResultHL = Result[2];
SDValue ResultHH = Result[3];
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ // After getting the multiplication result in 4 parts, we need to perform a
+ // shift right by the amount of the scale to get the result in that scale.
+ // Let's say we multiply two 64-bit numbers. The resulting value can be held in
@@ -2645,11 +2861,60 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
+
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of
+ // saturation.
+ // The number of overflow bits we can check is VTSize - Scale + 1 (we
+ // include the sign bit). If these top bits are > 0, then we overflowed past
+ // the max value. If these top bits are < -1, then we overflowed past the
+ // min value. Otherwise, we did not overflow.
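+ // For example, with VTSize = 64, NVTSize = 32 and Scale = 16 there are 49
+ // overflow bits: all of HH plus the top 17 bits of HL must match the sign
+ // of the result for the value to be in range.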
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+
+ // HH > 0 or HH == 0 && HL > HLLoMask
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLPos =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
+
+ // HH < -1 or HH == -1 && HL < HLHiMask
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLNeg =
+ DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
+ }
} else if (Scale == NVTSize) {
// If the scales are equal, Lo and Hi are ResultLH and Result HL,
// respectively. Avoid shifting to prevent undefined behavior.
Lo = ResultLH;
Hi = ResultHL;
+
+ // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
+ // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
+ if (Saturating) {
+ SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
+
+ SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
+ }
} else if (Scale < VTSize) {
// If the scale is instead less than the old VT size, but greater than or
// equal to the expanded VT size, the first part of the result (ResultLL) is
@@ -2664,9 +2929,39 @@ void DAGTypeLegalizer::ExpandIntRes_SMULFIX(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
+
+ // This is similar to the case when we saturate if Scale < NVTSize, but we
+ // only need to check HH.
+ if (Saturating) {
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ }
+ } else if (Scale == VTSize) {
+ assert(
+ !Signed &&
+ "Only unsigned types can have a scale equal to the operand bit width");
+
+ Lo = ResultHL;
+ Hi = ResultHH;
} else {
- llvm_unreachable(
- "Expected the scale to be less than the width of the operands");
+ llvm_unreachable("Expected the scale to be less than or equal to the width "
+ "of the operands");
+ }
+
+ if (Saturating) {
+ APInt LHMax = APInt::getSignedMaxValue(NVTSize);
+ APInt LLMax = APInt::getAllOnesValue(NVTSize);
+ APInt LHMin = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
}
@@ -2765,11 +3060,15 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
}
// Next check to see if the target supports this SHL_PARTS operation or if it
- // will custom expand it.
+ // will custom expand it. Don't lower this to SHL_PARTS when we optimize for
+ // size; create a libcall instead.
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
TargetLowering::LegalizeAction Action = TLI.getOperationAction(PartsOpc, NVT);
- if ((Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
- Action == TargetLowering::Custom) {
+ const bool LegalOrCustom =
+ (Action == TargetLowering::Legal && TLI.isTypeLegal(NVT)) ||
+ Action == TargetLowering::Custom;
+
+ if (LegalOrCustom && TLI.shouldExpandShift(DAG, N)) {
// Expand the subcomponents.
SDValue LHSL, LHSH;
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
@@ -3145,6 +3444,14 @@ void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Swap.getValue(2));
}
+void DAGTypeLegalizer::ExpandIntRes_VECREDUCE(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ // TODO For VECREDUCE_(AND|OR|XOR) we could split the vector and calculate
+ // both halves independently.
+ SDValue Res = TLI.expandVecReduce(N, DAG);
+ SplitInteger(Res, Lo, Hi);
+}
+
//===----------------------------------------------------------------------===//
// Integer Operand Expansion
//===----------------------------------------------------------------------===//
@@ -3167,7 +3474,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
dbgs() << "ExpandIntegerOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
#endif
- llvm_unreachable("Do not know how to expand this operator's operand!");
+ report_fatal_error("Do not know how to expand this operator's operand!");
case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
@@ -3632,8 +3939,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
- SDValue InOp0 = N->getOperand(0);
- EVT InVT = InOp0.getValueType();
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
@@ -3644,6 +3949,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDLoc dl(N);
SDValue BaseIdx = N->getOperand(1);
+ SDValue InOp0 = N->getOperand(0);
+ if (getTypeAction(InOp0.getValueType()) == TargetLowering::TypePromoteInteger)
+ InOp0 = GetPromotedInteger(N->getOperand(0));
+
+ EVT InVT = InOp0.getValueType();
+
SmallVector<SDValue, 8> Ops;
Ops.reserve(OutNumElems);
for (unsigned i = 0; i != OutNumElems; ++i) {
@@ -3654,7 +3965,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) {
SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
InVT.getVectorElementType(), N->getOperand(0), Index);
- SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, Ext);
+ SDValue Op = DAG.getAnyExtOrTrunc(Ext, dl, NOutVTElem);
// Insert the converted element to the new vector.
Ops.push_back(Op);
}
@@ -3809,6 +4120,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) {
V0, ConvElem, N->getOperand(2));
}
+SDValue DAGTypeLegalizer::PromoteIntRes_VECREDUCE(SDNode *N) {
+ // The VECREDUCE result size may be larger than the element size, so
+ // we can simply change the result type.
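+ // e.g. promoting the i8 result of a VECREDUCE_ADD on v4i8 to i32: the node
+ // is re-created with an i32 result while the vector operand is unchanged.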
+ SDLoc dl(N);
+ EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ return DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0));
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) {
SDLoc dl(N);
SDValue V0 = GetPromotedInteger(N->getOperand(0));
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index a9f144c06e9a..14fd5be23ccb 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1,9 +1,8 @@
//===-- LegalizeTypes.cpp - Common code for DAG type legalizer ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -708,6 +707,7 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
auto &OpIdEntry = PromotedIntegers[getTableId(Op)];
assert((OpIdEntry == 0) && "Node is already promoted!");
OpIdEntry = getTableId(Result);
+ Result->setFlags(Op->getFlags());
DAG.transferDbgValues(Op, Result);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 032000f6cb79..1d489b1b3a33 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -1,9 +1,8 @@
//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -345,8 +344,10 @@ private:
SDValue PromoteIntRes_VAARG(SDNode *N);
SDValue PromoteIntRes_XMULO(SDNode *N, unsigned ResNo);
SDValue PromoteIntRes_ADDSUBSAT(SDNode *N);
- SDValue PromoteIntRes_SMULFIX(SDNode *N);
+ SDValue PromoteIntRes_MULFIX(SDNode *N);
SDValue PromoteIntRes_FLT_ROUNDS(SDNode *N);
+ SDValue PromoteIntRes_VECREDUCE(SDNode *N);
+ SDValue PromoteIntRes_ABS(SDNode *N);
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OpNo);
@@ -379,7 +380,9 @@ private:
SDValue PromoteIntOp_ADDSUBCARRY(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_FRAMERETURNADDR(SDNode *N);
SDValue PromoteIntOp_PREFETCH(SDNode *N, unsigned OpNo);
- SDValue PromoteIntOp_SMULFIX(SDNode *N);
+ SDValue PromoteIntOp_MULFIX(SDNode *N);
+ SDValue PromoteIntOp_FPOWI(SDNode *N);
+ SDValue PromoteIntOp_VECREDUCE(SDNode *N);
void PromoteSetCCOperands(SDValue &LHS,SDValue &RHS, ISD::CondCode Code);
@@ -402,6 +405,7 @@ private:
void ExpandIntRes_AssertSext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -414,6 +418,8 @@ private:
void ExpandIntRes_FLT_ROUNDS (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_SINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FP_TO_UINT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLROUND (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_LLRINT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_Logical (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUB (SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -435,9 +441,10 @@ private:
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_XMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ADDSUBSAT (SDNode *N, SDValue &Lo, SDValue &Hi);
- void ExpandIntRes_SMULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_MULFIX (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_ATOMIC_LOAD (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_VECREDUCE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, const APInt &Amt,
SDValue &Lo, SDValue &Hi);
@@ -548,6 +555,10 @@ private:
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
+ SDValue SoftenFloatOp_LROUND(SDNode *N);
+ SDValue SoftenFloatOp_LLROUND(SDNode *N);
+ SDValue SoftenFloatOp_LRINT(SDNode *N);
+ SDValue SoftenFloatOp_LLRINT(SDNode *N);
SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
@@ -607,6 +618,10 @@ private:
SDValue ExpandFloatOp_FP_ROUND(SDNode *N);
SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N);
SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N);
+ SDValue ExpandFloatOp_LROUND(SDNode *N);
+ SDValue ExpandFloatOp_LLROUND(SDNode *N);
+ SDValue ExpandFloatOp_LRINT(SDNode *N);
+ SDValue ExpandFloatOp_LLRINT(SDNode *N);
SDValue ExpandFloatOp_SELECT_CC(SDNode *N);
SDValue ExpandFloatOp_SETCC(SDNode *N);
SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -640,6 +655,7 @@ private:
SDValue PromoteFloatRes_SELECT_CC(SDNode *N);
SDValue PromoteFloatRes_UnaryOp(SDNode *N);
SDValue PromoteFloatRes_UNDEF(SDNode *N);
+ SDValue BitcastToInt_ATOMIC_SWAP(SDNode *N);
SDValue PromoteFloatRes_XINT_TO_FP(SDNode *N);
bool PromoteFloatOperand(SDNode *N, unsigned OpNo);
@@ -673,6 +689,7 @@ private:
SDValue ScalarizeVecRes_TernaryOp(SDNode *N);
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_StrictFPOp(SDNode *N);
+ SDValue ScalarizeVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
SDValue ScalarizeVecRes_VecInregOp(SDNode *N);
@@ -680,6 +697,7 @@ private:
SDValue ScalarizeVecRes_BUILD_VECTOR(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FP_ROUND(SDNode *N);
+ SDValue ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
@@ -691,7 +709,7 @@ private:
SDValue ScalarizeVecRes_UNDEF(SDNode *N);
SDValue ScalarizeVecRes_VECTOR_SHUFFLE(SDNode *N);
- SDValue ScalarizeVecRes_SMULFIX(SDNode *N);
+ SDValue ScalarizeVecRes_MULFIX(SDNode *N);
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
@@ -703,6 +721,8 @@ private:
SDValue ScalarizeVecOp_VSETCC(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N, unsigned OpNo);
+ SDValue ScalarizeVecOp_VECREDUCE(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Splitting Support: LegalizeVectorTypes.cpp
@@ -727,8 +747,10 @@ private:
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_ExtVecInRegOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi);
- void SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -745,6 +767,7 @@ private:
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
SDValue &Hi);
+ void SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi);
// Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>.
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
@@ -808,7 +831,9 @@ private:
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
+ SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
+ SDValue WidenVecRes_Convert_StrictFP(SDNode *N);
SDValue WidenVecRes_FCOPYSIGN(SDNode *N);
SDValue WidenVecRes_POWI(SDNode *N);
SDValue WidenVecRes_Shift(SDNode *N);
@@ -827,9 +852,16 @@ private:
SDValue WidenVecOp_MGATHER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_MSCATTER(SDNode* N, unsigned OpNo);
SDValue WidenVecOp_SETCC(SDNode* N);
+ SDValue WidenVecOp_VSELECT(SDNode *N);
SDValue WidenVecOp_Convert(SDNode *N);
SDValue WidenVecOp_FCOPYSIGN(SDNode *N);
+ SDValue WidenVecOp_VECREDUCE(SDNode *N);
+
+ /// Helper function to generate a set of operations to perform
+ /// a vector operation for a wider type.
+ SDValue UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE);
//===--------------------------------------------------------------------===//
// Vector Widening Utilities Support: LegalizeVectorTypes.cpp
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index b9d370441c3e..943f63f46c47 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -1,9 +1,8 @@
//===-------- LegalizeTypesGeneric.cpp - Generic type legalization --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 4923a529c21b..10b8b705869e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -1,9 +1,8 @@
//===- LegalizeVectorOps.cpp - Implement SelectionDAG::LegalizeVectors ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -141,7 +140,11 @@ class VectorLegalizer {
SDValue ExpandFunnelShift(SDValue Op);
SDValue ExpandROT(SDValue Op);
SDValue ExpandFMINNUM_FMAXNUM(SDValue Op);
+ SDValue ExpandUADDSUBO(SDValue Op);
+ SDValue ExpandSADDSUBO(SDValue Op);
+ SDValue ExpandMULO(SDValue Op);
SDValue ExpandAddSubSat(SDValue Op);
+ SDValue ExpandFixedPointMul(SDValue Op);
SDValue ExpandStrictFPOp(SDValue Op);
/// Implements vector promotion.
@@ -263,7 +266,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
LLVM_FALLTHROUGH;
case TargetLowering::Expand:
Changed = true;
- return LegalizeOp(ExpandLoad(Op));
+ return ExpandLoad(Op);
}
}
} else if (Op.getOpcode() == ISD::STORE) {
@@ -288,17 +291,18 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
}
case TargetLowering::Expand:
Changed = true;
- return LegalizeOp(ExpandStore(Op));
+ return ExpandStore(Op);
}
}
}
- bool HasVectorValue = false;
- for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end();
- J != E;
- ++J)
- HasVectorValue |= J->isVector();
- if (!HasVectorValue)
+ bool HasVectorValueOrOp = false;
+ for (auto J = Node->value_begin(), E = Node->value_end(); J != E; ++J)
+ HasVectorValueOrOp |= J->isVector();
+ for (const SDValue &Op : Node->op_values())
+ HasVectorValueOrOp |= Op.getValueType().isVector();
+
+ if (!HasVectorValueOrOp)
return TranslateLegalizeResults(Op, Result);
TargetLowering::LegalizeAction Action = TargetLowering::Legal;
@@ -329,6 +333,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_EXTEND:
// These pseudo-ops get legalized as if they were their non-strict
// equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
// is also legal, but if ISD::FSQRT requires expansion then so does
@@ -418,6 +424,12 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::UMAX:
case ISD::SMUL_LOHI:
case ISD::UMUL_LOHI:
+ case ISD::SADDO:
+ case ISD::UADDO:
+ case ISD::SSUBO:
+ case ISD::USUBO:
+ case ISD::SMULO:
+ case ISD::UMULO:
case ISD::FCANONICALIZE:
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -425,7 +437,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::USUBSAT:
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
break;
- case ISD::SMULFIX: {
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -437,6 +451,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
@@ -650,23 +677,21 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
LoadChains.push_back(ScalarLoad.getValue(1));
}
- // Extract bits, pack and extend/trunc them into destination type.
- unsigned SrcEltBits = SrcEltVT.getSizeInBits();
- SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, dl, WideVT);
-
unsigned BitOffset = 0;
unsigned WideIdx = 0;
unsigned WideBits = WideVT.getSizeInBits();
+ // Extract bits, pack and extend/trunc them into destination type.
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits();
+ SDValue SrcEltBitMask = DAG.getConstant(
+ APInt::getLowBitsSet(WideBits, SrcEltBits), dl, WideVT);
+
for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
- SDValue Lo, Hi, ShAmt;
+ assert(BitOffset < WideBits && "Unexpected offset!");
- if (BitOffset < WideBits) {
- ShAmt = DAG.getConstant(
- BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
- }
+ SDValue ShAmt = DAG.getConstant(
+ BitOffset, dl, TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ SDValue Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt);
BitOffset += SrcEltBits;
if (BitOffset >= WideBits) {
@@ -676,13 +701,13 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) {
ShAmt = DAG.getConstant(
SrcEltBits - BitOffset, dl,
TLI.getShiftAmountTy(WideVT, DAG.getDataLayout()));
- Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
- Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask);
+ SDValue Hi =
+ DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt);
+ Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
}
}
- if (Hi.getNode())
- Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi);
+ Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask);
switch (ExtType) {
default: llvm_unreachable("Unknown extended-load op!");
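The rewritten loop above always shifts the current wide word down (SRL), ORs in the spill-over from the next word when an element straddles a word boundary (SHL + OR), and applies the element mask once at the end (AND). A minimal stand-alone C++ model of that sequence, assuming 64-bit wide words; the function name is invented for the sketch and none of this is LLVM API:

#include <cassert>
#include <cstdint>
#include <vector>

// Model of the bit-unpacking loop: shift the current wide word down (SRL),
// OR in the spill-over from the next word when an element straddles a word
// boundary (SHL + OR), and mask once at the end (AND).
std::vector<uint64_t> unpackElements(const std::vector<uint64_t> &Words,
                                     unsigned EltBits, unsigned NumElem) {
  const unsigned WideBits = 64;
  const uint64_t Mask = EltBits >= 64 ? ~0ULL : ((1ULL << EltBits) - 1);
  std::vector<uint64_t> Out;
  unsigned BitOffset = 0, WideIdx = 0;
  for (unsigned Idx = 0; Idx != NumElem; ++Idx) {
    assert(BitOffset < WideBits && "Unexpected offset!");
    uint64_t Lo = Words[WideIdx] >> BitOffset;          // ISD::SRL
    BitOffset += EltBits;
    if (BitOffset >= WideBits) {
      ++WideIdx;
      BitOffset -= WideBits;
      if (BitOffset > 0)                                // element straddles
        Lo |= Words[WideIdx] << (EltBits - BitOffset);  // ISD::SHL + ISD::OR
    }
    Out.push_back(Lo & Mask);                           // ISD::AND once
  }
  return Out;
}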
@@ -778,11 +803,23 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::FMINNUM:
case ISD::FMAXNUM:
return ExpandFMINNUM_FMAXNUM(Op);
+ case ISD::UADDO:
+ case ISD::USUBO:
+ return ExpandUADDSUBO(Op);
+ case ISD::SADDO:
+ case ISD::SSUBO:
+ return ExpandSADDSUBO(Op);
+ case ISD::UMULO:
+ case ISD::SMULO:
+ return ExpandMULO(Op);
case ISD::USUBSAT:
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::SADDSAT:
return ExpandAddSubSat(Op);
+ case ISD::SMULFIX:
+ case ISD::UMULFIX:
+ return ExpandFixedPointMul(Op);
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -808,6 +845,20 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
return ExpandStrictFPOp(Op);
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ return TLI.expandVecReduce(Op.getNode(), DAG);
default:
return DAG.UnrollVectorOp(Op.getNode());
}
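TLI.expandVecReduce, newly used for the VECREDUCE_* cases above, typically lowers a reduction by repeatedly folding the upper half of the vector into the lower half and extracting lane 0. A sketch of that halving scheme on plain data, assuming a non-empty, power-of-two lane count; reduceByHalving is an invented name, not an LLVM call:

#include <cstddef>
#include <functional>
#include <vector>

// Model of a halving expansion: fold the upper half into the lower half with
// the scalar binop until one lane remains, then take lane 0. Assumes a
// non-empty, power-of-two lane count, which is what the legalizer sees here.
int reduceByHalving(std::vector<int> V, const std::function<int(int, int)> &Op) {
  for (std::size_t Half = V.size() / 2; Half >= 1; Half /= 2)
    for (std::size_t i = 0; i < Half; ++i)
      V[i] = Op(V[i], V[i + Half]);  // EXTRACT_SUBVECTOR halves + binop
  return V[0];
}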
@@ -898,6 +949,19 @@ SDValue VectorLegalizer::ExpandANY_EXTEND_VECTOR_INREG(SDValue Op) {
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ANY_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
// Build a base mask of undef shuffles.
SmallVector<int, 16> ShuffleMask;
ShuffleMask.resize(NumSrcElements, -1);
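Both *_EXTEND_VECTOR_INREG hunks use the same trick: when the source vector is narrower than the result, it is first placed at index 0 of a wider undef vector so the shuffle masks built below line up. A tiny C++ model of just the padding step, with byte lanes standing in for the real element type; widenSource is invented for the sketch:

#include <cstddef>
#include <cstdint>
#include <vector>

// Model of the padding step: place the short source vector at index 0 of a
// vector wide enough to match the result's bit width; the tail lanes are
// don't-care (UNDEF in the DAG, zero here only so the model has a value).
std::vector<uint8_t> widenSource(const std::vector<uint8_t> &Src,
                                 unsigned ResultBits) {
  std::vector<uint8_t> Wide(ResultBits / 8, 0);  // 8-bit lanes in this model
  for (std::size_t i = 0; i < Src.size(); ++i)   // INSERT_SUBVECTOR at 0
    Wide[i] = Src[i];
  return Wide;
}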
@@ -945,6 +1009,19 @@ SDValue VectorLegalizer::ExpandZERO_EXTEND_VECTOR_INREG(SDValue Op) {
EVT SrcVT = Src.getValueType();
int NumSrcElements = SrcVT.getVectorNumElements();
+ // *_EXTEND_VECTOR_INREG SrcVT can be smaller than VT - so insert the vector
+ // into a larger vector type.
+ if (SrcVT.bitsLE(VT)) {
+ assert((VT.getSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 &&
+ "ZERO_EXTEND_VECTOR_INREG vector size mismatch");
+ NumSrcElements = VT.getSizeInBits() / SrcVT.getScalarSizeInBits();
+ SrcVT = EVT::getVectorVT(*DAG.getContext(), SrcVT.getScalarType(),
+ NumSrcElements);
+ Src = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, SrcVT, DAG.getUNDEF(SrcVT), Src,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+
// Build up a zero vector to blend into this one.
SDValue Zero = DAG.getConstant(0, DL, SrcVT);
@@ -1212,12 +1289,58 @@ SDValue VectorLegalizer::ExpandFMINNUM_FMAXNUM(SDValue Op) {
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandUADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandUADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandSADDSUBO(SDValue Op) {
+ SDValue Result, Overflow;
+ TLI.expandSADDSUBO(Op.getNode(), Result, Overflow, DAG);
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
+SDValue VectorLegalizer::ExpandMULO(SDValue Op) {
+ SDValue Result, Overflow;
+ if (!TLI.expandMULO(Op.getNode(), Result, Overflow, DAG))
+ std::tie(Result, Overflow) = DAG.UnrollVectorOverflowOp(Op.getNode());
+
+ if (Op.getResNo() == 0) {
+ AddLegalizedOperand(Op.getValue(1), LegalizeOp(Overflow));
+ return Result;
+ } else {
+ AddLegalizedOperand(Op.getValue(0), LegalizeOp(Result));
+ return Overflow;
+ }
+}
+
SDValue VectorLegalizer::ExpandAddSubSat(SDValue Op) {
if (SDValue Expanded = TLI.expandAddSubSat(Op.getNode(), DAG))
return Expanded;
return DAG.UnrollVectorOp(Op.getNode());
}
+SDValue VectorLegalizer::ExpandFixedPointMul(SDValue Op) {
+ if (SDValue Expanded = TLI.expandFixedPointMul(Op.getNode(), DAG))
+ return Expanded;
+ return DAG.UnrollVectorOp(Op.getNode());
+}
+
SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
EVT VT = Op.getValueType();
EVT EltVT = VT.getVectorElementType();
@@ -1245,7 +1368,7 @@ SDValue VectorLegalizer::ExpandStrictFPOp(SDValue Op) {
if (OperVT.isVector())
Oper = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
- EltVT, Oper, Idx);
+ OperVT.getVectorElementType(), Oper, Idx);
Opers.push_back(Oper);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index f367e9358576..7e4d52617977 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1,9 +1,8 @@
//===------- LegalizeVectorTypes.cpp - Legalization of vector types -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -51,6 +50,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = ScalarizeVecRes_BUILD_VECTOR(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
+ case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
@@ -69,6 +69,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND_VECTOR_INREG:
R = ScalarizeVecRes_VecInregOp(N);
break;
+ case ISD::ABS:
case ISD::ANY_EXTEND:
case ISD::BITREVERSE:
case ISD::BSWAP:
@@ -170,10 +171,21 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ R = ScalarizeVecRes_OverflowOp(N, ResNo);
+ break;
case ISD::SMULFIX:
- R = ScalarizeVecRes_SMULFIX(N);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -197,7 +209,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) {
Op0.getValueType(), Op0, Op1, Op2);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_SMULFIX(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_MULFIX(SDNode *N) {
SDValue Op0 = GetScalarizedVector(N->getOperand(0));
SDValue Op1 = GetScalarizedVector(N->getOperand(1));
SDValue Op2 = N->getOperand(2);
@@ -235,6 +247,43 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_StrictFPOp(SDNode *N) {
return Result;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_OverflowOp(SDNode *N,
+ unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+
+ SDValue ScalarLHS, ScalarRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeScalarizeVector) {
+ ScalarLHS = GetScalarizedVector(N->getOperand(0));
+ ScalarRHS = GetScalarizedVector(N->getOperand(1));
+ } else {
+ SmallVector<SDValue, 1> ElemsLHS, ElemsRHS;
+ DAG.ExtractVectorElements(N->getOperand(0), ElemsLHS);
+ DAG.ExtractVectorElements(N->getOperand(1), ElemsRHS);
+ ScalarLHS = ElemsLHS[0];
+ ScalarRHS = ElemsRHS[0];
+ }
+
+ SDVTList ScalarVTs = DAG.getVTList(
+ ResVT.getVectorElementType(), OvVT.getVectorElementType());
+ SDNode *ScalarNode = DAG.getNode(
+ N->getOpcode(), DL, ScalarVTs, ScalarLHS, ScalarRHS).getNode();
+
+ // Replace the other vector result not being explicitly scalarized here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeScalarizeVector) {
+ SetScalarizedVector(SDValue(N, OtherNo), SDValue(ScalarNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::SCALAR_TO_VECTOR, DL, OtherVT, SDValue(ScalarNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(ScalarNode, ResNo);
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N,
unsigned ResNo) {
SDValue Op = DisintegrateMERGE_VALUES(N, ResNo);
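ScalarizeVecRes_OverflowOp above handles a node with two vector results: one scalar operation computes both values, the requested result (ResNo) is returned, and the sibling result is registered back with the legalizer. A scalar C++ model of that shape for UADDO, with invented names and a reference out-parameter standing in for the DAG's "replace the other result" step:

#include <cstdint>

// Scalar model of the two-result shape for UADDO: one operation yields both
// the sum and the overflow bit; the requested result (ResNo) is returned and
// the sibling value is handed back separately.
uint32_t scalarizeUAddO(uint32_t L, uint32_t R, unsigned ResNo,
                        uint32_t &Other) {
  uint32_t Sum = L + R;
  uint32_t Ov = Sum < L ? 1 : 0;  // unsigned wrap check
  if (ResNo == 0) {
    Other = Ov;
    return Sum;
  }
  Other = Sum;
  return Ov;
}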
@@ -275,6 +324,18 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) {
NewVT, Op, N->getOperand(1));
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_STRICT_FP_ROUND(SDNode *N) {
+ EVT NewVT = N->getValueType(0).getVectorElementType();
+ SDValue Op = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { NewVT, MVT::Other },
+ { N->getOperand(0), Op, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return Res;
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) {
SDValue Op = GetScalarizedVector(N->getOperand(0));
return DAG.getNode(ISD::FPOWI, SDLoc(N),
@@ -558,9 +619,27 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::STORE:
Res = ScalarizeVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
+ case ISD::STRICT_FP_ROUND:
+ Res = ScalarizeVecOp_STRICT_FP_ROUND(N, OpNo);
+ break;
case ISD::FP_ROUND:
Res = ScalarizeVecOp_FP_ROUND(N, OpNo);
break;
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = ScalarizeVecOp_VECREDUCE(N);
+ break;
}
}
@@ -691,6 +770,28 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_FP_ROUND(SDNode *N, unsigned OpNo) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
}
+SDValue DAGTypeLegalizer::ScalarizeVecOp_STRICT_FP_ROUND(SDNode *N,
+ unsigned OpNo) {
+ assert(OpNo == 1 && "Wrong operand for scalarization!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(ISD::STRICT_FP_ROUND, SDLoc(N),
+ { N->getValueType(0).getVectorElementType(),
+ MVT::Other },
+ { N->getOperand(0), Elt, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
+SDValue DAGTypeLegalizer::ScalarizeVecOp_VECREDUCE(SDNode *N) {
+ SDValue Res = GetScalarizedVector(N->getOperand(0));
+ // Result type may be wider than element type.
+ if (Res.getValueType() != N->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res);
+ return Res;
+}
+
//===----------------------------------------------------------------------===//
// Result Vector Splitting
//===----------------------------------------------------------------------===//
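ScalarizeVecOp_VECREDUCE above relies on reducing a one-element vector being the identity on its lane, plus an ANY_EXTEND when the reduction result type is wider than the element. The same fact in plain C++, assuming a 16-bit lane and a 32-bit result; the function is illustrative only:

#include <cstdint>

// Reducing a <1 x ty> vector is the identity on its only lane; when the
// reduction's result type is wider than the element, the lane is
// (any-)extended. Modeled with a 16-bit lane and a 32-bit result.
uint32_t reduceSingleLane(uint16_t Lane) {
  return static_cast<uint32_t>(Lane);  // ISD::ANY_EXTEND in the DAG
}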
@@ -748,6 +849,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::VECTOR_SHUFFLE:
SplitVecRes_VECTOR_SHUFFLE(cast<ShuffleVectorSDNode>(N), Lo, Hi);
break;
+ case ISD::VAARG:
+ SplitVecRes_VAARG(N, Lo, Hi);
+ break;
case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
@@ -755,6 +859,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
SplitVecRes_ExtVecInRegOp(N, Lo, Hi);
break;
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
@@ -774,7 +879,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FNEARBYINT:
case ISD::FNEG:
case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_ROUND:
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::FRINT:
@@ -859,8 +966,18 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FTRUNC:
SplitVecRes_StrictFPOp(N, Lo, Hi);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ SplitVecRes_OverflowOp(N, ResNo, Lo, Hi);
+ break;
case ISD::SMULFIX:
- SplitVecRes_SMULFIX(N, Lo, Hi);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -899,8 +1016,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo,
Op0Hi, Op1Hi, Op2Hi);
}
-void DAGTypeLegalizer::SplitVecRes_SMULFIX(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_MULFIX(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LHSLo, LHSHi;
GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
SDValue RHSLo, RHSHi;
@@ -1205,6 +1321,104 @@ void DAGTypeLegalizer::SplitVecRes_StrictFPOp(SDNode *N, SDValue &Lo,
ReplaceValueWith(SDValue(N, 1), Chain);
}
+SDValue DAGTypeLegalizer::UnrollVectorOp_StrictFP(SDNode *N, unsigned ResNE) {
+ SDValue Chain = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ unsigned NE = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SDLoc dl(N);
+
+ SmallVector<SDValue, 8> Scalars;
+ SmallVector<SDValue, 4> Operands(N->getNumOperands());
+
+ // If ResNE is 0, fully unroll the vector op.
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ // The results of each unrolled operation, including the chain.
+ EVT ChainVTs[] = {EltVT, MVT::Other};
+ SmallVector<SDValue, 8> Chains;
+
+ unsigned i;
+ for (i = 0; i != NE; ++i) {
+ Operands[0] = Chain;
+ for (unsigned j = 1, e = N->getNumOperands(); j != e; ++j) {
+ SDValue Operand = N->getOperand(j);
+ EVT OperandVT = Operand.getValueType();
+ if (OperandVT.isVector()) {
+ EVT OperandEltVT = OperandVT.getVectorElementType();
+ Operands[j] =
+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(
+ DAG.getDataLayout())));
+ } else {
+ Operands[j] = Operand;
+ }
+ }
+ SDValue Scalar = DAG.getNode(N->getOpcode(), dl, ChainVTs, Operands);
+ Scalar.getNode()->setFlags(N->getFlags());
+
+ // Add in the scalar as well as its chain value to the
+ // result vectors.
+ Scalars.push_back(Scalar);
+ Chains.push_back(Scalar.getValue(1));
+ }
+
+ for (; i < ResNE; ++i)
+ Scalars.push_back(DAG.getUNDEF(EltVT));
+
+ // Build a new factor node to connect the chain back together.
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
+ ReplaceValueWith(SDValue(N, 1), Chain);
+
+ // Create a new BUILD_VECTOR node.
+ EVT VecVT = EVT::getVectorVT(*DAG.getContext(), EltVT, ResNE);
+ return DAG.getBuildVector(VecVT, dl, Scalars);
+}
+
+void DAGTypeLegalizer::SplitVecRes_OverflowOp(SDNode *N, unsigned ResNo,
+ SDValue &Lo, SDValue &Hi) {
+ SDLoc dl(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT LoResVT, HiResVT, LoOvVT, HiOvVT;
+ std::tie(LoResVT, HiResVT) = DAG.GetSplitDestVTs(ResVT);
+ std::tie(LoOvVT, HiOvVT) = DAG.GetSplitDestVTs(OvVT);
+
+ SDValue LoLHS, HiLHS, LoRHS, HiRHS;
+ if (getTypeAction(ResVT) == TargetLowering::TypeSplitVector) {
+ GetSplitVector(N->getOperand(0), LoLHS, HiLHS);
+ GetSplitVector(N->getOperand(1), LoRHS, HiRHS);
+ } else {
+ std::tie(LoLHS, HiLHS) = DAG.SplitVectorOperand(N, 0);
+ std::tie(LoRHS, HiRHS) = DAG.SplitVectorOperand(N, 1);
+ }
+
+ unsigned Opcode = N->getOpcode();
+ SDVTList LoVTs = DAG.getVTList(LoResVT, LoOvVT);
+ SDVTList HiVTs = DAG.getVTList(HiResVT, HiOvVT);
+ SDNode *LoNode = DAG.getNode(Opcode, dl, LoVTs, LoLHS, LoRHS).getNode();
+ SDNode *HiNode = DAG.getNode(Opcode, dl, HiVTs, HiLHS, HiRHS).getNode();
+
+ Lo = SDValue(LoNode, ResNo);
+ Hi = SDValue(HiNode, ResNo);
+
+ // Replace the other vector result not being explicitly split here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeSplitVector) {
+ SetSplitVector(SDValue(N, OtherNo),
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ } else {
+ SDValue OtherVal = DAG.getNode(
+ ISD::CONCAT_VECTORS, dl, OtherVT,
+ SDValue(LoNode, OtherNo), SDValue(HiNode, OtherNo));
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+}
+
void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
@@ -1344,12 +1558,6 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
unsigned Alignment = MLD->getOriginalAlignment();
ISD::LoadExtType ExtType = MLD->getExtensionType();
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
// Split Mask operand
SDValue MaskLo, MaskHi;
if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
@@ -1381,7 +1589,7 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
MMO = DAG.getMachineFunction().getMachineMemOperand(
MLD->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOLoad,
- HiMemVT.getStoreSize(), SecondHalfAlignment, MLD->getAAInfo(),
+ HiMemVT.getStoreSize(), Alignment, MLD->getAAInfo(),
MLD->getRanges());
Hi = DAG.getMaskedLoad(HiVT, dl, Ch, Ptr, MaskHi, PassThruHi, HiMemVT, MMO,
@@ -1496,15 +1704,34 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo,
// If the input also splits, handle it directly for a compile time speedup.
// Otherwise split it by hand.
- EVT InVT = N->getOperand(0).getValueType();
+ unsigned OpNo = N->isStrictFPOpcode() ? 1 : 0;
+ EVT InVT = N->getOperand(OpNo).getValueType();
if (getTypeAction(InVT) == TargetLowering::TypeSplitVector)
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(OpNo), Lo, Hi);
else
- std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0);
+ std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, OpNo);
if (N->getOpcode() == ISD::FP_ROUND) {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1));
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi, N->getOperand(1));
+ } else if (N->getOpcode() == ISD::STRICT_FP_ROUND) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { LoVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { HiVT, MVT::Other },
+ { N->getOperand(0), Hi });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
} else {
Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo);
Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi);
@@ -1669,6 +1896,26 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N,
}
}
+void DAGTypeLegalizer::SplitVecRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
+ EVT OVT = N->getValueType(0);
+ EVT NVT = OVT.getHalfNumVectorElementsVT(*DAG.getContext());
+ SDValue Chain = N->getOperand(0);
+ SDValue Ptr = N->getOperand(1);
+ SDValue SV = N->getOperand(2);
+ SDLoc dl(N);
+
+ const unsigned Alignment = DAG.getDataLayout().getABITypeAlignment(
+ NVT.getTypeForEVT(*DAG.getContext()));
+
+ Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, SV, Alignment);
+ Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, SV, Alignment);
+ Chain = Hi.getValue(1);
+
+ // Modified the chain - switch anything that used the old chain to use
+ // the new one.
+ ReplaceValueWith(SDValue(N, 1), Chain);
+}
+
//===----------------------------------------------------------------------===//
// Operand Vector Splitting
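SplitVecRes_VAARG above reads the two halves with back-to-back VAARG nodes, feeding the first read's output chain into the second so the reads stay ordered. A loose C-varargs analogue of that chaining, assuming a <4 x float> split into two <2 x float> halves; this models the ordering only, not the vector va_arg ABI:

#include <cstdarg>

// Ordering model only: the two half-reads share one va_list, so the second
// read begins where the first left off, just as Hi's chain input is Lo's
// chain output in the hunk above. Floats promote to double in C varargs.
struct Half { float A, B; };
struct SplitArg { Half Lo, Hi; };
SplitArg readSplitVAArg(va_list Args) {
  SplitArg S;
  S.Lo.A = static_cast<float>(va_arg(Args, double));  // low half first
  S.Lo.B = static_cast<float>(va_arg(Args, double));
  S.Hi.A = static_cast<float>(va_arg(Args, double));  // then the high half
  S.Hi.B = static_cast<float>(va_arg(Args, double));
  return S;
}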
@@ -1705,6 +1952,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = SplitVecOp_TruncateHelper(N);
break;
+ case ISD::STRICT_FP_ROUND:
case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::FCOPYSIGN: Res = SplitVecOp_FCOPYSIGN(N); break;
case ISD::STORE:
@@ -1734,6 +1982,7 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_EXTEND:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1775,7 +2024,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
if (Res.getNode() == N)
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
"Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
@@ -1863,14 +2116,30 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc dl(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
- Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
- Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo });
+ Hi = DAG.getNode(N->getOpcode(), dl, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi });
+
+ // Build a factor node to remember that this operation is independent
+ // of the other one.
+ SDValue Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Ch);
+ } else {
+ Lo = DAG.getNode(N->getOpcode(), dl, OutVT, Lo);
+ Hi = DAG.getNode(N->getOpcode(), dl, OutVT, Hi);
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
@@ -1920,7 +2189,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
if (isa<ConstantSDNode>(Idx)) {
uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
- assert(IdxVal < VecVT.getVectorNumElements() && "Invalid vector index!");
SDValue Lo, Hi;
GetSplitVector(Vec, Lo, Hi);
@@ -2079,12 +2347,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
- // if Alignment is equal to the vector size,
- // take the half of it for the second part
- unsigned SecondHalfAlignment =
- (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
- Alignment/2 : Alignment;
-
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -2101,7 +2363,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
MMO = DAG.getMachineFunction().getMachineMemOperand(
N->getPointerInfo().getWithOffset(HiOffset), MachineMemOperand::MOStore,
- HiMemVT.getStoreSize(), SecondHalfAlignment, N->getAAInfo(),
+ HiMemVT.getStoreSize(), Alignment, N->getAAInfo(),
N->getRanges());
Hi = DAG.getMaskedStore(Ch, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
@@ -2343,14 +2605,26 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
EVT ResVT = N->getValueType(0);
SDValue Lo, Hi;
SDLoc DL(N);
- GetSplitVector(N->getOperand(0), Lo, Hi);
+ GetSplitVector(N->getOperand(N->isStrictFPOpcode() ? 1 : 0), Lo, Hi);
EVT InVT = Lo.getValueType();
EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
InVT.getVectorNumElements());
- Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
- Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ if (N->isStrictFPOpcode()) {
+ Lo = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Lo, N->getOperand(2) });
+ Hi = DAG.getNode(N->getOpcode(), DL, { OutVT, MVT::Other },
+ { N->getOperand(0), Hi, N->getOperand(2) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
+ Lo.getValue(1), Hi.getValue(1));
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+ }
return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
}
@@ -2472,6 +2746,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_StrictFP(N);
break;
+ case ISD::UADDO:
+ case ISD::SADDO:
+ case ISD::USUBO:
+ case ISD::SSUBO:
+ case ISD::UMULO:
+ case ISD::SMULO:
+ Res = WidenVecRes_OverflowOp(N, ResNo);
+ break;
+
case ISD::FCOPYSIGN:
Res = WidenVecRes_FCOPYSIGN(N);
break;
@@ -2505,6 +2788,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_Convert(N);
break;
+ case ISD::STRICT_FP_EXTEND:
+ case ISD::STRICT_FP_ROUND:
+ Res = WidenVecRes_Convert_StrictFP(N);
+ break;
+
case ISD::FABS:
case ISD::FCEIL:
case ISD::FCOS:
@@ -2523,13 +2811,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// We're going to widen this vector op to a legal type by padding with undef
// elements. If the wide vector op is eventually going to be expanded to
// scalar libcalls, then unroll into scalar ops now to avoid unnecessary
- // libcalls on the undef elements. We are assuming that if the scalar op
- // requires expanding, then the vector op needs expanding too.
+ // libcalls on the undef elements.
EVT VT = N->getValueType(0);
- if (TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
- EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- assert(!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
- "Target supports vector op, but scalar requires expansion?");
+ EVT WideVecVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
+ if (!TLI.isOperationLegalOrCustom(N->getOpcode(), WideVecVT) &&
+ TLI.isOperationExpand(N->getOpcode(), VT.getScalarType())) {
Res = DAG.UnrollVectorOp(N, WideVecVT.getVectorNumElements());
break;
}
@@ -2539,11 +2825,14 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
// any other unary ops.
LLVM_FALLTHROUGH;
+ case ISD::ABS:
case ISD::BITREVERSE:
case ISD::BSWAP:
case ISD::CTLZ:
+ case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTPOP:
case ISD::CTTZ:
+ case ISD::CTTZ_ZERO_UNDEF:
case ISD::FNEG:
case ISD::FCANONICALIZE:
Res = WidenVecRes_Unary(N);
@@ -2593,14 +2882,13 @@ static SDValue CollectOpsToWiden(SelectionDAG &DAG, const TargetLowering &TLI,
SDLoc dl(ConcatOps[0]);
EVT WidenEltVT = WidenVT.getVectorElementType();
- int Idx = 0;
// while (Some element of ConcatOps is not of type MaxVT) {
// From the end of ConcatOps, collect elements of the same type and put
// them into an op of the next larger supported type
// }
while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) {
- Idx = ConcatEnd - 1;
+ int Idx = ConcatEnd - 1;
VT = ConcatOps[Idx--].getValueType();
while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT)
Idx--;
@@ -2750,7 +3038,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
- return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
+ return UnrollVectorOp_StrictFP(N, WidenVT.getVectorNumElements());
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
@@ -2846,6 +3134,58 @@ SDValue DAGTypeLegalizer::WidenVecRes_StrictFP(SDNode *N) {
return CollectOpsToWiden(DAG, TLI, ConcatOps, ConcatEnd, VT, MaxVT, WidenVT);
}
+SDValue DAGTypeLegalizer::WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo) {
+ SDLoc DL(N);
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT WideResVT, WideOvVT;
+ SDValue WideLHS, WideRHS;
+
+ // TODO: This might result in a widen/split loop.
+ if (ResNo == 0) {
+ WideResVT = TLI.getTypeToTransformTo(*DAG.getContext(), ResVT);
+ WideOvVT = EVT::getVectorVT(
+ *DAG.getContext(), OvVT.getVectorElementType(),
+ WideResVT.getVectorNumElements());
+
+ WideLHS = GetWidenedVector(N->getOperand(0));
+ WideRHS = GetWidenedVector(N->getOperand(1));
+ } else {
+ WideOvVT = TLI.getTypeToTransformTo(*DAG.getContext(), OvVT);
+ WideResVT = EVT::getVectorVT(
+ *DAG.getContext(), ResVT.getVectorElementType(),
+ WideOvVT.getVectorNumElements());
+
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ WideLHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(0), Zero);
+ WideRHS = DAG.getNode(
+ ISD::INSERT_SUBVECTOR, DL, WideResVT, DAG.getUNDEF(WideResVT),
+ N->getOperand(1), Zero);
+ }
+
+ SDVTList WideVTs = DAG.getVTList(WideResVT, WideOvVT);
+ SDNode *WideNode = DAG.getNode(
+ N->getOpcode(), DL, WideVTs, WideLHS, WideRHS).getNode();
+
+ // Replace the other vector result not being explicitly widened here.
+ unsigned OtherNo = 1 - ResNo;
+ EVT OtherVT = N->getValueType(OtherNo);
+ if (getTypeAction(OtherVT) == TargetLowering::TypeWidenVector) {
+ SetWidenedVector(SDValue(N, OtherNo), SDValue(WideNode, OtherNo));
+ } else {
+ SDValue Zero = DAG.getConstant(
+ 0, DL, TLI.getVectorIdxTy(DAG.getDataLayout()));
+ SDValue OtherVal = DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, OtherVT, SDValue(WideNode, OtherNo), Zero);
+ ReplaceValueWith(SDValue(N, OtherNo), OtherVal);
+ }
+
+ return SDValue(WideNode, ResNo);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
SDLoc DL(N);
@@ -2929,6 +3269,43 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getBuildVector(WidenVT, DL, Ops);
}
+SDValue DAGTypeLegalizer::WidenVecRes_Convert_StrictFP(SDNode *N) {
+ SDValue InOp = N->getOperand(1);
+ SDLoc DL(N);
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ unsigned WidenNumElts = WidenVT.getVectorNumElements();
+ SmallVector<EVT, 2> WidenVTs = { WidenVT, MVT::Other };
+
+ EVT InVT = InOp.getValueType();
+ EVT InEltVT = InVT.getVectorElementType();
+
+ unsigned Opcode = N->getOpcode();
+
+ // FIXME: Optimizations need to be implemented here.
+
+ // Otherwise unroll into some nasty scalar code and rebuild the vector.
+ EVT EltVT = WidenVT.getVectorElementType();
+ SmallVector<EVT, 2> EltVTs = { EltVT, MVT::Other };
+ SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT));
+ SmallVector<SDValue, 32> OpChains;
+ // Use the original element count so we don't do more scalar ops than
+ // necessary.
+ unsigned MinElts = N->getValueType(0).getVectorNumElements();
+ for (unsigned i=0; i < MinElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, DL, EltVTs, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+
+ return DAG.getBuildVector(WidenVT, DL, Ops);
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_EXTEND_VECTOR_INREG(SDNode *N) {
unsigned Opcode = N->getOpcode();
SDValue InOp = N->getOperand(0);
@@ -3654,8 +4031,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) {
return Res;
}
- InOp1 = GetWidenedVector(InOp1);
- SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ // If the inputs also widen, handle them directly. Otherwise widen by hand.
+ SDValue InOp2 = N->getOperand(1);
+ if (getTypeAction(InVT) == TargetLowering::TypeWidenVector) {
+ InOp1 = GetWidenedVector(InOp1);
+ InOp2 = GetWidenedVector(InOp2);
+ } else {
+ InOp1 = DAG.WidenVector(InOp1, SDLoc(N));
+ InOp2 = DAG.WidenVector(InOp2, SDLoc(N));
+ }
// Assume that the input and output will be widened appropriately. If not,
// we will have to unroll it at some point.
@@ -3698,6 +4082,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::MGATHER: Res = WidenVecOp_MGATHER(N, OpNo); break;
case ISD::MSCATTER: Res = WidenVecOp_MSCATTER(N, OpNo); break;
case ISD::SETCC: Res = WidenVecOp_SETCC(N); break;
+ case ISD::VSELECT: Res = WidenVecOp_VSELECT(N); break;
case ISD::FCOPYSIGN: Res = WidenVecOp_FCOPYSIGN(N); break;
case ISD::ANY_EXTEND:
@@ -3707,6 +4092,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_EXTEND:
+ case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
@@ -3714,6 +4100,22 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::TRUNCATE:
Res = WidenVecOp_Convert(N);
break;
+
+ case ISD::VECREDUCE_FADD:
+ case ISD::VECREDUCE_FMUL:
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_MUL:
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_SMAX:
+ case ISD::VECREDUCE_SMIN:
+ case ISD::VECREDUCE_UMAX:
+ case ISD::VECREDUCE_UMIN:
+ case ISD::VECREDUCE_FMAX:
+ case ISD::VECREDUCE_FMIN:
+ Res = WidenVecOp_VECREDUCE(N);
+ break;
}
// If Res is null, the sub-method took care of registering the result.
@@ -3725,8 +4127,12 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
return true;
- assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
+ if (N->isStrictFPOpcode())
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 2 &&
+ "Invalid operand expansion");
+ else
+ assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
+ "Invalid operand expansion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
@@ -3806,7 +4212,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
EVT EltVT = VT.getVectorElementType();
SDLoc dl(N);
unsigned NumElts = VT.getVectorNumElements();
- SDValue InOp = N->getOperand(0);
+ SDValue InOp = N->getOperand(N->isStrictFPOpcode() ? 1 : 0);
assert(getTypeAction(InOp.getValueType()) ==
TargetLowering::TypeWidenVector &&
"Unexpected type action");
@@ -3815,10 +4221,19 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
unsigned Opcode = N->getOpcode();
// See if a widened result type would be legal, if so widen the node.
+ // FIXME: This isn't safe for StrictFP; widening the node directly would
+ // drop its chain, so strict opcodes take the unrolling path below for now.
EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT,
InVT.getVectorNumElements());
- if (TLI.isTypeLegal(WideVT)) {
- SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
+ if (TLI.isTypeLegal(WideVT) && !N->isStrictFPOpcode()) {
+ SDValue Res = DAG.getNode(Opcode, dl, WideVT, InOp);
return DAG.getNode(
ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
@@ -3828,12 +4243,26 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
// Unroll the convert into some scalar code and create a nasty build vector.
SmallVector<SDValue, 16> Ops(NumElts);
- for (unsigned i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(
- Opcode, dl, EltVT,
- DAG.getNode(
- ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ if (N->isStrictFPOpcode()) {
+ SmallVector<SDValue, 4> NewOps(N->op_begin(), N->op_end());
+ SmallVector<SDValue, 32> OpChains;
+ for (unsigned i=0; i < NumElts; ++i) {
+ NewOps[1] = DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ Ops[i] = DAG.getNode(Opcode, dl, { EltVT, MVT::Other }, NewOps);
+ OpChains.push_back(Ops[i].getValue(1));
+ }
+ SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OpChains);
+ ReplaceValueWith(SDValue(N, 1), NewChain);
+ } else {
+ for (unsigned i = 0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(
+ Opcode, dl, EltVT,
+ DAG.getNode(
+ ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
+ DAG.getConstant(i, dl, TLI.getVectorIdxTy(DAG.getDataLayout()))));
+ }
return DAG.getBuildVector(VT, dl, Ops);
}
@@ -3859,6 +4288,24 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
}
}
+ // Handle a case like bitcast v12i8 -> v3i32. Normally that would get widened
+ // to v16i8 -> v4i32, but for a target where v3i32 is legal but v12i8 is not,
+ // we end up here. Handling the case here with EXTRACT_SUBVECTOR avoids
+ // having to copy via memory.
+ if (VT.isVector()) {
+ EVT EltVT = VT.getVectorElementType();
+ unsigned EltSize = EltVT.getSizeInBits();
+ if (InWidenSize % EltSize == 0) {
+ unsigned NewNumElts = InWidenSize / EltSize;
+ EVT NewVT = EVT::getVectorVT(*DAG.getContext(), EltVT, NewNumElts);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, BitOp,
+ DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+ }
+ }
+ }
+
return CreateStackStoreLoad(InOp, VT);
}
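The new bitcast path above keeps the value in registers: the widened v16i8 input is bitcast to v4i32 and the v3i32 result is taken as the leading subvector. A self-contained C++ model of that reinterpretation, using memcpy in place of ISD::BITCAST; the function name is invented for the sketch:

#include <array>
#include <cstdint>
#include <cstring>
#include <vector>

// Model: a 12-byte vector widened to 16 bytes is reinterpreted as four i32
// lanes; taking the first three lanes (EXTRACT_SUBVECTOR at index 0)
// recovers the v3i32 result without a round-trip through the stack.
std::vector<uint32_t> bitcastV12i8ToV3i32(const std::array<uint8_t, 16> &Wide) {
  std::array<uint32_t, 4> AsI32;
  std::memcpy(AsI32.data(), Wide.data(), 16);  // BITCAST v16i8 -> v4i32
  return {AsI32[0], AsI32[1], AsI32[2]};       // EXTRACT_SUBVECTOR, idx 0
}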
@@ -4000,10 +4447,9 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
SDValue Index = MSC->getIndex();
SDValue Scale = MSC->getScale();
- unsigned NumElts;
if (OpNo == 1) {
DataOp = GetWidenedVector(DataOp);
- NumElts = DataOp.getValueType().getVectorNumElements();
+ unsigned NumElts = DataOp.getValueType().getVectorNumElements();
// Widen index.
EVT IndexVT = Index.getValueType();
@@ -4041,8 +4487,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
// Get a new SETCC node to compare the newly widened operands.
// Only some of the compared elements are legal.
- EVT SVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- InOp0.getValueType());
+ EVT SVT = getSetCCResultType(InOp0.getValueType());
// The result type is legal, if its vXi1, keep vXi1 for the new SETCC.
if (VT.getScalarType() == MVT::i1)
SVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
@@ -4062,6 +4507,80 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
return PromoteTargetBoolean(CC, VT);
}
+SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
+ SDLoc dl(N);
+ SDValue Op = GetWidenedVector(N->getOperand(0));
+ EVT OrigVT = N->getOperand(0).getValueType();
+ EVT WideVT = Op.getValueType();
+ EVT ElemVT = OrigVT.getVectorElementType();
+
+ SDValue NeutralElem;
+ switch (N->getOpcode()) {
+ case ISD::VECREDUCE_ADD:
+ case ISD::VECREDUCE_OR:
+ case ISD::VECREDUCE_XOR:
+ case ISD::VECREDUCE_UMAX:
+ NeutralElem = DAG.getConstant(0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_MUL:
+ NeutralElem = DAG.getConstant(1, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_AND:
+ case ISD::VECREDUCE_UMIN:
+ NeutralElem = DAG.getAllOnesConstant(dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMAX:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMinValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_SMIN:
+ NeutralElem = DAG.getConstant(
+ APInt::getSignedMaxValue(ElemVT.getSizeInBits()), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FADD:
+ NeutralElem = DAG.getConstantFP(0.0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMUL:
+ NeutralElem = DAG.getConstantFP(1.0, dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMAX:
+ NeutralElem = DAG.getConstantFP(
+ -std::numeric_limits<double>::infinity(), dl, ElemVT);
+ break;
+ case ISD::VECREDUCE_FMIN:
+ NeutralElem = DAG.getConstantFP(
+ std::numeric_limits<double>::infinity(), dl, ElemVT);
+ break;
+ }
+
+ // Pad the vector with the neutral element.
+ unsigned OrigElts = OrigVT.getVectorNumElements();
+ unsigned WideElts = WideVT.getVectorNumElements();
+ for (unsigned Idx = OrigElts; Idx < WideElts; Idx++)
+ Op = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, WideVT, Op, NeutralElem,
+ DAG.getConstant(Idx, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
+
+ return DAG.getNode(N->getOpcode(), dl, N->getValueType(0), Op, N->getFlags());
+}
+
+SDValue DAGTypeLegalizer::WidenVecOp_VSELECT(SDNode *N) {
+ // This only gets called in the case that the left and right inputs and
+ // result are of a legal odd vector type, and the condition is illegal i1 of
+ // the same odd width that needs widening.
+ EVT VT = N->getValueType(0);
+ assert(VT.isVector() && !VT.isPow2VectorType() && isTypeLegal(VT));
+
+ SDValue Cond = GetWidenedVector(N->getOperand(0));
+ SDValue LeftIn = DAG.WidenVector(N->getOperand(1), SDLoc(N));
+ SDValue RightIn = DAG.WidenVector(N->getOperand(2), SDLoc(N));
+ SDLoc DL(N);
+
+ SDValue Select = DAG.getNode(N->getOpcode(), DL, LeftIn.getValueType(), Cond,
+ LeftIn, RightIn);
+ return DAG.getNode(
+ ISD::EXTRACT_SUBVECTOR, DL, VT, Select,
+ DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
+}
//===----------------------------------------------------------------------===//
// Vector Widening Utilities
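The neutral-element padding in WidenVecOp_VECREDUCE above guarantees the extra lanes cannot change the reduced value. A short C++ model of the idea for an integer ADD reduction, with std::accumulate standing in for VECREDUCE_ADD:

#include <numeric>
#include <vector>

// Model of the padding: widen a 3-lane integer ADD reduction to 4 lanes with
// the op's neutral element (0 for add) so the pad lanes can't change the
// result; UMIN would pad with all-ones, SMAX with the signed minimum, FMAX
// with -infinity, and so on, matching the switch above.
int reduceAddWidened(const std::vector<int> &Orig, unsigned WideElts) {
  std::vector<int> Wide(Orig.begin(), Orig.end());
  Wide.resize(WideElts, /*NeutralElem=*/0);             // INSERT_VECTOR_ELT
  return std::accumulate(Wide.begin(), Wide.end(), 0);  // VECREDUCE_ADD
}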
@@ -4102,6 +4621,8 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
+ if (MemVTWidth == WidenWidth)
+ return MemVT;
RetVT = MemVT;
break;
}
@@ -4113,7 +4634,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
+ if ((Action == TargetLowering::TypeLegal ||
+ Action == TargetLowering::TypePromoteInteger) &&
+ WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
isPowerOf2_32(WidenWidth / MemVTWidth) &&
(MemVTWidth <= Width ||
diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
index 7f369c746d24..34660e3a48ec 100644
--- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
+++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp
@@ -1,9 +1,8 @@
//===- ResourcePriorityQueue.cpp - A DFA-oriented priority queue -*- C++ -*-==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -85,6 +84,7 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) {
case ISD::CopyFromReg: NumberDeps++; break;
case ISD::CopyToReg: break;
case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
@@ -121,6 +121,7 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU,
case ISD::CopyFromReg: break;
case ISD::CopyToReg: NumberDeps++; break;
case ISD::INLINEASM: break;
+ case ISD::INLINEASM_BR: break;
}
if (!ScegN->isMachineOpcode())
continue;
@@ -446,6 +447,7 @@ int ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) {
break;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
ResCount += PriorityThree;
break;
}
@@ -548,6 +550,7 @@ void ResourcePriorityQueue::initNumRegDefsLeft(SUnit *SU) {
NodeNumDefs++;
break;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
NodeNumDefs++;
break;
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index f7566b246f32..65b9d017fc5c 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -1,9 +1,8 @@
//===-- llvm/CodeGen/SDNodeDbgValue.h - SelectionDAG dbg_value --*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -136,7 +135,8 @@ public:
/// dbg.addr is emitted twice.
void clearIsEmitted() { Emitted = false; }
- LLVM_DUMP_METHOD void dump(raw_ostream &OS) const;
+ LLVM_DUMP_METHOD void dump() const;
+ LLVM_DUMP_METHOD void print(raw_ostream &OS) const;
};
/// Holds the information from a dbg_label node through SDISel.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 90e109b022fd..2cb850fa1a3d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -1,9 +1,8 @@
//===----- ScheduleDAGFast.cpp - Fast poor list scheduler -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -480,7 +479,8 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
- if (Node->getOpcode() == ISD::INLINEASM) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 8d75b8133a30..34b4c8502353 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAGRRList.cpp - Reg pressure reduction list scheduler ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -220,6 +219,14 @@ public:
return Topo.WillCreateCycle(SU, TargetSU);
}
+ /// AddPredQueued - Queues an update to add a predecessor edge to SUnit SU.
+ /// Does *NOT* update the topological ordering! It just queues an update.
+ void AddPredQueued(SUnit *SU, const SDep &D) {
+ Topo.AddPredQueued(SU, D.getSUnit());
+ SU->addPred(D);
+ }
+
/// AddPred - adds a predecessor edge to SUnit SU.
/// This returns true if this is a new predecessor.
/// Updates the topological ordering if required.
@@ -267,24 +274,22 @@ private:
void ListScheduleBottomUp();
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
- /// Updates the topological ordering if required.
SUnit *CreateNewSUnit(SDNode *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = newSUnit(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
return NewNode;
}
/// CreateClone - Creates a new SUnit from an existing one.
- /// Updates the topological ordering if required.
SUnit *CreateClone(SUnit *N) {
unsigned NumSUnits = SUnits.size();
SUnit *NewNode = Clone(N);
// Update the topological ordering.
if (NewNode->NodeNum >= NumSUnits)
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
return NewNode;
}
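CreateNewSUnit and CreateClone now only mark the topological order dirty, and edges added via AddPredQueued are batched; the order is rebuilt once, on the next query, instead of after every single edge insertion. A schematic C++ model of that lazy scheme, with invented names (this is not the ScheduleDAG topological-sort API):

#include <utility>
#include <vector>

// Schematic model of the lazy scheme: adding an edge only records it and
// marks the order dirty; the expensive topological rebuild happens once, on
// the next query, rather than after every AddPred call.
struct LazyTopoOrder {
  bool Dirty = false;
  std::vector<std::pair<int, int>> PendingEdges;  // (Pred, Succ) SUnit ids
  void AddPredQueued(int SU, int PredSU) {
    PendingEdges.push_back({PredSU, SU});
    Dirty = true;                 // defer the recompute (MarkDirty)
  }
  void EnsureUpToDate() {         // called before the order is queried
    if (!Dirty)
      return;
    // ... rebuild the topological index from PendingEdges here ...
    PendingEdges.clear();
    Dirty = false;
  }
};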
@@ -366,7 +371,7 @@ void ScheduleDAGRRList::Schedule() {
BuildSchedGraph(nullptr);
LLVM_DEBUG(dump());
- Topo.InitDAGTopologicalSorting();
+ Topo.MarkDirty();
AvailableQueue->initNodes(SUnits);
@@ -709,6 +714,7 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
// removed.
return;
case ISD::INLINEASM:
+ case ISD::INLINEASM_BR:
// For inline asm, clear the pipeline state.
HazardRec->Reset();
return;
@@ -1017,8 +1023,9 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
NewSU = &SUnits[N->getNodeId()];
// If NewSU has already been scheduled, we need to clone it, but this
// negates the benefit to unfolding so just return SU.
- if (NewSU->isScheduled)
+ if (NewSU->isScheduled) {
return SU;
+ }
isNewN = false;
} else {
NewSU = CreateNewSUnit(N);
@@ -1071,23 +1078,23 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
for (const SDep &Pred : ChainPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
- AddPred(LoadSU, Pred);
+ AddPredQueued(LoadSU, Pred);
}
for (const SDep &Pred : LoadPreds) {
RemovePred(SU, Pred);
if (isNewLoad)
- AddPred(LoadSU, Pred);
+ AddPredQueued(LoadSU, Pred);
}
for (const SDep &Pred : NodePreds) {
RemovePred(SU, Pred);
- AddPred(NewSU, Pred);
+ AddPredQueued(NewSU, Pred);
}
for (SDep D : NodeSuccs) {
SUnit *SuccDep = D.getSUnit();
D.setSUnit(SU);
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
- AddPred(SuccDep, D);
+ AddPredQueued(SuccDep, D);
// Balance register pressure.
if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled &&
!D.isCtrl() && NewSU->NumRegDefsLeft > 0)
@@ -1099,7 +1106,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
RemovePred(SuccDep, D);
if (isNewLoad) {
D.setSUnit(LoadSU);
- AddPred(SuccDep, D);
+ AddPredQueued(SuccDep, D);
}
}
@@ -1107,7 +1114,7 @@ SUnit *ScheduleDAGRRList::TryUnfoldSU(SUnit *SU) {
// by LoadSU.
SDep D(LoadSU, SDep::Data, 0);
D.setLatency(LoadSU->Latency);
- AddPred(NewSU, D);
+ AddPredQueued(NewSU, D);
if (isNewLoad)
AvailableQueue->addNode(LoadSU);
@@ -1179,7 +1186,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
// New SUnit has the exact same predecessors.
for (SDep &Pred : SU->Preds)
if (!Pred.isArtificial())
- AddPred(NewSU, Pred);
+ AddPredQueued(NewSU, Pred);
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
@@ -1191,7 +1198,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (SuccSU->isScheduled) {
SDep D = Succ;
D.setSUnit(NewSU);
- AddPred(SuccSU, D);
+ AddPredQueued(SuccSU, D);
D.setSUnit(SU);
DelDeps.push_back(std::make_pair(SuccSU, D));
}
@@ -1230,14 +1237,14 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
if (SuccSU->isScheduled) {
SDep D = Succ;
D.setSUnit(CopyToSU);
- AddPred(SuccSU, D);
+ AddPredQueued(SuccSU, D);
DelDeps.push_back(std::make_pair(SuccSU, Succ));
}
else {
// Avoid scheduling the def-side copy before other successors. Otherwise
// we could introduce another physreg interference on the copy and
// continue inserting copies indefinitely.
- AddPred(SuccSU, SDep(CopyFromSU, SDep::Artificial));
+ AddPredQueued(SuccSU, SDep(CopyFromSU, SDep::Artificial));
}
}
for (auto &DelDep : DelDeps)
@@ -1245,10 +1252,10 @@ void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
SDep FromDep(SU, SDep::Data, Reg);
FromDep.setLatency(SU->Latency);
- AddPred(CopyFromSU, FromDep);
+ AddPredQueued(CopyFromSU, FromDep);
SDep ToDep(CopyFromSU, SDep::Data, 0);
ToDep.setLatency(CopyFromSU->Latency);
- AddPred(CopyToSU, ToDep);
+ AddPredQueued(CopyToSU, ToDep);
AvailableQueue->updateNode(SU);
AvailableQueue->addNode(CopyFromSU);
@@ -1348,7 +1355,8 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
}
for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
- if (Node->getOpcode() == ISD::INLINEASM) {
+ if (Node->getOpcode() == ISD::INLINEASM ||
+ Node->getOpcode() == ISD::INLINEASM_BR) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
@@ -1477,6 +1485,11 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
if (CurSU)
return CurSU;
+ // We query the topological order in the loop body, so make sure outstanding
+ // updates are applied before entering it (we only enter the loop if there
+ // are some interferences). If we make changes to the ordering, we exit
+ // the loop.
+
// All candidates are delayed due to live physical reg dependencies.
// Try backtracking, code duplication, or inserting cross class copies
// to resolve it.
@@ -1506,7 +1519,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
}
LLVM_DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum
<< ") to SU(" << TrySU->NodeNum << ")\n");
- AddPred(TrySU, SDep(BtSU, SDep::Artificial));
+ AddPredQueued(TrySU, SDep(BtSU, SDep::Artificial));
// If one or more successors has been unscheduled, then the current
// node is no longer available.
@@ -1560,14 +1573,14 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Artificial));
+ AddPredQueued(TrySU, SDep(Copies.front(), SDep::Artificial));
NewDef = Copies.back();
}
LLVM_DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
<< " to SU #" << TrySU->NodeNum << "\n");
LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Artificial));
+ AddPredQueued(NewDef, SDep(TrySU, SDep::Artificial));
TrySU->isAvailable = false;
CurSU = NewDef;
}
@@ -2939,6 +2952,29 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
(cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
+ SDNode *PredFrameSetup = nullptr;
+ for (const SDep &Pred : SU.Preds)
+ if (Pred.isCtrl() && Pred.getSUnit()) {
+ // Find a predecessor that is not a data dependence.
+ SDNode *PredND = Pred.getSUnit()->getNode();
+
+ // If PredND is FrameSetup, we should not pre-schedule the node,
+ // otherwise, during bottom-up scheduling, ADJCALLSTACKDOWN and
+ // ADJCALLSTACKUP may hold the CallResource too long and prevent
+ // other calls from being scheduled. If there is no other available
+ // node to schedule, the scheduler will try to rename the register by
+ // creating a copy to avoid the conflict, which will fail because
+ // CallResource is not a real physical register.
+ if (PredND && PredND->isMachineOpcode() &&
+ (PredND->getMachineOpcode() == TII->getCallFrameSetupOpcode())) {
+ PredFrameSetup = PredND;
+ break;
+ }
+ }
+ // Skip nodes that have a FrameSetup parent.
+ if (PredFrameSetup != nullptr)
+ continue;
+
// Locate the single data predecessor.
SUnit *PredSU = nullptr;
for (const SDep &Pred : SU.Preds)
@@ -2993,9 +3029,9 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
if (SuccSU != &SU) {
Edge.setSUnit(PredSU);
scheduleDAG->RemovePred(SuccSU, Edge);
- scheduleDAG->AddPred(&SU, Edge);
+ scheduleDAG->AddPredQueued(&SU, Edge);
Edge.setSUnit(&SU);
- scheduleDAG->AddPred(SuccSU, Edge);
+ scheduleDAG->AddPredQueued(SuccSU, Edge);
--i;
}
}
@@ -3077,7 +3113,7 @@ void RegReductionPQBase::AddPseudoTwoAddrDeps() {
LLVM_DEBUG(dbgs()
<< " Adding a pseudo-two-addr edge from SU #"
<< SU.NodeNum << " to SU #" << SuccSU->NodeNum << "\n");
- scheduleDAG->AddPred(&SU, SDep(SuccSU, SDep::Artificial));
+ scheduleDAG->AddPredQueued(&SU, SDep(SuccSU, SDep::Artificial));
}
}
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index e258f0a218a5..568c6191e512 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -1,9 +1,8 @@
//===--- ScheduleDAGSDNodes.cpp - Implement the ScheduleDAGSDNodes class --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -206,6 +205,19 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (!Chain)
return;
+ // Skip any load instruction that has a tied input. There may be an additional
+ // dependency requiring a different order than by increasing offsets, and the
+ // added glue may introduce a cycle.
+ auto hasTiedInput = [this](const SDNode *N) {
+ const MCInstrDesc &MCID = TII->get(N->getMachineOpcode());
+ for (unsigned I = 0; I != MCID.getNumOperands(); ++I) {
+ if (MCID.getOperandConstraint(I, MCOI::TIED_TO) != -1)
+ return true;
+ }
+
+ return false;
+ };
+
// Look for other loads of the same chain. Find loads that are loading from
// the same base pointer and different offsets.
SmallPtrSet<SDNode*, 16> Visited;
@@ -213,6 +225,10 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
DenseMap<long long, SDNode*> O2SMap; // Map from offset to SDNode.
bool Cluster = false;
SDNode *Base = Node;
+
+ if (hasTiedInput(Base))
+ return;
+
// This algorithm requires a reasonably low use count before finding a match
// to avoid uselessly blowing up compile time in large blocks.
unsigned UseCount = 0;
@@ -223,10 +239,12 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
continue;
int64_t Offset1, Offset2;
if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) ||
- Offset1 == Offset2)
+ Offset1 == Offset2 ||
+ hasTiedInput(User)) {
// FIXME: Should be ok if the addresses are identical. But earlier
// optimizations really should have eliminated one of the loads.
continue;
+ }
if (O2SMap.insert(std::make_pair(Offset1, Base)).second)
Offsets.push_back(Offset1);
O2SMap.insert(std::make_pair(Offset2, User));
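The clustering loop above builds an offset-to-node map so loads off one base pointer can later be visited in increasing offset order. A standalone sketch of that bookkeeping, with plain ints standing in for SDNodes (LLVM itself uses a DenseMap plus a separate offset vector; std::map keeps its keys sorted for us):

    #include <map>
    #include <vector>

    struct Load { int Id; long long Offset; };

    // Returns node ids ordered by increasing offset; a duplicate offset keeps
    // the first load seen, mirroring the insert-if-absent above.
    std::vector<int> clusterByOffset(const std::vector<Load> &Loads) {
      std::map<long long, int> O2S;
      for (const Load &L : Loads)
        O2S.emplace(L.Offset, L.Id); // emplace never overwrites existing keys
      std::vector<int> Order;
      for (const auto &KV : O2S)     // std::map iterates in key order
        Order.push_back(KV.second);
      return Order;
    }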
@@ -741,28 +759,27 @@ ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
static void
ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter,
DenseMap<SDValue, unsigned> &VRBaseMap,
- SmallVectorImpl<std::pair<unsigned, MachineInstr*> > &Orders,
- SmallSet<unsigned, 8> &Seen) {
+ SmallVectorImpl<std::pair<unsigned, MachineInstr *>> &Orders,
+ SmallSet<unsigned, 8> &Seen, MachineInstr *NewInsn) {
unsigned Order = N->getIROrder();
- if (!Order || !Seen.insert(Order).second) {
+ if (!Order || Seen.count(Order)) {
// Process any valid SDDbgValues even if node does not have any order
// assigned.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
return;
}
- MachineBasicBlock *BB = Emitter.getBlock();
- auto IP = Emitter.getInsertPos();
- if (IP == BB->begin() || BB->back().isPHI() ||
- // Fast-isel may have inserted some instructions, in which case the
- // BB->back().isPHI() test will not fire when we want it to.
- std::prev(IP)->isPHI()) {
- // Did not insert any instruction.
- Orders.push_back({Order, (MachineInstr *)nullptr});
- return;
+ // If a new instruction was generated for this Order number, record it.
+ // Otherwise, leave this order number unseen: we will either find later
+ // instructions for it, or leave it unseen if there were no instructions at
+ // all.
+ if (NewInsn) {
+ Seen.insert(Order);
+ Orders.push_back({Order, NewInsn});
}
- Orders.push_back({Order, &*std::prev(IP)});
+ // Even if no instruction was generated, a Value may have become defined via
+ // earlier nodes. Try to process them now.
ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
}
@@ -815,6 +832,43 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
SmallSet<unsigned, 8> Seen;
bool HasDbg = DAG->hasDebugValues();
+ // Emit a node, and determine where its first instruction is for debuginfo.
+ // Zero, one, or multiple instructions can be created when emitting a node.
+ auto EmitNode =
+ [&](SDNode *Node, bool IsClone, bool IsCloned,
+ DenseMap<SDValue, unsigned> &VRBaseMap) -> MachineInstr * {
+ // Fetch the instruction prior to this one, or end() if there is none.
+ auto GetPrevInsn = [&](MachineBasicBlock::iterator I) {
+ if (I == BB->begin())
+ return BB->end();
+ else
+ return std::prev(I);
+ };
+
+ MachineBasicBlock::iterator Before = GetPrevInsn(Emitter.getInsertPos());
+ Emitter.EmitNode(Node, IsClone, IsCloned, VRBaseMap);
+ MachineBasicBlock::iterator After = GetPrevInsn(Emitter.getInsertPos());
+
+ // If the iterator did not change, no instructions were inserted.
+ if (Before == After)
+ return nullptr;
+
+ MachineInstr *MI;
+ if (Before == BB->end()) {
+ // There were no prior instructions; the new ones must start at the
+ // beginning of the block.
+ MI = &Emitter.getBlock()->instr_front();
+ } else {
+ // Return first instruction after the pre-existing instructions.
+ MI = &*std::next(Before);
+ }
+
+ if (MI->isCall() && DAG->getTarget().Options.EnableDebugEntryValues)
+ MF.addCallArgsForwardingRegs(MI, DAG->getSDCallSiteInfo(Node));
+
+ return MI;
+ };
+
// If this is the first BB, emit byval parameter dbg_value's.
if (HasDbg && BB->getParent()->begin() == MachineFunction::iterator(BB)) {
SDDbgInfo::DbgIterator PDI = DAG->ByvalParmDbgBegin();
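The EmitNode wrapper above brackets the emission with before/after snapshots of the insertion point's predecessor; because list iterators survive insertion, comparing the snapshots reveals whether anything was emitted and, if so, where the first new instruction sits. A hedged standalone sketch of the same trick on std::list:

    #include <list>

    // Runs Emit(), which may insert any number of elements just before
    // InsertPos, and returns the first inserted element (or nullptr if none).
    template <typename T, typename EmitFn>
    T *emitAndFindFirst(std::list<T> &L,
                        typename std::list<T>::iterator InsertPos,
                        EmitFn Emit) {
      // Element before the insertion point, or end() when inserting at front.
      auto Prev = [&]() {
        return InsertPos == L.begin() ? L.end() : std::prev(InsertPos);
      };
      auto Before = Prev();
      Emit();
      auto After = Prev();  // now the last inserted element, if any
      if (Before == After)
        return nullptr;     // predecessor unchanged: nothing was inserted
      // First new element: the list front if there was no predecessor.
      return Before == L.end() ? &L.front() : &*std::next(Before);
    }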
@@ -851,18 +905,18 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
GluedNodes.push_back(N);
while (!GluedNodes.empty()) {
SDNode *N = GluedNodes.back();
- Emitter.EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
+ auto NewInsn = EmitNode(N, SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
+ ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
GluedNodes.pop_back();
}
- Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
- VRBaseMap);
+ auto NewInsn =
+ EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned, VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
- ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders,
- Seen);
+ ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
+ NewInsn);
}
// Insert all the dbg_values which have not already been inserted in source
@@ -873,7 +927,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Sort the source order instructions and use the order to insert debug
// values. Use stable_sort so that DBG_VALUEs are inserted in the same order
// regardless of the host's implementation of std::sort.
- std::stable_sort(Orders.begin(), Orders.end(), less_first());
+ llvm::stable_sort(Orders, less_first());
std::stable_sort(DAG->DbgBegin(), DAG->DbgEnd(),
[](const SDDbgValue *LHS, const SDDbgValue *RHS) {
return LHS->getOrder() < RHS->getOrder();
@@ -887,8 +941,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
unsigned Order = Orders[i].first;
MachineInstr *MI = Orders[i].second;
// Insert all SDDbgValue's whose order(s) are before "Order".
- if (!MI)
- continue;
+ assert(MI);
for (; DI != DE; ++DI) {
if ((*DI)->getOrder() < LastOrder || (*DI)->getOrder() >= Order)
break;
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 3fa7ad895725..5163b4fa4fd3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -1,9 +1,8 @@
//===---- ScheduleDAGSDNodes.h - SDNode Scheduling --------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index 416061475b1a..ab06b55b49fd 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -1,9 +1,8 @@
//===- ScheduleDAGVLIW.cpp - SelectionDAG list scheduler for VLIW -*- C++ -*-=//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 647496c1afcb..5852e693fa9f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAG.cpp - Implement the SelectionDAG data structures ------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,6 +85,7 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
// Default null implementations of the callbacks.
void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {}
void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {}
+void SelectionDAG::DAGUpdateListener::NodeInserted(SDNode *) {}
void SelectionDAG::DAGNodeDeletedListener::anchor() {}
@@ -262,12 +262,7 @@ bool ISD::allOperandsUndef(const SDNode *N) {
// is probably the desired behavior.
if (N->getNumOperands() == 0)
return false;
-
- for (const SDValue &Op : N->op_values())
- if (!Op.isUndef())
- return false;
-
- return true;
+ return all_of(N->op_values(), [](SDValue Op) { return Op.isUndef(); });
}
bool ISD::matchUnaryPredicate(SDValue Op,
@@ -299,8 +294,8 @@ bool ISD::matchUnaryPredicate(SDValue Op,
bool ISD::matchBinaryPredicate(
SDValue LHS, SDValue RHS,
std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
- bool AllowUndefs) {
- if (LHS.getValueType() != RHS.getValueType())
+ bool AllowUndefs, bool AllowTypeMismatch) {
+ if (!AllowTypeMismatch && LHS.getValueType() != RHS.getValueType())
return false;
// TODO: Add support for scalar UNDEF cases?
@@ -323,8 +318,8 @@ bool ISD::matchBinaryPredicate(
auto *RHSCst = dyn_cast<ConstantSDNode>(RHSOp);
if ((!LHSCst && !LHSUndef) || (!RHSCst && !RHSUndef))
return false;
- if (LHSOp.getValueType() != SVT ||
- LHSOp.getValueType() != RHSOp.getValueType())
+ if (!AllowTypeMismatch && (LHSOp.getValueType() != SVT ||
+ LHSOp.getValueType() != RHSOp.getValueType()))
return false;
if (!Match(LHSCst, RHSCst))
return false;
@@ -518,6 +513,13 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) {
case ISD::TargetFrameIndex:
ID.AddInteger(cast<FrameIndexSDNode>(N)->getIndex());
break;
+ case ISD::LIFETIME_START:
+ case ISD::LIFETIME_END:
+ if (cast<LifetimeSDNode>(N)->hasOffset()) {
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getSize());
+ ID.AddInteger(cast<LifetimeSDNode>(N)->getOffset());
+ }
+ break;
case ISD::JumpTable:
case ISD::TargetJumpTable:
ID.AddInteger(cast<JumpTableSDNode>(N)->getIndex());
@@ -834,6 +836,8 @@ void SelectionDAG::InsertNode(SDNode *N) {
N->PersistentId = NextPersistentId++;
VerifySDNode(N);
#endif
+ for (DAGUpdateListener *DUL = UpdateListeners; DUL; DUL = DUL->Next)
+ DUL->NodeInserted(N);
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
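The NodeInserted walk added above follows the intrusive-listener pattern already used for NodeDeleted/NodeUpdated: listeners chain through a Next pointer and an event traverses the whole chain. A minimal sketch of the shape (simplified; the fields only loosely follow DAGUpdateListener):

    struct Listener {
      Listener *Next = nullptr;        // intrusive singly linked chain
      virtual ~Listener() = default;
      virtual void nodeInserted(int NodeId) { (void)NodeId; } // default no-op
    };

    // Deliver the event to every registered listener, head to tail.
    void notifyInserted(Listener *Head, int NodeId) {
      for (Listener *L = Head; L; L = L->Next)
        L->nodeInserted(NodeId);
    }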
@@ -1136,6 +1140,18 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
getConstant(Imm, DL, Op.getValueType()));
}
+SDValue SelectionDAG::getPtrExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZExtOrTrunc(Op, DL, VT);
+}
+
+SDValue SelectionDAG::getPtrExtendInReg(SDValue Op, const SDLoc &DL, EVT VT) {
+ // Only unsigned pointer semantics are supported right now. In the future this
+ // might delegate to TLI to check pointer signedness.
+ return getZeroExtendInReg(Op, DL, VT);
+}
+
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
SDValue SelectionDAG::getNOT(const SDLoc &DL, SDValue Val, EVT VT) {
EVT EltVT = VT.getScalarType();
@@ -1274,6 +1290,12 @@ SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, const SDLoc &DL,
return getConstant(Val, DL, TLI->getPointerTy(getDataLayout()), isTarget);
}
+SDValue SelectionDAG::getShiftAmountConstant(uint64_t Val, EVT VT,
+ const SDLoc &DL, bool LegalTypes) {
+ EVT ShiftVT = TLI->getShiftAmountTy(VT, getDataLayout(), LegalTypes);
+ return getConstant(Val, DL, ShiftVT);
+}
+
SDValue SelectionDAG::getConstantFP(const APFloat &V, const SDLoc &DL, EVT VT,
bool isTarget) {
return getConstantFP(*ConstantFP::get(*getContext(), V), DL, VT, isTarget);
@@ -1403,7 +1425,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
- Alignment = MF->getFunction().optForSize()
+ Alignment = MF->getFunction().hasOptSize()
? getDataLayout().getABITypeAlignment(C->getType())
: getDataLayout().getPrefTypeAlignment(C->getType());
unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool;
@@ -1770,7 +1792,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
if (SDNode *E = FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
- auto *N = newSDNode<LabelSDNode>(dl.getIROrder(), dl.getDebugLoc(), Label);
+ auto *N =
+ newSDNode<LabelSDNode>(Opcode, dl.getIROrder(), dl.getDebugLoc(), Label);
createOperands(N, Ops);
CSEMap.InsertNode(N, IP);
@@ -1965,10 +1988,30 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
case ISD::SETUO:
case ISD::SETUEQ:
case ISD::SETUNE:
- assert(!N1.getValueType().isInteger() && "Illegal setcc for integer!");
+ assert(!OpVT.isInteger() && "Illegal setcc for integer!");
break;
}
+ if (OpVT.isInteger()) {
+ // For EQ and NE, we can always pick a value for the undef to make the
+ // predicate pass or fail, so we can return undef.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ // icmp eq/ne X, undef -> undef.
+ if ((N1.isUndef() || N2.isUndef()) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE))
+ return getUNDEF(VT);
+
+ // If both operands are undef, we can return undef for int comparison.
+ // icmp undef, undef -> undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+
+ // icmp X, X -> true/false
+ // icmp X, undef -> true/false because undef could be X.
+ if (N1 == N2)
+ return getBoolConstant(ISD::isTrueWhenEqual(Cond), dl, VT, OpVT);
+ }
+
if (ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2)) {
const APInt &C2 = N2C->getAPIntValue();
if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1)) {
@@ -1989,71 +2032,88 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
}
}
}
- if (ConstantFPSDNode *N1C = dyn_cast<ConstantFPSDNode>(N1)) {
- if (ConstantFPSDNode *N2C = dyn_cast<ConstantFPSDNode>(N2)) {
- APFloat::cmpResult R = N1C->getValueAPF().compare(N2C->getValueAPF());
- switch (Cond) {
- default: break;
- case ISD::SETEQ: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETNE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETLT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETGT: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
- VT, OpVT);
- case ISD::SETLE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
- R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETGE: if (R==APFloat::cmpUnordered)
- return getUNDEF(VT);
- LLVM_FALLTHROUGH;
- case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpEqual, dl, VT, OpVT);
- case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
- OpVT);
- case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
- R==APFloat::cmpLessThan, dl, VT,
- OpVT);
- case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
- R==APFloat::cmpUnordered, dl, VT,
- OpVT);
- case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
- VT, OpVT);
- case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
- OpVT);
- }
- } else {
- // Ensure that the constant occurs on the RHS.
- ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
- MVT CompVT = N1.getValueType().getSimpleVT();
- if (!TLI->isCondCodeLegal(SwappedCond, CompVT))
- return SDValue();
- return getSetCC(dl, VT, N2, N1, SwappedCond);
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2);
+
+ if (N1CFP && N2CFP) {
+ APFloat::cmpResult R = N1CFP->getValueAPF().compare(N2CFP->getValueAPF());
+ switch (Cond) {
+ default: break;
+ case ISD::SETEQ: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOEQ: return getBoolConstant(R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETNE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETONE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETLT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLT: return getBoolConstant(R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETGT: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGT: return getBoolConstant(R==APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETLE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOLE: return getBoolConstant(R==APFloat::cmpLessThan ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETGE: if (R==APFloat::cmpUnordered)
+ return getUNDEF(VT);
+ LLVM_FALLTHROUGH;
+ case ISD::SETOGE: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpEqual, dl, VT, OpVT);
+ case ISD::SETO: return getBoolConstant(R!=APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUO: return getBoolConstant(R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETUEQ: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETUNE: return getBoolConstant(R!=APFloat::cmpEqual, dl, VT,
+ OpVT);
+ case ISD::SETULT: return getBoolConstant(R==APFloat::cmpUnordered ||
+ R==APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ case ISD::SETUGT: return getBoolConstant(R==APFloat::cmpGreaterThan ||
+ R==APFloat::cmpUnordered, dl, VT,
+ OpVT);
+ case ISD::SETULE: return getBoolConstant(R!=APFloat::cmpGreaterThan, dl,
+ VT, OpVT);
+ case ISD::SETUGE: return getBoolConstant(R!=APFloat::cmpLessThan, dl, VT,
+ OpVT);
+ }
+ } else if (N1CFP && OpVT.isSimple() && !N2.isUndef()) {
+ // Ensure that the constant occurs on the RHS.
+ ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond);
+ if (!TLI->isCondCodeLegal(SwappedCond, OpVT.getSimpleVT()))
+ return SDValue();
+ return getSetCC(dl, VT, N2, N1, SwappedCond);
+ } else if ((N2CFP && N2CFP->getValueAPF().isNaN()) ||
+ (OpVT.isFloatingPoint() && (N1.isUndef() || N2.isUndef()))) {
+ // If an operand is known to be a NaN (or an undef that could be a NaN),
+ // we can fold the comparison.
+ // Choosing NaN for the undef always makes unordered comparisons succeed
+ // and ordered comparisons fail.
+ // Matches behavior in llvm::ConstantFoldCompareInstruction.
+ switch (ISD::getUnorderedFlavor(Cond)) {
+ default:
+ llvm_unreachable("Unknown flavor!");
+ case 0: // Known false.
+ return getBoolConstant(false, dl, VT, OpVT);
+ case 1: // Known true.
+ return getBoolConstant(true, dl, VT, OpVT);
+ case 2: // Undefined.
+ return getUNDEF(VT);
}
}
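The getUnorderedFlavor switch above relies on IEEE-754 semantics: against a NaN operand every ordered comparison is false and every unordered one is true. Plain C++ exhibits the same outcomes, as this small self-check sketch shows:

    #include <cassert>
    #include <cmath>

    int main() {
      double NaN = std::nan("");
      assert(!(NaN < 1.0));  // ordered flavor: known false
      assert(!(NaN == NaN)); // ordered flavor: known false
      assert(NaN != 1.0);    // unordered flavor: known true
      return 0;
    }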
@@ -2062,16 +2122,32 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, SDValue N2,
}
/// See if the specified operand can be simplified with the knowledge that only
-/// the bits specified by Mask are used.
-SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
+/// the bits specified by DemandedBits are used.
+/// TODO: this should really become the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits) {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return GetDemandedBits(V, DemandedBits, DemandedElts);
+}
+
+/// See if the specified operand can be simplified with the knowledge that only
+/// the bits specified by DemandedBits are used in the elements specified by
+/// DemandedElts.
+/// TODO: this should really become the DAG equivalent of
+/// SimplifyMultipleUseDemandedBits and not generate any new nodes.
+SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
+ const APInt &DemandedElts) {
switch (V.getOpcode()) {
default:
break;
case ISD::Constant: {
- const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
+ auto *CV = cast<ConstantSDNode>(V.getNode());
assert(CV && "Const value should be ConstSDNode.");
const APInt &CVal = CV->getAPIntValue();
- APInt NewVal = CVal & Mask;
+ APInt NewVal = CVal & DemandedBits;
if (NewVal != CVal)
return getConstant(NewVal, SDLoc(V), V.getValueType());
break;
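GetDemandedBits works on the principle that an operation may be dropped when it cannot affect any observed bit. The OR/XOR case in the next hunk is the simplest instance; here is a standalone sketch of its test, with hypothetical uint32_t masks standing in for APInt:

    #include <cstdint>

    // If Y is known zero on every demanded bit, (X | Y) equals X wherever
    // only those bits are observed, so the OR can be dropped.
    bool orIsNoOpOnDemandedBits(uint32_t KnownZeroOfY, uint32_t DemandedBits) {
      return (DemandedBits & ~KnownZeroOfY) == 0;
    }

For example, with DemandedBits = 0x0F and Y = X << 4 (so Y's low four bits are known zero), the test passes and (X | Y) simplifies to X on the demanded bits.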
@@ -2079,44 +2155,51 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
case ISD::OR:
case ISD::XOR:
// If the LHS or RHS don't contribute bits to the or, drop them.
- if (MaskedValueIsZero(V.getOperand(0), Mask))
+ if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
return V.getOperand(1);
- if (MaskedValueIsZero(V.getOperand(1), Mask))
+ if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
return V.getOperand(0);
break;
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
break;
- if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
+ if (auto *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) {
// See if we can recursively simplify the LHS.
unsigned Amt = RHSC->getZExtValue();
// Watch out for shift count overflow though.
- if (Amt >= Mask.getBitWidth())
+ if (Amt >= DemandedBits.getBitWidth())
break;
- APInt NewMask = Mask << Amt;
- if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
+ APInt SrcDemandedBits = DemandedBits << Amt;
+ if (SDValue SimplifyLHS =
+ GetDemandedBits(V.getOperand(0), SrcDemandedBits))
return getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS,
V.getOperand(1));
}
break;
case ISD::AND: {
// X & -1 -> X (ignoring bits which aren't demanded).
- ConstantSDNode *AndVal = isConstOrConstSplat(V.getOperand(1));
- if (AndVal && Mask.isSubsetOf(AndVal->getAPIntValue()))
- return V.getOperand(0);
+ // Also handle the case where masked out bits in X are known to be zero.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(V.getOperand(1))) {
+ const APInt &AndVal = RHSC->getAPIntValue();
+ if (DemandedBits.isSubsetOf(AndVal) ||
+ DemandedBits.isSubsetOf(computeKnownBits(V.getOperand(0)).Zero |
+ AndVal))
+ return V.getOperand(0);
+ }
break;
}
case ISD::ANY_EXTEND: {
SDValue Src = V.getOperand(0);
unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
// Being conservative here - only peek through if we only demand bits in the
- // non-extended source (even though the extended bits are technically undef).
- if (Mask.getActiveBits() > SrcBitWidth)
+ // non-extended source (even though the extended bits are technically
+ // undef).
+ if (DemandedBits.getActiveBits() > SrcBitWidth)
break;
- APInt SrcMask = Mask.trunc(SrcBitWidth);
- if (SDValue DemandedSrc = GetDemandedBits(Src, SrcMask))
+ APInt SrcDemandedBits = DemandedBits.trunc(SrcBitWidth);
+ if (SDValue DemandedSrc = GetDemandedBits(Src, SrcDemandedBits))
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
@@ -2125,7 +2208,7 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &Mask) {
unsigned ExVTBits = ExVT.getScalarSizeInBits();
// If none of the extended bits are demanded, eliminate the sextinreg.
- if (Mask.getActiveBits() <= ExVTBits)
+ if (DemandedBits.getActiveBits() <= ExVTBits)
return V.getOperand(0);
break;
@@ -2143,9 +2226,28 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero. We use
/// this predicate to simplify operations downstream. Mask is known to be zero
/// for bits that V cannot have.
-bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask,
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ EVT VT = V.getValueType();
+ APInt DemandedElts = VT.isVector()
+ ? APInt::getAllOnesValue(VT.getVectorNumElements())
+ : APInt(1, 1);
+ return MaskedValueIsZero(V, Mask, DemandedElts, Depth);
+}
+
+/// MaskedValueIsZero - Return true if 'V & Mask' is known to be zero in
+/// DemandedElts. We use this predicate to simplify operations downstream.
+/// Mask is known to be zero for bits that V cannot have.
+bool SelectionDAG::MaskedValueIsZero(SDValue V, const APInt &Mask,
+ const APInt &DemandedElts,
unsigned Depth) const {
- return Mask.isSubsetOf(computeKnownBits(Op, Depth).Zero);
+ return Mask.isSubsetOf(computeKnownBits(V, DemandedElts, Depth).Zero);
+}
+
+/// MaskedValueIsAllOnes - Return true if '(Op & Mask) == Mask'.
+bool SelectionDAG::MaskedValueIsAllOnes(SDValue V, const APInt &Mask,
+ unsigned Depth) const {
+ return Mask.isSubsetOf(computeKnownBits(V, Depth).One);
}
/// isSplatValue - Return true if the vector V has the same value
@@ -2244,28 +2346,50 @@ bool SelectionDAG::isSplatValue(SDValue V, bool AllowUndefs) {
(AllowUndefs || !UndefElts);
}
-/// Helper function that checks to see if a node is a constant or a
-/// build vector of splat constants at least within the demanded elts.
-static ConstantSDNode *isConstOrDemandedConstSplat(SDValue N,
- const APInt &DemandedElts) {
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
- return CN;
- if (N.getOpcode() != ISD::BUILD_VECTOR)
- return nullptr;
- EVT VT = N.getValueType();
- ConstantSDNode *Cst = nullptr;
- unsigned NumElts = VT.getVectorNumElements();
- assert(DemandedElts.getBitWidth() == NumElts && "Unexpected vector size");
- for (unsigned i = 0; i != NumElts; ++i) {
- if (!DemandedElts[i])
- continue;
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(i));
- if (!C || (Cst && Cst->getAPIntValue() != C->getAPIntValue()) ||
- C->getValueType(0) != VT.getScalarType())
- return nullptr;
- Cst = C;
+SDValue SelectionDAG::getSplatSourceVector(SDValue V, int &SplatIdx) {
+ V = peekThroughExtractSubvectors(V);
+
+ EVT VT = V.getValueType();
+ unsigned Opcode = V.getOpcode();
+ switch (Opcode) {
+ default: {
+ APInt UndefElts;
+ APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+ if (isSplatValue(V, DemandedElts, UndefElts)) {
+ // Handle case where all demanded elements are UNDEF.
+ if (DemandedElts.isSubsetOf(UndefElts)) {
+ SplatIdx = 0;
+ return getUNDEF(VT);
+ }
+ SplatIdx = (UndefElts & DemandedElts).countTrailingOnes();
+ return V;
+ }
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ // Check if this is a shuffle node doing a splat.
+ // TODO - remove this and rely purely on SelectionDAG::isSplatValue,
+ // getTargetVShiftNode currently struggles without the splat source.
+ auto *SVN = cast<ShuffleVectorSDNode>(V);
+ if (!SVN->isSplat())
+ break;
+ int Idx = SVN->getSplatIndex();
+ int NumElts = V.getValueType().getVectorNumElements();
+ SplatIdx = Idx % NumElts;
+ return V.getOperand(Idx / NumElts);
}
- return Cst;
+ }
+
+ return SDValue();
+}
+
+SDValue SelectionDAG::getSplatValue(SDValue V) {
+ int SplatIdx;
+ if (SDValue SrcVector = getSplatSourceVector(V, SplatIdx))
+ return getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V),
+ SrcVector.getValueType().getScalarType(), SrcVector,
+ getIntPtrConstant(SplatIdx, SDLoc(V)));
+ return SDValue();
}
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
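The VECTOR_SHUFFLE case in getSplatSourceVector above decodes a splat through the usual two-input shuffle convention: the mask indexes the virtual concatenation [Op0 | Op1]. A tiny sketch of that index arithmetic:

    #include <utility>

    // A splat of mask index Idx reads lane (Idx % NumElts) of operand
    // (Idx / NumElts), since the mask spans both operands back to back.
    std::pair<int, int> decodeSplat(int Idx, int NumElts) {
      return {Idx / NumElts, Idx % NumElts}; // {operand number, lane}
    }

With 4-lane inputs, Idx == 6 decodes to lane 2 of operand 1, matching SplatIdx = Idx % NumElts and getOperand(Idx / NumElts) above.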
@@ -2708,8 +2832,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
break;
case ISD::FSHL:
case ISD::FSHR:
- if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(2), DemandedElts)) {
+ if (ConstantSDNode *C = isConstOrConstSplat(Op.getOperand(2), DemandedElts)) {
unsigned Amt = C->getAPIntValue().urem(BitWidth);
// For fshl, 0-shift returns the 1st arg.
@@ -2801,8 +2924,59 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::LOAD: {
LoadSDNode *LD = cast<LoadSDNode>(Op);
- // If this is a ZEXTLoad and we are looking at the loaded value.
- if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ const Constant *Cst = TLI->getTargetConstantFromLoad(LD);
+ if (ISD::isNON_EXTLoad(LD) && Cst) {
+ // Determine any common known bits from the loaded constant pool value.
+ Type *CstTy = Cst->getType();
+ if ((NumElts * BitWidth) == CstTy->getPrimitiveSizeInBits()) {
+ // If it's a vector splat, then we can (quickly) reuse the scalar path.
+ // NOTE: We assume all elements match and none are UNDEF.
+ if (CstTy->isVectorTy()) {
+ if (const Constant *Splat = Cst->getSplatValue()) {
+ Cst = Splat;
+ CstTy = Cst->getType();
+ }
+ }
+ // TODO - do we need to handle different bitwidths?
+ if (CstTy->isVectorTy() && BitWidth == CstTy->getScalarSizeInBits()) {
+ // Iterate across all vector elements finding common known bits.
+ Known.One.setAllBits();
+ Known.Zero.setAllBits();
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One &= Value;
+ Known.Zero &= ~Value;
+ continue;
+ }
+ }
+ Known.One.clearAllBits();
+ Known.Zero.clearAllBits();
+ break;
+ }
+ } else if (BitWidth == CstTy->getPrimitiveSizeInBits()) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
+ const APInt &Value = CInt->getValue();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ } else if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Known.One = Value;
+ Known.Zero = ~Value;
+ }
+ }
+ }
+ } else if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) {
+ // If this is a ZEXTLoad and we are looking at the loaded value.
EVT VT = LD->getMemoryVT();
unsigned MemBits = VT.getScalarSizeInBits();
Known.Zero.setBitsFrom(MemBits);
@@ -2816,15 +2990,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
EVT InVT = Op.getOperand(0).getValueType();
APInt InDemandedElts = DemandedElts.zextOrSelf(InVT.getVectorNumElements());
Known = computeKnownBits(Op.getOperand(0), InDemandedElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::ZERO_EXTEND: {
- EVT InVT = Op.getOperand(0).getValueType();
Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InVT.getScalarSizeInBits());
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::SIGN_EXTEND_VECTOR_INREG: {
@@ -2845,7 +3016,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
}
case ISD::ANY_EXTEND: {
Known = computeKnownBits(Op.getOperand(0), Depth+1);
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* ExtendedBitsAreKnownZero */);
break;
}
case ISD::TRUNCATE: {
@@ -2878,39 +3049,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
LLVM_FALLTHROUGH;
case ISD::SUB:
case ISD::SUBC: {
- if (ConstantSDNode *CLHS = isConstOrConstSplat(Op.getOperand(0))) {
- // We know that the top bits of C-X are clear if X contains less bits
- // than C (i.e. no wrap-around can happen). For example, 20-X is
- // positive if we can prove that X is >= 0 and < 16.
- if (CLHS->getAPIntValue().isNonNegative()) {
- unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros();
- // NLZ can't be BitWidth with no sign bit
- APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1);
- Known2 = computeKnownBits(Op.getOperand(1), DemandedElts,
- Depth + 1);
-
- // If all of the MaskV bits are known to be zero, then we know the
- // output top bits are zero, because we now know that the output is
- // from [0-C].
- if ((Known2.Zero & MaskV) == MaskV) {
- unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros();
- // Top bits known zero.
- Known.Zero.setHighBits(NLZ2);
- }
- }
- }
-
- // If low bits are know to be zero in both operands, then we know they are
- // going to be 0 in the result. Both addition and complement operations
- // preserve the low zero bits.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
- if (KnownZeroLow == 0)
- break;
-
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
- Known.Zero.setLowBits(KnownZeroLow);
+ Known = KnownBits::computeForAddSub(/* Add */ false, /* NSW */ false,
+ Known, Known2);
break;
}
case ISD::UADDO:
@@ -2928,34 +3070,26 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::ADD:
case ISD::ADDC:
case ISD::ADDE: {
- // Output known-0 bits are known if clear or set in both the low clear bits
- // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
- // low 3 bits clear.
- // Output known-0 bits are also known if the top bits of each input are
- // known to be clear. For example, if one input has the top 10 bits clear
- // and the other has the top 8 bits clear, we know the top 7 bits of the
- // output must be clear.
- Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
- unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
- unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ assert(Op.getResNo() == 0 && "We only compute knownbits for the sum here.");
+
+ // With ADDE and ADDCARRY, a carry bit may be added in.
+ KnownBits Carry(1);
+ if (Opcode == ISD::ADDE)
+ // Can't track carry from glue, set carry to unknown.
+ Carry.resetAll();
+ else if (Opcode == ISD::ADDCARRY)
+ // TODO: Compute known bits for the carry operand. It is unclear whether
+ // that is worth the trouble (how often is a carry bit actually known?),
+ // and this is untested, but something like the following might work:
+ // Carry = computeKnownBits(Op.getOperand(2), DemandedElts, Depth + 1);
+ // Carry = Carry.zextOrTrunc(1, false);
+ Carry.resetAll();
+ else
+ Carry.setAllZero();
+ Known = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
Known2 = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
- KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
- KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
-
- if (Opcode == ISD::ADDE || Opcode == ISD::ADDCARRY) {
- // With ADDE and ADDCARRY, a carry bit may be added in, so we can only
- // use this information if we know (at least) that the low two bits are
- // clear. We then return to the caller that the low bit is unknown but
- // that other bits are known zero.
- if (KnownZeroLow >= 2)
- Known.Zero.setBits(1, KnownZeroLow);
- break;
- }
-
- Known.Zero.setLowBits(KnownZeroLow);
- if (KnownZeroHigh > 1)
- Known.Zero.setHighBits(KnownZeroHigh - 1);
+ Known = KnownBits::computeForAddCarry(Known, Known2, Carry);
break;
}
case ISD::SREM:
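KnownBits::computeForAddCarry, now used for both ADD and SUB above, derives the sum's known bits from the two extreme interpretations of the unknown input bits. A standalone uint32_t sketch of the idea, not LLVM's exact code, assuming the low carry-in is known:

    #include <cstdint>

    struct KB { uint32_t Zero = 0, One = 0; }; // disjoint known-0/known-1 masks

    KB addKnownBits(KB L, KB R, bool CarryIn) {
      uint32_t LUnk = ~(L.Zero | L.One), RUnk = ~(R.Zero | R.One);
      // Sum with every unknown bit taken as 0, then as 1; the true carries
      // lie between these extremes because carry propagation is monotone.
      uint32_t MinSum = L.One + R.One + CarryIn;
      uint32_t MaxSum = ~L.Zero + ~R.Zero + CarryIn;
      // A result bit is known where both input bits are known and the two
      // extreme sums agree (the carry into that bit is pinned).
      uint32_t Known = ~(MinSum ^ MaxSum) & ~LUnk & ~RUnk;
      return {~MinSum & Known, MinSum & Known};
    }

Adding two values whose low four bits are known zero, for instance, yields a sum whose low four bits are known zero; the hand-rolled trailing/leading-zero logic deleted above recovered only special cases of this.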
@@ -3010,21 +3144,20 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
case ISD::EXTRACT_ELEMENT: {
Known = computeKnownBits(Op.getOperand(0), Depth+1);
const unsigned Index = Op.getConstantOperandVal(1);
- const unsigned BitWidth = Op.getValueSizeInBits();
+ const unsigned EltBitWidth = Op.getValueSizeInBits();
// Remove low part of known bits mask
- Known.Zero = Known.Zero.getHiBits(Known.Zero.getBitWidth() - Index * BitWidth);
- Known.One = Known.One.getHiBits(Known.One.getBitWidth() - Index * BitWidth);
+ Known.Zero = Known.Zero.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
+ Known.One = Known.One.getHiBits(Known.getBitWidth() - Index * EltBitWidth);
// Remove high part of known bit mask
- Known = Known.trunc(BitWidth);
+ Known = Known.trunc(EltBitWidth);
break;
}
case ISD::EXTRACT_VECTOR_ELT: {
SDValue InVec = Op.getOperand(0);
SDValue EltNo = Op.getOperand(1);
EVT VecVT = InVec.getValueType();
- const unsigned BitWidth = Op.getValueSizeInBits();
const unsigned EltBitWidth = VecVT.getScalarSizeInBits();
const unsigned NumSrcElts = VecVT.getVectorNumElements();
// If BitWidth > EltBitWidth the value is anyext:ed. So we do not know
@@ -3042,7 +3175,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known = computeKnownBits(InVec, Depth + 1);
}
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::INSERT_VECTOR_ELT: {
@@ -3146,10 +3279,10 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// the minimum of the clamp min/max range.
bool IsMax = (Opcode == ISD::SMAX);
ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
- if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
- CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
- DemandedElts);
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
if (CstLow && CstHigh) {
if (!IsMax)
std::swap(CstLow, CstHigh);
@@ -3430,7 +3563,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
// SRA X, C -> adds C sign bits.
if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
APInt ShiftVal = C->getAPIntValue();
ShiftVal += Tmp;
Tmp = ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
@@ -3438,7 +3571,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Tmp;
case ISD::SHL:
if (ConstantSDNode *C =
- isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)) {
+ isConstOrConstSplat(Op.getOperand(1), DemandedElts)) {
// shl destroys sign bits.
Tmp = ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth+1);
if (C->getAPIntValue().uge(VTBits) || // Bad shift.
@@ -3478,10 +3611,10 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
// the minimum of the clamp min/max range.
bool IsMax = (Opcode == ISD::SMAX);
ConstantSDNode *CstLow = nullptr, *CstHigh = nullptr;
- if ((CstLow = isConstOrDemandedConstSplat(Op.getOperand(1), DemandedElts)))
+ if ((CstLow = isConstOrConstSplat(Op.getOperand(1), DemandedElts)))
if (Op.getOperand(0).getOpcode() == (IsMax ? ISD::SMIN : ISD::SMAX))
- CstHigh = isConstOrDemandedConstSplat(Op.getOperand(0).getOperand(1),
- DemandedElts);
+ CstHigh =
+ isConstOrConstSplat(Op.getOperand(0).getOperand(1), DemandedElts);
if (CstLow && CstHigh) {
if (!IsMax)
std::swap(CstLow, CstHigh);
@@ -3621,7 +3754,6 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue InVec = Op.getOperand(0);
SDValue InVal = Op.getOperand(1);
SDValue EltNo = Op.getOperand(2);
- unsigned NumElts = InVec.getValueType().getVectorNumElements();
ConstantSDNode *CEltNo = dyn_cast<ConstantSDNode>(EltNo);
if (CEltNo && CEltNo->getAPIntValue().ult(NumElts)) {
@@ -3752,13 +3884,43 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
unsigned ExtType = LD->getExtensionType();
switch (ExtType) {
- default: break;
- case ISD::SEXTLOAD: // '17' bits known
- Tmp = LD->getMemoryVT().getScalarSizeInBits();
- return VTBits-Tmp+1;
- case ISD::ZEXTLOAD: // '16' bits known
- Tmp = LD->getMemoryVT().getScalarSizeInBits();
- return VTBits-Tmp;
+ default: break;
+ case ISD::SEXTLOAD: // e.g. i16->i32 = '17' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp + 1;
+ case ISD::ZEXTLOAD: // e.g. i16->i32 = '16' bits known.
+ Tmp = LD->getMemoryVT().getScalarSizeInBits();
+ return VTBits - Tmp;
+ case ISD::NON_EXTLOAD:
+ if (const Constant *Cst = TLI->getTargetConstantFromLoad(LD)) {
+ // We only need to handle vectors - computeKnownBits should handle
+ // scalar cases.
+ Type *CstTy = Cst->getType();
+ if (CstTy->isVectorTy() &&
+ (NumElts * VTBits) == CstTy->getPrimitiveSizeInBits()) {
+ Tmp = VTBits;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (!DemandedElts[i])
+ continue;
+ if (Constant *Elt = Cst->getAggregateElement(i)) {
+ if (auto *CInt = dyn_cast<ConstantInt>(Elt)) {
+ const APInt &Value = CInt->getValue();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ if (auto *CFP = dyn_cast<ConstantFP>(Elt)) {
+ APInt Value = CFP->getValueAPF().bitcastToAPInt();
+ Tmp = std::min(Tmp, Value.getNumSignBits());
+ continue;
+ }
+ }
+ // Unknown type. Conservatively assume no bits match the sign bit.
+ return 1;
+ }
+ return Tmp;
+ }
+ }
+ break;
}
}
}
@@ -3803,8 +3965,7 @@ bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
return false;
if (Op.getOpcode() == ISD::OR &&
- !MaskedValueIsZero(Op.getOperand(0),
- cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ !MaskedValueIsZero(Op.getOperand(0), Op.getConstantOperandAPInt(1)))
return false;
return true;
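The OR check above encodes the or-as-add rule: when the constant's set bits fall only where the other operand is known zero, the OR cannot generate a carry, so (Base | Off) == (Base + Off) and the node really is base-plus-constant-offset. A one-line demonstration:

    #include <cassert>
    #include <cstdint>

    // Holds whenever (Base & Off) == 0, i.e. the OR touches only known-zero
    // bits of Base.
    void orEqualsAdd() {
      uint32_t Base = 0x1000; // 16-byte aligned: low four bits are zero
      uint32_t Off = 0x3;     // fits entirely within those zero bits
      assert((Base | Off) == (Base + Off));
    }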
@@ -4013,7 +4174,9 @@ static SDValue FoldBUILD_VECTOR(const SDLoc &DL, EVT VT,
return SDValue();
}
-static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
+/// Try to simplify vector concatenation to an input value, undef, or build
+/// vector.
+static SDValue foldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
ArrayRef<SDValue> Ops,
SelectionDAG &DAG) {
assert(!Ops.empty() && "Can't concatenate an empty list of vectors!");
@@ -4033,6 +4196,31 @@ static SDValue FoldCONCAT_VECTORS(const SDLoc &DL, EVT VT,
if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
return DAG.getUNDEF(VT);
+ // Scan the operands and look for extract operations from a single source
+ // that correspond to insertion at the same location via this concatenation:
+ // concat (extract X, 0*subvec_elts), (extract X, 1*subvec_elts), ...
+ SDValue IdentitySrc;
+ bool IsIdentity = true;
+ for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
+ SDValue Op = Ops[i];
+ unsigned IdentityIndex = i * Op.getValueType().getVectorNumElements();
+ if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
+ Op.getOperand(0).getValueType() != VT ||
+ (IdentitySrc && Op.getOperand(0) != IdentitySrc) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)) ||
+ Op.getConstantOperandVal(1) != IdentityIndex) {
+ IsIdentity = false;
+ break;
+ }
+ assert((!IdentitySrc || IdentitySrc == Op.getOperand(0)) &&
+ "Unexpected identity source vector for concat of extracts");
+ IdentitySrc = Op.getOperand(0);
+ }
+ if (IsIdentity) {
+ assert(IdentitySrc && "Failed to set source vector of extracts");
+ return IdentitySrc;
+ }
+
// A CONCAT_VECTOR with all UNDEF/BUILD_VECTOR operands can be
// simplified to one big BUILD_VECTOR.
// FIXME: Add support for SCALAR_TO_VECTOR as well.
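The identity scan added above recognizes that a concat of extracts rebuilds the original vector exactly when every piece comes from one source of the result type and piece i was extracted at offset i * subvec_elts. A standalone model of that check (the real fold also verifies the source's type matches the concat's result type):

    #include <cstddef>
    #include <vector>

    struct Piece { int SourceId; int StartLane; }; // one extract_subvector

    bool isIdentityConcat(const std::vector<Piece> &Pieces, int LanesPerPiece) {
      for (std::size_t I = 0; I < Pieces.size(); ++I) {
        if (Pieces[I].SourceId != Pieces[0].SourceId ||     // single source
            Pieces[I].StartLane != static_cast<int>(I) * LanesPerPiece)
          return false;                                     // wrong position
      }
      return true;
    }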
@@ -4288,9 +4476,23 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (Operand.isUndef())
return getUNDEF(VT);
break;
+ case ISD::FP_TO_SINT:
+ case ISD::FP_TO_UINT:
+ if (Operand.isUndef())
+ return getUNDEF(VT);
+ break;
+ case ISD::SINT_TO_FP:
+ case ISD::UINT_TO_FP:
+ // [us]itofp(undef) = 0, because the result value is bounded.
+ if (Operand.isUndef())
+ return getConstantFP(0.0, DL, VT);
+ break;
case ISD::SIGN_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "SIGN_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4307,6 +4509,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ZERO_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "ZERO_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4323,6 +4528,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::ANY_EXTEND:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "ANY_EXTEND result type type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop extension
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4350,6 +4558,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::TRUNCATE:
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
+ assert(VT.isVector() == Operand.getValueType().isVector() &&
+ "TRUNCATE result type type should be vector iff the operand "
+ "type is vector!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
assert((!VT.isVector() ||
VT.getVectorNumElements() ==
@@ -4429,6 +4640,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return Operand.getOperand(0);
break;
case ISD::FNEG:
+ // Negation of an unknown bag of bits is still completely undefined.
+ if (OpOpcode == ISD::UNDEF)
+ return getUNDEF(VT);
+
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
OpOpcode == ISD::FSUB)
@@ -4513,13 +4728,13 @@ static std::pair<APInt, bool> FoldValue(unsigned Opcode, const APInt &C1,
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, const ConstantSDNode *Cst1,
- const ConstantSDNode *Cst2) {
- if (Cst1->isOpaque() || Cst2->isOpaque())
+ EVT VT, const ConstantSDNode *C1,
+ const ConstantSDNode *C2) {
+ if (C1->isOpaque() || C2->isOpaque())
return SDValue();
- std::pair<APInt, bool> Folded = FoldValue(Opcode, Cst1->getAPIntValue(),
- Cst2->getAPIntValue());
+ std::pair<APInt, bool> Folded = FoldValue(Opcode, C1->getAPIntValue(),
+ C2->getAPIntValue());
if (!Folded.second)
return SDValue();
return getConstant(Folded.first, DL, VT);
@@ -4532,16 +4747,16 @@ SDValue SelectionDAG::FoldSymbolOffset(unsigned Opcode, EVT VT,
return SDValue();
if (!TLI->isOffsetFoldingLegal(GA))
return SDValue();
- const ConstantSDNode *Cst2 = dyn_cast<ConstantSDNode>(N2);
- if (!Cst2)
+ auto *C2 = dyn_cast<ConstantSDNode>(N2);
+ if (!C2)
return SDValue();
- int64_t Offset = Cst2->getSExtValue();
+ int64_t Offset = C2->getSExtValue();
switch (Opcode) {
case ISD::ADD: break;
case ISD::SUB: Offset = -uint64_t(Offset); break;
default: return SDValue();
}
- return getGlobalAddress(GA->getGlobal(), SDLoc(Cst2), VT,
+ return getGlobalAddress(GA->getGlobal(), SDLoc(C2), VT,
GA->getOffset() + uint64_t(Offset));
}
@@ -4571,21 +4786,20 @@ bool SelectionDAG::isUndef(unsigned Opcode, ArrayRef<SDValue> Ops) {
}
SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
- EVT VT, SDNode *Cst1,
- SDNode *Cst2) {
+ EVT VT, SDNode *N1, SDNode *N2) {
// If the opcode is a target-specific ISD node, there's nothing we can
// do here and the operand rules may not line up with the below, so
// bail early.
if (Opcode >= ISD::BUILTIN_OP_END)
return SDValue();
- if (isUndef(Opcode, {SDValue(Cst1, 0), SDValue(Cst2, 0)}))
+ if (isUndef(Opcode, {SDValue(N1, 0), SDValue(N2, 0)}))
return getUNDEF(VT);
// Handle the case of two scalars.
- if (const ConstantSDNode *Scalar1 = dyn_cast<ConstantSDNode>(Cst1)) {
- if (const ConstantSDNode *Scalar2 = dyn_cast<ConstantSDNode>(Cst2)) {
- SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, Scalar1, Scalar2);
+ if (auto *C1 = dyn_cast<ConstantSDNode>(N1)) {
+ if (auto *C2 = dyn_cast<ConstantSDNode>(N2)) {
+ SDValue Folded = FoldConstantArithmetic(Opcode, DL, VT, C1, C2);
assert((!Folded || !VT.isVector()) &&
"Can't fold vectors ops with scalar operands");
return Folded;
@@ -4593,19 +4807,19 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL,
}
// fold (add Sym, c) -> Sym+c
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst1))
- return FoldSymbolOffset(Opcode, VT, GA, Cst2);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N1))
+ return FoldSymbolOffset(Opcode, VT, GA, N2);
if (TLI->isCommutativeBinOp(Opcode))
- if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Cst2))
- return FoldSymbolOffset(Opcode, VT, GA, Cst1);
+ if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N2))
+ return FoldSymbolOffset(Opcode, VT, GA, N1);
// For vectors, extract each constant element and fold them individually.
// Either input may be an undef value.
- auto *BV1 = dyn_cast<BuildVectorSDNode>(Cst1);
- if (!BV1 && !Cst1->isUndef())
+ auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
+ if (!BV1 && !N1->isUndef())
return SDValue();
- auto *BV2 = dyn_cast<BuildVectorSDNode>(Cst2);
- if (!BV2 && !Cst2->isUndef())
+ auto *BV2 = dyn_cast<BuildVectorSDNode>(N2);
+ if (!BV2 && !N2->isUndef())
return SDValue();
// If both operands are undef, that's handled the same way as scalars.
if (!BV1 && !BV2)
@@ -4755,6 +4969,64 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode,
return V;
}
+SDValue SelectionDAG::foldConstantFPMath(unsigned Opcode, const SDLoc &DL,
+ EVT VT, SDValue N1, SDValue N2) {
+ // TODO: We don't do any constant folding for strict FP opcodes here, but we
+ // should. That will require dealing with a potentially non-default
+ // rounding mode, checking the "opStatus" return value from the APFloat
+ // math calculations, and possibly other variations.
+ auto *N1CFP = dyn_cast<ConstantFPSDNode>(N1.getNode());
+ auto *N2CFP = dyn_cast<ConstantFPSDNode>(N2.getNode());
+ if (N1CFP && N2CFP) {
+ APFloat C1 = N1CFP->getValueAPF(), C2 = N2CFP->getValueAPF();
+ switch (Opcode) {
+ case ISD::FADD:
+ C1.add(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FSUB:
+ C1.subtract(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FMUL:
+ C1.multiply(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FDIV:
+ C1.divide(C2, APFloat::rmNearestTiesToEven);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FREM:
+ C1.mod(C2);
+ return getConstantFP(C1, DL, VT);
+ case ISD::FCOPYSIGN:
+ C1.copySign(C2);
+ return getConstantFP(C1, DL, VT);
+ default: break;
+ }
+ }
+ if (N1CFP && Opcode == ISD::FP_ROUND) {
+ APFloat C1 = N1CFP->getValueAPF(); // make copy
+ bool Unused;
+ // This can return overflow, underflow, or inexact; we don't care.
+ // FIXME need to be more flexible about rounding mode.
+ (void) C1.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven,
+ &Unused);
+ return getConstantFP(C1, DL, VT);
+ }
+
+ switch (Opcode) {
+ case ISD::FADD:
+ case ISD::FSUB:
+ case ISD::FMUL:
+ case ISD::FDIV:
+ case ISD::FREM:
+ // If both operands are undef, the result is undef. If one operand is undef,
+ // the result is NaN. This should match the behavior of the IR optimizer.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
+ if (N1.isUndef() || N2.isUndef())
+ return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
+ }
+ return SDValue();
+}
+
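The folds above lean on APFloat's mutate-in-place arithmetic; the opStatus
return value, which strict-FP folding will eventually have to inspect (see the
TODO), is currently ignored. A minimal standalone sketch of that API, assuming
only LLVM's ADT headers:

    #include "llvm/ADT/APFloat.h"
    #include <cassert>
    using llvm::APFloat;

    APFloat A(1.5f), B(2.25f);
    // add() mutates A in place and reports inexact/overflow/etc. via opStatus.
    APFloat::opStatus S = A.add(B, APFloat::rmNearestTiesToEven);
    assert(S == APFloat::opOK && A.convertToFloat() == 3.75f);
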
SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
SDValue N1, SDValue N2, const SDNodeFlags Flags) {
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
@@ -4791,9 +5063,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::CONCAT_VECTORS: {
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2};
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
@@ -4847,6 +5118,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
assert(VT.isFloatingPoint() && "This operator only applies to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
+ if (SDValue V = simplifyFPBinop(Opcode, N1, N2))
+ return V;
break;
case ISD::FCOPYSIGN: // N1 and result must match. N1/N2 need not match.
assert(N1.getValueType() == VT &&
@@ -5100,73 +5373,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
FoldConstantArithmetic(Opcode, DL, VT, N1.getNode(), N2.getNode()))
return SV;
- // Constant fold FP operations.
- bool HasFPExceptions = TLI->hasFloatingPointExceptions();
- if (N1CFP) {
- if (N2CFP) {
- APFloat V1 = N1CFP->getValueAPF(), V2 = N2CFP->getValueAPF();
- APFloat::opStatus s;
- switch (Opcode) {
- case ISD::FADD:
- s = V1.add(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s != APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FSUB:
- s = V1.subtract(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FMUL:
- s = V1.multiply(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || s!=APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
- break;
- case ISD::FDIV:
- s = V1.divide(V2, APFloat::rmNearestTiesToEven);
- if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
- s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, DL, VT);
- }
- break;
- case ISD::FREM :
- s = V1.mod(V2);
- if (!HasFPExceptions || (s!=APFloat::opInvalidOp &&
- s!=APFloat::opDivByZero)) {
- return getConstantFP(V1, DL, VT);
- }
- break;
- case ISD::FCOPYSIGN:
- V1.copySign(V2);
- return getConstantFP(V1, DL, VT);
- default: break;
- }
- }
-
- if (Opcode == ISD::FP_ROUND) {
- APFloat V = N1CFP->getValueAPF(); // make copy
- bool ignored;
- // This can return overflow, underflow, or inexact; we don't care.
- // FIXME need to be more flexible about rounding mode.
- (void)V.convert(EVTToAPFloatSemantics(VT),
- APFloat::rmNearestTiesToEven, &ignored);
- return getConstantFP(V, DL, VT);
- }
- }
-
- switch (Opcode) {
- case ISD::FADD:
- case ISD::FSUB:
- case ISD::FMUL:
- case ISD::FDIV:
- case ISD::FREM:
- // If both operands are undef, the result is undef. If 1 operand is undef,
- // the result is NaN. This should match the behavior of the IR optimizer.
- if (N1.isUndef() && N2.isUndef())
- return getUNDEF(VT);
- if (N1.isUndef() || N2.isUndef())
- return getConstantFP(APFloat::getNaN(EVTToAPFloatSemantics(VT)), DL, VT);
- }
+ if (SDValue V = foldConstantFPMath(Opcode, DL, VT, N1, N2))
+ return V;
// Canonicalize an UNDEF to the RHS, even over a constant.
if (N1.isUndef()) {
@@ -5261,10 +5469,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
APFloat V1 = N1CFP->getValueAPF();
const APFloat &V2 = N2CFP->getValueAPF();
const APFloat &V3 = N3CFP->getValueAPF();
- APFloat::opStatus s =
- V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
- if (!TLI->hasFloatingPointExceptions() || s != APFloat::opInvalidOp)
- return getConstantFP(V1, DL, VT);
+ V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven);
+ return getConstantFP(V1, DL, VT);
}
break;
}
@@ -5276,9 +5482,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::CONCAT_VECTORS: {
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
SDValue Ops[] = {N1, N2, N3};
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
}
@@ -5317,6 +5522,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
break;
}
case ISD::INSERT_SUBVECTOR: {
+ // Inserting undef into undef is still undef.
+ if (N1.isUndef() && N2.isUndef())
+ return getUNDEF(VT);
SDValue Index = N3;
if (VT.isSimple() && N1.getValueType().isSimple()
&& N2.getValueType().isSimple()) {
@@ -5337,6 +5545,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
// Trivial insertion.
if (VT.getSimpleVT() == N2.getSimpleValueType())
return N2;
+
+ // If this is an insert of an extracted vector into an undef vector, we
+ // can just use the input to the extract.
+ if (N1.isUndef() && N2.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
+ N2.getOperand(1) == N3 && N2.getOperand(0).getValueType() == VT)
+ return N2.getOperand(0);
}
break;
}
@@ -5521,116 +5735,12 @@ static bool isMemSrcFromConstant(SDValue Src, ConstantDataArraySlice &Slice) {
SrcDelta + G->getOffset());
}
-/// Determines the optimal series of memory ops to replace the memset / memcpy.
-/// Return true if the number of memory ops is below the threshold (Limit).
-/// It returns the types of the sequence of memory ops to perform
-/// memset / memcpy by reference.
-static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
- unsigned Limit, uint64_t Size,
- unsigned DstAlign, unsigned SrcAlign,
- bool IsMemset,
- bool ZeroMemset,
- bool MemcpyStrSrc,
- bool AllowOverlap,
- unsigned DstAS, unsigned SrcAS,
- SelectionDAG &DAG,
- const TargetLowering &TLI) {
- assert((SrcAlign == 0 || SrcAlign >= DstAlign) &&
- "Expecting memcpy / memset source to meet alignment requirement!");
- // If 'SrcAlign' is zero, that means the memory operation does not need to
- // load the value, i.e. memset or memcpy from constant string. Otherwise,
- // it's the inferred alignment of the source. 'DstAlign', on the other hand,
- // is the specified alignment of the memory operation. If it is zero, that
- // means it's possible to change the alignment of the destination.
- // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
- // not need to be loaded.
- EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign,
- IsMemset, ZeroMemset, MemcpyStrSrc,
- DAG.getMachineFunction());
-
- if (VT == MVT::Other) {
- // Use the largest integer type whose alignment constraints are satisfied.
- // We only need to check DstAlign here as SrcAlign is always greater or
- // equal to DstAlign (or zero).
- VT = MVT::i64;
- while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
- !TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
- VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
- assert(VT.isInteger());
-
- // Find the largest legal integer type.
- MVT LVT = MVT::i64;
- while (!TLI.isTypeLegal(LVT))
- LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
- assert(LVT.isInteger());
-
- // If the type we've chosen is larger than the largest legal integer type
- // then use that instead.
- if (VT.bitsGT(LVT))
- VT = LVT;
- }
-
- unsigned NumMemOps = 0;
- while (Size != 0) {
- unsigned VTSize = VT.getSizeInBits() / 8;
- while (VTSize > Size) {
- // For now, only use non-vector load / store's for the left-over pieces.
- EVT NewVT = VT;
- unsigned NewVTSize;
-
- bool Found = false;
- if (VT.isVector() || VT.isFloatingPoint()) {
- NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
- if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) &&
- TLI.isSafeMemOpType(NewVT.getSimpleVT()))
- Found = true;
- else if (NewVT == MVT::i64 &&
- TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
- TLI.isSafeMemOpType(MVT::f64)) {
- // i64 is usually not legal on 32-bit targets, but f64 may be.
- NewVT = MVT::f64;
- Found = true;
- }
- }
-
- if (!Found) {
- do {
- NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
- if (NewVT == MVT::i8)
- break;
- } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT()));
- }
- NewVTSize = NewVT.getSizeInBits() / 8;
-
- // If the new VT cannot cover all of the remaining bits, then consider
- // issuing a (or a pair of) unaligned and overlapping load / store.
- bool Fast;
- if (NumMemOps && AllowOverlap && NewVTSize < Size &&
- TLI.allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign, &Fast) &&
- Fast)
- VTSize = Size;
- else {
- VT = NewVT;
- VTSize = NewVTSize;
- }
- }
-
- if (++NumMemOps > Limit)
- return false;
-
- MemOps.push_back(VT);
- Size -= VTSize;
- }
-
- return true;
-}
-
static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
// On Darwin, -Os means optimize for size without hurting performance, so
// only really optimize for size when -Oz (MinSize) is used.
if (MF.getTarget().getTargetTriple().isOSDarwin())
- return MF.getFunction().optForMinSize();
- return MF.getFunction().optForSize();
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
}
static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
@@ -5665,6 +5775,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5691,13 +5802,12 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
- if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align),
- (isZeroConstant ? 0 : SrcAlign),
- false, false, CopyFromConstant, true,
- DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(),
- DAG, TLI))
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+ (isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -5851,6 +5961,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5871,13 +5982,15 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (Align > SrcAlign)
SrcAlign = Align;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
-
- if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
- (DstAlignCanChange ? 0 : Align), SrcAlign,
- false, false, false, false,
- DstPtrInfo.getAddrSpace(),
- SrcPtrInfo.getAddrSpace(),
- DAG, TLI))
+ // FIXME: `AllowOverlap` should really be `!isVol` but there is a bug in
+ // findOptimalMemOpLowering. Meanwhile, setting it to `false` produces the
+ // correct code.
+ bool AllowOverlap = false;
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), SrcAlign,
+ /*IsMemset=*/false, /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ AllowOverlap, DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
+ MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -5956,6 +6069,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
uint64_t Size, unsigned Align, bool isVol,
MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
+ // FIXME: We need to honor volatile even if Src is undef.
if (Src.isUndef())
return Chain;
@@ -5972,11 +6086,12 @@ static SDValue getMemsetStores(SelectionDAG &DAG, const SDLoc &dl,
DstAlignCanChange = true;
bool IsZeroVal =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
- Size, (DstAlignCanChange ? 0 : Align), 0,
- true, IsZeroVal, false, true,
- DstPtrInfo.getAddrSpace(), ~0u,
- DAG, TLI))
+ if (!TLI.findOptimalMemOpLowering(
+ MemOps, TLI.getMaxStoresPerMemset(OptSize), Size,
+ (DstAlignCanChange ? 0 : Align), 0, /*IsMemset=*/true,
+ /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes()))
return SDValue();
if (DstAlignCanChange) {
@@ -6097,9 +6212,11 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
@@ -6199,9 +6316,11 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, const SDLoc &dl, SDValue Dst,
// Emit a library call.
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Ty = getDataLayout().getIntPtrType(*getContext());
+ Entry.Ty = Type::getInt8PtrTy(*getContext());
Entry.Node = Dst; Args.push_back(Entry);
Entry.Node = Src; Args.push_back(Entry);
+
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Entry.Node = Size; Args.push_back(Entry);
// FIXME: pass in SDLoc
TargetLowering::CallLoweringInfo CLI(*this);
@@ -6294,16 +6413,15 @@ SDValue SelectionDAG::getMemset(SDValue Chain, const SDLoc &dl, SDValue Dst,
checkAddrSpaceIsValidForLibcall(TLI, DstPtrInfo.getAddrSpace());
// Emit a library call.
- Type *IntPtrTy = getDataLayout().getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
- Entry.Node = Dst; Entry.Ty = IntPtrTy;
+ Entry.Node = Dst; Entry.Ty = Type::getInt8PtrTy(*getContext());
Args.push_back(Entry);
Entry.Node = Src;
Entry.Ty = Src.getValueType().getTypeForEVT(*getContext());
Args.push_back(Entry);
Entry.Node = Size;
- Entry.Ty = IntPtrTy;
+ Entry.Ty = getDataLayout().getIntPtrType(*getContext());
Args.push_back(Entry);
// FIXME: pass in SDLoc
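Across the three memcpy/memmove/memset hunks above, the libcall argument types
now track the C prototypes: pointer parameters are emitted as i8* and only the
length keeps the target's intptr type. For reference:

    void *memcpy(void *dst, const void *src, size_t n);
    void *memmove(void *dst, const void *src, size_t n);
    void *memset(void *dst, int c, size_t n);
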
@@ -6384,32 +6502,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
return SDValue(N, 0);
}
-SDValue SelectionDAG::getAtomicCmpSwap(
- unsigned Opcode, const SDLoc &dl, EVT MemVT, SDVTList VTs, SDValue Chain,
- SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo,
- unsigned Alignment, AtomicOrdering SuccessOrdering,
- AtomicOrdering FailureOrdering, SyncScope::ID SSID) {
- assert(Opcode == ISD::ATOMIC_CMP_SWAP ||
- Opcode == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS);
- assert(Cmp.getValueType() == Swp.getValueType() && "Invalid Atomic Op Types");
-
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
-
- MachineFunction &MF = getMachineFunction();
-
- // FIXME: Volatile isn't really correct; we should keep track of atomic
- // orderings in the memoperand.
- auto Flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad |
- MachineMemOperand::MOStore;
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
- AAMDNodes(), nullptr, SSID, SuccessOrdering,
- FailureOrdering);
-
- return getAtomicCmpSwap(Opcode, dl, MemVT, VTs, Chain, Ptr, Cmp, Swp, MMO);
-}
-
SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
EVT MemVT, SDVTList VTs, SDValue Chain,
SDValue Ptr, SDValue Cmp, SDValue Swp,
@@ -6424,35 +6516,6 @@ SDValue SelectionDAG::getAtomicCmpSwap(unsigned Opcode, const SDLoc &dl,
SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
SDValue Chain, SDValue Ptr, SDValue Val,
- const Value *PtrVal, unsigned Alignment,
- AtomicOrdering Ordering,
- SyncScope::ID SSID) {
- if (Alignment == 0) // Ensure that codegen never sees alignment 0
- Alignment = getEVTAlignment(MemVT);
-
- MachineFunction &MF = getMachineFunction();
- // An atomic store does not load. An atomic load does not store.
- // (An atomicrmw obviously both loads and stores.)
- // For now, atomics are considered to be volatile always, and they are
- // chained as such.
- // FIXME: Volatile isn't really correct; we should keep track of atomic
- // orderings in the memoperand.
- auto Flags = MachineMemOperand::MOVolatile;
- if (Opcode != ISD::ATOMIC_STORE)
- Flags |= MachineMemOperand::MOLoad;
- if (Opcode != ISD::ATOMIC_LOAD)
- Flags |= MachineMemOperand::MOStore;
-
- MachineMemOperand *MMO =
- MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
- MemVT.getStoreSize(), Alignment, AAMDNodes(),
- nullptr, SSID, Ordering);
-
- return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
-}
-
-SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
- SDValue Chain, SDValue Ptr, SDValue Val,
MachineMemOperand *MMO) {
assert((Opcode == ISD::ATOMIC_LOAD_ADD ||
Opcode == ISD::ATOMIC_LOAD_SUB ||
@@ -6465,6 +6528,8 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT,
Opcode == ISD::ATOMIC_LOAD_MAX ||
Opcode == ISD::ATOMIC_LOAD_UMIN ||
Opcode == ISD::ATOMIC_LOAD_UMAX ||
+ Opcode == ISD::ATOMIC_LOAD_FADD ||
+ Opcode == ISD::ATOMIC_LOAD_FSUB ||
Opcode == ISD::ATOMIC_SWAP ||
Opcode == ISD::ATOMIC_STORE) &&
"Invalid Atomic Op");
@@ -6502,7 +6567,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
- MachineMemOperand::Flags Flags, unsigned Size) {
+ MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -6511,7 +6576,7 @@ SDValue SelectionDAG::getMemIntrinsicNode(
MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrInfo, Flags, Size, Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, Size, Align, AAInfo);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, MemVT, MMO);
}
@@ -6557,6 +6622,36 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
return SDValue(N, 0);
}
+SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
+ SDValue Chain, int FrameIndex,
+ int64_t Size, int64_t Offset) {
+ const unsigned Opcode = IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END;
+ const auto VTs = getVTList(MVT::Other);
+ SDValue Ops[2] = {
+ Chain,
+ getFrameIndex(FrameIndex,
+ getTargetLoweringInfo().getFrameIndexTy(getDataLayout()),
+ true)};
+
+ FoldingSetNodeID ID;
+ AddNodeIDNode(ID, Opcode, VTs, Ops);
+ ID.AddInteger(FrameIndex);
+ ID.AddInteger(Size);
+ ID.AddInteger(Offset);
+ void *IP = nullptr;
+ if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP))
+ return SDValue(E, 0);
+
+ LifetimeSDNode *N = newSDNode<LifetimeSDNode>(
+ Opcode, dl.getIROrder(), dl.getDebugLoc(), VTs, Size, Offset);
+ createOperands(N, Ops);
+ CSEMap.InsertNode(N, IP);
+ InsertNode(N);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
+}
+
/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
/// MachinePointerInfo record from it. This is particularly useful because the
/// code generator has many cases where it doesn't bother passing in a
@@ -6875,7 +6970,7 @@ SDValue SelectionDAG::getMaskedLoad(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ops[] = { Chain, Ptr, Mask, PassThru };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MLOAD, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedLoadSDNode>(
dl.getIROrder(), VTs, ExtTy, isExpanding, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -6901,12 +6996,11 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
bool IsTruncating, bool IsCompressing) {
assert(Chain.getValueType() == MVT::Other &&
"Invalid chain type");
- EVT VT = Val.getValueType();
SDVTList VTs = getVTList(MVT::Other);
SDValue Ops[] = { Chain, Val, Ptr, Mask };
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSTORE, VTs, Ops);
- ID.AddInteger(VT.getRawBits());
+ ID.AddInteger(MemVT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedStoreSDNode>(
dl.getIROrder(), VTs, IsTruncating, IsCompressing, MemVT, MMO));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
@@ -7057,6 +7151,31 @@ SDValue SelectionDAG::simplifyShift(SDValue X, SDValue Y) {
return SDValue();
}
+// TODO: Use fast-math-flags to enable more simplifications.
+SDValue SelectionDAG::simplifyFPBinop(unsigned Opcode, SDValue X, SDValue Y) {
+ ConstantFPSDNode *YC = isConstOrConstSplatFP(Y, /* AllowUndefs */ true);
+ if (!YC)
+ return SDValue();
+
+ // X + -0.0 --> X
+ if (Opcode == ISD::FADD)
+ if (YC->getValueAPF().isNegZero())
+ return X;
+
+ // X - +0.0 --> X
+ if (Opcode == ISD::FSUB)
+ if (YC->getValueAPF().isPosZero())
+ return X;
+
+ // X * 1.0 --> X
+ // X / 1.0 --> X
+ if (Opcode == ISD::FMUL || Opcode == ISD::FDIV)
+ if (YC->getValueAPF().isExactlyValue(1.0))
+ return X;
+
+ return SDValue();
+}
+
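The asymmetry between the FADD and FSUB cases above is deliberate: in IEEE-754
arithmetic x + (-0.0) == x and x - (+0.0) == x for every x, but x + (+0.0)
flips the sign of zero when x is -0.0, so that fold (and its FSUB mirror)
would need fast-math flags. A quick standalone check in plain C++:

    #include <cassert>
    #include <cmath>

    int main() {
      double X = -0.0;
      assert(std::copysign(1.0, X + 0.0) > 0.0);  // -0.0 + +0.0 == +0.0, not X
      assert(std::copysign(1.0, X + -0.0) < 0.0); // -0.0 + -0.0 == -0.0 == X
      assert(std::copysign(1.0, X - 0.0) < 0.0);  // -0.0 - +0.0 == -0.0 == X
    }
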
SDValue SelectionDAG::getVAArg(EVT VT, const SDLoc &dl, SDValue Chain,
SDValue Ptr, SDValue SV, unsigned Align) {
SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, dl, MVT::i32) };
@@ -7098,8 +7217,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return V;
break;
case ISD::CONCAT_VECTORS:
- // Attempt to fold CONCAT_VECTORS into BUILD_VECTOR or UNDEF.
- if (SDValue V = FoldCONCAT_VECTORS(DL, VT, Ops, *this))
+ if (SDValue V = foldCONCAT_VECTORS(DL, VT, Ops, *this))
return V;
break;
case ISD::SELECT_CC:
@@ -7629,56 +7747,50 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
unsigned OrigOpc = Node->getOpcode();
unsigned NewOpc;
- bool IsUnary = false;
- bool IsTernary = false;
switch (OrigOpc) {
default:
llvm_unreachable("mutateStrictFPToFP called with unexpected opcode!");
- case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
- case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
- case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
- case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
- case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
- case ISD::STRICT_FMA: NewOpc = ISD::FMA; IsTernary = true; break;
- case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; IsUnary = true; break;
- case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
- case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
- case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; IsUnary = true; break;
- case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; IsUnary = true; break;
- case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; IsUnary = true; break;
- case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; IsUnary = true; break;
- case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; IsUnary = true; break;
- case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; IsUnary = true; break;
- case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; IsUnary = true; break;
- case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; IsUnary = true; break;
- case ISD::STRICT_FNEARBYINT:
- NewOpc = ISD::FNEARBYINT;
- IsUnary = true;
- break;
- case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
- case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
- case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; IsUnary = true; break;
- case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; IsUnary = true; break;
- case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; IsUnary = true; break;
- case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; IsUnary = true; break;
- }
+ case ISD::STRICT_FADD: NewOpc = ISD::FADD; break;
+ case ISD::STRICT_FSUB: NewOpc = ISD::FSUB; break;
+ case ISD::STRICT_FMUL: NewOpc = ISD::FMUL; break;
+ case ISD::STRICT_FDIV: NewOpc = ISD::FDIV; break;
+ case ISD::STRICT_FREM: NewOpc = ISD::FREM; break;
+ case ISD::STRICT_FMA: NewOpc = ISD::FMA; break;
+ case ISD::STRICT_FSQRT: NewOpc = ISD::FSQRT; break;
+ case ISD::STRICT_FPOW: NewOpc = ISD::FPOW; break;
+ case ISD::STRICT_FPOWI: NewOpc = ISD::FPOWI; break;
+ case ISD::STRICT_FSIN: NewOpc = ISD::FSIN; break;
+ case ISD::STRICT_FCOS: NewOpc = ISD::FCOS; break;
+ case ISD::STRICT_FEXP: NewOpc = ISD::FEXP; break;
+ case ISD::STRICT_FEXP2: NewOpc = ISD::FEXP2; break;
+ case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
+ case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
+ case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
+ case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
+ case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+ case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
+ case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
+ case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
+ case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
+ case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
+ case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ }
+
+ assert(Node->getNumValues() == 2 && "Unexpected number of results!");
// We're taking this node out of the chain, so we need to re-link things.
SDValue InputChain = Node->getOperand(0);
SDValue OutputChain = SDValue(Node, 1);
ReplaceAllUsesOfValueWith(OutputChain, InputChain);
- SDVTList VTs = getVTList(Node->getOperand(1).getValueType());
- SDNode *Res = nullptr;
- if (IsUnary)
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1) });
- else if (IsTernary)
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
- Node->getOperand(2),
- Node->getOperand(3)});
- else
- Res = MorphNodeTo(Node, NewOpc, VTs, { Node->getOperand(1),
- Node->getOperand(2) });
+ SmallVector<SDValue, 3> Ops;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i)
+ Ops.push_back(Node->getOperand(i));
+
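For example (a sketch of the operand layout assumed here): every strict node
carries its chain as operand 0 and produces {value, chain} results, so

    (STRICT_FSQRT ch, x)         morphs to (FSQRT x)
    (STRICT_FADD  ch, x, y)      morphs to (FADD x, y)
    (STRICT_FP_ROUND ch, x, imm) morphs to (FP_ROUND x, imm)

and copying operands 1..N-1 handles unary, binary, and ternary opcodes
uniformly.
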
+ SDVTList VTs = getVTList(Node->getValueType(0));
+ SDNode *Res = MorphNodeTo(Node, NewOpc, VTs, Ops);
// MorphNodeTo can operate in two ways: if an existing node with the
// specified operands exists, it can just return it. Otherwise, it
@@ -7980,9 +8092,8 @@ void SelectionDAG::salvageDebugInfo(SDNode &N) {
// DIExpression, we need to mark the expression with a
// DW_OP_stack_value.
auto *DIExpr = DV->getExpression();
- DIExpr = DIExpression::prepend(DIExpr, DIExpression::NoDeref, Offset,
- DIExpression::NoDeref,
- DIExpression::WithStackValue);
+ DIExpr =
+ DIExpression::prepend(DIExpr, DIExpression::StackValue, Offset);
SDDbgValue *Clone =
getDbgValue(DV->getVariable(), DIExpr, N0.getNode(), N0.getResNo(),
DV->isIndirect(), DV->getDebugLoc(), DV->getOrder());
@@ -8288,19 +8399,17 @@ void SelectionDAG::updateDivergence(SDNode * N)
}
}
-
-void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
+void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode *> &Order) {
DenseMap<SDNode *, unsigned> Degree;
Order.reserve(AllNodes.size());
- for (auto & N : allnodes()) {
+ for (auto &N : allnodes()) {
unsigned NOps = N.getNumOperands();
Degree[&N] = NOps;
if (0 == NOps)
Order.push_back(&N);
}
- for (std::vector<SDNode *>::iterator I = Order.begin();
- I!=Order.end();++I) {
- SDNode * N = *I;
+ for (size_t I = 0; I != Order.size(); ++I) {
+ SDNode *N = Order[I];
for (auto U : N->uses()) {
unsigned &UnsortedOps = Degree[U];
if (0 == --UnsortedOps)
@@ -8310,9 +8419,8 @@ void SelectionDAG::CreateTopologicalOrder(std::vector<SDNode*>& Order) {
}
#ifndef NDEBUG
-void SelectionDAG::VerifyDAGDiverence()
-{
- std::vector<SDNode*> TopoOrder;
+void SelectionDAG::VerifyDAGDiverence() {
+ std::vector<SDNode *> TopoOrder;
CreateTopologicalOrder(TopoOrder);
const TargetLowering &TLI = getTargetLoweringInfo();
DenseMap<const SDNode *, bool> DivergenceMap;
@@ -8338,7 +8446,6 @@ void SelectionDAG::VerifyDAGDiverence()
}
#endif
-
/// ReplaceAllUsesOfValuesWith - Replace any uses of From with To, leaving
/// uses of other values produced by From.getNode() alone. The same value
/// may appear in both the From and To list. The Deleted vector is
@@ -8584,14 +8691,24 @@ SDValue llvm::peekThroughOneUseBitcasts(SDValue V) {
return V;
}
-bool llvm::isBitwiseNot(SDValue V) {
+SDValue llvm::peekThroughExtractSubvectors(SDValue V) {
+ while (V.getOpcode() == ISD::EXTRACT_SUBVECTOR)
+ V = V.getOperand(0);
+ return V;
+}
+
+bool llvm::isBitwiseNot(SDValue V, bool AllowUndefs) {
if (V.getOpcode() != ISD::XOR)
return false;
- ConstantSDNode *C = isConstOrConstSplat(peekThroughBitcasts(V.getOperand(1)));
- return C && C->isAllOnesValue();
+ V = peekThroughBitcasts(V.getOperand(1));
+ unsigned NumBits = V.getScalarValueSizeInBits();
+ ConstantSDNode *C =
+ isConstOrConstSplat(V, AllowUndefs, /*AllowTruncation*/ true);
+ return C && (C->getAPIntValue().countTrailingOnes() >= NumBits);
}
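The countTrailingOnes() form also accepts splat masks whose constant is wider
than the element type, which BUILD_VECTOR permits by implicit truncation. For
example:

    // v4i16: (xor X, (build_vector (i32 -1), (i32 -1), (i32 -1), (i32 -1)))
    // The splat constant has 32 trailing ones >= 16 element bits, so this is
    // now recognized as a bitwise NOT of X.

With AllowUndefs, some lanes of the mask may additionally be undef.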
-ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs,
+ bool AllowTruncation) {
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
return CN;
@@ -8599,10 +8716,39 @@ ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, bool AllowUndefs) {
BitVector UndefElements;
ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
- // BuildVectors can truncate their operands. Ignore that case here.
- if (CN && (UndefElements.none() || AllowUndefs) &&
- CN->getValueType(0) == N.getValueType().getScalarType())
- return CN;
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
+ }
+
+ return nullptr;
+}
+
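BUILD_VECTOR nodes may carry operands wider than their element type;
AllowTruncation opts into treating such a splat as a constant. A sketch of the
two behaviours, assuming such a node N:

    // N = (v8i16 build_vector (i32 42) x8)
    isConstOrConstSplat(N);                    // nullptr: i32 != i16
    isConstOrConstSplat(N, /*AllowUndefs=*/false,
                        /*AllowTruncation=*/true); // returns the i32 42 node
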
+ConstantSDNode *llvm::isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
+ bool AllowUndefs,
+ bool AllowTruncation) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantSDNode *CN = BV->getConstantSplatNode(DemandedElts, &UndefElements);
+
+ // BuildVectors can truncate their operands. Ignore that case here unless
+ // AllowTruncation is set.
+ if (CN && (UndefElements.none() || AllowUndefs)) {
+ EVT CVT = CN->getValueType(0);
+ EVT NSVT = N.getValueType().getScalarType();
+ assert(CVT.bitsGE(NSVT) && "Illegal build vector element extension");
+ if (AllowTruncation || (CVT == NSVT))
+ return CN;
+ }
}
return nullptr;
@@ -8622,9 +8768,26 @@ ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N, bool AllowUndefs) {
return nullptr;
}
-bool llvm::isNullOrNullSplat(SDValue N) {
+ConstantFPSDNode *llvm::isConstOrConstSplatFP(SDValue N,
+ const APInt &DemandedElts,
+ bool AllowUndefs) {
+ if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
+ return CN;
+
+ if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
+ BitVector UndefElements;
+ ConstantFPSDNode *CN =
+ BV->getConstantFPSplatNode(DemandedElts, &UndefElements);
+ if (CN && (UndefElements.none() || AllowUndefs))
+ return CN;
+ }
+
+ return nullptr;
+}
+
+bool llvm::isNullOrNullSplat(SDValue N, bool AllowUndefs) {
// TODO: may want to use peekThroughBitcasts() here.
- ConstantSDNode *C = isConstOrConstSplat(N);
+ ConstantSDNode *C = isConstOrConstSplat(N, AllowUndefs);
return C && C->isNullValue();
}
@@ -8773,17 +8936,12 @@ bool SDNode::areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N) {
/// isOperand - Return true if this node is an operand of N.
bool SDValue::isOperandOf(const SDNode *N) const {
- for (const SDValue &Op : N->op_values())
- if (*this == Op)
- return true;
- return false;
+ return any_of(N->op_values(), [this](SDValue Op) { return *this == Op; });
}
bool SDNode::isOperandOf(const SDNode *N) const {
- for (const SDValue &Op : N->op_values())
- if (this == Op.getNode())
- return true;
- return false;
+ return any_of(N->op_values(),
+ [this](SDValue Op) { return this == Op.getNode(); });
}
/// reachesChainWithoutSideEffects - Return true if this operand (which must
@@ -8973,6 +9131,56 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
return getBuildVector(VecVT, dl, Scalars);
}
+std::pair<SDValue, SDValue> SelectionDAG::UnrollVectorOverflowOp(
+ SDNode *N, unsigned ResNE) {
+ unsigned Opcode = N->getOpcode();
+ assert((Opcode == ISD::UADDO || Opcode == ISD::SADDO ||
+ Opcode == ISD::USUBO || Opcode == ISD::SSUBO ||
+ Opcode == ISD::UMULO || Opcode == ISD::SMULO) &&
+ "Expected an overflow opcode");
+
+ EVT ResVT = N->getValueType(0);
+ EVT OvVT = N->getValueType(1);
+ EVT ResEltVT = ResVT.getVectorElementType();
+ EVT OvEltVT = OvVT.getVectorElementType();
+ SDLoc dl(N);
+
+ // If ResNE is 0, fully unroll the vector op.
+ unsigned NE = ResVT.getVectorNumElements();
+ if (ResNE == 0)
+ ResNE = NE;
+ else if (NE > ResNE)
+ NE = ResNE;
+
+ SmallVector<SDValue, 8> LHSScalars;
+ SmallVector<SDValue, 8> RHSScalars;
+ ExtractVectorElements(N->getOperand(0), LHSScalars, 0, NE);
+ ExtractVectorElements(N->getOperand(1), RHSScalars, 0, NE);
+
+ EVT SVT = TLI->getSetCCResultType(getDataLayout(), *getContext(), ResEltVT);
+ SDVTList VTs = getVTList(ResEltVT, SVT);
+ SmallVector<SDValue, 8> ResScalars;
+ SmallVector<SDValue, 8> OvScalars;
+ for (unsigned i = 0; i < NE; ++i) {
+ SDValue Res = getNode(Opcode, dl, VTs, LHSScalars[i], RHSScalars[i]);
+ SDValue Ov =
+ getSelect(dl, OvEltVT, Res.getValue(1),
+ getBoolConstant(true, dl, OvEltVT, ResVT),
+ getConstant(0, dl, OvEltVT));
+
+ ResScalars.push_back(Res);
+ OvScalars.push_back(Ov);
+ }
+
+ ResScalars.append(ResNE - NE, getUNDEF(ResEltVT));
+ OvScalars.append(ResNE - NE, getUNDEF(OvEltVT));
+
+ EVT NewResVT = EVT::getVectorVT(*getContext(), ResEltVT, ResNE);
+ EVT NewOvVT = EVT::getVectorVT(*getContext(), OvEltVT, ResNE);
+ return std::make_pair(getBuildVector(NewResVT, dl, ResScalars),
+ getBuildVector(NewOvVT, dl, OvScalars));
+}
+
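Schematically (a sketch in DAG terms), a v2i32 uaddo unrolls into per-lane
scalar ops, with each lane's setcc-typed overflow bit normalised to the
overflow vector's element type through a select:

    (res0, ov0) = uaddo lhs0, rhs0
    (res1, ov1) = uaddo lhs1, rhs1
    Res = build_vector res0, res1
    Ov  = build_vector (select ov0, true, 0), (select ov1, true, 0)

If ResNE asks for more lanes than the input has, the extra lanes are undef.
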
bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
LoadSDNode *Base,
unsigned Bytes,
@@ -9014,7 +9222,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
// If this is a direct reference to a stack slot, use information about the
// stack slot's alignment.
- int FrameIdx = 1 << 31;
+ int FrameIdx = INT_MIN;
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
@@ -9025,7 +9233,7 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
FrameOffset = Ptr.getConstantOperandVal(1);
}
- if (FrameIdx != (1 << 31)) {
+ if (FrameIdx != INT_MIN) {
const MachineFrameInfo &MFI = getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
@@ -9065,6 +9273,15 @@ SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT,
return std::make_pair(Lo, Hi);
}
+/// Widen the vector up to the next power of two using INSERT_SUBVECTOR.
+SDValue SelectionDAG::WidenVector(const SDValue &N, const SDLoc &DL) {
+ EVT VT = N.getValueType();
+ EVT WideVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(),
+ NextPowerOf2(VT.getVectorNumElements()));
+ return getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, getUNDEF(WideVT), N,
+ getConstant(0, DL, TLI->getVectorIdxTy(getDataLayout())));
+}
+
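For instance (in DAG-dump notation, as a sketch), a v3f32 value N widens to
v4f32 with the extra lane left undef:

    t1: v4f32 = undef
    t2: v4f32 = insert_subvector t1, N:v3f32, Constant:i64<0>
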
void SelectionDAG::ExtractVectorElements(SDValue Op,
SmallVectorImpl<SDValue> &Args,
unsigned Start, unsigned Count) {
@@ -9158,13 +9375,20 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
return true;
}
-SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+SDValue BuildVectorSDNode::getSplatValue(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
if (UndefElements) {
UndefElements->clear();
UndefElements->resize(getNumOperands());
}
+ assert(getNumOperands() == DemandedElts.getBitWidth() &&
+ "Unexpected vector size");
+ if (!DemandedElts)
+ return SDValue();
SDValue Splatted;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ if (!DemandedElts[i])
+ continue;
SDValue Op = getOperand(i);
if (Op.isUndef()) {
if (UndefElements)
@@ -9177,20 +9401,40 @@ SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
}
if (!Splatted) {
- assert(getOperand(0).isUndef() &&
+ unsigned FirstDemandedIdx = DemandedElts.countTrailingZeros();
+ assert(getOperand(FirstDemandedIdx).isUndef() &&
"Can only have a splat without a constant for all undefs.");
- return getOperand(0);
+ return getOperand(FirstDemandedIdx);
}
return Splatted;
}
+SDValue BuildVectorSDNode::getSplatValue(BitVector *UndefElements) const {
+ APInt DemandedElts = APInt::getAllOnesValue(getNumOperands());
+ return getSplatValue(DemandedElts, UndefElements);
+}
+
+ConstantSDNode *
+BuildVectorSDNode::getConstantSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
ConstantSDNode *
BuildVectorSDNode::getConstantSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantSDNode>(getSplatValue(UndefElements));
}
ConstantFPSDNode *
+BuildVectorSDNode::getConstantFPSplatNode(const APInt &DemandedElts,
+ BitVector *UndefElements) const {
+ return dyn_cast_or_null<ConstantFPSDNode>(
+ getSplatValue(DemandedElts, UndefElements));
+}
+
+ConstantFPSDNode *
BuildVectorSDNode::getConstantFPSplatNode(BitVector *UndefElements) const {
return dyn_cast_or_null<ConstantFPSDNode>(getSplatValue(UndefElements));
}
@@ -9228,7 +9472,10 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
for (i = 0, e = VT.getVectorNumElements(); i != e && Mask[i] < 0; ++i)
/* search */;
- assert(i != e && "VECTOR_SHUFFLE node with all undef indices!");
+ // If all elements are undefined, this shuffle can be considered a splat
+ // (although it should eventually get simplified away completely).
+ if (i == e)
+ return true;
// Make sure all remaining elements are either undef or the same as the first
// non-undef value.
@@ -9266,8 +9513,7 @@ SDNode *SelectionDAG::isConstantFPBuildVectorOrConstantFP(SDValue N) {
void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
assert(!Node->OperandList && "Node already has operands");
- assert(std::numeric_limits<decltype(SDNode::NumOperands)>::max() >=
- Vals.size() &&
+ assert(SDNode::getMaxNumOperands() >= Vals.size() &&
"too many operands to fit into SDNode");
SDUse *Ops = OperandRecycler.allocate(
ArrayRecycler<SDUse>::Capacity::get(Vals.size()), OperandAllocator);
@@ -9287,6 +9533,19 @@ void SelectionDAG::createOperands(SDNode *Node, ArrayRef<SDValue> Vals) {
checkForCycles(Node);
}
+SDValue SelectionDAG::getTokenFactor(const SDLoc &DL,
+ SmallVectorImpl<SDValue> &Vals) {
+ size_t Limit = SDNode::getMaxNumOperands();
+ while (Vals.size() > Limit) {
+ unsigned SliceIdx = Vals.size() - Limit;
+ auto ExtractedTFs = ArrayRef<SDValue>(Vals).slice(SliceIdx, Limit);
+ SDValue NewTF = getNode(ISD::TokenFactor, DL, MVT::Other, ExtractedTFs);
+ Vals.erase(Vals.begin() + SliceIdx, Vals.end());
+ Vals.emplace_back(NewTF);
+ }
+ return getNode(ISD::TokenFactor, DL, MVT::Other, Vals);
+}
+
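A usage sketch, mirroring the SelectionDAGBuilder::getRoot change later in
this patch (names illustrative): callers can merge an unbounded list of chains
without tripping the SDNode operand limit.

    SmallVector<SDValue, 8> PendingChains; // filled with chain values
    SDValue Root = DAG.getTokenFactor(DL, PendingChains);
    // Whenever PendingChains.size() exceeds SDNode::getMaxNumOperands(), the
    // tail is folded into a nested TokenFactor before the final node is built.
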
#ifndef NDEBUG
static void checkForCyclesHelper(const SDNode *N,
SmallPtrSetImpl<const SDNode*> &Visited,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 488bac1a9a80..9592bc30a4e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -1,9 +1,8 @@
//==- llvm/CodeGen/SelectionDAGAddressAnalysis.cpp - DAG Address Analysis --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -25,8 +24,10 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
// Conservatively fail if a match failed.
if (!Base.getNode() || !Other.Base.getNode())
return false;
+ if (!hasValidOffset() || !Other.hasValidOffset())
+ return false;
// Initial Offset difference.
- Off = Other.Offset - Offset;
+ Off = *Other.Offset - *Offset;
if ((Other.Index == Index) && (Other.IsIndexSignExt == IsIndexSignExt)) {
// Trivial match.
@@ -60,24 +61,110 @@ bool BaseIndexOffset::equalBaseIndex(const BaseIndexOffset &Other,
const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- // Match non-equal FrameIndexes - If both frame indices are fixed
- // we know their relative offsets and can compare them. Otherwise
- // we must be conservative.
+ // Match FrameIndexes.
if (auto *A = dyn_cast<FrameIndexSDNode>(Base))
- if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(Other.Base)) {
+ // Equal FrameIndexes - offsets are directly comparable.
+ if (A->getIndex() == B->getIndex())
+ return true;
+ // Non-equal FrameIndexes - If both frame indices are fixed
+ // we know their relative offsets and can compare them. Otherwise
+ // we must be conservative.
if (MFI.isFixedObjectIndex(A->getIndex()) &&
MFI.isFixedObjectIndex(B->getIndex())) {
Off += MFI.getObjectOffset(B->getIndex()) -
MFI.getObjectOffset(A->getIndex());
return true;
}
+ }
}
return false;
}
+bool BaseIndexOffset::computeAliasing(const SDNode *Op0,
+ const Optional<int64_t> NumBytes0,
+ const SDNode *Op1,
+ const Optional<int64_t> NumBytes1,
+ const SelectionDAG &DAG, bool &IsAlias) {
+
+ BaseIndexOffset BasePtr0 = match(Op0, DAG);
+ BaseIndexOffset BasePtr1 = match(Op1, DAG);
+
+ if (!(BasePtr0.getBase().getNode() && BasePtr1.getBase().getNode()))
+ return false;
+ int64_t PtrDiff;
+ if (NumBytes0.hasValue() && NumBytes1.hasValue() &&
+ BasePtr0.equalBaseIndex(BasePtr1, DAG, PtrDiff)) {
+ // BasePtr1 is PtrDiff away from BasePtr0. They alias if none of the
+ // following situations arise:
+ IsAlias = !(
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // ========PtrDiff========>
+ (*NumBytes0 <= PtrDiff) ||
+ // [----BasePtr0----]
+ // [---BasePtr1--]
+ // =====(-PtrDiff)====>
+ (PtrDiff + *NumBytes1 <= 0)); // i.e. *NumBytes1 < -PtrDiff.
+ return true;
+ }
+ // If both BasePtr0 and BasePtr1 are FrameIndexes, we will not be
+ // able to calculate their relative offset if at least one arises
+ // from an alloca. However, these allocas cannot overlap and we
+ // can infer there is no alias.
+ if (auto *A = dyn_cast<FrameIndexSDNode>(BasePtr0.getBase()))
+ if (auto *B = dyn_cast<FrameIndexSDNode>(BasePtr1.getBase())) {
+ MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
+ // If the bases are the same frame index but we couldn't find a
+ // constant offset (i.e. the indices differ), be conservative.
+ if (A != B && (!MFI.isFixedObjectIndex(A->getIndex()) ||
+ !MFI.isFixedObjectIndex(B->getIndex()))) {
+ IsAlias = false;
+ return true;
+ }
+ }
+
+ bool IsFI0 = isa<FrameIndexSDNode>(BasePtr0.getBase());
+ bool IsFI1 = isa<FrameIndexSDNode>(BasePtr1.getBase());
+ bool IsGV0 = isa<GlobalAddressSDNode>(BasePtr0.getBase());
+ bool IsGV1 = isa<GlobalAddressSDNode>(BasePtr1.getBase());
+ bool IsCV0 = isa<ConstantPoolSDNode>(BasePtr0.getBase());
+ bool IsCV1 = isa<ConstantPoolSDNode>(BasePtr1.getBase());
+
+ // With mismatched base types or checkable indices, we can conclude that
+ // they do not alias.
+ if ((BasePtr0.getIndex() == BasePtr1.getIndex() || (IsFI0 != IsFI1) ||
+ (IsGV0 != IsGV1) || (IsCV0 != IsCV1)) &&
+ (IsFI0 || IsGV0 || IsCV0) && (IsFI1 || IsGV1 || IsCV1)) {
+ IsAlias = false;
+ return true;
+ }
+ return false; // Cannot determine whether the pointers alias.
+}
+
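Restating the PtrDiff test as a standalone predicate makes the diagrams above
concrete (a sketch; byte-granular accesses assumed):

    // Accesses [0, N0) and [PtrDiff, PtrDiff + N1) are disjoint iff one ends
    // at or before the other begins.
    bool disjoint(int64_t N0, int64_t N1, int64_t PtrDiff) {
      return N0 <= PtrDiff || PtrDiff + N1 <= 0;
    }
    // disjoint(4, 4, 4) -> true  (back-to-back accesses, no overlap)
    // disjoint(4, 4, 2) -> false ([0,4) and [2,6) overlap)
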
+bool BaseIndexOffset::contains(const SelectionDAG &DAG, int64_t BitSize,
+ const BaseIndexOffset &Other,
+ int64_t OtherBitSize, int64_t &BitOffset) const {
+ int64_t Offset;
+ if (!equalBaseIndex(Other, DAG, Offset))
+ return false;
+ if (Offset >= 0) {
+ // Other is after *this:
+ // [-------*this---------]
+ // [---Other--]
+ // ==Offset==>
+ BitOffset = 8 * Offset;
+ return BitOffset + OtherBitSize <= BitSize;
+ }
+ // Other starts strictly before *this, so it cannot be fully contained.
+ // [-------*this---------]
+ // [--Other--]
+ return false;
+}
+
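A worked instance of the containment check: with BitSize == 64,
OtherBitSize == 16 and a byte Offset of 2, BitOffset becomes 8 * 2 = 16 and
16 + 16 <= 64 holds, so Other is contained; at Offset == 7, 56 + 16 > 64 and
the check correctly fails.
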
/// Parses tree in Ptr for base, index, offset addresses.
-BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
- const SelectionDAG &DAG) {
+static BaseIndexOffset matchLSNode(const LSBaseSDNode *N,
+ const SelectionDAG &DAG) {
SDValue Ptr = N->getBasePtr();
// (((B + I*M) + c)) + c ...
@@ -178,3 +265,33 @@ BaseIndexOffset BaseIndexOffset::match(const LSBaseSDNode *N,
}
return BaseIndexOffset(Base, Index, Offset, IsIndexSignExt);
}
+
+BaseIndexOffset BaseIndexOffset::match(const SDNode *N,
+ const SelectionDAG &DAG) {
+ if (const auto *LS0 = dyn_cast<LSBaseSDNode>(N))
+ return matchLSNode(LS0, DAG);
+ if (const auto *LN = dyn_cast<LifetimeSDNode>(N)) {
+ if (LN->hasOffset())
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), LN->getOffset(),
+ false);
+ return BaseIndexOffset(LN->getOperand(1), SDValue(), false);
+ }
+ return BaseIndexOffset();
+}
+
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+
+LLVM_DUMP_METHOD void BaseIndexOffset::dump() const {
+ print(dbgs());
+}
+
+void BaseIndexOffset::print(raw_ostream& OS) const {
+ OS << "BaseIndexOffset base=[";
+ Base->print(OS);
+ OS << "] index=[";
+ if (Index)
+ Index->print(OS);
+ OS << "] offset=" << Offset;
+}
+
+#endif
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 871ab9b29881..e818dd27c05e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -55,6 +54,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/SelectionDAGTargetInfo.h"
#include "llvm/CodeGen/StackMaps.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
@@ -109,6 +109,7 @@
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/Local.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
@@ -123,6 +124,7 @@
using namespace llvm;
using namespace PatternMatch;
+using namespace SwitchCG;
#define DEBUG_TYPE "isel"
@@ -215,8 +217,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
unsigned ValueBits = ValueVT.getSizeInBits();
// Assemble the power of 2 part.
- unsigned RoundParts = NumParts & (NumParts - 1) ?
- 1 << Log2_32(NumParts) : NumParts;
+ unsigned RoundParts =
+ (NumParts & (NumParts - 1)) ? 1 << Log2_32(NumParts) : NumParts;
unsigned RoundBits = PartBits * RoundParts;
EVT RoundVT = RoundBits == ValueBits ?
ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
@@ -322,7 +324,15 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
}
- llvm_unreachable("Unknown mismatch!");
+ // Handle MMX to a narrower integer type by bitcasting MMX to integer and
+ // then truncating.
+ if (PartEVT == MVT::x86mmx && ValueVT.isInteger() &&
+ ValueVT.bitsLT(PartEVT)) {
+ Val = DAG.getNode(ISD::BITCAST, DL, MVT::i64, Val);
+ return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
+ }
+
+ report_fatal_error("Unknown mismatch in getCopyFromParts!");
}
static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
@@ -573,7 +583,8 @@ static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
unsigned RoundBits = RoundParts * PartBits;
unsigned OddParts = NumParts - RoundParts;
SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
- DAG.getIntPtrConstant(RoundBits, DL));
+ DAG.getShiftAmountConstant(RoundBits, ValueVT, DL, /*LegalTypes*/false));
+
getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
CallConv);
@@ -1003,6 +1014,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
DL = &DAG.getDataLayout();
Context = DAG.getContext();
LPadToCallSiteMap.clear();
+ SL->init(DAG.getTargetLoweringInfo(), TM, DAG.getDataLayout());
}
void SelectionDAGBuilder::clear() {
@@ -1032,19 +1044,7 @@ SDValue SelectionDAGBuilder::getRoot() {
}
// Otherwise, we have to make a token factor node.
- // If we have >= 2^16 loads then split across multiple token factors as
- // there's a 64k limit on the number of SDNode operands.
- SDValue Root;
- size_t Limit = (1 << 16) - 1;
- while (PendingLoads.size() > Limit) {
- unsigned SliceIdx = PendingLoads.size() - Limit;
- auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
- SDValue NewTF =
- DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
- PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
- PendingLoads.emplace_back(NewTF);
- }
- Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
+ SDValue Root = DAG.getTokenFactor(getCurSDLoc(), PendingLoads);
PendingLoads.clear();
DAG.setRoot(Root);
return Root;
@@ -1144,6 +1144,13 @@ void SelectionDAGBuilder::dropDanglingDebugInfo(const DILocalVariable *Variable,
for (auto &DDIMI : DanglingDebugInfoMap) {
DanglingDebugInfoVector &DDIV = DDIMI.second;
+
+ // If debug info is to be dropped, run it through final checks to see
+ // whether it can be salvaged.
+ for (auto &DDI : DDIV)
+ if (isMatchingDbgValue(DDI))
+ salvageUnresolvedDbgValue(DDI);
+
DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
}
}
@@ -1169,6 +1176,12 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
"Expected inlined-at fields to agree");
SDDbgValue *SDV;
if (Val.getNode()) {
+ // FIXME: I doubt that it is correct to resolve a dangling DbgValue as a
+ // FuncArgumentDbgValue (it would be hoisted to the function entry, and if
+ // we couldn't resolve it directly when examining the DbgValue intrinsic
+ // in the first place we should not be more successful here). Unless we
+ // have some test case that proves this to be correct, we should avoid
+ // calling EmitFuncArgumentDbgValue here.
if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
<< DbgSDNodeOrder << "] for:\n " << *DI << "\n");
@@ -1186,12 +1199,173 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
} else
LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
<< "in EmitFuncArgumentDbgValue\n");
- } else
+ } else {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+ auto Undef =
+ UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV =
+ DAG.getConstantDbgValue(Variable, Expr, Undef, dl, DbgSDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
}
DDIV.clear();
}
+void SelectionDAGBuilder::salvageUnresolvedDbgValue(DanglingDebugInfo &DDI) {
+ Value *V = DDI.getDI()->getValue();
+ DILocalVariable *Var = DDI.getDI()->getVariable();
+ DIExpression *Expr = DDI.getDI()->getExpression();
+ DebugLoc DL = DDI.getdl();
+ DebugLoc InstDL = DDI.getDI()->getDebugLoc();
+ unsigned SDOrder = DDI.getSDNodeOrder();
+
+ // Currently we consider only dbg.value intrinsics -- we tell the salvager
+ // that DW_OP_stack_value is desired.
+ assert(isa<DbgValueInst>(DDI.getDI()));
+ bool StackValue = true;
+
+ // Can this Value be encoded without any further work?
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder))
+ return;
+
+ // Attempt to salvage back through as many instructions as possible. Bail if
+ // a non-instruction is seen, such as a constant expression or global
+ // variable. FIXME: Further work could recover those too.
+ while (isa<Instruction>(V)) {
+ Instruction &VAsInst = *cast<Instruction>(V);
+ DIExpression *NewExpr = salvageDebugInfoImpl(VAsInst, Expr, StackValue);
+
+ // If we cannot salvage any further, and haven't yet found a suitable debug
+ // expression, bail out.
+ if (!NewExpr)
+ break;
+
+ // New value and expr now represent this debuginfo.
+ V = VAsInst.getOperand(0);
+ Expr = NewExpr;
+
+ // Some kind of simplification occurred: check whether the operand of the
+ // salvaged debug expression can be encoded in this DAG.
+ if (handleDebugValue(V, Var, Expr, DL, InstDL, SDOrder)) {
+ LLVM_DEBUG(dbgs() << "Salvaged debug location info for:\n "
+ << DDI.getDI() << "\nBy stripping back to:\n " << V);
+ return;
+ }
+ }
+
+ // This was the final opportunity to salvage this debug information, and it
+ // couldn't be done. Place an undef DBG_VALUE at this location to terminate
+ // any earlier variable location.
+ auto Undef = UndefValue::get(DDI.getDI()->getVariableLocation()->getType());
+ auto SDV = DAG.getConstantDbgValue(Var, Expr, Undef, DL, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+
+ LLVM_DEBUG(dbgs() << "Dropping debug value info for:\n " << DDI.getDI()
+ << "\n");
+ LLVM_DEBUG(dbgs() << " Last seen at:\n " << *DDI.getDI()->getOperand(0)
+ << "\n");
+}
+
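One concrete salvage step, as a sketch (the exact expression is whatever
salvageDebugInfoImpl produces): if the dbg.value tracks %b in

    // %b = add i64 %a, 4
    // llvm.dbg.value(%b, !var, !DIExpression())

and %b never gets an SDNode, a single loop iteration switches the location to
%a with the expression extended to
!DIExpression(DW_OP_plus_uconst, 4, DW_OP_stack_value), then retries
handleDebugValue on %a.
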
+bool SelectionDAGBuilder::handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc dl,
+ DebugLoc InstDL, unsigned Order) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDDbgValue *SDV;
+ if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
+ isa<ConstantPointerNull>(V)) {
+ SDV = DAG.getConstantDbgValue(Var, Expr, V, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+
+ // If the Value is a frame index, we can create a FrameIndex debug value
+ // without relying on the DAG at all.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
+ auto SI = FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ auto SDV =
+ DAG.getFrameIndexDbgValue(Var, Expr, SI->second,
+ /*IsIndirect*/ false, dl, SDNodeOrder);
+ // Do not attach the SDNodeDbgValue to an SDNode: this variable location
+ // is still available even if the SDNode gets optimized out.
+ DAG.AddDbgValue(SDV, nullptr, false);
+ return true;
+ }
+ }
+
+ // Do not use getValue() in here; we don't want to generate code at
+ // this point if it hasn't been done yet.
+ SDValue N = NodeMap[V];
+ if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
+ N = UnusedArgNodeMap[V];
+ if (N.getNode()) {
+ if (EmitFuncArgumentDbgValue(V, Var, Expr, dl, false, N))
+ return true;
+ SDV = getDbgValue(N, Var, Expr, dl, Order);
+ DAG.AddDbgValue(SDV, N.getNode(), false);
+ return true;
+ }
+
+ // Special rules apply for the first dbg.values of parameter variables in a
+ // function: they reference Argument Values, their variables are parameters
+ // of the current function, and they are not inlined. We need to let them
+ // dangle until they get an SDNode.
+ bool IsParamOfFunc = isa<Argument>(V) && Var->isParameter() &&
+ !InstDL.getInlinedAt();
+ if (!IsParamOfFunc) {
+ // The value is not used in this block yet (or it would have an SDNode).
+ // We still want the value to appear for the user if possible -- if it has
+ // an associated VReg, we can refer to that instead.
+ auto VMI = FuncInfo.ValueMap.find(V);
+ if (VMI != FuncInfo.ValueMap.end()) {
+ unsigned Reg = VMI->second;
+ // If this is a PHI node, it may be split up into several MI PHI nodes
+ // (in FunctionLoweringInfo::set).
+ RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
+ V->getType(), None);
+ if (RFV.occupiesMultipleRegs()) {
+ unsigned Offset = 0;
+ unsigned BitsToDescribe = 0;
+ if (auto VarSize = Var->getSizeInBits())
+ BitsToDescribe = *VarSize;
+ if (auto Fragment = Expr->getFragmentInfo())
+ BitsToDescribe = Fragment->SizeInBits;
+ for (auto RegAndSize : RFV.getRegsAndSizes()) {
+ unsigned RegisterSize = RegAndSize.second;
+ // Bail out if all bits are described already.
+ if (Offset >= BitsToDescribe)
+ break;
+ unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
+ ? BitsToDescribe - Offset
+ : RegisterSize;
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, FragmentSize);
+ if (!FragmentExpr)
+ continue;
+ SDV = DAG.getVRegDbgValue(Var, *FragmentExpr, RegAndSize.first,
+ false, dl, Order);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ Offset += RegisterSize;
+ }
+ } else {
+ SDV = DAG.getVRegDbgValue(Var, Expr, Reg, false, dl, Order);
+ DAG.AddDbgValue(SDV, nullptr, false);
+ }
+ return true;
+ }
+ }
+
+ return false;
+}
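The multi-register branch above splits the variable into DW_OP_LLVM_fragment pieces, clamping the final fragment to the variable's size. A standalone sketch of just that arithmetic, with hypothetical sizes (a 96-bit variable lowered into two 64-bit registers):

#include <algorithm>
#include <cstdio>
#include <vector>

int main() {
  unsigned BitsToDescribe = 96;              // e.g. a 96-bit variable
  std::vector<unsigned> RegSizes = {64, 64}; // lowered into two 64-bit regs
  unsigned Offset = 0;
  for (unsigned RegisterSize : RegSizes) {
    if (Offset >= BitsToDescribe)
      break; // all bits already described
    unsigned FragmentSize = std::min(RegisterSize, BitsToDescribe - Offset);
    std::printf("DW_OP_LLVM_fragment offset=%u size=%u\n", Offset,
                FragmentSize);
    Offset += RegisterSize;
  }
  // Prints fragments (0,64) and (64,32): the second register only holds
  // the remaining 32 bits of the variable.
  return 0;
}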
+
+void SelectionDAGBuilder::resolveOrClearDbgInfo() {
+ // Try to fix up any remaining dangling debug info -- and drop it if we can't.
+ for (auto &Pair : DanglingDebugInfoMap)
+ for (auto &DDI : Pair.second)
+ salvageUnresolvedDbgValue(DDI);
+ clearDanglingDebugInfo();
+}
+
/// getCopyFromRegs - If there was virtual register allocated for the value V
/// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
@@ -1469,6 +1643,36 @@ void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
}
}
+// For wasm, there's always a single catch pad attached to a catchswitch, and
+// the control flow always stops at the single catch pad, as it does for a
+// cleanup pad. If the exception caught is not of one of the types the catch
+// pad handles, it will be rethrown by a rethrow instruction.
+static void findWasmUnwindDestinations(
+ FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
+ BranchProbability Prob,
+ SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
+ &UnwindDests) {
+ while (EHPadBB) {
+ const Instruction *Pad = EHPadBB->getFirstNonPHI();
+ if (isa<CleanupPadInst>(Pad)) {
+ // Stop on cleanup pads.
+ UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ break;
+ } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
+ // Add the catchpad handlers to the possible destinations. We don't
+ // continue to the unwind destination of the catchswitch for wasm.
+ for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
+ UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
+ UnwindDests.back().first->setIsEHScopeEntry();
+ }
+ break;
+ } else {
+ continue;
+ }
+ }
+}
+
/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
@@ -1489,6 +1693,13 @@ static void findUnwindDestinations(
bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
bool IsSEH = isAsynchronousEHPersonality(Personality);
+ if (IsWasmCXX) {
+ findWasmUnwindDestinations(FuncInfo, EHPadBB, Prob, UnwindDests);
+ assert(UnwindDests.size() <= 1 &&
+ "There should be at most one unwind destination for wasm");
+ return;
+ }
+
while (EHPadBB) {
const Instruction *Pad = EHPadBB->getFirstNonPHI();
BasicBlock *NewEHPadBB = nullptr;
@@ -1501,8 +1712,7 @@ static void findUnwindDestinations(
// personalities.
UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
UnwindDests.back().first->setIsEHScopeEntry();
- if (!IsWasmCXX)
- UnwindDests.back().first->setIsEHFuncletEntry();
+ UnwindDests.back().first->setIsEHFuncletEntry();
break;
} else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
// Add the catchpad handlers to the possible destinations.
@@ -1588,9 +1798,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
DemoteReg, PtrValueVTs[0]);
SDValue RetOp = getValue(I.getOperand(0));
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &MemVTs,
+ &Offsets);
unsigned NumValues = ValueVTs.size();
SmallVector<SDValue, 4> Chains(NumValues);
@@ -1598,8 +1809,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// An aggregate return value cannot wrap around the address space, so
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
- Chains[i] = DAG.getStore(
- Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+
+ SDValue Val = RetOp.getValue(i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
+ Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
// FIXME: better loc info would be nice.
Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
}
@@ -1615,6 +1829,10 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
+ bool NeedsRegBlock = TLI.functionArgumentNeedsConsecutiveRegisters(
+ I.getOperand(0)->getType(), F->getCallingConv(),
+ /*IsVarArg*/ false);
+
ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
Attribute::SExt))
@@ -1647,6 +1865,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
if (RetInReg)
Flags.setInReg();
+ if (I.getOperand(0)->getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(I.getOperand(0)->getType())->getAddressSpace());
+ }
+
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (j == NumValues - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
+
// Propagate extension type if any
if (ExtendKind == ISD::SIGN_EXTEND)
Flags.setSExt();
@@ -1668,7 +1898,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
const Function *F = I.getParent()->getParent();
if (TLI.supportSwiftError() &&
F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
- assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
+ assert(SwiftError.getFunctionArg() && "Need a swift error argument");
ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
Flags.setSwiftError();
Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
@@ -1677,8 +1907,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
0 /*partOffs*/));
// Create SDNode for the swifterror virtual register.
OutVals.push_back(
- DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
- &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
+ DAG.getRegister(SwiftError.getOrCreateVRegUseAt(
+ &I, FuncInfo.MBB, SwiftError.getFunctionArg()),
EVT(TLI.getPointerTy(DL))));
}
@@ -1825,7 +2055,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
return;
}
}
@@ -1834,7 +2064,7 @@ SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
}
void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
@@ -2043,27 +2273,27 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If the compares in later blocks need to use values not currently
// exported from this block, export them now. This block should always
// be the first entry.
- assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
+ assert(SL->SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
// Allow some cases to be rejected.
- if (ShouldEmitAsBranches(SwitchCases)) {
- for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
- ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
- ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
+ if (ShouldEmitAsBranches(SL->SwitchCases)) {
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i) {
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpLHS);
+ ExportFromCurrentBlock(SL->SwitchCases[i].CmpRHS);
}
// Emit the branch for this block.
- visitSwitchCase(SwitchCases[0], BrMBB);
- SwitchCases.erase(SwitchCases.begin());
+ visitSwitchCase(SL->SwitchCases[0], BrMBB);
+ SL->SwitchCases.erase(SL->SwitchCases.begin());
return;
}
// Okay, we decided not to do this, remove any inserted MBB's and clear
// SwitchCases.
- for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
- FuncInfo.MF->erase(SwitchCases[i].ThisBB);
+ for (unsigned i = 1, e = SL->SwitchCases.size(); i != e; ++i)
+ FuncInfo.MF->erase(SL->SwitchCases[i].ThisBB);
- SwitchCases.clear();
+ SL->SwitchCases.clear();
}
}
@@ -2084,6 +2314,20 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
SDValue CondLHS = getValue(CB.CmpLHS);
SDLoc dl = CB.DL;
+ if (CB.CC == ISD::SETTRUE) {
+ // Branch or fall through to TrueBB.
+ addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
+ SwitchBB->normalizeSuccProbs();
+ if (CB.TrueBB != NextBlock(SwitchBB)) {
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(CB.TrueBB)));
+ }
+ return;
+ }
+
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), CB.CmpLHS->getType());
+
// Build the setcc now.
if (!CB.CmpMHS) {
// Fold "(X == true)" to X and "(X == false)" to !X to
@@ -2095,8 +2339,18 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
CB.CC == ISD::SETEQ) {
SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
- } else
- Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
+ } else {
+ SDValue CondRHS = getValue(CB.CmpRHS);
+
+ // If a pointer's DAG type is larger than its memory type then the DAG
+ // values are zero-extended. This breaks signed comparisons so truncate
+ // back to the underlying type before doing the compare.
+ if (CondLHS.getValueType() != MemVT) {
+ CondLHS = DAG.getPtrExtOrTrunc(CondLHS, getCurSDLoc(), MemVT);
+ CondRHS = DAG.getPtrExtOrTrunc(CondRHS, getCurSDLoc(), MemVT);
+ }
+ Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, CondRHS, CB.CC);
+ }
} else {
assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
@@ -2147,7 +2401,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
}
/// visitJumpTable - Emit JumpTable node in the current MBB
-void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
+void SelectionDAGBuilder::visitJumpTable(SwitchCG::JumpTable &JT) {
// Emit the code for the jump table
assert(JT.Reg != -1U && "Should lower JT Header first!");
EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
@@ -2162,14 +2416,12 @@ void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
/// visitJumpTableHeader - This function emits necessary code to produce index
/// in the JumpTable from switch case.
-void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
+void SelectionDAGBuilder::visitJumpTableHeader(SwitchCG::JumpTable &JT,
JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
- // Subtract the lowest switch case value from the value being switched on and
- // conditional branch to default mbb if the result is greater than the
- // difference between smallest and largest cases.
+ // Subtract the lowest switch case value from the value being switched on.
SDValue SwitchOp = getValue(JTH.SValue);
EVT VT = SwitchOp.getValueType();
SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
@@ -2189,24 +2441,33 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
JumpTableReg, SwitchOp);
JT.Reg = JumpTableReg;
- // Emit the range check for the jump table, and branch to the default block
- // for the switch statement if the value being switched on exceeds the largest
- // case in the switch.
- SDValue CMP = DAG.getSetCC(
- dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
-
- SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, CopyTo, CMP,
- DAG.getBasicBlock(JT.Default));
-
- // Avoid emitting unnecessary branches to the next block.
- if (JT.MBB != NextBlock(SwitchBB))
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(JT.MBB));
-
- DAG.setRoot(BrCond);
+ if (!JTH.OmitRangeCheck) {
+ // Emit the range check for the jump table, and branch to the default block
+ // for the switch statement if the value being switched on exceeds the
+ // largest case in the switch.
+ SDValue CMP = DAG.getSetCC(
+ dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ Sub.getValueType()),
+ Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
+
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
+ MVT::Other, CopyTo, CMP,
+ DAG.getBasicBlock(JT.Default));
+
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(JT.MBB));
+
+ DAG.setRoot(BrCond);
+ } else {
+ // Avoid emitting unnecessary branches to the next block.
+ if (JT.MBB != NextBlock(SwitchBB))
+ DAG.setRoot(DAG.getNode(ISD::BR, dl, MVT::Other, CopyTo,
+ DAG.getBasicBlock(JT.MBB)));
+ else
+ DAG.setRoot(CopyTo);
+ }
}
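In plain C++ terms, the two paths above correspond roughly to the dispatch below; dispatch, First and Last are illustrative names, and OmitRangeCheck models the case where the optimizer has proved the switch value always lands inside the table. Note the unsigned subtraction also catches x < First by wrapping around:

#include <cstdio>

static void dispatch(unsigned x, unsigned First, unsigned Last,
                     bool OmitRangeCheck) {
  unsigned Sub = x - First; // index into the jump table
  if (!OmitRangeCheck && Sub > Last - First) {
    std::printf("default block\n"); // BRCOND to JT.Default
    return;
  }
  std::printf("jump table entry %u\n", Sub); // BR_JT
}

int main() { dispatch(7, 3, 10, /*OmitRangeCheck=*/false); }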
/// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
@@ -2215,6 +2476,7 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
SDValue &Chain) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFunction &MF = DAG.getMachineFunction();
Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
MachineSDNode *Node =
@@ -2227,6 +2489,8 @@ static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
DAG.setNodeMemRefs(Node, {MemRef});
}
+ if (PtrTy != PtrMemTy)
+ return DAG.getPtrExtOrTrunc(SDValue(Node, 0), DL, PtrMemTy);
return SDValue(Node, 0);
}
@@ -2242,6 +2506,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// First create the loads to the guard/stack slot for the comparison.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
+ EVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout());
MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
int FI = MFI.getStackProtectorIndex();
@@ -2254,7 +2519,7 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
// Generate code to load the content of the guard slot.
SDValue GuardVal = DAG.getLoad(
- PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
+ PtrMemTy, dl, DAG.getEntryNode(), StackSlotPtr,
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
MachineMemOperand::MOVolatile);
@@ -2262,27 +2527,26 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
// Retrieve guard check function, nullptr if instrumentation is inlined.
- if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
+ if (const Function *GuardCheckFn = TLI.getSSPStackGuardCheck(M)) {
// The target provides a guard check function to validate the guard value.
// Generate a call to that function with the content of the guard slot as
// argument.
- auto *Fn = cast<Function>(GuardCheck);
- FunctionType *FnTy = Fn->getFunctionType();
+ FunctionType *FnTy = GuardCheckFn->getFunctionType();
assert(FnTy->getNumParams() == 1 && "Invalid function signature");
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = GuardVal;
Entry.Ty = FnTy->getParamType(0);
- if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
+ if (GuardCheckFn->hasAttribute(1, Attribute::AttrKind::InReg))
Entry.IsInReg = true;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(getCurSDLoc())
- .setChain(DAG.getEntryNode())
- .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
- getValue(GuardCheck), std::move(Args));
+ .setChain(DAG.getEntryNode())
+ .setCallee(GuardCheckFn->getCallingConv(), FnTy->getReturnType(),
+ getValue(GuardCheckFn), std::move(Args));
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
@@ -2298,9 +2562,9 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
const Value *IRGuard = TLI.getSDagStackGuard(M);
SDValue GuardPtr = getValue(IRGuard);
- Guard =
- DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
- Align, MachineMemOperand::MOVolatile);
+ Guard = DAG.getLoad(PtrMemTy, dl, Chain, GuardPtr,
+ MachinePointerInfo(IRGuard, 0), Align,
+ MachineMemOperand::MOVolatile);
}
// Perform the comparison via a subtract/getsetcc.
@@ -2339,6 +2603,12 @@ SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
None, false, getCurSDLoc(), false, false).second;
+ // On PS4, the "return address" must still be within the calling function,
+ // even if it's at the very end, so emit an explicit TRAP here.
+ // Passing 'true' for doesNotReturn above won't generate the trap for us.
+ if (TM.getTargetTriple().isPS4CPU())
+ Chain = DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, Chain);
+
DAG.setRoot(Chain);
}
@@ -2493,6 +2763,20 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
break;
+ case Intrinsic::wasm_rethrow_in_catch: {
+ // This is usually done in visitTargetIntrinsic, but this intrinsic is
+ // special because it can be invoked, so we manually lower it to a DAG
+ // node here.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(getRoot()); // inchain
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ Ops.push_back(
+ DAG.getTargetConstant(Intrinsic::wasm_rethrow_in_catch, getCurSDLoc(),
+ TLI.getPointerTy(DAG.getDataLayout())));
+ SDVTList VTs = DAG.getVTList(ArrayRef<EVT>({MVT::Other})); // outchain
+ DAG.setRoot(DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops));
+ break;
+ }
}
} else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
// Currently we do not lower any intrinsic calls with deopt operand bundles.
@@ -2528,6 +2812,35 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
InvokeMBB->normalizeSuccProbs();
// Drop into normal successor.
+ DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(),
+ DAG.getBasicBlock(Return)));
+}
+
+void SelectionDAGBuilder::visitCallBr(const CallBrInst &I) {
+ MachineBasicBlock *CallBrMBB = FuncInfo.MBB;
+
+ // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
+ // have to do anything here to lower funclet bundles.
+ assert(!I.hasOperandBundlesOtherThan(
+ {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
+ "Cannot lower callbrs with arbitrary operand bundles yet!");
+
+ assert(isa<InlineAsm>(I.getCalledValue()) &&
+ "Only know how to handle inlineasm callbr");
+ visitInlineAsm(&I);
+
+ // Retrieve successors.
+ MachineBasicBlock *Return = FuncInfo.MBBMap[I.getDefaultDest()];
+
+ // Update successor info.
+ addSuccessorWithProb(CallBrMBB, Return);
+ for (unsigned i = 0, e = I.getNumIndirectDests(); i < e; ++i) {
+ MachineBasicBlock *Target = FuncInfo.MBBMap[I.getIndirectDest(i)];
+ addSuccessorWithProb(CallBrMBB, Target);
+ }
+ CallBrMBB->normalizeSuccProbs();
+
+ // Drop into default successor.
DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
MVT::Other, getControlRoot(),
DAG.getBasicBlock(Return)));
@@ -2585,49 +2898,17 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
setValue(&LP, Res);
}
-void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
-#ifndef NDEBUG
- for (const CaseCluster &CC : Clusters)
- assert(CC.Low == CC.High && "Input clusters must be single-case");
-#endif
-
- llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
- return a.Low->getValue().slt(b.Low->getValue());
- });
-
- // Merge adjacent clusters with the same destination.
- const unsigned N = Clusters.size();
- unsigned DstIndex = 0;
- for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
- CaseCluster &CC = Clusters[SrcIndex];
- const ConstantInt *CaseVal = CC.Low;
- MachineBasicBlock *Succ = CC.MBB;
-
- if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
- (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
- // If this case has the same successor and is a neighbour, merge it into
- // the previous cluster.
- Clusters[DstIndex - 1].High = CaseVal;
- Clusters[DstIndex - 1].Prob += CC.Prob;
- } else {
- std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
- sizeof(Clusters[SrcIndex]));
- }
- }
- Clusters.resize(DstIndex);
-}
-
void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
MachineBasicBlock *Last) {
// Update JTCases.
- for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
- if (JTCases[i].first.HeaderBB == First)
- JTCases[i].first.HeaderBB = Last;
+ for (unsigned i = 0, e = SL->JTCases.size(); i != e; ++i)
+ if (SL->JTCases[i].first.HeaderBB == First)
+ SL->JTCases[i].first.HeaderBB = Last;
// Update BitTestCases.
- for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
- if (BitTestCases[i].Parent == First)
- BitTestCases[i].Parent = Last;
+ for (unsigned i = 0, e = SL->BitTestCases.size(); i != e; ++i)
+ if (SL->BitTestCases[i].Parent == First)
+ SL->BitTestCases[i].Parent = Last;
}
void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
@@ -2916,6 +3197,18 @@ void SelectionDAGBuilder::visitICmp(const User &I) {
SDValue Op2 = getValue(I.getOperand(1));
ISD::CondCode Opcode = getICmpCondCode(predicate);
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+
+ // If a pointer's DAG type is larger than its memory type then the DAG values
+ // are zero-extended. This breaks signed comparisons so truncate back to the
+ // underlying type before doing the compare.
+ if (Op1.getValueType() != MemVT) {
+ Op1 = DAG.getPtrExtOrTrunc(Op1, getCurSDLoc(), MemVT);
+ Op2 = DAG.getPtrExtOrTrunc(Op2, getCurSDLoc(), MemVT);
+ }
+
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
@@ -2963,6 +3256,8 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
ISD::NodeType OpCode = Cond.getValueType().isVector() ?
ISD::VSELECT : ISD::SELECT;
+ bool IsUnaryAbs = false;
+
// Min/max matching is only viable if all output VTs are the same.
if (is_splat(ValueVTs)) {
EVT VT = ValueVTs[0];
@@ -3023,10 +3318,16 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
break;
}
break;
+ case SPF_ABS:
+ IsUnaryAbs = true;
+ Opc = ISD::ABS;
+ break;
+ case SPF_NABS:
+ // TODO: we need to produce sub(0, abs(X)).
default: break;
}
- if (Opc != ISD::DELETED_NODE &&
+ if (!IsUnaryAbs && Opc != ISD::DELETED_NODE &&
(TLI.isOperationLegalOrCustom(Opc, VT) ||
(UseScalarMinMax &&
TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
@@ -3039,15 +3340,30 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
RHSVal = getValue(RHS);
BaseOps = {};
}
+
+ if (IsUnaryAbs) {
+ OpCode = Opc;
+ LHSVal = getValue(LHS);
+ BaseOps = {};
+ }
}
- for (unsigned i = 0; i != NumValues; ++i) {
- SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
- Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
- Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
- Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
- LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
- Ops);
+ if (IsUnaryAbs) {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ Values[i] =
+ DAG.getNode(OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i),
+ SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ }
+ } else {
+ for (unsigned i = 0; i != NumValues; ++i) {
+ SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
+ Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
+ Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
+ Values[i] = DAG.getNode(
+ OpCode, getCurSDLoc(),
+ LHSVal.getNode()->getValueType(LHSVal.getResNo() + i), Ops);
+ }
}
setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
@@ -3135,18 +3451,26 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
+ auto &TLI = DAG.getTargetLoweringInfo();
EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
I.getType());
- setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+ EVT PtrMemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getOperand(0)->getType());
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitIntToPtr(const User &I) {
// What to do depends on the size of the integer and the size of the pointer.
// We can either truncate, zero extend, or no-op, accordingly.
SDValue N = getValue(I.getOperand(0));
- EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- I.getType());
- setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
+ auto &TLI = DAG.getTargetLoweringInfo();
+ EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT PtrMemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
+ N = DAG.getZExtOrTrunc(N, getCurSDLoc(), PtrMemVT);
+ N = DAG.getPtrExtOrTrunc(N, getCurSDLoc(), DestVT);
+ setValue(&I, N);
}
void SelectionDAGBuilder::visitBitCast(const User &I) {
@@ -3284,12 +3608,8 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
MOps1[0] = Src1;
MOps2[0] = Src2;
- Src1 = Src1.isUndef()
- ? DAG.getUNDEF(PaddedVT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
- Src2 = Src2.isUndef()
- ? DAG.getUNDEF(PaddedVT)
- : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
+ Src1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
+ Src2 = DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
// Readjust mask for new input vector length.
SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
@@ -3498,6 +3818,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
SDValue N = getValue(Op0);
SDLoc dl = getCurSDLoc();
+ auto &TLI = DAG.getTargetLoweringInfo();
+ MVT PtrTy = TLI.getPointerTy(DAG.getDataLayout(), AS);
+ MVT PtrMemTy = TLI.getPointerMemTy(DAG.getDataLayout(), AS);
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
@@ -3555,6 +3878,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
Flags.setNoUnsignedWrap(true);
+ OffsVal = DAG.getSExtOrTrunc(OffsVal, dl, N.getValueType());
+
N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
continue;
}
@@ -3580,7 +3905,8 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
N.getValueType(), IdxN,
DAG.getConstant(Amt, dl, IdxN.getValueType()));
} else {
- SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
+ SDValue Scale = DAG.getConstant(ElementSize.getZExtValue(), dl,
+ IdxN.getValueType());
IdxN = DAG.getNode(ISD::MUL, dl,
N.getValueType(), IdxN, Scale);
}
@@ -3591,6 +3917,9 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
}
}
+ if (PtrMemTy != PtrTy && !cast<GEPOperator>(I).isInBounds())
+ N = DAG.getPtrExtendInReg(N, dl, PtrMemTy);
+
setValue(&I, N);
}
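The net effect of the GEP loop is the usual base-plus-scaled-index computation, sketched below under the simplifying assumption of a 64-bit pointer DAG type; gep is a hypothetical helper, not LLVM API:

#include <cstdint>

// Constant struct offsets are added directly; variable indices are scaled
// by the element size. All arithmetic happens in the pointer's DAG type,
// with offsets sign-extended or truncated to it first.
uint64_t gep(uint64_t Base, int64_t ConstOffs, int64_t Idx,
             uint64_t ElementSize) {
  uint64_t N = Base + (uint64_t)ConstOffs; // folded constant offsets
  N += (uint64_t)Idx * ElementSize;        // scaled dynamic index
  return N;
}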
@@ -3675,16 +4004,17 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
- bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
+ bool isDereferenceable =
+ isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
- ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
+ ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3750,12 +4080,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
MMOFlags |= MachineMemOperand::MODereferenceable;
MMOFlags |= TLI.getMMOFlags(I);
- SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
+ SDValue L = DAG.getLoad(MemVTs[i], dl, Root, A,
MachinePointerInfo(SV, Offsets[i]), Alignment,
MMOFlags, AAInfo, Ranges);
+ Chains[ChainI] = L.getValue(1);
+
+ if (MemVTs[i] != ValueVTs[i])
+ L = DAG.getZExtOrTrunc(L, dl, ValueVTs[i]);
Values[i] = L;
- Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
@@ -3785,15 +4118,13 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- unsigned VReg; bool CreatedVReg;
- std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
+ unsigned VReg =
+ SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
SDValue(Src.getNode(), Src.getResNo()));
DAG.setRoot(CopyNode);
- if (CreatedVReg)
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
}
void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
@@ -3826,8 +4157,7 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
// Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
SDValue L = DAG.getCopyFromReg(
getRoot(), getCurSDLoc(),
- FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
- ValueVTs[0]);
+ SwiftError.getOrCreateVRegUseAt(&I, FuncInfo.MBB, SV), ValueVTs[0]);
setValue(&I, L);
}
@@ -3854,10 +4184,10 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
}
- SmallVector<EVT, 4> ValueVTs;
+ SmallVector<EVT, 4> ValueVTs, MemVTs;
SmallVector<uint64_t, 4> Offsets;
ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
- SrcV->getType(), ValueVTs, &Offsets);
+ SrcV->getType(), ValueVTs, &MemVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
if (NumValues == 0)
return;
@@ -3899,9 +4229,12 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
}
SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
- SDValue St = DAG.getStore(
- Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
- MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
+ SDValue Val = SDValue(Src.getNode(), Src.getResNo() + i);
+ if (MemVTs[i] != ValueVTs[i])
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVTs[i]);
+ SDValue St =
+ DAG.getStore(Root, dl, Val, Add, MachinePointerInfo(PtrV, Offsets[i]),
+ Alignment, MMOFlags, AAInfo);
Chains[ChainI] = St;
}
@@ -4181,19 +4514,34 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering SuccessOrder = I.getSuccessOrdering();
- AtomicOrdering FailureOrder = I.getFailureOrdering();
+ AtomicOrdering SuccessOrdering = I.getSuccessOrdering();
+ AtomicOrdering FailureOrdering = I.getFailureOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
- SDValue L = DAG.getAtomicCmpSwap(
- ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
- getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
- getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
- /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
+
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
+ Flags, MemVT.getStoreSize(), Alignment,
+ AAMDNodes(), nullptr, SSID, SuccessOrdering,
+ FailureOrdering);
+
+ SDValue L = DAG.getAtomicCmpSwap(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS,
+ dl, MemVT, VTs, InChain,
+ getValue(I.getPointerOperand()),
+ getValue(I.getCompareOperand()),
+ getValue(I.getNewValOperand()), MMO);
SDValue OutChain = L.getValue(2);
@@ -4217,20 +4565,32 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
+ case AtomicRMWInst::FAdd: NT = ISD::ATOMIC_LOAD_FADD; break;
+ case AtomicRMWInst::FSub: NT = ISD::ATOMIC_LOAD_FSUB; break;
}
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
+ auto MemVT = getValue(I.getValOperand()).getSimpleValueType();
+ auto Alignment = DAG.getEVTAlignment(MemVT);
+
+ auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= DAG.getTargetLoweringInfo().getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), Alignment, AAMDNodes(),
+ nullptr, SSID, Ordering);
+
SDValue L =
- DAG.getAtomic(NT, dl,
- getValue(I.getValOperand()).getSimpleValueType(),
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getValOperand()),
- I.getPointerOperand(),
- /* Alignment=*/ 0, Order, SSID);
+ DAG.getAtomic(NT, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), getValue(I.getValOperand()),
+ MMO);
SDValue OutChain = L.getValue(1);
@@ -4259,27 +4619,39 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ EVT MemVT = TLI.getMemValueType(DAG.getDataLayout(), I.getType());
if (!TLI.supportsUnalignedAtomics() &&
- I.getAlignment() < VT.getStoreSize())
+ I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic load");
+ auto Flags = MachineMemOperand::MOLoad;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ Flags |= MachineMemOperand::MOInvariant;
+ if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
+ DAG.getDataLayout()))
+ Flags |= MachineMemOperand::MODereferenceable;
+
+ Flags |= TLI.getMMOFlags(I);
+
MachineMemOperand *MMO =
DAG.getMachineFunction().
getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
- MachineMemOperand::MOVolatile |
- MachineMemOperand::MOLoad,
- VT.getStoreSize(),
+ Flags, MemVT.getStoreSize(),
I.getAlignment() ? I.getAlignment() :
- DAG.getEVTAlignment(VT),
+ DAG.getEVTAlignment(MemVT),
AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
+ DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
getValue(I.getPointerOperand()), MMO);
SDValue OutChain = L.getValue(1);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
setValue(&I, L);
DAG.setRoot(OutChain);
@@ -4288,25 +4660,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDLoc dl = getCurSDLoc();
- AtomicOrdering Order = I.getOrdering();
+ AtomicOrdering Ordering = I.getOrdering();
SyncScope::ID SSID = I.getSyncScopeID();
SDValue InChain = getRoot();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- EVT VT =
- TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
+ EVT MemVT =
+ TLI.getMemValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
- if (I.getAlignment() < VT.getStoreSize())
+ if (I.getAlignment() < MemVT.getSizeInBits() / 8)
report_fatal_error("Cannot generate unaligned atomic store");
- SDValue OutChain =
- DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
- InChain,
- getValue(I.getPointerOperand()),
- getValue(I.getValueOperand()),
- I.getPointerOperand(), I.getAlignment(),
- Order, SSID);
+ auto Flags = MachineMemOperand::MOStore;
+ if (I.isVolatile())
+ Flags |= MachineMemOperand::MOVolatile;
+ Flags |= TLI.getMMOFlags(I);
+
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()), Flags,
+ MemVT.getStoreSize(), I.getAlignment(), AAMDNodes(),
+ nullptr, SSID, Ordering);
+
+ SDValue Val = getValue(I.getValueOperand());
+ if (Val.getValueType() != MemVT)
+ Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+
+ SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
+ getValue(I.getPointerOperand()), Val, MMO);
+
DAG.setRoot(OutChain);
}
@@ -4364,10 +4747,12 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
SDValue Result;
if (IsTgtIntrinsic) {
// This is target intrinsic that touches memory
- Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
- Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
- Info.flags, Info.size);
+ AAMDNodes AAInfo;
+ I.getAAMetadata(AAInfo);
+ Result =
+ DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4889,7 +5274,7 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getConstantFP(1.0, DL, LHS.getValueType());
const Function &F = DAG.getMachineFunction().getFunction();
- if (!F.optForSize() ||
+ if (!F.hasOptSize() ||
// If optimizing for size, don't insert too many multiplies.
// This inserts up to 5 multiplies.
countPopulation(Val) + Log2_32(Val) < 7) {
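For context on the countPopulation + Log2_32 estimate: exponentiation by squaring performs roughly one multiply per set bit of the exponent plus one squaring per bit position, so popcount(n) + log2(n) bounds the multiply count. A plain illustration of that expansion, not the routine the DAG actually emits:

double powi(double X, unsigned N) {
  double Result = 1.0;
  while (N) {
    if (N & 1)
      Result *= X; // one multiply per set bit of N
    X *= X;        // one squaring per bit position
    N >>= 1;
  }
  return Result;
}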
@@ -4952,6 +5337,71 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (!Arg)
return false;
+ if (!IsDbgDeclare) {
+ // ArgDbgValues are hoisted to the beginning of the entry block. So we
+ // should only emit as ArgDbgValue if the dbg.value intrinsic is found in
+ // the entry block.
+ bool IsInEntryBlock = FuncInfo.MBB == &FuncInfo.MF->front();
+ if (!IsInEntryBlock)
+ return false;
+
+ // Additionally, we should only emit as ArgDbgValue if the dbg.value
+ // intrinsic describes a variable that is a parameter of the current
+ // function.
+ //
+ // Although, if we are at the top of the entry block already, we can still
+ // emit using ArgDbgValue. This might catch some situations when the
+ // dbg.value refers to an argument that isn't used in the entry block, so
+ // any CopyToReg node would be optimized out and the only way to express
+ // this DBG_VALUE is by using the physical reg (or FI) as done in this
+ // method.
+ bool VariableIsFunctionInputArg = Variable->isParameter() &&
+ !DL->getInlinedAt();
+ bool IsInPrologue = SDNodeOrder == LowestSDNodeOrder;
+ if (!IsInPrologue && !VariableIsFunctionInputArg)
+ return false;
+
+ // Here we assume that a function argument at the IR level can only be used
+ // to describe one input parameter at the source level. If we, for example,
+ // have source code like this
+ //
+ // struct A { long x, y; };
+ // void foo(struct A a, long b) {
+ // ...
+ // b = a.x;
+ // ...
+ // }
+ //
+ // and IR like this
+ //
+ // define void @foo(i32 %a1, i32 %a2, i32 %b) {
+ // entry:
+ // call void @llvm.dbg.value(metadata i32 %a1, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %a2, "a", DW_OP_LLVM_fragment
+ // call void @llvm.dbg.value(metadata i32 %b, "b",
+ // ...
+ // call void @llvm.dbg.value(metadata i32 %a1, "b"
+ // ...
+ //
+ // then the last dbg.value is describing a parameter "b" using a value that
+ // is an argument. But since we have already used %a1 to describe a
+ // parameter, we should not handle that last dbg.value here (that would
+ // result in an incorrect hoisting of the DBG_VALUE to the function entry).
+ // Notice that we allow one dbg.value per IR-level argument, to accommodate
+ // the situation with fragments above.
+ if (VariableIsFunctionInputArg) {
+ unsigned ArgNo = Arg->getArgNo();
+ if (ArgNo >= FuncInfo.DescribedArgs.size())
+ FuncInfo.DescribedArgs.resize(ArgNo + 1, false);
+ else if (!IsInPrologue && FuncInfo.DescribedArgs.test(ArgNo))
+ return false;
+ FuncInfo.DescribedArgs.set(ArgNo);
+ }
+ }
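A standalone sketch of just that DescribedArgs bookkeeping, under the first-binding-wins policy described above; claimArg is a hypothetical helper modeling the real FuncInfo.DescribedArgs BitVector:

#include <vector>

static bool claimArg(std::vector<bool> &Described, unsigned ArgNo,
                     bool IsInPrologue) {
  if (ArgNo >= Described.size())
    Described.resize(ArgNo + 1, false);
  else if (!IsInPrologue && Described[ArgNo])
    return false; // this argument already describes a parameter
  Described[ArgNo] = true;
  return true;
}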
+
MachineFunction &MF = DAG.getMachineFunction();
const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
@@ -4976,12 +5426,14 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
}
- if (!Op && N.getNode())
+ if (!Op && N.getNode()) {
// Check if frame index is available.
- if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ SDValue LCandidate = peekThroughBitcasts(N);
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(LCandidate.getNode()))
if (FrameIndexSDNode *FINode =
dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
Op = MachineOperand::CreateFI(FINode->getIndex());
+ }
if (!Op) {
// Check if ValueMap has reg number.
@@ -5055,11 +5507,29 @@ SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
# define setjmp_undefined_for_msvc
#endif
-/// Lower the call to the specified intrinsic function. If we want to emit this
-/// as a call to a named external function, return the name. Otherwise, lower it
-/// and return null.
-const char *
-SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
+static unsigned FixedPointIntrinsicToOpcode(unsigned Intrinsic) {
+ switch (Intrinsic) {
+ case Intrinsic::smul_fix:
+ return ISD::SMULFIX;
+ case Intrinsic::umul_fix:
+ return ISD::UMULFIX;
+ default:
+ llvm_unreachable("Unhandled fixed point intrinsic");
+ }
+}
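What SMULFIX/UMULFIX compute, as a sketch: a widened multiply followed by a shift right by the scale (the number of fractional bits). Rounding here is plain truncation via arithmetic shift; this is an illustration under that assumption, not the authoritative semantics:

#include <cstdint>

int32_t smul_fix(int32_t A, int32_t B, unsigned Scale) {
  return (int32_t)(((int64_t)A * (int64_t)B) >> Scale);
}
// Example: in Q16.16, smul_fix(3 << 16, 2 << 16, 16) == 6 << 16.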
+
+void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
+ const char *FunctionName) {
+ assert(FunctionName && "FunctionName must not be nullptr");
+ SDValue Callee = DAG.getExternalSymbol(
+ FunctionName,
+ DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+ LowerCallTo(&I, Callee, I.isTailCall());
+}
+
+/// Lower the call to the specified intrinsic function.
+void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
+ unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
@@ -5069,28 +5539,28 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
default:
// By default, turn this into a target intrinsic node.
visitTargetIntrinsic(I, Intrinsic);
- return nullptr;
- case Intrinsic::vastart: visitVAStart(I); return nullptr;
- case Intrinsic::vaend: visitVAEnd(I); return nullptr;
- case Intrinsic::vacopy: visitVACopy(I); return nullptr;
+ return;
+ case Intrinsic::vastart: visitVAStart(I); return;
+ case Intrinsic::vaend: visitVAEnd(I); return;
+ case Intrinsic::vacopy: visitVACopy(I); return;
case Intrinsic::returnaddress:
setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::addressofreturnaddress:
setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::read_register: {
Value *Reg = I.getArgOperand(0);
SDValue Chain = getRoot();
@@ -5101,7 +5571,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(VT, MVT::Other), Chain, RegName);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::write_register: {
Value *Reg = I.getArgOperand(0);
@@ -5111,12 +5581,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
RegName, getValue(RegValue)));
- return nullptr;
+ return;
}
case Intrinsic::setjmp:
- return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
+ lowerCallToExternalSymbol(I, &"_setjmp"[!TLI.usesUnderscoreSetJmp()]);
+ return;
case Intrinsic::longjmp:
- return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
+ lowerCallToExternalSymbol(I, &"_longjmp"[!TLI.usesUnderscoreLongJmp()]);
+ return;
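The &"_setjmp"[Flag] idiom above, isolated: indexing one character into the string literal skips the leading underscore when the target does not use it. A minimal sketch:

#include <cstdio>

int main() {
  bool UsesUnderscore = false;
  const char *Name = &"_setjmp"[!UsesUnderscore]; // "setjmp"
  std::printf("%s\n", Name); // prints "setjmp" here, "_setjmp" otherwise
  return 0;
}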
case Intrinsic::memcpy: {
const auto &MCI = cast<MemCpyInst>(I);
SDValue Op1 = getValue(I.getArgOperand(0));
@@ -5135,7 +5607,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memset: {
const auto &MSI = cast<MemSetInst>(I);
@@ -5149,7 +5621,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
isTC, MachinePointerInfo(I.getArgOperand(0)));
updateDAGForMaybeTailCall(MS);
- return nullptr;
+ return;
}
case Intrinsic::memmove: {
const auto &MMI = cast<MemMoveInst>(I);
@@ -5168,7 +5640,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
isTC, MachinePointerInfo(I.getArgOperand(0)),
MachinePointerInfo(I.getArgOperand(1)));
updateDAGForMaybeTailCall(MM);
- return nullptr;
+ return;
}
case Intrinsic::memcpy_element_unordered_atomic: {
const AtomicMemCpyInst &MI = cast<AtomicMemCpyInst>(I);
@@ -5186,7 +5658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memmove_element_unordered_atomic: {
auto &MI = cast<AtomicMemMoveInst>(I);
@@ -5204,7 +5676,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MachinePointerInfo(MI.getRawDest()),
MachinePointerInfo(MI.getRawSource()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::memset_element_unordered_atomic: {
auto &MI = cast<AtomicMemSetInst>(I);
@@ -5220,7 +5692,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
LengthTy, ElemSz, isTC,
MachinePointerInfo(MI.getRawDest()));
updateDAGForMaybeTailCall(MC);
- return nullptr;
+ return;
}
case Intrinsic::dbg_addr:
case Intrinsic::dbg_declare: {
@@ -5235,7 +5707,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (!Address || isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
- return nullptr;
+ return;
}
bool isParameter = Variable->isParameter() || isa<Argument>(Address);
@@ -5264,7 +5736,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Variable, Expression, FI, /*IsIndirect*/ true, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, getRoot().getNode(), isParameter);
}
- return nullptr;
+ return;
}
SDValue &N = NodeMap[Address];
@@ -5286,7 +5758,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Address is an argument, so try to emit its dbg value using
// virtual register info from the FuncInfo.ValueMap.
EmitFuncArgumentDbgValue(Address, Variable, Expression, dl, true, N);
- return nullptr;
+ return;
} else {
SDV = DAG.getDbgValue(Variable, Expression, N.getNode(), N.getResNo(),
true, dl, SDNodeOrder);
@@ -5300,7 +5772,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
LLVM_DEBUG(dbgs() << "Dropping debug info for " << DI << "\n");
}
}
- return nullptr;
+ return;
}
case Intrinsic::dbg_label: {
const DbgLabelInst &DI = cast<DbgLabelInst>(I);
@@ -5310,7 +5782,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgLabel *SDV;
SDV = DAG.getDbgLabel(Label, dl, SDNodeOrder);
DAG.AddDbgLabel(SDV);
- return nullptr;
+ return;
}
case Intrinsic::dbg_value: {
const DbgValueInst &DI = cast<DbgValueInst>(I);
@@ -5321,88 +5793,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
dropDanglingDebugInfo(Variable, Expression);
const Value *V = DI.getValue();
if (!V)
- return nullptr;
-
- SDDbgValue *SDV;
- if (isa<ConstantInt>(V) || isa<ConstantFP>(V) || isa<UndefValue>(V) ||
- isa<ConstantPointerNull>(V)) {
- SDV = DAG.getConstantDbgValue(Variable, Expression, V, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- return nullptr;
- }
-
- // Do not use getValue() in here; we don't want to generate code at
- // this point if it hasn't been done yet.
- SDValue N = NodeMap[V];
- if (!N.getNode() && isa<Argument>(V)) // Check unused arguments map.
- N = UnusedArgNodeMap[V];
- if (N.getNode()) {
- if (EmitFuncArgumentDbgValue(V, Variable, Expression, dl, false, N))
- return nullptr;
- SDV = getDbgValue(N, Variable, Expression, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, N.getNode(), false);
- return nullptr;
- }
-
- // PHI nodes have already been selected, so we should know which VReg that
- // is assigns to already.
- if (isa<PHINode>(V)) {
- auto VMI = FuncInfo.ValueMap.find(V);
- if (VMI != FuncInfo.ValueMap.end()) {
- unsigned Reg = VMI->second;
- // The PHI node may be split up into several MI PHI nodes (in
- // FunctionLoweringInfo::set).
- RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), Reg,
- V->getType(), None);
- if (RFV.occupiesMultipleRegs()) {
- unsigned Offset = 0;
- unsigned BitsToDescribe = 0;
- if (auto VarSize = Variable->getSizeInBits())
- BitsToDescribe = *VarSize;
- if (auto Fragment = Expression->getFragmentInfo())
- BitsToDescribe = Fragment->SizeInBits;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
- unsigned RegisterSize = RegAndSize.second;
- // Bail out if all bits are described already.
- if (Offset >= BitsToDescribe)
- break;
- unsigned FragmentSize = (Offset + RegisterSize > BitsToDescribe)
- ? BitsToDescribe - Offset
- : RegisterSize;
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expression, Offset, FragmentSize);
- if (!FragmentExpr)
- continue;
- SDV = DAG.getVRegDbgValue(Variable, *FragmentExpr, RegAndSize.first,
- false, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- Offset += RegisterSize;
- }
- } else {
- SDV = DAG.getVRegDbgValue(Variable, Expression, Reg, false, dl,
- SDNodeOrder);
- DAG.AddDbgValue(SDV, nullptr, false);
- }
- return nullptr;
- }
- }
+ return;
- // TODO: When we get here we will either drop the dbg.value completely, or
- // we try to move it forward by letting it dangle for awhile. So we should
- // probably add an extra DbgValue to the DAG here, with a reference to
- // "noreg", to indicate that we have lost the debug location for the
- // variable.
+ if (handleDebugValue(V, Variable, Expression, dl, DI.getDebugLoc(),
+ SDNodeOrder))
+ return;
- if (!V->use_empty() ) {
- // Do not call getValue(V) yet, as we don't want to generate code.
- // Remember it for later.
- DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
- return nullptr;
- }
+ // TODO: Dangling debug info will eventually either be resolved or produce
+ // an Undef DBG_VALUE. However, in the resolution case, a gap may appear
+ // between the original dbg.value location and its resolved DBG_VALUE, which
+ // we should ideally fill with an extra Undef DBG_VALUE.
- LLVM_DEBUG(dbgs() << "Dropping debug location info for:\n " << DI << "\n");
- LLVM_DEBUG(dbgs() << " Last seen at:\n " << *V << "\n");
- return nullptr;
+ DanglingDebugInfoMap[V].emplace_back(&DI, dl, SDNodeOrder);
+ return;
}
case Intrinsic::eh_typeid_for: {
@@ -5411,7 +5814,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned TypeID = DAG.getMachineFunction().getTypeIDFor(GV);
Res = DAG.getConstant(TypeID, sdl, MVT::i32);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::eh_return_i32:
@@ -5422,15 +5825,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getControlRoot(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::eh_unwind_init:
DAG.getMachineFunction().setCallsUnwindInit(true);
- return nullptr;
+ return;
case Intrinsic::eh_dwarf_cfa:
setValue(&I, DAG.getNode(ISD::EH_DWARF_CFA, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::eh_sjlj_callsite: {
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(0));
@@ -5438,7 +5841,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!");
MMI.setCurrentCallSite(CI->getZExtValue());
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_functioncontext: {
// Get and store the index of the function context.
@@ -5447,7 +5850,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
cast<AllocaInst>(I.getArgOperand(0)->stripPointerCasts());
int FI = FuncInfo.StaticAllocaMap[FnCtx];
MFI.setFunctionContextIndex(FI);
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_setjmp: {
SDValue Ops[2];
@@ -5457,34 +5860,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(MVT::i32, MVT::Other), Ops);
setValue(&I, Op.getValue(0));
DAG.setRoot(Op.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::eh_sjlj_longjmp:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other,
getRoot(), getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::eh_sjlj_setup_dispatch:
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_SETUP_DISPATCH, sdl, MVT::Other,
getRoot()));
- return nullptr;
+ return;
case Intrinsic::masked_gather:
visitMaskedGather(I);
- return nullptr;
+ return;
case Intrinsic::masked_load:
visitMaskedLoad(I);
- return nullptr;
+ return;
case Intrinsic::masked_scatter:
visitMaskedScatter(I);
- return nullptr;
+ return;
case Intrinsic::masked_store:
visitMaskedStore(I);
- return nullptr;
+ return;
case Intrinsic::masked_expandload:
visitMaskedLoad(I, true /* IsExpanding */);
- return nullptr;
+ return;
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
- return nullptr;
+ return;
case Intrinsic::x86_mmx_pslli_w:
case Intrinsic::x86_mmx_pslli_d:
case Intrinsic::x86_mmx_pslli_q:
@@ -5496,7 +5899,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShAmt = getValue(I.getArgOperand(1));
if (isa<ConstantSDNode>(ShAmt)) {
visitTargetIntrinsic(I, Intrinsic);
- return nullptr;
+ return;
}
unsigned NewIntrinsic = 0;
EVT ShAmtVT = MVT::v2i32;
@@ -5542,31 +5945,31 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
getValue(I.getArgOperand(0)), ShAmt);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
- return nullptr;
+ return;
case Intrinsic::log:
setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::log2:
setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::log10:
setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::exp:
setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::exp2:
setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::pow:
setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG, TLI));
- return nullptr;
+ return;
case Intrinsic::sqrt:
case Intrinsic::fabs:
case Intrinsic::sin:
@@ -5597,61 +6000,71 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
setValue(&I, DAG.getNode(Opcode, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
}
- case Intrinsic::minnum: {
- auto VT = getValue(I.getArgOperand(0)).getValueType();
- unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT)
- ? ISD::FMINIMUM
- : ISD::FMINNUM;
- setValue(&I, DAG.getNode(Opc, sdl, VT,
+ case Intrinsic::lround:
+ case Intrinsic::llround:
+ case Intrinsic::lrint:
+ case Intrinsic::llrint: {
+ unsigned Opcode;
+ switch (Intrinsic) {
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ case Intrinsic::lround: Opcode = ISD::LROUND; break;
+ case Intrinsic::llround: Opcode = ISD::LLROUND; break;
+ case Intrinsic::lrint: Opcode = ISD::LRINT; break;
+ case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
+ }
+
+ EVT RetVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
+ setValue(&I, DAG.getNode(Opcode, sdl, RetVT,
+ getValue(I.getArgOperand(0))));
+ return;
+ }
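// [Editorial sketch, not part of the patch] The new LROUND/LLROUND/LRINT/
// LLRINT nodes carry the C math-library semantics: lround rounds halfway
// cases away from zero, lrint honours the current rounding mode. A minimal
// reference, assuming IEEE-754 doubles; the *_ref names are illustrative:
#include <cmath>
long lround_ref(double x) { return std::lround(x); }        // ties away from 0
long long llround_ref(double x) { return std::llround(x); }
long lrint_ref(double x) { return std::lrint(x); }          // current FP mode
long long llrint_ref(double x) { return std::llrint(x); }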
+ case Intrinsic::minnum:
+ setValue(&I, DAG.getNode(ISD::FMINNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
- }
- case Intrinsic::maxnum: {
- auto VT = getValue(I.getArgOperand(0)).getValueType();
- unsigned Opc =
- I.hasNoNaNs() && TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT)
- ? ISD::FMAXIMUM
- : ISD::FMAXNUM;
- setValue(&I, DAG.getNode(Opc, sdl, VT,
+ return;
+ case Intrinsic::maxnum:
+ setValue(&I, DAG.getNode(ISD::FMAXNUM, sdl,
+ getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
- }
+ return;
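// [Editorial sketch, not part of the patch] For context on this change:
// FMINNUM and FMINIMUM differ on NaN and signed-zero handling, which is why
// they are not freely interchangeable. A reference model, assuming IEEE-754
// doubles; the names are illustrative:
#include <cmath>
#include <limits>
double fminnum_ref(double a, double b) {
  if (std::isnan(a)) return b;            // a NaN loses against a number
  if (std::isnan(b)) return a;
  return a < b ? a : b;
}
double fminimum_ref(double a, double b) {
  if (std::isnan(a) || std::isnan(b))     // a NaN wins
    return std::numeric_limits<double>::quiet_NaN();
  if (a == 0.0 && b == 0.0)               // -0.0 is strictly below +0.0
    return std::signbit(a) ? a : b;
  return a < b ? a : b;
}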
case Intrinsic::minimum:
setValue(&I, DAG.getNode(ISD::FMINIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::maximum:
setValue(&I, DAG.getNode(ISD::FMAXIMUM, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::copysign:
setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1))));
- return nullptr;
+ return;
case Intrinsic::fma:
setValue(&I, DAG.getNode(ISD::FMA, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2))));
- return nullptr;
+ return;
case Intrinsic::experimental_constrained_fadd:
case Intrinsic::experimental_constrained_fsub:
case Intrinsic::experimental_constrained_fmul:
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptrunc:
+ case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
case Intrinsic::experimental_constrained_pow:
case Intrinsic::experimental_constrained_powi:
@@ -5671,7 +6084,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
- return nullptr;
+ return;
case Intrinsic::fmuladd: {
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
@@ -5693,7 +6106,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(2)));
setValue(&I, Add);
}
- return nullptr;
+ return;
}
case Intrinsic::convert_to_fp16:
setValue(&I, DAG.getNode(ISD::BITCAST, sdl, MVT::i16,
@@ -5701,17 +6114,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
DAG.getTargetConstant(0, sdl,
MVT::i32))));
- return nullptr;
+ return;
case Intrinsic::convert_from_fp16:
setValue(&I, DAG.getNode(ISD::FP_EXTEND, sdl,
TLI.getValueType(DAG.getDataLayout(), I.getType()),
DAG.getNode(ISD::BITCAST, sdl, MVT::f16,
getValue(I.getArgOperand(0)))));
- return nullptr;
+ return;
case Intrinsic::pcmarker: {
SDValue Tmp = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp));
- return nullptr;
+ return;
}
case Intrinsic::readcyclecounter: {
SDValue Op = getRoot();
@@ -5719,25 +6132,25 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(MVT::i64, MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::bitreverse:
setValue(&I, DAG.getNode(ISD::BITREVERSE, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::bswap:
setValue(&I, DAG.getNode(ISD::BSWAP, sdl,
getValue(I.getArgOperand(0)).getValueType(),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::cttz: {
SDValue Arg = getValue(I.getArgOperand(0));
ConstantInt *CI = cast<ConstantInt>(I.getArgOperand(1));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::ctlz: {
SDValue Arg = getValue(I.getArgOperand(0));
@@ -5745,13 +6158,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF,
sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::ctpop: {
SDValue Arg = getValue(I.getArgOperand(0));
EVT Ty = Arg.getValueType();
setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg));
- return nullptr;
+ return;
}
case Intrinsic::fshl:
case Intrinsic::fshr: {
@@ -5767,7 +6180,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto FunnelOpcode = IsFSHL ? ISD::FSHL : ISD::FSHR;
if (TLI.isOperationLegalOrCustom(FunnelOpcode, VT)) {
setValue(&I, DAG.getNode(FunnelOpcode, sdl, VT, X, Y, Z));
- return nullptr;
+ return;
}
// When X == Y, this is rotate. If the data type has a power-of-2 size, we
@@ -5777,7 +6190,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
auto RotateOpcode = IsFSHL ? ISD::ROTL : ISD::ROTR;
if (TLI.isOperationLegalOrCustom(RotateOpcode, VT)) {
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, Z));
- return nullptr;
+ return;
}
// Some targets only rotate one way. Try the opposite direction.
@@ -5786,7 +6199,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Negate the shift amount because it is safe to ignore the high bits.
SDValue NegShAmt = DAG.getNode(ISD::SUB, sdl, VT, Zero, Z);
setValue(&I, DAG.getNode(RotateOpcode, sdl, VT, X, NegShAmt));
- return nullptr;
+ return;
}
// fshl (rotl): (X << (Z % BW)) | (X >> ((0 - Z) % BW))
@@ -5796,7 +6209,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue ShX = DAG.getNode(ISD::SHL, sdl, VT, X, IsFSHL ? ShAmt : NShAmt);
SDValue ShY = DAG.getNode(ISD::SRL, sdl, VT, X, IsFSHL ? NShAmt : ShAmt);
setValue(&I, DAG.getNode(ISD::OR, sdl, VT, ShX, ShY));
- return nullptr;
+ return;
}
// fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
@@ -5816,39 +6229,48 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// For fshr, 0-shift returns the 2nd arg (Y).
SDValue IsZeroShift = DAG.getSetCC(sdl, CCVT, ShAmt, Zero, ISD::SETEQ);
setValue(&I, DAG.getSelect(sdl, VT, IsZeroShift, IsFSHL ? X : Y, Or));
- return nullptr;
+ return;
}
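// [Editorial sketch, not part of the patch] A scalar reference for the fshl
// expansion built above, matching the commented formula
//   fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
// together with the 0-shift select; the name is illustrative:
#include <cstdint>
uint32_t fshl_ref(uint32_t X, uint32_t Y, uint32_t Z) {
  const uint32_t BW = 32;
  uint32_t S = Z % BW;
  if (S == 0)
    return X;                         // for fshl, a 0-shift returns the 1st arg
  return (X << S) | (Y >> (BW - S));  // both shift amounts are in [1, BW-1]
}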
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SADDSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::uadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::UADDSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::ssub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::SSUBSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
case Intrinsic::usub_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::USUBSAT, sdl, Op1.getValueType(), Op1, Op2));
- return nullptr;
+ return;
}
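// [Editorial sketch, not part of the patch] Per-element semantics of the
// saturating nodes created above, shown for 8-bit lanes; names illustrative:
#include <cstdint>
int8_t sadd_sat_ref(int8_t a, int8_t b) {
  int r = int(a) + int(b);                     // widen, then clamp
  if (r > INT8_MAX) return INT8_MAX;
  if (r < INT8_MIN) return INT8_MIN;
  return int8_t(r);
}
uint8_t usub_sat_ref(uint8_t a, uint8_t b) {
  return a > b ? uint8_t(a - b) : uint8_t(0);  // clamps at zero
}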
- case Intrinsic::smul_fix: {
+ case Intrinsic::smul_fix:
+ case Intrinsic::umul_fix: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
SDValue Op3 = getValue(I.getArgOperand(2));
- setValue(&I,
- DAG.getNode(ISD::SMULFIX, sdl, Op1.getValueType(), Op1, Op2, Op3));
- return nullptr;
+ setValue(&I, DAG.getNode(FixedPointIntrinsicToOpcode(Intrinsic), sdl,
+ Op1.getValueType(), Op1, Op2, Op3));
+ return;
+ }
+ case Intrinsic::smul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::SMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
}
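// [Editorial sketch, not part of the patch] Reference semantics for the
// fixed-point multiply nodes: multiply at double width, then shift out the
// 'scale' fractional bits (the new saturating variant additionally clamps the
// shifted product). The name is illustrative:
#include <cstdint>
int32_t smul_fix_ref(int32_t a, int32_t b, unsigned scale) {
  int64_t prod = int64_t(a) * int64_t(b);  // exact 64-bit product
  return int32_t(prod >> scale);           // arithmetic shift keeps the sign
}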
case Intrinsic::stacksave: {
SDValue Op = getRoot();
@@ -5857,26 +6279,26 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Op);
setValue(&I, Res);
DAG.setRoot(Res.getValue(1));
- return nullptr;
+ return;
}
case Intrinsic::stackrestore:
Res = getValue(I.getArgOperand(0));
DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res));
- return nullptr;
+ return;
case Intrinsic::get_dynamic_area_offset: {
SDValue Op = getRoot();
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
EVT ResTy = TLI.getValueType(DAG.getDataLayout(), I.getType());
// Result type for @llvm.get.dynamic.area.offset should match PtrTy for
// target.
- if (PtrTy != ResTy)
+ if (PtrTy.getSizeInBits() < ResTy.getSizeInBits())
report_fatal_error("Wrong result type for @llvm.get.dynamic.area.offset"
" intrinsic!");
Res = DAG.getNode(ISD::GET_DYNAMIC_AREA_OFFSET, sdl, DAG.getVTList(ResTy),
Op);
DAG.setRoot(Op);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::stackguard: {
EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
@@ -5896,7 +6318,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = TLI.emitStackGuardXorFP(DAG, Res, sdl);
DAG.setRoot(Chain);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::stackprotector: {
// Emit code into the DAG to store the stack guard onto the stack.
@@ -5923,7 +6345,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
/* Alignment = */ 0, MachineMemOperand::MOVolatile);
setValue(&I, Res);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::objectsize: {
// If we don't know by now, we're never going to know.
@@ -5940,14 +6362,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getConstant(0, sdl, Ty);
setValue(&I, Res);
- return nullptr;
+ return;
}
case Intrinsic::is_constant:
// If this wasn't constant-folded away by now, then it's not a
// constant.
setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
- return nullptr;
+ return;
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
@@ -5955,12 +6377,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::strip_invariant_group:
// Drop the intrinsic, but forward the value
setValue(&I, getValue(I.getOperand(0)));
- return nullptr;
+ return;
case Intrinsic::assume:
case Intrinsic::var_annotation:
case Intrinsic::sideeffect:
// Discard annotate attributes, assumptions, and artificial side-effects.
- return nullptr;
+ return;
case Intrinsic::codeview_annotation: {
// Emit a label associated with this metadata.
@@ -5971,7 +6393,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
MF.addCodeViewAnnotation(Label, cast<MDNode>(MD));
Res = DAG.getLabelNode(ISD::ANNOTATION_LABEL, sdl, getRoot(), Label);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::init_trampoline: {
@@ -5988,13 +6410,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops);
DAG.setRoot(Res);
- return nullptr;
+ return;
}
case Intrinsic::adjust_trampoline:
setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl,
TLI.getPointerTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
- return nullptr;
+ return;
case Intrinsic::gcroot: {
assert(DAG.getMachineFunction().getFunction().hasGC() &&
"only valid in functions with gc specified, enforced by Verifier");
@@ -6004,19 +6426,19 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
FrameIndexSDNode *FI = cast<FrameIndexSDNode>(getValue(Alloca).getNode());
GFI->addStackRoot(FI->getIndex(), TypeMap);
- return nullptr;
+ return;
}
case Intrinsic::gcread:
case Intrinsic::gcwrite:
llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!");
case Intrinsic::flt_rounds:
setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32));
- return nullptr;
+ return;
case Intrinsic::expect:
// Just replace __builtin_expect(exp, c) with EXP.
setValue(&I, getValue(I.getArgOperand(0)));
- return nullptr;
+ return;
case Intrinsic::debugtrap:
case Intrinsic::trap: {
@@ -6028,7 +6450,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ?
ISD::TRAP : ISD::DEBUGTRAP;
DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot()));
- return nullptr;
+ return;
}
TargetLowering::ArgListTy Args;
@@ -6041,7 +6463,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
DAG.setRoot(Result.second);
- return nullptr;
+ return;
}
case Intrinsic::uadd_with_overflow:
@@ -6063,9 +6485,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
- SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1);
+ EVT ResultVT = Op1.getValueType();
+ EVT OverflowVT = MVT::i1;
+ if (ResultVT.isVector())
+ OverflowVT = EVT::getVectorVT(
+ *Context, OverflowVT, ResultVT.getVectorNumElements());
+
+ SDVTList VTs = DAG.getVTList(ResultVT, OverflowVT);
setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2));
- return nullptr;
+ return;
}
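// [Editorial sketch, not part of the patch] The EVT::getVectorVT change above
// gives vector overflow intrinsics an i1-per-lane overflow result; each lane
// follows the usual scalar semantics, e.g. for unsigned add (illustrative):
#include <cstdint>
#include <utility>
std::pair<uint32_t, bool> uadd_with_overflow_ref(uint32_t a, uint32_t b) {
  uint32_t r = a + b;   // wraps modulo 2^32
  return {r, r < a};    // overflow iff the sum wrapped
}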
case Intrinsic::prefetch: {
SDValue Ops[5];
@@ -6088,21 +6516,24 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
PendingLoads.push_back(Result);
Result = getRoot();
DAG.setRoot(Result);
- return nullptr;
+ return;
}
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end: {
bool IsStart = (Intrinsic == Intrinsic::lifetime_start);
// Stack coloring is not enabled in O0, discard region information.
if (TM.getOptLevel() == CodeGenOpt::None)
- return nullptr;
+ return;
- SmallVector<Value *, 4> Allocas;
- GetUnderlyingObjects(I.getArgOperand(1), Allocas, *DL);
+ const int64_t ObjectSize =
+ cast<ConstantInt>(I.getArgOperand(0))->getSExtValue();
+ Value *const ObjectPtr = I.getArgOperand(1);
+ SmallVector<const Value *, 4> Allocas;
+ GetUnderlyingObjects(ObjectPtr, Allocas, *DL);
- for (SmallVectorImpl<Value*>::iterator Object = Allocas.begin(),
+ for (SmallVectorImpl<const Value*>::iterator Object = Allocas.begin(),
E = Allocas.end(); Object != E; ++Object) {
- AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
+ const AllocaInst *LifetimeObject = dyn_cast_or_null<AllocaInst>(*Object);
// Could not find an Alloca.
if (!LifetimeObject)
@@ -6112,49 +6543,50 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// valid frame index.
auto SI = FuncInfo.StaticAllocaMap.find(LifetimeObject);
if (SI == FuncInfo.StaticAllocaMap.end())
- return nullptr;
-
- int FI = SI->second;
-
- SDValue Ops[2];
- Ops[0] = getRoot();
- Ops[1] =
- DAG.getFrameIndex(FI, TLI.getFrameIndexTy(DAG.getDataLayout()), true);
- unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END);
+ return;
- Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops);
+ const int FrameIndex = SI->second;
+ int64_t Offset;
+ if (GetPointerBaseWithConstantOffset(
+ ObjectPtr, Offset, DAG.getDataLayout()) != LifetimeObject)
+ Offset = -1; // Cannot determine offset from alloca to lifetime object.
+ Res = DAG.getLifetimeNode(IsStart, sdl, getRoot(), FrameIndex, ObjectSize,
+ Offset);
DAG.setRoot(Res);
}
- return nullptr;
+ return;
}
case Intrinsic::invariant_start:
// Discard region information.
setValue(&I, DAG.getUNDEF(TLI.getPointerTy(DAG.getDataLayout())));
- return nullptr;
+ return;
case Intrinsic::invariant_end:
// Discard region information.
- return nullptr;
+ return;
case Intrinsic::clear_cache:
- return TLI.getClearCacheBuiltinName();
+    // FunctionName may be null.
+ if (const char *FunctionName = TLI.getClearCacheBuiltinName())
+ lowerCallToExternalSymbol(I, FunctionName);
+ return;
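// [Editorial sketch, not part of the patch] On targets where
// getClearCacheBuiltinName() returns a name such as "__clear_cache", the
// intrinsic is now lowered to an ordinary external call, roughly as below;
// the exact helper signature varies slightly across toolchains:
#include <cstddef>
extern "C" void __clear_cache(char *begin, char *end);
void flush_code_range_ref(char *p, std::size_t n) {
  __clear_cache(p, p + n); // invalidate the icache for freshly written code
}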
case Intrinsic::donothing:
// ignore
- return nullptr;
+ return;
case Intrinsic::experimental_stackmap:
visitStackmap(I);
- return nullptr;
+ return;
case Intrinsic::experimental_patchpoint_void:
case Intrinsic::experimental_patchpoint_i64:
visitPatchpoint(&I);
- return nullptr;
+ return;
case Intrinsic::experimental_gc_statepoint:
LowerStatepoint(ImmutableStatepoint(&I));
- return nullptr;
+ return;
case Intrinsic::experimental_gc_result:
visitGCResult(cast<GCResultInst>(I));
- return nullptr;
+ return;
case Intrinsic::experimental_gc_relocate:
visitGCRelocate(cast<GCRelocateInst>(I));
- return nullptr;
+ return;
case Intrinsic::instrprof_increment:
llvm_unreachable("instrprof failed to lower an increment");
case Intrinsic::instrprof_value_profile:
@@ -6182,7 +6614,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
.addFrameIndex(FI);
}
- return nullptr;
+ return;
}
case Intrinsic::localrecover: {
@@ -6211,7 +6643,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Add = DAG.getNode(ISD::ADD, sdl, PtrVT, FPVal, OffsetVal);
setValue(&I, Add);
- return nullptr;
+ return;
}
case Intrinsic::eh_exceptionpointer:
@@ -6226,7 +6658,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (Intrinsic == Intrinsic::eh_exceptioncode)
N = DAG.getZExtOrTrunc(N, getCurSDLoc(), MVT::i32);
setValue(&I, N);
- return nullptr;
+ return;
}
case Intrinsic::xray_customevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
@@ -6234,7 +6666,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
- return nullptr;
+ return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
@@ -6257,7 +6689,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
- return nullptr;
+ return;
}
case Intrinsic::xray_typedevent: {
// Here we want to make sure that the intrinsic behaves as if it has a
@@ -6265,7 +6697,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// FIXME: Support other platforms later.
const auto &Triple = DAG.getTarget().getTargetTriple();
if (Triple.getArch() != Triple::x86_64 || !Triple.isOSLinux())
- return nullptr;
+ return;
SDLoc DL = getCurSDLoc();
SmallVector<SDValue, 8> Ops;
@@ -6292,14 +6724,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue patchableNode = SDValue(MN, 0);
DAG.setRoot(patchableNode);
setValue(&I, patchableNode);
- return nullptr;
+ return;
}
case Intrinsic::experimental_deoptimize:
LowerDeoptimizeCall(&I);
- return nullptr;
+ return;
- case Intrinsic::experimental_vector_reduce_fadd:
- case Intrinsic::experimental_vector_reduce_fmul:
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
case Intrinsic::experimental_vector_reduce_add:
case Intrinsic::experimental_vector_reduce_mul:
case Intrinsic::experimental_vector_reduce_and:
@@ -6312,11 +6744,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::experimental_vector_reduce_fmax:
case Intrinsic::experimental_vector_reduce_fmin:
visitVectorReduce(I, Intrinsic);
- return nullptr;
+ return;
case Intrinsic::icall_branch_funnel: {
SmallVector<SDValue, 16> Ops;
- Ops.push_back(DAG.getRoot());
Ops.push_back(getValue(I.getArgOperand(0)));
int64_t Offset;
@@ -6359,20 +6790,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
Ops.push_back(T.Target);
}
+ Ops.push_back(DAG.getRoot()); // Chain
SDValue N(DAG.getMachineNode(TargetOpcode::ICALL_BRANCH_FUNNEL,
getCurSDLoc(), MVT::Other, Ops),
0);
DAG.setRoot(N);
setValue(&I, N);
HasTailCall = true;
- return nullptr;
+ return;
}
case Intrinsic::wasm_landingpad_index:
// Information this intrinsic contained has been transferred to
// MachineFunction in SelectionDAGISel::PrepareEHLandingPad. We can safely
// delete it now.
- return nullptr;
+ return;
+
+ case Intrinsic::aarch64_settag:
+ case Intrinsic::aarch64_settag_zero: {
+ const SelectionDAGTargetInfo &TSI = DAG.getSelectionDAGInfo();
+ bool ZeroMemory = Intrinsic == Intrinsic::aarch64_settag_zero;
+ SDValue Val = TSI.EmitTargetCodeForSetTag(
+ DAG, getCurSDLoc(), getRoot(), getValue(I.getArgOperand(0)),
+ getValue(I.getArgOperand(1)), MachinePointerInfo(I.getArgOperand(0)),
+ ZeroMemory);
+ DAG.setRoot(Val);
+ setValue(&I, Val);
+ return;
+ }
}
}
@@ -6400,6 +6845,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
+ case Intrinsic::experimental_constrained_fptrunc:
+ Opcode = ISD::STRICT_FP_ROUND;
+ break;
+ case Intrinsic::experimental_constrained_fpext:
+ Opcode = ISD::STRICT_FP_EXTEND;
+ break;
case Intrinsic::experimental_constrained_sqrt:
Opcode = ISD::STRICT_FSQRT;
break;
@@ -6463,7 +6914,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
SDVTList VTs = DAG.getVTList(ValueVTs);
SDValue Result;
- if (FPI.isUnaryOp())
+ if (Opcode == ISD::STRICT_FP_ROUND)
+ Result = DAG.getNode(Opcode, sdl, VTs,
+ { Chain, getValue(FPI.getArgOperand(0)),
+ DAG.getTargetConstant(0, sdl,
+ TLI.getPointerTy(DAG.getDataLayout())) });
+ else if (FPI.isUnaryOp())
Result = DAG.getNode(Opcode, sdl, VTs,
{ Chain, getValue(FPI.getArgOperand(0)) });
else if (FPI.isTernaryOp())
@@ -6476,6 +6932,13 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
{ Chain, getValue(FPI.getArgOperand(0)),
getValue(FPI.getArgOperand(1)) });
+ if (FPI.getExceptionBehavior() !=
+ ConstrainedFPIntrinsic::ExceptionBehavior::ebIgnore) {
+ SDNodeFlags Flags;
+ Flags.setFPExcept(true);
+ Result->setFlags(Flags);
+ }
+
assert(Result.getNode()->getNumValues() == 2);
SDValue OutChain = Result.getValue(1);
DAG.setRoot(OutChain);
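// [Editorial sketch, not part of the patch] Setting FPExcept encodes the
// constrained-intrinsic contract: unless the exception behavior is ebIgnore,
// the node may raise FP status flags and must not be speculated or deleted.
// The observable C++ analogue (fully defined only under
// '#pragma STDC FENV_ACCESS ON'):
#include <cfenv>
float strict_fptrunc_ref(double x) {
  std::feclearexcept(FE_ALL_EXCEPT);
  float f = static_cast<float>(x);      // may raise FE_INEXACT / FE_OVERFLOW
  (void)std::fetestexcept(FE_INEXACT);  // the flags remain observable
  return f;
}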
@@ -6596,11 +7059,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SwiftErrorVal = V;
// We find the virtual register for the actual swifterror argument.
// Instead of using the Value, we use the virtual register instead.
- Entry.Node = DAG.getRegister(FuncInfo
- .getOrCreateSwiftErrorVRegUseAt(
- CS.getInstruction(), FuncInfo.MBB, V)
- .first,
- EVT(TLI.getPointerTy(DL)));
+ Entry.Node = DAG.getRegister(
+ SwiftError.getOrCreateVRegUseAt(CS.getInstruction(), FuncInfo.MBB, V),
+ EVT(TLI.getPointerTy(DL)));
}
Args.push_back(Entry);
@@ -6641,13 +7102,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- unsigned VReg; bool CreatedVReg;
- std::tie(VReg, CreatedVReg) =
- FuncInfo.getOrCreateSwiftErrorVRegDefAt(CS.getInstruction());
+ unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+ CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
- // We update the virtual register for the actual swifterror argument.
- if (CreatedVReg)
- FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, SwiftErrorVal, VReg);
DAG.setRoot(CopyNode);
}
}
@@ -6995,10 +7452,6 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
return;
}
- MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
- computeUsesVAFloatArgument(I, MMI);
-
- const char *RenameFn = nullptr;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
// Is this an LLVM intrinsic or a target-specific intrinsic?
@@ -7008,9 +7461,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
IID = II->getIntrinsicID(F);
if (IID) {
- RenameFn = visitIntrinsicCall(I, IID);
- if (!RenameFn)
- return;
+ visitIntrinsicCall(I, IID);
+ return;
}
}
@@ -7159,20 +7611,14 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
- SDValue Callee;
- if (!RenameFn)
- Callee = getValue(I.getCalledValue());
- else
- Callee = DAG.getExternalSymbol(
- RenameFn,
- DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
-
// Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
// have to do anything here to lower funclet bundles.
assert(!I.hasOperandBundlesOtherThan(
{LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
"Cannot lower calls with arbitrary operand bundles!");
+ SDValue Callee = getValue(I.getCalledValue());
+
if (I.countOperandBundlesOfType(LLVMContext::OB_deopt))
LowerCallSiteWithDeoptBundle(&I, Callee, nullptr);
else
@@ -7328,8 +7774,9 @@ static SDValue getAddressForMemoryInput(SDValue Chain, const SDLoc &Location,
MachineFunction &MF = DAG.getMachineFunction();
int SSFI = MF.getFrameInfo().CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getFrameIndexTy(DL));
- Chain = DAG.getStore(Chain, Location, OpInfo.CallOperand, StackSlot,
- MachinePointerInfo::getFixedStack(MF, SSFI));
+ Chain = DAG.getTruncStore(Chain, Location, OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(MF, SSFI),
+ TLI.getMemValueType(DL, Ty));
OpInfo.CallOperand = StackSlot;
return Chain;
@@ -7353,6 +7800,10 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
SmallVector<unsigned, 4> Regs;
const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
+ // No work to do for memory operations.
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory)
+ return;
+
// If this is a constraint for a single physreg, or a constraint for a
// register class, find it.
unsigned AssignedReg;
@@ -7435,7 +7886,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, const SDLoc &DL,
for (; NumRegs; --NumRegs, ++I) {
assert(I != RC->end() && "Ran out of registers to allocate!");
- auto R = (AssignedReg) ? *I : RegInfo.createVirtualRegister(RC);
+ Register R = AssignedReg ? Register(*I) : RegInfo.createVirtualRegister(RC);
Regs.push_back(R);
}
@@ -7509,9 +7960,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(
DAG.getDataLayout(), DAG.getSubtarget().getRegisterInfo(), CS);
- bool hasMemory = false;
-
- // Remember the HasSideEffect, AlignStack, AsmDialect, MayLoad and MayStore
+  // First pass: calculate HasSideEffect and ExtraFlags (AlignStack,
+  // AsmDialect, MayLoad, MayStore).
+ bool HasSideEffect = IA->hasSideEffects();
ExtraFlags ExtraInfo(CS);
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
@@ -7527,7 +7978,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Process the call argument. BasicBlocks are labels, currently appearing
// only in asm's.
- if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
+ const Instruction *I = CS.getInstruction();
+ if (isa<CallBrInst>(I) &&
+ (ArgNo - 1) >= (cast<CallBrInst>(I)->getNumArgOperands() -
+ cast<CallBrInst>(I)->getNumIndirectDests())) {
+ const auto *BA = cast<BlockAddress>(OpInfo.CallOperandVal);
+ EVT VT = TLI.getValueType(DAG.getDataLayout(), BA->getType(), true);
+ OpInfo.CallOperand = DAG.getTargetBlockAddress(BA, VT);
+ } else if (const auto *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
OpInfo.CallOperand = getValue(OpInfo.CallOperandVal);
@@ -7554,8 +8012,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.ConstraintVT = MVT::Other;
}
- if (!hasMemory)
- hasMemory = OpInfo.hasMemory(TLI);
+ if (!HasSideEffect)
+ HasSideEffect = OpInfo.hasMemory(TLI);
// Determine if this InlineAsm MayLoad or MayStore based on the constraints.
// FIXME: Could we compute this on OpInfo rather than T?
@@ -7566,17 +8024,20 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
ExtraInfo.update(T);
}
- SDValue Chain, Flag;
// We won't need to flush pending loads if this asm doesn't touch
// memory and is nonvolatile.
- if (hasMemory || IA->hasSideEffects())
- Chain = getRoot();
- else
- Chain = DAG.getRoot();
+ SDValue Flag, Chain = (HasSideEffect) ? getRoot() : DAG.getRoot();
+
+ bool IsCallBr = isa<CallBrInst>(CS.getInstruction());
+ if (IsCallBr) {
+ // If this is a callbr we need to flush pending exports since inlineasm_br
+ // is a terminator. We need to do this before nodes are glued to
+ // the inlineasm_br node.
+ Chain = getControlRoot();
+ }
- // Second pass over the constraints: compute which constraint option to use
- // and assign registers to constraints that want a specific physreg.
+ // Second pass over the constraints: compute which constraint option to use.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
@@ -7612,28 +8073,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
OpInfo.isIndirect = true;
}
- // If this constraint is for a specific register, allocate it before
- // anything else.
- SDISelAsmOperandInfo &RefOpInfo =
- OpInfo.isMatchingInputConstraint()
- ? ConstraintOperands[OpInfo.getMatchedOperand()]
- : OpInfo;
- if (RefOpInfo.ConstraintType == TargetLowering::C_Register)
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
- }
-
- // Third pass - Loop over all of the operands, assigning virtual or physregs
- // to register class operands.
- for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
- SDISelAsmOperandInfo &RefOpInfo =
- OpInfo.isMatchingInputConstraint()
- ? ConstraintOperands[OpInfo.getMatchedOperand()]
- : OpInfo;
-
- // C_Register operands have already been allocated, Other/Memory don't need
- // to be.
- if (RefOpInfo.ConstraintType == TargetLowering::C_RegisterClass)
- GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
}
// AsmNodeOperands - The operands for the ISD::INLINEASM node.
@@ -7653,21 +8092,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.push_back(DAG.getTargetConstant(
ExtraInfo.get(), getCurSDLoc(), TLI.getPointerTy(DAG.getDataLayout())));
- // Loop over all of the inputs, copying the operand values into the
- // appropriate registers and processing the output regs.
- RegsForValue RetValRegs;
-
- // IndirectStoresToEmit - The set of stores to emit after the inline asm node.
- std::vector<std::pair<RegsForValue, Value *>> IndirectStoresToEmit;
-
+  // Third pass: loop over operands to prepare DAG-level operands. As part of
+  // this, assign virtual and physical registers for inputs and outputs.
for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ // Assign Registers.
+ SDISelAsmOperandInfo &RefOpInfo =
+ OpInfo.isMatchingInputConstraint()
+ ? ConstraintOperands[OpInfo.getMatchedOperand()]
+ : OpInfo;
+ GetRegistersForValue(DAG, getCurSDLoc(), OpInfo, RefOpInfo);
+
switch (OpInfo.Type) {
case InlineAsm::isOutput:
- if (OpInfo.ConstraintType != TargetLowering::C_RegisterClass &&
- OpInfo.ConstraintType != TargetLowering::C_Register) {
- // Memory output, or 'other' output (e.g. 'X' constraint).
- assert(OpInfo.isIndirect && "Memory output must be indirect operand");
-
+ if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
+ (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
assert(ConstraintID != InlineAsm::Constraint_Unknown &&
@@ -7680,38 +8119,27 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- }
-
- // Otherwise, this is a register or register class output.
-
- // Copy the output from the appropriate register. Find a register that
- // we can use.
- if (OpInfo.AssignedRegs.Regs.empty()) {
- emitInlineAsmError(
- CS, "couldn't allocate output register for constraint '" +
- Twine(OpInfo.ConstraintCode) + "'");
- return;
- }
+ } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ !OpInfo.isIndirect) ||
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
+ // Otherwise, this outputs to a register (directly for C_Register /
+ // C_RegisterClass, and a target-defined fashion for C_Other). Find a
+ // register that we can use.
+ if (OpInfo.AssignedRegs.Regs.empty()) {
+ emitInlineAsmError(
+ CS, "couldn't allocate output register for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
- // If this is an indirect operand, store through the pointer after the
- // asm.
- if (OpInfo.isIndirect) {
- IndirectStoresToEmit.push_back(std::make_pair(OpInfo.AssignedRegs,
- OpInfo.CallOperandVal));
- } else {
- // This is the result value of the call.
- assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
- // Concatenate this output onto the outputs list.
- RetValRegs.append(OpInfo.AssignedRegs);
+ // Add information to the INLINEASM node to know that this register is
+ // set.
+ OpInfo.AssignedRegs.AddInlineAsmOperands(
+ OpInfo.isEarlyClobber ? InlineAsm::Kind_RegDefEarlyClobber
+ : InlineAsm::Kind_RegDef,
+ false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
}
-
- // Add information to the INLINEASM node to know that this register is
- // set.
- OpInfo.AssignedRegs
- .AddInlineAsmOperands(OpInfo.isEarlyClobber
- ? InlineAsm::Kind_RegDefEarlyClobber
- : InlineAsm::Kind_RegDef,
- false, 0, getCurSDLoc(), DAG, AsmNodeOperands);
break;
case InlineAsm::isInput: {
@@ -7865,98 +8293,117 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
- Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(),
+ unsigned ISDOpc = IsCallBr ? ISD::INLINEASM_BR : ISD::INLINEASM;
+ Chain = DAG.getNode(ISDOpc, getCurSDLoc(),
DAG.getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
Flag = Chain.getValue(1);
- // If this asm returns a register value, copy the result from that register
- // and set it as the value of the call.
- if (!RetValRegs.Regs.empty()) {
- SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, CS.getInstruction());
-
- llvm::Type *CSResultType = CS.getType();
- unsigned numRet;
- ArrayRef<Type *> ResultTypes;
- SmallVector<SDValue, 1> ResultValues(1);
- if (StructType *StructResult = dyn_cast<StructType>(CSResultType)) {
- numRet = StructResult->getNumElements();
- assert(Val->getNumOperands() == numRet &&
- "Mismatch in number of output operands in asm result");
- ResultTypes = StructResult->elements();
- ArrayRef<SDUse> ValueUses = Val->ops();
- ResultValues.resize(numRet);
- std::transform(ValueUses.begin(), ValueUses.end(), ResultValues.begin(),
- [](const SDUse &u) -> SDValue { return u.get(); });
- } else {
- numRet = 1;
- ResultValues[0] = Val;
- ResultTypes = makeArrayRef(CSResultType);
- }
- SmallVector<EVT, 1> ResultVTs(numRet);
- for (unsigned i = 0; i < numRet; i++) {
- EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), ResultTypes[i]);
- SDValue Val = ResultValues[i];
- assert(ResultTypes[i]->isSized() && "Unexpected unsized type");
- // If the type of the inline asm call site return value is different but
- // has same size as the type of the asm output bitcast it. One example
- // of this is for vectors with different width / number of elements.
- // This can happen for register classes that can contain multiple
- // different value types. The preg or vreg allocated may not have the
- // same VT as was expected.
- //
- // This can also happen for a return value that disagrees with the
- // register class it is put in, eg. a double in a general-purpose
- // register on a 32-bit machine.
- if (ResultVT != Val.getValueType() &&
- ResultVT.getSizeInBits() == Val.getValueSizeInBits())
- Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, Val);
- else if (ResultVT != Val.getValueType() && ResultVT.isInteger() &&
- Val.getValueType().isInteger()) {
- // If a result value was tied to an input value, the computed result
- // may have a wider width than the expected result. Extract the
- // relevant portion.
- Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, Val);
- }
+ // Do additional work to generate outputs.
- assert(ResultVT == Val.getValueType() && "Asm result value mismatch!");
- ResultVTs[i] = ResultVT;
- ResultValues[i] = Val;
- }
+ SmallVector<EVT, 1> ResultVTs;
+ SmallVector<SDValue, 1> ResultValues;
+ SmallVector<SDValue, 8> OutChains;
- Val = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
- DAG.getVTList(ResultVTs), ResultValues);
- setValue(CS.getInstruction(), Val);
- // Don't need to use this as a chain in this case.
- if (!IA->hasSideEffects() && !hasMemory && IndirectStoresToEmit.empty())
- return;
- }
+ llvm::Type *CSResultType = CS.getType();
+ ArrayRef<Type *> ResultTypes;
+ if (StructType *StructResult = dyn_cast<StructType>(CSResultType))
+ ResultTypes = StructResult->elements();
+ else if (!CSResultType->isVoidTy())
+ ResultTypes = makeArrayRef(CSResultType);
+
+ auto CurResultType = ResultTypes.begin();
+ auto handleRegAssign = [&](SDValue V) {
+ assert(CurResultType != ResultTypes.end() && "Unexpected value");
+ assert((*CurResultType)->isSized() && "Unexpected unsized type");
+ EVT ResultVT = TLI.getValueType(DAG.getDataLayout(), *CurResultType);
+ ++CurResultType;
+    // If the type of the inline asm call site return value is different but
+    // has the same size as the type of the asm output, bitcast it. One example
+    // of this is vectors with a different width / number of elements. This can
+    // happen for register classes that can contain multiple different value
+    // types. The preg or vreg allocated may not have the same VT as was
+    // expected.
+ //
+ // This can also happen for a return value that disagrees with the register
+ // class it is put in, eg. a double in a general-purpose register on a
+ // 32-bit machine.
+ if (ResultVT != V.getValueType() &&
+ ResultVT.getSizeInBits() == V.getValueSizeInBits())
+ V = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultVT, V);
+ else if (ResultVT != V.getValueType() && ResultVT.isInteger() &&
+ V.getValueType().isInteger()) {
+ // If a result value was tied to an input value, the computed result
+ // may have a wider width than the expected result. Extract the
+ // relevant portion.
+ V = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultVT, V);
+ }
+ assert(ResultVT == V.getValueType() && "Asm result value mismatch!");
+ ResultVTs.push_back(ResultVT);
+ ResultValues.push_back(V);
+ };
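// [Editorial sketch, not part of the patch] The BITCAST leg of handleRegAssign
// is a same-size reinterpretation and the TRUNCATE leg drops the high bits of
// a wider tied result; scalar equivalents (names illustrative):
#include <cstdint>
#include <cstring>
float bitcast_ref(uint32_t v) {     // same size, different type
  float f;
  std::memcpy(&f, &v, sizeof f);    // what ISD::BITCAST means for values
  return f;
}
uint16_t truncate_ref(uint32_t v) {
  return static_cast<uint16_t>(v);  // ISD::TRUNCATE keeps the low bits
}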
- std::vector<std::pair<SDValue, const Value *>> StoresToEmit;
+ // Deal with output operands.
+ for (SDISelAsmOperandInfo &OpInfo : ConstraintOperands) {
+ if (OpInfo.Type == InlineAsm::isOutput) {
+ SDValue Val;
+ // Skip trivial output operands.
+ if (OpInfo.AssignedRegs.Regs.empty())
+ continue;
- // Process indirect outputs, first output all of the flagged copies out of
- // physregs.
- for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) {
- RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
- const Value *Ptr = IndirectStoresToEmit[i].second;
- SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(),
- Chain, &Flag, IA);
- StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
+ switch (OpInfo.ConstraintType) {
+ case TargetLowering::C_Register:
+ case TargetLowering::C_RegisterClass:
+ Val = OpInfo.AssignedRegs.getCopyFromRegs(
+ DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
+ break;
+ case TargetLowering::C_Other:
+ Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
+ OpInfo, DAG);
+ break;
+ case TargetLowering::C_Memory:
+ break; // Already handled.
+ case TargetLowering::C_Unknown:
+      llvm_unreachable("Unexpected unknown constraint");
+ }
+
+      // Indirect outputs manifest as stores. Record the output chains.
+ if (OpInfo.isIndirect) {
+ const Value *Ptr = OpInfo.CallOperandVal;
+ assert(Ptr && "Expected value CallOperandVal for indirect asm operand");
+ SDValue Store = DAG.getStore(Chain, getCurSDLoc(), Val, getValue(Ptr),
+ MachinePointerInfo(Ptr));
+ OutChains.push_back(Store);
+ } else {
+        // Direct output: collect the values copied out of the registers.
+ assert(!CS.getType()->isVoidTy() && "Bad inline asm!");
+ if (Val.getOpcode() == ISD::MERGE_VALUES) {
+ for (const SDValue &V : Val->op_values())
+ handleRegAssign(V);
+ } else
+ handleRegAssign(Val);
+ }
+ }
}
- // Emit the non-flagged stores from the physregs.
- SmallVector<SDValue, 8> OutChains;
- for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) {
- SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first,
- getValue(StoresToEmit[i].second),
- MachinePointerInfo(StoresToEmit[i].second));
- OutChains.push_back(Val);
+ // Set results.
+ if (!ResultValues.empty()) {
+ assert(CurResultType == ResultTypes.end() &&
+ "Mismatch in number of ResultTypes");
+ assert(ResultValues.size() == ResultTypes.size() &&
+ "Mismatch in number of output operands in asm result");
+
+ SDValue V = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
+ DAG.getVTList(ResultVTs), ResultValues);
+ setValue(CS.getInstruction(), V);
}
+ // Collect store chains.
if (!OutChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, OutChains);
- DAG.setRoot(Chain);
+  // Only update the root if the inline assembly has a memory effect.
+ if (ResultValues.empty() || HasSideEffect || !OutChains.empty() || IsCallBr)
+ DAG.setRoot(Chain);
}
void SelectionDAGBuilder::emitInlineAsmError(ImmutableCallSite CS,
@@ -7989,12 +8436,16 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const DataLayout &DL = DAG.getDataLayout();
- SDValue V = DAG.getVAArg(TLI.getValueType(DAG.getDataLayout(), I.getType()),
- getCurSDLoc(), getRoot(), getValue(I.getOperand(0)),
- DAG.getSrcValue(I.getOperand(0)),
- DL.getABITypeAlignment(I.getType()));
- setValue(&I, V);
+ SDValue V = DAG.getVAArg(
+ TLI.getMemValueType(DAG.getDataLayout(), I.getType()), getCurSDLoc(),
+ getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)),
+ DL.getABITypeAlignment(I.getType()));
DAG.setRoot(V.getValue(1));
+
+ if (I.getType()->isPointerTy())
+ V = DAG.getPtrExtOrTrunc(
+ V, getCurSDLoc(), TLI.getValueType(DAG.getDataLayout(), I.getType()));
+ setValue(&I, V);
}
void SelectionDAGBuilder::visitVAEnd(const CallInst &I) {
@@ -8021,7 +8472,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
return Op;
ConstantRange CR = getConstantRangeFromMetadata(*Range);
- if (CR.isFullSet() || CR.isEmptySet() || CR.isWrappedSet())
+ if (CR.isFullSet() || CR.isEmptySet() || CR.isUpperWrapped())
return Op;
APInt Lo = CR.getUnsignedMin();
@@ -8058,7 +8509,7 @@ SDValue SelectionDAGBuilder::lowerRangeToAssertZExt(SelectionDAG &DAG,
/// convention or require stack pointer adjustment. Only a subset of the
/// intrinsic's operands need to participate in the calling convention.
void SelectionDAGBuilder::populateCallLoweringInfo(
- TargetLowering::CallLoweringInfo &CLI, ImmutableCallSite CS,
+ TargetLowering::CallLoweringInfo &CLI, const CallBase *Call,
unsigned ArgIdx, unsigned NumArgs, SDValue Callee, Type *ReturnTy,
bool IsPatchPoint) {
TargetLowering::ArgListTy Args;
@@ -8068,21 +8519,21 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
// Attributes for args start at offset 1, after the return attribute.
for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs;
ArgI != ArgE; ++ArgI) {
- const Value *V = CS->getOperand(ArgI);
+ const Value *V = Call->getOperand(ArgI);
assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic.");
TargetLowering::ArgListEntry Entry;
Entry.Node = getValue(V);
Entry.Ty = V->getType();
- Entry.setAttributes(&CS, ArgI);
+ Entry.setAttributes(Call, ArgI);
Args.push_back(Entry);
}
CLI.setDebugLoc(getCurSDLoc())
.setChain(getRoot())
- .setCallee(CS.getCallingConv(), ReturnTy, Callee, std::move(Args))
- .setDiscardResult(CS->use_empty())
+ .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
+ .setDiscardResult(Call->use_empty())
.setIsPatchPoint(IsPatchPoint);
}
@@ -8093,7 +8544,7 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
/// avoid constant materialization and register allocation.
///
/// FrameIndex operands are converted to TargetFrameIndex so that ISEL does not
-/// generate addess computation nodes, and so ExpandISelPseudo can convert the
+/// generate address computation nodes, and so FinalizeISel can convert the
/// TargetFrameIndex into a DirectMemRefOp StackMap location. This avoids
/// address materialization and register allocation, but may also be required
/// for correctness. If a StackMap (or PatchPoint) intrinsic directly uses an
@@ -8226,8 +8677,8 @@ void SelectionDAGBuilder::visitPatchpoint(ImmutableCallSite CS,
IsAnyRegCC ? Type::getVoidTy(*DAG.getContext()) : CS->getType();
TargetLowering::CallLoweringInfo CLI(DAG);
- populateCallLoweringInfo(CLI, CS, NumMetaOpers, NumCallArgs, Callee, ReturnTy,
- true);
+ populateCallLoweringInfo(CLI, cast<CallBase>(CS.getInstruction()),
+ NumMetaOpers, NumCallArgs, Callee, ReturnTy, true);
std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
SDNode *CallEnd = Result.second.getNode();
@@ -8351,15 +8802,17 @@ void SelectionDAGBuilder::visitVectorReduce(const CallInst &I,
FMF = I.getFastMathFlags();
switch (Intrinsic) {
- case Intrinsic::experimental_vector_reduce_fadd:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fadd:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FADD, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FADD, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FADD, dl, VT, Op1, Op2);
break;
- case Intrinsic::experimental_vector_reduce_fmul:
- if (FMF.isFast())
- Res = DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2);
+ case Intrinsic::experimental_vector_reduce_v2_fmul:
+ if (FMF.allowReassoc())
+ Res = DAG.getNode(ISD::FMUL, dl, VT, Op1,
+ DAG.getNode(ISD::VECREDUCE_FMUL, dl, VT, Op2));
else
Res = DAG.getNode(ISD::VECREDUCE_STRICT_FMUL, dl, VT, Op1, Op2);
break;
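// [Editorial sketch, not part of the patch] The v2 reductions take an explicit
// accumulator. With reassociation allowed, the code above reduces the vector
// unordered and folds the accumulator in afterwards; the strict reference
// order it must otherwise preserve is (name illustrative):
#include <vector>
float reduce_fadd_strict_ref(float Acc, const std::vector<float> &V) {
  for (float X : V)
    Acc += X;  // in-order: ((Acc + V0) + V1) + ...
  return Acc;
}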
@@ -8433,8 +8886,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (CLI.IsPostTypeLegalization) {
// If we are lowering a libcall after legalization, split the return type.
- SmallVector<EVT, 4> OldRetTys = std::move(RetTys);
- SmallVector<uint64_t, 4> OldOffsets = std::move(Offsets);
+ SmallVector<EVT, 4> OldRetTys;
+ SmallVector<uint64_t, 4> OldOffsets;
+ RetTys.swap(OldRetTys);
+ Offsets.swap(OldOffsets);
+
for (size_t i = 0, e = OldRetTys.size(); i != e; ++i) {
EVT RetVT = OldRetTys[i];
uint64_t Offset = OldOffsets[i];
@@ -8489,7 +8945,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// points into the callers stack frame.
CLI.IsTailCall = false;
} else {
+ bool NeedsRegBlock = functionArgumentNeedsConsecutiveRegisters(
+ CLI.RetTy, CLI.CallConv, CLI.IsVarArg);
for (unsigned I = 0, E = RetTys.size(); I != E; ++I) {
+ ISD::ArgFlagsTy Flags;
+ if (NeedsRegBlock) {
+ Flags.setInConsecutiveRegs();
+ if (I == RetTys.size() - 1)
+ Flags.setInConsecutiveRegsLast();
+ }
EVT VT = RetTys[I];
MVT RegisterVT = getRegisterTypeForCallingConv(CLI.RetTy->getContext(),
CLI.CallConv, VT);
@@ -8497,9 +8961,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
CLI.CallConv, VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
+ MyFlags.Flags = Flags;
MyFlags.VT = RegisterVT;
MyFlags.ArgVT = VT;
MyFlags.Used = CLI.IsReturnValueUsed;
+ if (CLI.RetTy->isPointerTy()) {
+ MyFlags.Flags.setPointer();
+ MyFlags.Flags.setPointerAddrSpace(
+ cast<PointerType>(CLI.RetTy)->getAddressSpace());
+ }
if (CLI.RetSExt)
MyFlags.Flags.setSExt();
if (CLI.RetZExt)
@@ -8550,6 +9020,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// specify the alignment it wants.
unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+ if (Args[i].Ty->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Args[i].Ty)->getAddressSpace());
+ }
if (Args[i].IsZExt)
Flags.setZExt();
if (Args[i].IsSExt)
@@ -8587,8 +9062,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (Args[i].IsByVal || Args[i].IsInAlloca) {
PointerType *Ty = cast<PointerType>(Args[i].Ty);
Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
- // For ByVal, alignment should come from FE. BE will guess if this
+
+ unsigned FrameSize = DL.getTypeAllocSize(
+ Args[i].ByValType ? Args[i].ByValType : ElementTy);
+ Flags.setByValSize(FrameSize);
+
+      // For ByVal, alignment should come from FE. BE will guess if this
      // info is not there but there are cases it cannot get right.
unsigned FrameAlign;
if (Args[i].Alignment)
@@ -8619,8 +9097,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// for now.
if (Args[i].IsReturned && !Op.getValueType().isVector() &&
CanLowerReturn) {
- assert(CLI.RetTy == Args[i].Ty && RetTys.size() == NumValues &&
- "unexpected use of 'returned'");
+ assert((CLI.RetTy == Args[i].Ty ||
+ (CLI.RetTy->isPointerTy() && Args[i].Ty->isPointerTy() &&
+ CLI.RetTy->getPointerAddressSpace() ==
+ Args[i].Ty->getPointerAddressSpace())) &&
+ RetTys.size() == NumValues && "unexpected use of 'returned'");
// Before passing 'returned' to the target lowering code, ensure that
// either the register MVT and the actual EVT are the same size or that
// the return value and argument are extended in the same way; in these
@@ -9023,7 +9504,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned PartBase = 0;
Type *FinalType = Arg.getType();
if (Arg.hasAttribute(Attribute::ByVal))
- FinalType = cast<PointerType>(FinalType)->getElementType();
+ FinalType = Arg.getParamByValType();
bool NeedsRegBlock = TLI->functionArgumentNeedsConsecutiveRegisters(
FinalType, F.getCallingConv(), F.isVarArg());
for (unsigned Value = 0, NumValues = ValueVTs.size();
@@ -9038,6 +9519,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
unsigned OriginalAlignment =
TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+ if (Arg.getType()->isPointerTy()) {
+ Flags.setPointer();
+ Flags.setPointerAddrSpace(
+ cast<PointerType>(Arg.getType())->getAddressSpace());
+ }
if (Arg.hasAttribute(Attribute::ZExt))
Flags.setZExt();
if (Arg.hasAttribute(Attribute::SExt))
@@ -9078,11 +9564,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setByVal();
}
if (Flags.isByVal() || Flags.isInAlloca()) {
- PointerType *Ty = cast<PointerType>(Arg.getType());
- Type *ElementTy = Ty->getElementType();
- Flags.setByValSize(DL.getTypeAllocSize(ElementTy));
- // For ByVal, alignment should be passed from FE. BE will guess if
- // this info is not there but there are cases it cannot get right.
+ Type *ElementTy = Arg.getParamByValType();
+
+ // For ByVal, size and alignment should be passed from FE. BE will
+ // guess if this info is not there but there are cases it cannot get
+ // right.
+ unsigned FrameSize = DL.getTypeAllocSize(Arg.getParamByValType());
+ Flags.setByValSize(FrameSize);
+
unsigned FrameAlign;
if (Arg.getParamAlignment())
FrameAlign = Arg.getParamAlignment();
@@ -9263,17 +9752,16 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg))
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB,
- FuncInfo->SwiftErrorArg, Reg);
+ SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
+ Reg);
}
// If this argument is live outside of the entry block, insert a copy from
// wherever we got it to the vreg that other BB's will reference it as.
- if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::CopyFromReg) {
+ if (Res.getOpcode() == ISD::CopyFromReg) {
// If we can, though, try to skip creating an unnecessary vreg.
// FIXME: This isn't very clean... it would be nice to make this more
- // general. It's also subtly incompatible with the hacks FastISel
- // uses with vregs.
+ // general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
@@ -9354,7 +9842,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
if (const Constant *C = dyn_cast<Constant>(PHIOp)) {
unsigned &RegOut = ConstantsOut[C];
if (RegOut == 0) {
- RegOut = FuncInfo.CreateRegs(C->getType());
+ RegOut = FuncInfo.CreateRegs(C);
CopyValueToVirtualRegister(C, RegOut);
}
Reg = RegOut;
@@ -9367,7 +9855,7 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) {
assert(isa<AllocaInst>(PHIOp) &&
FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(PHIOp)) &&
"Didn't codegen value into a register!??");
- Reg = FuncInfo.CreateRegs(PHIOp->getType());
+ Reg = FuncInfo.CreateRegs(PHIOp);
CopyValueToVirtualRegister(PHIOp, Reg);
}
}
@@ -9432,450 +9920,6 @@ void SelectionDAGBuilder::updateDAGForMaybeTailCall(SDValue MaybeTC) {
HasTailCall = true;
}
-uint64_t
-SelectionDAGBuilder::getJumpTableRange(const CaseClusterVector &Clusters,
- unsigned First, unsigned Last) const {
- assert(Last >= First);
- const APInt &LowCase = Clusters[First].Low->getValue();
- const APInt &HighCase = Clusters[Last].High->getValue();
- assert(LowCase.getBitWidth() == HighCase.getBitWidth());
-
- // FIXME: A range of consecutive cases has 100% density, but only requires one
- // comparison to lower. We should discriminate against such consecutive ranges
- // in jump tables.
-
- return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
-}
-
-uint64_t SelectionDAGBuilder::getJumpTableNumCases(
- const SmallVectorImpl<unsigned> &TotalCases, unsigned First,
- unsigned Last) const {
- assert(Last >= First);
- assert(TotalCases[Last] >= TotalCases[First]);
- uint64_t NumCases =
- TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
- return NumCases;
-}
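
Taken together, these two helpers feed the jump-table density heuristic: getJumpTableRange measures how many table slots [First..Last] would need (clamped so a later multiplication by 100 cannot overflow), while getJumpTableNumCases counts how many of those slots are real cases, using the TotalCases prefix sums. A standalone sketch with plain integers in place of APInt (the Range struct below is an illustrative stand-in for a CC_Range cluster, not LLVM API):

#include <cassert>
#include <cstdint>
#include <vector>

struct Range { uint64_t Low, High; }; // stand-in for a CC_Range cluster

// Prefix sums as built in findJumpTables: TotalCases[i] counts every
// case value covered by Clusters[0..i].
std::vector<uint64_t> totalCases(const std::vector<Range> &Clusters) {
  std::vector<uint64_t> Totals(Clusters.size());
  for (size_t I = 0; I < Clusters.size(); ++I) {
    Totals[I] = Clusters[I].High - Clusters[I].Low + 1;
    if (I != 0)
      Totals[I] += Totals[I - 1];
  }
  return Totals;
}

// Mirrors getJumpTableNumCases: number of cases in Clusters[First..Last].
uint64_t numCases(const std::vector<uint64_t> &Totals, unsigned First,
                  unsigned Last) {
  assert(Last >= First);
  return Totals[Last] - (First == 0 ? 0 : Totals[First - 1]);
}

For clusters covering {0..2} and {10..13}, Totals is {3, 7}, so the whole span has NumCases == 7 against a Range of 14 table slots, i.e. 50% density.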
-
-bool SelectionDAGBuilder::buildJumpTable(const CaseClusterVector &Clusters,
- unsigned First, unsigned Last,
- const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB,
- CaseCluster &JTCluster) {
- assert(First <= Last);
-
- auto Prob = BranchProbability::getZero();
- unsigned NumCmps = 0;
- std::vector<MachineBasicBlock*> Table;
- DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
-
- // Initialize probabilities in JTProbs.
- for (unsigned I = First; I <= Last; ++I)
- JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
-
- for (unsigned I = First; I <= Last; ++I) {
- assert(Clusters[I].Kind == CC_Range);
- Prob += Clusters[I].Prob;
- const APInt &Low = Clusters[I].Low->getValue();
- const APInt &High = Clusters[I].High->getValue();
- NumCmps += (Low == High) ? 1 : 2;
- if (I != First) {
- // Fill the gap between this and the previous cluster.
- const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
- assert(PreviousHigh.slt(Low));
- uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
- for (uint64_t J = 0; J < Gap; J++)
- Table.push_back(DefaultMBB);
- }
- uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
- for (uint64_t J = 0; J < ClusterSize; ++J)
- Table.push_back(Clusters[I].MBB);
- JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
- }
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- unsigned NumDests = JTProbs.size();
- if (TLI.isSuitableForBitTests(
- NumDests, NumCmps, Clusters[First].Low->getValue(),
- Clusters[Last].High->getValue(), DAG.getDataLayout())) {
- // Clusters[First..Last] should be lowered as bit tests instead.
- return false;
- }
-
- // Create the MBB that will load from and jump through the table.
- // Note: We create it here, but it's not inserted into the function yet.
- MachineFunction *CurMF = FuncInfo.MF;
- MachineBasicBlock *JumpTableMBB =
- CurMF->CreateMachineBasicBlock(SI->getParent());
-
- // Add successors. Note: use table order for determinism.
- SmallPtrSet<MachineBasicBlock *, 8> Done;
- for (MachineBasicBlock *Succ : Table) {
- if (Done.count(Succ))
- continue;
- addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
- Done.insert(Succ);
- }
- JumpTableMBB->normalizeSuccProbs();
-
- unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI.getJumpTableEncoding())
- ->createJumpTableIndex(Table);
-
- // Set up the jump table info.
- JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
- JumpTableHeader JTH(Clusters[First].Low->getValue(),
- Clusters[Last].High->getValue(), SI->getCondition(),
- nullptr, false);
- JTCases.emplace_back(std::move(JTH), std::move(JT));
-
- JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
- JTCases.size() - 1, Prob);
- return true;
-}
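
The table-filling loop above pads holes between clusters with the default destination, so the emitted table is dense even when the source cases are not. A minimal standalone sketch (std::string stands in for MachineBasicBlock*):

#include <cstdint>
#include <string>
#include <vector>

struct Cluster { uint64_t Low, High; std::string MBB; };

// Mirrors the gap-filling loop in buildJumpTable: clusters are laid out
// back to back, and values falling between two clusters get the default.
std::vector<std::string> buildTable(const std::vector<Cluster> &Clusters,
                                    const std::string &DefaultMBB) {
  std::vector<std::string> Table;
  for (size_t I = 0; I < Clusters.size(); ++I) {
    if (I != 0) {
      uint64_t Gap = Clusters[I].Low - Clusters[I - 1].High - 1;
      for (uint64_t J = 0; J < Gap; ++J)
        Table.push_back(DefaultMBB);
    }
    for (uint64_t J = 0; J <= Clusters[I].High - Clusters[I].Low; ++J)
      Table.push_back(Clusters[I].MBB);
  }
  return Table;
}

For ranges [1,2]->A and [5,5]->B with default D this yields {A, A, D, D, B}: the two-slot gap 3..4 is filled with the default block.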
-
-void SelectionDAGBuilder::findJumpTables(CaseClusterVector &Clusters,
- const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB) {
-#ifndef NDEBUG
- // Clusters must be non-empty, sorted, and only contain Range clusters.
- assert(!Clusters.empty());
- for (CaseCluster &C : Clusters)
- assert(C.Kind == CC_Range);
- for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
- assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (!TLI.areJTsAllowed(SI->getParent()->getParent()))
- return;
-
- const int64_t N = Clusters.size();
- const unsigned MinJumpTableEntries = TLI.getMinimumJumpTableEntries();
- const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
-
- if (N < 2 || N < MinJumpTableEntries)
- return;
-
- // TotalCases[i]: Total nbr of cases in Clusters[0..i].
- SmallVector<unsigned, 8> TotalCases(N);
- for (unsigned i = 0; i < N; ++i) {
- const APInt &Hi = Clusters[i].High->getValue();
- const APInt &Lo = Clusters[i].Low->getValue();
- TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
- if (i != 0)
- TotalCases[i] += TotalCases[i - 1];
- }
-
- // Cheap case: the whole range may be suitable for jump table.
-  uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);

- uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
- assert(NumCases < UINT64_MAX / 100);
- assert(Range >= NumCases);
- if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
- CaseCluster JTCluster;
- if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
- Clusters[0] = JTCluster;
- Clusters.resize(1);
- return;
- }
- }
-
- // The algorithm below is not suitable for -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
- return;
-
- // Split Clusters into minimum number of dense partitions. The algorithm uses
- // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
- // for the Case Statement'" (1994), but builds the MinPartitions array in
- // reverse order to make it easier to reconstruct the partitions in ascending
- // order. In the choice between two optimal partitionings, it picks the one
- // which yields more jump tables.
-
- // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
- SmallVector<unsigned, 8> MinPartitions(N);
- // LastElement[i] is the last element of the partition starting at i.
- SmallVector<unsigned, 8> LastElement(N);
- // PartitionsScore[i] is used to break ties when choosing between two
- // partitionings resulting in the same number of partitions.
- SmallVector<unsigned, 8> PartitionsScore(N);
- // For PartitionsScore, a small number of comparisons is considered as good as
- // a jump table and a single comparison is considered better than a jump
- // table.
- enum PartitionScores : unsigned {
- NoTable = 0,
- Table = 1,
- FewCases = 1,
- SingleCase = 2
- };
-
- // Base case: There is only one way to partition Clusters[N-1].
- MinPartitions[N - 1] = 1;
- LastElement[N - 1] = N - 1;
- PartitionsScore[N - 1] = PartitionScores::SingleCase;
-
- // Note: loop indexes are signed to avoid underflow.
- for (int64_t i = N - 2; i >= 0; i--) {
- // Find optimal partitioning of Clusters[i..N-1].
- // Baseline: Put Clusters[i] into a partition on its own.
- MinPartitions[i] = MinPartitions[i + 1] + 1;
- LastElement[i] = i;
- PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
-
- // Search for a solution that results in fewer partitions.
- for (int64_t j = N - 1; j > i; j--) {
- // Try building a partition from Clusters[i..j].
- uint64_t Range = getJumpTableRange(Clusters, i, j);
- uint64_t NumCases = getJumpTableNumCases(TotalCases, i, j);
- assert(NumCases < UINT64_MAX / 100);
- assert(Range >= NumCases);
- if (TLI.isSuitableForJumpTable(SI, NumCases, Range)) {
- unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
- int64_t NumEntries = j - i + 1;
-
- if (NumEntries == 1)
- Score += PartitionScores::SingleCase;
- else if (NumEntries <= SmallNumberOfEntries)
- Score += PartitionScores::FewCases;
- else if (NumEntries >= MinJumpTableEntries)
- Score += PartitionScores::Table;
-
- // If this leads to fewer partitions, or to the same number of
- // partitions with better score, it is a better partitioning.
- if (NumPartitions < MinPartitions[i] ||
- (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
- MinPartitions[i] = NumPartitions;
- LastElement[i] = j;
- PartitionsScore[i] = Score;
- }
- }
- }
- }
-
- // Iterate over the partitions, replacing some with jump tables in-place.
- unsigned DstIndex = 0;
- for (unsigned First = 0, Last; First < N; First = Last + 1) {
- Last = LastElement[First];
- assert(Last >= First);
- assert(DstIndex <= First);
- unsigned NumClusters = Last - First + 1;
-
- CaseCluster JTCluster;
- if (NumClusters >= MinJumpTableEntries &&
- buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
- Clusters[DstIndex++] = JTCluster;
- } else {
- for (unsigned I = First; I <= Last; ++I)
- std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
- }
- }
- Clusters.resize(DstIndex);
-}
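
The dynamic program above is the recurrence MinPartitions[i] = min(MinPartitions[i+1] + 1, 1 + MinPartitions[j+1] over all j > i where Clusters[i..j] is jump-table-suitable), evaluated right to left; the score array only breaks ties. A simplified standalone sketch, with suitability abstracted into a caller-supplied predicate (a hypothetical stand-in for TLI.isSuitableForJumpTable; scoring omitted):

#include <cstdint>
#include <vector>

// Backward DP from findJumpTables, reduced to its core. Suitable(i, j)
// abstracts the density and size checks; LastElt[i] records where the
// partition starting at i ends, for later reconstruction.
template <typename Pred>
std::vector<unsigned> minPartitions(int64_t N, Pred Suitable) {
  std::vector<unsigned> MinParts(N), LastElt(N);
  MinParts[N - 1] = 1;
  LastElt[N - 1] = N - 1;
  for (int64_t I = N - 2; I >= 0; --I) {
    MinParts[I] = MinParts[I + 1] + 1; // baseline: singleton partition
    LastElt[I] = I;
    for (int64_t J = N - 1; J > I; --J) {
      if (!Suitable(I, J))
        continue;
      unsigned Cand = 1 + (J == N - 1 ? 0 : MinParts[J + 1]);
      if (Cand < MinParts[I]) {
        MinParts[I] = Cand;
        LastElt[I] = J;
      }
    }
  }
  return LastElt; // follow LastElt forward to enumerate the partitions
}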
-
-bool SelectionDAGBuilder::buildBitTests(CaseClusterVector &Clusters,
- unsigned First, unsigned Last,
- const SwitchInst *SI,
- CaseCluster &BTCluster) {
- assert(First <= Last);
- if (First == Last)
- return false;
-
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- unsigned NumCmps = 0;
- for (int64_t I = First; I <= Last; ++I) {
- assert(Clusters[I].Kind == CC_Range);
- Dests.set(Clusters[I].MBB->getNumber());
- NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
- }
- unsigned NumDests = Dests.count();
-
- APInt Low = Clusters[First].Low->getValue();
- APInt High = Clusters[Last].High->getValue();
- assert(Low.slt(High));
-
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.isSuitableForBitTests(NumDests, NumCmps, Low, High, DL))
- return false;
-
- APInt LowBound;
- APInt CmpRange;
-
- const int BitWidth = TLI.getPointerTy(DL).getSizeInBits();
- assert(TLI.rangeFitsInWord(Low, High, DL) &&
- "Case range must fit in bit mask!");
-
- // Check if the clusters cover a contiguous range such that no value in the
- // range will jump to the default statement.
- bool ContiguousRange = true;
- for (int64_t I = First + 1; I <= Last; ++I) {
- if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
- ContiguousRange = false;
- break;
- }
- }
-
- if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
- // Optimize the case where all the case values fit in a word without having
- // to subtract minValue. In this case, we can optimize away the subtraction.
- LowBound = APInt::getNullValue(Low.getBitWidth());
- CmpRange = High;
- ContiguousRange = false;
- } else {
- LowBound = Low;
- CmpRange = High - Low;
- }
-
- CaseBitsVector CBV;
- auto TotalProb = BranchProbability::getZero();
- for (unsigned i = First; i <= Last; ++i) {
- // Find the CaseBits for this destination.
- unsigned j;
- for (j = 0; j < CBV.size(); ++j)
- if (CBV[j].BB == Clusters[i].MBB)
- break;
- if (j == CBV.size())
- CBV.push_back(
- CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
- CaseBits *CB = &CBV[j];
-
- // Update Mask, Bits and ExtraProb.
- uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
- uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
- assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
- CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
- CB->Bits += Hi - Lo + 1;
- CB->ExtraProb += Clusters[i].Prob;
- TotalProb += Clusters[i].Prob;
- }
-
- BitTestInfo BTI;
- llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
- // Sort by probability first, number of bits second, bit mask third.
- if (a.ExtraProb != b.ExtraProb)
- return a.ExtraProb > b.ExtraProb;
- if (a.Bits != b.Bits)
- return a.Bits > b.Bits;
- return a.Mask < b.Mask;
- });
-
- for (auto &CB : CBV) {
- MachineBasicBlock *BitTestBB =
- FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
- BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
- }
- BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
- SI->getCondition(), -1U, MVT::Other, false,
- ContiguousRange, nullptr, nullptr, std::move(BTI),
- TotalProb);
-
- BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
- BitTestCases.size() - 1, TotalProb);
- return true;
-}
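
The mask arithmetic in the loop above is easiest to check with concrete numbers: for a cluster covering values 4..6 relative to LowBound, Lo = 4 and Hi = 6, so (-1ULL >> (63 - (Hi - Lo))) << Lo first builds the 3-bit run 0b111 and then shifts it into place, giving 0b1110000. A standalone sketch:

#include <cassert>
#include <cstdint>

// Same bit trick as in buildBitTests: a mask with bits [Lo, Hi] set.
// The assert matches the original's precondition.
uint64_t caseMask(uint64_t Lo, uint64_t Hi) {
  assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
  return (-1ULL >> (63 - (Hi - Lo))) << Lo;
}
// caseMask(4, 6) == 0x70; caseMask(0, 0) == 0x1; caseMask(0, 63) == ~0ULL.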
-
-void SelectionDAGBuilder::findBitTestClusters(CaseClusterVector &Clusters,
- const SwitchInst *SI) {
-// Partition Clusters into as few subsets as possible, where each subset has a
-// range that fits in a machine word and has <= 3 unique destinations.
-
-#ifndef NDEBUG
- // Clusters must be sorted and contain Range or JumpTable clusters.
- assert(!Clusters.empty());
- assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
- for (const CaseCluster &C : Clusters)
- assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
- for (unsigned i = 1; i < Clusters.size(); ++i)
- assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
-#endif
-
- // The algorithm below is not suitable for -O0.
- if (TM.getOptLevel() == CodeGenOpt::None)
- return;
-
- // If target does not have legal shift left, do not emit bit tests at all.
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- const DataLayout &DL = DAG.getDataLayout();
-
- EVT PTy = TLI.getPointerTy(DL);
- if (!TLI.isOperationLegal(ISD::SHL, PTy))
- return;
-
- int BitWidth = PTy.getSizeInBits();
- const int64_t N = Clusters.size();
-
- // MinPartitions[i] is the minimum nbr of partitions of Clusters[i..N-1].
- SmallVector<unsigned, 8> MinPartitions(N);
- // LastElement[i] is the last element of the partition starting at i.
- SmallVector<unsigned, 8> LastElement(N);
-
- // FIXME: This might not be the best algorithm for finding bit test clusters.
-
- // Base case: There is only one way to partition Clusters[N-1].
- MinPartitions[N - 1] = 1;
- LastElement[N - 1] = N - 1;
-
- // Note: loop indexes are signed to avoid underflow.
- for (int64_t i = N - 2; i >= 0; --i) {
- // Find optimal partitioning of Clusters[i..N-1].
- // Baseline: Put Clusters[i] into a partition on its own.
- MinPartitions[i] = MinPartitions[i + 1] + 1;
- LastElement[i] = i;
-
- // Search for a solution that results in fewer partitions.
- // Note: the search is limited by BitWidth, reducing time complexity.
- for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
- // Try building a partition from Clusters[i..j].
-
- // Check the range.
- if (!TLI.rangeFitsInWord(Clusters[i].Low->getValue(),
- Clusters[j].High->getValue(), DL))
- continue;
-
- // Check nbr of destinations and cluster types.
- // FIXME: This works, but doesn't seem very efficient.
- bool RangesOnly = true;
- BitVector Dests(FuncInfo.MF->getNumBlockIDs());
- for (int64_t k = i; k <= j; k++) {
- if (Clusters[k].Kind != CC_Range) {
- RangesOnly = false;
- break;
- }
- Dests.set(Clusters[k].MBB->getNumber());
- }
- if (!RangesOnly || Dests.count() > 3)
- break;
-
- // Check if it's a better partition.
- unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
- if (NumPartitions < MinPartitions[i]) {
- // Found a better partition.
- MinPartitions[i] = NumPartitions;
- LastElement[i] = j;
- }
- }
- }
-
- // Iterate over the partitions, replacing with bit-test clusters in-place.
- unsigned DstIndex = 0;
- for (unsigned First = 0, Last; First < N; First = Last + 1) {
- Last = LastElement[First];
- assert(First <= Last);
- assert(DstIndex <= First);
-
- CaseCluster BitTestCluster;
- if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
- Clusters[DstIndex++] = BitTestCluster;
- } else {
- size_t NumClusters = Last - First + 1;
- std::memmove(&Clusters[DstIndex], &Clusters[First],
- sizeof(Clusters[0]) * NumClusters);
- DstIndex += NumClusters;
- }
- }
- Clusters.resize(DstIndex);
-}
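
Structurally this is the same backward DP as findJumpTables; the extra constraint is the machine word. Because each cluster covers at least one value and a bit-test partition must fit in a BitWidth-bit mask, no partition can span more than BitWidth clusters, which is why the inner search stops at j == i + BitWidth - 1. A plain-integer approximation of the range check that enforces this (the real rangeFitsInWord operates on APInt):

#include <cstdint>

// Approximation of TLI.rangeFitsInWord: the span of case values
// [Low, High] must be coverable by a BitWidth-wide mask. Assumes
// High >= Low and that the span does not overflow uint64_t.
bool rangeFitsInWord(uint64_t Low, uint64_t High, unsigned BitWidth) {
  return High - Low + 1 <= BitWidth;
}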
-
void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB) {
@@ -9977,10 +10021,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *CurMBB = W.MBB;
for (CaseClusterIt I = W.FirstCluster, E = W.LastCluster; I <= E; ++I) {
+ bool FallthroughUnreachable = false;
MachineBasicBlock *Fallthrough;
if (I == W.LastCluster) {
// For the last cluster, fall through to the default destination.
Fallthrough = DefaultMBB;
+ FallthroughUnreachable = isa<UnreachableInst>(
+ DefaultMBB->getBasicBlock()->getFirstNonPHIOrDbg());
} else {
Fallthrough = CurMF->CreateMachineBasicBlock(CurMBB->getBasicBlock());
CurMF->insert(BBI, Fallthrough);
@@ -9992,8 +10039,8 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
switch (I->Kind) {
case CC_JumpTable: {
// FIXME: Optimize away range check based on pivot comparisons.
- JumpTableHeader *JTH = &JTCases[I->JTCasesIndex].first;
- JumpTable *JT = &JTCases[I->JTCasesIndex].second;
+ JumpTableHeader *JTH = &SL->JTCases[I->JTCasesIndex].first;
+ SwitchCG::JumpTable *JT = &SL->JTCases[I->JTCasesIndex].second;
// The jump block hasn't been inserted yet; insert it here.
MachineBasicBlock *JumpMBB = JT->MBB;
@@ -10017,7 +10064,13 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
}
}
- addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ JTH->OmitRangeCheck = true;
+ }
+
+ if (!JTH->OmitRangeCheck)
+ addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb);
addSuccessorWithProb(CurMBB, JumpMBB, JumpProb);
CurMBB->normalizeSuccProbs();
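
The OmitRangeCheck case fires for source like the sketch below, where every selector value has a case and the default block begins with unreachable; the bounds check guarding the jump table (and the edge to the fallthrough block) can then be dropped. A hedged illustration; whether the frontend produces exactly this shape depends on the language and optimization level:

// All enumerators are covered and falling off the switch is UB, so the
// switch's default destination starts with 'unreachable'. The lowered
// jump table then needs no range check: every selector hits the table.
enum Color { Red, Green, Blue };

int classify(Color C) {
  switch (C) {
  case Red:   return 1;
  case Green: return 2;
  case Blue:  return 3;
  }
  __builtin_unreachable();
}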
@@ -10034,8 +10087,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
case CC_BitTests: {
+ // FIXME: If Fallthrough is unreachable, skip the range check.
+
// FIXME: Optimize away range check based on pivot comparisons.
- BitTestBlock *BTB = &BitTestCases[I->BTCasesIndex];
+ BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
// The bit test blocks haven't been inserted yet; insert them here.
for (BitTestCase &BTC : BTB->Cases)
@@ -10078,6 +10133,10 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
RHS = I->High;
}
+ // If Fallthrough is unreachable, fold away the comparison.
+ if (FallthroughUnreachable)
+ CC = ISD::SETTRUE;
+
// The false probability is the sum of all unhandled cases.
CaseBlock CB(CC, LHS, RHS, MHS, I->MBB, Fallthrough, CurMBB,
getCurSDLoc(), I->Prob, UnhandledProbs);
@@ -10085,7 +10144,7 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
if (CurMBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
break;
}
@@ -10236,7 +10295,7 @@ void SelectionDAGBuilder::splitWorkItem(SwitchWorkList &WorkList,
if (W.MBB == SwitchMBB)
visitSwitchCase(CB, SwitchMBB);
else
- SwitchCases.push_back(CB);
+ SL->SwitchCases.push_back(CB);
}
// Scale CaseProb after peeling a case with the probability of PeeledCaseProb
@@ -10265,7 +10324,7 @@ MachineBasicBlock *SelectionDAGBuilder::peelDominantCaseCluster(
// Don't perform if there is only one cluster or optimizing for size.
if (SwitchPeelThreshold > 100 || !FuncInfo.BPI || Clusters.size() < 2 ||
TM.getOptLevel() == CodeGenOpt::None ||
- SwitchMBB->getParent()->getFunction().optForMinSize())
+ SwitchMBB->getParent()->getFunction().hasMinSize())
return SwitchMBB;
BranchProbability TopCaseProb = BranchProbability(SwitchPeelThreshold, 100);
@@ -10331,38 +10390,6 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
// if there are many clusters.
sortAndRangeify(Clusters);
- if (TM.getOptLevel() != CodeGenOpt::None) {
- // Replace an unreachable default with the most popular destination.
- // FIXME: Exploit unreachable default more aggressively.
- bool UnreachableDefault =
- isa<UnreachableInst>(SI.getDefaultDest()->getFirstNonPHIOrDbg());
- if (UnreachableDefault && !Clusters.empty()) {
- DenseMap<const BasicBlock *, unsigned> Popularity;
- unsigned MaxPop = 0;
- const BasicBlock *MaxBB = nullptr;
- for (auto I : SI.cases()) {
- const BasicBlock *BB = I.getCaseSuccessor();
- if (++Popularity[BB] > MaxPop) {
- MaxPop = Popularity[BB];
- MaxBB = BB;
- }
- }
- // Set new default.
- assert(MaxPop > 0 && MaxBB);
- DefaultMBB = FuncInfo.MBBMap[MaxBB];
-
- // Remove cases that were pointing to the destination that is now the
- // default.
- CaseClusterVector New;
- New.reserve(Clusters.size());
- for (CaseCluster &CC : Clusters) {
- if (CC.MBB != DefaultMBB)
- New.push_back(CC);
- }
- Clusters = std::move(New);
- }
- }
-
// The branch probability of the peeled case.
BranchProbability PeeledCaseProb = BranchProbability::getZero();
MachineBasicBlock *PeeledSwitchMBB =
@@ -10380,8 +10407,8 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
return;
}
- findJumpTables(Clusters, &SI, DefaultMBB);
- findBitTestClusters(Clusters, &SI);
+ SL->findJumpTables(Clusters, &SI, DefaultMBB);
+ SL->findBitTestClusters(Clusters, &SI);
LLVM_DEBUG({
dbgs() << "Case clusters: ";
@@ -10420,7 +10447,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
unsigned NumClusters = W.LastCluster - W.FirstCluster + 1;
if (NumClusters > 3 && TM.getOptLevel() != CodeGenOpt::None &&
- !DefaultMBB->getParent()->getFunction().optForMinSize()) {
+ !DefaultMBB->getParent()->getFunction().hasMinSize()) {
// For optimized builds, lower large range as a balanced binary tree.
splitWorkItem(WorkList, W, SI.getCondition(), SwitchMBB);
continue;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f9cdb69daf7..0072e33f23b7 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -1,9 +1,8 @@
//===- SelectionDAGBuilder.h - Selection-DAG building -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,11 +17,13 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/CallSite.h"
@@ -47,6 +48,7 @@ class AtomicRMWInst;
class BasicBlock;
class BranchInst;
class CallInst;
+class CallBrInst;
class CatchPadInst;
class CatchReturnInst;
class CatchSwitchInst;
@@ -76,6 +78,7 @@ class ResumeInst;
class ReturnInst;
class SDDbgValue;
class StoreInst;
+class SwiftErrorValueTracking;
class SwitchInst;
class TargetLibraryInfo;
class TargetMachine;
@@ -91,16 +94,16 @@ class Value;
/// implementation that is parameterized by a TargetLowering object.
///
class SelectionDAGBuilder {
- /// CurInst - The current instruction being visited
+ /// The current instruction being visited.
const Instruction *CurInst = nullptr;
DenseMap<const Value*, SDValue> NodeMap;
- /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used
+  /// Maps argument values for unused arguments. This is used
/// to preserve debug information for incoming arguments.
DenseMap<const Value*, SDValue> UnusedArgNodeMap;
- /// DanglingDebugInfo - Helper type for DanglingDebugInfoMap.
+ /// Helper type for DanglingDebugInfoMap.
class DanglingDebugInfo {
const DbgValueInst* DI = nullptr;
DebugLoc dl;
@@ -116,18 +119,17 @@ class SelectionDAGBuilder {
unsigned getSDNodeOrder() { return SDNodeOrder; }
};
- /// DanglingDebugInfoVector - Helper type for DanglingDebugInfoMap.
+ /// Helper type for DanglingDebugInfoMap.
typedef std::vector<DanglingDebugInfo> DanglingDebugInfoVector;
- /// DanglingDebugInfoMap - Keeps track of dbg_values for which we have not
- /// yet seen the referent. We defer handling these until we do see it.
- DenseMap<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
+ /// Keeps track of dbg_values for which we have not yet seen the referent.
+ /// We defer handling these until we do see it.
+ MapVector<const Value*, DanglingDebugInfoVector> DanglingDebugInfoMap;
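
The container change from DenseMap to MapVector is about determinism: dangling debug info is now also iterated (for the salvage path added below) rather than only looked up, and MapVector walks entries in insertion order while DenseMap's order depends on hashing. A minimal sketch of the property being relied on, assuming the usual llvm::MapVector semantics:

#include "llvm/ADT/MapVector.h"

// MapVector pairs a map with a vector of keys, so iteration follows
// insertion order and is stable across runs; lookups stay O(1) amortized.
void orderDemo() {
  llvm::MapVector<int, const char *> MV;
  MV.insert({42, "first"});
  MV.insert({7, "second"});
  for (auto &KV : MV)
    (void)KV; // always visits 42 before 7
}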
public:
- /// PendingLoads - Loads are not emitted to the program immediately. We bunch
- /// them up and then emit token factor nodes when possible. This allows us to
- /// get simple disambiguation between loads without worrying about alias
- /// analysis.
+ /// Loads are not emitted to the program immediately. We bunch them up and
+ /// then emit token factor nodes when possible. This allows us to get simple
+ /// disambiguation between loads without worrying about alias analysis.
SmallVector<SDValue, 8> PendingLoads;
/// State used while lowering a statepoint sequence (gc_statepoint,
@@ -135,247 +137,37 @@ public:
StatepointLoweringState StatepointLowering;
private:
- /// PendingExports - CopyToReg nodes that copy values to virtual registers
- /// for export to other blocks need to be emitted before any terminator
- /// instruction, but they have no other ordering requirements. We bunch them
- /// up and the emit a single tokenfactor for them just before terminator
- /// instructions.
+ /// CopyToReg nodes that copy values to virtual registers for export to other
+ /// blocks need to be emitted before any terminator instruction, but they have
+  /// no other ordering requirements. We bunch them up and then emit a single
+  /// token factor for them just before terminator instructions.
SmallVector<SDValue, 8> PendingExports;
- /// SDNodeOrder - A unique monotonically increasing number used to order the
- /// SDNodes we create.
+ /// A unique monotonically increasing number used to order the SDNodes we
+ /// create.
unsigned SDNodeOrder;
- enum CaseClusterKind {
- /// A cluster of adjacent case labels with the same destination, or just one
- /// case.
- CC_Range,
- /// A cluster of cases suitable for jump table lowering.
- CC_JumpTable,
- /// A cluster of cases suitable for bit test lowering.
- CC_BitTests
- };
-
- /// A cluster of case labels.
- struct CaseCluster {
- CaseClusterKind Kind;
- const ConstantInt *Low, *High;
- union {
- MachineBasicBlock *MBB;
- unsigned JTCasesIndex;
- unsigned BTCasesIndex;
- };
- BranchProbability Prob;
-
- static CaseCluster range(const ConstantInt *Low, const ConstantInt *High,
- MachineBasicBlock *MBB, BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_Range;
- C.Low = Low;
- C.High = High;
- C.MBB = MBB;
- C.Prob = Prob;
- return C;
- }
-
- static CaseCluster jumpTable(const ConstantInt *Low,
- const ConstantInt *High, unsigned JTCasesIndex,
- BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_JumpTable;
- C.Low = Low;
- C.High = High;
- C.JTCasesIndex = JTCasesIndex;
- C.Prob = Prob;
- return C;
- }
-
- static CaseCluster bitTests(const ConstantInt *Low, const ConstantInt *High,
- unsigned BTCasesIndex, BranchProbability Prob) {
- CaseCluster C;
- C.Kind = CC_BitTests;
- C.Low = Low;
- C.High = High;
- C.BTCasesIndex = BTCasesIndex;
- C.Prob = Prob;
- return C;
- }
- };
-
- using CaseClusterVector = std::vector<CaseCluster>;
- using CaseClusterIt = CaseClusterVector::iterator;
-
- struct CaseBits {
- uint64_t Mask = 0;
- MachineBasicBlock* BB = nullptr;
- unsigned Bits = 0;
- BranchProbability ExtraProb;
-
- CaseBits() = default;
- CaseBits(uint64_t mask, MachineBasicBlock* bb, unsigned bits,
- BranchProbability Prob):
- Mask(mask), BB(bb), Bits(bits), ExtraProb(Prob) {}
- };
-
- using CaseBitsVector = std::vector<CaseBits>;
-
- /// Sort Clusters and merge adjacent cases.
- void sortAndRangeify(CaseClusterVector &Clusters);
-
- /// CaseBlock - This structure is used to communicate between
- /// SelectionDAGBuilder and SDISel for the code generation of additional basic
- /// blocks needed by multi-case switch statements.
- struct CaseBlock {
- // CC - the condition code to use for the case block's setcc node
- ISD::CondCode CC;
-
- // CmpLHS/CmpRHS/CmpMHS - The LHS/MHS/RHS of the comparison to emit.
- // Emit by default LHS op RHS. MHS is used for range comparisons:
- // If MHS is not null: (LHS <= MHS) and (MHS <= RHS).
- const Value *CmpLHS, *CmpMHS, *CmpRHS;
-
- // TrueBB/FalseBB - the block to branch to if the setcc is true/false.
- MachineBasicBlock *TrueBB, *FalseBB;
-
- // ThisBB - the block into which to emit the code for the setcc and branches
- MachineBasicBlock *ThisBB;
-
- /// The debug location of the instruction this CaseBlock was
- /// produced from.
- SDLoc DL;
-
- // TrueProb/FalseProb - branch weights.
- BranchProbability TrueProb, FalseProb;
-
- CaseBlock(ISD::CondCode cc, const Value *cmplhs, const Value *cmprhs,
- const Value *cmpmiddle, MachineBasicBlock *truebb,
- MachineBasicBlock *falsebb, MachineBasicBlock *me,
- SDLoc dl,
- BranchProbability trueprob = BranchProbability::getUnknown(),
- BranchProbability falseprob = BranchProbability::getUnknown())
- : CC(cc), CmpLHS(cmplhs), CmpMHS(cmpmiddle), CmpRHS(cmprhs),
- TrueBB(truebb), FalseBB(falsebb), ThisBB(me), DL(dl),
- TrueProb(trueprob), FalseProb(falseprob) {}
- };
-
- struct JumpTable {
- /// Reg - the virtual register containing the index of the jump table entry
-    /// to jump to.
- unsigned Reg;
- /// JTI - the JumpTableIndex for this jump table in the function.
- unsigned JTI;
- /// MBB - the MBB into which to emit the code for the indirect jump.
- MachineBasicBlock *MBB;
- /// Default - the MBB of the default bb, which is a successor of the range
-    /// check MBB. This is used when updating PHI nodes in successors.
- MachineBasicBlock *Default;
-
- JumpTable(unsigned R, unsigned J, MachineBasicBlock *M,
- MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {}
- };
- struct JumpTableHeader {
- APInt First;
- APInt Last;
- const Value *SValue;
- MachineBasicBlock *HeaderBB;
- bool Emitted;
-
- JumpTableHeader(APInt F, APInt L, const Value *SV, MachineBasicBlock *H,
- bool E = false)
- : First(std::move(F)), Last(std::move(L)), SValue(SV), HeaderBB(H),
- Emitted(E) {}
- };
- using JumpTableBlock = std::pair<JumpTableHeader, JumpTable>;
-
- struct BitTestCase {
- uint64_t Mask;
- MachineBasicBlock *ThisBB;
- MachineBasicBlock *TargetBB;
- BranchProbability ExtraProb;
-
- BitTestCase(uint64_t M, MachineBasicBlock* T, MachineBasicBlock* Tr,
- BranchProbability Prob):
- Mask(M), ThisBB(T), TargetBB(Tr), ExtraProb(Prob) {}
- };
-
- using BitTestInfo = SmallVector<BitTestCase, 3>;
-
- struct BitTestBlock {
- APInt First;
- APInt Range;
- const Value *SValue;
- unsigned Reg;
- MVT RegVT;
- bool Emitted;
- bool ContiguousRange;
- MachineBasicBlock *Parent;
- MachineBasicBlock *Default;
- BitTestInfo Cases;
- BranchProbability Prob;
- BranchProbability DefaultProb;
-
- BitTestBlock(APInt F, APInt R, const Value *SV, unsigned Rg, MVT RgVT,
- bool E, bool CR, MachineBasicBlock *P, MachineBasicBlock *D,
- BitTestInfo C, BranchProbability Pr)
- : First(std::move(F)), Range(std::move(R)), SValue(SV), Reg(Rg),
- RegVT(RgVT), Emitted(E), ContiguousRange(CR), Parent(P), Default(D),
- Cases(std::move(C)), Prob(Pr) {}
- };
-
- /// Return the range of value in [First..Last].
- uint64_t getJumpTableRange(const CaseClusterVector &Clusters, unsigned First,
- unsigned Last) const;
-
- /// Return the number of cases in [First..Last].
- uint64_t getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
- unsigned First, unsigned Last) const;
-
- /// Build a jump table cluster from Clusters[First..Last]. Returns false if it
- /// decides it's not a good idea.
- bool buildJumpTable(const CaseClusterVector &Clusters, unsigned First,
- unsigned Last, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB, CaseCluster &JTCluster);
-
- /// Find clusters of cases suitable for jump table lowering.
- void findJumpTables(CaseClusterVector &Clusters, const SwitchInst *SI,
- MachineBasicBlock *DefaultMBB);
-
- /// Build a bit test cluster from Clusters[First..Last]. Returns false if it
- /// decides it's not a good idea.
- bool buildBitTests(CaseClusterVector &Clusters, unsigned First, unsigned Last,
- const SwitchInst *SI, CaseCluster &BTCluster);
-
- /// Find clusters of cases suitable for bit test lowering.
- void findBitTestClusters(CaseClusterVector &Clusters, const SwitchInst *SI);
-
- struct SwitchWorkListItem {
- MachineBasicBlock *MBB;
- CaseClusterIt FirstCluster;
- CaseClusterIt LastCluster;
- const ConstantInt *GE;
- const ConstantInt *LT;
- BranchProbability DefaultProb;
- };
- using SwitchWorkList = SmallVector<SwitchWorkListItem, 4>;
-
/// Determine the rank by weight of CC in [First,Last]. If CC has more weight
/// than each cluster in the range, its rank is 0.
- static unsigned caseClusterRank(const CaseCluster &CC, CaseClusterIt First,
- CaseClusterIt Last);
+ unsigned caseClusterRank(const SwitchCG::CaseCluster &CC,
+ SwitchCG::CaseClusterIt First,
+ SwitchCG::CaseClusterIt Last);
/// Emit comparison and split W into two subtrees.
- void splitWorkItem(SwitchWorkList &WorkList, const SwitchWorkListItem &W,
- Value *Cond, MachineBasicBlock *SwitchMBB);
+ void splitWorkItem(SwitchCG::SwitchWorkList &WorkList,
+ const SwitchCG::SwitchWorkListItem &W, Value *Cond,
+ MachineBasicBlock *SwitchMBB);
/// Lower W.
- void lowerWorkItem(SwitchWorkListItem W, Value *Cond,
+ void lowerWorkItem(SwitchCG::SwitchWorkListItem W, Value *Cond,
MachineBasicBlock *SwitchMBB,
MachineBasicBlock *DefaultMBB);
/// Peel the top probability case if it exceeds the threshold.
- MachineBasicBlock *peelDominantCaseCluster(const SwitchInst &SI,
- CaseClusterVector &Clusters,
- BranchProbability &PeeledCaseProb);
+ MachineBasicBlock *
+ peelDominantCaseCluster(const SwitchInst &SI,
+ SwitchCG::CaseClusterVector &Clusters,
+ BranchProbability &PeeledCaseProb);
/// A class which encapsulates all of the information needed to generate a
/// stack protector check and signals to isel via its state being initialized
@@ -588,17 +380,22 @@ public:
AliasAnalysis *AA = nullptr;
const TargetLibraryInfo *LibInfo;
- /// SwitchCases - Vector of CaseBlock structures used to communicate
- /// SwitchInst code generation information.
- std::vector<CaseBlock> SwitchCases;
+ class SDAGSwitchLowering : public SwitchCG::SwitchLowering {
+ public:
+ SDAGSwitchLowering(SelectionDAGBuilder *sdb, FunctionLoweringInfo &funcinfo)
+ : SwitchCG::SwitchLowering(funcinfo), SDB(sdb) {}
+
+ virtual void addSuccessorWithProb(
+ MachineBasicBlock *Src, MachineBasicBlock *Dst,
+ BranchProbability Prob = BranchProbability::getUnknown()) override {
+ SDB->addSuccessorWithProb(Src, Dst, Prob);
+ }
- /// JTCases - Vector of JumpTable structures used to communicate
- /// SwitchInst code generation information.
- std::vector<JumpTableBlock> JTCases;
+ private:
+ SelectionDAGBuilder *SDB;
+ };
- /// BitTestCases - Vector of BitTestBlock structures used to communicate
- /// SwitchInst code generation information.
- std::vector<BitTestBlock> BitTestCases;
+ std::unique_ptr<SDAGSwitchLowering> SL;
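
The subclass above is the seam between the builder and the switch-lowering code that now lives in SwitchCG (see the new SwitchLoweringUtils.h include): the shared logic calls a virtual hook whenever it needs to record a CFG edge, and each client supplies its own bookkeeping. The pattern, reduced to a hedged sketch with illustrative names:

// Shared lowering code depends only on the abstract hook; SelectionDAG
// (and potentially other selectors) plug in their successor handling.
struct SwitchLoweringHooks {
  virtual ~SwitchLoweringHooks() = default;
  virtual void addSuccessorWithProb(int Src, int Dst, double Prob) = 0;
};

struct DAGHooks : SwitchLoweringHooks {
  void addSuccessorWithProb(int Src, int Dst, double Prob) override {
    // forward to the owning builder, as SDAGSwitchLowering does above
  }
};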
/// A StackProtectorDescriptor structure used to communicate stack protector
/// information in between SelectBasicBlock and FinishBasicBlock.
@@ -608,27 +405,29 @@ public:
// PHI nodes.
DenseMap<const Constant *, unsigned> ConstantsOut;
- /// FuncInfo - Information about the function as a whole.
- ///
+ /// Information about the function as a whole.
FunctionLoweringInfo &FuncInfo;
- /// GFI - Garbage collection metadata for the function.
+ /// Information about the swifterror values used throughout the function.
+ SwiftErrorValueTracking &SwiftError;
+
+ /// Garbage collection metadata for the function.
GCFunctionInfo *GFI;
- /// LPadToCallSiteMap - Map a landing pad to the call site indexes.
+ /// Map a landing pad to the call site indexes.
DenseMap<MachineBasicBlock *, SmallVector<unsigned, 4>> LPadToCallSiteMap;
- /// HasTailCall - This is set to true if a call in the current
- /// block has been translated as a tail call. In this case,
- /// no subsequent DAG nodes should be created.
+ /// This is set to true if a call in the current block has been translated as
+ /// a tail call. In this case, no subsequent DAG nodes should be created.
bool HasTailCall = false;
LLVMContext *Context;
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
- CodeGenOpt::Level ol)
- : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- FuncInfo(funcinfo) {}
+ SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
+ : SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
+ SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
const TargetLibraryInfo *li);
@@ -670,20 +469,34 @@ public:
void visit(unsigned Opcode, const User &I);
- /// getCopyFromRegs - If there was virtual register allocated for the value V
- /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
+  /// If there was a virtual register allocated for the value V, emit CopyFromReg
+ /// of the specified type Ty. Return empty SDValue() otherwise.
SDValue getCopyFromRegs(const Value *V, Type *Ty);
/// If we have dangling debug info that describes \p Variable, or an
/// overlapping part of variable considering the \p Expr, then this method
- /// weill drop that debug info as it isn't valid any longer.
+ /// will drop that debug info as it isn't valid any longer.
void dropDanglingDebugInfo(const DILocalVariable *Variable,
const DIExpression *Expr);
- // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
- // generate the debug data structures now that we've seen its definition.
+ /// If we saw an earlier dbg_value referring to V, generate the debug data
+ /// structures now that we've seen its definition.
void resolveDanglingDebugInfo(const Value *V, SDValue Val);
+ /// For the given dangling debuginfo record, perform last-ditch efforts to
+ /// resolve the debuginfo to something that is represented in this DAG. If
+ /// this cannot be done, produce an Undef debug value record.
+ void salvageUnresolvedDbgValue(DanglingDebugInfo &DDI);
+
+ /// For a given Value, attempt to create and record a SDDbgValue in the
+ /// SelectionDAG.
+ bool handleDebugValue(const Value *V, DILocalVariable *Var,
+ DIExpression *Expr, DebugLoc CurDL,
+ DebugLoc InstDL, unsigned Order);
+
+ /// Evict any dangling debug information, attempting to salvage it first.
+ void resolveOrClearDbgInfo();
+
SDValue getValue(const Value *V);
bool findValue(const Value *V) const;
@@ -720,7 +533,7 @@ public:
MachineBasicBlock *SwitchBB,
BranchProbability TProb, BranchProbability FProb,
bool InvertCond);
- bool ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases);
+ bool ShouldEmitAsBranches(const std::vector<SwitchCG::CaseBlock> &Cases);
bool isExportableFromCurrentBlock(const Value *V, const BasicBlock *FromBB);
void CopyToExportRegsIfNeeded(const Value *V);
void ExportFromCurrentBlock(const Value *V);
@@ -733,7 +546,7 @@ public:
SDValue Op);
void populateCallLoweringInfo(TargetLowering::CallLoweringInfo &CLI,
- ImmutableCallSite CS, unsigned ArgIdx,
+ const CallBase *Call, unsigned ArgIdx,
unsigned NumArgs, SDValue Callee,
Type *ReturnTy, bool IsPatchPoint);
@@ -741,7 +554,7 @@ public:
lowerInvokable(TargetLowering::CallLoweringInfo &CLI,
const BasicBlock *EHPadBB = nullptr);
- /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// When an MBB was split during scheduling, update the
/// references that need to refer to the last resulting block.
void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
@@ -797,13 +610,13 @@ public:
void LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB = nullptr);
- void LowerCallSiteWithDeoptBundle(ImmutableCallSite CS, SDValue Callee,
+ void LowerCallSiteWithDeoptBundle(const CallBase *Call, SDValue Callee,
const BasicBlock *EHPadBB);
void LowerDeoptimizeCall(const CallInst *CI);
void LowerDeoptimizingReturn();
- void LowerCallSiteWithDeoptBundleImpl(ImmutableCallSite CS, SDValue Callee,
+ void LowerCallSiteWithDeoptBundleImpl(const CallBase *Call, SDValue Callee,
const BasicBlock *EHPadBB,
bool VarArgDisallowed,
bool ForceVoidReturnTy);
@@ -833,25 +646,24 @@ private:
BranchProbability Prob = BranchProbability::getUnknown());
public:
- void visitSwitchCase(CaseBlock &CB,
- MachineBasicBlock *SwitchBB);
+ void visitSwitchCase(SwitchCG::CaseBlock &CB, MachineBasicBlock *SwitchBB);
void visitSPDescriptorParent(StackProtectorDescriptor &SPD,
MachineBasicBlock *ParentBB);
void visitSPDescriptorFailure(StackProtectorDescriptor &SPD);
- void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(BitTestBlock &BB,
- MachineBasicBlock* NextMBB,
- BranchProbability BranchProbToNext,
- unsigned Reg,
- BitTestCase &B,
- MachineBasicBlock *SwitchBB);
- void visitJumpTable(JumpTable &JT);
- void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH,
+ void visitBitTestHeader(SwitchCG::BitTestBlock &B,
+ MachineBasicBlock *SwitchBB);
+ void visitBitTestCase(SwitchCG::BitTestBlock &BB, MachineBasicBlock *NextMBB,
+ BranchProbability BranchProbToNext, unsigned Reg,
+ SwitchCG::BitTestCase &B, MachineBasicBlock *SwitchBB);
+ void visitJumpTable(SwitchCG::JumpTable &JT);
+ void visitJumpTableHeader(SwitchCG::JumpTable &JT,
+ SwitchCG::JumpTableHeader &JTH,
MachineBasicBlock *SwitchBB);
private:
// These all get lowered before this pass.
void visitInvoke(const InvokeInst &I);
+ void visitCallBr(const CallBrInst &I);
void visitResume(const ResumeInst &I);
void visitUnary(const User &I, unsigned Opcode);
@@ -932,7 +744,7 @@ private:
void visitStoreToSwiftError(const StoreInst &I);
void visitInlineAsm(ImmutableCallSite CS);
- const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
+ void visitIntrinsicCall(const CallInst &I, unsigned Intrinsic);
void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic);
void visitConstrainedFPIntrinsic(const ConstrainedFPIntrinsic &FPI);
@@ -982,9 +794,12 @@ private:
SDDbgValue *getDbgValue(SDValue N, DILocalVariable *Variable,
DIExpression *Expr, const DebugLoc &dl,
unsigned DbgSDNodeOrder);
+
+ /// Lowers CallInst to an external symbol.
+ void lowerCallToExternalSymbol(const CallInst &I, const char *FunctionName);
};
-/// RegsForValue - This struct represents the registers (physical or virtual)
+/// This struct represents the registers (physical or virtual)
/// that a particular set of values is assigned, and the type information about
/// the value. The most common situation is to represent one value at a time,
/// but struct or array values are handled element-wise as multiple values. The
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index 43df2abb674b..da3049881d31 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGDumper.cpp - Implement SelectionDAG::dump() ------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -96,6 +95,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ATOMIC_LOAD_MAX: return "AtomicLoadMax";
case ISD::ATOMIC_LOAD_UMIN: return "AtomicLoadUMin";
case ISD::ATOMIC_LOAD_UMAX: return "AtomicLoadUMax";
+ case ISD::ATOMIC_LOAD_FADD: return "AtomicLoadFAdd";
case ISD::ATOMIC_LOAD: return "AtomicLoad";
case ISD::ATOMIC_STORE: return "AtomicStore";
case ISD::PCMARKER: return "PCMarker";
@@ -145,6 +145,8 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
unsigned IID = cast<ConstantSDNode>(getOperand(OpNo))->getZExtValue();
if (IID < Intrinsic::num_intrinsics)
return Intrinsic::getName((Intrinsic::ID)IID, None);
+ else if (!G)
+ return "Unknown intrinsic";
else if (const TargetIntrinsicInfo *TII = G->getTarget().getIntrinsicInfo())
return TII->getName(IID);
llvm_unreachable("Invalid intrinsic ID");
@@ -170,7 +172,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UNDEF: return "undef";
case ISD::MERGE_VALUES: return "merge_values";
case ISD::INLINEASM: return "inlineasm";
+ case ISD::INLINEASM_BR: return "inlineasm_br";
case ISD::EH_LABEL: return "eh_label";
+ case ISD::ANNOTATION_LABEL: return "annotation_label";
case ISD::HANDLENODE: return "handlenode";
// Unary operators
@@ -297,7 +301,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UADDSAT: return "uaddsat";
case ISD::SSUBSAT: return "ssubsat";
case ISD::USUBSAT: return "usubsat";
+
case ISD::SMULFIX: return "smulfix";
+ case ISD::SMULFIXSAT: return "smulfixsat";
+ case ISD::UMULFIX: return "umulfix";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -309,9 +316,11 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ZERO_EXTEND_VECTOR_INREG: return "zero_extend_vector_inreg";
case ISD::TRUNCATE: return "truncate";
case ISD::FP_ROUND: return "fp_round";
+ case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
+ case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
@@ -321,6 +330,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
+ case ISD::LROUND: return "lround";
+ case ISD::LLROUND: return "llround";
+ case ISD::LRINT: return "lrint";
+ case ISD::LLRINT: return "llrint";
// Control flow instructions
case ISD::BR: return "br";
@@ -650,6 +663,36 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << ", " << AM;
OS << ">";
+ } else if (const MaskedLoadSDNode *MLd = dyn_cast<MaskedLoadSDNode>(this)) {
+ OS << "<";
+
+ printMemOperand(OS, *MLd->getMemOperand(), G);
+
+ bool doExt = true;
+ switch (MLd->getExtensionType()) {
+ default: doExt = false; break;
+ case ISD::EXTLOAD: OS << ", anyext"; break;
+ case ISD::SEXTLOAD: OS << ", sext"; break;
+ case ISD::ZEXTLOAD: OS << ", zext"; break;
+ }
+ if (doExt)
+ OS << " from " << MLd->getMemoryVT().getEVTString();
+
+ if (MLd->isExpandingLoad())
+ OS << ", expanding";
+
+ OS << ">";
+ } else if (const MaskedStoreSDNode *MSt = dyn_cast<MaskedStoreSDNode>(this)) {
+ OS << "<";
+ printMemOperand(OS, *MSt->getMemOperand(), G);
+
+ if (MSt->isTruncatingStore())
+ OS << ", trunc to " << MSt->getMemoryVT().getEVTString();
+
+ if (MSt->isCompressingStore())
+ OS << ", compressing";
+
+ OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<";
printMemOperand(OS, *M->getMemOperand(), G);
@@ -675,6 +718,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
<< " -> "
<< ASC->getDestAddressSpace()
<< ']';
+ } else if (const LifetimeSDNode *LN = dyn_cast<LifetimeSDNode>(this)) {
+ if (LN->hasOffset())
+ OS << "<" << LN->getOffset() << " to " << LN->getOffset() + LN->getSize() << ">";
}
if (VerboseDAGDumping) {
@@ -684,45 +730,63 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (getNodeId() != -1)
OS << " [ID=" << getNodeId() << ']';
if (!(isa<ConstantSDNode>(this) || (isa<ConstantFPSDNode>(this))))
- OS << "# D:" << isDivergent();
-
- if (!G)
- return;
-
- DILocation *L = getDebugLoc();
- if (!L)
- return;
-
- if (auto *Scope = L->getScope())
- OS << Scope->getFilename();
- else
- OS << "<unknown>";
- OS << ':' << L->getLine();
- if (unsigned C = L->getColumn())
- OS << ':' << C;
-
- for (SDDbgValue *Dbg : G->GetDbgValues(this)) {
- if (Dbg->getKind() != SDDbgValue::SDNODE || Dbg->isInvalidated())
- continue;
- Dbg->dump(OS);
- }
+ OS << " # D:" << isDivergent();
+
+ if (G && !G->GetDbgValues(this).empty()) {
+ OS << " [NoOfDbgValues=" << G->GetDbgValues(this).size() << ']';
+ for (SDDbgValue *Dbg : G->GetDbgValues(this))
+ if (!Dbg->isInvalidated())
+ Dbg->print(OS);
+ } else if (getHasDebugValue())
+ OS << " [NoOfDbgValues>0]";
}
}
-LLVM_DUMP_METHOD void SDDbgValue::dump(raw_ostream &OS) const {
- OS << " DbgVal";
- if (kind==SDNODE)
- OS << '(' << u.s.ResNo << ')';
- OS << ":\"" << Var->getName() << '"';
+LLVM_DUMP_METHOD void SDDbgValue::print(raw_ostream &OS) const {
+ OS << " DbgVal(Order=" << getOrder() << ')';
+ if (isInvalidated()) OS << "(Invalidated)";
+ if (isEmitted()) OS << "(Emitted)";
+ switch (getKind()) {
+ case SDNODE:
+ if (getSDNode())
+ OS << "(SDNODE=" << PrintNodeId(*getSDNode()) << ':' << getResNo() << ')';
+ else
+ OS << "(SDNODE)";
+ break;
+ case CONST:
+ OS << "(CONST)";
+ break;
+ case FRAMEIX:
+ OS << "(FRAMEIX=" << getFrameIx() << ')';
+ break;
+ case VREG:
+ OS << "(VREG=" << getVReg() << ')';
+ break;
+ }
+ if (isIndirect()) OS << "(Indirect)";
+ OS << ":\"" << Var->getName() << '"';
#ifndef NDEBUG
- if (Expr->getNumElements())
- Expr->dump();
+ if (Expr->getNumElements())
+ Expr->dump();
#endif
}
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+LLVM_DUMP_METHOD void SDDbgValue::dump() const {
+ if (isInvalidated())
+ return;
+ print(dbgs());
+ dbgs() << "\n";
+}
+#endif
+
/// Return true if this node is so simple that we should just print it inline
/// if it appears as an operand.
-static bool shouldPrintInline(const SDNode &Node) {
+static bool shouldPrintInline(const SDNode &Node, const SelectionDAG *G) {
+ // Avoid lots of cluttering when inline printing nodes with associated
+ // DbgValues in verbose mode.
+ if (VerboseDAGDumping && G && !G->GetDbgValues(&Node).empty())
+ return false;
if (Node.getOpcode() == ISD::EntryToken)
return false;
return Node.getNumOperands() == 0;
@@ -731,7 +795,7 @@ static bool shouldPrintInline(const SDNode &Node) {
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (const SDValue &Op : N->op_values()) {
- if (shouldPrintInline(*Op.getNode()))
+ if (shouldPrintInline(*Op.getNode(), G))
continue;
if (Op.getNode()->hasOneUse())
DumpNodes(Op.getNode(), indent+2, G);
@@ -748,12 +812,24 @@ LLVM_DUMP_METHOD void SelectionDAG::dump() const {
I != E; ++I) {
const SDNode *N = &*I;
if (!N->hasOneUse() && N != getRoot().getNode() &&
- (!shouldPrintInline(*N) || N->use_empty()))
+ (!shouldPrintInline(*N, this) || N->use_empty()))
DumpNodes(N, 2, this);
}
if (getRoot().getNode()) DumpNodes(getRoot().getNode(), 2, this);
- dbgs() << "\n\n";
+ dbgs() << "\n";
+
+ if (VerboseDAGDumping) {
+ if (DbgBegin() != DbgEnd())
+ dbgs() << "SDDbgValues:\n";
+ for (auto *Dbg : make_range(DbgBegin(), DbgEnd()))
+ Dbg->dump();
+ if (ByvalParmDbgBegin() != ByvalParmDbgEnd())
+ dbgs() << "Byval SDDbgValues:\n";
+ for (auto *Dbg : make_range(ByvalParmDbgBegin(), ByvalParmDbgEnd()))
+ Dbg->dump();
+ }
+ dbgs() << "\n";
}
#endif
@@ -769,7 +845,7 @@ static bool printOperand(raw_ostream &OS, const SelectionDAG *G,
if (!Value.getNode()) {
OS << "<null>";
return false;
- } else if (shouldPrintInline(*Value.getNode())) {
+ } else if (shouldPrintInline(*Value.getNode(), G)) {
OS << Value->getOperationName(G) << ':';
Value->print_types(OS, G);
Value->print_details(OS, G);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index af5c2433fa2f..bdf9f2c166e1 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGISel.cpp - Implement the SelectionDAGISel class --------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -42,6 +41,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -49,6 +49,7 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/StackProtector.h"
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -63,6 +64,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InlineAsm.h"
+#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
@@ -306,8 +308,9 @@ SelectionDAGISel::SelectionDAGISel(TargetMachine &tm,
CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm),
FuncInfo(new FunctionLoweringInfo()),
+ SwiftError(new SwiftErrorValueTracking()),
CurDAG(new SelectionDAG(tm, OL)),
- SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
+ SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, *SwiftError, OL)),
AA(), GFI(),
OptLevel(OL),
DAGSize(0) {
@@ -323,6 +326,7 @@ SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
delete CurDAG;
delete FuncInfo;
+ delete SwiftError;
}
void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -379,6 +383,30 @@ static void SplitCriticalSideEffectEdges(Function &Fn, DominatorTree *DT,
}
}
+static void computeUsesMSVCFloatingPoint(const Triple &TT, const Function &F,
+ MachineModuleInfo &MMI) {
+ // Only needed for MSVC
+ if (!TT.isWindowsMSVCEnvironment())
+ return;
+
+ // If it's already set, nothing to do.
+ if (MMI.usesMSVCFloatingPoint())
+ return;
+
+ for (const Instruction &I : instructions(F)) {
+ if (I.getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ for (const auto &Op : I.operands()) {
+ if (Op->getType()->isFPOrFPVectorTy()) {
+ MMI.setUsesMSVCFloatingPoint(true);
+ return;
+ }
+ }
+ }
+}
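
The scan marks the module as using floating point on MSVC targets, where the CRT expects FP-using objects to reference the _fltused symbol so the runtime's FP support gets linked in (the emission itself happens later, in the AsmPrinter). Either function below would trip the detector; note the second has an integer result but an FP-typed operand:

// Sketch of inputs that set usesMSVCFloatingPoint: an FP result type,
// or merely an FP-typed operand feeding a non-FP instruction.
double scale(double X) { return X * 2.0; }
int truncateToInt(double X) { return static_cast<int>(X); }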
+
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// If we already selected that function, we do not need to run SDISel.
if (mf.getProperties().hasProperty(
@@ -421,6 +449,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
CurDAG->init(*MF, *ORE, this, LibInfo,
getAnalysisIfAvailable<LegacyDivergenceAnalysis>());
FuncInfo->set(Fn, *MF, CurDAG);
+ SwiftError->setFunction(*MF);
// Now get the optional analyzes if we want to.
// This is based on the possibly changed OptLevel (after optnone is taken
@@ -474,6 +503,40 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Fn.getContext().diagnose(DiagFallback);
}
+ // Replace forward-declared registers with the registers containing
+ // the desired value.
+ // Note: it is important that this happens **before** the call to
+ // EmitLiveInCopies, since implementations can skip copies of unused
+ // registers. If we don't apply the reg fixups before, some registers may
+ // appear as unused and will be skipped, resulting in bad MI.
+ MachineRegisterInfo &MRI = MF->getRegInfo();
+ for (DenseMap<unsigned, unsigned>::iterator I = FuncInfo->RegFixups.begin(),
+ E = FuncInfo->RegFixups.end();
+ I != E; ++I) {
+ unsigned From = I->first;
+ unsigned To = I->second;
+ // If To is also scheduled to be replaced, find what its ultimate
+ // replacement is.
+ while (true) {
+ DenseMap<unsigned, unsigned>::iterator J = FuncInfo->RegFixups.find(To);
+ if (J == E)
+ break;
+ To = J->second;
+ }
+ // Make sure the new register has a sufficiently constrained register class.
+ if (TargetRegisterInfo::isVirtualRegister(From) &&
+ TargetRegisterInfo::isVirtualRegister(To))
+ MRI.constrainRegClass(To, MRI.getRegClass(From));
+ // Replacing one register with another won't touch the kill flags.
+ // We need to conservatively clear the kill flags as a kill on the old
+ // register might dominate existing uses of the new register.
+ if (!MRI.use_empty(To))
+ MRI.clearKillFlags(From);
+ // Replace it.
+ MRI.replaceRegWith(From, To);
+ }
+
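// Annotation (illustrative sketch, not part of the patch): the inner
// while-loop above is ordinary transitive lookup -- follow From -> To chains
// until reaching a register that is not itself scheduled for replacement.
// The same idea, self-contained:
#include <unordered_map>
static unsigned resolveFixup(const std::unordered_map<unsigned, unsigned> &Fixups,
                             unsigned Reg) {
  auto It = Fixups.find(Reg);
  while (It != Fixups.end()) { // chase the chain to its ultimate target
    Reg = It->second;
    It = Fixups.find(Reg);
  }
  return Reg;
}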
// If the first basic block in the function has live ins that need to be
// copied into vregs, emit the copies into the top of the block before
// emitting the code for the block.
@@ -507,7 +570,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) {
MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1];
bool hasFI = MI->getOperand(0).isFI();
- unsigned Reg =
+ Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
@@ -590,9 +653,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Determine if there is a call to setjmp in the machine function.
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice());
+ // Determine if floating point is used for MSVC.
+ computeUsesMSVCFloatingPoint(TM.getTargetTriple(), Fn, MF->getMMI());
+
// Replace forward-declared registers with the registers containing
// the desired value.
- MachineRegisterInfo &MRI = MF->getRegInfo();
for (DenseMap<unsigned, unsigned>::iterator
I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end();
I != E; ++I) {
@@ -663,6 +728,7 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Make sure the root of the DAG is up-to-date.
CurDAG->setRoot(SDB->getControlRoot());
HadTailCall = SDB->HasTailCall;
+ SDB->resolveOrClearDbgInfo();
SDB->clear();
// Final step, emit the lowered DAG as machine code.
@@ -713,8 +779,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
StringRef GroupName = "sdag";
StringRef GroupDescription = "Instruction Selection and Scheduling";
std::string BlockName;
- int BlockNumber = -1;
- (void)BlockNumber;
bool MatchFilterBB = false; (void)MatchFilterBB;
#ifndef NDEBUG
TargetTransformInfo &TTI =
@@ -735,7 +799,6 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
ViewSUnitDAGs)
#endif
{
- BlockNumber = FuncInfo->MBB->getNumber();
BlockName =
(MF->getName() + ":" + FuncInfo->MBB->getBasicBlock()->getName()).str();
}
@@ -1092,16 +1155,14 @@ void SelectionDAGISel::DoInstructionSelection() {
#endif
// When we are using non-default rounding modes or FP exception behavior
- // FP operations are represented by StrictFP pseudo-operations. They
- // need to be simplified here so that the target-specific instruction
- // selectors know how to handle them.
- //
- // If the current node is a strict FP pseudo-op, the isStrictFPOp()
- // function will provide the corresponding normal FP opcode to which the
- // node should be mutated.
- //
- // FIXME: The backends need a way to handle FP constraints.
- if (Node->isStrictFPOpcode())
+ // FP operations are represented by StrictFP pseudo-operations. For
+ // targets that do not (yet) understand strict FP operations directly,
+ // we convert them to normal FP opcodes instead at this point. This
+ // will allow them to be handled by existing target-specific instruction
+ // selectors.
+ if (Node->isStrictFPOpcode() &&
+ (TLI->getOperationAction(Node->getOpcode(), Node->getValueType(0))
+ != TargetLowering::Legal))
Node = CurDAG->mutateStrictFPToFP(Node);
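// Annotation (illustrative sketch, not part of the patch):
// mutateStrictFPToFP rewrites a strict node to its plain counterpart
// (e.g. ISD::STRICT_FADD becomes ISD::FADD). The decision above, modelled
// as a standalone function over a hypothetical legality table:
enum class Action { Legal, Expand, LibCall };
static bool shouldMutateStrictFP(bool IsStrictOpcode, Action TargetAction) {
  // Keep the strict pseudo-op only when the target declared it Legal.
  return IsStrictOpcode && TargetAction != Action::Legal;
}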
LLVM_DEBUG(dbgs() << "\nISEL: Starting selection on root node: ";
@@ -1228,77 +1289,6 @@ static bool isFoldedOrDeadInstruction(const Instruction *I,
!FuncInfo->isExportedInst(I); // Exported instrs must be computed.
}
-/// Set up SwiftErrorVals by going through the function. If the function has
-/// swifterror argument, it will be the first entry.
-static void setupSwiftErrorVals(const Function &Fn, const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo) {
- if (!TLI->supportSwiftError())
- return;
-
- FuncInfo->SwiftErrorVals.clear();
- FuncInfo->SwiftErrorVRegDefMap.clear();
- FuncInfo->SwiftErrorVRegUpwardsUse.clear();
- FuncInfo->SwiftErrorVRegDefUses.clear();
- FuncInfo->SwiftErrorArg = nullptr;
-
- // Check if function has a swifterror argument.
- bool HaveSeenSwiftErrorArg = false;
- for (Function::const_arg_iterator AI = Fn.arg_begin(), AE = Fn.arg_end();
- AI != AE; ++AI)
- if (AI->hasSwiftErrorAttr()) {
- assert(!HaveSeenSwiftErrorArg &&
- "Must have only one swifterror parameter");
- (void)HaveSeenSwiftErrorArg; // silence warning.
- HaveSeenSwiftErrorArg = true;
- FuncInfo->SwiftErrorArg = &*AI;
- FuncInfo->SwiftErrorVals.push_back(&*AI);
- }
-
- for (const auto &LLVMBB : Fn)
- for (const auto &Inst : LLVMBB) {
- if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
- if (Alloca->isSwiftError())
- FuncInfo->SwiftErrorVals.push_back(Alloca);
- }
-}
-
-static void createSwiftErrorEntriesInEntryBlock(FunctionLoweringInfo *FuncInfo,
- FastISel *FastIS,
- const TargetLowering *TLI,
- const TargetInstrInfo *TII,
- SelectionDAGBuilder *SDB) {
- if (!TLI->supportSwiftError())
- return;
-
- // We only need to do this when we have swifterror parameter or swifterror
- // alloc.
- if (FuncInfo->SwiftErrorVals.empty())
- return;
-
- assert(FuncInfo->MBB == &*FuncInfo->MF->begin() &&
- "expected to insert into entry block");
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- for (const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- // We will always generate a copy from the argument. It is always used at
- // least by the 'return' of the swifterror.
- if (FuncInfo->SwiftErrorArg && FuncInfo->SwiftErrorArg == SwiftErrorVal)
- continue;
- unsigned VReg = FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- // Assign Undef to Vreg. We construct MI directly to make sure it works
- // with FastISel.
- BuildMI(*FuncInfo->MBB, FuncInfo->MBB->getFirstNonPHI(),
- SDB->getCurDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF),
- VReg);
-
- // Keep FastIS informed about the value we just inserted.
- if (FastIS)
- FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
-
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorVal, VReg);
- }
-}
-
/// Collect llvm.dbg.declare information. This is done after argument lowering
/// in case the declarations refer to arguments.
static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
@@ -1337,202 +1327,13 @@ static void processDbgDeclares(FunctionLoweringInfo *FuncInfo) {
DIExpression *Expr = DI->getExpression();
if (Offset.getBoolValue())
- Expr = DIExpression::prepend(Expr, DIExpression::NoDeref,
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset,
Offset.getZExtValue());
MF->setVariableDbgInfo(DI->getVariable(), Expr, FI, DI->getDebugLoc());
}
}
}
-/// Propagate swifterror values through the machine function CFG.
-static void propagateSwiftErrorVRegs(FunctionLoweringInfo *FuncInfo) {
- auto *TLI = FuncInfo->TLI;
- if (!TLI->supportSwiftError())
- return;
-
- // We only need to do this when we have swifterror parameter or swifterror
- // alloc.
- if (FuncInfo->SwiftErrorVals.empty())
- return;
-
- // For each machine basic block in reverse post order.
- ReversePostOrderTraversal<MachineFunction *> RPOT(FuncInfo->MF);
- for (MachineBasicBlock *MBB : RPOT) {
- // For each swifterror value in the function.
- for(const auto *SwiftErrorVal : FuncInfo->SwiftErrorVals) {
- auto Key = std::make_pair(MBB, SwiftErrorVal);
- auto UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
- auto VRegDefIt = FuncInfo->SwiftErrorVRegDefMap.find(Key);
- bool UpwardsUse = UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end();
- unsigned UUseVReg = UpwardsUse ? UUseIt->second : 0;
- bool DownwardDef = VRegDefIt != FuncInfo->SwiftErrorVRegDefMap.end();
- assert(!(UpwardsUse && !DownwardDef) &&
- "We can't have an upwards use but no downwards def");
-
- // If there is no upwards exposed use and an entry for the swifterror in
- // the def map for this value we don't need to do anything: We already
- // have a downward def for this basic block.
- if (!UpwardsUse && DownwardDef)
- continue;
-
- // Otherwise we either have an upwards exposed use vreg that we need to
- // materialize or need to forward the downward def from predecessors.
-
- // Check whether we have a single vreg def from all predecessors.
- // Otherwise we need a phi.
- SmallVector<std::pair<MachineBasicBlock *, unsigned>, 4> VRegs;
- SmallSet<const MachineBasicBlock*, 8> Visited;
- for (auto *Pred : MBB->predecessors()) {
- if (!Visited.insert(Pred).second)
- continue;
- VRegs.push_back(std::make_pair(
- Pred, FuncInfo->getOrCreateSwiftErrorVReg(Pred, SwiftErrorVal)));
- if (Pred != MBB)
- continue;
- // We have a self-edge.
- // If there was no upwards use in this basic block there is now one: the
- // phi needs to use it self.
- if (!UpwardsUse) {
- UpwardsUse = true;
- UUseIt = FuncInfo->SwiftErrorVRegUpwardsUse.find(Key);
- assert(UUseIt != FuncInfo->SwiftErrorVRegUpwardsUse.end());
- UUseVReg = UUseIt->second;
- }
- }
-
- // We need a phi node if we have more than one predecessor with different
- // downward defs.
- bool needPHI =
- VRegs.size() >= 1 &&
- std::find_if(
- VRegs.begin(), VRegs.end(),
- [&](const std::pair<const MachineBasicBlock *, unsigned> &V)
- -> bool { return V.second != VRegs[0].second; }) !=
- VRegs.end();
-
- // If there is no upwards exposed used and we don't need a phi just
- // forward the swifterror vreg from the predecessor(s).
- if (!UpwardsUse && !needPHI) {
- assert(!VRegs.empty() &&
- "No predecessors? The entry block should bail out earlier");
- // Just forward the swifterror vreg from the predecessor(s).
- FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, VRegs[0].second);
- continue;
- }
-
- auto DLoc = isa<Instruction>(SwiftErrorVal)
- ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
- : DebugLoc();
- const auto *TII = FuncInfo->MF->getSubtarget().getInstrInfo();
-
- // If we don't need a phi create a copy to the upward exposed vreg.
- if (!needPHI) {
- assert(UpwardsUse);
- assert(!VRegs.empty() &&
- "No predecessors? Is the Calling Convention correct?");
- unsigned DestReg = UUseVReg;
- BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
- DestReg)
- .addReg(VRegs[0].second);
- continue;
- }
-
- // We need a phi: if there is an upwards exposed use we already have a
- // destination virtual register number otherwise we generate a new one.
- auto &DL = FuncInfo->MF->getDataLayout();
- auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
- unsigned PHIVReg =
- UpwardsUse ? UUseVReg
- : FuncInfo->MF->getRegInfo().createVirtualRegister(RC);
- MachineInstrBuilder SwiftErrorPHI =
- BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
- TII->get(TargetOpcode::PHI), PHIVReg);
- for (auto BBRegPair : VRegs) {
- SwiftErrorPHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
- }
-
- // We did not have a definition in this block before: store the phi's vreg
- // as this block downward exposed def.
- if (!UpwardsUse)
- FuncInfo->setCurrentSwiftErrorVReg(MBB, SwiftErrorVal, PHIVReg);
- }
- }
-}
-
-static void preassignSwiftErrorRegs(const TargetLowering *TLI,
- FunctionLoweringInfo *FuncInfo,
- BasicBlock::const_iterator Begin,
- BasicBlock::const_iterator End) {
- if (!TLI->supportSwiftError() || FuncInfo->SwiftErrorVals.empty())
- return;
-
- // Iterator over instructions and assign vregs to swifterror defs and uses.
- for (auto It = Begin; It != End; ++It) {
- ImmutableCallSite CS(&*It);
- if (CS) {
- // A call-site with a swifterror argument is both use and def.
- const Value *SwiftErrorAddr = nullptr;
- for (auto &Arg : CS.args()) {
- if (!Arg->isSwiftError())
- continue;
- // Use of swifterror.
- assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
- SwiftErrorAddr = &*Arg;
- assert(SwiftErrorAddr->isSwiftError() &&
- "Must have a swifterror value argument");
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
- &*It, FuncInfo->MBB, SwiftErrorAddr);
- assert(CreatedReg);
- }
- if (!SwiftErrorAddr)
- continue;
-
- // Def of swifterror.
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
- assert(CreatedReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
- // A load is a use.
- } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
- const Value *V = LI->getOperand(0);
- if (!V->isSwiftError())
- continue;
-
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegUseAt(LI, FuncInfo->MBB, V);
- assert(CreatedReg);
-
- // A store is a def.
- } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
- const Value *SwiftErrorAddr = SI->getOperand(1);
- if (!SwiftErrorAddr->isSwiftError())
- continue;
-
- // Def of swifterror.
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) =
- FuncInfo->getOrCreateSwiftErrorVRegDefAt(&*It);
- assert(CreatedReg);
- FuncInfo->setCurrentSwiftErrorVReg(FuncInfo->MBB, SwiftErrorAddr, VReg);
-
- // A return in a swiferror returning function is a use.
- } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
- const Function *F = R->getParent()->getParent();
- if(!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
- continue;
-
- unsigned VReg; bool CreatedReg;
- std::tie(VReg, CreatedReg) = FuncInfo->getOrCreateSwiftErrorVRegUseAt(
- R, FuncInfo->MBB, FuncInfo->SwiftErrorArg);
- assert(CreatedReg);
- }
- }
-}
-
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastISelFailed = false;
// Initialize the Fast-ISel state, if needed.
@@ -1542,8 +1343,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS = TLI->createFastISel(*FuncInfo, LibInfo);
}
- setupSwiftErrorVals(Fn, TLI, FuncInfo);
-
ReversePostOrderTraversal<const Function*> RPOT(&Fn);
// Lower arguments up front. An RPO iteration always visits the entry block
@@ -1589,7 +1388,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
else
FastIS->setLastLocalValue(nullptr);
}
- createSwiftErrorEntriesInEntryBlock(FuncInfo, FastIS, TLI, TII, SDB);
+
+ bool Inserted = SwiftError->createEntriesInEntryBlock(SDB->getCurDebugLoc());
+
+ if (FastIS && Inserted)
+ FastIS->setLastLocalValue(&*std::prev(FuncInfo->InsertPt));
processDbgDeclares(FuncInfo);
@@ -1644,7 +1447,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
unsigned NumFastIselRemaining = std::distance(Begin, End);
// Pre-assign swifterror vregs.
- preassignSwiftErrorRegs(TLI, FuncInfo, Begin, End);
+ SwiftError->preassignVRegs(FuncInfo->MBB, Begin, End);
// Do FastISel on as many instructions as possible.
for (; BI != Begin; --BI) {
@@ -1692,7 +1495,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// to keep track of gc-relocates for a particular gc-statepoint. This is
// done by SelectionDAGBuilder::LowerAsSTATEPOINT, called before
// visitGCRelocate.
- if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst)) {
+ if (isa<CallInst>(Inst) && !isStatepoint(Inst) && !isGCRelocate(Inst) &&
+ !isGCResult(Inst)) {
OptimizationRemarkMissed R("sdagisel", "FastISelFailure",
Inst->getDebugLoc(), LLVMBB);
@@ -1712,7 +1516,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
!Inst->use_empty()) {
unsigned &R = FuncInfo->ValueMap[Inst];
if (!R)
- R = FuncInfo->CreateRegs(Inst->getType());
+ R = FuncInfo->CreateRegs(Inst);
}
bool HadTailCall = false;
@@ -1799,7 +1603,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
SP.copyToMachineFrameInfo(MF->getFrameInfo());
- propagateSwiftErrorVRegs(FuncInfo);
+ SwiftError->propagateVRegs();
delete FastIS;
SDB->clearDanglingDebugInfo();
@@ -1969,7 +1773,7 @@ SelectionDAGISel::FinishBasicBlock() {
}
// Lower each BitTestBlock.
- for (auto &BTB : SDB->BitTestCases) {
+ for (auto &BTB : SDB->SL->BitTestCases) {
// Lower header first, if it wasn't already lowered
if (!BTB.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
@@ -2050,30 +1854,30 @@ SelectionDAGISel::FinishBasicBlock() {
}
}
}
- SDB->BitTestCases.clear();
+ SDB->SL->BitTestCases.clear();
// If the JumpTable record is filled in, then we need to emit a jump table.
// Updating the PHI nodes is tricky in this case, since we need to determine
// whether the PHI is a successor of the range check MBB or the jump table MBB
- for (unsigned i = 0, e = SDB->JTCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->SL->JTCases.size(); i != e; ++i) {
// Lower header first, if it wasn't already lowered
- if (!SDB->JTCases[i].first.Emitted) {
+ if (!SDB->SL->JTCases[i].first.Emitted) {
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB;
+ FuncInfo->MBB = SDB->SL->JTCases[i].first.HeaderBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first,
- FuncInfo->MBB);
+ SDB->visitJumpTableHeader(SDB->SL->JTCases[i].second,
+ SDB->SL->JTCases[i].first, FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
}
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->JTCases[i].second.MBB;
+ FuncInfo->MBB = SDB->SL->JTCases[i].second.MBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
- SDB->visitJumpTable(SDB->JTCases[i].second);
+ SDB->visitJumpTable(SDB->SL->JTCases[i].second);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -2086,31 +1890,31 @@ SelectionDAGISel::FinishBasicBlock() {
assert(PHI->isPHI() &&
"This is not a machine PHI node that we are updating!");
// "default" BB. We can go there only from header BB.
- if (PHIBB == SDB->JTCases[i].second.Default)
+ if (PHIBB == SDB->SL->JTCases[i].second.Default)
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second)
- .addMBB(SDB->JTCases[i].first.HeaderBB);
+ .addMBB(SDB->SL->JTCases[i].first.HeaderBB);
// JT BB. Just iterate over successors here
if (FuncInfo->MBB->isSuccessor(PHIBB))
PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB);
}
}
- SDB->JTCases.clear();
+ SDB->SL->JTCases.clear();
// If we generated any switch lowering information, build and codegen any
// additional DAGs necessary.
- for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
+ for (unsigned i = 0, e = SDB->SL->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SL->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
SmallVector<MachineBasicBlock *, 2> Succs;
- Succs.push_back(SDB->SwitchCases[i].TrueBB);
- if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
- Succs.push_back(SDB->SwitchCases[i].FalseBB);
+ Succs.push_back(SDB->SL->SwitchCases[i].TrueBB);
+ if (SDB->SL->SwitchCases[i].TrueBB != SDB->SL->SwitchCases[i].FalseBB)
+ Succs.push_back(SDB->SL->SwitchCases[i].FalseBB);
// Emit the code. Note that this could result in FuncInfo->MBB being split.
- SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
+ SDB->visitSwitchCase(SDB->SL->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
@@ -2146,7 +1950,7 @@ SelectionDAGISel::FinishBasicBlock() {
}
}
}
- SDB->SwitchCases.clear();
+ SDB->SL->SwitchCases.clear();
}
/// Create the scheduler. If a specific scheduler was specified
@@ -2413,14 +2217,14 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
return !findNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
-void SelectionDAGISel::Select_INLINEASM(SDNode *N) {
+void SelectionDAGISel::Select_INLINEASM(SDNode *N, bool Branch) {
SDLoc DL(N);
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops, DL);
const EVT VTs[] = {MVT::Other, MVT::Glue};
- SDValue New = CurDAG->getNode(ISD::INLINEASM, DL, VTs, Ops);
+ SDValue New = CurDAG->getNode(Branch ? ISD::INLINEASM_BR : ISD::INLINEASM,
+ DL, VTs, Ops);
New->setNodeId(-1);
ReplaceUses(N, New.getNode());
CurDAG->RemoveDeadNode(N);
@@ -2728,6 +2532,14 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
+CheckChild2CondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ if (N.getNumOperands() <= 2)
+ return false;
+ return ::CheckCondCode(MatcherTable, MatcherIndex, N.getOperand(2));
+}
+
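// Annotation (illustrative sketch, not part of the patch): the ChildN
// predicate wrappers all share one guard-then-delegate shape. A generic,
// self-contained model with a hypothetical leaf predicate:
static bool checkLeaf(const unsigned char *Table, unsigned &Index, int Node) {
  // Consume one expected byte from the table and compare it to the node.
  return static_cast<int>(Table[Index++]) == Node;
}
static bool checkChild(const unsigned char *Table, unsigned &Index,
                       const int *Operands, unsigned NumOperands,
                       unsigned ChildNo) {
  if (ChildNo >= NumOperands)
    return false; // missing child: the predicate fails and the matcher backtracks
  return checkLeaf(Table, Index, Operands[ChildNo]);
}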
+LLVM_ATTRIBUTE_ALWAYS_INLINE static inline bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering *TLI, const DataLayout &DL) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
@@ -2842,6 +2654,9 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
case SelectionDAGISel::OPC_CheckCondCode:
Result = !::CheckCondCode(Table, Index, N);
return Index;
+ case SelectionDAGISel::OPC_CheckChild2CondCode:
+ Result = !::CheckChild2CondCode(Table, Index, N);
+ return Index;
case SelectionDAGISel::OPC_CheckValueType:
Result = !::CheckValueType(Table, Index, N, SDISel.TLI,
SDISel.CurDAG->getDataLayout());
@@ -2970,7 +2785,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
CurDAG->RemoveDeadNode(NodeToMatch);
return;
case ISD::INLINEASM:
- Select_INLINEASM(NodeToMatch);
+ case ISD::INLINEASM_BR:
+ Select_INLINEASM(NodeToMatch,
+ NodeToMatch->getOpcode() == ISD::INLINEASM_BR);
return;
case ISD::READ_REGISTER:
Select_READ_REGISTER(NodeToMatch);
@@ -3328,6 +3145,9 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckCondCode:
if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
continue;
+ case OPC_CheckChild2CondCode:
+ if (!::CheckChild2CondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
case OPC_CheckValueType:
if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI,
CurDAG->getDataLayout()))
@@ -3348,6 +3168,12 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
+ case OPC_CheckImmAllOnesV:
+ if (!ISD::isBuildVectorAllOnes(N.getNode())) break;
+ continue;
+ case OPC_CheckImmAllZerosV:
+ if (!ISD::isBuildVectorAllZeros(N.getNode())) break;
+ continue;
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 3b19bff4743d..cdc09d59f6a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -1,9 +1,8 @@
//===-- SelectionDAGPrinter.cpp - Implement SelectionDAG::viewGraph() -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
index 3a283bc5fdc0..3a2df6f60593 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGTargetInfo.cpp
@@ -1,9 +1,8 @@
//===- SelectionDAGTargetInfo.cpp - SelectionDAG Info ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 90a1b350fc94..395e9a8a4fc5 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -1,9 +1,8 @@
//===- StatepointLowering.cpp - SDAGBuilder's statepoint code -------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -348,16 +347,28 @@ static std::pair<SDValue, SDNode *> lowerCallFromStatepointLoweringInfo(
return std::make_pair(ReturnValue, CallEnd->getOperand(0).getNode());
}
+static MachineMemOperand* getMachineMemOperand(MachineFunction &MF,
+ FrameIndexSDNode &FI) {
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, FI.getIndex());
+ auto MMOFlags = MachineMemOperand::MOStore |
+ MachineMemOperand::MOLoad | MachineMemOperand::MOVolatile;
+ auto &MFI = MF.getFrameInfo();
+ return MF.getMachineMemOperand(PtrInfo, MMOFlags,
+ MFI.getObjectSize(FI.getIndex()),
+ MFI.getObjectAlignment(FI.getIndex()));
+}
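// Annotation (not part of the patch): MOStore | MOLoad | MOVolatile
// conservatively marks the statepoint slot as both read and written and as
// non-removable, so later passes cannot delete or reorder the access.
// Typical use, given an SDValue Loc known to wrap a frame index:
//   if (auto *FI = dyn_cast<FrameIndexSDNode>(Loc))
//     MemRefs.push_back(getMachineMemOperand(MF, *FI));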
+
-/// Spill a value incoming to the statepoint. It might be either part of
-/// vmstate
-/// or gcstate. In both cases unconditionally spill it on the stack unless it
-/// is a null constant. Return pair with first element being frame index
-/// containing saved value and second element with outgoing chain from the
-/// emitted store
-static std::pair<SDValue, SDValue>
+/// Spill a value incoming to the statepoint. It might be part of either the
+/// vmstate or the gcstate. In both cases unconditionally spill it on the
+/// stack unless it is a null constant. Return a tuple of the frame index
+/// containing the saved value, the outgoing chain from the emitted store,
+/// and the MachineMemOperand for the spill (null if the value had already
+/// been spilled).
+static std::tuple<SDValue, SDValue, MachineMemOperand*>
spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
SelectionDAGBuilder &Builder) {
SDValue Loc = Builder.StatepointLowering.getLocation(Incoming);
+ MachineMemOperand* MMO = nullptr;
// Emit new store if we didn't do it for this ptr before
if (!Loc.getNode()) {
@@ -367,10 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
- // TODO: We can create TokenFactor node instead of
- // chaining stores one after another, this may allow
- // a bit more optimal scheduling for them
-
#ifndef NDEBUG
// Right now we always allocate spill slots that are of the same
// size as the value we're about to spill (the size of spillee can
@@ -382,15 +389,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
"Bad spill: stack slot does not match!");
#endif
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- MachinePointerInfo::getFixedStack(
- Builder.DAG.getMachineFunction(), Index));
+ PtrInfo);
+ MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
+
Builder.StatepointLowering.setLocation(Incoming, Loc);
}
assert(Loc.getNode());
- return std::make_pair(Loc, Chain);
+ return std::make_tuple(Loc, Chain, MMO);
}
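// Annotation (not part of the patch): callers unpack the tuple with
// std::get, matching the C++14 baseline of the time; under C++17 the same
// call could use structured bindings instead:
//   auto [Loc, OutChain, MMO] = spillIncomingStatepointValue(In, Chain, B);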
/// Lower a single value incoming to a statepoint node. This value can be
@@ -398,7 +408,11 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
/// case constants and allocas, then fall back to spilling if required.
static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
SmallVectorImpl<SDValue> &Ops,
+ SmallVectorImpl<MachineMemOperand*> &MemRefs,
SelectionDAGBuilder &Builder) {
+ // Note: We know all of these spills are independent, but don't bother to
+ // exploit that chain-wise. DAGCombine will happily do so as needed, so
+ // doing it here would be a small compile-time win at most.
SDValue Chain = Builder.getRoot();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Incoming)) {
@@ -417,6 +431,11 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
"Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
+
} else if (LiveInOnly) {
// If this value is live in (not live-on-return, or live-through), we can
// treat it the same way patchpoint treats its "live in" values. We'll
@@ -433,8 +452,10 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
// need to be optional since it requires a lot of complexity on the
// runtime side which not all would support.
auto Res = spillIncomingStatepointValue(Incoming, Chain, Builder);
- Ops.push_back(Res.first);
- Chain = Res.second;
+ Ops.push_back(std::get<0>(Res));
+ if (auto *MMO = std::get<2>(Res))
+ MemRefs.push_back(MMO);
+ Chain = std::get<1>(Res);
}
Builder.DAG.setRoot(Chain);
@@ -449,7 +470,7 @@ static void lowerIncomingStatepointValue(SDValue Incoming, bool LiveInOnly,
/// will be set to the last value spilled (if any were).
static void
lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
- SelectionDAGBuilder::StatepointLoweringInfo &SI,
+ SmallVectorImpl<MachineMemOperand*> &MemRefs,
+ SelectionDAGBuilder::StatepointLoweringInfo &SI,
SelectionDAGBuilder &Builder) {
// Lower the deopt and gc arguments for this statepoint. Layout will be:
// deopt argument length, deopt arguments.., gc arguments...
@@ -533,7 +554,7 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
if (!Incoming.getNode())
Incoming = Builder.getValue(V);
const bool LiveInValue = LiveInDeopt && !isGCValue(V);
- lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, Builder);
+ lowerIncomingStatepointValue(Incoming, LiveInValue, Ops, MemRefs, Builder);
}
// Finally, go ahead and lower all the gc arguments. There's no prefixed
@@ -544,11 +565,11 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
for (unsigned i = 0; i < SI.Bases.size(); ++i) {
const Value *Base = SI.Bases[i];
lowerIncomingStatepointValue(Builder.getValue(Base), /*LiveInOnly*/ false,
- Ops, Builder);
+ Ops, MemRefs, Builder);
const Value *Ptr = SI.Ptrs[i];
lowerIncomingStatepointValue(Builder.getValue(Ptr), /*LiveInOnly*/ false,
- Ops, Builder);
+ Ops, MemRefs, Builder);
}
// If there are any explicit spill slots passed to the statepoint, record
@@ -564,6 +585,10 @@ lowerStatepointMetaArgs(SmallVectorImpl<SDValue> &Ops,
"Incoming value is a frame index!");
Ops.push_back(Builder.DAG.getTargetFrameIndex(FI->getIndex(),
Builder.getFrameIndexTy()));
+
+ auto &MF = Builder.DAG.getMachineFunction();
+ auto *MMO = getMachineMemOperand(MF, *FI);
+ MemRefs.push_back(MMO);
}
}
@@ -630,7 +655,8 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// Lower statepoint vmstate and gcstate arguments
SmallVector<SDValue, 10> LoweredMetaArgs;
- lowerStatepointMetaArgs(LoweredMetaArgs, SI, *this);
+ SmallVector<MachineMemOperand*, 16> MemRefs;
+ lowerStatepointMetaArgs(LoweredMetaArgs, MemRefs, SI, *this);
// Now that we've emitted the spills, we need to update the root so that the
// call sequence is ordered correctly.
@@ -746,8 +772,9 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
// input. This allows someone else to chain off us as needed.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
- SDNode *StatepointMCNode =
- DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ MachineSDNode *StatepointMCNode =
+ DAG.getMachineNode(TargetOpcode::STATEPOINT, getCurSDLoc(), NodeTys, Ops);
+ DAG.setNodeMemRefs(StatepointMCNode, MemRefs);
SDNode *SinkNode = StatepointMCNode;
@@ -799,7 +826,7 @@ SDValue SelectionDAGBuilder::LowerAsSTATEPOINT(
void
SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
const BasicBlock *EHPadBB /*= nullptr*/) {
- assert(ISP.getCallSite().getCallingConv() != CallingConv::AnyReg &&
+ assert(ISP.getCall()->getCallingConv() != CallingConv::AnyReg &&
"anyregcc is not supported on statepoints!");
#ifndef NDEBUG
@@ -832,7 +859,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
}
StatepointLoweringInfo SI(DAG);
- populateCallLoweringInfo(SI.CLI, ISP.getCallSite(),
+ populateCallLoweringInfo(SI.CLI, ISP.getCall(),
ImmutableStatepoint::CallArgsBeginPos,
ISP.getNumCallArgs(), ActualCallee,
ISP.getActualReturnType(), false /* IsPatchPoint */);
@@ -859,7 +886,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
const GCResultInst *GCResult = ISP.getGCResult();
Type *RetTy = ISP.getActualReturnType();
if (!RetTy->isVoidTy() && GCResult) {
- if (GCResult->getParent() != ISP.getCallSite().getParent()) {
+ if (GCResult->getParent() != ISP.getCall()->getParent()) {
// Result value will be used in a different basic block so we need to
// export it now. Default exporting mechanism will not work here because
// statepoint call has a different type than the actual call. It means
@@ -871,7 +898,7 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
unsigned Reg = FuncInfo.CreateRegs(RetTy);
RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
DAG.getDataLayout(), Reg, RetTy,
- ISP.getCallSite().getCallingConv());
+ ISP.getCall()->getCallingConv());
SDValue Chain = DAG.getEntryNode();
RFV.getCopyToRegs(ReturnValue, DAG, getCurSDLoc(), Chain, nullptr);
@@ -891,22 +918,22 @@ SelectionDAGBuilder::LowerStatepoint(ImmutableStatepoint ISP,
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
- ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB,
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB,
bool VarArgDisallowed, bool ForceVoidReturnTy) {
StatepointLoweringInfo SI(DAG);
- unsigned ArgBeginIndex = CS.arg_begin() - CS.getInstruction()->op_begin();
+ unsigned ArgBeginIndex = Call->arg_begin() - Call->op_begin();
populateCallLoweringInfo(
- SI.CLI, CS, ArgBeginIndex, CS.getNumArgOperands(), Callee,
- ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : CS.getType(),
+ SI.CLI, Call, ArgBeginIndex, Call->getNumArgOperands(), Callee,
+ ForceVoidReturnTy ? Type::getVoidTy(*DAG.getContext()) : Call->getType(),
false);
if (!VarArgDisallowed)
- SI.CLI.IsVarArg = CS.getFunctionType()->isVarArg();
+ SI.CLI.IsVarArg = Call->getFunctionType()->isVarArg();
- auto DeoptBundle = *CS.getOperandBundle(LLVMContext::OB_deopt);
+ auto DeoptBundle = *Call->getOperandBundle(LLVMContext::OB_deopt);
unsigned DefaultID = StatepointDirectives::DeoptBundleStatepointID;
- auto SD = parseStatepointDirectivesFromAttrs(CS.getAttributes());
+ auto SD = parseStatepointDirectivesFromAttrs(Call->getAttributes());
SI.ID = SD.StatepointID.getValueOr(DefaultID);
SI.NumPatchBytes = SD.NumPatchBytes.getValueOr(0);
@@ -918,15 +945,14 @@ void SelectionDAGBuilder::LowerCallSiteWithDeoptBundleImpl(
// NB! The GC arguments are deliberately left empty.
if (SDValue ReturnVal = LowerAsSTATEPOINT(SI)) {
- const Instruction *Inst = CS.getInstruction();
- ReturnVal = lowerRangeToAssertZExt(DAG, *Inst, ReturnVal);
- setValue(Inst, ReturnVal);
+ ReturnVal = lowerRangeToAssertZExt(DAG, *Call, ReturnVal);
+ setValue(Call, ReturnVal);
}
}
void SelectionDAGBuilder::LowerCallSiteWithDeoptBundle(
- ImmutableCallSite CS, SDValue Callee, const BasicBlock *EHPadBB) {
- LowerCallSiteWithDeoptBundleImpl(CS, Callee, EHPadBB,
+ const CallBase *Call, SDValue Callee, const BasicBlock *EHPadBB) {
+ LowerCallSiteWithDeoptBundleImpl(Call, Callee, EHPadBB,
/* VarArgDisallowed = */ false,
/* ForceVoidReturnTy = */ false);
}
@@ -986,11 +1012,11 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
}
SDValue SpillSlot =
- DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+ DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
- // Be conservative: flush all pending loads
- // TODO: Probably we can be less restrictive on this,
- // it may allow more scheduling opportunities.
+ // Note: We know all of these reloads are independent, but don't bother to
+ // exploit that chain-wise. DAGCombine will happily do so as needed, so
+ // doing it here would be a small compile-time win at most.
SDValue Chain = getRoot();
SDValue SpillLoad =
@@ -1000,7 +1026,6 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
*DerivedPtrLocation));
- // Again, be conservative, don't emit pending loads
DAG.setRoot(SpillLoad.getValue(1));
assert(SpillLoad.getNode());
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.h b/lib/CodeGen/SelectionDAG/StatepointLowering.h
index 372c82a359f6..70507932681d 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.h
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.h
@@ -1,9 +1,8 @@
//===- StatepointLowering.h - SDAGBuilder's statepoint code ---*- C++ -*---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -67,13 +66,18 @@ public:
/// before the next statepoint. If we don't see it, we'll report
/// an assertion.
void scheduleRelocCall(const CallInst &RelocCall) {
- PendingGCRelocateCalls.push_back(&RelocCall);
+ // We are not interested in lowering dead instructions.
+ if (!RelocCall.use_empty())
+ PendingGCRelocateCalls.push_back(&RelocCall);
}
/// Remove this gc_relocate from the list we're expecting to see
/// before the next statepoint. If we weren't expecting to see
/// it, we'll fail an assertion.
void relocCallVisited(const CallInst &RelocCall) {
+ // We are not interested in lowering dead instructions.
+ if (RelocCall.use_empty())
+ return;
auto I = llvm::find(PendingGCRelocateCalls, &RelocCall);
assert(I != PendingGCRelocateCalls.end() &&
"Visited unexpected gcrelocate call");
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a2f05c1e3cef..b260cd91d468 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1,9 +1,8 @@
//===-- TargetLowering.cpp - Implement the TargetLowering class -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -100,19 +99,22 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
/// Set CallLoweringInfo attribute flags based on a call instruction
/// and called function attributes.
-void TargetLoweringBase::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
unsigned ArgIdx) {
- IsSExt = CS->paramHasAttr(ArgIdx, Attribute::SExt);
- IsZExt = CS->paramHasAttr(ArgIdx, Attribute::ZExt);
- IsInReg = CS->paramHasAttr(ArgIdx, Attribute::InReg);
- IsSRet = CS->paramHasAttr(ArgIdx, Attribute::StructRet);
- IsNest = CS->paramHasAttr(ArgIdx, Attribute::Nest);
- IsByVal = CS->paramHasAttr(ArgIdx, Attribute::ByVal);
- IsInAlloca = CS->paramHasAttr(ArgIdx, Attribute::InAlloca);
- IsReturned = CS->paramHasAttr(ArgIdx, Attribute::Returned);
- IsSwiftSelf = CS->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
- IsSwiftError = CS->paramHasAttr(ArgIdx, Attribute::SwiftError);
- Alignment = CS->getParamAlignment(ArgIdx);
+ IsSExt = Call->paramHasAttr(ArgIdx, Attribute::SExt);
+ IsZExt = Call->paramHasAttr(ArgIdx, Attribute::ZExt);
+ IsInReg = Call->paramHasAttr(ArgIdx, Attribute::InReg);
+ IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
+ IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
+ IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+ IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
+ IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
+ IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
+ IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
+ Alignment = Call->getParamAlignment(ArgIdx);
+ ByValType = nullptr;
+ if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+ ByValType = Call->getParamByValType(ArgIdx);
}
/// Generate a libcall taking the given operands as arguments and returning a
@@ -121,7 +123,8 @@ std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
ArrayRef<SDValue> Ops, bool isSigned,
const SDLoc &dl, bool doesNotReturn,
- bool isReturnValueUsed) const {
+ bool isReturnValueUsed,
+ bool isPostTypeLegalization) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
@@ -147,11 +150,114 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
.setNoReturn(doesNotReturn)
.setDiscardResult(!isReturnValueUsed)
+ .setIsPostTypeLegalization(isPostTypeLegalization)
.setSExtResult(signExtend)
.setZExtResult(!signExtend);
return LowerCallTo(CLI);
}
+bool
+TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
+ unsigned Limit, uint64_t Size,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsMemset,
+ bool ZeroMemset,
+ bool MemcpyStrSrc,
+ bool AllowOverlap,
+ unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes) const {
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ if (!(SrcAlign == 0 || SrcAlign >= DstAlign))
+ return false;
+
+ EVT VT = getOptimalMemOpType(Size, DstAlign, SrcAlign,
+ IsMemset, ZeroMemset, MemcpyStrSrc,
+ FuncAttributes);
+
+ if (VT == MVT::Other) {
+ // Use the largest integer type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater than
+ // or equal to DstAlign (or zero).
+ VT = MVT::i64;
+ while (DstAlign && DstAlign < VT.getSizeInBits() / 8 &&
+ !allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign))
+ VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1);
+ assert(VT.isInteger());
+
+ // Find the largest legal integer type.
+ MVT LVT = MVT::i64;
+ while (!isTypeLegal(LVT))
+ LVT = (MVT::SimpleValueType)(LVT.SimpleTy - 1);
+ assert(LVT.isInteger());
+
+ // If the type we've chosen is larger than the largest legal integer type
+ // then use that instead.
+ if (VT.bitsGT(LVT))
+ VT = LVT;
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned VTSize = VT.getSizeInBits() / 8;
+ while (VTSize > Size) {
+ // For now, only use non-vector loads / stores for the left-over pieces.
+ EVT NewVT = VT;
+ unsigned NewVTSize;
+
+ bool Found = false;
+ if (VT.isVector() || VT.isFloatingPoint()) {
+ NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32;
+ if (isOperationLegalOrCustom(ISD::STORE, NewVT) &&
+ isSafeMemOpType(NewVT.getSimpleVT()))
+ Found = true;
+ else if (NewVT == MVT::i64 &&
+ isOperationLegalOrCustom(ISD::STORE, MVT::f64) &&
+ isSafeMemOpType(MVT::f64)) {
+ // i64 is usually not legal on 32-bit targets, but f64 may be.
+ NewVT = MVT::f64;
+ Found = true;
+ }
+ }
+
+ if (!Found) {
+ do {
+ NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1);
+ if (NewVT == MVT::i8)
+ break;
+ } while (!isSafeMemOpType(NewVT.getSimpleVT()));
+ }
+ NewVTSize = NewVT.getSizeInBits() / 8;
+
+ // If the new VT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+ if (NumMemOps && AllowOverlap && NewVTSize < Size &&
+ allowsMisalignedMemoryAccesses(VT, DstAS, DstAlign,
+ MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ VTSize = Size;
+ else {
+ VT = NewVT;
+ VTSize = NewVTSize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(VT);
+ Size -= VTSize;
+ }
+
+ return true;
+}
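// Annotation (illustrative sketch, not part of the patch): stripped of the
// legality and alignment queries, the loop above is a greedy chunker --
// repeatedly emit the widest type that fits, shrinking for the tail. A
// self-contained model, assuming power-of-two chunk sizes in bytes:
#include <cstdint>
#include <vector>
static bool chunkMemOp(std::vector<unsigned> &Chunks, std::uint64_t Size,
                       unsigned MaxChunk, unsigned Limit) {
  unsigned Chunk = MaxChunk;
  while (Size != 0) {
    while (Chunk > Size)
      Chunk /= 2; // shrink for the left-over piece
    if (Chunks.size() + 1 > Limit)
      return false; // too many ops; the caller falls back to a libcall
    Chunks.push_back(Chunk);
    Size -= Chunk;
  }
  return true;
}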
+
/// Soften the operands of a comparison. This code is shared among BR_CC,
/// SELECT_CC, and SETCC handlers.
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
@@ -346,7 +452,6 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
/// return true.
bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
TargetLoweringOpt &TLO) const {
- SelectionDAG &DAG = TLO.DAG;
SDLoc DL(Op);
unsigned Opcode = Op.getOpcode();
@@ -372,8 +477,8 @@ bool TargetLowering::ShrinkDemandedConstant(SDValue Op, const APInt &Demanded,
if (!C.isSubsetOf(Demanded)) {
EVT VT = Op.getValueType();
- SDValue NewC = DAG.getConstant(Demanded & C, DL, VT);
- SDValue NewOp = DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
+ SDValue NewC = TLO.DAG.getConstant(Demanded & C, DL, VT);
+ SDValue NewOp = TLO.DAG.getNode(Opcode, DL, VT, Op.getOperand(0), NewC);
return TLO.CombineTo(Op, NewOp);
}
@@ -487,6 +592,10 @@ bool TargetLowering::SimplifyDemandedBits(
// Don't know anything.
Known = KnownBits(BitWidth);
+ // Undef operand.
+ if (Op.isUndef())
+ return false;
+
if (Op.getOpcode() == ISD::Constant) {
// We know all of the bits for a constant!
Known.One = cast<ConstantSDNode>(Op)->getAPIntValue();
@@ -509,40 +618,116 @@ bool TargetLowering::SimplifyDemandedBits(
DemandedElts = APInt::getAllOnesValue(NumElts);
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
- if (!Op.isUndef())
- return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
- return false;
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
} else if (Depth == 6) { // Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
+ case ISD::SCALAR_TO_VECTOR: {
+ if (!DemandedElts[0])
+ return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
+
+ KnownBits SrcKnown;
+ SDValue Src = Op.getOperand(0);
+ unsigned SrcBitWidth = Src.getScalarValueSizeInBits();
+ APInt SrcDemandedBits = DemandedBits.zextOrSelf(SrcBitWidth);
+ if (SimplifyDemandedBits(Src, SrcDemandedBits, SrcKnown, TLO, Depth + 1))
+ return true;
+ Known = SrcKnown.zextOrTrunc(BitWidth, false);
+ break;
+ }
case ISD::BUILD_VECTOR:
- // Collect the known bits that are shared by every constant vector element.
- Known.Zero.setAllBits(); Known.One.setAllBits();
- for (SDValue SrcOp : Op->ops()) {
- if (!isa<ConstantSDNode>(SrcOp)) {
- // We can only handle all constant values - bail out with no known bits.
- Known = KnownBits(BitWidth);
- return false;
- }
- Known2.One = cast<ConstantSDNode>(SrcOp)->getAPIntValue();
- Known2.Zero = ~Known2.One;
-
- // BUILD_VECTOR can implicitly truncate sources, we must handle this.
- if (Known2.One.getBitWidth() != BitWidth) {
- assert(Known2.getBitWidth() > BitWidth &&
- "Expected BUILD_VECTOR implicit truncation");
- Known2 = Known2.trunc(BitWidth);
+ // Collect the known bits that are shared by every demanded element.
+ // TODO: Call SimplifyDemandedBits for non-constant demanded elements.
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ case ISD::LOAD: {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (getTargetConstantFromLoad(LD)) {
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
+ return false; // Don't fall through, will infinitely loop.
+ }
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ SDValue Vec = Op.getOperand(0);
+ SDValue Scl = Op.getOperand(1);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+
+ // If index isn't constant, assume we need all vector elements AND the
+ // inserted element.
+ APInt DemandedVecElts(DemandedElts);
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) {
+ unsigned Idx = CIdx->getZExtValue();
+ DemandedVecElts.clearBit(Idx);
+
+ // Inserted element is not required.
+ if (!DemandedElts[Idx])
+ return TLO.CombineTo(Op, Vec);
+ }
+
+ KnownBits KnownScl;
+ unsigned NumSclBits = Scl.getScalarValueSizeInBits();
+ APInt DemandedSclBits = DemandedBits.zextOrTrunc(NumSclBits);
+ if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1))
+ return true;
+
+ Known = KnownScl.zextOrTrunc(BitWidth, false);
+
+ KnownBits KnownVec;
+ if (SimplifyDemandedBits(Vec, DemandedBits, DemandedVecElts, KnownVec, TLO,
+ Depth + 1))
+ return true;
+
+ if (!!DemandedVecElts) {
+ Known.One &= KnownVec.One;
+ Known.Zero &= KnownVec.Zero;
+ }
+
+ return false;
+ }
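// Annotation (worked example, not part of the patch): for
// insert_vector_elt(Vec, Scl, 2) with DemandedElts = {0, 1}, lane 2 is never
// read, so the whole node folds to Vec; with DemandedElts = {2}, only Scl's
// bits matter and DemandedVecElts becomes empty.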
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Base = Op.getOperand(0);
+ SDValue Sub = Op.getOperand(1);
+ EVT SubVT = Sub.getValueType();
+ unsigned NumSubElts = SubVT.getVectorNumElements();
+
+ // If index isn't constant, assume we need the original demanded base
+ // elements and ALL the inserted subvector elements.
+ APInt BaseElts = DemandedElts;
+ APInt SubElts = APInt::getAllOnesValue(NumSubElts);
+ if (isa<ConstantSDNode>(Op.getOperand(2))) {
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
+ if (Idx.ule(NumElts - NumSubElts)) {
+ unsigned SubIdx = Idx.getZExtValue();
+ SubElts = DemandedElts.extractBits(NumSubElts, SubIdx);
+ BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
}
+ }
- // Known bits are the values that are shared by every element.
- // TODO: support per-element known bits.
- Known.One &= Known2.One;
- Known.Zero &= Known2.Zero;
+ KnownBits KnownSub, KnownBase;
+ if (SimplifyDemandedBits(Sub, DemandedBits, SubElts, KnownSub, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Base, DemandedBits, BaseElts, KnownBase, TLO,
+ Depth + 1))
+ return true;
+
+ Known.Zero.setAllBits();
+ Known.One.setAllBits();
+ if (!!SubElts) {
+ Known.One &= KnownSub.One;
+ Known.Zero &= KnownSub.Zero;
}
- return false; // Don't fall through, will infinitely loop.
+ if (!!BaseElts) {
+ Known.One &= KnownBase.One;
+ Known.Zero &= KnownBase.Zero;
+ }
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
@@ -640,11 +825,12 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts, Known2, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, ~Known.Zero & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -674,11 +860,12 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts, Known2, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, ~Known.One & DemandedBits, DemandedElts,
+ Known2, TLO, Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -705,10 +892,12 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedElts, Known2, TLO,
+ Depth + 1))
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
@@ -831,20 +1020,23 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
// If this is ((X >>u C1) << ShAmt), see if we can simplify this into a
// single shift. We can do this if the bottom bits (which are shifted
// out) are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if (ShAmt &&
- (DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
+ if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SHL;
@@ -862,8 +1054,14 @@ bool TargetLowering::SimplifyDemandedBits(
}
}
- if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts, Known, TLO,
- Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits.lshr(ShAmt), DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+
+ // Try shrinking the operation as long as the shift amount will still be
+ // in range.
+ if ((ShAmt < DemandedBits.getActiveBits()) &&
+ ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO))
return true;
// Convert (shl (anyext x, c)) to (anyext (shl x, c)) if the high bits
@@ -919,12 +1117,16 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
+ EVT ShiftVT = Op1.getValueType();
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -935,10 +1137,11 @@ bool TargetLowering::SimplifyDemandedBits(
// If this is ((X << C1) >>u ShAmt), see if we can simplify this into a
// single shift. We can do this if the top bits (which are shifted out)
// are never demanded.
+ // TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SHL) {
- if (ConstantSDNode *SA2 = isConstOrConstSplat(Op0.getOperand(1))) {
- if (ShAmt &&
- (DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (ConstantSDNode *SA2 =
+ isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
+ if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
@@ -948,7 +1151,7 @@ bool TargetLowering::SimplifyDemandedBits(
Opc = ISD::SHL;
}
- SDValue NewSA = TLO.DAG.getConstant(Diff, dl, Op1.getValueType());
+ SDValue NewSA = TLO.DAG.getConstant(Diff, dl, ShiftVT);
return TLO.CombineTo(
Op, TLO.DAG.getNode(Opc, dl, VT, Op0.getOperand(0), NewSA));
}
@@ -957,7 +1160,8 @@ bool TargetLowering::SimplifyDemandedBits(
}
// Compute the new bits that are at the top now.
- if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -978,12 +1182,15 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.isOneValue())
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT, Op0, Op1));
- if (ConstantSDNode *SA = isConstOrConstSplat(Op1)) {
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op1, DemandedElts)) {
// If the shift count is an invalid immediate, don't do anything.
if (SA->getAPIntValue().uge(BitWidth))
break;
unsigned ShAmt = SA->getZExtValue();
+ if (ShAmt == 0)
+ return TLO.CombineTo(Op, Op0);
+
APInt InDemandedMask = (DemandedBits << ShAmt);
// If the shift is exact, then it does demand the low bits (and knows that
@@ -996,7 +1203,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (DemandedBits.countLeadingZeros() < ShAmt)
InDemandedMask.setSignBit();
- if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, InDemandedMask, DemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
Known.Zero.lshrInPlace(ShAmt);
@@ -1026,6 +1234,55 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::FSHL:
+ case ISD::FSHR: {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+ SDValue Op2 = Op.getOperand(2);
+ bool IsFSHL = (Op.getOpcode() == ISD::FSHL);
+
+ if (ConstantSDNode *SA = isConstOrConstSplat(Op2, DemandedElts)) {
+ unsigned Amt = SA->getAPIntValue().urem(BitWidth);
+
+ // For fshl, 0-shift returns the 1st arg.
+ // For fshr, 0-shift returns the 2nd arg.
+ if (Amt == 0) {
+ if (SimplifyDemandedBits(IsFSHL ? Op0 : Op1, DemandedBits, DemandedElts,
+ Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
+
+ // fshl: (Op0 << Amt) | (Op1 >> (BW - Amt))
+ // fshr: (Op0 << (BW - Amt)) | (Op1 >> Amt)
+ APInt Demanded0 = DemandedBits.lshr(IsFSHL ? Amt : (BitWidth - Amt));
+ APInt Demanded1 = DemandedBits << (IsFSHL ? (BitWidth - Amt) : Amt);
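+      // e.g. (illustrative, i8 fshl with Amt == 3): demanding only bits 1-2
+      // of the result demands nothing of Op0 (its bits land at positions 3
+      // and above) and only bits 6-7 of Op1.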
+ if (SimplifyDemandedBits(Op0, Demanded0, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ if (SimplifyDemandedBits(Op1, Demanded1, DemandedElts, Known, TLO,
+ Depth + 1))
+ return true;
+
+ Known2.One <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known2.Zero <<= (IsFSHL ? Amt : (BitWidth - Amt));
+ Known.One.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.Zero.lshrInPlace(IsFSHL ? (BitWidth - Amt) : Amt);
+ Known.One |= Known2.One;
+ Known.Zero |= Known2.Zero;
+ }
+ break;
+ }
+ case ISD::BITREVERSE: {
+ SDValue Src = Op.getOperand(0);
+ APInt DemandedSrcBits = DemandedBits.reverseBits();
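+    // e.g. for i8, demanding only bit 0 of the result demands only bit 7 of
+    // Src.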
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, Known2, TLO,
+ Depth + 1))
+ return true;
+ Known.One = Known2.One.reverseBits();
+ Known.Zero = Known2.Zero.reverseBits();
+ break;
+ }
case ISD::SIGN_EXTEND_INREG: {
SDValue Op0 = Op.getOperand(0);
EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
@@ -1033,8 +1290,8 @@ bool TargetLowering::SimplifyDemandedBits(
// If we only care about the highest bit, don't bother shifting right.
if (DemandedBits.isSignMask()) {
- bool AlreadySignExtended =
- TLO.DAG.ComputeNumSignBits(Op0) >= BitWidth - ExVTBits + 1;
+ unsigned NumSignBits = TLO.DAG.ComputeNumSignBits(Op0);
+ bool AlreadySignExtended = NumSignBits >= BitWidth - ExVTBits + 1;
// However if the input is already sign extended we expect the sign
// extension to be dropped altogether later and do not simplify.
if (!AlreadySignExtended) {
@@ -1099,79 +1356,116 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
Known.Zero = KnownLo.Zero.zext(BitWidth) |
- KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
+ KnownHi.Zero.zext(BitWidth).shl(HalfBitWidth);
Known.One = KnownLo.One.zext(BitWidth) |
- KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
+ KnownHi.One.zext(BitWidth).shl(HalfBitWidth);
break;
}
- case ISD::ZERO_EXTEND: {
+ case ISD::ZERO_EXTEND:
+ case ISD::ZERO_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
- if (DemandedBits.getActiveBits() <= InBits)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
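+      // e.g. (illustrative, little-endian): if only the low 32 bits of
+      // element 0 of (v2i64 zero_extend_vector_inreg (v4i32 x)) are demanded,
+      // this is just a bitcast of x to v2i64.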
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
APInt InDemandedBits = DemandedBits.trunc(InBits);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known = Known.zext(BitWidth);
- Known.Zero.setBitsFrom(InBits);
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
break;
}
- case ISD::SIGN_EXTEND: {
+ case ISD::SIGN_EXTEND:
+ case ISD::SIGN_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG;
// If none of the top bits are demanded, convert this into an any_extend.
- if (DemandedBits.getActiveBits() <= InBits)
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT, Src));
+ if (DemandedBits.getActiveBits() <= InBits) {
+ // If we only need the non-extended bits of the bottom element
+ // then we can just bitcast to the result.
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+
+ unsigned Opc =
+ IsVecInReg ? ISD::ANY_EXTEND_VECTOR_INREG : ISD::ANY_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
+
+ APInt InDemandedBits = DemandedBits.trunc(InBits);
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
- APInt InDemandedBits = DemandedBits.trunc(InBits);
InDemandedBits.setBit(InBits - 1);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+
// If the sign bit is known one, the top bits match.
Known = Known.sext(BitWidth);
// If the sign bit is known zero, convert this to a zero extend.
- if (Known.isNonNegative())
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Src));
+ if (Known.isNonNegative()) {
+ unsigned Opc =
+ IsVecInReg ? ISD::ZERO_EXTEND_VECTOR_INREG : ISD::ZERO_EXTEND;
+ if (!TLO.LegalOperations() || isOperationLegal(Opc, VT))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT, Src));
+ }
break;
}
- case ISD::SIGN_EXTEND_VECTOR_INREG: {
- // TODO - merge this with SIGN_EXTEND above?
+ case ISD::ANY_EXTEND:
+ case ISD::ANY_EXTEND_VECTOR_INREG: {
SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
-
- APInt InDemandedBits = DemandedBits.trunc(InBits);
+ EVT SrcVT = Src.getValueType();
+ unsigned InBits = SrcVT.getScalarSizeInBits();
+ unsigned InElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ bool IsVecInReg = Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG;
- // If some of the sign extended bits are demanded, we know that the sign
- // bit is demanded.
- if (InBits < DemandedBits.getActiveBits())
- InDemandedBits.setBit(InBits - 1);
+ // If we only need the bottom element then we can just bitcast.
+ // TODO: Handle ANY_EXTEND?
+ if (IsVecInReg && DemandedElts == 1 &&
+ VT.getSizeInBits() == SrcVT.getSizeInBits() &&
+ TLO.DAG.getDataLayout().isLittleEndian())
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth + 1))
- return true;
- assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- // If the sign bit is known one, the top bits match.
- Known = Known.sext(BitWidth);
- break;
- }
- case ISD::ANY_EXTEND: {
- SDValue Src = Op.getOperand(0);
- unsigned InBits = Src.getScalarValueSizeInBits();
APInt InDemandedBits = DemandedBits.trunc(InBits);
- if (SimplifyDemandedBits(Src, InDemandedBits, Known, TLO, Depth+1))
+ APInt InDemandedElts = DemandedElts.zextOrSelf(InElts);
+ if (SimplifyDemandedBits(Src, InDemandedBits, InDemandedElts, Known, TLO,
+ Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
- Known = Known.zext(BitWidth);
+ assert(Known.getBitWidth() == InBits && "Src width has changed?");
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::TRUNCATE: {
@@ -1198,29 +1492,29 @@ bool TargetLowering::SimplifyDemandedBits(
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
- ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
- if (!ShAmt)
+
+ auto *ShAmt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
+ if (!ShAmt || ShAmt->getAPIntValue().uge(BitWidth))
break;
+
SDValue Shift = Src.getOperand(1);
- if (TLO.LegalTypes()) {
- uint64_t ShVal = ShAmt->getZExtValue();
+ uint64_t ShVal = ShAmt->getZExtValue();
+
+ if (TLO.LegalTypes())
Shift = TLO.DAG.getConstant(ShVal, dl, getShiftAmountTy(VT, DL));
- }
- if (ShAmt->getZExtValue() < BitWidth) {
- APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
- OperandBitWidth - BitWidth);
- HighBits.lshrInPlace(ShAmt->getZExtValue());
- HighBits = HighBits.trunc(BitWidth);
-
- if (!(HighBits & DemandedBits)) {
- // None of the shifted in bits are needed. Add a truncate of the
- // shift input, then shift it.
- SDValue NewTrunc =
- TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
- return TLO.CombineTo(
- Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
- }
+ APInt HighBits =
+ APInt::getHighBitsSet(OperandBitWidth, OperandBitWidth - BitWidth);
+ HighBits.lshrInPlace(ShVal);
+ HighBits = HighBits.trunc(BitWidth);
+
+ if (!(HighBits & DemandedBits)) {
+ // None of the shifted in bits are needed. Add a truncate of the
+ // shift input, then shift it.
+ SDValue NewTrunc =
+ TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, Src.getOperand(0));
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(ISD::SRL, dl, VT, NewTrunc, Shift));
}
break;
}
@@ -1234,8 +1528,8 @@ bool TargetLowering::SimplifyDemandedBits(
// demanded by its users.
EVT ZVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth, ZVT.getSizeInBits());
- if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits,
- Known, TLO, Depth+1))
+ if (SimplifyDemandedBits(Op.getOperand(0), ~InMask | DemandedBits, Known,
+ TLO, Depth + 1))
return true;
assert(!Known.hasConflict() && "Bits known to be one AND zero?");
@@ -1266,7 +1560,7 @@ bool TargetLowering::SimplifyDemandedBits(
Known = Known2;
if (BitWidth > EltBitWidth)
- Known = Known.zext(BitWidth);
+ Known = Known.zext(BitWidth, false /* => any extend */);
break;
}
case ISD::BITCAST: {
@@ -1297,40 +1591,68 @@ bool TargetLowering::SimplifyDemandedBits(
TLO.DAG.getNode(ISD::SHL, dl, VT, Sign, ShAmt));
}
}
- // If bitcast from a vector, see if we can use SimplifyDemandedVectorElts by
- // demanding the element if any bits from it are demanded.
+
+    // Bitcast from a vector using SimplifyDemandedBits/SimplifyDemandedVectorElts.
+ // Demand the elt/bit if any of the original elts/bits are demanded.
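+    // e.g. (illustrative, little-endian): for a v4i32 -> v2i64 bitcast,
+    // Scale == 2 and demanding bits 32-63 of result element j demands source
+    // element 2*j+1.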
// TODO - bigendian once we have test coverage.
// TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
if (SrcVT.isVector() && NumSrcEltBits > 1 &&
(BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
- auto GetDemandedSubMask = [&](APInt &DemandedSubElts) -> bool {
- DemandedSubElts = APInt::getNullValue(Scale);
- for (unsigned i = 0; i != Scale; ++i) {
- unsigned Offset = i * NumSrcEltBits;
- APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
- if (!Sub.isNullValue())
- DemandedSubElts.setBit(i);
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
}
+ }
+
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
return true;
- };
- APInt DemandedSubElts;
- if (GetDemandedSubMask(DemandedSubElts)) {
- unsigned NumSrcElts = SrcVT.getVectorNumElements();
- APInt DemandedElts = APInt::getSplat(NumSrcElts, DemandedSubElts);
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
+ } else if ((NumSrcEltBits % BitWidth) == 0 &&
+ TLO.DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / BitWidth;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * BitWidth;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
- APInt KnownUndef, KnownZero;
- if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
- TLO, Depth + 1))
+ if (SrcVT.isVector()) {
+ APInt KnownSrcUndef, KnownSrcZero;
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, KnownSrcUndef,
+ KnownSrcZero, TLO, Depth + 1))
return true;
}
+
+ KnownBits KnownSrcBits;
+ if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedSrcElts,
+ KnownSrcBits, TLO, Depth + 1))
+ return true;
}
+
// If this is a bitcast, let computeKnownBits handle it. Only do this on a
// recursive call where Known may be useful to the caller.
if (Depth > 0) {
- Known = TLO.DAG.computeKnownBits(Op, Depth);
+ Known = TLO.DAG.computeKnownBits(Op, DemandedElts, Depth);
return false;
}
break;
@@ -1343,8 +1665,10 @@ bool TargetLowering::SimplifyDemandedBits(
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
- if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
- SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO, Depth + 1) ||
+ if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
+ SimplifyDemandedBits(Op1, LoMask, DemandedElts, Known2, TLO,
+ Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
SDNodeFlags Flags = Op.getNode()->getFlags();
@@ -1353,8 +1677,8 @@ bool TargetLowering::SimplifyDemandedBits(
// won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
- SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1,
- Flags);
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
return TLO.CombineTo(Op, NewOp);
}
return true;
@@ -1431,15 +1755,64 @@ bool TargetLowering::SimplifyDemandedVectorElts(SDValue Op,
DCI.AddToWorklist(Op.getNode());
DCI.CommitTargetLoweringOpt(TLO);
}
+
return Simplified;
}
+/// Given a vector binary operation and known undefined elements for each input
+/// operand, compute whether each element of the output is undefined.
+static APInt getKnownUndefForVectorBinop(SDValue BO, SelectionDAG &DAG,
+ const APInt &UndefOp0,
+ const APInt &UndefOp1) {
+ EVT VT = BO.getValueType();
+ assert(DAG.getTargetLoweringInfo().isBinOp(BO.getOpcode()) && VT.isVector() &&
+ "Vector binop only");
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+ assert(UndefOp0.getBitWidth() == NumElts &&
+ UndefOp1.getBitWidth() == NumElts && "Bad type for undef analysis");
+
+ auto getUndefOrConstantElt = [&](SDValue V, unsigned Index,
+ const APInt &UndefVals) {
+ if (UndefVals[Index])
+ return DAG.getUNDEF(EltVT);
+
+ if (auto *BV = dyn_cast<BuildVectorSDNode>(V)) {
+ // Try hard to make sure that the getNode() call is not creating temporary
+ // nodes. Ignore opaque integers because they do not constant fold.
+ SDValue Elt = BV->getOperand(Index);
+ auto *C = dyn_cast<ConstantSDNode>(Elt);
+ if (isa<ConstantFPSDNode>(Elt) || Elt.isUndef() || (C && !C->isOpaque()))
+ return Elt;
+ }
+
+ return SDValue();
+ };
+
+ APInt KnownUndef = APInt::getNullValue(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ // If both inputs for this element are either constant or undef and match
+ // the element type, compute the constant/undef result for this element of
+ // the vector.
+ // TODO: Ideally we would use FoldConstantArithmetic() here, but that does
+ // not handle FP constants. The code within getNode() should be refactored
+ // to avoid the danger of creating a bogus temporary node here.
+ SDValue C0 = getUndefOrConstantElt(BO.getOperand(0), i, UndefOp0);
+ SDValue C1 = getUndefOrConstantElt(BO.getOperand(1), i, UndefOp1);
+ if (C0 && C1 && C0.getValueType() == EltVT && C1.getValueType() == EltVT)
+ if (DAG.getNode(BO.getOpcode(), SDLoc(BO), EltVT, C0, C1).isUndef())
+ KnownUndef.setBit(i);
+ }
+ return KnownUndef;
+}
+
bool TargetLowering::SimplifyDemandedVectorElts(
- SDValue Op, const APInt &DemandedEltMask, APInt &KnownUndef,
+ SDValue Op, const APInt &OriginalDemandedElts, APInt &KnownUndef,
APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth,
bool AssumeSingleUse) const {
EVT VT = Op.getValueType();
- APInt DemandedElts = DemandedEltMask;
+ APInt DemandedElts = OriginalDemandedElts;
unsigned NumElts = DemandedElts.getBitWidth();
assert(VT.isVector() && "Expected vector op");
assert(VT.getVectorNumElements() == NumElts &&
@@ -1617,7 +1990,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
SDValue Sub = Op.getOperand(1);
EVT SubVT = Sub.getValueType();
unsigned NumSubElts = SubVT.getVectorNumElements();
- const APInt& Idx = cast<ConstantSDNode>(Op.getOperand(2))->getAPIntValue();
+ const APInt &Idx = Op.getConstantOperandAPInt(2);
if (Idx.ugt(NumElts - NumSubElts))
break;
unsigned SubIdx = Idx.getZExtValue();
@@ -1786,18 +2159,26 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+ case ISD::ANY_EXTEND_VECTOR_INREG:
case ISD::SIGN_EXTEND_VECTOR_INREG:
case ISD::ZERO_EXTEND_VECTOR_INREG: {
APInt SrcUndef, SrcZero;
SDValue Src = Op.getOperand(0);
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
APInt DemandedSrcElts = DemandedElts.zextOrSelf(NumSrcElts);
- if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ if (SimplifyDemandedVectorElts(Src, DemandedSrcElts, SrcUndef, SrcZero, TLO,
+ Depth + 1))
return true;
KnownZero = SrcZero.zextOrTrunc(NumElts);
KnownUndef = SrcUndef.zextOrTrunc(NumElts);
+ if (Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG &&
+ Op.getValueSizeInBits() == Src.getValueSizeInBits() &&
+ DemandedSrcElts == 1 && TLO.DAG.getDataLayout().isLittleEndian()) {
+ // aext - if we just need the bottom element then we can bitcast.
+ return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, Src));
+ }
+
if (Op.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG) {
// zext(undef) upper bits are guaranteed to be zero.
if (DemandedElts.isSubsetOf(KnownUndef))
@@ -1806,6 +2187,9 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
break;
}
+
+ // TODO: There are more binop opcodes that could be handled here - MUL, MIN,
+ // MAX, saturated math, etc.
case ISD::OR:
case ISD::XOR:
case ISD::ADD:
@@ -1815,17 +2199,38 @@ bool TargetLowering::SimplifyDemandedVectorElts(
case ISD::FMUL:
case ISD::FDIV:
case ISD::FREM: {
- APInt SrcUndef, SrcZero;
- if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
- SrcZero, TLO, Depth + 1))
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
return true;
- if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, KnownUndef,
- KnownZero, TLO, Depth + 1))
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
return true;
- KnownZero &= SrcZero;
- KnownUndef &= SrcUndef;
+
+ KnownZero = ZeroLHS & ZeroRHS;
+ KnownUndef = getKnownUndefForVectorBinop(Op, TLO.DAG, UndefLHS, UndefRHS);
+ break;
+ }
+ case ISD::SHL:
+ case ISD::SRL:
+ case ISD::SRA:
+ case ISD::ROTL:
+ case ISD::ROTR: {
+ APInt UndefRHS, ZeroRHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, UndefRHS,
+ ZeroRHS, TLO, Depth + 1))
+ return true;
+ APInt UndefLHS, ZeroLHS;
+ if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, UndefLHS,
+ ZeroLHS, TLO, Depth + 1))
+ return true;
+
+ KnownZero = ZeroLHS;
+ KnownUndef = UndefLHS & UndefRHS; // TODO: use getKnownUndefForVectorBinop?
break;
}
+ case ISD::MUL:
case ISD::AND: {
APInt SrcUndef, SrcZero;
if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, SrcUndef,
@@ -1837,6 +2242,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// If either side has a zero element, then the result element is zero, even
// if the other is an UNDEF.
+ // TODO: Extend getKnownUndefForVectorBinop to also deal with known zeros
+ // and then handle 'and' nodes with the rest of the binop opcodes.
KnownZero |= SrcZero;
KnownUndef &= SrcUndef;
KnownUndef &= ~KnownZero;
@@ -1864,8 +2271,8 @@ bool TargetLowering::SimplifyDemandedVectorElts(
} else {
KnownBits Known;
APInt DemandedBits = APInt::getAllOnesValue(EltSizeInBits);
- if (SimplifyDemandedBits(Op, DemandedBits, DemandedEltMask, Known, TLO,
- Depth, AssumeSingleUse))
+ if (SimplifyDemandedBits(Op, DemandedBits, OriginalDemandedElts, Known,
+ TLO, Depth, AssumeSingleUse))
return true;
}
break;
@@ -1950,6 +2357,10 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode *) const {
+ return nullptr;
+}
+
bool TargetLowering::isKnownNeverNaNForTargetNode(SDValue Op,
const SelectionDAG &DAG,
bool SNaN,
@@ -2044,10 +2455,9 @@ bool TargetLowering::isExtendedTrueVal(const ConstantSDNode *N, EVT VT,
/// This helper function of SimplifySetCC tries to optimize the comparison when
/// either operand of the SetCC node is a bitwise-and instruction.
-SDValue TargetLowering::simplifySetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
- ISD::CondCode Cond,
- DAGCombinerInfo &DCI,
- const SDLoc &DL) const {
+SDValue TargetLowering::foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
// Match these patterns in any of their permutations:
// (X & Y) == Y
// (X & Y) != Y
@@ -2200,6 +2610,49 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
+/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
+/// handle the commuted versions of these patterns.
+SDValue TargetLowering::foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1,
+ ISD::CondCode Cond, const SDLoc &DL,
+ DAGCombinerInfo &DCI) const {
+ unsigned BOpcode = N0.getOpcode();
+ assert((BOpcode == ISD::ADD || BOpcode == ISD::SUB || BOpcode == ISD::XOR) &&
+ "Unexpected binop");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+ // (X + Y) == X --> Y == 0
+ // (X - Y) == X --> Y == 0
+ // (X ^ Y) == X --> Y == 0
+ SelectionDAG &DAG = DCI.DAG;
+ EVT OpVT = N0.getValueType();
+ SDValue X = N0.getOperand(0);
+ SDValue Y = N0.getOperand(1);
+ if (X == N1)
+ return DAG.getSetCC(DL, VT, Y, DAG.getConstant(0, DL, OpVT), Cond);
+
+ if (Y != N1)
+ return SDValue();
+
+ // (X + Y) == Y --> X == 0
+ // (X ^ Y) == Y --> X == 0
+ if (BOpcode == ISD::ADD || BOpcode == ISD::XOR)
+ return DAG.getSetCC(DL, VT, X, DAG.getConstant(0, DL, OpVT), Cond);
+
+ // The shift would not be valid if the operands are boolean (i1).
+ if (!N0.hasOneUse() || OpVT.getScalarSizeInBits() == 1)
+ return SDValue();
+
+ // (X - Y) == Y --> X == Y << 1
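+  // (X - Y == Y implies X == 2 * Y, i.e. X == Y << 1. A shift amount of 1
+  // would be out of range for i1, hence the width check above.)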
+ EVT ShiftVT = getShiftAmountTy(OpVT, DAG.getDataLayout(),
+ !DCI.isBeforeLegalize());
+ SDValue One = DAG.getConstant(1, DL, ShiftVT);
+ SDValue YShl1 = DAG.getNode(ISD::SHL, DL, N1.getValueType(), Y, One);
+ if (!DCI.isCalledByLegalizer())
+ DCI.AddToWorklist(YShl1.getNode());
+ return DAG.getSetCC(DL, VT, X, YShl1, Cond);
+}
+
/// Try to simplify a setcc built with the specified operands and cc. If it is
/// unable to simplify it, return a null SDValue.
SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -2209,14 +2662,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
SelectionDAG &DAG = DCI.DAG;
EVT OpVT = N0.getValueType();
- // These setcc operations always fold.
- switch (Cond) {
- default: break;
- case ISD::SETFALSE:
- case ISD::SETFALSE2: return DAG.getBoolConstant(false, dl, VT, OpVT);
- case ISD::SETTRUE:
- case ISD::SETTRUE2: return DAG.getBoolConstant(true, dl, VT, OpVT);
- }
+ // Constant fold or commute setcc.
+ if (SDValue Fold = DAG.FoldSetCC(VT, N0, N1, Cond, dl))
+ return Fold;
// Ensure that the constant occurs on the RHS and fold constant comparisons.
// TODO: Handle non-splat vector constants. All undef causes trouble.
@@ -2226,6 +2674,17 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+ // If we have a subtract with the same 2 non-constant operands as this setcc
+ // -- but in reverse order -- then try to commute the operands of this setcc
+ // to match. A matching pair of setcc (cmp) and sub may be combined into 1
+ // instruction on some targets.
+ if (!isConstOrConstSplat(N0) && !isConstOrConstSplat(N1) &&
+ (DCI.isBeforeLegalizeOps() ||
+ isCondCodeLegal(SwappedCC, N0.getSimpleValueType())) &&
+      DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), {N1, N0}) &&
+      !DAG.getNodeIfExists(ISD::SUB, DAG.getVTList(OpVT), {N0, N1}))
+ return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
+
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
const APInt &C1 = N1C->getAPIntValue();
@@ -2235,8 +2694,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::SRL && (C1.isNullValue() || C1.isOneValue()) &&
N0.getOperand(0).getOpcode() == ISD::CTLZ &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
- const APInt &ShAmt
- = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &ShAmt = N0.getConstantOperandAPInt(1);
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
ShAmt == Log2_32(N0.getValueSizeInBits())) {
if ((C1 == 0) == (Cond == ISD::SETEQ)) {
@@ -2275,7 +2733,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
- // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ // If ctpop is not supported, expand a power-of-2 comparison based on it.
+ if (C1 == 1 && !isOperationLegalOrCustom(ISD::CTPOP, CTVT) &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ // (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
+ // (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
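+      // e.g. for x == 8: 8 != 0 and (8 & 7) == 0, so (ctpop 8) == 1.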
+ SDValue Zero = DAG.getConstant(0, dl, CTVT);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, true);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
+ SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
+ SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
+ unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
+ return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
+ }
}
// (zext x) == C --> x == (trunc C)
@@ -2387,8 +2859,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
- const APInt &Mask =
- cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ const APInt &Mask = N0.getConstantOperandAPInt(1);
for (unsigned width = origWidth / 2; width>=8; width /= 2) {
APInt newMask = APInt::getLowBitsSet(maskWidth, width);
for (unsigned offset=0; offset<origWidth/width; offset++) {
@@ -2480,7 +2951,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break;
}
default:
- break; // todo, be more careful with signed comparisons
+      break; // TODO: be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
@@ -2501,7 +2972,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else {
APInt Imm = APInt::getLowBitsSet(ExtDstTyBits, ExtSrcTyBits);
ZextOp = DAG.getNode(ISD::AND, dl, Op0Ty, N0.getOperand(0),
- DAG.getConstant(Imm, dl, Op0Ty));
+ DAG.getConstant(Imm, dl, Op0Ty));
}
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
@@ -2598,6 +3069,18 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ // Given:
+ // icmp eq/ne (urem %x, %y), 0
+ // Iff %x has 0 or 1 bits set, and %y has at least 2 bits set, omit 'urem':
+ // icmp eq/ne %x, 0
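+  // (A %x with a single bit set is a power of two, whose divisors each have
+  // a single bit set themselves, so a %y with two or more bits set can never
+  // divide it evenly; %x == 0 is then the only way to get remainder 0.)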
+ if (N0.getOpcode() == ISD::UREM && N1C->isNullValue() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ KnownBits XKnown = DAG.computeKnownBits(N0.getOperand(0));
+ KnownBits YKnown = DAG.computeKnownBits(N0.getOperand(1));
+ if (XKnown.countMaxPopulation() == 1 && YKnown.countMinPopulation() >= 2)
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1, Cond);
+ }
+
if (SDValue V =
optimizeSetCCOfSignedTruncationCheck(VT, N0, N1, Cond, DCI, dl))
return V;
@@ -2805,25 +3288,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
- if (isa<ConstantFPSDNode>(N0.getNode())) {
- // Constant fold or commute setcc.
- SDValue O = DAG.FoldSetCC(VT, N0, N1, Cond, dl);
- if (O.getNode()) return O;
- } else if (auto *CFP = dyn_cast<ConstantFPSDNode>(N1.getNode())) {
- // If the RHS of an FP comparison is a constant, simplify it away in
- // some cases.
- if (CFP->getValueAPF().isNaN()) {
- // If an operand is known to be a nan, we can fold it.
- switch (ISD::getUnorderedFlavor(Cond)) {
- default: llvm_unreachable("Unknown flavor!");
- case 0: // Known false.
- return DAG.getBoolConstant(false, dl, VT, OpVT);
- case 1: // Known true.
- return DAG.getBoolConstant(true, dl, VT, OpVT);
- case 2: // Undefined.
- return DAG.getUNDEF(VT);
- }
- }
+ if (!isa<ConstantFPSDNode>(N0) && isa<ConstantFPSDNode>(N1)) {
+ auto *CFP = cast<ConstantFPSDNode>(N1);
+ assert(!CFP->getValueAPF().isNaN() && "Unexpected NaN value");
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
@@ -2883,15 +3350,12 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0 == N1) {
// The sext(setcc()) => setcc() optimization relies on the appropriate
// constant being emitted.
+ assert(!N0.getValueType().isInteger() &&
+ "Integer types should be handled by FoldSetCC");
bool EqTrue = ISD::isTrueWhenEqual(Cond);
-
- // We can always fold X == X for integer setcc's.
- if (N0.getValueType().isInteger())
- return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
-
unsigned UOF = ISD::getUnorderedFlavor(Cond);
- if (UOF == 2) // FP operators that are undefined on NaNs.
+ if (UOF == 2) // FP operators that are undefined on NaNs.
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
if (UOF == unsigned(EqTrue))
return DAG.getBoolConstant(EqTrue, dl, VT, OpVT);
@@ -2900,7 +3364,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO;
if (NewCond != Cond &&
(DCI.isBeforeLegalizeOps() ||
- isCondCodeLegal(NewCond, N0.getSimpleValueType())))
+ isCondCodeLegal(NewCond, N0.getSimpleValueType())))
return DAG.getSetCC(dl, VT, N0, N1, NewCond);
}
@@ -2969,69 +3433,39 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LegalRHSImm = isLegalICmpImmediate(RHSC->getSExtValue());
}
- // Simplify (X+Z) == X --> Z == 0
+ // (X+Y) == X --> Y == 0 and similar folds.
// Don't do this if X is an immediate that can fold into a cmp
- // instruction and X+Z has other uses. It could be an induction variable
+ // instruction and X+Y has other uses. It could be an induction variable
// chain, and the transform would increase register pressure.
- if (!LegalRHSImm || N0.getNode()->hasOneUse()) {
- if (N0.getOperand(0) == N1)
- return DAG.getSetCC(dl, VT, N0.getOperand(1),
- DAG.getConstant(0, dl, N0.getValueType()), Cond);
- if (N0.getOperand(1) == N1) {
- if (isCommutativeBinOp(N0.getOpcode()))
- return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(0, dl, N0.getValueType()),
- Cond);
- if (N0.getNode()->hasOneUse()) {
- assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
- auto &DL = DAG.getDataLayout();
- // (Z-X) == X --> Z == X<<1
- SDValue SH = DAG.getNode(
- ISD::SHL, dl, N1.getValueType(), N1,
- DAG.getConstant(1, dl,
- getShiftAmountTy(N1.getValueType(), DL,
- !DCI.isBeforeLegalize())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, N0.getOperand(0), SH, Cond);
- }
- }
- }
+ if (!LegalRHSImm || N0.hasOneUse())
+ if (SDValue V = foldSetCCWithBinOp(VT, N0, N1, Cond, dl, DCI))
+ return V;
}
if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB ||
- N1.getOpcode() == ISD::XOR) {
- // Simplify X == (X+Z) --> Z == 0
- if (N1.getOperand(0) == N0)
- return DAG.getSetCC(dl, VT, N1.getOperand(1),
- DAG.getConstant(0, dl, N1.getValueType()), Cond);
- if (N1.getOperand(1) == N0) {
- if (isCommutativeBinOp(N1.getOpcode()))
- return DAG.getSetCC(dl, VT, N1.getOperand(0),
- DAG.getConstant(0, dl, N1.getValueType()), Cond);
- if (N1.getNode()->hasOneUse()) {
- assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
- auto &DL = DAG.getDataLayout();
- // X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(
- ISD::SHL, dl, N1.getValueType(), N0,
- DAG.getConstant(1, dl, getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize())));
- if (!DCI.isCalledByLegalizer())
- DCI.AddToWorklist(SH.getNode());
- return DAG.getSetCC(dl, VT, SH, N1.getOperand(0), Cond);
- }
- }
- }
+ N1.getOpcode() == ISD::XOR)
+ if (SDValue V = foldSetCCWithBinOp(VT, N1, N0, Cond, dl, DCI))
+ return V;
- if (SDValue V = simplifySetCCWithAnd(VT, N0, N1, Cond, DCI, dl))
+ if (SDValue V = foldSetCCWithAnd(VT, N0, N1, Cond, dl, DCI))
return V;
}
+ // Fold remainder of division by a constant.
+ if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
+
+ // When division is cheap or optimizing for minimum size,
+ // fall through to DIVREM creation by skipping this fold.
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+
// Fold away ALL boolean setcc's.
- SDValue Temp;
if (N0.getValueType().getScalarType() == MVT::i1 && foldBooleans) {
- EVT OpVT = N0.getValueType();
+ SDValue Temp;
switch (Cond) {
default: llvm_unreachable("Unknown integer setcc!");
case ISD::SETEQ: // X == Y -> ~(X^Y)
@@ -3134,18 +3568,18 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
switch (Constraint[0]) {
default: break;
case 'r': return C_RegisterClass;
- case 'm': // memory
- case 'o': // offsetable
- case 'V': // not offsetable
+ case 'm': // memory
+ case 'o': // offsetable
+ case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
- case 'n': // Simple Integer
- case 'E': // Floating Point Constant
- case 'F': // Floating Point Constant
- case 's': // Relocatable Constant
- case 'p': // Address.
- case 'X': // Allow ANY value.
- case 'I': // Target registers.
+ case 'i': // Simple Integer or Relocatable Constant
+ case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
+ case 's': // Relocatable Constant
+ case 'p': // Address.
+ case 'X': // Allow ANY value.
+ case 'I': // Target registers.
case 'J':
case 'K':
case 'L':
@@ -3159,7 +3593,7 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
}
}
- if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') {
+ if (S > 1 && Constraint[0] == '{' && Constraint[S - 1] == '}') {
if (S == 8 && Constraint.substr(1, 6) == "memory") // "{memory}"
return C_Memory;
return C_Register;
@@ -3170,14 +3604,20 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
/// Try to replace an X constraint, which matches anything, with another that
/// has more specific requirements based on the type of the corresponding
/// operand.
-const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{
+const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const {
if (ConstraintVT.isInteger())
return "r";
if (ConstraintVT.isFloatingPoint())
- return "f"; // works for many targets
+ return "f"; // works for many targets
return nullptr;
}
+SDValue TargetLowering::LowerAsmOutputForConstraint(
+ SDValue &Chain, SDValue &Flag, SDLoc DL, const AsmOperandInfo &OpInfo,
+ SelectionDAG &DAG) const {
+ return SDValue();
+}
+
/// Lower the specified operand into the Ops vector.
/// If it is invalid, don't add anything to Ops.
void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
@@ -3191,7 +3631,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
switch (ConstraintLetter) {
default: break;
case 'X': // Allows any operand; labels (basic block) use this.
- if (Op.getOpcode() == ISD::BasicBlock) {
+ if (Op.getOpcode() == ISD::BasicBlock ||
+ Op.getOpcode() == ISD::TargetBlockAddress) {
Ops.push_back(Op);
return;
}
@@ -3199,46 +3640,57 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 's': { // Relocatable Constant
- // These operands are interested in values of the form (GV+C), where C may
- // be folded in as an offset of GV, or it may be explicitly added. Also, it
- // is possible and fine if either GV or C are missing.
- ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
- GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
- // If we have "(add GV, C)", pull out GV/C
- if (Op.getOpcode() == ISD::ADD) {
- C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(0));
- if (!C || !GA) {
- C = dyn_cast<ConstantSDNode>(Op.getOperand(0));
- GA = dyn_cast<GlobalAddressSDNode>(Op.getOperand(1));
- }
- if (!C || !GA) {
- C = nullptr;
- GA = nullptr;
- }
- }
- // If we find a valid operand, map to the TargetXXX version so that the
- // value itself doesn't get selected.
- if (GA) { // Either &GV or &GV+C
- if (ConstraintLetter != 'n') {
- int64_t Offs = GA->getOffset();
- if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
- C ? SDLoc(C) : SDLoc(),
- Op.getValueType(), Offs));
- }
- return;
- }
- if (C) { // just C, no GV.
- // Simple constants are not allowed for 's'.
- if (ConstraintLetter != 's') {
+ GlobalAddressSDNode *GA;
+ ConstantSDNode *C;
+ BlockAddressSDNode *BA;
+ uint64_t Offset = 0;
+
+ // Match (GA) or (C) or (GA+C) or (GA-C) or ((GA+C)+C) or (((GA+C)+C)+C),
+      // etc., since getelementptr is variadic. We can't use
+ // SelectionDAG::FoldSymbolOffset because it expects the GA to be accessible
+ // while in this case the GA may be furthest from the root node which is
+ // likely an ISD::ADD.
+ while (1) {
+ if ((GA = dyn_cast<GlobalAddressSDNode>(Op)) && ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op),
+ GA->getValueType(0),
+ Offset + GA->getOffset()));
+ return;
+ } else if ((C = dyn_cast<ConstantSDNode>(Op)) &&
+ ConstraintLetter != 's') {
// gcc prints these as sign extended. Sign extend value to 64 bits
// now; without this it would get ZExt'd later in
// ScheduleDAGSDNodes::EmitNode, which is very generic.
- Ops.push_back(DAG.getTargetConstant(C->getSExtValue(),
+ bool IsBool = C->getConstantIntValue()->getBitWidth() == 1;
+ BooleanContent BCont = getBooleanContents(MVT::i64);
+ ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
+ : ISD::SIGN_EXTEND;
+ int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? C->getZExtValue()
+ : C->getSExtValue();
+ Ops.push_back(DAG.getTargetConstant(Offset + ExtVal,
SDLoc(C), MVT::i64));
+ return;
+ } else if ((BA = dyn_cast<BlockAddressSDNode>(Op)) &&
+ ConstraintLetter != 'n') {
+ Ops.push_back(DAG.getTargetBlockAddress(
+ BA->getBlockAddress(), BA->getValueType(0),
+ Offset + BA->getOffset(), BA->getTargetFlags()));
+ return;
+ } else {
+ const unsigned OpCode = Op.getOpcode();
+ if (OpCode == ISD::ADD || OpCode == ISD::SUB) {
+ if ((C = dyn_cast<ConstantSDNode>(Op.getOperand(0))))
+ Op = Op.getOperand(1);
+ // Subtraction is not commutative.
+ else if (OpCode == ISD::ADD &&
+ (C = dyn_cast<ConstantSDNode>(Op.getOperand(1))))
+ Op = Op.getOperand(0);
+ else
+ return;
+ Offset += (OpCode == ISD::ADD ? 1 : -1) * C->getSExtValue();
+ continue;
+ }
}
return;
}
@@ -3252,14 +3704,14 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
StringRef Constraint,
MVT VT) const {
if (Constraint.empty() || Constraint[0] != '{')
- return std::make_pair(0u, static_cast<TargetRegisterClass*>(nullptr));
- assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
+ return std::make_pair(0u, static_cast<TargetRegisterClass *>(nullptr));
+ assert(*(Constraint.end() - 1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
- StringRef RegName(Constraint.data()+1, Constraint.size()-2);
+ StringRef RegName(Constraint.data() + 1, Constraint.size() - 2);
- std::pair<unsigned, const TargetRegisterClass*> R =
- std::make_pair(0u, static_cast<const TargetRegisterClass*>(nullptr));
+ std::pair<unsigned, const TargetRegisterClass *> R =
+ std::make_pair(0u, static_cast<const TargetRegisterClass *>(nullptr));
// Figure out which register class contains this reg.
for (const TargetRegisterClass *RC : RI->regclasses()) {
@@ -3271,8 +3723,8 @@ TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *RI,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getRegAsmName(*I))) {
- std::pair<unsigned, const TargetRegisterClass*> S =
- std::make_pair(*I, RC);
+ std::pair<unsigned, const TargetRegisterClass *> S =
+ std::make_pair(*I, RC);
// If this register class has the requested value type, return it,
// otherwise keep searching and return the first class found
@@ -3321,8 +3773,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Do a prepass over the constraints, canonicalizing them, and building up the
// ConstraintOperands list.
- unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
- unsigned ResNo = 0; // ResNo - The result number of the next output.
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
for (InlineAsm::ConstraintInfo &CI : IA->ParseConstraints()) {
ConstraintOperands.emplace_back(std::move(CI));
@@ -3391,7 +3843,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
case 64:
case 128:
OpInfo.ConstraintVT =
- MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true);
break;
}
} else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
@@ -3416,8 +3868,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
for (maIndex = 0; maIndex < maCount; ++maIndex) {
int weightSum = 0;
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
if (OpInfo.Type == InlineAsm::isClobber)
continue;
@@ -3432,7 +3884,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
Input.ConstraintVT.isInteger()) ||
(OpInfo.ConstraintVT.getSizeInBits() !=
Input.ConstraintVT.getSizeInBits())) {
- weightSum = -1; // Can't match.
+ weightSum = -1; // Can't match.
break;
}
}
@@ -3453,8 +3905,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Now select chosen alternative in each constraint.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &cInfo = ConstraintOperands[cIndex];
if (cInfo.Type == InlineAsm::isClobber)
continue;
cInfo.selectAlternative(bestMAIndex);
@@ -3464,8 +3916,8 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
// Check and hook up tied operands, choose constraint code to use.
for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
- cIndex != eIndex; ++cIndex) {
- AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo &OpInfo = ConstraintOperands[cIndex];
// If this is an output operand with a matching input operand, look up the
// matching input. If their types mismatch, e.g. one is an integer, the
@@ -3577,9 +4029,9 @@ TargetLowering::ConstraintWeight
weight = CW_Register;
break;
case 'X': // any operand.
- default:
- weight = CW_Default;
- break;
+ default:
+ weight = CW_Default;
+ break;
}
return weight;
}
@@ -3678,6 +4130,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
return;
}
+ if (Op.getNode() && Op.getOpcode() == ISD::TargetBlockAddress)
+ return;
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -3749,12 +4204,12 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N,
}
SDValue TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
- SelectionDAG &DAG,
- SmallVectorImpl<SDNode *> &Created) const {
+ SelectionDAG &DAG,
+ SmallVectorImpl<SDNode *> &Created) const {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLI.isIntDivCheap(N->getValueType(0), Attr))
- return SDValue(N,0); // Lower SDIV as SDIV
+ return SDValue(N, 0); // Lower SDIV as SDIV
return SDValue();
}
@@ -4000,6 +4455,104 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 2> Built;
+ if (SDValue Folded = prepareUREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
+SDValue
+TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
+ // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - P is the multiplicative inverse of D0 modulo 2^W
+  // - Q = floor((2^W - 1) / D)
+ // where W is the width of the common type of N and D.
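+  // Illustrative example (i8, D = 6): D0 = 3, K = 1, P = 0xAB (3 * 0xAB ==
+  // 0x201 == 1 (mod 2^8)) and Q = floor(255 / 6) == 42, so N % 6 == 0 iff
+  // rotr(N * 0xAB, 1) <= 42 (unsigned).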
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ EVT VT = REMNode.getValueType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Add non-uniform constant support.
+ ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!Divisor || !CompTarget || Divisor->isNullValue() ||
+ !CompTarget->isNullValue())
+ return SDValue();
+
+ const APInt &D = Divisor->getAPIntValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ bool DivisorIsEven = (K != 0);
+ APInt D0 = D.lshr(K);
+
+ // The fold is invalid when D0 == 1.
+ // This is reachable because visitSetCC happens before visitREM.
+ if (D0.isOneValue())
+ return SDValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ SDValue PVal = DAG.getConstant(P, DL, VT);
+ SDValue QVal = DAG.getConstant(Q, DL, VT);
+ // (mul N, P)
+ SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
+ Created.push_back(Op1.getNode());
+
+ // Rotate right only if D was even.
+ if (DivisorIsEven) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDValue ShAmt =
+ DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // UREM: (rotr (mul N, P), K)
+ Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
+ Created.push_back(Op1.getNode());
+ }
+
+ // UREM: (setule/setugt (rotr (mul N, P), K), Q)
+ return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+}
+
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4308,7 +4861,7 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
- SelectionDAG &DAG) const {
+ SelectionDAG &DAG) const {
SDValue Src = Node->getOperand(0);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -4320,7 +4873,7 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
- // https://github.com/llvm-mirror/compiler-rt/blob/master/lib/builtins/fixsfdi.c
+ // https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
unsigned SrcEltBits = SrcVT.getScalarSizeInBits();
EVT IntVT = SrcVT.changeTypeToInteger();
EVT IntShVT = getShiftAmountTy(IntVT, DAG.getDataLayout());
@@ -4544,6 +5097,17 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
return DAG.getNode(NewOp, dl, VT, Quiet0, Quiet1, Node->getFlags());
}
+ // If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
+ // instead if there are no NaNs.
+ if (Node->getFlags().hasNoNaNs()) {
+ unsigned IEEE2018Op =
+ Node->getOpcode() == ISD::FMINNUM ? ISD::FMINIMUM : ISD::FMAXIMUM;
+ if (isOperationLegalOrCustom(IEEE2018Op, VT)) {
+ return DAG.getNode(IEEE2018Op, dl, VT, Node->getOperand(0),
+ Node->getOperand(1), Node->getFlags());
+ }
+ }
+
return SDValue();
}
@@ -4771,7 +5335,7 @@ SDValue TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
- return DAG.getMergeValues({ Value, NewChain }, SL);
+ return DAG.getMergeValues({Value, NewChain}, SL);
}
SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
@@ -4826,7 +5390,7 @@ SDValue TargetLowering::scalarizeVectorStore(StoreSDNode *ST,
// Store Stride in bytes
unsigned Stride = MemSclVT.getSizeInBits() / 8;
- assert (Stride && "Zero stride!");
+ assert(Stride && "Zero stride!");
// Extract each of the elements from the original vector and save them into
// memory individually.
SmallVector<SDValue, 8> Stores;
@@ -5013,17 +5577,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
auto &MF = DAG.getMachineFunction();
- EVT MemVT = ST->getMemoryVT();
+ EVT StoreMemVT = ST->getMemoryVT();
SDLoc dl(ST);
- if (MemVT.isFloatingPoint() || MemVT.isVector()) {
+ if (StoreMemVT.isFloatingPoint() || StoreMemVT.isVector()) {
EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
if (isTypeLegal(intVT)) {
if (!isOperationLegalOrCustom(ISD::STORE, intVT) &&
- MemVT.isVector()) {
+ StoreMemVT.isVector()) {
// Scalarize the store and let the individual components be handled.
SDValue Result = scalarizeVectorStore(ST, DAG);
-
return Result;
}
// Expand to a bitconvert of the value to the integer type of the
@@ -5036,24 +5599,22 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
}
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
- EVT StoredVT = ST->getMemoryVT();
- MVT RegVT =
- getRegisterType(*DAG.getContext(),
- EVT::getIntegerVT(*DAG.getContext(),
- StoredVT.getSizeInBits()));
+ MVT RegVT = getRegisterType(
+ *DAG.getContext(),
+ EVT::getIntegerVT(*DAG.getContext(), StoreMemVT.getSizeInBits()));
EVT PtrVT = Ptr.getValueType();
- unsigned StoredBytes = StoredVT.getStoreSize();
+ unsigned StoredBytes = StoreMemVT.getStoreSize();
unsigned RegBytes = RegVT.getSizeInBits() / 8;
unsigned NumRegs = (StoredBytes + RegBytes - 1) / RegBytes;
// Make sure the stack slot is also aligned for the register type.
- SDValue StackPtr = DAG.CreateStackTemporary(StoredVT, RegVT);
+ SDValue StackPtr = DAG.CreateStackTemporary(StoreMemVT, RegVT);
auto FrameIndex = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(
Chain, dl, Val, StackPtr,
- MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoredVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, 0), StoreMemVT);
EVT StackPtrVT = StackPtr.getValueType();
@@ -5082,17 +5643,17 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
// The last store may be partial. Do a truncating store. On big-endian
// machines this requires an extending load from the stack slot to ensure
// that the bits are in the right place.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (StoredBytes - Offset));
+ EVT LoadMemVT =
+ EVT::getIntegerVT(*DAG.getContext(), 8 * (StoredBytes - Offset));
// Load from the stack slot.
SDValue Load = DAG.getExtLoad(
ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
- MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), MemVT);
+ MachinePointerInfo::getFixedStack(MF, FrameIndex, Offset), LoadMemVT);
Stores.push_back(
DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getPointerInfo().getWithOffset(Offset), MemVT,
+ ST->getPointerInfo().getWithOffset(Offset), LoadMemVT,
MinAlign(ST->getAlignment(), Offset),
ST->getMemOperand()->getFlags(), ST->getAAInfo()));
// The order of the stores doesn't matter - say it with a TokenFactor.
@@ -5100,18 +5661,16 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
return Result;
}
- assert(ST->getMemoryVT().isInteger() &&
- !ST->getMemoryVT().isVector() &&
+ assert(StoreMemVT.isInteger() && !StoreMemVT.isVector() &&
"Unaligned store of unknown type.");
// Get the half-size VT
- EVT NewStoredVT = ST->getMemoryVT().getHalfSizedIntegerVT(*DAG.getContext());
+ EVT NewStoredVT = StoreMemVT.getHalfSizedIntegerVT(*DAG.getContext());
int NumBits = NewStoredVT.getSizeInBits();
int IncrementSize = NumBits / 8;
// Divide the stored value in two parts.
- SDValue ShiftAmount =
- DAG.getConstant(NumBits, dl, getShiftAmountTy(Val.getValueType(),
- DAG.getDataLayout()));
+ SDValue ShiftAmount = DAG.getConstant(
+ NumBits, dl, getShiftAmountTy(Val.getValueType(), DAG.getDataLayout()));
SDValue Lo = Val;
SDValue Hi = DAG.getNode(ISD::SRL, dl, VT, Val, ShiftAmount);
@@ -5130,7 +5689,7 @@ SDValue TargetLowering::expandUnalignedStore(StoreSDNode *ST,
ST->getMemOperand()->getFlags(), ST->getAAInfo());
SDValue Result =
- DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
+ DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
return Result;
}
@@ -5242,7 +5801,7 @@ SDValue TargetLowering::LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA,
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
// At last for X86 targets, maybe good for other targets too?
MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
- MFI.setAdjustsStack(true); // Is this only for X86 target?
+ MFI.setAdjustsStack(true); // Is this only for X86 target?
MFI.setHasCalls(true);
assert((GA->getOffset() == 0) &&
@@ -5282,15 +5841,19 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
EVT VT = LHS.getValueType();
SDLoc dl(Node);
+ assert(VT == RHS.getValueType() && "Expected operands to be the same type");
+ assert(VT.isInteger() && "Expected operands to be integers");
+
// usub.sat(a, b) -> umax(a, b) - b
if (Opcode == ISD::USUBSAT && isOperationLegalOrCustom(ISD::UMAX, VT)) {
SDValue Max = DAG.getNode(ISD::UMAX, dl, VT, LHS, RHS);
return DAG.getNode(ISD::SUB, dl, VT, Max, RHS);
}
- if (VT.isVector()) {
- // TODO: Consider not scalarizing here.
- return SDValue();
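+  // uadd.sat(a, b) -> umin(a, ~b) + b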
+ if (Opcode == ISD::UADDSAT && isOperationLegalOrCustom(ISD::UMIN, VT)) {
+ SDValue InvRHS = DAG.getNOT(dl, RHS, VT);
+ SDValue Min = DAG.getNode(ISD::UMIN, dl, VT, LHS, InvRHS);
+ return DAG.getNode(ISD::ADD, dl, VT, Min, RHS);
}
unsigned OverflowOp;
@@ -5312,96 +5875,410 @@ SDValue TargetLowering::expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const {
"addition or subtraction node.");
}
- assert(LHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(RHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(LHS.getValueType() == RHS.getValueType() &&
- "Expected both operands to be the same type");
-
- unsigned BitWidth = LHS.getValueSizeInBits();
- EVT ResultType = LHS.getValueType();
- EVT BoolVT =
- getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), ResultType);
- SDValue Result =
- DAG.getNode(OverflowOp, dl, DAG.getVTList(ResultType, BoolVT), LHS, RHS);
+ unsigned BitWidth = LHS.getScalarValueSizeInBits();
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Result = DAG.getNode(OverflowOp, dl, DAG.getVTList(VT, BoolVT),
+ LHS, RHS);
SDValue SumDiff = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
- SDValue Zero = DAG.getConstant(0, dl, ResultType);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
if (Opcode == ISD::UADDSAT) {
- // Just need to check overflow for SatMax.
- APInt MaxVal = APInt::getMaxValue(BitWidth);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
- return DAG.getSelect(dl, ResultType, Overflow, SatMax, SumDiff);
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS + RHS) | OverflowMask
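+      // With sign-extended booleans the overflow mask is already all-ones on
+      // overflow and zero otherwise, so the OR saturates without a select.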
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ return DAG.getNode(ISD::OR, dl, VT, SumDiff, OverflowMask);
+ }
+ // Overflow ? 0xffff.... : (LHS + RHS)
+ return DAG.getSelect(dl, VT, Overflow, AllOnes, SumDiff);
} else if (Opcode == ISD::USUBSAT) {
- // Just need to check overflow for SatMin.
- APInt MinVal = APInt::getMinValue(BitWidth);
- SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
- return DAG.getSelect(dl, ResultType, Overflow, SatMin, SumDiff);
+ if (getBooleanContents(VT) == ZeroOrNegativeOneBooleanContent) {
+ // (LHS - RHS) & ~OverflowMask
+ SDValue OverflowMask = DAG.getSExtOrTrunc(Overflow, dl, VT);
+ SDValue Not = DAG.getNOT(dl, OverflowMask, VT);
+ return DAG.getNode(ISD::AND, dl, VT, SumDiff, Not);
+ }
+ // Overflow ? 0 : (LHS - RHS)
+ return DAG.getSelect(dl, VT, Overflow, Zero, SumDiff);
} else {
// SatMax -> Overflow && SumDiff < 0
// SatMin -> Overflow && SumDiff >= 0
APInt MinVal = APInt::getSignedMinValue(BitWidth);
APInt MaxVal = APInt::getSignedMaxValue(BitWidth);
- SDValue SatMin = DAG.getConstant(MinVal, dl, ResultType);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, ResultType);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
SDValue SumNeg = DAG.getSetCC(dl, BoolVT, SumDiff, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, ResultType, SumNeg, SatMax, SatMin);
- return DAG.getSelect(dl, ResultType, Overflow, Result, SumDiff);
+ Result = DAG.getSelect(dl, VT, SumNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, SumDiff);
}
}
SDValue
-TargetLowering::getExpandedFixedPointMultiplication(SDNode *Node,
- SelectionDAG &DAG) const {
- assert(Node->getOpcode() == ISD::SMULFIX && "Expected opcode to be SMULFIX.");
- assert(Node->getNumOperands() == 3 &&
- "Expected signed fixed point multiplication to have 3 operands.");
+TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
+ assert((Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::UMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT) &&
+ "Expected a fixed point multiplication opcode");
SDLoc dl(Node);
SDValue LHS = Node->getOperand(0);
SDValue RHS = Node->getOperand(1);
- assert(LHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(RHS.getValueType().isScalarInteger() &&
- "Expected operands to be integers. Vector of int arguments should "
- "already be unrolled.");
- assert(LHS.getValueType() == RHS.getValueType() &&
- "Expected both operands to be the same type");
-
- unsigned Scale = Node->getConstantOperandVal(2);
EVT VT = LHS.getValueType();
- assert(Scale < VT.getScalarSizeInBits() &&
- "Expected scale to be less than the number of bits.");
+ unsigned Scale = Node->getConstantOperandVal(2);
+ bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ unsigned VTSize = VT.getScalarSizeInBits();
+
+ if (!Scale) {
+ // [us]mul.fix(a, b, 0) -> mul(a, b)
+ if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
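+      // smul.fix.sat(a, b, 0) is smul.with.overflow(a, b); on overflow, clamp
+      // to SatMax when the wrapped product is negative (positive overflow)
+      // and to SatMin otherwise.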
+ SDValue Result =
+ DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ }
+ }
- if (!Scale)
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ bool Signed =
+ Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
+ assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
+ "Expected scale to be less than the number of bits if signed or at "
+ "most the number of bits if unsigned.");
+ assert(LHS.getValueType() == RHS.getValueType() &&
+ "Expected both operands to be the same type");
// Get the upper and lower bits of the result.
SDValue Lo, Hi;
- if (isOperationLegalOrCustom(ISD::SMUL_LOHI, VT)) {
- SDValue Result =
- DAG.getNode(ISD::SMUL_LOHI, dl, DAG.getVTList(VT, VT), LHS, RHS);
+ unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
+ unsigned HiOp = Signed ? ISD::MULHS : ISD::MULHU;
+ if (isOperationLegalOrCustom(LoHiOp, VT)) {
+ SDValue Result = DAG.getNode(LoHiOp, dl, DAG.getVTList(VT, VT), LHS, RHS);
Lo = Result.getValue(0);
Hi = Result.getValue(1);
- } else if (isOperationLegalOrCustom(ISD::MULHS, VT)) {
+ } else if (isOperationLegalOrCustom(HiOp, VT)) {
Lo = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- Hi = DAG.getNode(ISD::MULHS, dl, VT, LHS, RHS);
+ Hi = DAG.getNode(HiOp, dl, VT, LHS, RHS);
+ } else if (VT.isVector()) {
+ return SDValue();
} else {
- report_fatal_error("Unable to expand signed fixed point multiplication.");
+ report_fatal_error("Unable to expand fixed point multiplication.");
}
+ if (Scale == VTSize)
+ // Result is just the top half since we'd be shifting by the width of the
+ // operand.
+ return Hi;
+
// The result will need to be shifted right by the scale since both operands
// are scaled. The result is given to us in 2 halves, so we only want part of
// both in the result.
EVT ShiftTy = getShiftAmountTy(VT, DAG.getDataLayout());
- Lo = DAG.getNode(ISD::SRL, dl, VT, Lo, DAG.getConstant(Scale, dl, ShiftTy));
- Hi = DAG.getNode(
- ISD::SHL, dl, VT, Hi,
- DAG.getConstant(VT.getScalarSizeInBits() - Scale, dl, ShiftTy));
- return DAG.getNode(ISD::OR, dl, VT, Lo, Hi);
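+  // A single funnel shift fshr(Hi, Lo, Scale) extracts the middle bits of
+  // the Hi:Lo concatenation, folding the previous SRL/SHL/OR sequence into
+  // one node.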
+ SDValue Result = DAG.getNode(ISD::FSHR, dl, VT, Hi, Lo,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ if (!Saturating)
+ return Result;
+
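+  // Saturate if Hi falls outside [HiMask, LoMask] as signed values: Hi above
+  // LoMask means positive overflow, Hi below HiMask means negative overflow.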
+ unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
+ SDValue HiMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
+ SDValue LoMask = DAG.getConstant(
+ APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+
+ Result = DAG.getSelectCC(dl, Hi, LoMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETGT);
+ return DAG.getSelectCC(dl, Hi, HiMask,
+ DAG.getConstant(MinVal, dl, VT), Result,
+ ISD::SETLT);
+}
+
+void TargetLowering::expandUADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::UADDO;
+
+ // If ADD/SUBCARRY is legal, use that instead.
+ unsigned OpcCarry = IsAdd ? ISD::ADDCARRY : ISD::SUBCARRY;
+ if (isOperationLegalOrCustom(OpcCarry, Node->getValueType(0))) {
+ SDValue CarryIn = DAG.getConstant(0, dl, Node->getValueType(1));
+ SDValue NodeCarry = DAG.getNode(OpcCarry, dl, Node->getVTList(),
+ { LHS, RHS, CarryIn });
+ Result = SDValue(NodeCarry.getNode(), 0);
+ Overflow = SDValue(NodeCarry.getNode(), 1);
+ return;
+ }
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT SetCCType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
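+  // Unsigned wrap-around check: ADD overflowed iff Result < LHS, and SUB
+  // overflowed iff Result > LHS.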
+ ISD::CondCode CC = IsAdd ? ISD::SETULT : ISD::SETUGT;
+ SDValue SetCC = DAG.getSetCC(dl, SetCCType, Result, LHS, CC);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+}
+
+void TargetLowering::expandSADDSUBO(
+ SDNode *Node, SDValue &Result, SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool IsAdd = Node->getOpcode() == ISD::SADDO;
+
+ Result = DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, dl,
+ LHS.getValueType(), LHS, RHS);
+
+ EVT ResultType = Node->getValueType(1);
+ EVT OType = getSetCCResultType(
+ DAG.getDataLayout(), *DAG.getContext(), Node->getValueType(0));
+
+ // If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
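+  // The saturated result differs from the wrapped result exactly when the
+  // operation overflows.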
+ unsigned OpcSat = IsAdd ? ISD::SADDSAT : ISD::SSUBSAT;
+ if (isOperationLegalOrCustom(OpcSat, LHS.getValueType())) {
+ SDValue Sat = DAG.getNode(OpcSat, dl, LHS.getValueType(), LHS, RHS);
+ SDValue SetCC = DAG.getSetCC(dl, OType, Result, Sat, ISD::SETNE);
+ Overflow = DAG.getBoolExtOrTrunc(SetCC, dl, ResultType, ResultType);
+ return;
+ }
+
+ SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
+
+ // LHSSign -> LHS >= 0
+ // RHSSign -> RHS >= 0
+ // SumSign -> Result >= 0
+ //
+ // Add:
+ // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
+ // Sub:
+ // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
+ SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
+ SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
+ SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
+ IsAdd ? ISD::SETEQ : ISD::SETNE);
+
+ SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
+ SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
+
+ SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
+ Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+}
+
+bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
+ SDValue &Overflow, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ EVT VT = Node->getValueType(0);
+ EVT SetCCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue LHS = Node->getOperand(0);
+ SDValue RHS = Node->getOperand(1);
+ bool isSigned = Node->getOpcode() == ISD::SMULO;
+
+ // For power-of-two multiplications we can use a simpler shift expansion.
+ if (ConstantSDNode *RHSC = isConstOrConstSplat(RHS)) {
+ const APInt &C = RHSC->getAPIntValue();
+ // mulo(X, 1 << S) -> { X << S, (X << S) >> S != X }
+ if (C.isPowerOf2()) {
+      // smulo(x, signed_min) is the same as umulo(x, signed_min).
+ bool UseArithShift = isSigned && !C.isMinSignedValue();
+ EVT ShiftAmtTy = getShiftAmountTy(VT, DAG.getDataLayout());
+ SDValue ShiftAmt = DAG.getConstant(C.logBase2(), dl, ShiftAmtTy);
+ Result = DAG.getNode(ISD::SHL, dl, VT, LHS, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT,
+ DAG.getNode(UseArithShift ? ISD::SRA : ISD::SRL,
+ dl, VT, Result, ShiftAmt),
+ LHS, ISD::SETNE);
+ return true;
+ }
+ }
+
+  EVT WideVT =
+      EVT::getIntegerVT(*DAG.getContext(), VT.getScalarSizeInBits() * 2);
+ if (VT.isVector())
+ WideVT = EVT::getVectorVT(*DAG.getContext(), WideVT,
+ VT.getVectorNumElements());
+
+ SDValue BottomHalf;
+ SDValue TopHalf;
+ static const unsigned Ops[2][3] =
+ { { ISD::MULHU, ISD::UMUL_LOHI, ISD::ZERO_EXTEND },
+ { ISD::MULHS, ISD::SMUL_LOHI, ISD::SIGN_EXTEND }};
+ if (isOperationLegalOrCustom(Ops[isSigned][0], VT)) {
+ BottomHalf = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ TopHalf = DAG.getNode(Ops[isSigned][0], dl, VT, LHS, RHS);
+ } else if (isOperationLegalOrCustom(Ops[isSigned][1], VT)) {
+ BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
+ RHS);
+ TopHalf = BottomHalf.getValue(1);
+ } else if (isTypeLegal(WideVT)) {
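+    // Multiply in the double-width type and split the product: the bottom
+    // half is the result and the top half feeds the overflow check.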
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+ SDValue Mul = DAG.getNode(ISD::MUL, dl, WideVT, LHS, RHS);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
+ SDValue ShiftAmt = DAG.getConstant(VT.getScalarSizeInBits(), dl,
+ getShiftAmountTy(WideVT, DAG.getDataLayout()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT,
+ DAG.getNode(ISD::SRL, dl, WideVT, Mul, ShiftAmt));
+ } else {
+ if (VT.isVector())
+ return false;
+
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+
+ SDValue HiLHS;
+ SDValue HiRHS;
+ if (isSigned) {
+      // The high part is obtained by SRA'ing all but one of the bits of the
+      // low part.
+ unsigned LoSize = VT.getSizeInBits();
+ HiLHS =
+ DAG.getNode(ISD::SRA, dl, VT, LHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ HiRHS =
+ DAG.getNode(ISD::SRA, dl, VT, RHS,
+ DAG.getConstant(LoSize - 1, dl,
+ getPointerTy(DAG.getDataLayout())));
+ } else {
+ HiLHS = DAG.getConstant(0, dl, VT);
+ HiRHS = DAG.getConstant(0, dl, VT);
+ }
+
+ // Here we're passing the 2 arguments explicitly as 4 arguments that are
+    // pre-lowered to the correct types. This all depends upon WideVT not
+    // being a legal type for the architecture and thus having to be split
+    // into two arguments.
+ SDValue Ret;
+ if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
+ // Halves of WideVT are packed into registers in different order
+ // depending on platform endianness. This is usually handled by
+ // the C calling convention, but we can't defer to it in
+ // the legalizer.
+ SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+ /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+ /* isPostTypeLegalization */ true).first;
+ } else {
+ SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
+ Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
+ /* doesNotReturn */ false, /* isReturnValueUsed */ true,
+ /* isPostTypeLegalization */ true).first;
+ }
+ assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
+ "Ret value is a collection of constituent nodes holding result.");
+ if (DAG.getDataLayout().isLittleEndian()) {
+ // Same as above.
+ BottomHalf = Ret.getOperand(0);
+ TopHalf = Ret.getOperand(1);
+ } else {
+ BottomHalf = Ret.getOperand(1);
+ TopHalf = Ret.getOperand(0);
+ }
+ }
+
+ Result = BottomHalf;
+ if (isSigned) {
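+    // Signed overflow iff the top half is not the sign-extension of the
+    // bottom half.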
+ SDValue ShiftAmt = DAG.getConstant(
+ VT.getScalarSizeInBits() - 1, dl,
+ getShiftAmountTy(BottomHalf.getValueType(), DAG.getDataLayout()));
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, ShiftAmt);
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf, Sign, ISD::SETNE);
+ } else {
+ Overflow = DAG.getSetCC(dl, SetCCVT, TopHalf,
+ DAG.getConstant(0, dl, VT), ISD::SETNE);
+ }
+
+ // Truncate the result if SetCC returns a larger type than needed.
+ EVT RType = Node->getValueType(1);
+ if (RType.getSizeInBits() < Overflow.getValueSizeInBits())
+ Overflow = DAG.getNode(ISD::TRUNCATE, dl, RType, Overflow);
+
+ assert(RType.getSizeInBits() == Overflow.getValueSizeInBits() &&
+ "Unexpected result type for S/UMULO legalization");
+ return true;
+}
+
+SDValue TargetLowering::expandVecReduce(SDNode *Node, SelectionDAG &DAG) const {
+ SDLoc dl(Node);
+ bool NoNaN = Node->getFlags().hasNoNaNs();
+ unsigned BaseOpcode = 0;
+ switch (Node->getOpcode()) {
+ default: llvm_unreachable("Expected VECREDUCE opcode");
+ case ISD::VECREDUCE_FADD: BaseOpcode = ISD::FADD; break;
+ case ISD::VECREDUCE_FMUL: BaseOpcode = ISD::FMUL; break;
+ case ISD::VECREDUCE_ADD: BaseOpcode = ISD::ADD; break;
+ case ISD::VECREDUCE_MUL: BaseOpcode = ISD::MUL; break;
+ case ISD::VECREDUCE_AND: BaseOpcode = ISD::AND; break;
+ case ISD::VECREDUCE_OR: BaseOpcode = ISD::OR; break;
+ case ISD::VECREDUCE_XOR: BaseOpcode = ISD::XOR; break;
+ case ISD::VECREDUCE_SMAX: BaseOpcode = ISD::SMAX; break;
+ case ISD::VECREDUCE_SMIN: BaseOpcode = ISD::SMIN; break;
+ case ISD::VECREDUCE_UMAX: BaseOpcode = ISD::UMAX; break;
+ case ISD::VECREDUCE_UMIN: BaseOpcode = ISD::UMIN; break;
+ case ISD::VECREDUCE_FMAX:
+ BaseOpcode = NoNaN ? ISD::FMAXNUM : ISD::FMAXIMUM;
+ break;
+ case ISD::VECREDUCE_FMIN:
+ BaseOpcode = NoNaN ? ISD::FMINNUM : ISD::FMINIMUM;
+ break;
+ }
+
+ SDValue Op = Node->getOperand(0);
+ EVT VT = Op.getValueType();
+
+ // Try to use a shuffle reduction for power of two vectors.
+ if (VT.isPow2VectorType()) {
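+    // Repeatedly split the vector and combine the halves with BaseOpcode
+    // while the operation remains legal on the narrower type; any remainder
+    // is scalarized below.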
+ while (VT.getVectorNumElements() > 1) {
+ EVT HalfVT = VT.getHalfNumVectorElementsVT(*DAG.getContext());
+ if (!isOperationLegalOrCustom(BaseOpcode, HalfVT))
+ break;
+
+ SDValue Lo, Hi;
+ std::tie(Lo, Hi) = DAG.SplitVector(Op, dl);
+ Op = DAG.getNode(BaseOpcode, dl, HalfVT, Lo, Hi);
+ VT = HalfVT;
+ }
+ }
+
+ EVT EltVT = VT.getVectorElementType();
+ unsigned NumElts = VT.getVectorNumElements();
+
+ SmallVector<SDValue, 8> Ops;
+ DAG.ExtractVectorElements(Op, Ops, 0, NumElts);
+
+ SDValue Res = Ops[0];
+ for (unsigned i = 1; i < NumElts; i++)
+ Res = DAG.getNode(BaseOpcode, dl, EltVT, Res, Ops[i], Node->getFlags());
+
+ // Result type may be wider than element type.
+ if (EltVT != Node->getValueType(0))
+ Res = DAG.getNode(ISD::ANY_EXTEND, dl, Node->getValueType(0), Res);
+ return Res;
}
diff --git a/lib/CodeGen/ShadowStackGCLowering.cpp b/lib/CodeGen/ShadowStackGCLowering.cpp
index 3e12b32b12d4..17a4d76c4c80 100644
--- a/lib/CodeGen/ShadowStackGCLowering.cpp
+++ b/lib/CodeGen/ShadowStackGCLowering.cpp
@@ -1,9 +1,8 @@
//===- ShadowStackGCLowering.cpp - Custom lowering for shadow-stack gc ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -313,7 +312,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
AtEntry.SetInsertPoint(IP->getParent(), IP);
// Initialize the map pointer and load the current head of the shadow stack.
- Instruction *CurrentHead = AtEntry.CreateLoad(Head, "gc_currhead");
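+  // CreateLoad takes the result type explicitly here; the head of the shadow
+  // stack is a pointer to a StackEntry.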
+ Instruction *CurrentHead =
+ AtEntry.CreateLoad(StackEntryTy->getPointerTo(), Head, "gc_currhead");
Instruction *EntryMapPtr = CreateGEP(Context, AtEntry, ConcreteStackEntryTy,
StackEntry, 0, 1, "gc_frame.map");
AtEntry.CreateStore(FrameMap, EntryMapPtr);
@@ -354,7 +354,8 @@ bool ShadowStackGCLowering::runOnFunction(Function &F) {
Instruction *EntryNextPtr2 =
CreateGEP(Context, *AtExit, ConcreteStackEntryTy, StackEntry, 0, 0,
"gc_frame.next");
- Value *SavedHead = AtExit->CreateLoad(EntryNextPtr2, "gc_savedhead");
+ Value *SavedHead = AtExit->CreateLoad(StackEntryTy->getPointerTo(),
+ EntryNextPtr2, "gc_savedhead");
AtExit->CreateStore(SavedHead, Head);
}
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index d3454ca6ba6a..2db0ea570598 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -1,9 +1,8 @@
//===- ShrinkWrap.cpp - Compute safe point for prolog/epilog insertion ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -259,6 +258,15 @@ INITIALIZE_PASS_END(ShrinkWrap, DEBUG_TYPE, "Shrink Wrap Pass", false, false)
bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
RegScavenger *RS) const {
+  // This prevents premature stack popping when an indirect stack access
+  // occurs. It is overly aggressive for the moment.
+  // TODO: - Obvious non-stack loads and stores, such as global values,
+  //         are known to not access the stack.
+  //       - Further, data dependency and alias analysis can validate
+  //         that loads and stores never derive from the stack pointer.
+ if (MI.mayLoadOrStore())
+ return true;
+
if (MI.getOpcode() == FrameSetupOpcode ||
MI.getOpcode() == FrameDestroyOpcode) {
LLVM_DEBUG(dbgs() << "Frame instruction: " << MI << '\n');
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 5d2669f5ae92..23e5ce0acae8 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -1,9 +1,8 @@
//===- SjLjEHPrepare.cpp - Eliminate Invoke & Unwind instructions ---------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -40,15 +39,15 @@ class SjLjEHPrepare : public FunctionPass {
Type *doubleUnderDataTy;
Type *doubleUnderJBufTy;
Type *FunctionContextTy;
- Constant *RegisterFn;
- Constant *UnregisterFn;
- Constant *BuiltinSetupDispatchFn;
- Constant *FrameAddrFn;
- Constant *StackAddrFn;
- Constant *StackRestoreFn;
- Constant *LSDAAddrFn;
- Constant *CallSiteFn;
- Constant *FuncCtxFn;
+ FunctionCallee RegisterFn;
+ FunctionCallee UnregisterFn;
+ Function *BuiltinSetupDispatchFn;
+ Function *FrameAddrFn;
+ Function *StackAddrFn;
+ Function *StackRestoreFn;
+ Function *LSDAAddrFn;
+ Function *CallSiteFn;
+ Function *FuncCtxFn;
AllocaInst *FuncCtx;
public:
@@ -190,14 +189,16 @@ Value *SjLjEHPrepare::setupFunctionContext(Function &F,
Builder.CreateConstGEP2_32(FunctionContextTy, FuncCtx, 0, 2, "__data");
// The exception values come back in context->__data[0].
+ Type *Int32Ty = Type::getInt32Ty(F.getContext());
Value *ExceptionAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 0, "exception_gep");
- Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val");
+ Value *ExnVal = Builder.CreateLoad(Int32Ty, ExceptionAddr, true, "exn_val");
ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy());
Value *SelectorAddr = Builder.CreateConstGEP2_32(doubleUnderDataTy, FCData,
0, 1, "exn_selector_gep");
- Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val");
+ Value *SelVal =
+ Builder.CreateLoad(Int32Ty, SelectorAddr, true, "exn_selector_val");
substituteLPadValues(LPI, ExnVal, SelVal);
}
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index fccbb8ec91cb..9fff873324d0 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -1,9 +1,8 @@
//===-- SlotIndexes.cpp - Slot Indexes Pass ------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -23,7 +22,6 @@ INITIALIZE_PASS(SlotIndexes, DEBUG_TYPE,
"Slot index numbering", false, false)
STATISTIC(NumLocalRenum, "Number of local renumberings");
-STATISTIC(NumGlobalRenum, "Number of global renumberings");
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
@@ -95,7 +93,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
}
// Sort the Idx2MBBMap
- llvm::sort(idx2MBBMap, Idx2MBBCompare());
+ llvm::sort(idx2MBBMap, less_first());
LLVM_DEBUG(mf->print(dbgs(), this));
@@ -145,20 +143,6 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
}
}
-void SlotIndexes::renumberIndexes() {
- // Renumber updates the index of every element of the index list.
- LLVM_DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
- ++NumGlobalRenum;
-
- unsigned index = 0;
-
- for (IndexList::iterator I = indexList.begin(), E = indexList.end();
- I != E; ++I) {
- I->setIndex(index);
- index += SlotIndex::InstrDist;
- }
-}
-
// Renumber indexes locally after curItr was inserted, but failed to get a new
// index.
void SlotIndexes::renumberIndexes(IndexList::iterator curItr) {
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
index f6786b30b21c..11452fdb747a 100644
--- a/lib/CodeGen/SpillPlacement.cpp
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -1,9 +1,8 @@
//===- SpillPlacement.cpp - Optimal Spill Code Placement ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
index aa3ac444e0da..aa0e07ef92e3 100644
--- a/lib/CodeGen/SpillPlacement.h
+++ b/lib/CodeGen/SpillPlacement.h
@@ -1,9 +1,8 @@
//===- SpillPlacement.h - Optimal Spill Code Placement ---------*- C++ -*--===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 330ee81342b6..66dabf78f873 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/Spiller.h - Spiller -------------------------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index d639f4475301..5c944fe3f6b3 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -1,9 +1,8 @@
//===- SplitKit.cpp - Toolkit for splitting live ranges -------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -521,17 +520,18 @@ SlotIndex SplitEditor::buildSingleSubRegCopy(unsigned FromReg, unsigned ToReg,
.addReg(FromReg, 0, SubIdx);
BumpPtrAllocator &Allocator = LIS.getVNInfoAllocator();
+ SlotIndexes &Indexes = *LIS.getSlotIndexes();
if (FirstCopy) {
- SlotIndexes &Indexes = *LIS.getSlotIndexes();
Def = Indexes.insertMachineInstrInMaps(*CopyMI, Late).getRegSlot();
} else {
CopyMI->bundleWithPred();
}
LaneBitmask LaneMask = TRI.getSubRegIndexLaneMask(SubIdx);
DestLI.refineSubRanges(Allocator, LaneMask,
- [Def, &Allocator](LiveInterval::SubRange& SR) {
- SR.createDeadDef(Def, Allocator);
- });
+ [Def, &Allocator](LiveInterval::SubRange &SR) {
+ SR.createDeadDef(Def, Allocator);
+ },
+ Indexes, TRI);
return Def;
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index bcc8f8cf18bc..86ad3811e3ad 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,9 +1,8 @@
//===- SplitKit.h - Toolkit for splitting live ranges -----------*- C++ -*-===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp
index eb8552915e2a..641b54205d62 100644
--- a/lib/CodeGen/StackColoring.cpp
+++ b/lib/CodeGen/StackColoring.cpp
@@ -1,9 +1,8 @@
//===- StackColoring.cpp --------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1221,11 +1220,12 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) {
// Sort the slots according to their size. Place unused slots at the end.
// Use stable sort to guarantee deterministic code generation.
- std::stable_sort(SortedSlots.begin(), SortedSlots.end(),
- [this](int LHS, int RHS) {
+ llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
    // We use -1 to denote an uninteresting slot. Place these slots at the end.
- if (LHS == -1) return false;
- if (RHS == -1) return true;
+ if (LHS == -1)
+ return false;
+ if (RHS == -1)
+ return true;
// Sort according to size.
return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
});
diff --git a/lib/CodeGen/StackMapLivenessAnalysis.cpp b/lib/CodeGen/StackMapLivenessAnalysis.cpp
index 00cf8070be5e..fb2abf3daa7f 100644
--- a/lib/CodeGen/StackMapLivenessAnalysis.cpp
+++ b/lib/CodeGen/StackMapLivenessAnalysis.cpp
@@ -1,9 +1,8 @@
//===-- StackMapLivenessAnalysis.cpp - StackMap live Out Analysis ----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index 0676fa2421e8..ae9401b89700 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -1,9 +1,8 @@
//===- StackMaps.cpp ------------------------------------------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 3b578c7391da..809960c7fdf9 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -1,9 +1,8 @@
//===- StackProtector.cpp - Stack Protector Insertion ---------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -18,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -157,40 +157,6 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
-bool StackProtector::HasAddressTaken(const Instruction *AI) {
- for (const User *U : AI->users()) {
- if (const StoreInst *SI = dyn_cast<StoreInst>(U)) {
- if (AI == SI->getValueOperand())
- return true;
- } else if (const PtrToIntInst *SI = dyn_cast<PtrToIntInst>(U)) {
- if (AI == SI->getOperand(0))
- return true;
- } else if (const CallInst *CI = dyn_cast<CallInst>(U)) {
- // Ignore intrinsics that are not calls. TODO: Use isLoweredToCall().
- if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
- return true;
- } else if (isa<InvokeInst>(U)) {
- return true;
- } else if (const SelectInst *SI = dyn_cast<SelectInst>(U)) {
- if (HasAddressTaken(SI))
- return true;
- } else if (const PHINode *PN = dyn_cast<PHINode>(U)) {
- // Keep track of what PHI nodes we have already visited to ensure
- // they are only visited once.
- if (VisitedPHIs.insert(PN).second)
- if (HasAddressTaken(PN))
- return true;
- } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(U)) {
- if (HasAddressTaken(GEP))
- return true;
- } else if (const BitCastInst *BI = dyn_cast<BitCastInst>(U)) {
- if (HasAddressTaken(BI))
- return true;
- }
- }
- return false;
-}
-
/// Search for the first call to the llvm.stackprotector intrinsic and return it
/// if present.
static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -298,7 +264,9 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && HasAddressTaken(AI)) {
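+    // PointerMayBeCaptured subsumes the old HasAddressTaken walk: any store
+    // or other capture of the alloca's address counts as address-taken,
+    // while captures via return are deliberately ignored.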
+ if (Strong && PointerMayBeCaptured(AI,
+ /* ReturnCaptures */ false,
+ /* StoreCaptures */ true)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
@@ -323,7 +291,7 @@ static Value *getStackGuard(const TargetLoweringBase *TLI, Module *M,
IRBuilder<> &B,
bool *SupportsSelectionDAGSP = nullptr) {
if (Value *Guard = TLI->getIRStackGuard(B))
- return B.CreateLoad(Guard, true, "StackGuard");
+ return B.CreateLoad(B.getInt8PtrTy(), Guard, true, "StackGuard");
// Use SelectionDAG SSP handling, since there isn't an IR guard.
//
@@ -414,15 +382,14 @@ bool StackProtector::InsertStackProtectors() {
// Generate epilogue instrumentation. The epilogue intrumentation can be
// function-based or inlined depending on which mechanism the target is
// providing.
- if (Value* GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
+ if (Function *GuardCheck = TLI->getSSPStackGuardCheck(*M)) {
// Generate the function-based epilogue instrumentation.
// The target provides a guard check function, generate a call to it.
IRBuilder<> B(RI);
- LoadInst *Guard = B.CreateLoad(AI, true, "Guard");
+ LoadInst *Guard = B.CreateLoad(B.getInt8PtrTy(), AI, true, "Guard");
CallInst *Call = B.CreateCall(GuardCheck, {Guard});
- llvm::Function *Function = cast<llvm::Function>(GuardCheck);
- Call->setAttributes(Function->getAttributes());
- Call->setCallingConv(Function->getCallingConv());
+ Call->setAttributes(GuardCheck->getAttributes());
+ Call->setCallingConv(GuardCheck->getCallingConv());
} else {
// Generate the epilogue with inline instrumentation.
// If we do not support SelectionDAG based tail calls, generate IR level
@@ -474,7 +441,7 @@ bool StackProtector::InsertStackProtectors() {
// Generate the stack protector instructions in the old basic block.
IRBuilder<> B(BB);
Value *Guard = getStackGuard(TLI, M, B);
- LoadInst *LI2 = B.CreateLoad(AI, true);
+ LoadInst *LI2 = B.CreateLoad(B.getInt8PtrTy(), AI, true);
Value *Cmp = B.CreateICmpEQ(Guard, LI2);
auto SuccessProb =
BranchProbabilityInfo::getBranchProbStackProtector(true);
@@ -500,14 +467,13 @@ BasicBlock *StackProtector::CreateFailBB() {
IRBuilder<> B(FailBB);
B.SetCurrentDebugLocation(DebugLoc::get(0, 0, F->getSubprogram()));
if (Trip.isOSOpenBSD()) {
- Constant *StackChkFail =
- M->getOrInsertFunction("__stack_smash_handler",
- Type::getVoidTy(Context),
- Type::getInt8PtrTy(Context));
+ FunctionCallee StackChkFail = M->getOrInsertFunction(
+ "__stack_smash_handler", Type::getVoidTy(Context),
+ Type::getInt8PtrTy(Context));
B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH"));
} else {
- Constant *StackChkFail =
+ FunctionCallee StackChkFail =
M->getOrInsertFunction("__stack_chk_fail", Type::getVoidTy(Context));
B.CreateCall(StackChkFail, {});
@@ -517,7 +483,7 @@ BasicBlock *StackProtector::CreateFailBB() {
}
bool StackProtector::shouldEmitSDCheck(const BasicBlock &BB) const {
- return HasPrologue && !HasIRCheck && dyn_cast<ReturnInst>(BB.getTerminator());
+ return HasPrologue && !HasIRCheck && isa<ReturnInst>(BB.getTerminator());
}
void StackProtector::copyToMachineFrameInfo(MachineFrameInfo &MFI) const {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index d8c6a249e4da..99b533e10b87 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -1,9 +1,8 @@
//===- StackSlotColoring.cpp - Stack slot coloring pass. ------------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -243,7 +242,7 @@ void StackSlotColoring::InitializeSlots() {
LLVM_DEBUG(dbgs() << '\n');
// Sort them by weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
NextColors.resize(AllColors.size());
@@ -348,7 +347,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
- std::stable_sort(SSIntervals.begin(), SSIntervals.end(), IntervalSorter());
+ llvm::stable_sort(SSIntervals, IntervalSorter());
#ifndef NDEBUG
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i)
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
new file mode 100644
index 000000000000..96821cadb1b6
--- /dev/null
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -0,0 +1,312 @@
+//===-- SwiftErrorValueTracking.cpp --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements a limited mem2reg-like analysis to promote uses of function
+// arguments and allocas marked with swifterror from memory into virtual
+// registers tracked by this class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/IR/Value.h"
+
+using namespace llvm;
+
+Register SwiftErrorValueTracking::getOrCreateVReg(const MachineBasicBlock *MBB,
+ const Value *Val) {
+ auto Key = std::make_pair(MBB, Val);
+ auto It = VRegDefMap.find(Key);
+ // If this is the first use of this swifterror value in this basic block,
+ // create a new virtual register.
+  // Once all basic blocks have been processed, this "upwards exposed use" is
+  // satisfied by inserting a copy or phi at the beginning of this block.
+ if (It == VRegDefMap.end()) {
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ auto VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefMap[Key] = VReg;
+ VRegUpwardsUse[Key] = VReg;
+ return VReg;
+ } else
+ return It->second;
+}
+
+void SwiftErrorValueTracking::setCurrentVReg(const MachineBasicBlock *MBB,
+ const Value *Val, Register VReg) {
+ VRegDefMap[std::make_pair(MBB, Val)] = VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegDefAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, true);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ auto &DL = MF->getDataLayout();
+ const TargetRegisterClass *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ VRegDefUses[Key] = VReg;
+ setCurrentVReg(MBB, Val, VReg);
+ return VReg;
+}
+
+Register SwiftErrorValueTracking::getOrCreateVRegUseAt(
+ const Instruction *I, const MachineBasicBlock *MBB, const Value *Val) {
+ auto Key = PointerIntPair<const Instruction *, 1, bool>(I, false);
+ auto It = VRegDefUses.find(Key);
+ if (It != VRegDefUses.end())
+ return It->second;
+
+ Register VReg = getOrCreateVReg(MBB, Val);
+ VRegDefUses[Key] = VReg;
+ return VReg;
+}
+
+/// Set up SwiftErrorVals by going through the function. If the function has
+/// a swifterror argument, it will be the first entry.
+void SwiftErrorValueTracking::setFunction(MachineFunction &mf) {
+ MF = &mf;
+ Fn = &MF->getFunction();
+ TLI = MF->getSubtarget().getTargetLowering();
+ TII = MF->getSubtarget().getInstrInfo();
+
+ if (!TLI->supportSwiftError())
+ return;
+
+ SwiftErrorVals.clear();
+ VRegDefMap.clear();
+ VRegUpwardsUse.clear();
+ VRegDefUses.clear();
+ SwiftErrorArg = nullptr;
+
+ // Check if function has a swifterror argument.
+ bool HaveSeenSwiftErrorArg = false;
+ for (Function::const_arg_iterator AI = Fn->arg_begin(), AE = Fn->arg_end();
+ AI != AE; ++AI)
+ if (AI->hasSwiftErrorAttr()) {
+ assert(!HaveSeenSwiftErrorArg &&
+ "Must have only one swifterror parameter");
+ (void)HaveSeenSwiftErrorArg; // silence warning.
+ HaveSeenSwiftErrorArg = true;
+ SwiftErrorArg = &*AI;
+ SwiftErrorVals.push_back(&*AI);
+ }
+
+ for (const auto &LLVMBB : *Fn)
+ for (const auto &Inst : LLVMBB) {
+ if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(&Inst))
+ if (Alloca->isSwiftError())
+ SwiftErrorVals.push_back(Alloca);
+ }
+}
+
+bool SwiftErrorValueTracking::createEntriesInEntryBlock(DebugLoc DbgLoc) {
+ if (!TLI->supportSwiftError())
+ return false;
+
+  // We only need to do this when we have a swifterror parameter or a
+  // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return false;
+
+ MachineBasicBlock *MBB = &*MF->begin();
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ bool Inserted = false;
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ // We will always generate a copy from the argument. It is always used at
+ // least by the 'return' of the swifterror.
+ if (SwiftErrorArg && SwiftErrorArg == SwiftErrorVal)
+ continue;
+ Register VReg = MF->getRegInfo().createVirtualRegister(RC);
+ // Assign Undef to Vreg. We construct MI directly to make sure it works
+ // with FastISel.
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DbgLoc,
+ TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
+
+ setCurrentVReg(MBB, SwiftErrorVal, VReg);
+ Inserted = true;
+ }
+
+ return Inserted;
+}
+
+/// Propagate swifterror values through the machine function CFG.
+void SwiftErrorValueTracking::propagateVRegs() {
+ if (!TLI->supportSwiftError())
+ return;
+
+  // We only need to do this when we have a swifterror parameter or a
+  // swifterror alloca.
+ if (SwiftErrorVals.empty())
+ return;
+
+ // For each machine basic block in reverse post order.
+ ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
+ for (MachineBasicBlock *MBB : RPOT) {
+ // For each swifterror value in the function.
+ for (const auto *SwiftErrorVal : SwiftErrorVals) {
+ auto Key = std::make_pair(MBB, SwiftErrorVal);
+ auto UUseIt = VRegUpwardsUse.find(Key);
+ auto VRegDefIt = VRegDefMap.find(Key);
+ bool UpwardsUse = UUseIt != VRegUpwardsUse.end();
+ Register UUseVReg = UpwardsUse ? UUseIt->second : Register();
+ bool DownwardDef = VRegDefIt != VRegDefMap.end();
+ assert(!(UpwardsUse && !DownwardDef) &&
+ "We can't have an upwards use but no downwards def");
+
+ // If there is no upwards exposed use and an entry for the swifterror in
+ // the def map for this value we don't need to do anything: We already
+ // have a downward def for this basic block.
+ if (!UpwardsUse && DownwardDef)
+ continue;
+
+ // Otherwise we either have an upwards exposed use vreg that we need to
+ // materialize or need to forward the downward def from predecessors.
+
+ // Check whether we have a single vreg def from all predecessors.
+ // Otherwise we need a phi.
+ SmallVector<std::pair<MachineBasicBlock *, Register>, 4> VRegs;
+ SmallSet<const MachineBasicBlock *, 8> Visited;
+ for (auto *Pred : MBB->predecessors()) {
+ if (!Visited.insert(Pred).second)
+ continue;
+ VRegs.push_back(std::make_pair(
+ Pred, getOrCreateVReg(Pred, SwiftErrorVal)));
+ if (Pred != MBB)
+ continue;
+ // We have a self-edge.
+ // If there was no upwards use in this basic block there is now one: the
+        // phi needs to use itself.
+ if (!UpwardsUse) {
+ UpwardsUse = true;
+ UUseIt = VRegUpwardsUse.find(Key);
+ assert(UUseIt != VRegUpwardsUse.end());
+ UUseVReg = UUseIt->second;
+ }
+ }
+
+ // We need a phi node if we have more than one predecessor with different
+ // downward defs.
+ bool needPHI =
+ VRegs.size() >= 1 &&
+ std::find_if(
+ VRegs.begin(), VRegs.end(),
+ [&](const std::pair<const MachineBasicBlock *, Register> &V)
+ -> bool { return V.second != VRegs[0].second; }) !=
+ VRegs.end();
+
+      // If there is no upwards exposed use and we don't need a phi, just
+ // forward the swifterror vreg from the predecessor(s).
+ if (!UpwardsUse && !needPHI) {
+ assert(!VRegs.empty() &&
+ "No predecessors? The entry block should bail out earlier");
+ // Just forward the swifterror vreg from the predecessor(s).
+ setCurrentVReg(MBB, SwiftErrorVal, VRegs[0].second);
+ continue;
+ }
+
+ auto DLoc = isa<Instruction>(SwiftErrorVal)
+ ? cast<Instruction>(SwiftErrorVal)->getDebugLoc()
+ : DebugLoc();
+ const auto *TII = MF->getSubtarget().getInstrInfo();
+
+ // If we don't need a phi create a copy to the upward exposed vreg.
+ if (!needPHI) {
+ assert(UpwardsUse);
+ assert(!VRegs.empty() &&
+ "No predecessors? Is the Calling Convention correct?");
+ Register DestReg = UUseVReg;
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc, TII->get(TargetOpcode::COPY),
+ DestReg)
+ .addReg(VRegs[0].second);
+ continue;
+ }
+
+ // We need a phi: if there is an upwards exposed use we already have a
+ // destination virtual register number otherwise we generate a new one.
+ auto &DL = MF->getDataLayout();
+ auto const *RC = TLI->getRegClassFor(TLI->getPointerTy(DL));
+ Register PHIVReg =
+ UpwardsUse ? UUseVReg : MF->getRegInfo().createVirtualRegister(RC);
+ MachineInstrBuilder PHI =
+ BuildMI(*MBB, MBB->getFirstNonPHI(), DLoc,
+ TII->get(TargetOpcode::PHI), PHIVReg);
+ for (auto BBRegPair : VRegs) {
+ PHI.addReg(BBRegPair.second).addMBB(BBRegPair.first);
+ }
+
+ // We did not have a definition in this block before: store the phi's vreg
+ // as this block downward exposed def.
+ if (!UpwardsUse)
+ setCurrentVReg(MBB, SwiftErrorVal, PHIVReg);
+ }
+ }
+}
+
+void SwiftErrorValueTracking::preassignVRegs(
+ MachineBasicBlock *MBB, BasicBlock::const_iterator Begin,
+ BasicBlock::const_iterator End) {
+ if (!TLI->supportSwiftError() || SwiftErrorVals.empty())
+ return;
+
+  // Iterate over instructions and assign vregs to swifterror defs and uses.
+ for (auto It = Begin; It != End; ++It) {
+ ImmutableCallSite CS(&*It);
+ if (CS) {
+ // A call-site with a swifterror argument is both use and def.
+ const Value *SwiftErrorAddr = nullptr;
+ for (auto &Arg : CS.args()) {
+ if (!Arg->isSwiftError())
+ continue;
+ // Use of swifterror.
+ assert(!SwiftErrorAddr && "Cannot have multiple swifterror arguments");
+ SwiftErrorAddr = &*Arg;
+ assert(SwiftErrorAddr->isSwiftError() &&
+ "Must have a swifterror value argument");
+ getOrCreateVRegUseAt(&*It, MBB, SwiftErrorAddr);
+ }
+ if (!SwiftErrorAddr)
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+ // A load is a use.
+ } else if (const LoadInst *LI = dyn_cast<const LoadInst>(&*It)) {
+ const Value *V = LI->getOperand(0);
+ if (!V->isSwiftError())
+ continue;
+
+ getOrCreateVRegUseAt(LI, MBB, V);
+
+ // A store is a def.
+ } else if (const StoreInst *SI = dyn_cast<const StoreInst>(&*It)) {
+ const Value *SwiftErrorAddr = SI->getOperand(1);
+ if (!SwiftErrorAddr->isSwiftError())
+ continue;
+
+ // Def of swifterror.
+ getOrCreateVRegDefAt(&*It, MBB, SwiftErrorAddr);
+
+      // A return in a swifterror-returning function is a use.
+ } else if (const ReturnInst *R = dyn_cast<const ReturnInst>(&*It)) {
+ const Function *F = R->getParent()->getParent();
+ if (!F->getAttributes().hasAttrSomewhere(Attribute::SwiftError))
+ continue;
+
+ getOrCreateVRegUseAt(R, MBB, SwiftErrorArg);
+ }
+ }
+}
diff --git a/lib/CodeGen/SwitchLoweringUtils.cpp b/lib/CodeGen/SwitchLoweringUtils.cpp
new file mode 100644
index 000000000000..83acf7f80715
--- /dev/null
+++ b/lib/CodeGen/SwitchLoweringUtils.cpp
@@ -0,0 +1,489 @@
+//===- SwitchLoweringUtils.cpp - Switch Lowering --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains switch inst lowering optimizations and utilities for
+// codegen, so that it can be used for both SelectionDAG and GlobalISel.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
+#include "llvm/CodeGen/SwitchLoweringUtils.h"
+
+using namespace llvm;
+using namespace SwitchCG;
+
+uint64_t SwitchCG::getJumpTableRange(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ const APInt &LowCase = Clusters[First].Low->getValue();
+ const APInt &HighCase = Clusters[Last].High->getValue();
+ assert(LowCase.getBitWidth() == HighCase.getBitWidth());
+
+ // FIXME: A range of consecutive cases has 100% density, but only requires one
+ // comparison to lower. We should discriminate against such consecutive ranges
+ // in jump tables.
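+  // Clamping the range keeps downstream density checks, which scale it by
+  // 100, from overflowing uint64_t.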
+ return (HighCase - LowCase).getLimitedValue((UINT64_MAX - 1) / 100) + 1;
+}
+
+uint64_t
+SwitchCG::getJumpTableNumCases(const SmallVectorImpl<unsigned> &TotalCases,
+ unsigned First, unsigned Last) {
+ assert(Last >= First);
+ assert(TotalCases[Last] >= TotalCases[First]);
+ uint64_t NumCases =
+ TotalCases[Last] - (First == 0 ? 0 : TotalCases[First - 1]);
+ return NumCases;
+}
+
+void SwitchCG::SwitchLowering::findJumpTables(CaseClusterVector &Clusters,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB) {
+#ifndef NDEBUG
+ // Clusters must be non-empty, sorted, and only contain Range clusters.
+ assert(!Clusters.empty());
+ for (CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range);
+ for (unsigned i = 1, e = Clusters.size(); i < e; ++i)
+ assert(Clusters[i - 1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ assert(TLI && "TLI not set!");
+ if (!TLI->areJTsAllowed(SI->getParent()->getParent()))
+ return;
+
+ const unsigned MinJumpTableEntries = TLI->getMinimumJumpTableEntries();
+ const unsigned SmallNumberOfEntries = MinJumpTableEntries / 2;
+
+ // Bail if not enough cases.
+ const int64_t N = Clusters.size();
+ if (N < 2 || N < MinJumpTableEntries)
+ return;
+
+ // Accumulated number of cases in each cluster and those prior to it.
+ SmallVector<unsigned, 8> TotalCases(N);
+ for (unsigned i = 0; i < N; ++i) {
+ const APInt &Hi = Clusters[i].High->getValue();
+ const APInt &Lo = Clusters[i].Low->getValue();
+ TotalCases[i] = (Hi - Lo).getLimitedValue() + 1;
+ if (i != 0)
+ TotalCases[i] += TotalCases[i - 1];
+ }
+
+  uint64_t Range = getJumpTableRange(Clusters, 0, N - 1);
+ uint64_t NumCases = getJumpTableNumCases(TotalCases, 0, N - 1);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ // Cheap case: the whole range may be suitable for jump table.
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ CaseCluster JTCluster;
+ if (buildJumpTable(Clusters, 0, N - 1, SI, DefaultMBB, JTCluster)) {
+ Clusters[0] = JTCluster;
+ Clusters.resize(1);
+ return;
+ }
+ }
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // Split Clusters into minimum number of dense partitions. The algorithm uses
+ // the same idea as Kannan & Proebsting "Correction to 'Producing Good Code
+ // for the Case Statement'" (1994), but builds the MinPartitions array in
+ // reverse order to make it easier to reconstruct the partitions in ascending
+ // order. In the choice between two optimal partitionings, it picks the one
+ // which yields more jump tables.
+
+ // MinPartitions[i] is the minimum number of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+ // PartitionsScore[i] is used to break ties when choosing between two
+ // partitionings resulting in the same number of partitions.
+ SmallVector<unsigned, 8> PartitionsScore(N);
+ // For PartitionsScore, a small number of comparisons is considered as good
+ // as a jump table, and a single comparison is considered better than a jump
+ // table.
+ enum PartitionScores : unsigned {
+ NoTable = 0,
+ Table = 1,
+ FewCases = 1,
+ SingleCase = 2
+ };
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+ PartitionsScore[N - 1] = PartitionScores::SingleCase;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; i--) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+ PartitionsScore[i] = PartitionsScore[i + 1] + PartitionScores::SingleCase;
+
+ // Search for a solution that results in fewer partitions.
+ for (int64_t j = N - 1; j > i; j--) {
+ // Try building a partition from Clusters[i..j].
+ Range = getJumpTableRange(Clusters, i, j);
+ NumCases = getJumpTableNumCases(TotalCases, i, j);
+ assert(NumCases < UINT64_MAX / 100);
+ assert(Range >= NumCases);
+
+ if (TLI->isSuitableForJumpTable(SI, NumCases, Range)) {
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ unsigned Score = j == N - 1 ? 0 : PartitionsScore[j + 1];
+ int64_t NumEntries = j - i + 1;
+
+ if (NumEntries == 1)
+ Score += PartitionScores::SingleCase;
+ else if (NumEntries <= SmallNumberOfEntries)
+ Score += PartitionScores::FewCases;
+ else if (NumEntries >= MinJumpTableEntries)
+ Score += PartitionScores::Table;
+
+ // If this leads to fewer partitions, or to the same number of
+ // partitions with better score, it is a better partitioning.
+ if (NumPartitions < MinPartitions[i] ||
+ (NumPartitions == MinPartitions[i] && Score > PartitionsScore[i])) {
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ PartitionsScore[i] = Score;
+ }
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing some with jump tables in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(Last >= First);
+ assert(DstIndex <= First);
+ unsigned NumClusters = Last - First + 1;
+
+ CaseCluster JTCluster;
+ if (NumClusters >= MinJumpTableEntries &&
+ buildJumpTable(Clusters, First, Last, SI, DefaultMBB, JTCluster)) {
+ Clusters[DstIndex++] = JTCluster;
+ } else {
+ for (unsigned I = First; I <= Last; ++I)
+ std::memmove(&Clusters[DstIndex++], &Clusters[I], sizeof(Clusters[I]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
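
The partition reconstruction in the final loop above only ever reads
LastElement at the first index of each partition, so the DP may leave stale
values at interior indices. A self-contained sketch of that walk on toy data
(illustrative only):

    #include <cstdio>
    #include <vector>

    // With LastElement = {2, 1, 2, 4, 4}, indices 0..4 split into the
    // partitions [0,2] and [3,4]; entries 1 and 2 are never consulted.
    int main() {
      std::vector<unsigned> LastElement = {2, 1, 2, 4, 4};
      for (unsigned First = 0, Last; First < LastElement.size(); First = Last + 1) {
        Last = LastElement[First];
        std::printf("[%u, %u]\n", First, Last);
      }
    }
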
+
+bool SwitchCG::SwitchLowering::buildJumpTable(const CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ MachineBasicBlock *DefaultMBB,
+ CaseCluster &JTCluster) {
+ assert(First <= Last);
+
+ auto Prob = BranchProbability::getZero();
+ unsigned NumCmps = 0;
+ std::vector<MachineBasicBlock*> Table;
+ DenseMap<MachineBasicBlock*, BranchProbability> JTProbs;
+
+ // Initialize probabilities in JTProbs.
+ for (unsigned I = First; I <= Last; ++I)
+ JTProbs[Clusters[I].MBB] = BranchProbability::getZero();
+
+ for (unsigned I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Prob += Clusters[I].Prob;
+ const APInt &Low = Clusters[I].Low->getValue();
+ const APInt &High = Clusters[I].High->getValue();
+ NumCmps += (Low == High) ? 1 : 2;
+ if (I != First) {
+ // Fill the gap between this and the previous cluster.
+ const APInt &PreviousHigh = Clusters[I - 1].High->getValue();
+ assert(PreviousHigh.slt(Low));
+ uint64_t Gap = (Low - PreviousHigh).getLimitedValue() - 1;
+ for (uint64_t J = 0; J < Gap; J++)
+ Table.push_back(DefaultMBB);
+ }
+ uint64_t ClusterSize = (High - Low).getLimitedValue() + 1;
+ for (uint64_t J = 0; J < ClusterSize; ++J)
+ Table.push_back(Clusters[I].MBB);
+ JTProbs[Clusters[I].MBB] += Clusters[I].Prob;
+ }
+
+ unsigned NumDests = JTProbs.size();
+ if (TLI->isSuitableForBitTests(NumDests, NumCmps,
+ Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), *DL)) {
+ // Clusters[First..Last] should be lowered as bit tests instead.
+ return false;
+ }
+
+ // Create the MBB that will load from and jump through the table.
+ // Note: We create it here, but it's not inserted into the function yet.
+ MachineFunction *CurMF = FuncInfo.MF;
+ MachineBasicBlock *JumpTableMBB =
+ CurMF->CreateMachineBasicBlock(SI->getParent());
+
+ // Add successors. Note: use table order for determinism.
+ SmallPtrSet<MachineBasicBlock *, 8> Done;
+ for (MachineBasicBlock *Succ : Table) {
+ if (Done.count(Succ))
+ continue;
+ addSuccessorWithProb(JumpTableMBB, Succ, JTProbs[Succ]);
+ Done.insert(Succ);
+ }
+ JumpTableMBB->normalizeSuccProbs();
+
+ unsigned JTI = CurMF->getOrCreateJumpTableInfo(TLI->getJumpTableEncoding())
+ ->createJumpTableIndex(Table);
+
+ // Set up the jump table info.
+ JumpTable JT(-1U, JTI, JumpTableMBB, nullptr);
+ JumpTableHeader JTH(Clusters[First].Low->getValue(),
+ Clusters[Last].High->getValue(), SI->getCondition(),
+ nullptr, false);
+ JTCases.emplace_back(std::move(JTH), std::move(JT));
+
+ JTCluster = CaseCluster::jumpTable(Clusters[First].Low, Clusters[Last].High,
+ JTCases.size() - 1, Prob);
+ return true;
+}
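
The flat table built above has one entry per value in [Low, High], with holes
between clusters pointing at the default block. A standalone sketch of that
layout with strings in place of MachineBasicBlocks (illustrative only):

    #include <cstdint>
    #include <string>
    #include <vector>

    struct MiniCluster { uint64_t Low, High; std::string Dest; };

    // Mirrors the table-filling loop in buildJumpTable: gaps get the default
    // destination, covered values get their cluster's destination.
    std::vector<std::string> buildFlatTable(const std::vector<MiniCluster> &Cs,
                                            const std::string &Default) {
      std::vector<std::string> Table;
      for (size_t I = 0; I < Cs.size(); ++I) {
        if (I != 0)
          for (uint64_t V = Cs[I - 1].High + 1; V < Cs[I].Low; ++V)
            Table.push_back(Default);          // fill the gap
        for (uint64_t V = Cs[I].Low; V <= Cs[I].High; ++V)
          Table.push_back(Cs[I].Dest);         // one entry per case value
      }
      return Table;
    }

    // buildFlatTable({{0, 1, "A"}, {4, 4, "B"}}, "D") yields {A, A, D, D, B}.
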
+
+void SwitchCG::SwitchLowering::findBitTestClusters(CaseClusterVector &Clusters,
+ const SwitchInst *SI) {
+ // Partition Clusters into as few subsets as possible, where each subset has a
+ // range that fits in a machine word and has <= 3 unique destinations.
+
+#ifndef NDEBUG
+ // Clusters must be sorted and contain Range or JumpTable clusters.
+ assert(!Clusters.empty());
+ assert(Clusters[0].Kind == CC_Range || Clusters[0].Kind == CC_JumpTable);
+ for (const CaseCluster &C : Clusters)
+ assert(C.Kind == CC_Range || C.Kind == CC_JumpTable);
+ for (unsigned i = 1; i < Clusters.size(); ++i)
+ assert(Clusters[i-1].High->getValue().slt(Clusters[i].Low->getValue()));
+#endif
+
+ // The algorithm below is not suitable for -O0.
+ if (TM->getOptLevel() == CodeGenOpt::None)
+ return;
+
+ // If the target does not have a legal shift-left, do not emit bit tests at
+ // all.
+ EVT PTy = TLI->getPointerTy(*DL);
+ if (!TLI->isOperationLegal(ISD::SHL, PTy))
+ return;
+
+ int BitWidth = PTy.getSizeInBits();
+ const int64_t N = Clusters.size();
+
+ // MinPartitions[i] is the minimum number of partitions of Clusters[i..N-1].
+ SmallVector<unsigned, 8> MinPartitions(N);
+ // LastElement[i] is the last element of the partition starting at i.
+ SmallVector<unsigned, 8> LastElement(N);
+
+ // FIXME: This might not be the best algorithm for finding bit test clusters.
+
+ // Base case: There is only one way to partition Clusters[N-1].
+ MinPartitions[N - 1] = 1;
+ LastElement[N - 1] = N - 1;
+
+ // Note: loop indexes are signed to avoid underflow.
+ for (int64_t i = N - 2; i >= 0; --i) {
+ // Find optimal partitioning of Clusters[i..N-1].
+ // Baseline: Put Clusters[i] into a partition on its own.
+ MinPartitions[i] = MinPartitions[i + 1] + 1;
+ LastElement[i] = i;
+
+ // Search for a solution that results in fewer partitions.
+ // Note: the search is limited by BitWidth, reducing time complexity.
+ for (int64_t j = std::min(N - 1, i + BitWidth - 1); j > i; --j) {
+ // Try building a partition from Clusters[i..j].
+
+ // Check the range.
+ if (!TLI->rangeFitsInWord(Clusters[i].Low->getValue(),
+ Clusters[j].High->getValue(), *DL))
+ continue;
+
+ // Check the number of destinations and the cluster kinds.
+ // FIXME: This works, but doesn't seem very efficient.
+ bool RangesOnly = true;
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ for (int64_t k = i; k <= j; k++) {
+ if (Clusters[k].Kind != CC_Range) {
+ RangesOnly = false;
+ break;
+ }
+ Dests.set(Clusters[k].MBB->getNumber());
+ }
+ if (!RangesOnly || Dests.count() > 3)
+ break;
+
+ // Check if it's a better partition.
+ unsigned NumPartitions = 1 + (j == N - 1 ? 0 : MinPartitions[j + 1]);
+ if (NumPartitions < MinPartitions[i]) {
+ // Found a better partition.
+ MinPartitions[i] = NumPartitions;
+ LastElement[i] = j;
+ }
+ }
+ }
+
+ // Iterate over the partitions, replacing with bit-test clusters in-place.
+ unsigned DstIndex = 0;
+ for (unsigned First = 0, Last; First < N; First = Last + 1) {
+ Last = LastElement[First];
+ assert(First <= Last);
+ assert(DstIndex <= First);
+
+ CaseCluster BitTestCluster;
+ if (buildBitTests(Clusters, First, Last, SI, BitTestCluster)) {
+ Clusters[DstIndex++] = BitTestCluster;
+ } else {
+ size_t NumClusters = Last - First + 1;
+ std::memmove(&Clusters[DstIndex], &Clusters[First],
+ sizeof(Clusters[0]) * NumClusters);
+ DstIndex += NumClusters;
+ }
+ }
+ Clusters.resize(DstIndex);
+}
+
+bool SwitchCG::SwitchLowering::buildBitTests(CaseClusterVector &Clusters,
+ unsigned First, unsigned Last,
+ const SwitchInst *SI,
+ CaseCluster &BTCluster) {
+ assert(First <= Last);
+ if (First == Last)
+ return false;
+
+ BitVector Dests(FuncInfo.MF->getNumBlockIDs());
+ unsigned NumCmps = 0;
+ for (int64_t I = First; I <= Last; ++I) {
+ assert(Clusters[I].Kind == CC_Range);
+ Dests.set(Clusters[I].MBB->getNumber());
+ NumCmps += (Clusters[I].Low == Clusters[I].High) ? 1 : 2;
+ }
+ unsigned NumDests = Dests.count();
+
+ APInt Low = Clusters[First].Low->getValue();
+ APInt High = Clusters[Last].High->getValue();
+ assert(Low.slt(High));
+
+ if (!TLI->isSuitableForBitTests(NumDests, NumCmps, Low, High, *DL))
+ return false;
+
+ APInt LowBound;
+ APInt CmpRange;
+
+ const int BitWidth = TLI->getPointerTy(*DL).getSizeInBits();
+ assert(TLI->rangeFitsInWord(Low, High, *DL) &&
+ "Case range must fit in bit mask!");
+
+ // Check if the clusters cover a contiguous range such that no value in the
+ // range will jump to the default statement.
+ bool ContiguousRange = true;
+ for (int64_t I = First + 1; I <= Last; ++I) {
+ if (Clusters[I].Low->getValue() != Clusters[I - 1].High->getValue() + 1) {
+ ContiguousRange = false;
+ break;
+ }
+ }
+
+ if (Low.isStrictlyPositive() && High.slt(BitWidth)) {
+ // Optimize the case where all the case values already fit in a word without
+ // rebasing to minValue; the subtraction can then be omitted entirely.
+ LowBound = APInt::getNullValue(Low.getBitWidth());
+ CmpRange = High;
+ ContiguousRange = false;
+ } else {
+ LowBound = Low;
+ CmpRange = High - Low;
+ }
+
+ CaseBitsVector CBV;
+ auto TotalProb = BranchProbability::getZero();
+ for (unsigned i = First; i <= Last; ++i) {
+ // Find the CaseBits for this destination.
+ unsigned j;
+ for (j = 0; j < CBV.size(); ++j)
+ if (CBV[j].BB == Clusters[i].MBB)
+ break;
+ if (j == CBV.size())
+ CBV.push_back(
+ CaseBits(0, Clusters[i].MBB, 0, BranchProbability::getZero()));
+ CaseBits *CB = &CBV[j];
+
+ // Update Mask, Bits and ExtraProb.
+ uint64_t Lo = (Clusters[i].Low->getValue() - LowBound).getZExtValue();
+ uint64_t Hi = (Clusters[i].High->getValue() - LowBound).getZExtValue();
+ assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
+ CB->Mask |= (-1ULL >> (63 - (Hi - Lo))) << Lo;
+ CB->Bits += Hi - Lo + 1;
+ CB->ExtraProb += Clusters[i].Prob;
+ TotalProb += Clusters[i].Prob;
+ }
+
+ BitTestInfo BTI;
+ llvm::sort(CBV, [](const CaseBits &a, const CaseBits &b) {
+ // Sort by probability first, number of bits second, bit mask third.
+ if (a.ExtraProb != b.ExtraProb)
+ return a.ExtraProb > b.ExtraProb;
+ if (a.Bits != b.Bits)
+ return a.Bits > b.Bits;
+ return a.Mask < b.Mask;
+ });
+
+ for (auto &CB : CBV) {
+ MachineBasicBlock *BitTestBB =
+ FuncInfo.MF->CreateMachineBasicBlock(SI->getParent());
+ BTI.push_back(BitTestCase(CB.Mask, BitTestBB, CB.BB, CB.ExtraProb));
+ }
+ BitTestCases.emplace_back(std::move(LowBound), std::move(CmpRange),
+ SI->getCondition(), -1U, MVT::Other, false,
+ ContiguousRange, nullptr, nullptr, std::move(BTI),
+ TotalProb);
+
+ BTCluster = CaseCluster::bitTests(Clusters[First].Low, Clusters[Last].High,
+ BitTestCases.size() - 1, TotalProb);
+ return true;
+}
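
The mask expression above, (-1ULL >> (63 - (Hi - Lo))) << Lo, builds a run of
Hi - Lo + 1 one-bits starting at bit Lo. A small sketch with a worked result
(illustrative only):

    #include <cassert>
    #include <cstdint>

    // Same construction as CB->Mask: a contiguous run of set bits.
    uint64_t caseRangeMask(uint64_t Lo, uint64_t Hi) {
      assert(Hi >= Lo && Hi < 64 && "Invalid bit case!");
      return (~0ULL >> (63 - (Hi - Lo))) << Lo;
    }

    // caseRangeMask(0, 1) == 0b11 and caseRangeMask(3, 3) == 0b1000, so cases
    // {4, 5, 7} rebased to LowBound = 4 accumulate the mask 0b1011.
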
+
+void SwitchCG::sortAndRangeify(CaseClusterVector &Clusters) {
+#ifndef NDEBUG
+ for (const CaseCluster &CC : Clusters)
+ assert(CC.Low == CC.High && "Input clusters must be single-case");
+#endif
+
+ llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
+ return a.Low->getValue().slt(b.Low->getValue());
+ });
+
+ // Merge adjacent clusters with the same destination.
+ const unsigned N = Clusters.size();
+ unsigned DstIndex = 0;
+ for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
+ CaseCluster &CC = Clusters[SrcIndex];
+ const ConstantInt *CaseVal = CC.Low;
+ MachineBasicBlock *Succ = CC.MBB;
+
+ if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
+ (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
+ // If this case has the same successor and is a neighbour, merge it into
+ // the previous cluster.
+ Clusters[DstIndex - 1].High = CaseVal;
+ Clusters[DstIndex - 1].Prob += CC.Prob;
+ } else {
+ std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
+ sizeof(Clusters[SrcIndex]));
+ }
+ }
+ Clusters.resize(DstIndex);
+}
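
A standalone model of the merge step, using plain ints for case values and a
char tag for the destination block (illustrative only):

    #include <algorithm>
    #include <vector>

    struct MiniCase { int Low, High; char Dest; };

    // Sort single cases, then fold each case into the previous range when it
    // is adjacent and targets the same destination -- the same in-place
    // compaction as sortAndRangeify. {3->B, 1->A, 2->A} becomes
    // {[1,2]->A, [3,3]->B}.
    void sortAndRangeifyToy(std::vector<MiniCase> &Cs) {
      std::sort(Cs.begin(), Cs.end(),
                [](const MiniCase &a, const MiniCase &b) { return a.Low < b.Low; });
      size_t Dst = 0;
      for (size_t Src = 0; Src < Cs.size(); ++Src) {
        if (Dst != 0 && Cs[Dst - 1].Dest == Cs[Src].Dest &&
            Cs[Src].Low == Cs[Dst - 1].High + 1)
          Cs[Dst - 1].High = Cs[Src].High;   // extend the previous range
        else
          Cs[Dst++] = Cs[Src];
      }
      Cs.resize(Dst);
    }
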
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 25cd7802264e..ba348b4a9d41 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -1,9 +1,8 @@
//===- TailDuplication.cpp - Duplicate blocks into predecessors' tails ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index b118c176a897..a0590a8a6cc6 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -1,9 +1,8 @@
//===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -435,7 +434,7 @@ void TailDuplicator::duplicateInstruction(
if (NewRC == nullptr)
NewRC = OrigRC;
unsigned NewReg = MRI->createVirtualRegister(NewRC);
- BuildMI(*PredBB, MI, MI->getDebugLoc(),
+ BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
LocalVRMap.erase(VI);
@@ -558,7 +557,7 @@ bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
unsigned MaxDuplicateCount;
if (TailDupSize == 0 &&
TailDuplicateSize.getNumOccurrences() == 0 &&
- MF->getFunction().optForSize())
+ MF->getFunction().hasOptSize())
MaxDuplicateCount = 1;
else if (TailDupSize == 0)
MaxDuplicateCount = TailDuplicateSize;
@@ -857,11 +856,6 @@ bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
}
appendCopies(PredBB, CopyInfos, Copies);
- // Simplify
- MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
- SmallVector<MachineOperand, 4> PredCond;
- TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond);
-
NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
// Update the CFG.
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index cf78fb5a1f12..9c4483cb240d 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -1,9 +1,8 @@
//===- TargetFrameLoweringImpl.cpp - Implement target frame interface ------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 2a17af391105..868617ffe14d 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -1,9 +1,8 @@
//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -86,11 +85,13 @@ static bool isAsmComment(const char *Str, const MCAsmInfo &MAI) {
/// simple--i.e. not a logical or arithmetic expression--size values without
/// the optional fill value. This is primarily used for creating arbitrary
/// sized inline asm blocks for testing purposes.
-unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
- const MCAsmInfo &MAI) const {
+unsigned TargetInstrInfo::getInlineAsmLength(
+ const char *Str,
+ const MCAsmInfo &MAI, const TargetSubtargetInfo *STI) const {
// Count the number of instructions in the asm.
bool AtInsnStart = true;
unsigned Length = 0;
+ const unsigned MaxInstLength = MAI.getMaxInstLength(STI);
for (; *Str; ++Str) {
if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(),
strlen(MAI.getSeparatorString())) == 0) {
@@ -102,7 +103,7 @@ unsigned TargetInstrInfo::getInlineAsmLength(const char *Str,
}
if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
- unsigned AddLength = MAI.getMaxInstLength();
+ unsigned AddLength = MaxInstLength;
if (strncmp(Str, ".space", 6) == 0) {
char *EStr;
int SpaceSize;
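
The hunk above makes the per-instruction cost subtarget-dependent but keeps
the counting loop itself unchanged. A simplified model of that loop, assuming
";" as the separator string and ignoring the ".space" special case
(illustrative only):

    #include <cctype>

    // Charge MaxInstLength bytes for each statement that begins after a
    // newline or separator; whitespace alone does not start a statement.
    unsigned roughAsmLength(const char *Str, unsigned MaxInstLength) {
      bool AtInsnStart = true;
      unsigned Length = 0;
      for (; *Str; ++Str) {
        if (*Str == '\n' || *Str == ';') {
          AtInsnStart = true;
          continue;
        }
        if (AtInsnStart && !std::isspace(static_cast<unsigned char>(*Str))) {
          Length += MaxInstLength;
          AtInsnStart = false;
        }
      }
      return Length;
    }

    // roughAsmLength("add x0, x0, 1\nret", 4) == 8.
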
@@ -136,8 +137,14 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
// Save off the debug loc before erasing the instruction.
DebugLoc DL = Tail->getDebugLoc();
- // Remove all the dead instructions from the end of MBB.
- MBB->erase(Tail, MBB->end());
+ // Update call site info and remove all the dead instructions
+ // from the end of MBB.
+ while (Tail != MBB->end()) {
+ auto MI = Tail++;
+ if (MI->isCall())
+ MBB->getParent()->updateCallSiteInfo(&*MI);
+ MBB->erase(MI);
+ }
// If MBB isn't immediately before MBB, insert a branch to it.
if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest))
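
The replacement loop erases instructions one at a time so each call can be
inspected first; the post-increment keeps the iterator valid across erase().
The same pattern on a plain std::list (illustrative only):

    #include <list>

    // Advance past the element before erasing it, so the loop iterator is
    // never invalidated -- cf. the updateCallSiteInfo loop above.
    void eraseTail(std::list<int> &L, std::list<int>::iterator Tail) {
      while (Tail != L.end()) {
        auto It = Tail++;   // grab the element, then step past it
        // per-element bookkeeping would go here
        L.erase(It);
      }
    }
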
@@ -162,9 +169,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
assert(MI.getOperand(Idx1).isReg() && MI.getOperand(Idx2).isReg() &&
"This only knows how to commute register operands so far");
- unsigned Reg0 = HasDef ? MI.getOperand(0).getReg() : 0;
- unsigned Reg1 = MI.getOperand(Idx1).getReg();
- unsigned Reg2 = MI.getOperand(Idx2).getReg();
+ Register Reg0 = HasDef ? MI.getOperand(0).getReg() : Register();
+ Register Reg1 = MI.getOperand(Idx1).getReg();
+ Register Reg2 = MI.getOperand(Idx2).getReg();
unsigned SubReg0 = HasDef ? MI.getOperand(0).getSubReg() : 0;
unsigned SubReg1 = MI.getOperand(Idx1).getSubReg();
unsigned SubReg2 = MI.getOperand(Idx2).getSubReg();
@@ -523,7 +530,8 @@ static MachineInstr *foldPatchpoint(MachineFunction &MF, MachineInstr &MI,
MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
ArrayRef<unsigned> Ops, int FI,
- LiveIntervals *LIS) const {
+ LiveIntervals *LIS,
+ VirtRegMap *VRM) const {
auto Flags = MachineMemOperand::MONone;
for (unsigned OpIdx : Ops)
Flags |= MI.getOperand(OpIdx).isDef() ? MachineMemOperand::MOStore
@@ -569,7 +577,7 @@ MachineInstr *TargetInstrInfo::foldMemoryOperand(MachineInstr &MI,
MBB->insert(MI, NewMI);
} else {
// Ask the target to do the actual folding.
- NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS);
+ NewMI = foldMemoryOperandImpl(MF, MI, Ops, MI, FI, LIS, VRM);
}
if (NewMI) {
@@ -898,7 +906,8 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
return true;
// Avoid instructions obviously unsafe for remat.
- if (MI.isNotDuplicable() || MI.mayStore() || MI.hasUnmodeledSideEffects())
+ if (MI.isNotDuplicable() || MI.mayStore() || MI.mayRaiseFPException() ||
+ MI.hasUnmodeledSideEffects())
return false;
// Don't remat inline asm. We have no idea how expensive it is
@@ -1010,7 +1019,7 @@ ScheduleHazardRecognizer *TargetInstrInfo::
CreateTargetMIHazardRecognizer(const InstrItineraryData *II,
const ScheduleDAG *DAG) const {
return (ScheduleHazardRecognizer *)
- new ScoreboardHazardRecognizer(II, DAG, "misched");
+ new ScoreboardHazardRecognizer(II, DAG, "machine-scheduler");
}
// Default implementation of CreateTargetPostRAHazardRecognizer.
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index e86190375642..9b28c1a6c450 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -1,9 +1,8 @@
//===- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ----===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -74,8 +73,8 @@ static cl::opt<unsigned> MinimumJumpTableEntries
cl::desc("Set minimum number of entries to use a jump table."));
static cl::opt<unsigned> MaximumJumpTableSize
- ("max-jump-table-size", cl::init(0), cl::Hidden,
- cl::desc("Set maximum size of jump tables; zero for no limit."));
+ ("max-jump-table-size", cl::init(UINT_MAX), cl::Hidden,
+ cl::desc("Set maximum size of jump tables."));
/// Minimum jump table density for normal functions.
static cl::opt<unsigned>
@@ -124,6 +123,34 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
for (int LC = 0; LC < RTLIB::UNKNOWN_LIBCALL; ++LC)
setLibcallCallingConv((RTLIB::Libcall)LC, CallingConv::C);
+ // For IEEE quad-precision libcall names, PPC uses "kf" instead of "tf".
+ if (TT.getArch() == Triple::ppc || TT.isPPC64()) {
+ setLibcallName(RTLIB::ADD_F128, "__addkf3");
+ setLibcallName(RTLIB::SUB_F128, "__subkf3");
+ setLibcallName(RTLIB::MUL_F128, "__mulkf3");
+ setLibcallName(RTLIB::DIV_F128, "__divkf3");
+ setLibcallName(RTLIB::FPEXT_F32_F128, "__extendsfkf2");
+ setLibcallName(RTLIB::FPEXT_F64_F128, "__extenddfkf2");
+ setLibcallName(RTLIB::FPROUND_F128_F32, "__trunckfsf2");
+ setLibcallName(RTLIB::FPROUND_F128_F64, "__trunckfdf2");
+ setLibcallName(RTLIB::FPTOSINT_F128_I32, "__fixkfsi");
+ setLibcallName(RTLIB::FPTOSINT_F128_I64, "__fixkfdi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I32, "__fixunskfsi");
+ setLibcallName(RTLIB::FPTOUINT_F128_I64, "__fixunskfdi");
+ setLibcallName(RTLIB::SINTTOFP_I32_F128, "__floatsikf");
+ setLibcallName(RTLIB::SINTTOFP_I64_F128, "__floatdikf");
+ setLibcallName(RTLIB::UINTTOFP_I32_F128, "__floatunsikf");
+ setLibcallName(RTLIB::UINTTOFP_I64_F128, "__floatundikf");
+ setLibcallName(RTLIB::OEQ_F128, "__eqkf2");
+ setLibcallName(RTLIB::UNE_F128, "__nekf2");
+ setLibcallName(RTLIB::OGE_F128, "__gekf2");
+ setLibcallName(RTLIB::OLT_F128, "__ltkf2");
+ setLibcallName(RTLIB::OLE_F128, "__lekf2");
+ setLibcallName(RTLIB::OGT_F128, "__gtkf2");
+ setLibcallName(RTLIB::UO_F128, "__unordkf2");
+ setLibcallName(RTLIB::O_F128, "__unordkf2");
+ }
+
// A few names are different on particular architectures or environments.
if (TT.isOSDarwin()) {
// For f16/f32 conversions, Darwin uses the standard naming scheme, instead
@@ -546,7 +573,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
JumpIsExpensive = JumpIsExpensiveOverride;
PredictableSelectIsExpensive = false;
EnableExtLdPromotion = false;
- HasFloatingPointExceptions = true;
StackPointerRegisterToSaveRestore = 0;
BooleanContents = UndefinedBooleanContent;
BooleanFloatContents = UndefinedBooleanContent;
@@ -583,6 +609,14 @@ void TargetLoweringBase::initActions() {
std::fill(std::begin(TargetDAGCombineArray),
std::end(TargetDAGCombineArray), 0);
+ for (MVT VT : MVT::fp_valuetypes()) {
+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
+ if (IntVT.isValid()) {
+ setOperationAction(ISD::ATOMIC_SWAP, VT, Promote);
+ AddPromotedToType(ISD::ATOMIC_SWAP, VT, IntVT);
+ }
+ }
+
// Set default actions for various operations.
for (MVT VT : MVT::all_valuetypes()) {
// Default all indexed load / store to expand.
@@ -617,6 +651,8 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SSUBSAT, VT, Expand);
setOperationAction(ISD::USUBSAT, VT, Expand);
setOperationAction(ISD::SMULFIX, VT, Expand);
+ setOperationAction(ISD::SMULFIXSAT, VT, Expand);
+ setOperationAction(ISD::UMULFIX, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -655,8 +691,51 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
}
+ // Constrained floating-point operations default to expand.
+ setOperationAction(ISD::STRICT_FADD, VT, Expand);
+ setOperationAction(ISD::STRICT_FSUB, VT, Expand);
+ setOperationAction(ISD::STRICT_FMUL, VT, Expand);
+ setOperationAction(ISD::STRICT_FDIV, VT, Expand);
+ setOperationAction(ISD::STRICT_FREM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMA, VT, Expand);
+ setOperationAction(ISD::STRICT_FSQRT, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOW, VT, Expand);
+ setOperationAction(ISD::STRICT_FPOWI, VT, Expand);
+ setOperationAction(ISD::STRICT_FSIN, VT, Expand);
+ setOperationAction(ISD::STRICT_FCOS, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP, VT, Expand);
+ setOperationAction(ISD::STRICT_FEXP2, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
+ setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+ setOperationAction(ISD::STRICT_FRINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
+ setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+ setOperationAction(ISD::STRICT_FROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
+ setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
+
+ // Vector reduction default to expand.
+ setOperationAction(ISD::VECREDUCE_FADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMUL, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_ADD, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_MUL, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_AND, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_OR, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_XOR, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_SMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_UMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_UMIN, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMAX, VT, Expand);
+ setOperationAction(ISD::VECREDUCE_FMIN, VT, Expand);
}
// Most targets ignore the @llvm.prefetch intrinsic.
@@ -688,6 +767,10 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::FRINT, VT, Expand);
setOperationAction(ISD::FTRUNC, VT, Expand);
setOperationAction(ISD::FROUND, VT, Expand);
+ setOperationAction(ISD::LROUND, VT, Expand);
+ setOperationAction(ISD::LLROUND, VT, Expand);
+ setOperationAction(ISD::LRINT, VT, Expand);
+ setOperationAction(ISD::LLRINT, VT, Expand);
}
// Default ISD::TRAP to expand (which turns it into abort).
@@ -700,7 +783,7 @@ void TargetLoweringBase::initActions() {
MVT TargetLoweringBase::getScalarShiftAmountTy(const DataLayout &DL,
EVT) const {
- return MVT::getIntegerVT(8 * DL.getPointerSize(0));
+ return MVT::getIntegerVT(DL.getPointerSizeInBits(0));
}
EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
@@ -985,16 +1068,16 @@ TargetLoweringBase::emitPatchPoint(MachineInstr &InitialMI,
// Add a new memory operand for this FI.
assert(MFI.getObjectOffset(FI) != -1);
- auto Flags = MachineMemOperand::MOLoad;
- if (MI->getOpcode() == TargetOpcode::STATEPOINT) {
- Flags |= MachineMemOperand::MOStore;
- Flags |= MachineMemOperand::MOVolatile;
+ // Note: STATEPOINT MMOs are added during SelectionDAG. STACKMAP and
+ // PATCHPOINT should be updated to do the same. (TODO)
+ if (MI->getOpcode() != TargetOpcode::STATEPOINT) {
+ auto Flags = MachineMemOperand::MOLoad;
+ MachineMemOperand *MMO = MF.getMachineMemOperand(
+ MachinePointerInfo::getFixedStack(MF, FI), Flags,
+ MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
+ MIB->addMemOperand(MF, MMO);
}
- MachineMemOperand *MMO = MF.getMachineMemOperand(
- MachinePointerInfo::getFixedStack(MF, FI), Flags,
- MF.getDataLayout().getPointerSize(), MFI.getObjectAlignment(FI));
- MIB->addMemOperand(MF, MMO);
-
+
// Replace the instruction and update the operand index.
MBB->insert(MachineBasicBlock::iterator(MI), MIB);
OperIdx += (MIB->getNumOperands() - MI->getNumOperands()) - 1;
@@ -1393,7 +1476,7 @@ void llvm::GetReturnInfo(CallingConv::ID CC, Type *ReturnType,
Flags.setZExt();
for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
+ Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isfixed=*/true, 0, 0));
}
}
@@ -1409,6 +1492,7 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
unsigned AddrSpace,
unsigned Alignment,
+ MachineMemOperand::Flags Flags,
bool *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
@@ -1424,7 +1508,15 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
}
// This is a misaligned access.
- return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Fast);
+ return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
+ const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO,
+ bool *Fast) const {
+ return allowsMemoryAccess(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlignment(), MMO.getFlags(), Fast);
}
BranchProbability TargetLoweringBase::getPredictableBranchThreshold() const {
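
The new MMO-based overload saves callers from unpacking address space,
alignment, and flags by hand. A hypothetical call site (the helper name is
made up; the overload itself is as declared above):

    #include "llvm/CodeGen/MachineMemOperand.h"
    #include "llvm/CodeGen/TargetLowering.h"
    using namespace llvm;

    // Query legality and speed straight from an existing memory operand.
    static bool isFastAccess(const TargetLoweringBase &TLI, LLVMContext &Ctx,
                             const DataLayout &DL, EVT VT,
                             const MachineMemOperand &MMO) {
      bool Fast = false;
      return TLI.allowsMemoryAccess(Ctx, DL, VT, MMO, &Fast) && Fast;
    }
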
@@ -1447,6 +1539,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
case Switch: return 0;
case IndirectBr: return 0;
case Invoke: return 0;
+ case CallBr: return 0;
case Resume: return 0;
case Unreachable: return 0;
case CleanupRet: return 0;
@@ -1580,8 +1673,8 @@ Value *TargetLoweringBase::getSafeStackPointerLocation(IRBuilder<> &IRB) const {
// thread's unsafe stack pointer.
Module *M = IRB.GetInsertBlock()->getParent()->getParent();
Type *StackPtrTy = Type::getInt8PtrTy(M->getContext());
- Value *Fn = M->getOrInsertFunction("__safestack_pointer_address",
- StackPtrTy->getPointerTo(0));
+ FunctionCallee Fn = M->getOrInsertFunction("__safestack_pointer_address",
+ StackPtrTy->getPointerTo(0));
return IRB.CreateCall(Fn);
}
@@ -1656,7 +1749,7 @@ Value *TargetLoweringBase::getSDagStackGuard(const Module &M) const {
return M.getNamedValue("__stack_chk_guard");
}
-Value *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
+Function *TargetLoweringBase::getSSPStackGuardCheck(const Module &M) const {
return nullptr;
}
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index cb2fe691d702..4c8f75b237aa 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/TargetLoweringObjectFileImpl.cpp - Object File Info ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -219,6 +218,16 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
PersonalityEncoding = dwarf::DW_EH_PE_absptr;
TTypeEncoding = dwarf::DW_EH_PE_absptr;
}
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
+ break;
+ case Triple::riscv32:
+ case Triple::riscv64:
+ LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
+ PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ TTypeEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |
+ dwarf::DW_EH_PE_sdata4;
+ CallSiteEncoding = dwarf::DW_EH_PE_udata4;
break;
case Triple::sparcv9:
LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4;
@@ -272,6 +281,19 @@ void TargetLoweringObjectFileELF::emitModuleMetadata(MCStreamer &Streamer,
}
}
+ if (NamedMDNode *DependentLibraries = M.getNamedMetadata("llvm.dependent-libraries")) {
+ auto *S = C.getELFSection(".deplibs", ELF::SHT_LLVM_DEPENDENT_LIBRARIES,
+ ELF::SHF_MERGE | ELF::SHF_STRINGS, 1, "");
+
+ Streamer.SwitchSection(S);
+
+ for (const auto &Operand : DependentLibraries->operands()) {
+ Streamer.EmitBytes(
+ cast<MDString>(cast<MDNode>(Operand)->getOperand(0))->getString());
+ Streamer.EmitIntValue(0, 1);
+ }
+ }
+
unsigned Version = 0;
unsigned Flags = 0;
StringRef Section;
@@ -1458,7 +1480,7 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
const TargetMachine &TM) {
TargetLoweringObjectFile::Initialize(Ctx, TM);
const Triple &T = TM.getTargetTriple();
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
StaticCtorSection =
Ctx.getCOFFSection(".CRT$XCU", COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
@@ -1484,7 +1506,7 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
unsigned Priority,
const MCSymbol *KeySym,
MCSectionCOFF *Default) {
- if (T.isKnownWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
+ if (T.isWindowsMSVCEnvironment() || T.isWindowsItaniumEnvironment()) {
// If the priority is the default, use .CRT$XCU, possibly associative.
if (Priority == 65535)
return Ctx.getAssociativeCOFFSection(Default, KeySym, 0);
@@ -1544,9 +1566,7 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
const GlobalValue *LHS, const GlobalValue *RHS,
const TargetMachine &TM) const {
const Triple &T = TM.getTargetTriple();
- if (!T.isKnownWindowsMSVCEnvironment() &&
- !T.isWindowsItaniumEnvironment() &&
- !T.isWindowsCoreCLREnvironment())
+ if (T.isOSCygMing())
return nullptr;
// Our symbols should exist in address space zero, cowardly no-op if
@@ -1694,8 +1714,11 @@ MCSection *TargetLoweringObjectFileWasm::getExplicitSectionGlobal(
Group = C->getName();
}
- return getContext().getWasmSection(Name, Kind, Group,
- MCContext::GenericSectionID);
+ MCSectionWasm* Section =
+ getContext().getWasmSection(Name, Kind, Group,
+ MCContext::GenericSectionID);
+
+ return Section;
}
static MCSectionWasm *selectWasmSectionForGlobal(
@@ -1724,6 +1747,7 @@ static MCSectionWasm *selectWasmSectionForGlobal(
UniqueID = *NextUniqueID;
(*NextUniqueID)++;
}
+
return Ctx.getWasmSection(Name, Kind, Group, UniqueID);
}
diff --git a/lib/CodeGen/TargetOptionsImpl.cpp b/lib/CodeGen/TargetOptionsImpl.cpp
index 3c133fb8594e..039748d817ca 100644
--- a/lib/CodeGen/TargetOptionsImpl.cpp
+++ b/lib/CodeGen/TargetOptionsImpl.cpp
@@ -1,9 +1,8 @@
//===-- TargetOptionsImpl.cpp - Options that apply to all targets ----------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 28126fcf766d..36df02692f86 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -1,9 +1,8 @@
//===- TargetPassConfig.cpp - Target independent code generation passes ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -23,6 +22,7 @@
#include "llvm/Analysis/ScopedNoAliasAA.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/CSEConfigBase.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachinePassRegistry.h"
#include "llvm/CodeGen/Passes.h"
@@ -408,7 +408,7 @@ TargetPassConfig::TargetPassConfig(LLVMTargetMachine &TM, PassManagerBase &pm)
TM.Options.EnableIPRA = EnableIPRA;
else {
// If not explicitly specified, use target default.
- TM.Options.EnableIPRA = TM.useIPRA();
+ TM.Options.EnableIPRA |= TM.useIPRA();
}
if (TM.Options.EnableIPRA)
@@ -646,7 +646,7 @@ void TargetPassConfig::addIRPasses() {
// into optimally-sized loads and compares. The transforms are enabled by a
// target lowering hook.
if (!DisableMergeICmps)
- addPass(createMergeICmpsPass());
+ addPass(createMergeICmpsLegacyPass());
addPass(createExpandMemCmpPass());
}
@@ -815,6 +815,13 @@ bool TargetPassConfig::addCoreISelPasses() {
} else if (addInstSelector())
return true;
+ // Expand pseudo-instructions emitted by ISel. Don't run the verifier before
+ // FinalizeISel.
+ addPass(&FinalizeISelID);
+
+ // Print the instruction selected machine code...
+ printAndVerify("After Instruction Selection");
+
return false;
}
@@ -874,12 +881,6 @@ void TargetPassConfig::addMachinePasses() {
}
}
- // Print the instruction selected machine code...
- printAndVerify("After Instruction Selection");
-
- // Expand pseudo-instructions emitted by ISel.
- addPass(&ExpandISelPseudosID);
-
// Add passes that optimize machine instructions in SSA form.
if (getOptLevel() != CodeGenOpt::None) {
addMachineSSAOptimization();
@@ -898,13 +899,9 @@ void TargetPassConfig::addMachinePasses() {
// Run register allocation and passes that are tightly coupled with it,
// including phi elimination and scheduling.
if (getOptimizeRegAlloc())
- addOptimizedRegAlloc(createRegAllocPass(true));
- else {
- if (RegAlloc != &useDefaultRegisterAllocator &&
- RegAlloc != &createFastRegisterAllocator)
- report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
- addFastRegAlloc(createRegAllocPass(false));
- }
+ addOptimizedRegAlloc();
+ else
+ addFastRegAlloc();
// Run post-ra passes.
addPostRegAlloc();
@@ -1039,10 +1036,6 @@ bool TargetPassConfig::getOptimizeRegAlloc() const {
llvm_unreachable("Invalid optimize-regalloc state");
}
-/// RegisterRegAlloc's global Registry tracks allocator registration.
-MachinePassRegistry<RegisterRegAlloc::FunctionPassCtor>
- RegisterRegAlloc::Registry;
-
/// A dummy default pass factory indicates whether the register allocator is
/// overridden on the command line.
static llvm::once_flag InitializeDefaultRegisterAllocatorFlag;
@@ -1053,12 +1046,8 @@ defaultRegAlloc("default",
useDefaultRegisterAllocator);
static void initializeDefaultRegisterAllocatorOnce() {
- RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault();
-
- if (!Ctor) {
- Ctor = RegAlloc;
+ if (!RegisterRegAlloc::getDefault())
RegisterRegAlloc::setDefault(RegAlloc);
- }
}
/// Instantiate the default register allocator pass for this target for either
@@ -1098,6 +1087,33 @@ FunctionPass *TargetPassConfig::createRegAllocPass(bool Optimized) {
return createTargetRegisterAllocator(Optimized);
}
+bool TargetPassConfig::addRegAssignmentFast() {
+ if (RegAlloc != &useDefaultRegisterAllocator &&
+ RegAlloc != &createFastRegisterAllocator)
+ report_fatal_error("Must use fast (default) register allocator for unoptimized regalloc.");
+
+ addPass(createRegAllocPass(false));
+ return true;
+}
+
+bool TargetPassConfig::addRegAssignmentOptimized() {
+ // Add the selected register allocation pass.
+ addPass(createRegAllocPass(true));
+
+ // Allow targets to change the register assignments before rewriting.
+ addPreRewrite();
+
+ // Finally rewrite virtual registers.
+ addPass(&VirtRegRewriterID);
+ // Perform stack slot coloring and post-ra machine LICM.
+ //
+ // FIXME: Re-enable coloring with register when it's capable of adding
+ // kill markers.
+ addPass(&StackSlotColoringID);
+
+ return true;
+}
+
/// Return true if the default global register allocator is in use and
/// has not been overridden on the command line with '-regalloc=...'
bool TargetPassConfig::usingDefaultRegAlloc() const {
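
With register assignment factored into these hooks, a target customizes the
sequence by overriding them rather than wrapping addOptimizedRegAlloc. A
hypothetical override of the addPreRewrite hook invoked above (class and pass
names are made up for illustration):

    // Run a target pass over the allocator's assignments before
    // VirtRegRewriter makes them permanent.
    bool MyTargetPassConfig::addPreRewrite() {
      addPass(&MyAssignmentFixupID);   // hypothetical pass identifier
      return true;
    }
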
@@ -1106,18 +1122,17 @@ bool TargetPassConfig::usingDefaultRegAlloc() const {
/// Add the minimum set of target-independent passes that are required for
/// register allocation. No coalescing or scheduling.
-void TargetPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addFastRegAlloc() {
addPass(&PHIEliminationID, false);
addPass(&TwoAddressInstructionPassID, false);
- if (RegAllocPass)
- addPass(RegAllocPass);
+ addRegAssignmentFast();
}
/// Add standard target-independent passes that are tightly coupled with
/// optimized register allocation, including coalescing, machine instruction
/// scheduling, and register allocation itself.
-void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
+void TargetPassConfig::addOptimizedRegAlloc() {
addPass(&DetectDeadLanesID, false);
addPass(&ProcessImplicitDefsID, false);
@@ -1149,21 +1164,10 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
// PreRA instruction scheduling.
addPass(&MachineSchedulerID);
- if (RegAllocPass) {
- // Add the selected register allocation pass.
- addPass(RegAllocPass);
-
- // Allow targets to change the register assignments before rewriting.
- addPreRewrite();
-
- // Finally rewrite virtual registers.
- addPass(&VirtRegRewriterID);
-
- // Perform stack slot coloring and post-ra machine LICM.
- //
- // FIXME: Re-enable coloring with register when it's capable of adding
- // kill markers.
- addPass(&StackSlotColoringID);
+ if (addRegAssignmentOptimized()) {
+ // Allow targets to expand pseudo instructions depending on the choice of
+ // registers before MachineCopyPropagation.
+ addPostRewrite();
// Copy propagate to forward register uses and try to eliminate COPYs that
// were not coalesced.
@@ -1221,3 +1225,11 @@ bool TargetPassConfig::isGlobalISelAbortEnabled() const {
bool TargetPassConfig::reportDiagnosticWhenGlobalISelFallback() const {
return TM->Options.GlobalISelAbort == GlobalISelAbortMode::DisableWithDiag;
}
+
+bool TargetPassConfig::isGISelCSEEnabled() const {
+ return true;
+}
+
+std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
+ return make_unique<CSEConfigBase>();
+}
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index 661dc18f7a85..f1b2ecf3243b 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -1,9 +1,8 @@
//==- TargetRegisterInfo.cpp - Target Register Information Implementation --==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -14,6 +13,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -398,6 +398,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
const std::pair<unsigned, SmallVector<unsigned, 4>> &Hints_MRI =
MRI.getRegAllocationHints(VirtReg);
+ SmallSet<unsigned, 32> HintedRegs;
// First hint may be a target hint.
bool Skip = (Hints_MRI.first != 0);
for (auto Reg : Hints_MRI.second) {
@@ -411,6 +412,10 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (VRM && isVirtualRegister(Phys))
Phys = VRM->getPhys(Phys);
+ // Don't add the same reg twice (Hints_MRI may contain multiple virtual
+ // registers allocated to the same physreg).
+ if (!HintedRegs.insert(Phys).second)
+ continue;
// Check that Phys is a valid hint in VirtReg's register class.
if (!isPhysicalRegister(Phys))
continue;
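
The guard added above relies on SmallSet::insert returning a pair whose second
member is false for duplicates. The same first-wins dedup in portable C++
(illustrative only):

    #include <set>
    #include <vector>

    // Keep the first occurrence of each register id, preserving hint order.
    std::vector<unsigned> uniqueInOrder(const std::vector<unsigned> &Regs) {
      std::set<unsigned> Seen;
      std::vector<unsigned> Out;
      for (unsigned R : Regs)
        if (Seen.insert(R).second)
          Out.push_back(R);
      return Out;
    }
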
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 3cff31ad4933..195279719ad4 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -1,9 +1,8 @@
//===- llvm/Target/TargetSchedule.cpp - Sched Machine Model ---------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
diff --git a/lib/CodeGen/TargetSubtargetInfo.cpp b/lib/CodeGen/TargetSubtargetInfo.cpp
index fa29c05fd6c2..59eb2f9c88cb 100644
--- a/lib/CodeGen/TargetSubtargetInfo.cpp
+++ b/lib/CodeGen/TargetSubtargetInfo.cpp
@@ -1,9 +1,8 @@
//===- TargetSubtargetInfo.cpp - General Target Information ----------------==//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -12,24 +11,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetSubtargetInfo.h"
-#include "llvm/ADT/Optional.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/TargetInstrInfo.h"
-#include "llvm/CodeGen/TargetSchedule.h"
-#include "llvm/MC/MCInst.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <string>
using namespace llvm;
TargetSubtargetInfo::TargetSubtargetInfo(
const Triple &TT, StringRef CPU, StringRef FS,
- ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetFeatureKV> PD,
- const SubtargetInfoKV *ProcSched, const MCWriteProcResEntry *WPR,
+ ArrayRef<SubtargetFeatureKV> PF, ArrayRef<SubtargetSubTypeKV> PD,
+ const MCWriteProcResEntry *WPR,
const MCWriteLatencyEntry *WL, const MCReadAdvanceEntry *RA,
const InstrStage *IS, const unsigned *OC, const unsigned *FP)
- : MCSubtargetInfo(TT, CPU, FS, PF, PD, ProcSched, WPR, WL, RA, IS, OC, FP) {
+ : MCSubtargetInfo(TT, CPU, FS, PF, PD, WPR, WL, RA, IS, OC, FP) {
}
TargetSubtargetInfo::~TargetSubtargetInfo() = default;
@@ -67,50 +58,4 @@ bool TargetSubtargetInfo::useAA() const {
return false;
}
-static std::string createSchedInfoStr(unsigned Latency, double RThroughput) {
- static const char *SchedPrefix = " sched: [";
- std::string Comment;
- raw_string_ostream CS(Comment);
- if (RThroughput != 0.0)
- CS << SchedPrefix << Latency << format(":%2.2f", RThroughput)
- << "]";
- else
- CS << SchedPrefix << Latency << ":?]";
- CS.flush();
- return Comment;
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(const MachineInstr &MI) const {
- if (MI.isPseudo() || MI.isTerminator())
- return std::string();
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency = TSchedModel.computeInstrLatency(&MI);
- double RThroughput = TSchedModel.computeReciprocalThroughput(&MI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-/// Returns string representation of scheduler comment
-std::string TargetSubtargetInfo::getSchedInfoStr(MCInst const &MCI) const {
- // We don't cache TSchedModel because it depends on TargetInstrInfo
- // that could be changed during the compilation
- TargetSchedModel TSchedModel;
- TSchedModel.init(this);
- unsigned Latency;
- if (TSchedModel.hasInstrSchedModel())
- Latency = TSchedModel.computeInstrLatency(MCI);
- else if (TSchedModel.hasInstrItineraries()) {
- auto *ItinData = TSchedModel.getInstrItineraries();
- Latency = ItinData->getStageLatency(
- getInstrInfo()->get(MCI.getOpcode()).getSchedClass());
- } else
- return std::string();
- double RThroughput = TSchedModel.computeReciprocalThroughput(MCI);
- return createSchedInfoStr(Latency, RThroughput);
-}
-
-void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {
-}
+void TargetSubtargetInfo::mirFileLoaded(MachineFunction &MF) const {}
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 4b72f6a84ca1..43d876646967 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1,9 +1,8 @@
//===- TwoAddressInstructionPass.cpp - Two-Address instruction pass -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1245,8 +1244,13 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
++NumAggrCommuted;
// There might be more than two commutable operands, update BaseOp and
// continue scanning.
+ // FIXME: This assumes that the new instruction's operands are in the
+ // same positions and were simply swapped.
BaseOpReg = OtherOpReg;
BaseOpKilled = OtherOpKilled;
+ // Resamples OpsNum in case the number of operands was reduced. This
+ // happens with X86.
+ OpsNum = MI->getDesc().getNumOperands();
continue;
}
// If this was a commute based on kill, we won't do better continuing.
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 5288ca672774..177bab32bccc 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -1,9 +1,8 @@
//===-- UnreachableBlockElim.cpp - Remove unreachable blocks for codegen --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -38,43 +37,13 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/Pass.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
using namespace llvm;
-static bool eliminateUnreachableBlock(Function &F) {
- df_iterator_default_set<BasicBlock*> Reachable;
-
- // Mark all reachable blocks.
- for (BasicBlock *BB : depth_first_ext(&F, Reachable))
- (void)BB/* Mark all reachable blocks */;
-
- // Loop over all dead blocks, remembering them and deleting all instructions
- // in them.
- std::vector<BasicBlock*> DeadBlocks;
- for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
- if (!Reachable.count(&*I)) {
- BasicBlock *BB = &*I;
- DeadBlocks.push_back(BB);
- while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
- PN->replaceAllUsesWith(Constant::getNullValue(PN->getType()));
- BB->getInstList().pop_front();
- }
- for (succ_iterator SI = succ_begin(BB), E = succ_end(BB); SI != E; ++SI)
- (*SI)->removePredecessor(BB);
- BB->dropAllReferences();
- }
-
- // Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
- DeadBlocks[i]->eraseFromParent();
- }
-
- return !DeadBlocks.empty();
-}
-
namespace {
class UnreachableBlockElimLegacyPass : public FunctionPass {
bool runOnFunction(Function &F) override {
- return eliminateUnreachableBlock(F);
+ return llvm::EliminateUnreachableBlocks(F);
}
public:
@@ -99,7 +68,7 @@ FunctionPass *llvm::createUnreachableBlockEliminationPass() {
PreservedAnalyses UnreachableBlockElimPass::run(Function &F,
FunctionAnalysisManager &AM) {
- bool Changed = eliminateUnreachableBlock(F);
+ bool Changed = llvm::EliminateUnreachableBlocks(F);
if (!Changed)
return PreservedAnalyses::all();
PreservedAnalyses PA;
diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp
index adb7075de651..a911cdcbec9d 100644
--- a/lib/CodeGen/ValueTypes.cpp
+++ b/lib/CodeGen/ValueTypes.cpp
@@ -1,9 +1,8 @@
//===----------- ValueTypes.cpp - Implementation of EVT methods -----------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
@@ -166,11 +165,18 @@ std::string EVT::getEVTString() const {
case MVT::v128i16: return "v128i16";
case MVT::v1i32: return "v1i32";
case MVT::v2i32: return "v2i32";
+ case MVT::v3i32: return "v3i32";
case MVT::v4i32: return "v4i32";
+ case MVT::v5i32: return "v5i32";
case MVT::v8i32: return "v8i32";
case MVT::v16i32: return "v16i32";
case MVT::v32i32: return "v32i32";
case MVT::v64i32: return "v64i32";
+ case MVT::v128i32: return "v128i32";
+ case MVT::v256i32: return "v256i32";
+ case MVT::v512i32: return "v512i32";
+ case MVT::v1024i32:return "v1024i32";
+ case MVT::v2048i32:return "v2048i32";
case MVT::v1i64: return "v1i64";
case MVT::v2i64: return "v2i64";
case MVT::v4i64: return "v4i64";
@@ -183,16 +189,25 @@ std::string EVT::getEVTString() const {
case MVT::v2f16: return "v2f16";
case MVT::v4f16: return "v4f16";
case MVT::v8f16: return "v8f16";
+ case MVT::v3f32: return "v3f32";
case MVT::v4f32: return "v4f32";
+ case MVT::v5f32: return "v5f32";
case MVT::v8f32: return "v8f32";
case MVT::v16f32: return "v16f32";
+ case MVT::v32f32: return "v32f32";
+ case MVT::v64f32: return "v64f32";
+ case MVT::v128f32: return "v128f32";
+ case MVT::v256f32: return "v256f32";
+ case MVT::v512f32: return "v512f32";
+ case MVT::v1024f32:return "v1024f32";
+ case MVT::v2048f32:return "v2048f32";
case MVT::v1f64: return "v1f64";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
- case MVT::ExceptRef: return "ExceptRef";
+ case MVT::exnref : return "exnref";
}
}
@@ -247,11 +262,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v128i16: return VectorType::get(Type::getInt16Ty(Context), 128);
case MVT::v1i32: return VectorType::get(Type::getInt32Ty(Context), 1);
case MVT::v2i32: return VectorType::get(Type::getInt32Ty(Context), 2);
+ case MVT::v3i32: return VectorType::get(Type::getInt32Ty(Context), 3);
case MVT::v4i32: return VectorType::get(Type::getInt32Ty(Context), 4);
+ case MVT::v5i32: return VectorType::get(Type::getInt32Ty(Context), 5);
case MVT::v8i32: return VectorType::get(Type::getInt32Ty(Context), 8);
case MVT::v16i32: return VectorType::get(Type::getInt32Ty(Context), 16);
case MVT::v32i32: return VectorType::get(Type::getInt32Ty(Context), 32);
case MVT::v64i32: return VectorType::get(Type::getInt32Ty(Context), 64);
+ case MVT::v128i32: return VectorType::get(Type::getInt32Ty(Context), 128);
+ case MVT::v256i32: return VectorType::get(Type::getInt32Ty(Context), 256);
+ case MVT::v512i32: return VectorType::get(Type::getInt32Ty(Context), 512);
+ case MVT::v1024i32:return VectorType::get(Type::getInt32Ty(Context), 1024);
+ case MVT::v2048i32:return VectorType::get(Type::getInt32Ty(Context), 2048);
case MVT::v1i64: return VectorType::get(Type::getInt64Ty(Context), 1);
case MVT::v2i64: return VectorType::get(Type::getInt64Ty(Context), 2);
case MVT::v4i64: return VectorType::get(Type::getInt64Ty(Context), 4);
@@ -264,9 +286,18 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
+ case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4);
+ case MVT::v5f32: return VectorType::get(Type::getFloatTy(Context), 5);
case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8);
- case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16);
+ case MVT::v32f32: return VectorType::get(Type::getFloatTy(Context), 32);
+ case MVT::v64f32: return VectorType::get(Type::getFloatTy(Context), 64);
+ case MVT::v128f32: return VectorType::get(Type::getFloatTy(Context), 128);
+ case MVT::v256f32: return VectorType::get(Type::getFloatTy(Context), 256);
+ case MVT::v512f32: return VectorType::get(Type::getFloatTy(Context), 512);
+ case MVT::v1024f32:return VectorType::get(Type::getFloatTy(Context), 1024);
+ case MVT::v2048f32:return VectorType::get(Type::getFloatTy(Context), 2048);
case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1);
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ed7bef667e77..4a06704a8876 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -1,9 +1,8 @@
//===- llvm/CodeGen/VirtRegMap.cpp - Virtual Register Map -----------------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -385,7 +384,7 @@ void VirtRegRewriter::handleIdentityCopy(MachineInstr &MI) const {
// give us additional liveness information: The target (super-)register
// must not be valid before this point. Replace the COPY with a KILL
// instruction to maintain this information.
- if (MI.getOperand(0).isUndef() || MI.getNumOperands() > 2) {
+ if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
MI.setDesc(TII->get(TargetOpcode::KILL));
LLVM_DEBUG(dbgs() << " replace by: " << MI);
return;
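
The one-character fix above hinges on operand order: for an identity COPY, operand 0 is the destination and operand 1 the source, and it is the source's undef flag that signals no value is live there. A paraphrased sketch of the decision (hypothetical helper, not the pass's exact code):

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"

void handleIdentityCopySketch(llvm::MachineInstr &MI,
                              const llvm::TargetInstrInfo &TII) {
  // An identity COPY whose *source* is undef, or that carries extra implicit
  // operands, still encodes liveness information: keep a KILL in its place.
  if (MI.getOperand(1).isUndef() || MI.getNumOperands() > 2) {
    MI.setDesc(TII.get(llvm::TargetOpcode::KILL));
    return;
  }
  // Otherwise the copy is a pure no-op and can simply be deleted.
  MI.eraseFromParent();
}
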
diff --git a/lib/CodeGen/WasmEHPrepare.cpp b/lib/CodeGen/WasmEHPrepare.cpp
index e5002eb95346..865a1cfbf43a 100644
--- a/lib/CodeGen/WasmEHPrepare.cpp
+++ b/lib/CodeGen/WasmEHPrepare.cpp
@@ -1,14 +1,14 @@
//===-- WasmEHPrepare - Prepare exception handling for WebAssembly -------===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation is designed for use by code generators which use
-// WebAssembly exception handling scheme.
+// the WebAssembly exception handling scheme. It currently supports C++
+// exceptions.
//
// WebAssembly exception handling uses Windows exception IR for the middle level
// representation. This pass does the following transformation for every
@@ -23,53 +23,20 @@
//
// - After:
// catchpad ...
-// exn = wasm.catch(0); // 0 is a tag for C++
-// wasm.landingpad.index(index);
+// exn = wasm.extract.exception();
// // Only add below in case it's not a single catch (...)
+// wasm.landingpad.index(index);
// __wasm_lpad_context.lpad_index = index;
// __wasm_lpad_context.lsda = wasm.lsda();
// _Unwind_CallPersonality(exn);
-// int selector = __wasm.landingpad_context.selector;
+// selector = __wasm_lpad_context.selector;
// ...
//
-// Also, does the following for a cleanuppad block with a call to
-// __clang_call_terminate():
-// - Before:
-// cleanuppad ...
-// exn = wasm.get.exception();
-// __clang_call_terminate(exn);
-//
-// - After:
-// cleanuppad ...
-// exn = wasm.catch(0); // 0 is a tag for C++
-// __clang_call_terminate(exn);
-//
-//
-// * Background: WebAssembly EH instructions
-// WebAssembly's try and catch instructions are structured as follows:
-// try
-// instruction*
-// catch (C++ tag)
-// instruction*
-// ...
-// catch_all
-// instruction*
-// try_end
-//
-// A catch instruction in WebAssembly does not correspond to a C++ catch clause.
-// In WebAssembly, there is a single catch instruction for all C++ exceptions.
-// There can be more catch instructions for exceptions in other languages, but
-// they are not generated for now. catch_all catches all exceptions including
-// foreign exceptions (e.g. JavaScript). We turn catchpads into catch (C++ tag)
-// and cleanuppads into catch_all, with one exception: cleanuppad with a call to
-// __clang_call_terminate should be both in catch (C++ tag) and catch_all.
-//
//
// * Background: Direct personality function call
// In WebAssembly EH, the VM is responsible for unwinding the stack once an
// exception is thrown. After the stack is unwound, the control flow is
-// transfered to WebAssembly 'catch' instruction, which returns a caught
-// exception object.
+// transferred to the WebAssembly 'catch' instruction.
//
// Unwinding the stack is not done by libunwind but by the VM, so the personality
// function in libcxxabi cannot be called from libunwind during the unwinding
@@ -137,19 +104,19 @@ class WasmEHPrepare : public FunctionPass {
Value *LSDAField = nullptr; // lsda field
Value *SelectorField = nullptr; // selector
- Function *ThrowF = nullptr; // wasm.throw() intrinsic
- Function *CatchF = nullptr; // wasm.catch.extract() intrinsic
- Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
- Function *LSDAF = nullptr; // wasm.lsda() intrinsic
- Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
- Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
- Function *CallPersonalityF = nullptr; // _Unwind_CallPersonality() wrapper
- Function *ClangCallTermF = nullptr; // __clang_call_terminate() function
+ Function *ThrowF = nullptr; // wasm.throw() intrinsic
+ Function *LPadIndexF = nullptr; // wasm.landingpad.index() intrinsic
+ Function *LSDAF = nullptr; // wasm.lsda() intrinsic
+ Function *GetExnF = nullptr; // wasm.get.exception() intrinsic
+ Function *ExtractExnF = nullptr; // wasm.extract.exception() intrinsic
+ Function *GetSelectorF = nullptr; // wasm.get.ehselector() intrinsic
+ FunctionCallee CallPersonalityF =
+ nullptr; // _Unwind_CallPersonality() wrapper
bool prepareEHPads(Function &F);
bool prepareThrows(Function &F);
- void prepareEHPad(BasicBlock *BB, unsigned Index);
+ void prepareEHPad(BasicBlock *BB, bool NeedLSDA, unsigned Index = 0);
void prepareTerminateCleanupPad(BasicBlock *BB);
public:
@@ -209,14 +176,12 @@ bool WasmEHPrepare::prepareThrows(Function &F) {
// wasm.throw() intrinsic, which will be lowered to the wasm 'throw' instruction.
ThrowF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_throw);
-
// Insert an unreachable instruction after a call to @llvm.wasm.throw and
// delete all following instructions within the BB, and delete all the dead
// children of the BB as well.
for (User *U : ThrowF->users()) {
- // A call to @llvm.wasm.throw() is only generated from
- // __builtin_wasm_throw() builtin call within libcxxabi, and cannot be an
- // InvokeInst.
+ // A call to @llvm.wasm.throw() is only generated from the __cxa_throw()
+ // call within libcxxabi, and cannot be an InvokeInst.
auto *ThrowI = cast<CallInst>(U);
if (ThrowI->getFunction() != &F)
continue;
@@ -263,8 +228,6 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
SelectorField = IRB.CreateConstGEP2_32(LPadContextTy, LPadContextGV, 0, 2,
"selector_gep");
- // wasm.catch() intinsic, which will be lowered to wasm 'catch' instruction.
- CatchF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_catch);
// wasm.landingpad.index() intrinsic, which is to specify landingpad index
LPadIndexF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_landingpad_index);
// wasm.lsda() intrinsic. Returns the address of LSDA table for the current
@@ -275,14 +238,18 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
GetExnF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_exception);
GetSelectorF = Intrinsic::getDeclaration(&M, Intrinsic::wasm_get_ehselector);
- // _Unwind_CallPersonality() wrapper function, which calls the personality
- CallPersonalityF = cast<Function>(M.getOrInsertFunction(
- "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy()));
- CallPersonalityF->setDoesNotThrow();
+ // wasm.extract.exception() is the same as wasm.get.exception() except that
+ // it does not take a token argument. It is lowered to the EXTRACT_EXCEPTION
+ // pseudo instruction during instruction selection, which is later expanded
+ // using the 'br_on_exn' instruction.
+ ExtractExnF =
+ Intrinsic::getDeclaration(&M, Intrinsic::wasm_extract_exception);
- // __clang_call_terminate() function, which is inserted by clang in case a
- // cleanup throws
- ClangCallTermF = M.getFunction("__clang_call_terminate");
+ // _Unwind_CallPersonality() wrapper function, which calls the personality
+ CallPersonalityF = M.getOrInsertFunction(
+ "_Unwind_CallPersonality", IRB.getInt32Ty(), IRB.getInt8PtrTy());
+ if (Function *F = dyn_cast<Function>(CallPersonalityF.getCallee()))
+ F->setDoesNotThrow();
unsigned Index = 0;
for (auto *BB : CatchPads) {
@@ -290,60 +257,52 @@ bool WasmEHPrepare::prepareEHPads(Function &F) {
// In case of a single catch (...), we don't need to emit LSDA
if (CPI->getNumArgOperands() == 1 &&
cast<Constant>(CPI->getArgOperand(0))->isNullValue())
- prepareEHPad(BB, -1);
+ prepareEHPad(BB, false);
else
- prepareEHPad(BB, Index++);
+ prepareEHPad(BB, true, Index++);
}
- if (!ClangCallTermF)
- return !CatchPads.empty();
-
- // Cleanuppads will turn into catch_all later, but cleanuppads with a call to
- // __clang_call_terminate() is a special case. __clang_call_terminate() takes
- // an exception object, so we have to duplicate call in both 'catch <C++ tag>'
- // and 'catch_all' clauses. Here we only insert a call to catch; the
- // duplication will be done later. In catch_all, the exception object will be
- // set to null.
+ // Cleanup pads don't need LSDA.
for (auto *BB : CleanupPads)
- for (auto &I : *BB)
- if (auto *CI = dyn_cast<CallInst>(&I))
- if (CI->getCalledValue() == ClangCallTermF)
- prepareEHPad(BB, -1);
+ prepareEHPad(BB, false);
return true;
}
-void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
+// Prepare an EH pad for Wasm EH handling. If NeedLSDA is false, Index is
+// ignored.
+void WasmEHPrepare::prepareEHPad(BasicBlock *BB, bool NeedLSDA,
+ unsigned Index) {
assert(BB->isEHPad() && "BB is not an EHPad!");
IRBuilder<> IRB(BB->getContext());
-
IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
- // The argument to wasm.catch() is the tag for C++ exceptions, which we set to
- // 0 for this module.
- // Pseudocode: void *exn = wasm.catch(0);
- Instruction *Exn = IRB.CreateCall(CatchF, IRB.getInt32(0), "exn");
- // Replace the return value of wasm.get.exception() with the return value from
- // wasm.catch().
+
auto *FPI = cast<FuncletPadInst>(BB->getFirstNonPHI());
Instruction *GetExnCI = nullptr, *GetSelectorCI = nullptr;
for (auto &U : FPI->uses()) {
if (auto *CI = dyn_cast<CallInst>(U.getUser())) {
if (CI->getCalledValue() == GetExnF)
GetExnCI = CI;
- else if (CI->getCalledValue() == GetSelectorF)
+ if (CI->getCalledValue() == GetSelectorF)
GetSelectorCI = CI;
}
}
- assert(GetExnCI && "wasm.get.exception() call does not exist");
- GetExnCI->replaceAllUsesWith(Exn);
+ // Cleanup pads without a __clang_call_terminate call have neither a
+ // wasm.get.exception() nor a wasm.get.ehselector() call, so there is
+ // nothing to do.
+ if (!GetExnCI) {
+ assert(!GetSelectorCI &&
+ "wasm.get.ehselector() cannot exist w/o wasm.get.exception()");
+ return;
+ }
+
+ Instruction *ExtractExnCI = IRB.CreateCall(ExtractExnF, {}, "exn");
+ GetExnCI->replaceAllUsesWith(ExtractExnCI);
GetExnCI->eraseFromParent();
// In case it is a catchpad with a single catch (...) or a cleanuppad, we don't
// need to call personality function because we don't need a selector.
- if (FPI->getNumArgOperands() == 0 ||
- (FPI->getNumArgOperands() == 1 &&
- cast<Constant>(FPI->getArgOperand(0))->isNullValue())) {
+ if (!NeedLSDA) {
if (GetSelectorCI) {
assert(GetSelectorCI->use_empty() &&
"wasm.get.ehselector() still has uses!");
@@ -351,7 +310,7 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
}
return;
}
- IRB.SetInsertPoint(Exn->getNextNode());
+ IRB.SetInsertPoint(ExtractExnCI->getNextNode());
// This is to create a map of <landingpad EH label, landingpad index> in
// SelectionDAGISel, which is to be used in EHStreamer to emit LSDA tables.
@@ -373,12 +332,13 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
IRB.CreateStore(IRB.CreateCall(LSDAF), LSDAField);
// Pseudocode: _Unwind_CallPersonality(exn);
- CallInst *PersCI =
- IRB.CreateCall(CallPersonalityF, Exn, OperandBundleDef("funclet", CPI));
+ CallInst *PersCI = IRB.CreateCall(CallPersonalityF, ExtractExnCI,
+ OperandBundleDef("funclet", CPI));
PersCI->setDoesNotThrow();
// Pseudocode: int selector = __wasm_lpad_context.selector;
- Instruction *Selector = IRB.CreateLoad(SelectorField, "selector");
+ Instruction *Selector =
+ IRB.CreateLoad(IRB.getInt32Ty(), SelectorField, "selector");
// Replace the return value from wasm.get.ehselector() with the selector value
// loaded from __wasm_lpad_context.selector.
@@ -388,15 +348,15 @@ void WasmEHPrepare::prepareEHPad(BasicBlock *BB, unsigned Index) {
}
void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
+ // If an exception is not caught by a catchpad (i.e., it is a foreign
+ // exception), it will unwind to its parent catchswitch's unwind destination.
+ // We don't record an unwind destination for cleanuppads because a cleanuppad
+ // (lowered to catch_all) catches every exception, so nothing unwinds past it.
for (const auto &BB : *F) {
if (!BB.isEHPad())
continue;
const Instruction *Pad = BB.getFirstNonPHI();
- // If an exception is not caught by a catchpad (i.e., it is a foreign
- // exception), it will unwind to its parent catchswitch's unwind
- // destination. We don't record an unwind destination for cleanuppads
- // because every exception should be caught by it.
if (const auto *CatchPad = dyn_cast<CatchPadInst>(Pad)) {
const auto *UnwindBB = CatchPad->getCatchSwitch()->getUnwindDest();
if (!UnwindBB)
@@ -409,22 +369,4 @@ void llvm::calculateWasmEHInfo(const Function *F, WasmEHFuncInfo &EHInfo) {
EHInfo.setEHPadUnwindDest(&BB, UnwindBB);
}
}
-
- // Record the unwind destination for invoke and cleanupret instructions.
- for (const auto &BB : *F) {
- const Instruction *TI = BB.getTerminator();
- BasicBlock *UnwindBB = nullptr;
- if (const auto *Invoke = dyn_cast<InvokeInst>(TI))
- UnwindBB = Invoke->getUnwindDest();
- else if (const auto *CleanupRet = dyn_cast<CleanupReturnInst>(TI))
- UnwindBB = CleanupRet->getUnwindDest();
- if (!UnwindBB)
- continue;
- const Instruction *UnwindPad = UnwindBB->getFirstNonPHI();
- if (const auto *CatchSwitch = dyn_cast<CatchSwitchInst>(UnwindPad))
- // Currently there should be only one handler per a catchswitch.
- EHInfo.setThrowUnwindDest(&BB, *CatchSwitch->handlers().begin());
- else // cleanuppad
- EHInfo.setThrowUnwindDest(&BB, UnwindBB);
- }
}
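
Taken together, the hunks above reduce the per-pad rewrite to a single token-free intrinsic call replacing wasm.get.exception(). A condensed sketch of that replacement, assuming ExtractExnF holds the wasm.extract.exception() declaration obtained earlier in the pass:

#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"

void rewriteGetExceptionSketch(llvm::BasicBlock *BB,
                               llvm::Function *ExtractExnF,
                               llvm::Instruction *GetExnCI) {
  llvm::IRBuilder<> IRB(BB->getContext());
  IRB.SetInsertPoint(&*BB->getFirstInsertionPt());
  // The token-free call is lowered to the EXTRACT_EXCEPTION pseudo during
  // instruction selection and expanded with 'br_on_exn' later.
  llvm::Instruction *Exn = IRB.CreateCall(ExtractExnF, {}, "exn");
  GetExnCI->replaceAllUsesWith(Exn);
  GetExnCI->eraseFromParent();
}
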
diff --git a/lib/CodeGen/WinEHPrepare.cpp b/lib/CodeGen/WinEHPrepare.cpp
index 6a15240fa6e0..cdf79374e974 100644
--- a/lib/CodeGen/WinEHPrepare.cpp
+++ b/lib/CodeGen/WinEHPrepare.cpp
@@ -1,9 +1,8 @@
//===-- WinEHPrepare - Prepare exception handling for code generation ---===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -1080,7 +1079,8 @@ AllocaInst *WinEHPrepare::insertPHILoads(PHINode *PN, Function &F) {
SpillSlot = new AllocaInst(PN->getType(), DL->getAllocaAddrSpace(), nullptr,
Twine(PN->getName(), ".wineh.spillslot"),
&F.getEntryBlock().front());
- Value *V = new LoadInst(SpillSlot, Twine(PN->getName(), ".wineh.reload"),
+ Value *V = new LoadInst(PN->getType(), SpillSlot,
+ Twine(PN->getName(), ".wineh.reload"),
&*PHIBlock->getFirstInsertionPt());
PN->replaceAllUsesWith(V);
return SpillSlot;
@@ -1222,14 +1222,16 @@ void WinEHPrepare::replaceUseWithLoad(Value *V, Use &U, AllocaInst *&SpillSlot,
Value *&Load = Loads[IncomingBlock];
// Insert the load into the predecessor block
if (!Load)
- Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
- /*Volatile=*/false, IncomingBlock->getTerminator());
+ Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, IncomingBlock->getTerminator());
U.set(Load);
} else {
// Reload right before the old use.
- auto *Load = new LoadInst(SpillSlot, Twine(V->getName(), ".wineh.reload"),
- /*Volatile=*/false, UsingInst);
+ auto *Load = new LoadInst(V->getType(), SpillSlot,
+ Twine(V->getName(), ".wineh.reload"),
+ /*isVolatile=*/false, UsingInst);
U.set(Load);
}
}
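
Both WinEHPrepare hunks are the same mechanical migration: the LoadInst constructor now takes the loaded type explicitly instead of deriving it from the pointer operand (groundwork for opaque pointers). A minimal illustration with a hypothetical helper:

#include "llvm/IR/Instructions.h"

llvm::Value *reloadSketch(llvm::AllocaInst *SpillSlot,
                          llvm::Instruction *InsertBefore) {
  // Before: new LoadInst(SpillSlot, "x.wineh.reload", InsertBefore);
  // After: the loaded type is spelled out explicitly.
  return new llvm::LoadInst(SpillSlot->getAllocatedType(), SpillSlot,
                            "x.wineh.reload", InsertBefore);
}
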
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 32a7457c2060..19c59e9542b4 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -1,9 +1,8 @@
//===- XRayInstrumentation.cpp - Adds XRay instrumentation to functions. --===//
//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
@@ -111,6 +110,8 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
for (auto &MO : T.operands())
MIB.add(MO);
Terminators.push_back(&T);
+ if (T.isCall())
+ MF.updateCallSiteInfo(&T);
}
}
}
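
The XRay change is small but easy to misread: when a return-like terminator that is also a call (a tail call being made patchable) gets replaced, the MachineFunction's per-call-site bookkeeping must be updated before the old instruction is erased. A hedged sketch of the loop's shape (the MIB-building step is elided):

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"

void patchTerminatorsSketch(llvm::MachineFunction &MF,
                            llvm::MachineBasicBlock &MBB) {
  for (llvm::MachineInstr &T : MBB.terminators()) {
    // ... build the PATCHABLE_RET replacement and copy T's operands onto it,
    // as in the hunk above ...
    if (T.isCall())
      MF.updateCallSiteInfo(&T); // refresh info keyed by the old instruction
  }
}
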