Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 16
-rw-r--r--  lib/CodeGen/Analysis.cpp | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 255
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/ByteStreamer.h | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 116
-rw-r--r--  lib/CodeGen/AsmPrinter/CodeViewDebug.h | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/DebugLocStream.h | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 176
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 31
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 644
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 22
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.cpp | 95
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfExpression.h | 95
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfFile.h | 19
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.cpp | 47
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfUnit.h | 14
-rw-r--r--  lib/CodeGen/AsmPrinter/EHStreamer.cpp | 6
-rw-r--r--  lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 4
-rw-r--r--  lib/CodeGen/AsmPrinter/WinException.cpp | 3
-rw-r--r--  lib/CodeGen/AtomicExpandPass.cpp | 12
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 34
-rw-r--r--  lib/CodeGen/BranchRelaxation.cpp | 22
-rw-r--r--  lib/CodeGen/BreakFalseDeps.cpp | 23
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 22
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 42
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 5
-rw-r--r--  lib/CodeGen/CodeGenPrepare.cpp | 122
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 9
-rw-r--r--  lib/CodeGen/DFAPacketizer.cpp | 81
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 12
-rw-r--r--  lib/CodeGen/DetectDeadLanes.cpp | 56
-rw-r--r--  lib/CodeGen/EarlyIfConversion.cpp | 345
-rw-r--r--  lib/CodeGen/ExecutionDomainFix.cpp | 1
-rw-r--r--  lib/CodeGen/ExpandMemCmp.cpp | 2
-rw-r--r--  lib/CodeGen/ExpandPostRAPseudos.cpp | 10
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 2
-rw-r--r--  lib/CodeGen/GCRootLowering.cpp | 4
-rw-r--r--  lib/CodeGen/GlobalISel/CSEInfo.cpp | 7
-rw-r--r--  lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 11
-rw-r--r--  lib/CodeGen/GlobalISel/CallLowering.cpp | 288
-rw-r--r--  lib/CodeGen/GlobalISel/Combiner.cpp | 14
-rw-r--r--  lib/CodeGen/GlobalISel/CombinerHelper.cpp | 919
-rw-r--r--  lib/CodeGen/GlobalISel/GISelKnownBits.cpp | 383
-rw-r--r--  lib/CodeGen/GlobalISel/IRTranslator.cpp | 392
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelect.cpp | 38
-rw-r--r--  lib/CodeGen/GlobalISel/InstructionSelector.cpp | 2
-rw-r--r--  lib/CodeGen/GlobalISel/Legalizer.cpp | 35
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 978
-rw-r--r--  lib/CodeGen/GlobalISel/LegalizerInfo.cpp | 42
-rw-r--r--  lib/CodeGen/GlobalISel/Localizer.cpp | 11
-rw-r--r--  lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 93
-rw-r--r--  lib/CodeGen/GlobalISel/RegBankSelect.cpp | 13
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBank.cpp | 1
-rw-r--r--  lib/CodeGen/GlobalISel/RegisterBankInfo.cpp | 17
-rw-r--r--  lib/CodeGen/GlobalISel/Utils.cpp | 98
-rw-r--r--  lib/CodeGen/GlobalMerge.cpp | 8
-rw-r--r--  lib/CodeGen/HardwareLoops.cpp | 2
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 200
-rw-r--r--  lib/CodeGen/ImplicitNullChecks.cpp | 8
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 22
-rw-r--r--  lib/CodeGen/InterleavedLoadCombinePass.cpp | 4
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 34
-rw-r--r--  lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp | 6
-rw-r--r--  lib/CodeGen/LexicalScopes.cpp | 1
-rw-r--r--  lib/CodeGen/LiveDebugValues.cpp | 510
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 257
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 7
-rw-r--r--  lib/CodeGen/LiveIntervals.cpp | 59
-rw-r--r--  lib/CodeGen/LivePhysRegs.cpp | 20
-rw-r--r--  lib/CodeGen/LiveRangeCalc.cpp | 5
-rw-r--r--  lib/CodeGen/LiveRangeCalc.h | 297
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 14
-rw-r--r--  lib/CodeGen/LiveRangeShrink.cpp | 4
-rw-r--r--  lib/CodeGen/LiveRegMatrix.cpp | 2
-rw-r--r--  lib/CodeGen/LiveRegUnits.cpp | 12
-rw-r--r--  lib/CodeGen/LiveStacks.cpp | 7
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 29
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 10
-rw-r--r--  lib/CodeGen/LowerEmuTLS.cpp | 7
-rw-r--r--  lib/CodeGen/MIRCanonicalizerPass.cpp | 359
-rw-r--r--  lib/CodeGen/MIRNamerPass.cpp | 77
-rw-r--r--  lib/CodeGen/MIRParser/MILexer.cpp | 1
-rw-r--r--  lib/CodeGen/MIRParser/MILexer.h | 2
-rw-r--r--  lib/CodeGen/MIRParser/MIParser.cpp | 60
-rw-r--r--  lib/CodeGen/MIRParser/MIRParser.cpp | 18
-rw-r--r--  lib/CodeGen/MIRPrinter.cpp | 16
-rw-r--r--  lib/CodeGen/MIRVRegNamerUtils.cpp | 348
-rw-r--r--  lib/CodeGen/MIRVRegNamerUtils.h | 91
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 64
-rw-r--r--  lib/CodeGen/MachineBlockPlacement.cpp | 28
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 75
-rw-r--r--  lib/CodeGen/MachineCombiner.cpp | 6
-rw-r--r--  lib/CodeGen/MachineCopyPropagation.cpp | 78
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 23
-rw-r--r--  lib/CodeGen/MachineFrameInfo.cpp | 38
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 58
-rw-r--r--  lib/CodeGen/MachineFunctionPass.cpp | 6
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 116
-rw-r--r--  lib/CodeGen/MachineInstrBundle.cpp | 14
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 61
-rw-r--r--  lib/CodeGen/MachineLoopUtils.cpp | 132
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 85
-rw-r--r--  lib/CodeGen/MachineOperand.cpp | 70
-rw-r--r--  lib/CodeGen/MachineOptimizationRemarkEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/MachineOutliner.cpp | 16
-rw-r--r--  lib/CodeGen/MachinePipeliner.cpp | 1235
-rw-r--r--  lib/CodeGen/MachinePostDominators.cpp | 55
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 12
-rw-r--r--  lib/CodeGen/MachineSSAUpdater.cpp | 6
-rw-r--r--  lib/CodeGen/MachineScheduler.cpp | 59
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 73
-rw-r--r--  lib/CodeGen/MachineTraceMetrics.cpp | 24
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 163
-rw-r--r--  lib/CodeGen/MacroFusion.cpp | 4
-rw-r--r--  lib/CodeGen/ModuloSchedule.cpp | 2022
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 15
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 43
-rw-r--r--  lib/CodeGen/PatchableFunction.cpp | 2
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 83
-rw-r--r--  lib/CodeGen/PreISelIntrinsicLowering.cpp | 2
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 8
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 2
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 6
-rw-r--r--  lib/CodeGen/ReachingDefAnalysis.cpp | 1
-rw-r--r--  lib/CodeGen/RegAllocBase.cpp | 4
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 117
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 16
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 12
-rw-r--r--  lib/CodeGen/RegUsageInfoCollector.cpp | 10
-rw-r--r--  lib/CodeGen/RegUsageInfoPropagate.cpp | 6
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 71
-rw-r--r--  lib/CodeGen/RegisterPressure.cpp | 36
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 62
-rw-r--r--  lib/CodeGen/RenameIndependentSubregs.cpp | 4
-rw-r--r--  lib/CodeGen/SafeStack.cpp | 2
-rw-r--r--  lib/CodeGen/ScalarizeMaskedMemIntrin.cpp | 167
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 57
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1758
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 67
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 7
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 77
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 222
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 430
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 510
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 56
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 61
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 46
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 50
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 139
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 18
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 283
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 495
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 9
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 35
-rw-r--r--  lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1406
-rw-r--r--  lib/CodeGen/ShrinkWrap.cpp | 5
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 5
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 6
-rw-r--r--  lib/CodeGen/SplitKit.h | 2
-rw-r--r--  lib/CodeGen/StackMaps.cpp | 8
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 67
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 8
-rw-r--r--  lib/CodeGen/SwiftErrorValueTracking.cpp | 3
-rw-r--r--  lib/CodeGen/TailDuplicator.cpp | 22
-rw-r--r--  lib/CodeGen/TargetFrameLoweringImpl.cpp | 19
-rw-r--r--  lib/CodeGen/TargetInstrInfo.cpp | 82
-rw-r--r--  lib/CodeGen/TargetLoweringBase.cpp | 95
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 107
-rw-r--r--  lib/CodeGen/TargetPassConfig.cpp | 24
-rw-r--r--  lib/CodeGen/TargetRegisterInfo.cpp | 60
-rw-r--r--  lib/CodeGen/TargetSchedule.cpp | 2
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 90
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 15
-rw-r--r--  lib/CodeGen/ValueTypes.cpp | 150
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 71
-rw-r--r--  lib/CodeGen/XRayInstrumentation.cpp | 2
186 files changed, 13421 insertions, 6660 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 444f618d8b8c..f64b775a8b77 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -232,7 +232,7 @@ bool AggressiveAntiDepBreaker::IsImplicitDefUse(MachineInstr &MI,
if (!MO.isReg() || !MO.isImplicit())
return false;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
return false;
@@ -252,7 +252,7 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(
if (!MO.isReg()) continue;
if ((MO.isDef() && MI.isRegTiedToUseOperand(i)) ||
IsImplicitDefUse(MI, MO)) {
- const unsigned Reg = MO.getReg();
+ const Register Reg = MO.getReg();
for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
PassthruRegs.insert(*SubRegs);
@@ -365,7 +365,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
HandleLastUse(Reg, Count + 1, "", "\tDead Def: ", "\n");
@@ -375,7 +375,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -418,7 +418,7 @@ void AggressiveAntiDepBreaker::PrescanInstruction(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
// Ignore KILLs and passthru registers for liveness...
if (MI.isKill() || (PassthruRegs.count(Reg) != 0))
@@ -471,7 +471,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
LLVM_DEBUG(dbgs() << " " << printReg(Reg, TRI) << "=g"
@@ -506,7 +506,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr &MI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (FirstReg != 0) {
@@ -790,7 +790,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
CriticalPathSU = SU;
}
}
-
+ assert(CriticalPathSU && "Failed to find SUnit critical path");
CriticalPathMI = CriticalPathSU->getInstr();
}
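
The `unsigned Reg` to `Register Reg` substitution above recurs through nearly every file in this patch. The sketch below is a self-contained stand-in for the idea behind llvm::Register (not the real llvm/CodeGen/Register.h): a thin wrapper over the raw register number that names the virtual-vs-physical encoding while still converting implicitly to unsigned, which is what makes the substitution source-compatible.

#include <cassert>

// Stand-in for llvm::Register: wraps the raw unsigned register number but
// makes the encoding explicit. Virtual registers have the high bit set
// (the real class also carves out a stack-slot range, ignored here).
class Register {
  unsigned Reg = 0;

public:
  constexpr Register(unsigned R = 0) : Reg(R) {}
  constexpr operator unsigned() const { return Reg; } // drop-in for old code

  static constexpr bool isVirtualRegister(unsigned R) {
    return (R & (1u << 31)) != 0;
  }
  static constexpr bool isPhysicalRegister(unsigned R) {
    return R != 0 && (R & (1u << 31)) == 0;
  }
};

int main() {
  Register NoReg;                // 0 means "no register"
  Register Phys(42);             // physical registers are small integers
  Register Virt((1u << 31) | 5); // virtual registers: high bit set
  assert(NoReg == 0 && Register::isPhysicalRegister(Phys) &&
         Register::isVirtualRegister(Virt));
}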
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index d158e70b86ac..4f24f077d120 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -309,7 +309,8 @@ static const Value *getNoopInput(const Value *V,
NoopInput = Op;
} else if (isa<TruncInst>(I) &&
TLI.allowTruncateForTailCall(Op->getType(), I->getType())) {
- DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits());
+ DataBits = std::min((uint64_t)DataBits,
+ I->getType()->getPrimitiveSizeInBits().getFixedSize());
NoopInput = Op;
} else if (auto CS = ImmutableCallSite(I)) {
const Value *ReturnedOp = CS.getReturnedArgOperand();
@@ -523,7 +524,8 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// longjmp on x86), it can end up causing miscompilation that has not
// been fully understood.
if (!Ret &&
- (!TM.Options.GuaranteedTailCallOpt || !isa<UnreachableInst>(Term)))
+ ((!TM.Options.GuaranteedTailCallOpt &&
+ CS.getCallingConv() != CallingConv::Tail) || !isa<UnreachableInst>(Term)))
return false;
// If I will have a chain, make sure no other instruction that will have a
@@ -536,9 +538,11 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, const TargetMachine &TM) {
// Debug info intrinsics do not get in the way of tail call optimization.
if (isa<DbgInfoIntrinsic>(BBI))
continue;
- // A lifetime end intrinsic should not stop tail call optimization.
+ // A lifetime end or assume intrinsic should not stop tail call
+ // optimization.
if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(BBI))
- if (II->getIntrinsicID() == Intrinsic::lifetime_end)
+ if (II->getIntrinsicID() == Intrinsic::lifetime_end ||
+ II->getIntrinsicID() == Intrinsic::assume)
continue;
if (BBI->mayHaveSideEffects() || BBI->mayReadFromMemory() ||
!isSafeToSpeculativelyExecute(&*BBI))
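
Two relaxations land in isInTailCallPosition above: a call using the new `tail` calling convention (CallingConv::Tail) followed by `unreachable` now counts as a tail-call position even without -tailcallopt, and `llvm.assume` joins `llvm.lifetime.end` on the list of intrinsics allowed to sit between the call and the return. Below is a standalone sketch of the second check, using stand-in types in place of llvm::Instruction and llvm::IntrinsicInst.

// Stand-in types; the real code walks llvm::Instruction iterators and also
// checks isSafeToSpeculativelyExecute.
enum class IntrinsicID { None, DbgValue, LifetimeEnd, Assume };

struct Inst {
  IntrinsicID ID = IntrinsicID::None;
  bool HasSideEffects = false;
  bool ReadsMemory = false;
};

// Mirrors the loop body above: which instructions may sit between a
// candidate tail call and the return without blocking the optimization?
static bool blocksTailCall(const Inst &I) {
  switch (I.ID) {
  case IntrinsicID::DbgValue:    // debug intrinsics never block TCO
  case IntrinsicID::LifetimeEnd: // already tolerated before this patch
  case IntrinsicID::Assume:      // new: assume is equally side-effect free
    return false;
  default:
    return I.HasSideEffects || I.ReadsMemory;
  }
}

int main() {
  return blocksTailCall({IntrinsicID::Assume}) ? 1 : 0; // exits 0: no block
}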
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 54f6cc2d5571..73c53d6c4af5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -91,10 +91,12 @@
#include "llvm/MC/MCSectionCOFF.h"
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
+#include "llvm/MC/MCSymbolXCOFF.h"
#include "llvm/MC/MCTargetOptions.h"
#include "llvm/MC/MCValue.h"
#include "llvm/MC/SectionKind.h"
@@ -159,30 +161,30 @@ static gcp_map_type &getGCMap(void *&P) {
return *(gcp_map_type*)P;
}
-/// getGVAlignmentLog2 - Return the alignment to use for the specified global
-/// value in log2 form. This rounds up to the preferred alignment if possible
-/// and legal.
-static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &DL,
- unsigned InBits = 0) {
- unsigned NumBits = 0;
+/// getGVAlignment - Return the alignment to use for the specified global
+/// value. This rounds up to the preferred alignment if possible and legal.
+Align AsmPrinter::getGVAlignment(const GlobalValue *GV, const DataLayout &DL,
+ Align InAlign) {
+ Align Alignment;
if (const GlobalVariable *GVar = dyn_cast<GlobalVariable>(GV))
- NumBits = DL.getPreferredAlignmentLog(GVar);
+ Alignment = Align(DL.getPreferredAlignment(GVar));
- // If InBits is specified, round it to it.
- if (InBits > NumBits)
- NumBits = InBits;
+ // If InAlign is specified, round it to it.
+ if (InAlign > Alignment)
+ Alignment = InAlign;
// If the GV has a specified alignment, take it into account.
- if (GV->getAlignment() == 0)
- return NumBits;
+ const MaybeAlign GVAlign(GV->getAlignment());
+ if (!GVAlign)
+ return Alignment;
- unsigned GVAlign = Log2_32(GV->getAlignment());
+ assert(GVAlign && "GVAlign must be set");
// If the GVAlign is larger than NumBits, or if we are required to obey
// NumBits because the GV has an assigned section, obey it.
- if (GVAlign > NumBits || GV->hasSection())
- NumBits = GVAlign;
- return NumBits;
+ if (*GVAlign > Alignment || GV->hasSection())
+ Alignment = *GVAlign;
+ return Alignment;
}
AsmPrinter::AsmPrinter(TargetMachine &tm, std::unique_ptr<MCStreamer> Streamer)
@@ -248,13 +250,14 @@ const MCSection *AsmPrinter::getCurrentSection() const {
void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
- AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<MachineOptimizationRemarkEmitterPass>();
AU.addRequired<GCModuleInfo>();
}
bool AsmPrinter::doInitialization(Module &M) {
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
// Initialize TargetLoweringObjectFile.
const_cast<TargetLoweringObjectFile&>(getObjFileLowering())
@@ -311,7 +314,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (MAI->doesSupportDebugInformation()) {
bool EmitCodeView = MMI->getModule()->getCodeViewFlag();
if (EmitCodeView && TM.getTargetTriple().isOSWindows()) {
- Handlers.emplace_back(llvm::make_unique<CodeViewDebug>(this),
+ Handlers.emplace_back(std::make_unique<CodeViewDebug>(this),
DbgTimerName, DbgTimerDescription,
CodeViewLineTablesGroupName,
CodeViewLineTablesGroupDescription);
@@ -380,7 +383,7 @@ bool AsmPrinter::doInitialization(Module &M) {
if (mdconst::extract_or_null<ConstantInt>(
MMI->getModule()->getModuleFlag("cfguardtable")))
- Handlers.emplace_back(llvm::make_unique<WinCFGuard>(this), CFGuardName,
+ Handlers.emplace_back(std::make_unique<WinCFGuard>(this), CFGuardName,
CFGuardDescription, DWARFGroupName,
DWARFGroupDescription);
@@ -425,7 +428,10 @@ void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const {
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Global);
return;
case GlobalValue::PrivateLinkage:
+ return;
case GlobalValue::InternalLinkage:
+ if (MAI->hasDotLGloblDirective())
+ OutStreamer->EmitSymbolAttribute(GVSym, MCSA_LGlobal);
return;
case GlobalValue::AppendingLinkage:
case GlobalValue::AvailableExternallyLinkage:
@@ -501,7 +507,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// If the alignment is specified, we *must* obey it. Overaligning a global
// with a specified alignment is a prompt way to break globals emitted to
// sections and expected to be contiguous (e.g. ObjC metadata).
- unsigned AlignLog = getGVAlignmentLog2(GV, DL);
+ const Align Alignment = getGVAlignment(GV, DL);
for (const HandlerInfo &HI : Handlers) {
NamedRegionTimer T(HI.TimerName, HI.TimerDescription,
@@ -513,12 +519,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common symbols
if (GVKind.isCommon()) {
if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
- unsigned Align = 1 << AlignLog;
- if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
- Align = 0;
-
// .comm _foo, 42, 4
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ const bool SupportsAlignment =
+ getObjFileLowering().getCommDirectiveSupportsAlignment();
+ OutStreamer->EmitCommonSymbol(GVSym, Size,
+ SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -531,10 +536,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
TheSection->isVirtualSection()) {
if (Size == 0)
Size = 1; // zerofill of 0 bytes is undefined.
- unsigned Align = 1 << AlignLog;
EmitLinkage(GV, GVSym);
// .zerofill __DATA, __bss, _foo, 400, 5
- OutStreamer->EmitZerofill(TheSection, GVSym, Size, Align);
+ OutStreamer->EmitZerofill(TheSection, GVSym, Size, Alignment.value());
return;
}
@@ -544,7 +548,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
getObjFileLowering().getBSSSection() == TheSection) {
if (Size == 0)
Size = 1; // .comm Foo, 0 is undefined, avoid it.
- unsigned Align = 1 << AlignLog;
// Use .lcomm only if it supports user-specified alignment.
// Otherwise, while it would still be correct to use .lcomm in some
@@ -554,17 +557,17 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Prefer to simply fall back to .local / .comm in this case.
if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) {
// .lcomm _foo, 42
- OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Align);
+ OutStreamer->EmitLocalCommonSymbol(GVSym, Size, Alignment.value());
return;
}
- if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
- Align = 0;
-
// .local _foo
OutStreamer->EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- OutStreamer->EmitCommonSymbol(GVSym, Size, Align);
+ const bool SupportsAlignment =
+ getObjFileLowering().getCommDirectiveSupportsAlignment();
+ OutStreamer->EmitCommonSymbol(GVSym, Size,
+ SupportsAlignment ? Alignment.value() : 0);
return;
}
@@ -585,11 +588,11 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
if (GVKind.isThreadBSS()) {
TheSection = getObjFileLowering().getTLSBSSSection();
- OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog);
+ OutStreamer->EmitTBSSSymbol(TheSection, MangSym, Size, Alignment.value());
} else if (GVKind.isThreadData()) {
OutStreamer->SwitchSection(TheSection);
- EmitAlignment(AlignLog, GV);
+ EmitAlignment(Alignment, GV);
OutStreamer->EmitLabel(MangSym);
EmitGlobalConstant(GV->getParent()->getDataLayout(),
@@ -625,7 +628,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer->SwitchSection(TheSection);
EmitLinkage(GV, EmittedInitSym);
- EmitAlignment(AlignLog, GV);
+ EmitAlignment(Alignment, GV);
OutStreamer->EmitLabel(EmittedInitSym);
@@ -664,6 +667,10 @@ void AsmPrinter::EmitFunctionHeader() {
OutStreamer->SwitchSection(getObjFileLowering().SectionForGlobal(&F, TM));
EmitVisibility(CurrentFnSym, F.getVisibility());
+ if (MAI->needsFunctionDescriptors() &&
+ F.getLinkage() != GlobalValue::InternalLinkage)
+ EmitLinkage(&F, CurrentFnDescSym);
+
EmitLinkage(&F, CurrentFnSym);
if (MAI->hasFunctionAlignment())
EmitAlignment(MF->getAlignment(), &F);
@@ -699,8 +706,13 @@ void AsmPrinter::EmitFunctionHeader() {
}
}
- // Emit the CurrentFnSym. This is a virtual function to allow targets to
- // do their wild and crazy things as required.
+ // Emit the function descriptor. This is a virtual function to allow targets
+ // to emit their specific function descriptor.
+ if (MAI->needsFunctionDescriptors())
+ EmitFunctionDescriptor();
+
+ // Emit the CurrentFnSym. This is a virtual function to allow targets to do
+ // their wild and crazy things as required.
EmitFunctionEntryLabel();
// If the function had address-taken blocks that got deleted, then we have
@@ -783,7 +795,7 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) {
/// emitImplicitDef - This method emits the specified machine instruction
/// that is an implicit def.
void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const {
- unsigned RegNo = MI->getOperand(0).getReg();
+ Register RegNo = MI->getOperand(0).getReg();
SmallString<128> Str;
raw_svector_ostream OS(Str);
@@ -910,7 +922,8 @@ static bool emitDebugLabelComment(const MachineInstr *MI, AsmPrinter &AP) {
OS << "DEBUG_LABEL: ";
const DILabel *V = MI->getDebugLabel();
- if (auto *SP = dyn_cast<DISubprogram>(V->getScope())) {
+ if (auto *SP = dyn_cast<DISubprogram>(
+ V->getScope()->getNonLexicalBlockFileScope())) {
StringRef Name = SP->getName();
if (!Name.empty())
OS << Name << ":";
@@ -1024,7 +1037,7 @@ void AsmPrinter::EmitFunctionBody() {
// Get MachineDominatorTree or compute it on the fly if it's unavailable
MDT = getAnalysisIfAvailable<MachineDominatorTree>();
if (!MDT) {
- OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
MDT = OwnedMDT.get();
}
@@ -1032,7 +1045,7 @@ void AsmPrinter::EmitFunctionBody() {
// Get MachineLoopInfo or compute it on the fly if it's unavailable
MLI = getAnalysisIfAvailable<MachineLoopInfo>();
if (!MLI) {
- OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
OwnedMLI->getBase().analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
@@ -1052,9 +1065,13 @@ void AsmPrinter::EmitFunctionBody() {
++NumInstsInFunction;
}
- // If there is a pre-instruction symbol, emit a label for it here.
+ // If there is a pre-instruction symbol, emit a label for it here. If the
+ // instruction was duplicated and the label has already been emitted,
+ // don't re-emit the same label.
+ // FIXME: Consider strengthening that to an assertion.
if (MCSymbol *S = MI.getPreInstrSymbol())
- OutStreamer->EmitLabel(S);
+ if (S->isUndefined())
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1107,9 +1124,13 @@ void AsmPrinter::EmitFunctionBody() {
break;
}
- // If there is a post-instruction symbol, emit a label for it here.
+ // If there is a post-instruction symbol, emit a label for it here. If
+ // the instruction was duplicated and the label has already been emitted,
+ // don't re-emit the same label.
+ // FIXME: Consider strengthening that to an assertion.
if (MCSymbol *S = MI.getPostInstrSymbol())
- OutStreamer->EmitLabel(S);
+ if (S->isUndefined())
+ OutStreamer->EmitLabel(S);
if (ShouldPrintDebugScopes) {
for (const HandlerInfo &HI : Handlers) {
@@ -1313,11 +1334,10 @@ void AsmPrinter::emitGlobalIndirectSymbol(Module &M,
// Set the symbol type to function if the alias has a function type.
// This affects codegen when the aliasee is not a function.
- if (IsFunction) {
- OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction);
- if (isa<GlobalIFunc>(GIS))
- OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeIndFunction);
- }
+ if (IsFunction)
+ OutStreamer->EmitSymbolAttribute(Name, isa<GlobalIFunc>(GIS)
+ ? MCSA_ELF_TypeIndFunction
+ : MCSA_ELF_TypeFunction);
EmitVisibility(Name, GIS.getVisibility());
@@ -1349,60 +1369,28 @@ void AsmPrinter::emitRemarksSection(Module &M) {
RemarkStreamer *RS = M.getContext().getRemarkStreamer();
if (!RS)
return;
- const remarks::Serializer &Serializer = RS->getSerializer();
+ remarks::RemarkSerializer &RemarkSerializer = RS->getSerializer();
+
+ Optional<SmallString<128>> Filename;
+ if (Optional<StringRef> FilenameRef = RS->getFilename()) {
+ Filename = *FilenameRef;
+ sys::fs::make_absolute(*Filename);
+ assert(!Filename->empty() && "The filename can't be empty.");
+ }
+
+ std::string Buf;
+ raw_string_ostream OS(Buf);
+ std::unique_ptr<remarks::MetaSerializer> MetaSerializer =
+ Filename ? RemarkSerializer.metaSerializer(OS, StringRef(*Filename))
+ : RemarkSerializer.metaSerializer(OS);
+ MetaSerializer->emit();
// Switch to the right section: .remarks/__remarks.
MCSection *RemarksSection =
OutContext.getObjectFileInfo()->getRemarksSection();
OutStreamer->SwitchSection(RemarksSection);
- // Emit the magic number.
- OutStreamer->EmitBytes(remarks::Magic);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
-
- // Emit the version number: little-endian uint64_t.
- // The version number is located at the offset 0x0 in the section.
- std::array<char, 8> Version;
- support::endian::write64le(Version.data(), remarks::Version);
- OutStreamer->EmitBinaryData(StringRef(Version.data(), Version.size()));
-
- // Emit the string table in the section.
- // Note: we need to use the streamer here to emit it in the section. We can't
- // just use the serialize function with a raw_ostream because of the way
- // MCStreamers work.
- uint64_t StrTabSize =
- Serializer.StrTab ? Serializer.StrTab->SerializedSize : 0;
- // Emit the total size of the string table (the size itself excluded):
- // little-endian uint64_t.
- // The total size is located after the version number.
- // Note: even if no string table is used, emit 0.
- std::array<char, 8> StrTabSizeBuf;
- support::endian::write64le(StrTabSizeBuf.data(), StrTabSize);
- OutStreamer->EmitBinaryData(
- StringRef(StrTabSizeBuf.data(), StrTabSizeBuf.size()));
-
- if (const Optional<remarks::StringTable> &StrTab = Serializer.StrTab) {
- std::vector<StringRef> StrTabStrings = StrTab->serialize();
- // Emit a list of null-terminated strings.
- // Note: the order is important here: the ID used in the remarks corresponds
- // to the position of the string in the section.
- for (StringRef Str : StrTabStrings) {
- OutStreamer->EmitBytes(Str);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
- }
- }
-
- // Emit the null-terminated absolute path to the remark file.
- // The path is located at the offset 0x4 in the section.
- StringRef FilenameRef = RS->getFilename();
- SmallString<128> Filename = FilenameRef;
- sys::fs::make_absolute(Filename);
- assert(!Filename.empty() && "The filename can't be empty.");
- OutStreamer->EmitBytes(Filename);
- // Explicitly emit a '\0'.
- OutStreamer->EmitIntValue(/*Value=*/0, /*Size=*/1);
+ OutStreamer->EmitBinaryData(OS.str());
}
bool AsmPrinter::doFinalization(Module &M) {
@@ -1455,7 +1443,7 @@ bool AsmPrinter::doFinalization(Module &M) {
OutStreamer->SwitchSection(TLOF.getDataSection());
const DataLayout &DL = M.getDataLayout();
- EmitAlignment(Log2_32(DL.getPointerSize()));
+ EmitAlignment(Align(DL.getPointerSize()));
for (const auto &Stub : Stubs) {
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1482,7 +1470,7 @@ bool AsmPrinter::doFinalization(Module &M) {
COFF::IMAGE_SCN_LNK_COMDAT,
SectionKind::getReadOnly(), Stub.first->getName(),
COFF::IMAGE_COMDAT_SELECT_ANY));
- EmitAlignment(Log2_32(DL.getPointerSize()));
+ EmitAlignment(Align(DL.getPointerSize()));
OutStreamer->EmitSymbolAttribute(Stub.first, MCSA_Global);
OutStreamer->EmitLabel(Stub.first);
OutStreamer->EmitSymbolValue(Stub.second.getPointer(),
@@ -1607,8 +1595,7 @@ bool AsmPrinter::doFinalization(Module &M) {
"expected llvm.used to be an array type");
if (const auto *A = cast<ConstantArray>(LU->getInitializer())) {
for (const Value *Op : A->operands()) {
- const auto *GV =
- cast<GlobalValue>(Op->stripPointerCastsNoFollowAliases());
+ const auto *GV = cast<GlobalValue>(Op->stripPointerCasts());
// Global symbols with internal or private linkage are not visible to
// the linker, and thus would cause an error when the linker tried to
// preserve the symbol due to the `/include:` directive.
@@ -1679,8 +1666,27 @@ MCSymbol *AsmPrinter::getCurExceptionSym() {
void AsmPrinter::SetupMachineFunction(MachineFunction &MF) {
this->MF = &MF;
+
// Get the function symbol.
- CurrentFnSym = getSymbol(&MF.getFunction());
+ if (MAI->needsFunctionDescriptors()) {
+ assert(TM.getTargetTriple().isOSAIX() && "Function descriptor is only"
+ " supported on AIX.");
+ assert(CurrentFnDescSym && "The function descriptor symbol needs to be"
+ " initalized first.");
+
+ // Get the function entry point symbol.
+ CurrentFnSym =
+ OutContext.getOrCreateSymbol("." + CurrentFnDescSym->getName());
+
+ const Function &F = MF.getFunction();
+ MCSectionXCOFF *FnEntryPointSec =
+ cast<MCSectionXCOFF>(getObjFileLowering().SectionForGlobal(&F, TM));
+ // Set the containing csect.
+ cast<MCSymbolXCOFF>(CurrentFnSym)->setContainingCsect(FnEntryPointSec);
+ } else {
+ CurrentFnSym = getSymbol(&MF.getFunction());
+ }
+
CurrentFnSymForSize = CurrentFnSym;
CurrentFnBegin = nullptr;
CurExceptionSym = nullptr;
@@ -1765,7 +1771,7 @@ void AsmPrinter::EmitConstantPool() {
if (CurSection != CPSections[i].S) {
OutStreamer->SwitchSection(CPSections[i].S);
- EmitAlignment(Log2_32(CPSections[i].Alignment));
+ EmitAlignment(Align(CPSections[i].Alignment));
CurSection = CPSections[i].S;
Offset = 0;
}
@@ -1812,7 +1818,7 @@ void AsmPrinter::EmitJumpTableInfo() {
OutStreamer->SwitchSection(ReadOnlySection);
}
- EmitAlignment(Log2_32(MJTI->getEntryAlignment(DL)));
+ EmitAlignment(Align(MJTI->getEntryAlignment(DL)));
// Jump tables in code sections are marked with a data_region directive
// where that's supported.
@@ -2025,10 +2031,10 @@ void AsmPrinter::EmitXXStructorList(const DataLayout &DL, const Constant *List,
}
// Emit the function pointers in the target-specific order
- unsigned Align = Log2_32(DL.getPointerPrefAlignment());
llvm::stable_sort(Structors, [](const Structor &L, const Structor &R) {
return L.Priority < R.Priority;
});
+ const Align Align = DL.getPointerPrefAlignment();
for (Structor &S : Structors) {
const TargetLoweringObjectFile &Obj = getObjFileLowering();
const MCSymbol *KeySym = nullptr;
@@ -2149,23 +2155,20 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset,
//===----------------------------------------------------------------------===//
// EmitAlignment - Emit an alignment directive to the specified power of
-// two boundary. For example, if you pass in 3 here, you will get an 8
-// byte alignment. If a global value is specified, and if that global has
+// two boundary. If a global value is specified, and if that global has
// an explicit alignment requested, it will override the alignment request
// if required for correctness.
-void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalObject *GV) const {
+void AsmPrinter::EmitAlignment(Align Alignment, const GlobalObject *GV) const {
if (GV)
- NumBits = getGVAlignmentLog2(GV, GV->getParent()->getDataLayout(), NumBits);
+ Alignment = getGVAlignment(GV, GV->getParent()->getDataLayout(), Alignment);
- if (NumBits == 0) return; // 1-byte aligned: no need to emit alignment.
+ if (Alignment == Align::None())
+ return; // 1-byte aligned: no need to emit alignment.
- assert(NumBits <
- static_cast<unsigned>(std::numeric_limits<unsigned>::digits) &&
- "undefined behavior");
if (getCurrentSection()->getKind().isText())
- OutStreamer->EmitCodeAlignment(1u << NumBits);
+ OutStreamer->EmitCodeAlignment(Alignment.value());
else
- OutStreamer->EmitValueToAlignment(1u << NumBits);
+ OutStreamer->EmitValueToAlignment(Alignment.value());
}
//===----------------------------------------------------------------------===//
@@ -2481,6 +2484,7 @@ static void emitGlobalConstantStruct(const DataLayout &DL,
}
static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
+ assert(ET && "Unknown float type");
APInt API = APF.bitcastToAPInt();
// First print a comment with what we think the original floating-point value
@@ -2488,11 +2492,7 @@ static void emitGlobalConstantFP(APFloat APF, Type *ET, AsmPrinter &AP) {
if (AP.isVerbose()) {
SmallString<8> StrVal;
APF.toString(StrVal);
-
- if (ET)
- ET->print(AP.OutStreamer->GetCommentOS());
- else
- AP.OutStreamer->GetCommentOS() << "Printing <null> Type";
+ ET->print(AP.OutStreamer->GetCommentOS());
AP.OutStreamer->GetCommentOS() << ' ' << StrVal << '\n';
}
@@ -2670,7 +2670,7 @@ static void handleIndirectSymViaGOTPCRel(AsmPrinter &AP, const MCExpr **ME,
const GlobalValue *FinalGV = dyn_cast<GlobalValue>(GV->getOperand(0));
const MCSymbol *FinalSym = AP.getSymbol(FinalGV);
*ME = AP.getObjFileLowering().getIndirectSymViaGOTPCRel(
- FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
+ FinalGV, FinalSym, MV, Offset, AP.MMI, *AP.OutStreamer);
// Update GOT equivalent usage information
--NumUses;
@@ -2930,7 +2930,7 @@ void AsmPrinter::setupCodePaddingContext(const MachineBasicBlock &MBB,
/// EmitBasicBlockStart - This method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing
/// it if appropriate.
-void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
+void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) {
// End the previous funclet and start a new one.
if (MBB.isEHFuncletEntry()) {
for (const HandlerInfo &HI : Handlers) {
@@ -2940,8 +2940,9 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
}
// Emit an alignment directive for this block, if needed.
- if (unsigned Align = MBB.getAlignment())
- EmitAlignment(Align);
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment != Align::None())
+ EmitAlignment(Alignment);
MCCodePaddingContext Context;
setupCodePaddingContext(MBB, Context);
OutStreamer->EmitCodePaddingBasicBlockStart(Context);
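
The dominant theme in AsmPrinter.cpp is retiring log2-encoded alignments (getGVAlignmentLog2, `1 << AlignLog`) in favor of the type-safe Align class. The sketch below mimics just the surface used above (construction from a byte value, value(), comparisons); it is a stand-in, not the real llvm/Support/Alignment.h, but it shows why the class is harder to misuse than a raw `unsigned NumBits`.

#include <cassert>
#include <cstdint>

// Stand-in for llvm::Align: stores log2 internally but constructs from and
// exposes the byte value, so "3" can no longer silently mean either
// "log2(8)" or an (invalid) 3-byte alignment depending on the call site.
class Align {
  uint8_t ShiftValue = 0; // log2 of the alignment; 0 => 1-byte aligned

public:
  Align() = default; // the Align::None() case: no alignment required
  explicit Align(uint64_t Value) {
    assert(Value > 0 && (Value & (Value - 1)) == 0 && "not a power of 2");
    while (Value >>= 1)
      ++ShiftValue;
  }
  uint64_t value() const { return uint64_t(1) << ShiftValue; }
  friend bool operator>(Align L, Align R) {
    return L.ShiftValue > R.ShiftValue;
  }
  friend bool operator==(Align L, Align R) {
    return L.ShiftValue == R.ShiftValue;
  }
};

int main() {
  const Align A(8);            // 8-byte alignment, stated in bytes
  assert(A.value() == 8);
  assert(Align(16) > A);       // comparisons use the log2 form
  assert(Align() == Align(1)); // default == 1-byte aligned
}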
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index 7721e996aca5..420df26a2b8b 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -72,7 +72,7 @@ static void srcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
unsigned AsmPrinter::addInlineAsmDiagBuffer(StringRef AsmStr,
const MDNode *LocMDNode) const {
if (!DiagInfo) {
- DiagInfo = make_unique<SrcMgrDiagInfo>();
+ DiagInfo = std::make_unique<SrcMgrDiagInfo>();
MCContext &Context = MMI->getContext();
Context.setInlineSourceManager(&DiagInfo->SrcMgr);
@@ -432,6 +432,7 @@ static void EmitGCCInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
const BlockAddress *BA = MI->getOperand(OpNo).getBlockAddress();
MCSymbol *Sym = AP->GetBlockAddressSymbol(BA);
Sym->print(OS, AP->MAI);
+ MMI->getContext().registerInlineAsmLabel(Sym);
} else if (MI->getOperand(OpNo).isMBB()) {
const MCSymbol *Sym = MI->getOperand(OpNo).getMBB()->getSymbol();
Sym->print(OS, AP->MAI);
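
The llvm::make_unique to std::make_unique swap seen here repeats throughout the patch: with the codebase now on C++14, LLVM's pre-C++14 polyfill is being retired in favor of the standard library version. The replacement is mechanical, as this minimal sketch shows (SrcMgrDiagInfo is reduced to an empty stand-in):

#include <memory>

struct SrcMgrDiagInfo { /* stand-in for the real diagnostics state */ };

int main() {
  // Before: auto DiagInfo = llvm::make_unique<SrcMgrDiagInfo>();
  auto DiagInfo = std::make_unique<SrcMgrDiagInfo>(); // C++14 standard form
  (void)DiagInfo;
}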
diff --git a/lib/CodeGen/AsmPrinter/ByteStreamer.h b/lib/CodeGen/AsmPrinter/ByteStreamer.h
index db2ff458eb2e..09f7496cd4ef 100644
--- a/lib/CodeGen/AsmPrinter/ByteStreamer.h
+++ b/lib/CodeGen/AsmPrinter/ByteStreamer.h
@@ -73,18 +73,18 @@ class HashingByteStreamer final : public ByteStreamer {
class BufferByteStreamer final : public ByteStreamer {
private:
SmallVectorImpl<char> &Buffer;
- SmallVectorImpl<std::string> &Comments;
+ std::vector<std::string> &Comments;
+public:
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise. If false, comments passed in to
/// the emit methods will be ignored.
- bool GenerateComments;
+ const bool GenerateComments;
-public:
BufferByteStreamer(SmallVectorImpl<char> &Buffer,
- SmallVectorImpl<std::string> &Comments,
- bool GenerateComments)
- : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {}
+ std::vector<std::string> &Comments, bool GenerateComments)
+ : Buffer(Buffer), Comments(Comments), GenerateComments(GenerateComments) {
+ }
void EmitInt8(uint8_t Byte, const Twine &Comment) override {
Buffer.push_back(Byte);
if (GenerateComments)
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
index 932959c311fa..c6457f3626d1 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp
@@ -98,7 +98,8 @@ using namespace llvm::codeview;
namespace {
class CVMCAdapter : public CodeViewRecordStreamer {
public:
- CVMCAdapter(MCStreamer &OS) : OS(&OS) {}
+ CVMCAdapter(MCStreamer &OS, TypeCollection &TypeTable)
+ : OS(&OS), TypeTable(TypeTable) {}
void EmitBytes(StringRef Data) { OS->EmitBytes(Data); }
@@ -110,8 +111,24 @@ public:
void AddComment(const Twine &T) { OS->AddComment(T); }
+ void AddRawComment(const Twine &T) { OS->emitRawComment(T); }
+
+ bool isVerboseAsm() { return OS->isVerboseAsm(); }
+
+ std::string getTypeName(TypeIndex TI) {
+ std::string TypeName;
+ if (!TI.isNoneType()) {
+ if (TI.isSimple())
+ TypeName = TypeIndex::simpleTypeName(TI);
+ else
+ TypeName = TypeTable.getTypeName(TI);
+ }
+ return TypeName;
+ }
+
private:
MCStreamer *OS = nullptr;
+ TypeCollection &TypeTable;
};
} // namespace
@@ -617,13 +634,6 @@ emitNullTerminatedSymbolName(MCStreamer &OS, StringRef S,
OS.EmitBytes(NullTerminatedString);
}
-static StringRef getTypeLeafName(TypeLeafKind TypeKind) {
- for (const EnumEntry<TypeLeafKind> &EE : getTypeLeafNames())
- if (EE.Value == TypeKind)
- return EE.Name;
- return "";
-}
-
void CodeViewDebug::emitTypeInformation() {
if (TypeTable.empty())
return;
@@ -632,30 +642,11 @@ void CodeViewDebug::emitTypeInformation() {
OS.SwitchSection(Asm->getObjFileLowering().getCOFFDebugTypesSection());
emitCodeViewMagicVersion();
- SmallString<8> CommentPrefix;
- if (OS.isVerboseAsm()) {
- CommentPrefix += '\t';
- CommentPrefix += Asm->MAI->getCommentString();
- CommentPrefix += ' ';
- }
-
TypeTableCollection Table(TypeTable.records());
- SmallString<512> CommentBlock;
- raw_svector_ostream CommentOS(CommentBlock);
- std::unique_ptr<ScopedPrinter> SP;
- std::unique_ptr<TypeDumpVisitor> TDV;
TypeVisitorCallbackPipeline Pipeline;
- if (OS.isVerboseAsm()) {
- // To construct block comment describing the type record for readability.
- SP = llvm::make_unique<ScopedPrinter>(CommentOS);
- SP->setPrefix(CommentPrefix);
- TDV = llvm::make_unique<TypeDumpVisitor>(Table, SP.get(), false);
- Pipeline.addCallbackToPipeline(*TDV);
- }
-
// To emit type record using Codeview MCStreamer adapter
- CVMCAdapter CVMCOS(OS);
+ CVMCAdapter CVMCOS(OS, Table);
TypeRecordMapping typeMapping(CVMCOS);
Pipeline.addCallbackToPipeline(typeMapping);
@@ -664,17 +655,6 @@ void CodeViewDebug::emitTypeInformation() {
// This will fail if the record data is invalid.
CVType Record = Table.getType(*B);
- CommentBlock.clear();
-
- auto RecordLen = Record.length();
- auto RecordKind = Record.kind();
- if (OS.isVerboseAsm())
- CVMCOS.AddComment("Record length");
- CVMCOS.EmitIntValue(RecordLen - 2, 2);
- if (OS.isVerboseAsm())
- CVMCOS.AddComment("Record kind: " + getTypeLeafName(RecordKind));
- CVMCOS.EmitIntValue(RecordKind, sizeof(RecordKind));
-
Error E = codeview::visitTypeRecord(Record, *B, Pipeline);
if (E) {
@@ -682,13 +662,6 @@ void CodeViewDebug::emitTypeInformation() {
llvm_unreachable("produced malformed type record");
}
- if (OS.isVerboseAsm()) {
- // emitRawComment will insert its own tab and comment string before
- // the first line, so strip off our first one. It also prints its own
- // newline.
- OS.emitRawComment(
- CommentOS.str().drop_front(CommentPrefix.size() - 1).rtrim());
- }
B = Table.getNext(*B);
}
}
@@ -1135,7 +1108,7 @@ void CodeViewDebug::emitDebugInfoForFunction(const Function *GV,
if (!BeginLabel->isDefined() || !EndLabel->isDefined())
continue;
- DIType *DITy = std::get<2>(HeapAllocSite);
+ const DIType *DITy = std::get<2>(HeapAllocSite);
MCSymbol *HeapAllocEnd = beginSymbolRecord(SymbolKind::S_HEAPALLOCSITE);
OS.AddComment("Call site offset");
OS.EmitCOFFSecRel32(BeginLabel, /*Offset=*/0);
@@ -1363,7 +1336,7 @@ void CodeViewDebug::beginFunctionImpl(const MachineFunction *MF) {
const TargetRegisterInfo *TRI = TSI.getRegisterInfo();
const MachineFrameInfo &MFI = MF->getFrameInfo();
const Function &GV = MF->getFunction();
- auto Insertion = FnDebugInfo.insert({&GV, llvm::make_unique<FunctionInfo>()});
+ auto Insertion = FnDebugInfo.insert({&GV, std::make_unique<FunctionInfo>()});
assert(Insertion.second && "function already has info");
CurFn = Insertion.first->second.get();
CurFn->FuncId = NextFuncId++;
@@ -2633,17 +2606,6 @@ void CodeViewDebug::emitLocalVariableList(const FunctionInfo &FI,
emitLocalVariable(FI, L);
}
-/// Only call this on endian-specific types like ulittle16_t and little32_t, or
-/// structs composed of them.
-template <typename T>
-static void copyBytesForDefRange(SmallString<20> &BytePrefix,
- SymbolKind SymKind, const T &DefRangeHeader) {
- BytePrefix.resize(2 + sizeof(T));
- ulittle16_t SymKindLE = ulittle16_t(SymKind);
- memcpy(&BytePrefix[0], &SymKindLE, 2);
- memcpy(&BytePrefix[2], &DefRangeHeader, sizeof(T));
-}
-
void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
const LocalVariable &Var) {
// LocalSym record, see SymbolRecord.h for more info.
@@ -2692,8 +2654,9 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
(bool(Flags & LocalSymFlags::IsParameter)
? (EncFP == FI.EncodedParamFramePtrReg)
: (EncFP == FI.EncodedLocalFramePtrReg))) {
- little32_t FPOffset = little32_t(Offset);
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_FRAMEPOINTER_REL, FPOffset);
+ DefRangeFramePointerRelHeader DRHdr;
+ DRHdr.Offset = Offset;
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
uint16_t RegRelFlags = 0;
if (DefRange.IsSubfield) {
@@ -2701,28 +2664,27 @@ void CodeViewDebug::emitLocalVariable(const FunctionInfo &FI,
(DefRange.StructOffset
<< DefRangeRegisterRelSym::OffsetInParentShift);
}
- DefRangeRegisterRelSym::Header DRHdr;
+ DefRangeRegisterRelHeader DRHdr;
DRHdr.Register = Reg;
DRHdr.Flags = RegRelFlags;
DRHdr.BasePointerOffset = Offset;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER_REL, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
} else {
assert(DefRange.DataOffset == 0 && "unexpected offset into register");
if (DefRange.IsSubfield) {
- DefRangeSubfieldRegisterSym::Header DRHdr;
+ DefRangeSubfieldRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
DRHdr.OffsetInParent = DefRange.StructOffset;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_SUBFIELD_REGISTER, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
} else {
- DefRangeRegisterSym::Header DRHdr;
+ DefRangeRegisterHeader DRHdr;
DRHdr.Register = DefRange.CVRegister;
DRHdr.MayHaveNoName = 0;
- copyBytesForDefRange(BytePrefix, S_DEFRANGE_REGISTER, DRHdr);
+ OS.EmitCVDefRangeDirective(DefRange.Ranges, DRHdr);
}
}
- OS.EmitCVDefRangeDirective(DefRange.Ranges, BytePrefix);
}
}
@@ -2896,6 +2858,14 @@ void CodeViewDebug::endFunctionImpl(const MachineFunction *MF) {
CurFn = nullptr;
}
+// Usable locations are valid with non-zero line numbers. A line number of zero
+// corresponds to optimized code that doesn't have a distinct source location.
+// In this case, we try to use the previous or next source location depending on
+// the context.
+static bool isUsableDebugLoc(DebugLoc DL) {
+ return DL && DL.getLine() != 0;
+}
+
void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
DebugHandlerBase::beginInstruction(MI);
@@ -2907,19 +2877,21 @@ void CodeViewDebug::beginInstruction(const MachineInstr *MI) {
// If the first instruction of a new MBB has no location, find the first
// instruction with a location and use that.
DebugLoc DL = MI->getDebugLoc();
- if (!DL && MI->getParent() != PrevInstBB) {
+ if (!isUsableDebugLoc(DL) && MI->getParent() != PrevInstBB) {
for (const auto &NextMI : *MI->getParent()) {
if (NextMI.isDebugInstr())
continue;
DL = NextMI.getDebugLoc();
- if (DL)
+ if (isUsableDebugLoc(DL))
break;
}
+ // FIXME: Handle the case where the BB has no valid locations. This would
+ // probably require doing a real dataflow analysis.
}
PrevInstBB = MI->getParent();
// If we still don't have a debug location, don't record a location.
- if (!DL)
+ if (!isUsableDebugLoc(DL))
return;
maybeRecordLocation(DL, Asm->MF);
@@ -3026,7 +2998,7 @@ void CodeViewDebug::collectGlobalVariableInfo() {
auto Insertion = ScopeGlobals.insert(
{Scope, std::unique_ptr<GlobalVariableList>()});
if (Insertion.second)
- Insertion.first->second = llvm::make_unique<GlobalVariableList>();
+ Insertion.first->second = std::make_unique<GlobalVariableList>();
VariableList = Insertion.first->second.get();
} else if (GV->hasComdat())
// Emit this global variable into a COMDAT section.
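
Besides moving def-range emission onto typed EmitCVDefRangeDirective overloads (replacing the hand-packed copyBytesForDefRange byte prefixes), CodeViewDebug.cpp starts treating line-0 locations as unusable, per the convention that line 0 marks compiler-generated code with no distinct source position. A runnable illustration of that convention, using a stand-in for llvm::DebugLoc:

#include <cassert>

// Stand-in for llvm::DebugLoc: a location may be absent entirely, or
// present with line 0, which conventionally marks compiler-generated code.
struct DebugLoc {
  unsigned Line = 0;
  bool Valid = false;
  explicit operator bool() const { return Valid; }
  unsigned getLine() const { return Line; }
};

// Same predicate as isUsableDebugLoc above: only record locations that
// exist and name a real source line.
static bool isUsableDebugLoc(DebugLoc DL) { return DL && DL.getLine() != 0; }

int main() {
  assert(!isUsableDebugLoc({}));        // no location at all
  assert(!isUsableDebugLoc({0, true})); // line 0: artificial code
  assert(isUsableDebugLoc({42, true})); // a genuine source line
}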
diff --git a/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
index ce57b789d7fa..7ffd77926cf7 100644
--- a/lib/CodeGen/AsmPrinter/CodeViewDebug.h
+++ b/lib/CodeGen/AsmPrinter/CodeViewDebug.h
@@ -148,7 +148,8 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase {
SmallVector<LexicalBlock *, 1> ChildBlocks;
std::vector<std::pair<MCSymbol *, MDNode *>> Annotations;
- std::vector<std::tuple<MCSymbol *, MCSymbol *, DIType *>> HeapAllocSites;
+ std::vector<std::tuple<MCSymbol *, MCSymbol *, const DIType *>>
+ HeapAllocSites;
const MCSymbol *Begin = nullptr;
const MCSymbol *End = nullptr;
diff --git a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
index ddd60575b6c0..7f9d6c618ad3 100644
--- a/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
+++ b/lib/CodeGen/AsmPrinter/DbgEntityHistoryCalculator.cpp
@@ -41,7 +41,7 @@ using EntryIndex = DbgValueHistoryMap::EntryIndex;
static Register isDescribedByReg(const MachineInstr &MI) {
assert(MI.isDebugValue());
assert(MI.getNumOperands() == 4);
- // If the location of variable is an entry value (DW_OP_entry_value)
+ // If the location of variable is an entry value (DW_OP_LLVM_entry_value)
// do not consider it as a register location.
if (MI.getDebugExpression()->isEntryValue())
return 0;
@@ -177,13 +177,13 @@ static void handleNewDebugValue(InlinedEntity Var, const MachineInstr &DV,
IndicesToErase.push_back(Index);
Entry.endEntry(NewIndex);
}
- if (unsigned Reg = isDescribedByReg(DV))
+ if (Register Reg = isDescribedByReg(DV))
TrackedRegs[Reg] |= !Overlaps;
}
// If the new debug value is described by a register, add tracking of
// that register if it is not already tracked.
- if (unsigned NewReg = isDescribedByReg(DV)) {
+ if (Register NewReg = isDescribedByReg(DV)) {
if (!TrackedRegs.count(NewReg))
addRegDescribedVar(RegVars, NewReg, Var);
LiveEntries[Var].insert(NewIndex);
@@ -234,7 +234,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
DbgLabelInstrMap &DbgLabels) {
const TargetLowering *TLI = MF->getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- unsigned FrameReg = TRI->getFrameRegister(*MF);
+ Register FrameReg = TRI->getFrameRegister(*MF);
RegDescribedVarsMap RegVars;
DbgValueEntriesMap LiveEntries;
for (const auto &MBB : *MF) {
@@ -275,7 +275,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
continue;
// If this is a virtual register, only clobber it since it doesn't
// have aliases.
- if (TRI->isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
clobberRegisterUses(RegVars, MO.getReg(), DbgValues, LiveEntries,
MI);
// If this is a register def operand, it may end a debug value
@@ -296,7 +296,7 @@ void llvm::calculateDbgEntityHistory(const MachineFunction *MF,
// Don't consider SP to be clobbered by register masks.
for (auto It : RegVars) {
unsigned int Reg = It.first;
- if (Reg != SP && TRI->isPhysicalRegister(Reg) &&
+ if (Reg != SP && Register::isPhysicalRegister(Reg) &&
MO.clobbersPhysReg(Reg))
RegsToClobber.push_back(Reg);
}
diff --git a/lib/CodeGen/AsmPrinter/DebugLocStream.h b/lib/CodeGen/AsmPrinter/DebugLocStream.h
index 789291771b5a..0db86b09d19a 100644
--- a/lib/CodeGen/AsmPrinter/DebugLocStream.h
+++ b/lib/CodeGen/AsmPrinter/DebugLocStream.h
@@ -38,21 +38,18 @@ public:
: CU(CU), EntryOffset(EntryOffset) {}
};
struct Entry {
- const MCSymbol *BeginSym;
- const MCSymbol *EndSym;
+ const MCSymbol *Begin;
+ const MCSymbol *End;
size_t ByteOffset;
size_t CommentOffset;
- Entry(const MCSymbol *BeginSym, const MCSymbol *EndSym, size_t ByteOffset,
- size_t CommentOffset)
- : BeginSym(BeginSym), EndSym(EndSym), ByteOffset(ByteOffset),
- CommentOffset(CommentOffset) {}
};
private:
SmallVector<List, 4> Lists;
SmallVector<Entry, 32> Entries;
SmallString<256> DWARFBytes;
- SmallVector<std::string, 32> Comments;
+ std::vector<std::string> Comments;
+ MCSymbol *Sym;
/// Only verbose textual output needs comments. This will be set to
/// true for that case, and false otherwise.
@@ -63,6 +60,12 @@ public:
size_t getNumLists() const { return Lists.size(); }
const List &getList(size_t LI) const { return Lists[LI]; }
ArrayRef<List> getLists() const { return Lists; }
+ MCSymbol *getSym() const {
+ return Sym;
+ }
+ void setSym(MCSymbol *Sym) {
+ this->Sym = Sym;
+ }
class ListBuilder;
class EntryBuilder;
@@ -93,7 +96,7 @@ private:
/// Until the next call, bytes added to the stream will be added to this
/// entry.
void startEntry(const MCSymbol *BeginSym, const MCSymbol *EndSym) {
- Entries.emplace_back(BeginSym, EndSym, DWARFBytes.size(), Comments.size());
+ Entries.push_back({BeginSym, EndSym, DWARFBytes.size(), Comments.size()});
}
/// Finalize a .debug_loc entry, deleting if it's empty.
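
Dropping Entry's hand-written constructor above makes it an aggregate again, which is why startEntry switches from emplace_back(...) to push_back({...}). A standalone illustration of the idiom, with hypothetical field types in place of MCSymbol pointers:

#include <cstddef>
#include <vector>

// With no user-provided constructor, Entry is an aggregate...
struct Entry {
  const char *Begin;
  const char *End;
  std::size_t ByteOffset;
  std::size_t CommentOffset;
};

int main() {
  std::vector<Entry> Entries;
  // ...so a braced list converts to Entry directly in push_back. Note that
  // emplace_back({...}) would not compile before C++20, which is one reason
  // the constructor-free form pairs with push_back.
  Entries.push_back({"begin", "end", /*ByteOffset=*/0, /*CommentOffset=*/0});
}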
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index 9548ad9918c1..a61c98ec1c18 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -208,7 +208,7 @@ void DwarfCompileUnit::addLocationAttribute(
if (!Loc) {
addToAccelTable = true;
Loc = new (DIEValueAllocator) DIELoc;
- DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
+ DwarfExpr = std::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
}
if (Expr) {
@@ -326,14 +326,13 @@ void DwarfCompileUnit::addRange(RangeSpan Range) {
// emitted into and the subprogram was contained within. If these are the
// same then extend our current range, otherwise add this as a new range.
if (CURanges.empty() || !SameAsPrevCU ||
- (&CURanges.back().getEnd()->getSection() !=
- &Range.getEnd()->getSection())) {
+ (&CURanges.back().End->getSection() !=
+ &Range.End->getSection())) {
CURanges.push_back(Range);
- DD->addSectionLabel(Range.getStart());
return;
}
- CURanges.back().setEnd(Range.getEnd());
+ CURanges.back().End = Range.End;
}
void DwarfCompileUnit::initStmtList() {
@@ -399,7 +398,7 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) {
} else {
const TargetRegisterInfo *RI = Asm->MF->getSubtarget().getRegisterInfo();
MachineLocation Location(RI->getFrameRegister(*Asm->MF));
- if (RI->isPhysicalRegister(Location.getReg()))
+ if (Register::isPhysicalRegister(Location.getReg()))
addAddress(*SPDie, dwarf::DW_AT_frame_base, Location);
}
}
@@ -468,14 +467,6 @@ void DwarfCompileUnit::constructScopeDIE(
void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
SmallVector<RangeSpan, 2> Range) {
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Emit the offset into .debug_ranges or .debug_rnglists as a relocatable
- // label. emitDIE() will handle emitting it appropriately.
- const MCSymbol *RangeSectionSym =
- DD->getDwarfVersion() >= 5
- ? TLOF.getDwarfRnglistsSection()->getBeginSymbol()
- : TLOF.getDwarfRangesSection()->getBeginSymbol();
HasRangeLists = true;
@@ -494,12 +485,17 @@ void DwarfCompileUnit::addScopeRangeList(DIE &ScopeDIE,
// (DW_RLE_startx_endx etc.).
if (DD->getDwarfVersion() >= 5)
addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_rnglistx, Index);
- else if (isDwoUnit())
- addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
- else
- addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
- RangeSectionSym);
+ else {
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ const MCSymbol *RangeSectionSym =
+ TLOF.getDwarfRangesSection()->getBeginSymbol();
+ if (isDwoUnit())
+ addSectionDelta(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ else
+ addSectionLabel(ScopeDIE, dwarf::DW_AT_ranges, List.getSym(),
+ RangeSectionSym);
+ }
}
void DwarfCompileUnit::attachRangesOrLowHighPC(
@@ -507,7 +503,7 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
if (Ranges.size() == 1 || !DD->useRangesSection()) {
const RangeSpan &Front = Ranges.front();
const RangeSpan &Back = Ranges.back();
- attachLowHighPC(Die, Front.getStart(), Back.getEnd());
+ attachLowHighPC(Die, Front.Begin, Back.End);
} else
addScopeRangeList(Die, std::move(Ranges));
}
@@ -517,8 +513,8 @@ void DwarfCompileUnit::attachRangesOrLowHighPC(
SmallVector<RangeSpan, 2> List;
List.reserve(Ranges.size());
for (const InsnRange &R : Ranges)
- List.push_back(RangeSpan(DD->getLabelBeforeInsn(R.first),
- DD->getLabelAfterInsn(R.second)));
+ List.push_back(
+ {DD->getLabelBeforeInsn(R.first), DD->getLabelAfterInsn(R.second)});
attachRangesOrLowHighPC(Die, std::move(List));
}
@@ -647,8 +643,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
int Offset = TFI->getFrameIndexReference(*Asm->MF, Fragment.FI, FrameReg);
DwarfExpr.addFragmentOffset(Expr);
SmallVector<uint64_t, 8> Ops;
- Ops.push_back(dwarf::DW_OP_plus_uconst);
- Ops.push_back(Offset);
+ DIExpression::appendOffset(Ops, Offset);
// According to
// https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
// cuda-gdb requires DW_AT_address_class for all variables to be able to
@@ -892,32 +887,117 @@ void DwarfCompileUnit::constructAbstractSubprogramScopeDIE(
ContextCU->addDIEEntry(*AbsDef, dwarf::DW_AT_object_pointer, *ObjectPointer);
}
-DIE &DwarfCompileUnit::constructCallSiteEntryDIE(DIE &ScopeDIE,
- const DISubprogram &CalleeSP,
- bool IsTail,
- const MCExpr *PCOffset) {
+/// Whether to use the GNU analog for a DWARF5 tag, attribute, or location atom.
+static bool useGNUAnalogForDwarf5Feature(DwarfDebug *DD) {
+ return DD->getDwarfVersion() == 4 && DD->tuneForGDB();
+}
+
+dwarf::Tag DwarfCompileUnit::getDwarf5OrGNUTag(dwarf::Tag Tag) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Tag;
+ switch (Tag) {
+ case dwarf::DW_TAG_call_site:
+ return dwarf::DW_TAG_GNU_call_site;
+ case dwarf::DW_TAG_call_site_parameter:
+ return dwarf::DW_TAG_GNU_call_site_parameter;
+ default:
+ llvm_unreachable("DWARF5 tag with no GNU analog");
+ }
+}
+
+dwarf::Attribute
+DwarfCompileUnit::getDwarf5OrGNUAttr(dwarf::Attribute Attr) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Attr;
+ switch (Attr) {
+ case dwarf::DW_AT_call_all_calls:
+ return dwarf::DW_AT_GNU_all_call_sites;
+ case dwarf::DW_AT_call_target:
+ return dwarf::DW_AT_GNU_call_site_target;
+ case dwarf::DW_AT_call_origin:
+ return dwarf::DW_AT_abstract_origin;
+ case dwarf::DW_AT_call_pc:
+ return dwarf::DW_AT_low_pc;
+ case dwarf::DW_AT_call_value:
+ return dwarf::DW_AT_GNU_call_site_value;
+ case dwarf::DW_AT_call_tail_call:
+ return dwarf::DW_AT_GNU_tail_call;
+ default:
+ llvm_unreachable("DWARF5 attribute with no GNU analog");
+ }
+}
+
+dwarf::LocationAtom
+DwarfCompileUnit::getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const {
+ if (!useGNUAnalogForDwarf5Feature(DD))
+ return Loc;
+ switch (Loc) {
+ case dwarf::DW_OP_entry_value:
+ return dwarf::DW_OP_GNU_entry_value;
+ default:
+ llvm_unreachable("DWARF5 location atom with no GNU analog");
+ }
+}
+
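A minimal usage sketch (hypothetical caller; the mappings follow the switch tables above, and any other version/tuning combination returns the input unchanged):

    // Assuming CU has DwarfVersion == 4 and is tuned for GDB:
    dwarf::Tag T = CU.getDwarf5OrGNUTag(dwarf::DW_TAG_call_site);
    // T == DW_TAG_GNU_call_site
    dwarf::Attribute A = CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_value);
    // A == DW_AT_GNU_call_site_value
    dwarf::LocationAtom L =
        CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value);
    // L == DW_OP_GNU_entry_value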
+DIE &DwarfCompileUnit::constructCallSiteEntryDIE(
+ DIE &ScopeDIE, const DISubprogram *CalleeSP, bool IsTail,
+ const MCSymbol *PCAddr, const MCExpr *PCOffset, unsigned CallReg) {
// Insert a call site entry DIE within ScopeDIE.
- DIE &CallSiteDIE =
- createAndAddDIE(dwarf::DW_TAG_call_site, ScopeDIE, nullptr);
+ DIE &CallSiteDIE = createAndAddDIE(getDwarf5OrGNUTag(dwarf::DW_TAG_call_site),
+ ScopeDIE, nullptr);
- // For the purposes of showing tail call frames in backtraces, a key piece of
- // information is DW_AT_call_origin, a pointer to the callee DIE.
- DIE *CalleeDIE = getOrCreateSubprogramDIE(&CalleeSP);
- assert(CalleeDIE && "Could not create DIE for call site entry origin");
- addDIEEntry(CallSiteDIE, dwarf::DW_AT_call_origin, *CalleeDIE);
+ if (CallReg) {
+ // Indirect call.
+ addAddress(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_target),
+ MachineLocation(CallReg));
+ } else {
+ DIE *CalleeDIE = getOrCreateSubprogramDIE(CalleeSP);
+ assert(CalleeDIE && "Could not create DIE for call site entry origin");
+ addDIEEntry(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_origin),
+ *CalleeDIE);
+ }
- if (IsTail) {
+ if (IsTail)
// Attach DW_AT_call_tail_call to tail calls for standards compliance.
- addFlag(CallSiteDIE, dwarf::DW_AT_call_tail_call);
- } else {
- // Attach the return PC to allow the debugger to disambiguate call paths
- // from one function to another.
+ addFlag(CallSiteDIE, getDwarf5OrGNUAttr(dwarf::DW_AT_call_tail_call));
+
+ // Attach the return PC to allow the debugger to disambiguate call paths
+ // from one function to another.
+ if (DD->getDwarfVersion() == 4 && DD->tuneForGDB()) {
+ assert(PCAddr && "Missing PC information for a call");
+ addLabelAddress(CallSiteDIE, dwarf::DW_AT_low_pc, PCAddr);
+ } else if (!IsTail || DD->tuneForGDB()) {
assert(PCOffset && "Missing return PC information for a call");
addAddressExpr(CallSiteDIE, dwarf::DW_AT_call_return_pc, PCOffset);
}
+
return CallSiteDIE;
}
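The two supported call shapes, as exercised by DwarfDebug::constructCallSiteEntryDIEs further down (argument names illustrative):

    // Direct call: the callee's subprogram is known, CallReg is 0.
    CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, /*IsTail=*/false,
                                 /*PCAddr=*/nullptr, PCOffset, /*CallReg=*/0);
    // Indirect call: the callee lives in a register, CalleeSP is null.
    CU.constructCallSiteEntryDIE(ScopeDIE, /*CalleeSP=*/nullptr, IsTail,
                                 PCAddr, PCOffset, CallReg);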
+void DwarfCompileUnit::constructCallSiteParmEntryDIEs(
+ DIE &CallSiteDIE, SmallVector<DbgCallSiteParam, 4> &Params) {
+ for (const auto &Param : Params) {
+ unsigned Register = Param.getRegister();
+ auto CallSiteDieParam =
+ DIE::get(DIEValueAllocator,
+ getDwarf5OrGNUTag(dwarf::DW_TAG_call_site_parameter));
+ insertDIE(CallSiteDieParam);
+ addAddress(*CallSiteDieParam, dwarf::DW_AT_location,
+ MachineLocation(Register));
+
+ DIELoc *Loc = new (DIEValueAllocator) DIELoc;
+ DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
+ DwarfExpr.setCallSiteParamValueFlag();
+
+ DwarfDebug::emitDebugLocValue(*Asm, nullptr, Param.getValue(), DwarfExpr);
+
+ addBlock(*CallSiteDieParam, getDwarf5OrGNUAttr(dwarf::DW_AT_call_value),
+ DwarfExpr.finalize());
+
+ CallSiteDIE.addChild(CallSiteDieParam);
+ }
+}
+
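For a DWARF 5 target with one interpreted parameter, the resulting subtree looks roughly like this (attribute values illustrative; under GDB + DWARF 4 tuning the GNU analogs are substituted throughout):

    DW_TAG_call_site
      DW_AT_call_origin       (ref to the callee's DW_TAG_subprogram)
      DW_AT_call_return_pc    (address after the call instruction)
      DW_TAG_call_site_parameter
        DW_AT_location        DW_OP_reg5
        DW_AT_call_value      DW_OP_constu 42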
DIE *DwarfCompileUnit::constructImportedEntityDIE(
const DIImportedEntity *Module) {
DIE *IMDie = DIE::get(DIEValueAllocator, (dwarf::Tag)Module->getTag());
@@ -997,11 +1077,11 @@ void DwarfCompileUnit::createAbstractEntity(const DINode *Node,
assert(Scope && Scope->isAbstractScope());
auto &Entity = getAbstractEntities()[Node];
if (isa<const DILocalVariable>(Node)) {
- Entity = llvm::make_unique<DbgVariable>(
+ Entity = std::make_unique<DbgVariable>(
cast<const DILocalVariable>(Node), nullptr /* IA */);
DU->addScopeVariable(Scope, cast<DbgVariable>(Entity.get()));
} else if (isa<const DILabel>(Node)) {
- Entity = llvm::make_unique<DbgLabel>(
+ Entity = std::make_unique<DbgLabel>(
cast<const DILabel>(Node), nullptr /* IA */);
DU->addScopeLabel(Scope, cast<DbgLabel>(Entity.get()));
}
@@ -1081,16 +1161,8 @@ void DwarfCompileUnit::addGlobalTypeUnitType(const DIType *Ty,
GlobalTypes.insert(std::make_pair(std::move(FullName), &getUnitDie()));
}
-/// addVariableAddress - Add DW_AT_location attribute for a
-/// DbgVariable based on provided MachineLocation.
void DwarfCompileUnit::addVariableAddress(const DbgVariable &DV, DIE &Die,
MachineLocation Location) {
- // addBlockByrefAddress is obsolete and will be removed soon.
- // The clang frontend always generates block byref variables with a
- // complex expression that encodes exactly what addBlockByrefAddress
- // would do.
- assert((!DV.isBlockByrefVariable() || DV.hasComplexAddress()) &&
- "block byref variable without a complex expression");
if (DV.hasComplexAddress())
addComplexAddress(DV, Die, dwarf::DW_AT_location, Location);
else
@@ -1133,7 +1205,7 @@ void DwarfCompileUnit::addComplexAddress(const DbgVariable &DV, DIE &Die,
if (DIExpr->isEntryValue()) {
DwarfExpr.setEntryValueFlag();
- DwarfExpr.addEntryValueExpression(Cursor);
+ DwarfExpr.beginEntryValueExpression(Cursor);
}
const TargetRegisterInfo &TRI = *Asm->MF->getSubtarget().getRegisterInfo();
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
index ea980dfda17e..1b7ea2673ac0 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h
@@ -227,12 +227,35 @@ public:
void constructAbstractSubprogramScopeDIE(LexicalScope *Scope);
+ /// This takes a DWARF 5 tag and returns it or a GNU analog.
+ dwarf::Tag getDwarf5OrGNUTag(dwarf::Tag Tag) const;
+
+ /// This takes a DWARF 5 attribute and returns it or a GNU analog.
+ dwarf::Attribute getDwarf5OrGNUAttr(dwarf::Attribute Attr) const;
+
+ /// This takes a DWARF 5 location atom and either returns it or a GNU analog.
+ dwarf::LocationAtom getDwarf5OrGNULocationAtom(dwarf::LocationAtom Loc) const;
+
/// Construct a call site entry DIE describing a call within \p Scope to a
- /// callee described by \p CalleeSP. \p IsTail specifies whether the call is
- /// a tail call. \p PCOffset must be non-zero for non-tail calls or be the
+ /// callee described by \p CalleeSP.
+ /// \p IsTail specifies whether the call is a tail call.
+ /// \p PCAddr (used for GDB + DWARF 4 tuning) points to the PC value after
+ /// the call instruction.
+  /// \p PCOffset (used for all tunings other than GDB + DWARF 4) is the
+  /// function-local offset to the PC value after the call instruction; it
+  /// must be non-null for non-tail calls, and also for tail calls under GDB
+  /// tuning, since with DWARF 5 we still generate PC info for tail calls.
- DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram &CalleeSP,
- bool IsTail, const MCExpr *PCOffset);
+  /// \p CallReg is a register location for an indirect call; for direct calls
+  /// it is set to 0.
+ DIE &constructCallSiteEntryDIE(DIE &ScopeDIE, const DISubprogram *CalleeSP,
+ bool IsTail, const MCSymbol *PCAddr,
+ const MCExpr *PCOffset, unsigned CallReg);
+ /// Construct call site parameter DIEs for the \p CallSiteDIE. The \p Params
+  /// were collected by \ref collectCallSiteParameters.
+ /// Note: The order of parameters does not matter, since debuggers recognize
+ /// call site parameters by the DW_AT_location attribute.
+ void constructCallSiteParmEntryDIEs(DIE &CallSiteDIE,
+ SmallVector<DbgCallSiteParam, 4> &Params);
/// Construct import_module DIE.
DIE *constructImportedEntityDIE(const DIImportedEntity *Module);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 71bb2b0858cc..c505e77e5acd 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/Triple.h"
#include "llvm/ADT/Twine.h"
#include "llvm/BinaryFormat/Dwarf.h"
@@ -39,6 +40,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/DebugInfo/DWARF/DWARFExpression.h"
@@ -83,6 +85,8 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
+STATISTIC(NumCSParams, "Number of dbg call site params created");
+
static cl::opt<bool>
DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden,
cl::desc("Disable debug info printing"));
@@ -166,26 +170,26 @@ static const char *const DbgTimerDescription = "DWARF Debug Writer";
static constexpr unsigned ULEB128PadSize = 4;
void DebugLocDwarfExpression::emitOp(uint8_t Op, const char *Comment) {
- BS.EmitInt8(
+ getActiveStreamer().EmitInt8(
Op, Comment ? Twine(Comment) + " " + dwarf::OperationEncodingString(Op)
: dwarf::OperationEncodingString(Op));
}
void DebugLocDwarfExpression::emitSigned(int64_t Value) {
- BS.EmitSLEB128(Value, Twine(Value));
+ getActiveStreamer().EmitSLEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitUnsigned(uint64_t Value) {
- BS.EmitULEB128(Value, Twine(Value));
+ getActiveStreamer().EmitULEB128(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitData1(uint8_t Value) {
- BS.EmitInt8(Value, Twine(Value));
+ getActiveStreamer().EmitInt8(Value, Twine(Value));
}
void DebugLocDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
assert(Idx < (1ULL << (ULEB128PadSize * 7)) && "Idx wont fit");
- BS.EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
+ getActiveStreamer().EmitULEB128(Idx, Twine(Idx), ULEB128PadSize);
}
bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
@@ -194,54 +198,34 @@ bool DebugLocDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
return false;
}
-bool DbgVariable::isBlockByrefVariable() const {
- assert(getVariable() && "Invalid complex DbgVariable!");
- return getVariable()->getType()->isBlockByrefStruct();
+void DebugLocDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ if (!TmpBuf)
+ TmpBuf = std::make_unique<TempBuffer>(OutBS.GenerateComments);
+ IsBuffering = true;
}
-const DIType *DbgVariable::getType() const {
- DIType *Ty = getVariable()->getType();
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (Ty->isBlockByrefStruct()) {
- /* Byref variables, in Blocks, are declared by the programmer as
- "SomeType VarName;", but the compiler creates a
- __Block_byref_x_VarName struct, and gives the variable VarName
- either the struct, or a pointer to the struct, as its type. This
- is necessary for various behind-the-scenes things the compiler
- needs to do with by-reference variables in blocks.
-
- However, as far as the original *programmer* is concerned, the
- variable should still have type 'SomeType', as originally declared.
-
- The following function dives into the __Block_byref_x_VarName
- struct to find the original type of the variable. This will be
- passed back to the code generating the type for the Debug
- Information Entry for the variable 'VarName'. 'VarName' will then
- have the original type 'SomeType' in its debug information.
-
- The original type 'SomeType' will be the type of the field named
- 'VarName' inside the __Block_byref_x_VarName struct.
-
- NOTE: In order for this to not completely fail on the debugger
- side, the Debug Information Entry for the variable VarName needs to
- have a DW_AT_location that tells the debugger how to unwind through
- the pointers and __Block_byref_x_VarName struct to find the actual
- value of the variable. The function addBlockByrefType does this. */
- DIType *subType = Ty;
- uint16_t tag = Ty->getTag();
-
- if (tag == dwarf::DW_TAG_pointer_type)
- subType = cast<DIDerivedType>(Ty)->getBaseType();
-
- auto Elements = cast<DICompositeType>(subType)->getElements();
- for (unsigned i = 0, N = Elements.size(); i < N; ++i) {
- auto *DT = cast<DIDerivedType>(Elements[i]);
- if (getName() == DT->getName())
- return DT->getBaseType();
- }
+void DebugLocDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DebugLocDwarfExpression::getTemporaryBufferSize() {
+ return TmpBuf ? TmpBuf->Bytes.size() : 0;
+}
+
+void DebugLocDwarfExpression::commitTemporaryBuffer() {
+ if (!TmpBuf)
+ return;
+ for (auto Byte : enumerate(TmpBuf->Bytes)) {
+ const char *Comment = (Byte.index() < TmpBuf->Comments.size())
+ ? TmpBuf->Comments[Byte.index()].c_str()
+ : "";
+ OutBS.EmitInt8(Byte.value(), Comment);
}
- return Ty;
+ TmpBuf->Bytes.clear();
+ TmpBuf->Comments.clear();
+}
+
+const DIType *DbgVariable::getType() const {
+ return getVariable()->getType();
}
/// Get .debug_loc entry for the instruction range starting at MI.
@@ -275,7 +259,7 @@ void DbgVariable::initializeDbgValue(const MachineInstr *DbgValue) {
assert(getInlinedAt() == DbgValue->getDebugLoc()->getInlinedAt() &&
"Wrong inlined-at");
- ValueLoc = llvm::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
+ ValueLoc = std::make_unique<DbgValueLoc>(getDebugLocValue(DbgValue));
if (auto *E = DbgValue->getDebugExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
@@ -551,6 +535,157 @@ void DwarfDebug::constructAbstractSubprogramScopeDIE(DwarfCompileUnit &SrcCU,
}
}
+/// Try to interpret values loaded into registers that forward parameters
+/// for \p CallMI. Store each parameter, together with its interpreted value,
+/// into \p Params.
+static void collectCallSiteParameters(const MachineInstr *CallMI,
+ ParamSet &Params) {
+ auto *MF = CallMI->getMF();
+ auto CalleesMap = MF->getCallSitesInfo();
+ auto CallFwdRegsInfo = CalleesMap.find(CallMI);
+
+ // There is no information for the call instruction.
+ if (CallFwdRegsInfo == CalleesMap.end())
+ return;
+
+ auto *MBB = CallMI->getParent();
+ const auto &TRI = MF->getSubtarget().getRegisterInfo();
+ const auto &TII = MF->getSubtarget().getInstrInfo();
+ const auto &TLI = MF->getSubtarget().getTargetLowering();
+
+ // Skip the call instruction.
+ auto I = std::next(CallMI->getReverseIterator());
+
+ DenseSet<unsigned> ForwardedRegWorklist;
+ // Add all the forwarding registers into the ForwardedRegWorklist.
+ for (auto ArgReg : CallFwdRegsInfo->second) {
+ bool InsertedReg = ForwardedRegWorklist.insert(ArgReg.Reg).second;
+ assert(InsertedReg && "Single register used to forward two arguments?");
+ (void)InsertedReg;
+ }
+
+  // We erase a forwarding register from the ForwardedRegWorklist once
+  // describeLoadedValue() successfully describes the value loaded into it.
+  // For the registers remaining in the worklist, we try to generate an entry
+  // value expression for their call site value description, provided the call
+  // is within the entry MBB.
+  // The RegsForEntryValues maps a forwarding register into the register
+  // holding the entry value.
+  // TODO: Handle situations in which a call site parameter value can be
+  // described as the entry value within basic blocks other than the first
+  // one.
+ bool ShouldTryEmitEntryVals = MBB->getIterator() == MF->begin();
+ DenseMap<unsigned, unsigned> RegsForEntryValues;
+
+  // If the MI is an instruction defining one or more parameters' forwarding
+  // registers, add those defines. We can currently only describe forwarded
+  // registers that are explicitly defined, but we also keep track of
+  // implicit defines so those registers can be removed from the worklist.
+ auto getForwardingRegsDefinedByMI = [&](const MachineInstr &MI,
+ SmallVectorImpl<unsigned> &Explicit,
+ SmallVectorImpl<unsigned> &Implicit) {
+ if (MI.isDebugInstr())
+ return;
+
+ for (const MachineOperand &MO : MI.operands()) {
+ if (MO.isReg() && MO.isDef() &&
+ Register::isPhysicalRegister(MO.getReg())) {
+ for (auto FwdReg : ForwardedRegWorklist) {
+ if (TRI->regsOverlap(FwdReg, MO.getReg())) {
+ if (MO.isImplicit())
+ Implicit.push_back(FwdReg);
+ else
+ Explicit.push_back(FwdReg);
+ break;
+ }
+ }
+ }
+ }
+ };
+
+ auto finishCallSiteParam = [&](DbgValueLoc DbgLocVal, unsigned Reg) {
+ unsigned FwdReg = Reg;
+ if (ShouldTryEmitEntryVals) {
+ auto EntryValReg = RegsForEntryValues.find(Reg);
+ if (EntryValReg != RegsForEntryValues.end())
+ FwdReg = EntryValReg->second;
+ }
+
+ DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ };
+
+  // Search for values loaded into the forwarding registers.
+  for (; I != MBB->rend(); ++I) {
+    // If we encounter another call, we cannot interpret the parameters'
+    // forwarding registers any further; likewise, stop once every parameter
+    // has been interpreted and the worklist is empty.
+ if (I->isCall())
+ return;
+
+ if (ForwardedRegWorklist.empty())
+ return;
+
+ SmallVector<unsigned, 4> ExplicitFwdRegDefs;
+ SmallVector<unsigned, 4> ImplicitFwdRegDefs;
+ getForwardingRegsDefinedByMI(*I, ExplicitFwdRegDefs, ImplicitFwdRegDefs);
+ if (ExplicitFwdRegDefs.empty() && ImplicitFwdRegDefs.empty())
+ continue;
+
+    // If the MI clobbers more than one forwarding register, we must remove
+    // all of them from the worklist.
+ for (auto Reg : concat<unsigned>(ExplicitFwdRegDefs, ImplicitFwdRegDefs))
+ ForwardedRegWorklist.erase(Reg);
+
+ // The describeLoadedValue() hook currently does not have any information
+ // about which register it should describe in case of multiple defines, so
+ // for now we only handle instructions where a forwarded register is (at
+ // least partially) defined by the instruction's single explicit define.
+ if (I->getNumExplicitDefs() != 1 || ExplicitFwdRegDefs.empty())
+ continue;
+ unsigned Reg = ExplicitFwdRegDefs[0];
+
+ if (auto ParamValue = TII->describeLoadedValue(*I)) {
+ if (ParamValue->first.isImm()) {
+ int64_t Val = ParamValue->first.getImm();
+ DbgValueLoc DbgLocVal(ParamValue->second, Val);
+ finishCallSiteParam(DbgLocVal, Reg);
+ } else if (ParamValue->first.isReg()) {
+ Register RegLoc = ParamValue->first.getReg();
+ unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
+ Register FP = TRI->getFrameRegister(*MF);
+ bool IsSPorFP = (RegLoc == SP) || (RegLoc == FP);
+ if (TRI->isCalleeSavedPhysReg(RegLoc, *MF) || IsSPorFP) {
+ DbgValueLoc DbgLocVal(ParamValue->second,
+ MachineLocation(RegLoc,
+ /*IsIndirect=*/IsSPorFP));
+ finishCallSiteParam(DbgLocVal, Reg);
+ } else if (ShouldTryEmitEntryVals) {
+ ForwardedRegWorklist.insert(RegLoc);
+ RegsForEntryValues[RegLoc] = Reg;
+ }
+ }
+ }
+ }
+
+  // Describe the call site parameter values that remain in the worklist as
+  // entry values.
+ if (ShouldTryEmitEntryVals) {
+ // Create an expression where the register's entry value is used.
+ DIExpression *EntryExpr = DIExpression::get(
+ MF->getFunction().getContext(), {dwarf::DW_OP_LLVM_entry_value, 1});
+ for (auto RegEntry : ForwardedRegWorklist) {
+ unsigned FwdReg = RegEntry;
+ auto EntryValReg = RegsForEntryValues.find(RegEntry);
+ if (EntryValReg != RegsForEntryValues.end())
+ FwdReg = EntryValReg->second;
+
+ DbgValueLoc DbgLocVal(EntryExpr, MachineLocation(RegEntry));
+ DbgCallSiteParam CSParm(FwdReg, DbgLocVal);
+ Params.push_back(CSParm);
+ ++NumCSParams;
+ }
+ }
+}
+
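A worked example may help (x86-64 flavored; registers and values illustrative). Scanning backwards from the call:

    movl  $42, %edi     ; describeLoadedValue() -> imm 42: EDI's call site
                        ;   value becomes DW_OP_constu 42
    movq  %rbx, %rsi    ; %rbx is callee-saved: RSI described as %rbx
    movq  %rax, %rdx    ; %rax is neither callee-saved nor SP/FP: in the
                        ;   entry MBB, %rax joins the worklist with
                        ;   RegsForEntryValues[%rax] = %rdx
    callq callee

Any register still in the worklist when the scan ends in the entry block is then described with an entry value expression, and RegsForEntryValues maps it back to the parameter register it forwards (here, the entry value of %rax describes %rdx).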
void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
DwarfCompileUnit &CU, DIE &ScopeDIE,
const MachineFunction &MF) {
@@ -563,10 +698,11 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
// for both tail and non-tail calls. Don't use DW_AT_call_all_source_calls
// because one of its requirements is not met: call site entries for
// optimized-out calls are elided.
- CU.addFlag(ScopeDIE, dwarf::DW_AT_call_all_calls);
+ CU.addFlag(ScopeDIE, CU.getDwarf5OrGNUAttr(dwarf::DW_AT_call_all_calls));
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
assert(TII && "TargetInstrInfo not found: cannot label tail calls");
+ bool ApplyGNUExtensions = getDwarfVersion() == 4 && tuneForGDB();
// Emit call site entries for each call or tail call in the function.
for (const MachineBasicBlock &MBB : MF) {
@@ -581,30 +717,66 @@ void DwarfDebug::constructCallSiteEntryDIEs(const DISubprogram &SP,
return;
// If this is a direct call, find the callee's subprogram.
+ // In the case of an indirect call find the register that holds
+ // the callee.
const MachineOperand &CalleeOp = MI.getOperand(0);
- if (!CalleeOp.isGlobal())
- continue;
- const Function *CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
- if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ if (!CalleeOp.isGlobal() && !CalleeOp.isReg())
continue;
+ unsigned CallReg = 0;
+ const DISubprogram *CalleeSP = nullptr;
+ const Function *CalleeDecl = nullptr;
+ if (CalleeOp.isReg()) {
+ CallReg = CalleeOp.getReg();
+ if (!CallReg)
+ continue;
+ } else {
+ CalleeDecl = dyn_cast<Function>(CalleeOp.getGlobal());
+ if (!CalleeDecl || !CalleeDecl->getSubprogram())
+ continue;
+ CalleeSP = CalleeDecl->getSubprogram();
+ }
+
// TODO: Omit call site entries for runtime calls (objc_msgSend, etc).
- // TODO: Add support for indirect calls.
bool IsTail = TII->isTailCall(MI);
- // For tail calls, no return PC information is needed. For regular calls,
- // the return PC is needed to disambiguate paths in the call graph which
- // could lead to some target function.
+ // For tail calls, for non-gdb tuning, no return PC information is needed.
+ // For regular calls (and tail calls in GDB tuning), the return PC
+ // is needed to disambiguate paths in the call graph which could lead to
+ // some target function.
const MCExpr *PCOffset =
- IsTail ? nullptr : getFunctionLocalOffsetAfterInsn(&MI);
+ (IsTail && !tuneForGDB()) ? nullptr
+ : getFunctionLocalOffsetAfterInsn(&MI);
+
+ // Address of a call-like instruction for a normal call or a jump-like
+ // instruction for a tail call. This is needed for GDB + DWARF 4 tuning.
+      const MCSymbol *PCAddr =
+          ApplyGNUExtensions ? getLabelAfterInsn(&MI) : nullptr;
+
+ assert((IsTail || PCOffset || PCAddr) &&
+ "Call without return PC information");
- assert((IsTail || PCOffset) && "Call without return PC information");
LLVM_DEBUG(dbgs() << "CallSiteEntry: " << MF.getName() << " -> "
- << CalleeDecl->getName() << (IsTail ? " [tail]" : "")
- << "\n");
- CU.constructCallSiteEntryDIE(ScopeDIE, *CalleeDecl->getSubprogram(),
- IsTail, PCOffset);
+ << (CalleeDecl ? CalleeDecl->getName()
+ : StringRef(MF.getSubtarget()
+ .getRegisterInfo()
+ ->getName(CallReg)))
+ << (IsTail ? " [IsTail]" : "") << "\n");
+
+ DIE &CallSiteDIE =
+ CU.constructCallSiteEntryDIE(ScopeDIE, CalleeSP, IsTail, PCAddr,
+ PCOffset, CallReg);
+
+ // GDB and LLDB support call site parameter debug info.
+ if (Asm->TM.Options.EnableDebugEntryValues &&
+ (tuneForGDB() || tuneForLLDB())) {
+ ParamSet Params;
+ // Try to interpret values of call site parameters.
+ collectCallSiteParameters(&MI, Params);
+ CU.constructCallSiteParmEntryDIEs(CallSiteDIE, Params);
+ }
}
}
}
@@ -680,7 +852,7 @@ DwarfDebug::getOrCreateDwarfCompileUnit(const DICompileUnit *DIUnit) {
CompilationDir = DIUnit->getDirectory();
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
InfoHolder.getUnits().size(), DIUnit, Asm, this, &InfoHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
InfoHolder.addUnit(std::move(OwnedUnit));
@@ -793,8 +965,6 @@ void DwarfDebug::beginModule() {
DwarfFile &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder;
Holder.setRnglistsTableBaseSym(
Asm->createTempSymbol("rnglists_table_base"));
- Holder.setLoclistsTableBaseSym(
- Asm->createTempSymbol("loclists_table_base"));
if (useSplitDwarf())
InfoHolder.setRnglistsTableBaseSym(
@@ -907,7 +1077,7 @@ void DwarfDebug::finalizeModuleInfo() {
// If we're splitting the dwarf out now that we've got the entire
// CU then add the dwo id to it.
auto *SkCU = TheCU.getSkeleton();
- if (useSplitDwarf() && !empty(TheCU.getUnitDie().children())) {
+ if (useSplitDwarf() && !TheCU.getUnitDie().children().empty()) {
finishUnitAttributes(TheCU.getCUNode(), TheCU);
TheCU.addString(TheCU.getUnitDie(), dwarf::DW_AT_GNU_dwo_name,
Asm->TM.Options.MCOptions.SplitDwarfFile);
@@ -951,7 +1121,7 @@ void DwarfDebug::finalizeModuleInfo() {
// 2.17.3).
U.addUInt(U.getUnitDie(), dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0);
else
- U.setBaseAddress(TheCU.getRanges().front().getStart());
+ U.setBaseAddress(TheCU.getRanges().front().Begin);
U.attachRangesOrLowHighPC(U.getUnitDie(), TheCU.takeRanges());
}
@@ -959,15 +1129,19 @@ void DwarfDebug::finalizeModuleInfo() {
// is a bit pessimistic under LTO.
if (!AddrPool.isEmpty() &&
(getDwarfVersion() >= 5 ||
- (SkCU && !empty(TheCU.getUnitDie().children()))))
+ (SkCU && !TheCU.getUnitDie().children().empty())))
U.addAddrTableBase();
if (getDwarfVersion() >= 5) {
if (U.hasRangeLists())
U.addRnglistsBase();
- if (!DebugLocs.getLists().empty() && !useSplitDwarf())
- U.addLoclistsBase();
+ if (!DebugLocs.getLists().empty() && !useSplitDwarf()) {
+ DebugLocs.setSym(Asm->createTempSymbol("loclists_table_base"));
+ U.addSectionLabel(U.getUnitDie(), dwarf::DW_AT_loclists_base,
+ DebugLocs.getSym(),
+ TLOF.getDwarfLoclistsSection()->getBeginSymbol());
+ }
}
auto *CUNode = cast<DICompileUnit>(P.first);
@@ -1105,7 +1279,7 @@ void DwarfDebug::collectVariableInfoFromMFTable(
continue;
ensureAbstractEntityIsCreatedIfScoped(TheCU, Var.first, Scope->getScopeNode());
- auto RegVar = llvm::make_unique<DbgVariable>(
+ auto RegVar = std::make_unique<DbgVariable>(
cast<DILocalVariable>(Var.first), Var.second);
RegVar->initializeMMI(VI.Expr, VI.Slot);
if (DbgVariable *DbgVar = MFVars.lookup(Var))
@@ -1316,13 +1490,13 @@ DbgEntity *DwarfDebug::createConcreteEntity(DwarfCompileUnit &TheCU,
ensureAbstractEntityIsCreatedIfScoped(TheCU, Node, Scope.getScopeNode());
if (isa<const DILocalVariable>(Node)) {
ConcreteEntities.push_back(
- llvm::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
+ std::make_unique<DbgVariable>(cast<const DILocalVariable>(Node),
Location));
InfoHolder.addScopeVariable(&Scope,
cast<DbgVariable>(ConcreteEntities.back().get()));
} else if (isa<const DILabel>(Node)) {
ConcreteEntities.push_back(
- llvm::make_unique<DbgLabel>(cast<const DILabel>(Node),
+ std::make_unique<DbgLabel>(cast<const DILabel>(Node),
Location, Sym));
InfoHolder.addScopeLabel(&Scope,
cast<DbgLabel>(ConcreteEntities.back().get()));
@@ -1419,11 +1593,14 @@ void DwarfDebug::collectEntityInfo(DwarfCompileUnit &TheCU,
LexicalScope *Scope = nullptr;
const DILabel *Label = cast<DILabel>(IL.first);
+ // The scope could have an extra lexical block file.
+ const DILocalScope *LocalScope =
+ Label->getScope()->getNonLexicalBlockFileScope();
// Get inlined DILocation if it is inlined label.
if (const DILocation *IA = IL.second)
- Scope = LScopes.findInlinedScope(Label->getScope(), IA);
+ Scope = LScopes.findInlinedScope(LocalScope, IA);
else
- Scope = LScopes.findLexicalScope(Label->getScope());
+ Scope = LScopes.findLexicalScope(LocalScope);
// If label scope is not found then skip this label.
if (!Scope)
continue;
@@ -1607,6 +1784,9 @@ void DwarfDebug::beginFunctionImpl(const MachineFunction *MF) {
if (SP->getUnit()->getEmissionKind() == DICompileUnit::NoDebug)
return;
+ SectionLabels.insert(std::make_pair(&Asm->getFunctionBegin()->getSection(),
+ Asm->getFunctionBegin()));
+
DwarfCompileUnit &CU = getOrCreateDwarfCompileUnit(SP->getUnit());
// Set DwarfDwarfCompileUnitID in MCContext to the Compile Unit this function
@@ -1654,7 +1834,7 @@ void DwarfDebug::endFunctionImpl(const MachineFunction *MF) {
collectEntityInfo(TheCU, SP, Processed);
// Add the range of this function to the list of ranges for the CU.
- TheCU.addRange(RangeSpan(Asm->getFunctionBegin(), Asm->getFunctionEnd()));
+ TheCU.addRange({Asm->getFunctionBegin(), Asm->getFunctionEnd()});
// Under -gmlt, skip building the subprogram if there are no inlined
// subroutines inside it. But with -fdebug-info-for-profiling, the subprogram
@@ -1836,9 +2016,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
case dwarf::DW_TAG_union_type:
case dwarf::DW_TAG_enumeration_type:
return dwarf::PubIndexEntryDescriptor(
- dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
- ? dwarf::GIEL_STATIC
- : dwarf::GIEL_EXTERNAL);
+ dwarf::GIEK_TYPE,
+ dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
+ ? dwarf::GIEL_EXTERNAL
+ : dwarf::GIEL_STATIC);
case dwarf::DW_TAG_typedef:
case dwarf::DW_TAG_base_type:
case dwarf::DW_TAG_subrange_type:
@@ -1967,7 +2148,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
DWARFExpression Expr(Data, getDwarfVersion(), PtrSize);
using Encoding = DWARFExpression::Operation::Encoding;
- uint32_t Offset = 0;
+ uint64_t Offset = 0;
for (auto &Op : Expr) {
assert(Op.getCode() != dwarf::DW_OP_const_type &&
"3 operand ops not yet supported");
@@ -1990,7 +2171,7 @@ void DwarfDebug::emitDebugLocEntry(ByteStreamer &Streamer,
if (Comment != End)
Comment++;
} else {
- for (uint32_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
+ for (uint64_t J = Offset; J < Op.getOperandEndOffset(I); ++J)
Streamer.EmitInt8(Data.getData()[J], Comment != End ? *(Comment++) : "");
}
Offset = Op.getOperandEndOffset(I);
@@ -2020,7 +2201,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT,
if (DIExpr->isEntryValue()) {
DwarfExpr.setEntryValueFlag();
- DwarfExpr.addEntryValueExpression(Cursor);
+ DwarfExpr.beginEntryValueExpression(Cursor);
}
const TargetRegisterInfo &TRI = *AP.MF->getSubtarget().getRegisterInfo();
@@ -2083,7 +2264,7 @@ void DwarfDebug::emitDebugLocEntryLocation(const DebugLocStream::Entry &Entry,
}
// Emit the common part of the DWARF 5 range/locations list tables header.
-static void emitListsTableHeaderStart(AsmPrinter *Asm, const DwarfFile &Holder,
+static void emitListsTableHeaderStart(AsmPrinter *Asm,
MCSymbol *TableStart,
MCSymbol *TableEnd) {
// Build the table header, which starts with the length field.
@@ -2108,7 +2289,7 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
const DwarfFile &Holder) {
MCSymbol *TableStart = Asm->createTempSymbol("debug_rnglist_table_start");
MCSymbol *TableEnd = Asm->createTempSymbol("debug_rnglist_table_end");
- emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+ emitListsTableHeaderStart(Asm, TableStart, TableEnd);
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(Holder.getRangeLists().size());
@@ -2125,94 +2306,147 @@ static MCSymbol *emitRnglistsTableHeader(AsmPrinter *Asm,
// designates the end of the table for the caller to emit when the table is
// complete.
static MCSymbol *emitLoclistsTableHeader(AsmPrinter *Asm,
- const DwarfFile &Holder) {
+ const DwarfDebug &DD) {
MCSymbol *TableStart = Asm->createTempSymbol("debug_loclist_table_start");
MCSymbol *TableEnd = Asm->createTempSymbol("debug_loclist_table_end");
- emitListsTableHeaderStart(Asm, Holder, TableStart, TableEnd);
+ emitListsTableHeaderStart(Asm, TableStart, TableEnd);
+
+ const auto &DebugLocs = DD.getDebugLocs();
// FIXME: Generate the offsets table and use DW_FORM_loclistx with the
// DW_AT_loclists_base attribute. Until then set the number of offsets to 0.
Asm->OutStreamer->AddComment("Offset entry count");
Asm->emitInt32(0);
- Asm->OutStreamer->EmitLabel(Holder.getLoclistsTableBaseSym());
+ Asm->OutStreamer->EmitLabel(DebugLocs.getSym());
return TableEnd;
}
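For reference, the .debug_loclists header emitted here has the standard DWARF 5 shape (a sketch; the leading fields are presumably produced by emitListsTableHeaderStart):

    unit_length               (TableEnd - TableStart)
    version                   5
    address_size / segment_selector_size
    offset_entry_count        0       ; see the FIXME above
    <DebugLocs.getSym()>              ; DW_AT_loclists_base points here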
-// Emit locations into the .debug_loc/.debug_rnglists section.
-void DwarfDebug::emitDebugLoc() {
- if (DebugLocs.getLists().empty())
- return;
+template <typename Ranges, typename PayloadEmitter>
+static void emitRangeList(
+ DwarfDebug &DD, AsmPrinter *Asm, MCSymbol *Sym, const Ranges &R,
+ const DwarfCompileUnit &CU, unsigned BaseAddressx, unsigned OffsetPair,
+ unsigned StartxLength, unsigned EndOfList,
+    StringRef (*StringifyEnum)(unsigned), bool ShouldUseBaseAddress,
+    PayloadEmitter EmitPayload) {
- bool IsLocLists = getDwarfVersion() >= 5;
- MCSymbol *TableEnd = nullptr;
- if (IsLocLists) {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLoclistsSection());
- TableEnd = emitLoclistsTableHeader(Asm, useSplitDwarf() ? SkeletonHolder
- : InfoHolder);
- } else {
- Asm->OutStreamer->SwitchSection(
- Asm->getObjFileLowering().getDwarfLocSection());
- }
+ auto Size = Asm->MAI->getCodePointerSize();
+ bool UseDwarf5 = DD.getDwarfVersion() >= 5;
- unsigned char Size = Asm->MAI->getCodePointerSize();
- for (const auto &List : DebugLocs.getLists()) {
- Asm->OutStreamer->EmitLabel(List.Label);
+ // Emit our symbol so we can find the beginning of the range.
+ Asm->OutStreamer->EmitLabel(Sym);
- const DwarfCompileUnit *CU = List.CU;
- const MCSymbol *Base = CU->getBaseAddress();
- for (const auto &Entry : DebugLocs.getEntries(List)) {
+ // Gather all the ranges that apply to the same section so they can share
+ // a base address entry.
+  MapVector<const MCSection *, std::vector<decltype(&*R.begin())>>
+      SectionRanges;
+
+ for (const auto &Range : R)
+ SectionRanges[&Range.Begin->getSection()].push_back(&Range);
+
+ const MCSymbol *CUBase = CU.getBaseAddress();
+ bool BaseIsSet = false;
+ for (const auto &P : SectionRanges) {
+ auto *Base = CUBase;
+ if (!Base && ShouldUseBaseAddress) {
+ const MCSymbol *Begin = P.second.front()->Begin;
+ const MCSymbol *NewBase = DD.getSectionLabel(&Begin->getSection());
+ if (!UseDwarf5) {
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->AddComment(" base address");
+ Asm->OutStreamer->EmitSymbolValue(Base, Size);
+ } else if (NewBase != Begin || P.second.size() > 1) {
+        // Only use a base address if
+        // * the existing pool address doesn't match (NewBase != Begin), or
+        // * there's more than one entry to share the base address.
+ Base = NewBase;
+ BaseIsSet = true;
+ Asm->OutStreamer->AddComment(StringifyEnum(BaseAddressx));
+ Asm->emitInt8(BaseAddressx);
+ Asm->OutStreamer->AddComment(" base address index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
+ }
+ } else if (BaseIsSet && !UseDwarf5) {
+ BaseIsSet = false;
+ assert(!Base);
+ Asm->OutStreamer->EmitIntValue(-1, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ }
+
+ for (const auto *RS : P.second) {
+ const MCSymbol *Begin = RS->Begin;
+ const MCSymbol *End = RS->End;
+ assert(Begin && "Range without a begin symbol?");
+ assert(End && "Range without an end symbol?");
if (Base) {
- // Set up the range. This range is relative to the entry point of the
- // compile unit. This is a hard coded 0 for low_pc when we're emitting
- // ranges, or the DW_AT_low_pc on the compile unit otherwise.
- if (IsLocLists) {
- Asm->OutStreamer->AddComment("DW_LLE_offset_pair");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_offset_pair, 1);
+ if (UseDwarf5) {
+ // Emit offset_pair when we have a base.
+ Asm->OutStreamer->AddComment(StringifyEnum(OffsetPair));
+ Asm->emitInt8(OffsetPair);
Asm->OutStreamer->AddComment(" starting offset");
- Asm->EmitLabelDifferenceAsULEB128(Entry.BeginSym, Base);
+ Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
Asm->OutStreamer->AddComment(" ending offset");
- Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Base);
+ Asm->EmitLabelDifferenceAsULEB128(End, Base);
} else {
- Asm->EmitLabelDifference(Entry.BeginSym, Base, Size);
- Asm->EmitLabelDifference(Entry.EndSym, Base, Size);
+ Asm->EmitLabelDifference(Begin, Base, Size);
+ Asm->EmitLabelDifference(End, Base, Size);
}
-
- emitDebugLocEntryLocation(Entry, CU);
- continue;
- }
-
- // We have no base address.
- if (IsLocLists) {
- // TODO: Use DW_LLE_base_addressx + DW_LLE_offset_pair, or
- // DW_LLE_startx_length in case if there is only a single range.
- // That should reduce the size of the debug data emited.
- // For now just use the DW_LLE_startx_length for all cases.
- Asm->OutStreamer->AddComment("DW_LLE_startx_length");
- Asm->emitInt8(dwarf::DW_LLE_startx_length);
- Asm->OutStreamer->AddComment(" start idx");
- Asm->EmitULEB128(AddrPool.getIndex(Entry.BeginSym));
+ } else if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(StartxLength));
+ Asm->emitInt8(StartxLength);
+ Asm->OutStreamer->AddComment(" start index");
+ Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
Asm->OutStreamer->AddComment(" length");
- Asm->EmitLabelDifferenceAsULEB128(Entry.EndSym, Entry.BeginSym);
+ Asm->EmitLabelDifferenceAsULEB128(End, Begin);
} else {
- Asm->OutStreamer->EmitSymbolValue(Entry.BeginSym, Size);
- Asm->OutStreamer->EmitSymbolValue(Entry.EndSym, Size);
+ Asm->OutStreamer->EmitSymbolValue(Begin, Size);
+ Asm->OutStreamer->EmitSymbolValue(End, Size);
}
-
- emitDebugLocEntryLocation(Entry, CU);
+ EmitPayload(*RS);
}
+ }
- if (IsLocLists) {
- // .debug_loclists section ends with DW_LLE_end_of_list.
- Asm->OutStreamer->AddComment("DW_LLE_end_of_list");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_LLE_end_of_list, 1);
- } else {
- // Terminate the .debug_loc list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
+ if (UseDwarf5) {
+ Asm->OutStreamer->AddComment(StringifyEnum(EndOfList));
+ Asm->emitInt8(EndOfList);
+ } else {
+ // Terminate the list with two 0 values.
+ Asm->OutStreamer->EmitIntValue(0, Size);
+ Asm->OutStreamer->EmitIntValue(0, Size);
}
+}
+
+static void emitLocList(DwarfDebug &DD, AsmPrinter *Asm,
+                        const DebugLocStream::List &List) {
+ emitRangeList(
+ DD, Asm, List.Label, DD.getDebugLocs().getEntries(List), *List.CU,
+ dwarf::DW_LLE_base_addressx, dwarf::DW_LLE_offset_pair,
+ dwarf::DW_LLE_startx_length, dwarf::DW_LLE_end_of_list,
+ llvm::dwarf::LocListEncodingString,
+ /* ShouldUseBaseAddress */ true,
+ [&](const DebugLocStream::Entry &E) {
+ DD.emitDebugLocEntryLocation(E, List.CU);
+ });
+}
+
+// Emit locations into the .debug_loc/.debug_rnglists section.
+void DwarfDebug::emitDebugLoc() {
+ if (DebugLocs.getLists().empty())
+ return;
+
+ MCSymbol *TableEnd = nullptr;
+ if (getDwarfVersion() >= 5) {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLoclistsSection());
+ TableEnd = emitLoclistsTableHeader(Asm, *this);
+ } else {
+ Asm->OutStreamer->SwitchSection(
+ Asm->getObjFileLowering().getDwarfLocSection());
+ }
+
+ for (const auto &List : DebugLocs.getLists())
+ emitLocList(*this, Asm, List);
if (TableEnd)
Asm->OutStreamer->EmitLabel(TableEnd);
@@ -2232,9 +2466,9 @@ void DwarfDebug::emitDebugLocDWO() {
// Ideally/in v5, this could use SectionLabels to reuse existing addresses
// in the address pool to minimize object size/relocations.
Asm->emitInt8(dwarf::DW_LLE_startx_length);
- unsigned idx = AddrPool.getIndex(Entry.BeginSym);
+ unsigned idx = AddrPool.getIndex(Entry.Begin);
Asm->EmitULEB128(idx);
- Asm->EmitLabelDifference(Entry.EndSym, Entry.BeginSym, 4);
+ Asm->EmitLabelDifference(Entry.End, Entry.Begin, 4);
emitDebugLocEntryLocation(Entry, List.CU);
}
@@ -2360,7 +2594,7 @@ void DwarfDebug::emitDebugARanges() {
// 7.20 in the Dwarf specs requires the table to be aligned to a tuple.
unsigned Padding =
- OffsetToAlignment(sizeof(int32_t) + ContentSize, TupleSize);
+ offsetToAlignment(sizeof(int32_t) + ContentSize, Align(TupleSize));
ContentSize += Padding;
ContentSize += (List.size() + 1) * TupleSize;
@@ -2405,93 +2639,13 @@ void DwarfDebug::emitDebugARanges() {
/// Emit a single range list. We handle both DWARF v5 and earlier.
static void emitRangeList(DwarfDebug &DD, AsmPrinter *Asm,
const RangeSpanList &List) {
-
- auto DwarfVersion = DD.getDwarfVersion();
- // Emit our symbol so we can find the beginning of the range.
- Asm->OutStreamer->EmitLabel(List.getSym());
- // Gather all the ranges that apply to the same section so they can share
- // a base address entry.
- MapVector<const MCSection *, std::vector<const RangeSpan *>> SectionRanges;
- // Size for our labels.
- auto Size = Asm->MAI->getCodePointerSize();
-
- for (const RangeSpan &Range : List.getRanges())
- SectionRanges[&Range.getStart()->getSection()].push_back(&Range);
-
- const DwarfCompileUnit &CU = List.getCU();
- const MCSymbol *CUBase = CU.getBaseAddress();
- bool BaseIsSet = false;
- for (const auto &P : SectionRanges) {
- // Don't bother with a base address entry if there's only one range in
- // this section in this range list - for example ranges for a CU will
- // usually consist of single regions from each of many sections
- // (-ffunction-sections, or just C++ inline functions) except under LTO
- // or optnone where there may be holes in a single CU's section
- // contributions.
- auto *Base = CUBase;
- if (!Base && (P.second.size() > 1 || DwarfVersion < 5) &&
- (CU.getCUNode()->getRangesBaseAddress() || DwarfVersion >= 5)) {
- BaseIsSet = true;
- // FIXME/use care: This may not be a useful base address if it's not
- // the lowest address/range in this object.
- Base = P.second.front()->getStart();
- if (DwarfVersion >= 5) {
- Base = DD.getSectionLabel(&Base->getSection());
- Asm->OutStreamer->AddComment("DW_RLE_base_addressx");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_base_addressx, 1);
- Asm->OutStreamer->AddComment(" base address index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Base));
- } else {
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->AddComment(" base address");
- Asm->OutStreamer->EmitSymbolValue(Base, Size);
- }
- } else if (BaseIsSet && DwarfVersion < 5) {
- BaseIsSet = false;
- assert(!Base);
- Asm->OutStreamer->EmitIntValue(-1, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
-
- for (const auto *RS : P.second) {
- const MCSymbol *Begin = RS->getStart();
- const MCSymbol *End = RS->getEnd();
- assert(Begin && "Range without a begin symbol?");
- assert(End && "Range without an end symbol?");
- if (Base) {
- if (DwarfVersion >= 5) {
- // Emit DW_RLE_offset_pair when we have a base.
- Asm->OutStreamer->AddComment("DW_RLE_offset_pair");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_offset_pair, 1);
- Asm->OutStreamer->AddComment(" starting offset");
- Asm->EmitLabelDifferenceAsULEB128(Begin, Base);
- Asm->OutStreamer->AddComment(" ending offset");
- Asm->EmitLabelDifferenceAsULEB128(End, Base);
- } else {
- Asm->EmitLabelDifference(Begin, Base, Size);
- Asm->EmitLabelDifference(End, Base, Size);
- }
- } else if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_startx_length");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_startx_length, 1);
- Asm->OutStreamer->AddComment(" start index");
- Asm->EmitULEB128(DD.getAddressPool().getIndex(Begin));
- Asm->OutStreamer->AddComment(" length");
- Asm->EmitLabelDifferenceAsULEB128(End, Begin);
- } else {
- Asm->OutStreamer->EmitSymbolValue(Begin, Size);
- Asm->OutStreamer->EmitSymbolValue(End, Size);
- }
- }
- }
- if (DwarfVersion >= 5) {
- Asm->OutStreamer->AddComment("DW_RLE_end_of_list");
- Asm->OutStreamer->EmitIntValue(dwarf::DW_RLE_end_of_list, 1);
- } else {
- // Terminate the list with two 0 values.
- Asm->OutStreamer->EmitIntValue(0, Size);
- Asm->OutStreamer->EmitIntValue(0, Size);
- }
+ emitRangeList(DD, Asm, List.getSym(), List.getRanges(), List.getCU(),
+ dwarf::DW_RLE_base_addressx, dwarf::DW_RLE_offset_pair,
+ dwarf::DW_RLE_startx_length, dwarf::DW_RLE_end_of_list,
+ llvm::dwarf::RangeListEncodingString,
+ List.getCU().getCUNode()->getRangesBaseAddress() ||
+ DD.getDwarfVersion() >= 5,
+ [](auto) {});
}
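Both instantiations emit the same overall shape. For DWARF 5 with a shared base address, a range list body comes out roughly as:

    DW_RLE_base_addressx   <ULEB128 address pool index>
    DW_RLE_offset_pair     <begin - base>  <end - base>   (ULEB128 pair)
    ...
    DW_RLE_end_of_list

Pre-v5, the base address is announced as a (-1, symbol) tuple, entries are plain begin/end address values, and the list terminates with two zero values.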
static void emitDebugRangesImpl(DwarfDebug &DD, AsmPrinter *Asm,
@@ -2637,7 +2791,7 @@ void DwarfDebug::initSkeletonUnit(const DwarfUnit &U, DIE &Die,
DwarfCompileUnit &DwarfDebug::constructSkeletonCU(const DwarfCompileUnit &CU) {
- auto OwnedUnit = llvm::make_unique<DwarfCompileUnit>(
+ auto OwnedUnit = std::make_unique<DwarfCompileUnit>(
CU.getUniqueID(), CU.getCUNode(), Asm, this, &SkeletonHolder);
DwarfCompileUnit &NewCU = *OwnedUnit;
NewCU.setSection(Asm->getObjFileLowering().getDwarfInfoSection());
@@ -2737,7 +2891,7 @@ void DwarfDebug::addDwarfTypeUnitType(DwarfCompileUnit &CU,
bool TopLevelType = TypeUnitsUnderConstruction.empty();
AddrPool.resetUsedFlag();
- auto OwnedUnit = llvm::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
+ auto OwnedUnit = std::make_unique<DwarfTypeUnit>(CU, Asm, this, &InfoHolder,
getDwoLineTable(CU));
DwarfTypeUnit &NewTU = *OwnedUnit;
DIE &UnitDie = NewTU.getUnitDie();
@@ -2879,10 +3033,6 @@ uint16_t DwarfDebug::getDwarfVersion() const {
return Asm->OutStreamer->getContext().getDwarfVersion();
}
-void DwarfDebug::addSectionLabel(const MCSymbol *Sym) {
- SectionLabels.insert(std::make_pair(&Sym->getSection(), Sym));
-}
-
const MCSymbol *DwarfDebug::getSectionLabel(const MCSection *S) {
return SectionLabels.find(S)->second;
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 3ac474e2bdda..c8c511f67c2a 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -153,7 +153,7 @@ public:
assert(!ValueLoc && "Already initialized?");
assert(!Value.getExpression()->isFragment() && "Fragments not supported.");
- ValueLoc = llvm::make_unique<DbgValueLoc>(Value);
+ ValueLoc = std::make_unique<DbgValueLoc>(Value);
if (auto *E = ValueLoc->getExpression())
if (E->getNumElements())
FrameIndexExprs.push_back({0, E});
@@ -216,7 +216,6 @@ public:
return !FrameIndexExprs.empty();
}
- bool isBlockByrefVariable() const;
const DIType *getType() const;
static bool classof(const DbgEntity *N) {
@@ -254,6 +253,25 @@ public:
}
};
+/// Used for tracking debug info about call site parameters.
+class DbgCallSiteParam {
+private:
+ unsigned Register; ///< Parameter register at the callee entry point.
+ DbgValueLoc Value; ///< Corresponding location for the parameter value at
+ ///< the call site.
+public:
+ DbgCallSiteParam(unsigned Reg, DbgValueLoc Val)
+ : Register(Reg), Value(Val) {
+ assert(Reg && "Parameter register cannot be undef");
+ }
+
+ unsigned getRegister() const { return Register; }
+ DbgValueLoc getValue() const { return Value; }
+};
+
+/// Collection used for storing debug call site parameters.
+using ParamSet = SmallVector<DbgCallSiteParam, 4>;
+
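A minimal construction sketch (Expr and ParamReg are placeholders; the DbgValueLoc constructor is the one collectCallSiteParameters uses for immediates):

    // Record that the value forwarded in ParamReg was the constant 42.
    DbgValueLoc Loc(Expr, INT64_C(42));
    ParamSet Params;
    Params.push_back(DbgCallSiteParam(ParamReg, Loc));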
/// Helper used to pair up a symbol and its DWARF compile unit.
struct SymbolCU {
SymbolCU(DwarfCompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
index 2858afaa1cf1..1c5a244d7c5d 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.cpp
@@ -15,6 +15,7 @@
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/BinaryFormat/Dwarf.h"
+#include "llvm/CodeGen/Register.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/ErrorHandling.h"
@@ -97,7 +98,7 @@ void DwarfExpression::addAnd(unsigned Mask) {
bool DwarfExpression::addMachineReg(const TargetRegisterInfo &TRI,
unsigned MachineReg, unsigned MaxSize) {
- if (!TRI.isPhysicalRegister(MachineReg)) {
+ if (!llvm::Register::isPhysicalRegister(MachineReg)) {
if (isFrameRegister(TRI, MachineReg)) {
DwarfRegs.push_back({-1, 0, nullptr});
return true;
@@ -241,15 +242,22 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return false;
}
- // Handle simple register locations.
- if (!isMemoryLocation() && !HasComplexExpression) {
+ // Handle simple register locations. If we are supposed to emit
+ // a call site parameter expression and if that expression is just a register
+ // location, emit it with addBReg and offset 0, because we should emit a DWARF
+ // expression representing a value, rather than a location.
+ if (!isMemoryLocation() && !HasComplexExpression &&
+ (!isParameterValue() || isEntryValue())) {
for (auto &Reg : DwarfRegs) {
if (Reg.DwarfRegNo >= 0)
addReg(Reg.DwarfRegNo, Reg.Comment);
addOpPiece(Reg.Size);
}
- if (isEntryValue() && DwarfVersion >= 4)
+ if (isEntryValue())
+ finalizeEntryValue();
+
+ if (isEntryValue() && !isParameterValue() && DwarfVersion >= 4)
emitOp(dwarf::DW_OP_stack_value);
DwarfRegs.clear();
@@ -275,19 +283,27 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
// Pattern-match combinations for which more efficient representations exist.
// [Reg, DW_OP_plus_uconst, Offset] --> [DW_OP_breg, Offset].
if (Op && (Op->getOp() == dwarf::DW_OP_plus_uconst)) {
- SignedOffset = Op->getArg(0);
- ExprCursor.take();
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
+ if (Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.take();
+ }
}
// [Reg, DW_OP_constu, Offset, DW_OP_plus] --> [DW_OP_breg, Offset]
// [Reg, DW_OP_constu, Offset, DW_OP_minus] --> [DW_OP_breg,-Offset]
// If Reg is a subregister we need to mask it out before subtracting.
if (Op && Op->getOp() == dwarf::DW_OP_constu) {
+ uint64_t Offset = Op->getArg(0);
+ uint64_t IntMax = static_cast<uint64_t>(std::numeric_limits<int>::max());
auto N = ExprCursor.peekNext();
- if (N && (N->getOp() == dwarf::DW_OP_plus ||
- (N->getOp() == dwarf::DW_OP_minus && !SubRegisterSizeInBits))) {
- int Offset = Op->getArg(0);
- SignedOffset = (N->getOp() == dwarf::DW_OP_minus) ? -Offset : Offset;
+ if (N && N->getOp() == dwarf::DW_OP_plus && Offset <= IntMax) {
+ SignedOffset = Offset;
+ ExprCursor.consume(2);
+ } else if (N && N->getOp() == dwarf::DW_OP_minus &&
+ !SubRegisterSizeInBits && Offset <= IntMax + 1) {
+ SignedOffset = -static_cast<int64_t>(Offset);
ExprCursor.consume(2);
}
}
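Concretely (illustrative DWARF register numbers):

    [DW_OP_reg6, DW_OP_plus_uconst 16]                -> DW_OP_breg6 +16
    [DW_OP_reg6, DW_OP_constu 16, DW_OP_minus]        -> DW_OP_breg6 -16
    [DW_OP_reg6, DW_OP_constu 0x80000000, DW_OP_plus] -> not folded

The last case is left unfolded because the offset exceeds INT_MAX and the folded form carries a signed offset.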
@@ -300,17 +316,34 @@ bool DwarfExpression::addMachineRegExpression(const TargetRegisterInfo &TRI,
return true;
}
-void DwarfExpression::addEntryValueExpression(DIExpressionCursor &ExprCursor) {
+void DwarfExpression::beginEntryValueExpression(
+ DIExpressionCursor &ExprCursor) {
auto Op = ExprCursor.take();
- assert(Op && Op->getOp() == dwarf::DW_OP_entry_value);
+ (void)Op;
+ assert(Op && Op->getOp() == dwarf::DW_OP_LLVM_entry_value);
assert(!isMemoryLocation() &&
"We don't support entry values of memory locations yet");
+ assert(!IsEmittingEntryValue && "Already emitting entry value?");
+ assert(Op->getArg(0) == 1 &&
+ "Can currently only emit entry values covering a single operation");
- if (DwarfVersion >= 5)
- emitOp(dwarf::DW_OP_entry_value);
- else
- emitOp(dwarf::DW_OP_GNU_entry_value);
- emitUnsigned(Op->getArg(0));
+ emitOp(CU.getDwarf5OrGNULocationAtom(dwarf::DW_OP_entry_value));
+ IsEmittingEntryValue = true;
+ enableTemporaryBuffer();
+}
+
+void DwarfExpression::finalizeEntryValue() {
+ assert(IsEmittingEntryValue && "Entry value not open?");
+ disableTemporaryBuffer();
+
+ // Emit the entry value's size operand.
+ unsigned Size = getTemporaryBufferSize();
+ emitUnsigned(Size);
+
+ // Emit the entry value's DWARF block operand.
+ commitTemporaryBuffer();
+
+ IsEmittingEntryValue = false;
}
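Taken together with beginEntryValueExpression above, the emission order for, say, DW_OP_entry_value(DW_OP_reg5) is (a sketch of the call sequence, not new API):

    emitOp(DW_OP_entry_value);               // or its GNU analog; main output
    enableTemporaryBuffer();                 // begin buffering the block
    emitOp(DW_OP_reg5);                      // lands in the temporary buffer
    disableTemporaryBuffer();
    emitUnsigned(getTemporaryBufferSize());  // the size operand: 1
    commitTemporaryBuffer();                 // flush DW_OP_reg5 after the size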
/// Assuming a well-formed expression, match "DW_OP_deref* DW_OP_LLVM_fragment?".
@@ -340,7 +373,17 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
while (ExprCursor) {
auto Op = ExprCursor.take();
- switch (Op->getOp()) {
+ uint64_t OpNum = Op->getOp();
+
+ if (OpNum >= dwarf::DW_OP_reg0 && OpNum <= dwarf::DW_OP_reg31) {
+ emitOp(OpNum);
+ continue;
+ } else if (OpNum >= dwarf::DW_OP_breg0 && OpNum <= dwarf::DW_OP_breg31) {
+ addBReg(OpNum - dwarf::DW_OP_breg0, Op->getArg(0));
+ continue;
+ }
+
+ switch (OpNum) {
case dwarf::DW_OP_LLVM_fragment: {
unsigned SizeInBits = Op->getArg(1);
unsigned FragmentOffset = Op->getArg(0);
@@ -389,10 +432,13 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_lit0:
case dwarf::DW_OP_not:
case dwarf::DW_OP_dup:
- emitOp(Op->getOp());
+ emitOp(OpNum);
break;
case dwarf::DW_OP_deref:
assert(!isRegisterLocation());
+      // For a more detailed explanation, see llvm.org/PR43343.
+      assert(!isParameterValue() && "Parameter entry values should not be "
+                                    "dereferenced for safety reasons.");
if (!isMemoryLocation() && ::isMemoryLocation(ExprCursor))
// Turning this into a memory location description makes the deref
// implicit.
@@ -458,12 +504,21 @@ void DwarfExpression::addExpression(DIExpressionCursor &&ExprCursor,
case dwarf::DW_OP_LLVM_tag_offset:
TagOffset = Op->getArg(0);
break;
+ case dwarf::DW_OP_regx:
+ emitOp(dwarf::DW_OP_regx);
+ emitUnsigned(Op->getArg(0));
+ break;
+ case dwarf::DW_OP_bregx:
+ emitOp(dwarf::DW_OP_bregx);
+ emitUnsigned(Op->getArg(0));
+ emitSigned(Op->getArg(1));
+ break;
default:
llvm_unreachable("unhandled opcode found in expression");
}
}
- if (isImplicitLocation())
+ if (isImplicitLocation() && !isParameterValue())
// Turn this into an implicit location description.
addStackValue();
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfExpression.h b/lib/CodeGen/AsmPrinter/DwarfExpression.h
index ec2ef6e575f7..1ad46669f9b2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfExpression.h
+++ b/lib/CodeGen/AsmPrinter/DwarfExpression.h
@@ -13,6 +13,7 @@
#ifndef LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
#define LLVM_LIB_CODEGEN_ASMPRINTER_DWARFEXPRESSION_H
+#include "ByteStreamer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
@@ -26,7 +27,6 @@ namespace llvm {
class AsmPrinter;
class APInt;
-class ByteStreamer;
class DwarfCompileUnit;
class DIELoc;
class TargetRegisterInfo;
@@ -95,6 +95,13 @@ public:
/// Base class containing the logic for constructing DWARF expressions
/// independently of whether they are emitted into a DIE or into a .debug_loc
/// entry.
+///
+/// Some DWARF operations, e.g. DW_OP_entry_value, need to calculate the size
+/// of a succeeding DWARF block before the latter is emitted to the output.
+/// To handle such cases, data can conditionally be emitted to a temporary
+/// buffer, which can later on be committed to the main output. The size of the
+/// temporary buffer is queryable, allowing for the size of the data to be
+/// emitted before the data is committed.
class DwarfExpression {
protected:
/// Holds information about all subregisters comprising a register location.
@@ -104,6 +111,9 @@ protected:
const char *Comment;
};
+ /// Whether we are currently emitting an entry value operation.
+ bool IsEmittingEntryValue = false;
+
DwarfCompileUnit &CU;
/// The register location, if any.
@@ -120,7 +130,7 @@ protected:
enum { Unknown = 0, Register, Memory, Implicit };
/// The flags of location description being produced.
- enum { EntryValue = 1 };
+ enum { EntryValue = 1, CallSiteParamValue };
unsigned LocationKind : 3;
unsigned LocationFlags : 2;
@@ -147,6 +157,10 @@ public:
return LocationFlags & EntryValue;
}
+ bool isParameterValue() {
+ return LocationFlags & CallSiteParamValue;
+ }
+
Optional<uint8_t> TagOffset;
protected:
@@ -174,6 +188,22 @@ protected:
virtual void emitBaseTypeRef(uint64_t Idx) = 0;
+ /// Start emitting data to the temporary buffer. The data stored in the
+ /// temporary buffer can be committed to the main output using
+ /// commitTemporaryBuffer().
+ virtual void enableTemporaryBuffer() = 0;
+
+ /// Disable emission to the temporary buffer. This does not commit data
+ /// in the temporary buffer to the main output.
+ virtual void disableTemporaryBuffer() = 0;
+
+ /// Return the emitted size, in number of bytes, for the data stored in the
+ /// temporary buffer.
+ virtual unsigned getTemporaryBufferSize() = 0;
+
+ /// Commit the data stored in the temporary buffer to the main output.
+ virtual void commitTemporaryBuffer() = 0;
+
/// Emit a normalized unsigned constant.
void emitConstu(uint64_t Value);
@@ -233,6 +263,10 @@ protected:
/// expression. See PR21176 for more details.
void addStackValue();
+ /// Finalize an entry value by emitting its size operand, and committing the
+ /// DWARF block which has been emitted to the temporary buffer.
+ void finalizeEntryValue();
+
~DwarfExpression() = default;
public:
@@ -264,6 +298,11 @@ public:
LocationFlags |= EntryValue;
}
+ /// Lock this down to become a call site parameter location.
+ void setCallSiteParamValueFlag() {
+ LocationFlags |= CallSiteParamValue;
+ }
+
/// Emit a machine register location. As an optimization this may also consume
/// the prefix of a DwarfExpression if a more efficient representation for
/// combining the register location and the first operation exists.
@@ -278,8 +317,11 @@ public:
DIExpressionCursor &Expr, unsigned MachineReg,
unsigned FragmentOffsetInBits = 0);
- /// Emit entry value dwarf operation.
- void addEntryValueExpression(DIExpressionCursor &ExprCursor);
+ /// Begin emission of an entry value dwarf operation. The entry value's
+ /// first operand is the size of the DWARF block (its second operand),
+ /// which needs to be calculated at time of emission, so we don't emit
+ /// any operands here.
+ void beginEntryValueExpression(DIExpressionCursor &ExprCursor);
/// Emit all remaining operations in the DIExpressionCursor.
///
@@ -299,31 +341,62 @@ public:
/// DwarfExpression implementation for .debug_loc entries.
class DebugLocDwarfExpression final : public DwarfExpression {
- ByteStreamer &BS;
+
+ struct TempBuffer {
+ SmallString<32> Bytes;
+ std::vector<std::string> Comments;
+ BufferByteStreamer BS;
+
+ TempBuffer(bool GenerateComments) : BS(Bytes, Comments, GenerateComments) {}
+ };
+
+ std::unique_ptr<TempBuffer> TmpBuf;
+ BufferByteStreamer &OutBS;
+ bool IsBuffering = false;
+
+ /// Return the byte streamer that currently is being emitted to.
+ ByteStreamer &getActiveStreamer() { return IsBuffering ? TmpBuf->BS : OutBS; }
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
void emitData1(uint8_t Value) override;
void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
-
public:
- DebugLocDwarfExpression(unsigned DwarfVersion, ByteStreamer &BS, DwarfCompileUnit &CU)
- : DwarfExpression(DwarfVersion, CU), BS(BS) {}
+ DebugLocDwarfExpression(unsigned DwarfVersion, BufferByteStreamer &BS,
+ DwarfCompileUnit &CU)
+ : DwarfExpression(DwarfVersion, CU), OutBS(BS) {}
};
/// DwarfExpression implementation for singular DW_AT_location.
class DIEDwarfExpression final : public DwarfExpression {
-const AsmPrinter &AP;
- DIELoc &DIE;
+ const AsmPrinter &AP;
+ DIELoc &OutDIE;
+ DIELoc TmpDIE;
+ bool IsBuffering = false;
+
+ /// Return the DIE that currently is being emitted to.
+ DIELoc &getActiveDIE() { return IsBuffering ? TmpDIE : OutDIE; }
void emitOp(uint8_t Op, const char *Comment = nullptr) override;
void emitSigned(int64_t Value) override;
void emitUnsigned(uint64_t Value) override;
void emitData1(uint8_t Value) override;
void emitBaseTypeRef(uint64_t Idx) override;
+
+ void enableTemporaryBuffer() override;
+ void disableTemporaryBuffer() override;
+ unsigned getTemporaryBufferSize() override;
+ void commitTemporaryBuffer() override;
+
bool isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) override;
public:
@@ -331,7 +404,7 @@ public:
DIELoc *finalize() {
DwarfExpression::finalize();
- return &DIE;
+ return &OutDIE;
}
};
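The header above only declares the buffering hooks; the matching DwarfExpression.cpp change is not part of this hunk. Given the interface, a plausible sketch of finalizeEntryValue() — the method that consumes the buffer — could look like this (only the members declared above are assumed):

void DwarfExpression::finalizeEntryValue() {
  assert(IsEmittingEntryValue && "Entry value not open?");
  disableTemporaryBuffer();
  // DW_OP_entry_value's first operand is the length of the block that
  // follows; it is only known once the block has been buffered.
  emitUnsigned(getTemporaryBufferSize());
  // Now append the buffered DWARF block itself as the second operand.
  commitTemporaryBuffer();
  IsEmittingEntryValue = false;
}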
diff --git a/lib/CodeGen/AsmPrinter/DwarfFile.h b/lib/CodeGen/AsmPrinter/DwarfFile.h
index 244678ce9dc1..35fa51fb24c4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfFile.h
+++ b/lib/CodeGen/AsmPrinter/DwarfFile.h
@@ -32,15 +32,9 @@ class LexicalScope;
class MCSection;
// Data structure to hold a range for range lists.
-class RangeSpan {
-public:
- RangeSpan(MCSymbol *S, MCSymbol *E) : Start(S), End(E) {}
- const MCSymbol *getStart() const { return Start; }
- const MCSymbol *getEnd() const { return End; }
- void setEnd(const MCSymbol *E) { End = E; }
-
-private:
- const MCSymbol *Start, *End;
+struct RangeSpan {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
};
class RangeSpanList {
@@ -86,10 +80,6 @@ class DwarfFile {
/// The table is shared by all units.
MCSymbol *RnglistsTableBaseSym = nullptr;
- /// DWARF v5: The symbol that designates the base of the locations list table.
- /// The table is shared by all units.
- MCSymbol *LoclistsTableBaseSym = nullptr;
-
/// The variables of a lexical scope.
struct ScopeVars {
/// We need to sort Args by ArgNo and check for duplicates. This could also
@@ -167,9 +157,6 @@ public:
MCSymbol *getRnglistsTableBaseSym() const { return RnglistsTableBaseSym; }
void setRnglistsTableBaseSym(MCSymbol *Sym) { RnglistsTableBaseSym = Sym; }
- MCSymbol *getLoclistsTableBaseSym() const { return LoclistsTableBaseSym; }
- void setLoclistsTableBaseSym(MCSymbol *Sym) { LoclistsTableBaseSym = Sym; }
-
/// \returns false if the variable was merged with a previous one.
bool addScopeVariable(LexicalScope *LS, DbgVariable *Var);
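Turning RangeSpan into a plain aggregate lets call sites brace-initialize it rather than go through the removed constructor and accessors. A minimal sketch (the symbol arguments are assumed inputs, not names from the patch):

RangeSpan makeSpan(const MCSymbol *Begin, const MCSymbol *End) {
  return {Begin, End};
}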
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 991ab94b50ab..37c68c085792 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -47,31 +47,42 @@ using namespace llvm;
#define DEBUG_TYPE "dwarfdebug"
DIEDwarfExpression::DIEDwarfExpression(const AsmPrinter &AP,
- DwarfCompileUnit &CU,
- DIELoc &DIE)
- : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP),
- DIE(DIE) {}
+ DwarfCompileUnit &CU, DIELoc &DIE)
+ : DwarfExpression(AP.getDwarfVersion(), CU), AP(AP), OutDIE(DIE) {}
void DIEDwarfExpression::emitOp(uint8_t Op, const char* Comment) {
- CU.addUInt(DIE, dwarf::DW_FORM_data1, Op);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Op);
}
void DIEDwarfExpression::emitSigned(int64_t Value) {
- CU.addSInt(DIE, dwarf::DW_FORM_sdata, Value);
+ CU.addSInt(getActiveDIE(), dwarf::DW_FORM_sdata, Value);
}
void DIEDwarfExpression::emitUnsigned(uint64_t Value) {
- CU.addUInt(DIE, dwarf::DW_FORM_udata, Value);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_udata, Value);
}
void DIEDwarfExpression::emitData1(uint8_t Value) {
- CU.addUInt(DIE, dwarf::DW_FORM_data1, Value);
+ CU.addUInt(getActiveDIE(), dwarf::DW_FORM_data1, Value);
}
void DIEDwarfExpression::emitBaseTypeRef(uint64_t Idx) {
- CU.addBaseTypeRef(DIE, Idx);
+ CU.addBaseTypeRef(getActiveDIE(), Idx);
}
+void DIEDwarfExpression::enableTemporaryBuffer() {
+ assert(!IsBuffering && "Already buffering?");
+ IsBuffering = true;
+}
+
+void DIEDwarfExpression::disableTemporaryBuffer() { IsBuffering = false; }
+
+unsigned DIEDwarfExpression::getTemporaryBufferSize() {
+ return TmpDIE.ComputeSize(&AP);
+}
+
+void DIEDwarfExpression::commitTemporaryBuffer() { OutDIE.takeValues(TmpDIE); }
+
bool DIEDwarfExpression::isFrameRegister(const TargetRegisterInfo &TRI,
unsigned MachineReg) {
return MachineReg == TRI.getFrameRegister(*AP.MF);
@@ -205,6 +216,10 @@ void DwarfUnit::insertDIE(const DINode *Desc, DIE *D) {
MDNodeToDieMap.insert(std::make_pair(Desc, D));
}
+void DwarfUnit::insertDIE(DIE *D) {
+ MDNodeToDieMap.insert(std::make_pair(nullptr, D));
+}
+
void DwarfUnit::addFlag(DIE &Die, dwarf::Attribute Attribute) {
if (DD->getDwarfVersion() >= 4)
Die.addValue(DIEValueAllocator, Attribute, dwarf::DW_FORM_flag_present,
@@ -718,7 +733,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
return "";
// FIXME: Decide whether to implement this for non-C++ languages.
- if (getLanguage() != dwarf::DW_LANG_C_plus_plus)
+ if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
return "";
std::string CS;
@@ -942,6 +957,9 @@ void DwarfUnit::constructTypeDIE(DIE &Buffer, const DICompositeType *CTy) {
if (CTy->isAppleBlockExtension())
addFlag(Buffer, dwarf::DW_AT_APPLE_block);
+ if (CTy->getExportSymbols())
+ addFlag(Buffer, dwarf::DW_AT_export_symbols);
+
// This is outside the DWARF spec, but GDB expects a DW_AT_containing_type
// inside C++ composite types to point to the base class with the vtable.
// Rust uses DW_AT_containing_type to link a vtable to the type
@@ -1696,15 +1714,6 @@ void DwarfUnit::addRnglistsBase() {
TLOF.getDwarfRnglistsSection()->getBeginSymbol());
}
-void DwarfUnit::addLoclistsBase() {
- assert(DD->getDwarfVersion() >= 5 &&
- "DW_AT_loclists_base requires DWARF version 5 or later");
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- addSectionLabel(getUnitDie(), dwarf::DW_AT_loclists_base,
- DU->getLoclistsTableBaseSym(),
- TLOF.getDwarfLoclistsSection()->getBeginSymbol());
-}
-
void DwarfTypeUnit::finishNonUnitTypeDIE(DIE& D, const DICompositeType *CTy) {
addFlag(D, dwarf::DW_AT_declaration);
StringRef Name = CTy->getName();
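The getParentContextString() change above broadens the language test from the single DW_LANG_C_plus_plus code to every C++ dialect code. dwarf::isCPlusPlus behaves roughly like this sketch:

static bool isCPlusPlusLike(dwarf::SourceLanguage S) {
  switch (S) {
  case dwarf::DW_LANG_C_plus_plus:
  case dwarf::DW_LANG_C_plus_plus_03:
  case dwarf::DW_LANG_C_plus_plus_11:
  case dwarf::DW_LANG_C_plus_plus_14:
    return true;
  default:
    return false;
  }
}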
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.h b/lib/CodeGen/AsmPrinter/DwarfUnit.h
index 56c934a35ae8..46c52a1faf4b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.h
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.h
@@ -127,6 +127,8 @@ public:
/// the mappings are kept in DwarfDebug.
void insertDIE(const DINode *Desc, DIE *D);
+ void insertDIE(DIE *D);
+
/// Add a flag that is true to the DIE.
void addFlag(DIE &Die, dwarf::Attribute Attribute);
@@ -214,15 +216,6 @@ public:
/// Add thrown types.
void addThrownTypes(DIE &Die, DINodeArray ThrownTypes);
- // FIXME: Should be reformulated in terms of addComplexAddress.
- /// Start with the address based on the location provided, and generate the
- /// DWARF information necessary to find the actual Block variable (navigating
- /// the Block struct) based on the starting location. Add the DWARF
- /// information to the die. Obsolete, please use addComplexAddress instead.
- void addBlockByrefAddress(const DbgVariable &DV, DIE &Die,
- dwarf::Attribute Attribute,
- const MachineLocation &Location);
-
/// Add a new type attribute to the specified entity.
///
/// This takes and attribute parameter because DW_AT_friend attributes are
@@ -279,9 +272,6 @@ public:
/// Add the DW_AT_rnglists_base attribute to the unit DIE.
void addRnglistsBase();
- /// Add the DW_AT_loclists_base attribute to the unit DIE.
- void addLoclistsBase();
-
virtual DwarfCompileUnit &getCU() = 0;
void constructTypeDIE(DIE &Buffer, const DICompositeType *CTy);
diff --git a/lib/CodeGen/AsmPrinter/EHStreamer.cpp b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
index 99e3687b36b8..31dfaaac836e 100644
--- a/lib/CodeGen/AsmPrinter/EHStreamer.cpp
+++ b/lib/CodeGen/AsmPrinter/EHStreamer.cpp
@@ -426,7 +426,7 @@ MCSymbol *EHStreamer::emitExceptionTable() {
// EHABI). In this case LSDASection will be NULL.
if (LSDASection)
Asm->OutStreamer->SwitchSection(LSDASection);
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
// Emit the LSDA.
MCSymbol *GCCETSym =
@@ -602,11 +602,11 @@ MCSymbol *EHStreamer::emitExceptionTable() {
}
if (HaveTTData) {
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
emitTypeInfos(TTypeEncoding, TTBaseLabel);
}
- Asm->EmitAlignment(2);
+ Asm->EmitAlignment(Align(4));
return GCCETSym;
}
diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
index 39392b79e960..3849644d1584 100644
--- a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp
@@ -72,7 +72,7 @@ void ErlangGCPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
**/
// Align to address width.
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
// Emit PointCount.
OS.AddComment("safe point count");
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index 3145cc90dc73..b4eda5fa8c58 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -129,7 +129,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
report_fatal_error(" Too much descriptor for ocaml GC");
}
AP.emitInt16(NumDescriptors);
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
for (GCModuleInfo::FuncInfoVec::iterator I = Info.funcinfo_begin(),
IE = Info.funcinfo_end();
@@ -180,7 +180,7 @@ void OcamlGCMetadataPrinter::finishAssembly(Module &M, GCModuleInfo &Info,
AP.emitInt16(K->StackOffset);
}
- AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3);
+ AP.EmitAlignment(IntPtrSize == 4 ? Align(4) : Align(8));
}
}
}
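The EmitAlignment changes in the three files above are a unit conversion, not a behavior change: the old parameter was log2(bytes), while the new Align is stated directly in bytes. A quick self-contained check of that mapping:

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  // Old EmitAlignment(2) == new EmitAlignment(Align(4)); likewise 3 -> 8.
  assert(llvm::Align(4).value() == (1u << 2));
  assert(llvm::Align(8).value() == (1u << 3));
  return 0;
}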
diff --git a/lib/CodeGen/AsmPrinter/WinException.cpp b/lib/CodeGen/AsmPrinter/WinException.cpp
index 155e91ce61a1..0398675577cd 100644
--- a/lib/CodeGen/AsmPrinter/WinException.cpp
+++ b/lib/CodeGen/AsmPrinter/WinException.cpp
@@ -982,8 +982,7 @@ void WinException::emitExceptHandlerTable(const MachineFunction *MF) {
OS.EmitValueToAlignment(4);
OS.EmitLabel(LSDALabel);
- const Function *Per =
- dyn_cast<Function>(F.getPersonalityFn()->stripPointerCasts());
+ const auto *Per = cast<Function>(F.getPersonalityFn()->stripPointerCasts());
StringRef PerName = Per->getName();
int BaseState = -1;
if (PerName == "_except_handler4") {
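Replacing dyn_cast with cast here encodes an invariant: a personality function operand, after stripping pointer casts, must be a Function, so the null check was dead code. The same dyn_cast-to-cast tightening recurs in CodeGenPrepare below. As a general sketch of the distinction:

#include "llvm/Support/Casting.h"

// dyn_cast<T> returns nullptr on a type mismatch and forces a null check;
// cast<T> asserts isa<T> and never returns nullptr.
template <typename T, typename V> T *mustBeA(V *Val) {
  return llvm::cast<T>(Val);
}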
diff --git a/lib/CodeGen/AtomicExpandPass.cpp b/lib/CodeGen/AtomicExpandPass.cpp
index dc7eaf6a5fe7..27b298dcf6af 100644
--- a/lib/CodeGen/AtomicExpandPass.cpp
+++ b/lib/CodeGen/AtomicExpandPass.cpp
@@ -382,7 +382,7 @@ LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
- NewLI->setAlignment(LI->getAlignment());
+ NewLI->setAlignment(MaybeAlign(LI->getAlignment()));
NewLI->setVolatile(LI->isVolatile());
NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
@@ -469,7 +469,7 @@ StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
Value *NewAddr = Builder.CreateBitCast(Addr, PT);
StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
- NewSI->setAlignment(SI->getAlignment());
+ NewSI->setAlignment(MaybeAlign(SI->getAlignment()));
NewSI->setVolatile(SI->isVolatile());
NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
@@ -1376,7 +1376,7 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
Builder.SetInsertPoint(BB);
LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
// Atomics require at least natural alignment.
- InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
+ InitLoaded->setAlignment(MaybeAlign(ResultTy->getPrimitiveSizeInBits() / 8));
Builder.CreateBr(LoopBB);
// Start the main loop block now that we've taken care of the preliminaries.
@@ -1711,7 +1711,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'expected' argument, if present.
if (CASExpected) {
AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
- AllocaCASExpected->setAlignment(AllocaAlignment);
+ AllocaCASExpected->setAlignment(MaybeAlign(AllocaAlignment));
unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
AllocaCASExpected_i8 =
@@ -1730,7 +1730,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
Args.push_back(IntValue);
} else {
AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
- AllocaValue->setAlignment(AllocaAlignment);
+ AllocaValue->setAlignment(MaybeAlign(AllocaAlignment));
AllocaValue_i8 =
Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
@@ -1742,7 +1742,7 @@ bool AtomicExpand::expandAtomicOpToLibcall(
// 'ret' argument.
if (!CASExpected && HasResult && !UseSizedLibcall) {
AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
- AllocaResult->setAlignment(AllocaAlignment);
+ AllocaResult->setAlignment(MaybeAlign(AllocaAlignment));
unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
AllocaResult_i8 =
Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
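The setAlignment call sites above all wrap the legacy unsigned value in MaybeAlign, which preserves the old convention that zero means "no alignment specified". A minimal sketch of the adaptation:

#include "llvm/Support/Alignment.h"

llvm::MaybeAlign fromLegacyAlignment(unsigned A) {
  // MaybeAlign(0) is None; any nonzero power of two wraps an Align.
  return llvm::MaybeAlign(A);
}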
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index fb54b5d6c8d8..455916eeb82f 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -129,9 +129,10 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
getAnalysis<MachineBlockFrequencyInfo>());
BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo,
getAnalysis<MachineBranchProbabilityInfo>());
- return Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
- MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ return Folder.OptimizeFunction(
+ MF, MF.getSubtarget().getInstrInfo(), MF.getSubtarget().getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
BranchFolder::BranchFolder(bool defaultEnableTailMerge, bool CommonHoist,
@@ -161,6 +162,11 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
// Avoid matching if this pointer gets reused.
TriedMerging.erase(MBB);
+ // Update call site info.
+ std::for_each(MBB->begin(), MBB->end(), [MF](const MachineInstr &MI) {
+ if (MI.isCall(MachineInstr::IgnoreBundle))
+ MF->eraseCallSiteInfo(&MI);
+ });
// Remove the block.
MF->erase(MBB);
EHScopeMembership.erase(MBB);
@@ -1306,6 +1312,8 @@ static bool IsBranchOnlyBlock(MachineBasicBlock *MBB) {
/// result in infinite loops.
static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
+ assert(MBB1 && MBB2 && "Unknown MachineBasicBlock");
+
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
@@ -1843,7 +1851,7 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB,
template <class Container>
static void addRegAndItsAliases(unsigned Reg, const TargetRegisterInfo *TRI,
Container &Set) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
Set.insert(*AI);
} else {
@@ -1871,7 +1879,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
for (const MachineOperand &MO : Loc->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
@@ -1909,7 +1917,7 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
return Loc;
if (!MO.isReg() || MO.isUse())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (Uses.count(Reg)) {
@@ -1937,14 +1945,14 @@ MachineBasicBlock::iterator findHoistingInsertPosAndDeps(MachineBasicBlock *MBB,
for (const MachineOperand &MO : PI->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isUse()) {
addRegAndItsAliases(Reg, TRI, Uses);
} else {
if (Uses.erase(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
Uses.erase(*SubRegs); // Use sub-registers to be conservative
}
@@ -2010,7 +2018,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
}
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -2060,13 +2068,13 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (!AllDefsSet.count(Reg)) {
continue;
}
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
ActiveDefsSet.erase(*AI);
} else {
@@ -2078,8 +2086,8 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
for (const MachineOperand &MO : TIB->operands()) {
if (!MO.isReg() || !MO.isDef() || MO.isDead())
continue;
- unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Reg || Register::isVirtualRegister(Reg))
continue;
addRegAndItsAliases(Reg, TRI, ActiveDefsSet);
addRegAndItsAliases(Reg, TRI, AllDefsSet);
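The recurring unsigned-to-Register changes in this file (and in several files below) move the virtual/physical predicates from TargetRegisterInfo statics onto llvm::Register without changing the numeric encoding. Minimal sketch:

#include "llvm/CodeGen/Register.h"

bool isPhysical(llvm::Register R) {
  return llvm::Register::isPhysicalRegister(R);
}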
diff --git a/lib/CodeGen/BranchRelaxation.cpp b/lib/CodeGen/BranchRelaxation.cpp
index 3ad6266d4f35..6efdc9efa968 100644
--- a/lib/CodeGen/BranchRelaxation.cpp
+++ b/lib/CodeGen/BranchRelaxation.cpp
@@ -64,19 +64,18 @@ class BranchRelaxation : public MachineFunctionPass {
/// Compute the offset immediately following this block. \p MBB is the next
/// block.
unsigned postOffset(const MachineBasicBlock &MBB) const {
- unsigned PO = Offset + Size;
- unsigned Align = MBB.getAlignment();
- if (Align == 0)
+ const unsigned PO = Offset + Size;
+ const Align Alignment = MBB.getAlignment();
+ if (Alignment == 1)
return PO;
- unsigned AlignAmt = 1 << Align;
- unsigned ParentAlign = MBB.getParent()->getAlignment();
- if (Align <= ParentAlign)
- return PO + OffsetToAlignment(PO, AlignAmt);
+ const Align ParentAlign = MBB.getParent()->getAlignment();
+ if (Alignment <= ParentAlign)
+ return PO + offsetToAlignment(PO, Alignment);
// The alignment of this MBB is larger than the function's alignment, so we
// can't tell whether or not it will insert nops. Assume that it will.
- return PO + AlignAmt + OffsetToAlignment(PO, AlignAmt);
+ return PO + Alignment.value() + offsetToAlignment(PO, Alignment);
}
};
@@ -128,9 +127,8 @@ void BranchRelaxation::verify() {
#ifndef NDEBUG
unsigned PrevNum = MF->begin()->getNumber();
for (MachineBasicBlock &MBB : *MF) {
- unsigned Align = MBB.getAlignment();
- unsigned Num = MBB.getNumber();
- assert(BlockInfo[Num].Offset % (1u << Align) == 0);
+ const unsigned Num = MBB.getNumber();
+ assert(isAligned(MBB.getAlignment(), BlockInfo[Num].Offset));
assert(!Num || BlockInfo[PrevNum].postOffset(MBB) <= BlockInfo[Num].Offset);
assert(BlockInfo[Num].Size == computeBlockSize(MBB));
PrevNum = Num;
@@ -143,7 +141,7 @@ void BranchRelaxation::verify() {
LLVM_DUMP_METHOD void BranchRelaxation::dumpBBs() {
for (auto &MBB : *MF) {
const BasicBlockInfo &BBI = BlockInfo[MBB.getNumber()];
- dbgs() << format("%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
+ dbgs() << format("%%bb.%u\toffset=%08x\t", MBB.getNumber(), BBI.Offset)
<< format("size=%#x\n", BBI.Size);
}
}
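A worked example of the rewritten padding math in postOffset(), runnable against the new Alignment.h (the offset value is illustrative):

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  const unsigned PO = 10; // offset immediately after the previous block
  // 2 bytes of padding reach the next 4-byte boundary: 10 -> 12.
  assert(llvm::offsetToAlignment(PO, llvm::Align(4)) == 2);
  return 0;
}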
diff --git a/lib/CodeGen/BreakFalseDeps.cpp b/lib/CodeGen/BreakFalseDeps.cpp
index cc4b2caa9bed..709164e5f178 100644
--- a/lib/CodeGen/BreakFalseDeps.cpp
+++ b/lib/CodeGen/BreakFalseDeps.cpp
@@ -9,12 +9,11 @@
/// \file Break False Dependency pass.
///
/// Some instructions have false dependencies which cause unnecessary stalls.
-/// For exmaple, instructions that only write part of a register, and implicitly
-/// need to read the other parts of the register. This may cause unwanted
+/// For example, instructions may write part of a register and implicitly
+/// need to read the other parts of the register. This may cause unwanted
/// stalls preventing otherwise unrelated instructions from executing in
/// parallel in an out-of-order CPU.
-/// This pass is aimed at identifying and avoiding these depepndencies when
-/// possible.
+/// This pass is aimed at identifying and avoiding these dependencies.
//
//===----------------------------------------------------------------------===//
@@ -24,6 +23,7 @@
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
@@ -109,7 +109,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
MachineOperand &MO = MI->getOperand(OpIdx);
assert(MO.isUndef() && "Expected undef machine operand");
- unsigned OriginalReg = MO.getReg();
+ Register OriginalReg = MO.getReg();
// Update only undef operands that have reg units that are mapped to one root.
for (MCRegUnitIterator Unit(OriginalReg, TRI); Unit.isValid(); ++Unit) {
@@ -162,7 +162,7 @@ bool BreakFalseDeps::pickBestRegisterForUndef(MachineInstr *MI, unsigned OpIdx,
bool BreakFalseDeps::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx,
unsigned Pref) {
- unsigned reg = MI->getOperand(OpIdx).getReg();
+ Register reg = MI->getOperand(OpIdx).getReg();
unsigned Clearance = RDA->getClearance(MI, reg);
LLVM_DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref);
@@ -178,6 +178,7 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
assert(!MI->isDebugInstr() && "Won't process debug values");
// Break dependence on undef uses. Do this before updating LiveRegs below.
+ // This can remove a false dependence with no additional instructions.
unsigned OpNum;
unsigned Pref = TII->getUndefRegClearance(*MI, OpNum, TRI);
if (Pref) {
@@ -189,6 +190,11 @@ void BreakFalseDeps::processDefs(MachineInstr *MI) {
UndefReads.push_back(std::make_pair(MI, OpNum));
}
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
const MCInstrDesc &MCID = MI->getDesc();
for (unsigned i = 0,
e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs();
@@ -209,6 +215,11 @@ void BreakFalseDeps::processUndefReads(MachineBasicBlock *MBB) {
if (UndefReads.empty())
return;
+ // The code below allows the target to create a new instruction to break the
+ // dependence. That opposes the goal of minimizing size, so bail out now.
+ if (MF->getFunction().hasMinSize())
+ return;
+
// Collect this block's live out register units.
LiveRegSet.init(*TRI);
// We do not need to care about pristine registers as they are just preserved
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 7164fdfb7886..bf97aaee3665 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -40,7 +40,7 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
MachineRegisterInfo &MRI = MF.getRegInfo();
VirtRegAuxInfo VRAI(MF, LIS, VRM, MLI, MBFI, norm);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg));
@@ -48,10 +48,11 @@ void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS,
}
// Return the preferred allocation register for reg, given a COPY instruction.
-static unsigned copyHint(const MachineInstr *mi, unsigned reg,
+static Register copyHint(const MachineInstr *mi, unsigned reg,
const TargetRegisterInfo &tri,
const MachineRegisterInfo &mri) {
- unsigned sub, hreg, hsub;
+ unsigned sub, hsub;
+ Register hreg;
if (mi->getOperand(0).getReg() == reg) {
sub = mi->getOperand(0).getSubReg();
hreg = mi->getOperand(1).getReg();
@@ -65,11 +66,11 @@ static unsigned copyHint(const MachineInstr *mi, unsigned reg,
if (!hreg)
return 0;
- if (TargetRegisterInfo::isVirtualRegister(hreg))
- return sub == hsub ? hreg : 0;
+ if (Register::isVirtualRegister(hreg))
+ return sub == hsub ? hreg : Register();
const TargetRegisterClass *rc = mri.getRegClass(reg);
- unsigned CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
+ Register CopiedPReg = (hsub ? tri.getSubReg(hreg, hsub) : hreg);
if (rc->contains(CopiedPReg))
return CopiedPReg;
@@ -112,7 +113,7 @@ static bool isRematerializable(const LiveInterval &LI,
// If the original (pre-splitting) registers match this
// copy came from a split.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
+ if (!Register::isVirtualRegister(Reg) ||
VRM->getOriginal(Reg) != Original)
return false;
@@ -243,7 +244,7 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
// Get allocation hints from copies.
if (!mi->isCopy())
continue;
- unsigned hint = copyHint(mi, li.reg, tri, mri);
+ Register hint = copyHint(mi, li.reg, tri, mri);
if (!hint)
continue;
// Force hweight onto the stack so that x86 doesn't add hidden precision,
@@ -251,8 +252,9 @@ float VirtRegAuxInfo::weightCalcHelper(LiveInterval &li, SlotIndex *start,
//
// FIXME: we probably shouldn't use floats at all.
volatile float hweight = Hint[hint] += weight;
- if (TargetRegisterInfo::isVirtualRegister(hint) || mri.isAllocatable(hint))
- CopyHints.insert(CopyHint(hint, hweight, tri.isPhysicalRegister(hint)));
+ if (Register::isVirtualRegister(hint) || mri.isAllocatable(hint))
+ CopyHints.insert(
+ CopyHint(hint, hweight, Register::isPhysicalRegister(hint)));
}
Hint.clear();
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 497fcb147849..a397039180a4 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -32,7 +32,6 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
TRI(*MF.getSubtarget().getRegisterInfo()), Locs(locs), Context(C) {
// No stack is used.
StackOffset = 0;
- MaxStackArgAlign = 1;
clearByValRegsInfo();
UsedRegs.resize((TRI.getNumRegs()+31)/32);
@@ -41,20 +40,21 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf,
/// Allocate space on the stack large enough to pass an argument by value.
/// The size and alignment information of the argument is encoded in
/// its parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
- MVT LocVT, CCValAssign::LocInfo LocInfo,
- int MinSize, int MinAlign,
- ISD::ArgFlagsTy ArgFlags) {
- unsigned Align = ArgFlags.getByValAlign();
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo, int MinSize,
+ int MinAlignment, ISD::ArgFlagsTy ArgFlags) {
+ Align MinAlign(MinAlignment);
+ Align Alignment(ArgFlags.getByValAlign());
unsigned Size = ArgFlags.getByValSize();
if (MinSize > (int)Size)
Size = MinSize;
- if (MinAlign > (int)Align)
- Align = MinAlign;
- ensureMaxAlignment(Align);
- MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size, Align);
+ if (MinAlign > Alignment)
+ Alignment = MinAlign;
+ ensureMaxAlignment(Alignment);
+ MF.getSubtarget().getTargetLowering()->HandleByVal(this, Size,
+ Alignment.value());
Size = unsigned(alignTo(Size, MinAlign));
- unsigned Offset = AllocateStack(Size, Align);
+ unsigned Offset = AllocateStack(Size, Alignment.value());
addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
}
@@ -90,13 +90,8 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
for (unsigned i = 0; i != NumArgs; ++i) {
MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
- if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
-#ifndef NDEBUG
- dbgs() << "Formal argument #" << i << " has unhandled type "
- << EVT(ArgVT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
+ if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function argument #" + Twine(i));
}
}
@@ -122,13 +117,8 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
- if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
-#ifndef NDEBUG
- dbgs() << "Return operand #" << i << " has unhandled type "
- << EVT(VT).getEVTString() << '\n';
-#endif
- llvm_unreachable(nullptr);
- }
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ report_fatal_error("unable to allocate function return #" + Twine(i));
}
}
@@ -209,7 +199,7 @@ static bool isValueTypeInRegForCC(CallingConv::ID CC, MVT VT) {
void CCState::getRemainingRegParmsForType(SmallVectorImpl<MCPhysReg> &Regs,
MVT VT, CCAssignFn Fn) {
unsigned SavedStackOffset = StackOffset;
- unsigned SavedMaxStackArgAlign = MaxStackArgAlign;
+ Align SavedMaxStackArgAlign = MaxStackArgAlign;
unsigned NumLocs = Locs.size();
// Set the 'inreg' flag if it is used for this calling convention.
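HandleByVal now compares and merges alignments as Align values, which are totally ordered by byte count, so the max-of-two-alignments logic above reads directly. A small check of that assumption:

#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  llvm::Align Attr(4);      // from the byval attribute
  const llvm::Align Min(8); // target-imposed minimum
  if (Min > Attr)
    Attr = Min;
  assert(Attr.value() == 8);
  return 0;
}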
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
index c37ed57781d4..ad9525f927e8 100644
--- a/lib/CodeGen/CodeGen.cpp
+++ b/lib/CodeGen/CodeGen.cpp
@@ -28,6 +28,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeDetectDeadLanesPass(Registry);
initializeDwarfEHPreparePass(Registry);
initializeEarlyIfConverterPass(Registry);
+ initializeEarlyIfPredicatorPass(Registry);
initializeEarlyMachineLICMPass(Registry);
initializeEarlyTailDuplicatePass(Registry);
initializeExpandMemCmpPassPass(Registry);
@@ -53,6 +54,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeLocalStackSlotPassPass(Registry);
initializeLowerIntrinsicsPass(Registry);
initializeMIRCanonicalizerPass(Registry);
+ initializeMIRNamerPass(Registry);
initializeMachineBlockFrequencyInfoPass(Registry);
initializeMachineBlockPlacementPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
@@ -63,10 +65,11 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMachineFunctionPrinterPassPass(Registry);
initializeMachineLICMPass(Registry);
initializeMachineLoopInfoPass(Registry);
- initializeMachineModuleInfoPass(Registry);
+ initializeMachineModuleInfoWrapperPassPass(Registry);
initializeMachineOptimizationRemarkEmitterPassPass(Registry);
initializeMachineOutlinerPass(Registry);
initializeMachinePipelinerPass(Registry);
+ initializeModuloScheduleTestPass(Registry);
initializeMachinePostDominatorTreePass(Registry);
initializeMachineRegionInfoPassPass(Registry);
initializeMachineSchedulerPass(Registry);
diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp
index 52b4bbea012b..fa4432ea23ec 100644
--- a/lib/CodeGen/CodeGenPrepare.cpp
+++ b/lib/CodeGen/CodeGenPrepare.cpp
@@ -344,7 +344,7 @@ class TypePromotionTransaction;
// Get the DominatorTree, building if necessary.
DominatorTree &getDT(Function &F) {
if (!DT)
- DT = llvm::make_unique<DominatorTree>(F);
+ DT = std::make_unique<DominatorTree>(F);
return *DT;
}
@@ -424,7 +424,7 @@ bool CodeGenPrepare::runOnFunction(Function &F) {
TLI = SubtargetInfo->getTargetLowering();
TRI = SubtargetInfo->getRegisterInfo();
}
- TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ TLInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
BPI.reset(new BranchProbabilityInfo(F, *LI));
@@ -1524,7 +1524,7 @@ SinkShiftAndTruncate(BinaryOperator *ShiftI, Instruction *User, ConstantInt *CI,
const TargetLowering &TLI, const DataLayout &DL) {
BasicBlock *UserBB = User->getParent();
DenseMap<BasicBlock *, CastInst *> InsertedTruncs;
- TruncInst *TruncI = dyn_cast<TruncInst>(User);
+ auto *TruncI = cast<TruncInst>(User);
bool MadeChange = false;
for (Value::user_iterator TruncUI = TruncI->user_begin(),
@@ -1682,10 +1682,11 @@ static bool OptimizeExtractBits(BinaryOperator *ShiftI, ConstantInt *CI,
TheUse = InsertedShift;
}
- // If we removed all uses, nuke the shift.
+ // If we removed all uses, or there are none, nuke the shift.
if (ShiftI->use_empty()) {
salvageDebugInfo(*ShiftI);
ShiftI->eraseFromParent();
+ MadeChange = true;
}
return MadeChange;
@@ -1811,7 +1812,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
AllocaInst *AI;
if ((AI = dyn_cast<AllocaInst>(Val)) && AI->getAlignment() < PrefAlign &&
DL->getTypeAllocSize(AI->getAllocatedType()) >= MinSize + Offset2)
- AI->setAlignment(PrefAlign);
+ AI->setAlignment(MaybeAlign(PrefAlign));
// Global variables can only be aligned if they are defined in this
// object (i.e. they are uniquely initialized in this object), and
// over-aligning global variables that have an explicit section is
@@ -1821,7 +1822,7 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
GV->getPointerAlignment(*DL) < PrefAlign &&
DL->getTypeAllocSize(GV->getValueType()) >=
MinSize + Offset2)
- GV->setAlignment(PrefAlign);
+ GV->setAlignment(MaybeAlign(PrefAlign));
}
// If this is a memcpy (or similar) then we may be able to improve the
// alignment
@@ -1867,24 +1868,10 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) {
});
return true;
}
- case Intrinsic::objectsize: {
- // Lower all uses of llvm.objectsize.*
- Value *RetVal =
- lowerObjectSizeCall(II, *DL, TLInfo, /*MustSucceed=*/true);
-
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- });
- return true;
- }
- case Intrinsic::is_constant: {
- // If is_constant hasn't folded away yet, lower it to false now.
- Constant *RetVal = ConstantInt::get(II->getType(), 0);
- resetIteratorIfInvalidatedWhileCalling(BB, [&]() {
- replaceAndRecursivelySimplify(CI, RetVal, TLInfo, nullptr);
- });
- return true;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::aarch64_stlxr:
case Intrinsic::aarch64_stxr: {
ZExtInst *ExtVal = dyn_cast<ZExtInst>(CI->getArgOperand(0));
@@ -2024,17 +2011,18 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
/// Only dup the ReturnInst if the CallInst is likely to be emitted as a tail
/// call.
const Function *F = BB->getParent();
- SmallVector<CallInst*, 4> TailCalls;
+ SmallVector<BasicBlock*, 4> TailCallBBs;
if (PN) {
for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I) {
// Look through bitcasts.
Value *IncomingVal = PN->getIncomingValue(I)->stripPointerCasts();
CallInst *CI = dyn_cast<CallInst>(IncomingVal);
+ BasicBlock *PredBB = PN->getIncomingBlock(I);
// Make sure the phi value is indeed produced by the tail call.
- if (CI && CI->hasOneUse() && CI->getParent() == PN->getIncomingBlock(I) &&
+ if (CI && CI->hasOneUse() && CI->getParent() == PredBB &&
TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCalls.push_back(CI);
+ TailCallBBs.push_back(PredBB);
}
} else {
SmallPtrSet<BasicBlock*, 4> VisitedBBs;
@@ -2052,24 +2040,20 @@ bool CodeGenPrepare::dupRetToEnableTailCallOpts(BasicBlock *BB, bool &ModifiedDT
CallInst *CI = dyn_cast<CallInst>(&*RI);
if (CI && CI->use_empty() && TLI->mayBeEmittedAsTailCall(CI) &&
attributesPermitTailCall(F, CI, RetI, *TLI))
- TailCalls.push_back(CI);
+ TailCallBBs.push_back(*PI);
}
}
bool Changed = false;
- for (unsigned i = 0, e = TailCalls.size(); i != e; ++i) {
- CallInst *CI = TailCalls[i];
- CallSite CS(CI);
-
+ for (auto const &TailCallBB : TailCallBBs) {
// Make sure the call instruction is followed by an unconditional branch to
// the return block.
- BasicBlock *CallBB = CI->getParent();
- BranchInst *BI = dyn_cast<BranchInst>(CallBB->getTerminator());
+ BranchInst *BI = dyn_cast<BranchInst>(TailCallBB->getTerminator());
if (!BI || !BI->isUnconditional() || BI->getSuccessor(0) != BB)
continue;
- // Duplicate the return into CallBB.
- (void)FoldReturnIntoUncondBranch(RetI, BB, CallBB);
+ // Duplicate the return into TailCallBB.
+ (void)FoldReturnIntoUncondBranch(RetI, BB, TailCallBB);
ModifiedDT = Changed = true;
++NumRetsDup;
}
@@ -2683,26 +2667,26 @@ private:
void TypePromotionTransaction::setOperand(Instruction *Inst, unsigned Idx,
Value *NewVal) {
- Actions.push_back(llvm::make_unique<TypePromotionTransaction::OperandSetter>(
+ Actions.push_back(std::make_unique<TypePromotionTransaction::OperandSetter>(
Inst, Idx, NewVal));
}
void TypePromotionTransaction::eraseInstruction(Instruction *Inst,
Value *NewVal) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::InstructionRemover>(
+ std::make_unique<TypePromotionTransaction::InstructionRemover>(
Inst, RemovedInsts, NewVal));
}
void TypePromotionTransaction::replaceAllUsesWith(Instruction *Inst,
Value *New) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
+ std::make_unique<TypePromotionTransaction::UsesReplacer>(Inst, New));
}
void TypePromotionTransaction::mutateType(Instruction *Inst, Type *NewTy) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
+ std::make_unique<TypePromotionTransaction::TypeMutator>(Inst, NewTy));
}
Value *TypePromotionTransaction::createTrunc(Instruction *Opnd,
@@ -2732,7 +2716,7 @@ Value *TypePromotionTransaction::createZExt(Instruction *Inst,
void TypePromotionTransaction::moveBefore(Instruction *Inst,
Instruction *Before) {
Actions.push_back(
- llvm::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
+ std::make_unique<TypePromotionTransaction::InstructionMoveBefore>(
Inst, Before));
}
@@ -3048,7 +3032,7 @@ public:
To = dyn_cast<PHINode>(OldReplacement);
OldReplacement = Get(From);
}
- assert(Get(To) == To && "Replacement PHI node is already replaced.");
+ assert(To && Get(To) == To && "Replacement PHI node is already replaced.");
Put(From, To);
From->replaceAllUsesWith(To);
AllPhiNodes.erase(From);
@@ -3334,7 +3318,7 @@ private:
// So the values are different and do not match. So we need them to
// match. (But we register no more than one match per PHI node, so that
// we won't later try to replace them twice.)
- if (!MatchedPHIs.insert(FirstPhi).second)
+ if (MatchedPHIs.insert(FirstPhi).second)
Matcher.insert({ FirstPhi, SecondPhi });
// But we must check it.
WorkList.push_back({ FirstPhi, SecondPhi });
@@ -3412,11 +3396,10 @@ private:
Select->setFalseValue(ST.Get(Map[FalseValue]));
} else {
// Must be a Phi node then.
- PHINode *PHI = cast<PHINode>(V);
- auto *CurrentPhi = dyn_cast<PHINode>(Current);
+ auto *PHI = cast<PHINode>(V);
// Fill the Phi node with values from predecessors.
for (auto B : predecessors(PHI->getParent())) {
- Value *PV = CurrentPhi->getIncomingValueForBlock(B);
+ Value *PV = cast<PHINode>(Current)->getIncomingValueForBlock(B);
assert(Map.find(PV) != Map.end() && "No predecessor Value!");
PHI->addIncoming(ST.Get(Map[PV]), B);
}
@@ -3785,13 +3768,11 @@ bool TypePromotionHelper::canGetThrough(const Instruction *Inst,
// poisoned value regular value
// It should be OK since undef covers valid value.
if (Inst->getOpcode() == Instruction::Shl && Inst->hasOneUse()) {
- const Instruction *ExtInst =
- dyn_cast<const Instruction>(*Inst->user_begin());
+ const auto *ExtInst = cast<const Instruction>(*Inst->user_begin());
if (ExtInst->hasOneUse()) {
- const Instruction *AndInst =
- dyn_cast<const Instruction>(*ExtInst->user_begin());
+ const auto *AndInst = dyn_cast<const Instruction>(*ExtInst->user_begin());
if (AndInst && AndInst->getOpcode() == Instruction::And) {
- const ConstantInt *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
+ const auto *Cst = dyn_cast<ConstantInt>(AndInst->getOperand(1));
if (Cst &&
Cst->getValue().isIntN(Inst->getType()->getIntegerBitWidth()))
return true;
@@ -4793,8 +4774,8 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
<< " for " << *MemoryInst << "\n");
if (SunkAddr->getType() != Addr->getType())
SunkAddr = Builder.CreatePointerCast(SunkAddr, Addr->getType());
- } else if (AddrSinkUsingGEPs ||
- (!AddrSinkUsingGEPs.getNumOccurrences() && TM && TTI->useAA())) {
+ } else if (AddrSinkUsingGEPs || (!AddrSinkUsingGEPs.getNumOccurrences() &&
+ TM && SubtargetInfo->addrSinkUsingGEPs())) {
// By default, we use the GEP-based method when AA is used later. This
// prevents new inttoptr/ptrtoint pairs from degrading AA capabilities.
LLVM_DEBUG(dbgs() << "CGP: SINKING nonlocal addrmode: " << AddrMode
@@ -5816,7 +5797,7 @@ bool CodeGenPrepare::optimizeLoadExt(LoadInst *Load) {
return false;
IRBuilder<> Builder(Load->getNextNode());
- auto *NewAnd = dyn_cast<Instruction>(
+ auto *NewAnd = cast<Instruction>(
Builder.CreateAnd(Load, ConstantInt::get(Ctx, DemandBits)));
// Mark this instruction as "inserted by CGP", so that other
// optimizations don't touch it.
@@ -6193,35 +6174,49 @@ bool CodeGenPrepare::tryToSinkFreeOperands(Instruction *I) {
// OpsToSink can contain multiple uses in a use chain (e.g.
// (%u1 with %u1 = shufflevector), (%u2 with %u2 = zext %u1)). The dominating
- // uses must come first, which means they are sunk first, temporarily creating
- // invalid IR. This will be fixed once their dominated users are sunk and
- // updated.
+ // uses must come first, so we process the ops in reverse order so as to not
+ // create invalid IR.
BasicBlock *TargetBB = I->getParent();
bool Changed = false;
SmallVector<Use *, 4> ToReplace;
- for (Use *U : OpsToSink) {
+ for (Use *U : reverse(OpsToSink)) {
auto *UI = cast<Instruction>(U->get());
if (UI->getParent() == TargetBB || isa<PHINode>(UI))
continue;
ToReplace.push_back(U);
}
- SmallPtrSet<Instruction *, 4> MaybeDead;
+ SetVector<Instruction *> MaybeDead;
+ DenseMap<Instruction *, Instruction *> NewInstructions;
+ Instruction *InsertPoint = I;
for (Use *U : ToReplace) {
auto *UI = cast<Instruction>(U->get());
Instruction *NI = UI->clone();
+ NewInstructions[UI] = NI;
MaybeDead.insert(UI);
LLVM_DEBUG(dbgs() << "Sinking " << *UI << " to user " << *I << "\n");
- NI->insertBefore(I);
+ NI->insertBefore(InsertPoint);
+ InsertPoint = NI;
InsertedInsts.insert(NI);
- U->set(NI);
+
+ // Update the use for the new instruction, making sure that we update the
+ // sunk instruction uses, if it is part of a chain that has already been
+ // sunk.
+ Instruction *OldI = cast<Instruction>(U->getUser());
+ if (NewInstructions.count(OldI))
+ NewInstructions[OldI]->setOperand(U->getOperandNo(), NI);
+ else
+ U->set(NI);
Changed = true;
}
// Remove instructions that are dead after sinking.
- for (auto *I : MaybeDead)
- if (!I->hasNUsesOrMore(1))
+ for (auto *I : MaybeDead) {
+ if (!I->hasNUsesOrMore(1)) {
+ LLVM_DEBUG(dbgs() << "Removing dead instruction: " << *I << "\n");
I->eraseFromParent();
+ }
+ }
return Changed;
}
@@ -7106,7 +7101,6 @@ bool CodeGenPrepare::optimizeBlock(BasicBlock &BB, bool &ModifiedDT) {
for (auto &I : reverse(BB)) {
if (makeBitReverse(I, *DL, *TLI)) {
MadeBitReverse = MadeChange = true;
- ModifiedDT = true;
break;
}
}
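The tryToSinkFreeOperands fix threads two pieces of state: InsertPoint keeps each clone above the previous one, and NewInstructions redirects a use to the clone of its user whenever that user was itself sunk. A toy analogue of the rewiring step (plain C++, not LLVM API):

#include <map>
#include <vector>

struct Node { std::vector<Node *> Ops; };

// If User was cloned, update the clone's operand; otherwise update User.
void rewire(std::map<Node *, Node *> &Clones, Node *User, unsigned OpNo,
            Node *NewOp) {
  auto It = Clones.find(User);
  Node *Target = (It == Clones.end()) ? User : It->second;
  Target->Ops[OpNo] = NewOp;
}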
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 4144c243a341..702e7e244bce 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -187,7 +187,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr &MI) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
const TargetRegisterClass *NewRC = nullptr;
@@ -272,7 +272,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
}
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isDef()) continue;
@@ -303,7 +303,7 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr &MI, unsigned Count) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (!MO.isUse()) continue;
@@ -457,6 +457,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency)
Max = SU;
}
+ assert(Max && "Failed to find bottom of the critical path");
#ifndef NDEBUG
{
@@ -612,7 +613,7 @@ BreakAntiDependencies(const std::vector<SUnit> &SUnits,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) {
AntiDepReg = 0;
diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp
index b99be5d7a87c..a169c3cb16b2 100644
--- a/lib/CodeGen/DFAPacketizer.cpp
+++ b/lib/CodeGen/DFAPacketizer.cpp
@@ -23,6 +23,8 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/DFAPacketizer.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
@@ -71,39 +73,13 @@ static DFAInput getDFAInsnInput(const std::vector<unsigned> &InsnClass) {
// --------------------------------------------------------------------
-DFAPacketizer::DFAPacketizer(const InstrItineraryData *I,
- const DFAStateInput (*SIT)[2],
- const unsigned *SET):
- InstrItins(I), DFAStateInputTable(SIT), DFAStateEntryTable(SET) {
- // Make sure DFA types are large enough for the number of terms & resources.
- static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
- (8 * sizeof(DFAInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
- static_assert(
- (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
- "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
-}
-
-// Read the DFA transition table and update CachedTable.
-//
-// Format of the transition tables:
-// DFAStateInputTable[][2] = pairs of <Input, Transition> for all valid
-// transitions
-// DFAStateEntryTable[i] = Index of the first entry in DFAStateInputTable
-// for the ith state
-//
-void DFAPacketizer::ReadTable(unsigned int state) {
- unsigned ThisState = DFAStateEntryTable[state];
- unsigned NextStateInTable = DFAStateEntryTable[state+1];
- // Early exit in case CachedTable has already contains this
- // state's transitions.
- if (CachedTable.count(UnsignPair(state, DFAStateInputTable[ThisState][0])))
- return;
-
- for (unsigned i = ThisState; i < NextStateInTable; i++)
- CachedTable[UnsignPair(state, DFAStateInputTable[i][0])] =
- DFAStateInputTable[i][1];
-}
+// Make sure DFA types are large enough for the number of terms & resources.
+static_assert((DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <=
+ (8 * sizeof(DFAInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAInput");
+static_assert(
+ (DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) <= (8 * sizeof(DFAStateInput)),
+ "(DFA_MAX_RESTERMS * DFA_MAX_RESOURCES) too big for DFAStateInput");
// Return the DFAInput for an instruction class.
DFAInput DFAPacketizer::getInsnInput(unsigned InsnClass) {
@@ -129,9 +105,7 @@ DFAInput DFAPacketizer::getInsnInput(const std::vector<unsigned> &InsnClass) {
bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
- UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
- ReadTable(CurrentState);
- return CachedTable.count(StateTrans) != 0;
+ return A.canAdd(InsnInput);
}
// Reserve the resources occupied by a MCInstrDesc and change the current
@@ -139,10 +113,7 @@ bool DFAPacketizer::canReserveResources(const MCInstrDesc *MID) {
void DFAPacketizer::reserveResources(const MCInstrDesc *MID) {
unsigned InsnClass = MID->getSchedClass();
DFAInput InsnInput = getInsnInput(InsnClass);
- UnsignPair StateTrans = UnsignPair(CurrentState, InsnInput);
- ReadTable(CurrentState);
- assert(CachedTable.count(StateTrans) != 0);
- CurrentState = CachedTable[StateTrans];
+ A.add(InsnInput);
}
// Check if the resources occupied by a machine instruction are available
@@ -159,19 +130,33 @@ void DFAPacketizer::reserveResources(MachineInstr &MI) {
reserveResources(&MID);
}
+unsigned DFAPacketizer::getUsedResources(unsigned InstIdx) {
+ ArrayRef<NfaPath> NfaPaths = A.getNfaPaths();
+ assert(!NfaPaths.empty() && "Invalid bundle!");
+ const NfaPath &RS = NfaPaths.front();
+
+ // RS stores the cumulative resources used up to and including the I'th
+ // instruction. The 0th instruction is the base case.
+ if (InstIdx == 0)
+ return RS[0];
+ // Return the difference between the cumulative resources used by InstIdx and
+ // its predecessor.
+ return RS[InstIdx] ^ RS[InstIdx - 1];
+}
+
namespace llvm {
// This class extends ScheduleDAGInstrs and overrides the schedule method
// to build the dependence graph.
class DefaultVLIWScheduler : public ScheduleDAGInstrs {
private:
- AliasAnalysis *AA;
+ AAResults *AA;
/// Ordered list of DAG postprocessing steps.
std::vector<std::unique_ptr<ScheduleDAGMutation>> Mutations;
public:
DefaultVLIWScheduler(MachineFunction &MF, MachineLoopInfo &MLI,
- AliasAnalysis *AA);
+ AAResults *AA);
// Actual scheduling work.
void schedule() override;
@@ -189,7 +174,7 @@ protected:
DefaultVLIWScheduler::DefaultVLIWScheduler(MachineFunction &MF,
MachineLoopInfo &MLI,
- AliasAnalysis *AA)
+ AAResults *AA)
: ScheduleDAGInstrs(MF, &MLI), AA(AA) {
CanHandleTerminators = true;
}
@@ -207,9 +192,10 @@ void DefaultVLIWScheduler::schedule() {
}
VLIWPacketizerList::VLIWPacketizerList(MachineFunction &mf,
- MachineLoopInfo &mli, AliasAnalysis *aa)
+ MachineLoopInfo &mli, AAResults *aa)
: MF(mf), TII(mf.getSubtarget().getInstrInfo()), AA(aa) {
ResourceTracker = TII->CreateTargetScheduleState(MF.getSubtarget());
+ ResourceTracker->setTrackResources(true);
VLIWScheduler = new DefaultVLIWScheduler(MF, mli, AA);
}
@@ -224,8 +210,11 @@ void VLIWPacketizerList::endPacket(MachineBasicBlock *MBB,
LLVM_DEBUG({
if (!CurrentPacketMIs.empty()) {
dbgs() << "Finalizing packet:\n";
- for (MachineInstr *MI : CurrentPacketMIs)
- dbgs() << " * " << *MI;
+ unsigned Idx = 0;
+ for (MachineInstr *MI : CurrentPacketMIs) {
+ unsigned R = ResourceTracker->getUsedResources(Idx++);
+ dbgs() << " * [res:0x" << utohexstr(R) << "] " << *MI;
+ }
}
});
if (CurrentPacketMIs.size() > 1) {
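getUsedResources() relies on the masks being cumulative along the packet: assuming resource bits are only ever added, XOR of adjacent entries recovers the bits first used by each instruction. Worked example:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t RS[] = {0x1, 0x3, 0x7}; // cumulative usage per instruction
  assert(RS[0] == 0x1);                  // instruction 0 used bit 0
  assert((RS[1] ^ RS[0]) == 0x2);        // instruction 1 added bit 1
  assert((RS[2] ^ RS[1]) == 0x4);        // instruction 2 added bit 2
  return 0;
}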
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 049ce7063307..9a537c859a67 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -75,8 +75,8 @@ bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
// Don't delete live physreg defs, or any reserved register defs.
if (LivePhysRegs.test(Reg) || MRI->isReserved(Reg))
return false;
@@ -140,8 +140,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
// this def.
@@ -159,8 +159,8 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
LivePhysRegs.set(*AI);
}
diff --git a/lib/CodeGen/DetectDeadLanes.cpp b/lib/CodeGen/DetectDeadLanes.cpp
index fe78acf4d80a..6d5306c1dc0c 100644
--- a/lib/CodeGen/DetectDeadLanes.cpp
+++ b/lib/CodeGen/DetectDeadLanes.cpp
@@ -154,7 +154,7 @@ static bool isCrossCopy(const MachineRegisterInfo &MRI,
const TargetRegisterClass *DstRC,
const MachineOperand &MO) {
assert(lowersToCopies(MI));
- unsigned SrcReg = MO.getReg();
+ Register SrcReg = MO.getReg();
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
if (DstRC == SrcRC)
return false;
@@ -194,8 +194,8 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
LaneBitmask UsedLanes) {
if (!MO.readsReg())
return;
- unsigned MOReg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (!Register::isVirtualRegister(MOReg))
return;
unsigned MOSubReg = MO.getSubReg();
@@ -203,7 +203,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
UsedLanes = TRI->composeSubRegIndexLaneMask(MOSubReg, UsedLanes);
UsedLanes &= MRI->getMaxLaneMaskForVReg(MOReg);
- unsigned MORegIdx = TargetRegisterInfo::virtReg2Index(MOReg);
+ unsigned MORegIdx = Register::virtReg2Index(MOReg);
VRegInfo &MORegInfo = VRegInfos[MORegIdx];
LaneBitmask PrevUsedLanes = MORegInfo.UsedLanes;
// Any change at all?
@@ -219,7 +219,7 @@ void DetectDeadLanes::addUsedLanesOnOperand(const MachineOperand &MO,
void DetectDeadLanes::transferUsedLanesStep(const MachineInstr &MI,
LaneBitmask UsedLanes) {
for (const MachineOperand &MO : MI.uses()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
LaneBitmask UsedOnMO = transferUsedLanes(MI, UsedLanes, MO);
addUsedLanesOnOperand(MO, UsedOnMO);
@@ -230,8 +230,8 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
LaneBitmask UsedLanes,
const MachineOperand &MO) const {
unsigned OpNum = MI.getOperandNo(&MO);
- assert(lowersToCopies(MI) && DefinedByCopy[
- TargetRegisterInfo::virtReg2Index(MI.getOperand(0).getReg())]);
+ assert(lowersToCopies(MI) &&
+ DefinedByCopy[Register::virtReg2Index(MI.getOperand(0).getReg())]);
switch (MI.getOpcode()) {
case TargetOpcode::COPY:
@@ -250,7 +250,7 @@ LaneBitmask DetectDeadLanes::transferUsedLanes(const MachineInstr &MI,
return MO2UsedLanes;
const MachineOperand &Def = MI.getOperand(0);
- unsigned DefReg = Def.getReg();
+ Register DefReg = Def.getReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
LaneBitmask MO1UsedLanes;
if (RC->CoveredBySubRegs)
@@ -285,10 +285,10 @@ void DetectDeadLanes::transferDefinedLanesStep(const MachineOperand &Use,
if (MI.getOpcode() == TargetOpcode::PATCHPOINT)
return;
const MachineOperand &Def = *MI.defs().begin();
- unsigned DefReg = Def.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = Def.getReg();
+ if (!Register::isVirtualRegister(DefReg))
return;
- unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
return;
@@ -360,7 +360,7 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
if (lowersToCopies(DefMI)) {
// Start optimistically with no used or defined lanes for copy
// instructions. The following dataflow analysis will add more bits.
- unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ unsigned RegIdx = Register::virtReg2Index(Reg);
DefinedByCopy.set(RegIdx);
PutInWorklist(RegIdx);
@@ -377,17 +377,17 @@ LaneBitmask DetectDeadLanes::determineInitialDefinedLanes(unsigned Reg) {
for (const MachineOperand &MO : DefMI.uses()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
LaneBitmask MODefinedLanes;
- if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (Register::isPhysicalRegister(MOReg)) {
MODefinedLanes = LaneBitmask::getAll();
} else if (isCrossCopy(*MRI, DefMI, DefRC, MO)) {
MODefinedLanes = LaneBitmask::getAll();
} else {
- assert(TargetRegisterInfo::isVirtualRegister(MOReg));
+ assert(Register::isVirtualRegister(MOReg));
if (MRI->hasOneDef(MOReg)) {
const MachineOperand &MODef = *MRI->def_begin(MOReg);
const MachineInstr &MODefMI = *MODef.getParent();
@@ -428,10 +428,10 @@ LaneBitmask DetectDeadLanes::determineInitialUsedLanes(unsigned Reg) {
if (lowersToCopies(UseMI)) {
assert(UseMI.getDesc().getNumDefs() == 1);
const MachineOperand &Def = *UseMI.defs().begin();
- unsigned DefReg = Def.getReg();
+ Register DefReg = Def.getReg();
// The used lanes of COPY-like instruction operands are determined by the
// following dataflow analysis.
- if (TargetRegisterInfo::isVirtualRegister(DefReg)) {
+ if (Register::isVirtualRegister(DefReg)) {
// But ignore copies across incompatible register classes.
bool CrossCopy = false;
if (lowersToCopies(UseMI)) {
@@ -470,10 +470,10 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (!lowersToCopies(MI))
return false;
const MachineOperand &Def = MI.getOperand(0);
- unsigned DefReg = Def.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = Def.getReg();
+ if (!Register::isVirtualRegister(DefReg))
return false;
- unsigned DefRegIdx = TargetRegisterInfo::virtReg2Index(DefReg);
+ unsigned DefRegIdx = Register::virtReg2Index(DefReg);
if (!DefinedByCopy.test(DefRegIdx))
return false;
@@ -482,8 +482,8 @@ bool DetectDeadLanes::isUndefInput(const MachineOperand &MO,
if (UsedLanes.any())
return false;
- unsigned MOReg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(MOReg)) {
+ Register MOReg = MO.getReg();
+ if (Register::isVirtualRegister(MOReg)) {
const TargetRegisterClass *DstRC = MRI->getRegClass(DefReg);
*CrossCopy = isCrossCopy(*MRI, MI, DstRC, MO);
}
@@ -494,7 +494,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
// First pass: Populate defs/uses of vregs with initial values
unsigned NumVirtRegs = MRI->getNumVirtRegs();
for (unsigned RegIdx = 0; RegIdx < NumVirtRegs; ++RegIdx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
// Determine used/defined lanes and add copy instructions to worklist.
VRegInfo &Info = VRegInfos[RegIdx];
@@ -508,7 +508,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
Worklist.pop_front();
WorklistMembers.reset(RegIdx);
VRegInfo &Info = VRegInfos[RegIdx];
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
// Transfer UsedLanes to operands of DefMI (backwards dataflow).
MachineOperand &Def = *MRI->def_begin(Reg);
@@ -522,7 +522,7 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "Defined/Used lanes:\n"; for (unsigned RegIdx = 0;
RegIdx < NumVirtRegs;
++RegIdx) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(RegIdx);
+ unsigned Reg = Register::index2VirtReg(RegIdx);
const VRegInfo &Info = VRegInfos[RegIdx];
dbgs() << printReg(Reg, nullptr)
<< " Used: " << PrintLaneMask(Info.UsedLanes)
@@ -536,10 +536,10 @@ bool DetectDeadLanes::runOnce(MachineFunction &MF) {
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
- unsigned RegIdx = TargetRegisterInfo::virtReg2Index(Reg);
+ unsigned RegIdx = Register::virtReg2Index(Reg);
const VRegInfo &RegInfo = VRegInfos[RegIdx];
if (MO.isDef() && !MO.isDead() && RegInfo.UsedLanes.none()) {
LLVM_DEBUG(dbgs()
diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp
index 0a83760befaa..e5694218b5c3 100644
--- a/lib/CodeGen/EarlyIfConversion.cpp
+++ b/lib/CodeGen/EarlyIfConversion.cpp
@@ -25,6 +25,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineTraceMetrics.h"
@@ -140,6 +141,18 @@ private:
/// speculated.
bool canSpeculateInstrs(MachineBasicBlock *MBB);
+ /// Return true if all non-terminator instructions in MBB can be safely
+ /// predicated.
+ bool canPredicateInstrs(MachineBasicBlock *MBB);
+
+ /// Scan through instruction dependencies and update InsertAfter array.
+ /// Return false if any dependency is incompatible with if conversion.
+ bool InstrDependenciesAllowIfConv(MachineInstr *I);
+
+ /// Predicate all instructions of the basic block with current condition
+ /// except for terminators. Reverse the condition if ReversePredicate is set.
+ void PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate);
+
/// Find a valid insertion point in Head.
bool findInsertionPoint();
@@ -163,11 +176,14 @@ public:
/// canConvertIf - If the sub-CFG headed by MBB can be if-converted,
/// initialize the internal state, and return true.
- bool canConvertIf(MachineBasicBlock *MBB);
+ /// If Predicate is set, try to predicate the block; otherwise try to
+ /// speculatively execute it.
+ bool canConvertIf(MachineBasicBlock *MBB, bool Predicate = false);
/// convertIf - If-convert the last block passed to canConvertIf(), assuming
/// it is possible. Add any erased blocks to RemovedBlocks.
- void convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks);
+ void convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate = false);
};
} // end anonymous namespace
@@ -225,37 +241,112 @@ bool SSAIfConv::canSpeculateInstrs(MachineBasicBlock *MBB) {
}
// Check for any dependencies on Head instructions.
- for (const MachineOperand &MO : I->operands()) {
- if (MO.isRegMask()) {
- LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
- return false;
- }
- if (!MO.isReg())
- continue;
- unsigned Reg = MO.getReg();
+ if (!InstrDependenciesAllowIfConv(&(*I)))
+ return false;
+ }
+ return true;
+}
- // Remember clobbered regunits.
- if (MO.isDef() && TargetRegisterInfo::isPhysicalRegister(Reg))
- for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
- ClobberedRegUnits.set(*Units);
+/// Check that there are no dependencies preventing if-conversion.
+///
+/// If instruction uses any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+bool SSAIfConv::InstrDependenciesAllowIfConv(MachineInstr *I) {
+ for (const MachineOperand &MO : I->operands()) {
+ if (MO.isRegMask()) {
+ LLVM_DEBUG(dbgs() << "Won't speculate regmask: " << *I);
+ return false;
+ }
+ if (!MO.isReg())
+ continue;
+ Register Reg = MO.getReg();
- if (!MO.readsReg() || !TargetRegisterInfo::isVirtualRegister(Reg))
- continue;
- MachineInstr *DefMI = MRI->getVRegDef(Reg);
- if (!DefMI || DefMI->getParent() != Head)
- continue;
- if (InsertAfter.insert(DefMI).second)
- LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " depends on "
- << *DefMI);
- if (DefMI->isTerminator()) {
- LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
- return false;
- }
+ // Remember clobbered regunits.
+ if (MO.isDef() && Register::isPhysicalRegister(Reg))
+ for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
+ ClobberedRegUnits.set(*Units);
+
+ if (!MO.readsReg() || !Register::isVirtualRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (!DefMI || DefMI->getParent() != Head)
+ continue;
+ if (InsertAfter.insert(DefMI).second)
+ LLVM_DEBUG(dbgs() << printMBBReference(*I->getParent()) << " depends on "
+ << *DefMI);
+ if (DefMI->isTerminator()) {
+ LLVM_DEBUG(dbgs() << "Can't insert instructions below terminator.\n");
+ return false;
}
}
return true;
}
+/// canPredicateInstrs - Returns true if all the instructions in MBB can safely
+/// be predicated. The terminators are not considered.
+///
+/// If instructions use any values that are defined in the head basic block,
+/// the defining instructions are added to InsertAfter.
+///
+/// Any clobbered regunits are added to ClobberedRegUnits.
+///
+bool SSAIfConv::canPredicateInstrs(MachineBasicBlock *MBB) {
+ // Reject any live-in physregs. It's probably CPSR/EFLAGS, and very hard to
+ // get right.
+ if (!MBB->livein_empty()) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has live-ins.\n");
+ return false;
+ }
+
+ unsigned InstrCount = 0;
+
+ // Check all instructions, except the terminators. It is assumed that
+ // terminators never have side effects or define any used register values.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+
+ if (++InstrCount > BlockInstrLimit && !Stress) {
+ LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has more than "
+ << BlockInstrLimit << " instructions.\n");
+ return false;
+ }
+
+ // There shouldn't normally be any phis in a single-predecessor block.
+ if (I->isPHI()) {
+ LLVM_DEBUG(dbgs() << "Can't predicate: " << *I);
+ return false;
+ }
+
+ // Check that instruction is predicable and that it is not already
+ // predicated.
+ if (!TII->isPredicable(*I) || TII->isPredicated(*I)) {
+ return false;
+ }
+
+ // Check for any dependencies on Head instructions.
+ if (!InstrDependenciesAllowIfConv(&(*I)))
+ return false;
+ }
+ return true;
+}
+
+// Apply predicate to all instructions in the machine block.
+void SSAIfConv::PredicateBlock(MachineBasicBlock *MBB, bool ReversePredicate) {
+ auto Condition = Cond;
+ if (ReversePredicate)
+ TII->reverseBranchCondition(Condition);
+ // Terminators don't need to be predicated as they will be removed.
+ for (MachineBasicBlock::iterator I = MBB->begin(),
+ E = MBB->getFirstTerminator();
+ I != E; ++I) {
+ if (I->isDebugInstr())
+ continue;
+ TII->PredicateInstruction(*I, Condition);
+ }
+}
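A minimal sketch of the intended transform on a triangle, in hypothetical MIR
with an illustrative predicated form (the real encoding comes from the target's
PredicateInstruction hook):

    ; before: Head branches on %c; TBB holds   %x = ADD %a, %b
    ; after canConvertIf(MBB, /*Predicate=*/true) and convertIf:
    ;   Head: %x = ADD %a, %b, pred(%c)        ; predicated, spliced into Head
    ; TBB's terminators are never predicated; convertIf removes them outright.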
/// Find an insertion point in Head for the speculated instructions. The
/// insertion point must be:
@@ -288,8 +379,8 @@ bool SSAIfConv::findInsertionPoint() {
// We're ignoring regmask operands. That is conservatively correct.
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
// I clobbers Reg, so it isn't live before I.
if (MO.isDef())
@@ -337,7 +428,7 @@ bool SSAIfConv::findInsertionPoint() {
/// canConvertIf - analyze the sub-cfg rooted in MBB, and return true if it is
/// a potential candidate for if-conversion. Fill out the internal state.
///
-bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
+bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB, bool Predicate) {
Head = MBB;
TBB = FBB = Tail = nullptr;
@@ -378,8 +469,9 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
}
// This is a triangle or a diamond.
- // If Tail doesn't have any phis, there must be side effects.
- if (Tail->empty() || !Tail->front().isPHI()) {
+ // If we cannot predicate and Tail has no phis, skip: there must be side
+ // effects that can only be handled with predication.
+ if (!Predicate && (Tail->empty() || !Tail->front().isPHI())) {
LLVM_DEBUG(dbgs() << "No phis in tail.\n");
return false;
}
@@ -423,8 +515,8 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
if (PI.PHI->getOperand(i+1).getMBB() == FPred)
PI.FReg = PI.PHI->getOperand(i).getReg();
}
- assert(TargetRegisterInfo::isVirtualRegister(PI.TReg) && "Bad PHI");
- assert(TargetRegisterInfo::isVirtualRegister(PI.FReg) && "Bad PHI");
+ assert(Register::isVirtualRegister(PI.TReg) && "Bad PHI");
+ assert(Register::isVirtualRegister(PI.FReg) && "Bad PHI");
// Get target information.
if (!TII->canInsertSelect(*Head, Cond, PI.TReg, PI.FReg,
@@ -437,10 +529,17 @@ bool SSAIfConv::canConvertIf(MachineBasicBlock *MBB) {
// Check that the conditional instructions can be speculated.
InsertAfter.clear();
ClobberedRegUnits.reset();
- if (TBB != Tail && !canSpeculateInstrs(TBB))
- return false;
- if (FBB != Tail && !canSpeculateInstrs(FBB))
- return false;
+ if (Predicate) {
+ if (TBB != Tail && !canPredicateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canPredicateInstrs(FBB))
+ return false;
+ } else {
+ if (TBB != Tail && !canSpeculateInstrs(TBB))
+ return false;
+ if (FBB != Tail && !canSpeculateInstrs(FBB))
+ return false;
+ }
// Try to find a valid insertion point for the speculated instructions in the
// head basic block.
@@ -467,7 +566,7 @@ void SSAIfConv::replacePHIInstrs() {
for (unsigned i = 0, e = PHIs.size(); i != e; ++i) {
PHIInfo &PI = PHIs[i];
LLVM_DEBUG(dbgs() << "If-converting " << *PI.PHI);
- unsigned DstReg = PI.PHI->getOperand(0).getReg();
+ Register DstReg = PI.PHI->getOperand(0).getReg();
TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg);
LLVM_DEBUG(dbgs() << " --> " << *std::prev(FirstTerm));
PI.PHI->eraseFromParent();
@@ -494,7 +593,7 @@ void SSAIfConv::rewritePHIOperands() {
// equal.
DstReg = PI.TReg;
} else {
- unsigned PHIDst = PI.PHI->getOperand(0).getReg();
+ Register PHIDst = PI.PHI->getOperand(0).getReg();
DstReg = MRI->createVirtualRegister(MRI->getRegClass(PHIDst));
TII->insertSelect(*Head, FirstTerm, HeadDL,
DstReg, Cond, PI.TReg, PI.FReg);
@@ -521,7 +620,8 @@ void SSAIfConv::rewritePHIOperands() {
///
/// Any basic blocks erased will be added to RemovedBlocks.
///
-void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
+void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock *> &RemovedBlocks,
+ bool Predicate) {
assert(Head && Tail && TBB && FBB && "Call canConvertIf first.");
// Update statistics.
@@ -531,11 +631,16 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
++NumDiamondsConv;
// Move all instructions into Head, except for the terminators.
- if (TBB != Tail)
+ if (TBB != Tail) {
+ if (Predicate)
+ PredicateBlock(TBB, /*ReversePredicate=*/false);
Head->splice(InsertionPoint, TBB, TBB->begin(), TBB->getFirstTerminator());
- if (FBB != Tail)
+ }
+ if (FBB != Tail) {
+ if (Predicate)
+ PredicateBlock(FBB, /*ReversePredicate=*/true);
Head->splice(InsertionPoint, FBB, FBB->begin(), FBB->getFirstTerminator());
-
+ }
// Are there extra Tail predecessors?
bool ExtraPreds = Tail->pred_size() != 2;
if (ExtraPreds)
@@ -587,7 +692,6 @@ void SSAIfConv::convertIf(SmallVectorImpl<MachineBasicBlock*> &RemovedBlocks) {
LLVM_DEBUG(dbgs() << *Head);
}
-
//===----------------------------------------------------------------------===//
// EarlyIfConverter Pass
//===----------------------------------------------------------------------===//
@@ -613,8 +717,6 @@ public:
private:
bool tryConvertIf(MachineBasicBlock*);
- void updateDomTree(ArrayRef<MachineBasicBlock*> Removed);
- void updateLoops(ArrayRef<MachineBasicBlock*> Removed);
void invalidateTraces();
bool shouldConvertIf();
};
@@ -642,32 +744,36 @@ void EarlyIfConverter::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+namespace {
/// Update the dominator tree after if-conversion erased some blocks.
-void EarlyIfConverter::updateDomTree(ArrayRef<MachineBasicBlock*> Removed) {
+void updateDomTree(MachineDominatorTree *DomTree, const SSAIfConv &IfConv,
+ ArrayRef<MachineBasicBlock *> Removed) {
// convertIf can remove TBB and FBB, and Tail can be merged into Head.
// TBB and FBB should not dominate any blocks.
// Tail children should be transferred to Head.
MachineDomTreeNode *HeadNode = DomTree->getNode(IfConv.Head);
- for (unsigned i = 0, e = Removed.size(); i != e; ++i) {
- MachineDomTreeNode *Node = DomTree->getNode(Removed[i]);
+ for (auto B : Removed) {
+ MachineDomTreeNode *Node = DomTree->getNode(B);
assert(Node != HeadNode && "Cannot erase the head node");
while (Node->getNumChildren()) {
assert(Node->getBlock() == IfConv.Tail && "Unexpected children");
DomTree->changeImmediateDominator(Node->getChildren().back(), HeadNode);
}
- DomTree->eraseNode(Removed[i]);
+ DomTree->eraseNode(B);
}
}
/// Update LoopInfo after if-conversion.
-void EarlyIfConverter::updateLoops(ArrayRef<MachineBasicBlock*> Removed) {
+void updateLoops(MachineLoopInfo *Loops,
+ ArrayRef<MachineBasicBlock *> Removed) {
if (!Loops)
return;
// If-conversion doesn't change loop structure, and it doesn't mess with back
// edges, so updating LoopInfo is simply removing the dead blocks.
- for (unsigned i = 0, e = Removed.size(); i != e; ++i)
- Loops->removeBlock(Removed[i]);
+ for (auto B : Removed)
+ Loops->removeBlock(B);
}
+} // namespace
/// Invalidate MachineTraceMetrics before if-conversion.
void EarlyIfConverter::invalidateTraces() {
@@ -783,8 +889,8 @@ bool EarlyIfConverter::tryConvertIf(MachineBasicBlock *MBB) {
SmallVector<MachineBasicBlock*, 4> RemovedBlocks;
IfConv.convertIf(RemovedBlocks);
Changed = true;
- updateDomTree(RemovedBlocks);
- updateLoops(RemovedBlocks);
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
}
return Changed;
}
@@ -822,3 +928,132 @@ bool EarlyIfConverter::runOnMachineFunction(MachineFunction &MF) {
return Changed;
}
+
+//===----------------------------------------------------------------------===//
+// EarlyIfPredicator Pass
+//===----------------------------------------------------------------------===//
+
+namespace {
+class EarlyIfPredicator : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ TargetSchedModel SchedModel;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ SSAIfConv IfConv;
+
+public:
+ static char ID;
+ EarlyIfPredicator() : MachineFunctionPass(ID) {}
+ void getAnalysisUsage(AnalysisUsage &AU) const override;
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ StringRef getPassName() const override { return "Early If-predicator"; }
+
+protected:
+ bool tryConvertIf(MachineBasicBlock *);
+ bool shouldConvertIf();
+};
+} // end anonymous namespace
+
+#undef DEBUG_TYPE
+#define DEBUG_TYPE "early-if-predicator"
+
+char EarlyIfPredicator::ID = 0;
+char &llvm::EarlyIfPredicatorID = EarlyIfPredicator::ID;
+
+INITIALIZE_PASS_BEGIN(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(EarlyIfPredicator, DEBUG_TYPE, "Early If Predicator", false,
+ false)
+
+void EarlyIfPredicator::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Apply the target heuristic to decide if the transformation is profitable.
+bool EarlyIfPredicator::shouldConvertIf() {
+ if (IfConv.isTriangle()) {
+ MachineBasicBlock &IfBlock =
+ (IfConv.TBB == IfConv.Tail) ? *IfConv.FBB : *IfConv.TBB;
+
+ unsigned ExtraPredCost = 0;
+ unsigned Cycles = 0;
+ for (MachineInstr &I : IfBlock) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ Cycles += NumCycles - 1;
+ ExtraPredCost += TII->getPredicationCost(I);
+ }
+
+ return TII->isProfitableToIfCvt(IfBlock, Cycles, ExtraPredCost,
+ BranchProbability::getUnknown());
+ }
+ unsigned TExtra = 0;
+ unsigned FExtra = 0;
+ unsigned TCycle = 0;
+ unsigned FCycle = 0;
+ for (MachineInstr &I : *IfConv.TBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ TCycle += NumCycles - 1;
+ TExtra += TII->getPredicationCost(I);
+ }
+ for (MachineInstr &I : *IfConv.FBB) {
+ unsigned NumCycles = SchedModel.computeInstrLatency(&I, false);
+ if (NumCycles > 1)
+ FCycle += NumCycles - 1;
+ FExtra += TII->getPredicationCost(I);
+ }
+ return TII->isProfitableToIfCvt(*IfConv.TBB, TCycle, TExtra, *IfConv.FBB,
+ FCycle, FExtra,
+ BranchProbability::getUnknown());
+}
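A worked example with illustrative numbers, not taken from this patch: if
IfBlock holds two instructions with latencies 3 and 1 and a per-instruction
predication cost of 1, the loop computes

    Cycles        = (3 - 1) = 2    ; only latencies above 1 contribute
    ExtraPredCost = 1 + 1   = 2

and isProfitableToIfCvt then weighs those totals against the branch cost under
an unknown branch probability.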
+
+/// Attempt repeated if-conversion on MBB, return true if successful.
+///
+bool EarlyIfPredicator::tryConvertIf(MachineBasicBlock *MBB) {
+ bool Changed = false;
+ while (IfConv.canConvertIf(MBB, /*Predicate*/ true) && shouldConvertIf()) {
+ // If-convert MBB and update analyses.
+ SmallVector<MachineBasicBlock *, 4> RemovedBlocks;
+ IfConv.convertIf(RemovedBlocks, /*Predicate*/ true);
+ Changed = true;
+ updateDomTree(DomTree, IfConv, RemovedBlocks);
+ updateLoops(Loops, RemovedBlocks);
+ }
+ return Changed;
+}
+
+bool EarlyIfPredicator::runOnMachineFunction(MachineFunction &MF) {
+ LLVM_DEBUG(dbgs() << "********** EARLY IF-PREDICATOR **********\n"
+ << "********** Function: " << MF.getName() << '\n');
+ if (skipFunction(MF.getFunction()))
+ return false;
+
+ const TargetSubtargetInfo &STI = MF.getSubtarget();
+ TII = STI.getInstrInfo();
+ TRI = STI.getRegisterInfo();
+ MRI = &MF.getRegInfo();
+ SchedModel.init(&STI);
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ Loops = getAnalysisIfAvailable<MachineLoopInfo>();
+
+ bool Changed = false;
+ IfConv.runOnMachineFunction(MF);
+
+ // Visit blocks in dominator tree post-order. The post-order enables nested
+ // if-conversion in a single pass. The tryConvertIf() function may erase
+ // blocks, but only blocks dominated by the head block. This makes it safe to
+ // update the dominator tree while the post-order iterator is still active.
+ for (auto DomNode : post_order(DomTree))
+ if (tryConvertIf(DomNode->getBlock()))
+ Changed = true;
+
+ return Changed;
+}
diff --git a/lib/CodeGen/ExecutionDomainFix.cpp b/lib/CodeGen/ExecutionDomainFix.cpp
index a2dd5eee33b7..2cca05ea6f55 100644
--- a/lib/CodeGen/ExecutionDomainFix.cpp
+++ b/lib/CodeGen/ExecutionDomainFix.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/ExecutionDomainFix.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/CodeGen/ExpandMemCmp.cpp b/lib/CodeGen/ExpandMemCmp.cpp
index b425482e6adf..9916f2de0414 100644
--- a/lib/CodeGen/ExpandMemCmp.cpp
+++ b/lib/CodeGen/ExpandMemCmp.cpp
@@ -795,7 +795,7 @@ public:
TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
const TargetLibraryInfo *TLI =
- &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
const TargetTransformInfo *TTI =
&getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto PA = runImpl(F, TLI, TTI, TL);
diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp
index 0ab70aff7dc4..1fc57fac1489 100644
--- a/lib/CodeGen/ExpandPostRAPseudos.cpp
+++ b/lib/CodeGen/ExpandPostRAPseudos.cpp
@@ -79,17 +79,17 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) {
(MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) &&
MI->getOperand(3).isImm() && "Invalid subreg_to_reg");
- unsigned DstReg = MI->getOperand(0).getReg();
- unsigned InsReg = MI->getOperand(2).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
+ Register InsReg = MI->getOperand(2).getReg();
assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?");
unsigned SubIdx = MI->getOperand(3).getImm();
assert(SubIdx != 0 && "Invalid index for insert_subreg");
- unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx);
+ Register DstSubReg = TRI->getSubReg(DstReg, SubIdx);
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ assert(Register::isPhysicalRegister(DstReg) &&
"Insert destination must be in a physical register");
- assert(TargetRegisterInfo::isPhysicalRegister(InsReg) &&
+ assert(Register::isPhysicalRegister(InsReg) &&
"Inserted value must be in a physical register");
LLVM_DEBUG(dbgs() << "subreg: CONVERTING: " << *MI);
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 9c53550eaa9d..c1d22ef89195 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -72,7 +72,7 @@ GCFunctionInfo &GCModuleInfo::getFunctionInfo(const Function &F) {
return *I->second;
GCStrategy *S = getGCStrategy(F.getGC());
- Functions.push_back(llvm::make_unique<GCFunctionInfo>(F, *S));
+ Functions.push_back(std::make_unique<GCFunctionInfo>(F, *S));
GCFunctionInfo *GFI = Functions.back().get();
FInfoMap[&F] = GFI;
return *GFI;
diff --git a/lib/CodeGen/GCRootLowering.cpp b/lib/CodeGen/GCRootLowering.cpp
index 90571d090bfb..0dc0a5bce747 100644
--- a/lib/CodeGen/GCRootLowering.cpp
+++ b/lib/CodeGen/GCRootLowering.cpp
@@ -249,7 +249,7 @@ GCMachineCodeAnalysis::GCMachineCodeAnalysis() : MachineFunctionPass(ID) {}
void GCMachineCodeAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
AU.setPreservesAll();
- AU.addRequired<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
AU.addRequired<GCModuleInfo>();
}
@@ -310,7 +310,7 @@ bool GCMachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
return false;
FI = &getAnalysis<GCModuleInfo>().getFunctionInfo(MF.getFunction());
- MMI = &getAnalysis<MachineModuleInfo>();
+ MMI = &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
TII = MF.getSubtarget().getInstrInfo();
// Find the size of the stack frame. There may be no correct static frame
diff --git a/lib/CodeGen/GlobalISel/CSEInfo.cpp b/lib/CodeGen/GlobalISel/CSEInfo.cpp
index 4518dbee1a9f..7d9d812d34bc 100644
--- a/lib/CodeGen/GlobalISel/CSEInfo.cpp
+++ b/lib/CodeGen/GlobalISel/CSEInfo.cpp
@@ -52,6 +52,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
case TargetOpcode::G_ANYEXT:
case TargetOpcode::G_UNMERGE_VALUES:
case TargetOpcode::G_TRUNC:
+ case TargetOpcode::G_GEP:
return true;
}
return false;
@@ -65,9 +66,9 @@ std::unique_ptr<CSEConfigBase>
llvm::getStandardCSEConfigForOpt(CodeGenOpt::Level Level) {
std::unique_ptr<CSEConfigBase> Config;
if (Level == CodeGenOpt::None)
- Config = make_unique<CSEConfigConstantOnly>();
+ Config = std::make_unique<CSEConfigConstantOnly>();
else
- Config = make_unique<CSEConfigFull>();
+ Config = std::make_unique<CSEConfigFull>();
return Config;
}
@@ -332,7 +333,7 @@ GISelInstProfileBuilder::addNodeIDFlag(unsigned Flag) const {
const GISelInstProfileBuilder &GISelInstProfileBuilder::addNodeIDMachineOperand(
const MachineOperand &MO) const {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!MO.isDef())
addNodeIDRegNum(Reg);
LLT Ty = MRI.getType(Reg);
diff --git a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
index 461bc6038c2c..51a74793f029 100644
--- a/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp
@@ -162,6 +162,17 @@ MachineInstrBuilder CSEMIRBuilder::buildInstr(unsigned Opc,
return buildConstant(DstOps[0], Cst->getSExtValue());
break;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(DstOps.size() == 1 && "Invalid dst ops");
+ assert(SrcOps.size() == 2 && "Invalid src ops");
+ const DstOp &Dst = DstOps[0];
+ const SrcOp &Src0 = SrcOps[0];
+ const SrcOp &Src1 = SrcOps[1];
+ if (auto MaybeCst =
+ ConstantFoldExtOp(Opc, Src0.getReg(), Src1.getImm(), *getMRI()))
+ return buildConstant(Dst, MaybeCst->getSExtValue());
+ break;
+ }
}
bool CanCopy = checkCopyToDefsPossible(DstOps);
if (!canPerformCSEForOpc(Opc))
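For intuition, a worked instance of the new fold on assumed values (hypothetical
MIR): G_SEXT_INREG sign-extends in place from the given bit width, so

    ; %c:_(s32) = G_CONSTANT i32 10     ; low bits 0b1010
    ; %r:_(s32) = G_SEXT_INREG %c, 4    ; sign bit of the 4-bit field is set
    ; folds to:
    ; %r:_(s32) = G_CONSTANT i32 -6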
diff --git a/lib/CodeGen/GlobalISel/CallLowering.cpp b/lib/CodeGen/GlobalISel/CallLowering.cpp
index a5d8205a34a8..cdad92f7db4f 100644
--- a/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -11,14 +11,16 @@
///
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/GlobalISel/CallLowering.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#define DEBUG_TYPE "call-lowering"
@@ -32,66 +34,70 @@ bool CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, ImmutableCallSite CS,
ArrayRef<ArrayRef<Register>> ArgRegs,
Register SwiftErrorVReg,
std::function<unsigned()> GetCalleeReg) const {
+ CallLoweringInfo Info;
auto &DL = CS.getParent()->getParent()->getParent()->getDataLayout();
// First step is to marshall all the function's parameters into the correct
// physregs and memory locations. Gather the sequence of argument types that
// we'll pass to the assigner function.
- SmallVector<ArgInfo, 8> OrigArgs;
unsigned i = 0;
unsigned NumFixedArgs = CS.getFunctionType()->getNumParams();
for (auto &Arg : CS.args()) {
ArgInfo OrigArg{ArgRegs[i], Arg->getType(), ISD::ArgFlagsTy{},
i < NumFixedArgs};
setArgFlags(OrigArg, i + AttributeList::FirstArgIndex, DL, CS);
- // We don't currently support swiftself args.
- if (OrigArg.Flags.isSwiftSelf())
- return false;
- OrigArgs.push_back(OrigArg);
+ Info.OrigArgs.push_back(OrigArg);
++i;
}
- MachineOperand Callee = MachineOperand::CreateImm(0);
if (const Function *F = CS.getCalledFunction())
- Callee = MachineOperand::CreateGA(F, 0);
+ Info.Callee = MachineOperand::CreateGA(F, 0);
else
- Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
-
- ArgInfo OrigRet{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
- if (!OrigRet.Ty->isVoidTy())
- setArgFlags(OrigRet, AttributeList::ReturnIndex, DL, CS);
-
- return lowerCall(MIRBuilder, CS.getCallingConv(), Callee, OrigRet, OrigArgs,
- SwiftErrorVReg);
+ Info.Callee = MachineOperand::CreateReg(GetCalleeReg(), false);
+
+ Info.OrigRet = ArgInfo{ResRegs, CS.getType(), ISD::ArgFlagsTy{}};
+ if (!Info.OrigRet.Ty->isVoidTy())
+ setArgFlags(Info.OrigRet, AttributeList::ReturnIndex, DL, CS);
+
+ Info.KnownCallees =
+ CS.getInstruction()->getMetadata(LLVMContext::MD_callees);
+ Info.CallConv = CS.getCallingConv();
+ Info.SwiftErrorVReg = SwiftErrorVReg;
+ Info.IsMustTailCall = CS.isMustTailCall();
+ Info.IsTailCall = CS.isTailCall() &&
+ isInTailCallPosition(CS, MIRBuilder.getMF().getTarget());
+ Info.IsVarArg = CS.getFunctionType()->isVarArg();
+ return lowerCall(MIRBuilder, Info);
}
template <typename FuncInfoTy>
void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
const DataLayout &DL,
const FuncInfoTy &FuncInfo) const {
+ auto &Flags = Arg.Flags[0];
const AttributeList &Attrs = FuncInfo.getAttributes();
if (Attrs.hasAttribute(OpIdx, Attribute::ZExt))
- Arg.Flags.setZExt();
+ Flags.setZExt();
if (Attrs.hasAttribute(OpIdx, Attribute::SExt))
- Arg.Flags.setSExt();
+ Flags.setSExt();
if (Attrs.hasAttribute(OpIdx, Attribute::InReg))
- Arg.Flags.setInReg();
+ Flags.setInReg();
if (Attrs.hasAttribute(OpIdx, Attribute::StructRet))
- Arg.Flags.setSRet();
+ Flags.setSRet();
if (Attrs.hasAttribute(OpIdx, Attribute::SwiftSelf))
- Arg.Flags.setSwiftSelf();
+ Flags.setSwiftSelf();
if (Attrs.hasAttribute(OpIdx, Attribute::SwiftError))
- Arg.Flags.setSwiftError();
+ Flags.setSwiftError();
if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
- Arg.Flags.setByVal();
+ Flags.setByVal();
if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
- Arg.Flags.setInAlloca();
+ Flags.setInAlloca();
- if (Arg.Flags.isByVal() || Arg.Flags.isInAlloca()) {
+ if (Flags.isByVal() || Flags.isInAlloca()) {
Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();
- Arg.Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
+ Flags.setByValSize(DL.getTypeAllocSize(Ty ? Ty : ElementTy));
// For ByVal, alignment should be passed from FE. BE will guess if
// this info is not there but there are cases it cannot get right.
@@ -100,11 +106,11 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
FrameAlign = FuncInfo.getParamAlignment(OpIdx - 2);
else
FrameAlign = getTLI()->getByValTypeAlignment(ElementTy, DL);
- Arg.Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Attrs.hasAttribute(OpIdx, Attribute::Nest))
- Arg.Flags.setNest();
- Arg.Flags.setOrigAlign(DL.getABITypeAlignment(Arg.Ty));
+ Flags.setNest();
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
}
template void
@@ -159,7 +165,7 @@ void CallLowering::unpackRegs(ArrayRef<Register> DstRegs, Register SrcReg,
}
bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -171,7 +177,7 @@ bool CallLowering::handleAssignments(MachineIRBuilder &MIRBuilder,
bool CallLowering::handleAssignments(CCState &CCInfo,
SmallVectorImpl<CCValAssign> &ArgLocs,
MachineIRBuilder &MIRBuilder,
- ArrayRef<ArgInfo> Args,
+ SmallVectorImpl<ArgInfo> &Args,
ValueHandler &Handler) const {
MachineFunction &MF = MIRBuilder.getMF();
const Function &F = MF.getFunction();
@@ -180,14 +186,99 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
unsigned NumArgs = Args.size();
for (unsigned i = 0; i != NumArgs; ++i) {
MVT CurVT = MVT::getVT(Args[i].Ty);
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo)) {
- // Try to use the register type if we couldn't assign the VT.
- if (!Handler.isArgumentHandler() || !CurVT.isValid())
+ if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo)) {
+ if (!CurVT.isValid())
return false;
- CurVT = TLI->getRegisterTypeForCallingConv(
+ MVT NewVT = TLI->getRegisterTypeForCallingConv(
F.getContext(), F.getCallingConv(), EVT(CurVT));
- if (Handler.assignArg(i, CurVT, CurVT, CCValAssign::Full, Args[i], CCInfo))
- return false;
+
+ // If we need to split the type over multiple regs, check it's a scenario
+ // we currently support.
+ unsigned NumParts = TLI->getNumRegistersForCallingConv(
+ F.getContext(), F.getCallingConv(), CurVT);
+ if (NumParts > 1) {
+ // For now only handle exact splits.
+ if (NewVT.getSizeInBits() * NumParts != CurVT.getSizeInBits())
+ return false;
+ }
+
+ // For incoming arguments (physregs to vregs), we could have values in
+ // physregs (or memlocs) which we want to extract and copy to vregs.
+ // During this, we might have to deal with the LLT being split across
+ // multiple regs, so we have to record this information for later.
+ //
+ // If we have outgoing args, then we have the opposite case. We have a
+ // vreg with an LLT which we want to assign to a physical location, and
+ // we might have to record that the value has to be split later.
+ if (Handler.isIncomingArgumentHandler()) {
+ if (NumParts == 1) {
+ // Try to use the register type if we couldn't assign the VT.
+ if (Handler.assignArg(i, NewVT, NewVT, CCValAssign::Full, Args[i],
+ Args[i].Flags[0], CCInfo))
+ return false;
+ } else {
+ // We're handling an incoming arg which is split over multiple regs.
+ // E.g. passing an s128 on AArch64.
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ Args[i].OrigRegs.push_back(Args[i].Regs[0]);
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ LLT NewLLT = getLLTForMVT(NewVT);
+ // For each split register, create and assign a vreg that will store
+ // the incoming component of the larger value. These will later be
+ // merged to form the final vreg.
+ for (unsigned Part = 0; Part < NumParts; ++Part) {
+ Register Reg =
+ MIRBuilder.getMRI()->createGenericVirtualRegister(NewLLT);
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (Part == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align::None());
+ if (Part == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Reg);
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i + Part, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[Part], CCInfo)) {
+ // Still couldn't assign this smaller part type for some reason.
+ return false;
+ }
+ }
+ }
+ } else {
+ // Handling an outgoing arg that might need to be split.
+ if (NumParts < 2)
+ return false; // Don't know how to deal with this type combination.
+
+ // This type is passed via multiple registers in the calling convention.
+ // We need to extract the individual parts.
+ Register LargeReg = Args[i].Regs[0];
+ LLT SmallTy = LLT::scalar(NewVT.getSizeInBits());
+ auto Unmerge = MIRBuilder.buildUnmerge(SmallTy, LargeReg);
+ assert(Unmerge->getNumOperands() == NumParts + 1);
+ ISD::ArgFlagsTy OrigFlags = Args[i].Flags[0];
+ // We're going to replace the regs and flags with the split ones.
+ Args[i].Regs.clear();
+ Args[i].Flags.clear();
+ for (unsigned PartIdx = 0; PartIdx < NumParts; ++PartIdx) {
+ ISD::ArgFlagsTy Flags = OrigFlags;
+ if (PartIdx == 0) {
+ Flags.setSplit();
+ } else {
+ Flags.setOrigAlign(Align::None());
+ if (PartIdx == NumParts - 1)
+ Flags.setSplitEnd();
+ }
+ Args[i].Regs.push_back(Unmerge.getReg(PartIdx));
+ Args[i].Flags.push_back(Flags);
+ if (Handler.assignArg(i + PartIdx, NewVT, NewVT, CCValAssign::Full,
+ Args[i], Args[i].Flags[PartIdx], CCInfo))
+ return false;
+ }
+ }
}
}
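A sketch of the incoming split case under AArch64-like assumptions (vreg names
illustrative): an s128 argument that the calling convention passes in two s64
registers is rewritten from Regs = {%arg:_(s128)} to OrigRegs = {%arg},
Regs = {%lo:_(s64), %hi:_(s64)}, with Split set on the first part's flags and
SplitEnd on the last; once both parts are assigned, the loop below reassembles
them:

    ; %arg:_(s128) = G_MERGE_VALUES %lo:_(s64), %hi:_(s64)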
@@ -202,18 +293,32 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
continue;
}
- assert(Args[i].Regs.size() == 1 &&
- "Can't handle multiple virtual regs yet");
-
// FIXME: Pack registers if we have more than one.
Register ArgReg = Args[i].Regs[0];
+ MVT OrigVT = MVT::getVT(Args[i].Ty);
+ MVT VAVT = VA.getValVT();
if (VA.isRegLoc()) {
- MVT OrigVT = MVT::getVT(Args[i].Ty);
- MVT VAVT = VA.getValVT();
- if (Handler.isArgumentHandler() && VAVT != OrigVT) {
- if (VAVT.getSizeInBits() < OrigVT.getSizeInBits())
- return false; // Can't handle this type of arg yet.
+ if (Handler.isIncomingArgumentHandler() && VAVT != OrigVT) {
+ if (VAVT.getSizeInBits() < OrigVT.getSizeInBits()) {
+ // Expected to be multiple regs for a single incoming arg.
+ unsigned NumArgRegs = Args[i].Regs.size();
+ if (NumArgRegs < 2)
+ return false;
+
+ assert((j + (NumArgRegs - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ for (unsigned Part = 0; Part < NumArgRegs; ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ }
+ j += NumArgRegs - 1;
+ // Merge the split registers into the expected larger result vreg
+ // of the original call.
+ MIRBuilder.buildMerge(Args[i].OrigRegs[0], Args[i].Regs);
+ continue;
+ }
const LLT VATy(VAVT);
Register NewReg =
MIRBuilder.getMRI()->createGenericVirtualRegister(VATy);
@@ -234,10 +339,28 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
} else {
MIRBuilder.buildTrunc(ArgReg, {NewReg}).getReg(0);
}
+ } else if (!Handler.isIncomingArgumentHandler()) {
+ assert((j + (Args[i].Regs.size() - 1)) < ArgLocs.size() &&
+ "Too many regs for number of args");
+ // This is an outgoing argument that might have been split.
+ for (unsigned Part = 0; Part < Args[i].Regs.size(); ++Part) {
+ // There should be Regs.size() ArgLocs per argument.
+ VA = ArgLocs[j + Part];
+ Handler.assignValueToReg(Args[i].Regs[Part], VA.getLocReg(), VA);
+ }
+ j += Args[i].Regs.size() - 1;
} else {
Handler.assignValueToReg(ArgReg, VA.getLocReg(), VA);
}
} else if (VA.isMemLoc()) {
+ // Don't currently support loading/storing a type that needs to be split
+ // to the stack. Should be easy, just not implemented yet.
+ if (Args[i].Regs.size() > 1) {
+ LLVM_DEBUG(
+ dbgs()
+ << "Load/store a split arg to/from the stack not implemented yet");
+ return false;
+ }
MVT VT = MVT::getVT(Args[i].Ty);
unsigned Size = VT == MVT::iPTR ? DL.getPointerSize()
: alignTo(VT.getSizeInBits(), 8) / 8;
@@ -253,6 +376,81 @@ bool CallLowering::handleAssignments(CCState &CCInfo,
return true;
}
+bool CallLowering::analyzeArgInfo(CCState &CCState,
+ SmallVectorImpl<ArgInfo> &Args,
+ CCAssignFn &AssignFnFixed,
+ CCAssignFn &AssignFnVarArg) const {
+ for (unsigned i = 0, e = Args.size(); i < e; ++i) {
+ MVT VT = MVT::getVT(Args[i].Ty);
+ CCAssignFn &Fn = Args[i].IsFixed ? AssignFnFixed : AssignFnVarArg;
+ if (Fn(i, VT, VT, CCValAssign::Full, Args[i].Flags[0], CCState)) {
+ // Bail out on anything we can't handle.
+ LLVM_DEBUG(dbgs() << "Cannot analyze " << EVT(VT).getEVTString()
+ << " (arg number = " << i << "\n");
+ return false;
+ }
+ }
+ return true;
+}
+
+bool CallLowering::resultsCompatible(CallLoweringInfo &Info,
+ MachineFunction &MF,
+ SmallVectorImpl<ArgInfo> &InArgs,
+ CCAssignFn &CalleeAssignFnFixed,
+ CCAssignFn &CalleeAssignFnVarArg,
+ CCAssignFn &CallerAssignFnFixed,
+ CCAssignFn &CallerAssignFnVarArg) const {
+ const Function &F = MF.getFunction();
+ CallingConv::ID CalleeCC = Info.CallConv;
+ CallingConv::ID CallerCC = F.getCallingConv();
+
+ if (CallerCC == CalleeCC)
+ return true;
+
+ SmallVector<CCValAssign, 16> ArgLocs1;
+ CCState CCInfo1(CalleeCC, false, MF, ArgLocs1, F.getContext());
+ if (!analyzeArgInfo(CCInfo1, InArgs, CalleeAssignFnFixed,
+ CalleeAssignFnVarArg))
+ return false;
+
+ SmallVector<CCValAssign, 16> ArgLocs2;
+ CCState CCInfo2(CallerCC, false, MF, ArgLocs2, F.getContext());
+ if (!analyzeArgInfo(CCInfo2, InArgs, CallerAssignFnFixed,
+ CallerAssignFnVarArg))
+ return false;
+
+ // We need the argument locations to match up exactly. If there's more in
+ // one than the other, then we are done.
+ if (ArgLocs1.size() != ArgLocs2.size())
+ return false;
+
+ // Make sure that each location is passed in exactly the same way.
+ for (unsigned i = 0, e = ArgLocs1.size(); i < e; ++i) {
+ const CCValAssign &Loc1 = ArgLocs1[i];
+ const CCValAssign &Loc2 = ArgLocs2[i];
+
+ // We need both of them to be the same. So if one is a register and one
+ // isn't, we're done.
+ if (Loc1.isRegLoc() != Loc2.isRegLoc())
+ return false;
+
+ if (Loc1.isRegLoc()) {
+ // If they don't have the same register location, we're done.
+ if (Loc1.getLocReg() != Loc2.getLocReg())
+ return false;
+
+ // They matched, so we can move to the next ArgLoc.
+ continue;
+ }
+
+ // Loc1 wasn't a RegLoc, so they both must be MemLocs. Check if they match.
+ if (Loc1.getLocMemOffset() != Loc2.getLocMemOffset())
+ return false;
+ }
+
+ return true;
+}
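A hedged usage sketch from a target's tail-call legality check; the CC_* assign
functions below are placeholders for whatever the target supplies:

    // SmallVector<ArgInfo, 8> InArgs = ...;   // the call's result values
    // if (!resultsCompatible(Info, MF, InArgs,
    //                        CC_Callee_Fixed, CC_Callee_VarArg,
    //                        CC_Caller_Fixed, CC_Caller_VarArg))
    //   return false;   // result locations differ; tail call unsafe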
+
Register CallLowering::ValueHandler::extendRegister(Register ValReg,
CCValAssign &VA) {
LLT LocTy{VA.getLocVT()};
diff --git a/lib/CodeGen/GlobalISel/Combiner.cpp b/lib/CodeGen/GlobalISel/Combiner.cpp
index 31cb1dbbc9b5..b4562a5c6601 100644
--- a/lib/CodeGen/GlobalISel/Combiner.cpp
+++ b/lib/CodeGen/GlobalISel/Combiner.cpp
@@ -27,6 +27,18 @@
using namespace llvm;
+namespace llvm {
+cl::OptionCategory GICombinerOptionCategory(
+ "GlobalISel Combiner",
+ "Control the rules which are enabled. These options all take a comma "
+ "separated list of rules to disable and may be specified by number "
+ "or number range (e.g. 1-10)."
+#ifndef NDEBUG
+ " They may also be specified by name."
+#endif
+);
+} // end namespace llvm
+
namespace {
/// This class acts as the glue that joins the CombinerHelper to the overall
/// Combine algorithm. The CombinerHelper is intended to report the
@@ -92,7 +104,7 @@ bool Combiner::combineMachineInstrs(MachineFunction &MF,
return false;
Builder =
- CSEInfo ? make_unique<CSEMIRBuilder>() : make_unique<MachineIRBuilder>();
+ CSEInfo ? std::make_unique<CSEMIRBuilder>() : std::make_unique<MachineIRBuilder>();
MRI = &MF.getRegInfo();
Builder->setMF(MF);
if (CSEInfo)
diff --git a/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index 9cbf3dd83ff1..854769d283f7 100644
--- a/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -8,19 +8,36 @@
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
#define DEBUG_TYPE "gi-combiner"
using namespace llvm;
+// Option to allow testing of the combiner while no targets know about indexed
+// addressing.
+static cl::opt<bool>
+ ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
+ cl::desc("Force all indexed operations to be "
+ "legal for the GlobalISel combiner"));
+
+
CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
- MachineIRBuilder &B)
- : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer) {}
+ MachineIRBuilder &B, GISelKnownBits *KB,
+ MachineDominatorTree *MDT)
+ : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
+ KB(KB), MDT(MDT) {
+ (void)this->KB;
+}
void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
Register ToReg) const {
@@ -55,8 +72,8 @@ bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
if (MI.getOpcode() != TargetOpcode::COPY)
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
LLT DstTy = MRI.getType(DstReg);
LLT SrcTy = MRI.getType(SrcReg);
// Simple Copy Propagation.
@@ -66,12 +83,183 @@ bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
return false;
}
void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
MI.eraseFromParent();
replaceRegWith(MRI, DstReg, SrcReg);
}
+bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
+ bool IsUndef = false;
+ SmallVector<Register, 4> Ops;
+ if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
+ applyCombineConcatVectors(MI, IsUndef, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
+ "Invalid instruction");
+ IsUndef = true;
+ MachineInstr *Undef = nullptr;
+
+ // Walk over all the operands of concat vectors and check if they are
+ // build_vector themselves or undef.
+ // Then collect their operands in Ops.
+ for (const MachineOperand &MO : MI.operands()) {
+ // Skip the instruction definition.
+ if (MO.isDef())
+ continue;
+ Register Reg = MO.getReg();
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ assert(Def && "Operand not defined");
+ switch (Def->getOpcode()) {
+ case TargetOpcode::G_BUILD_VECTOR:
+ IsUndef = false;
+ // Remember the operands of the build_vector to fold
+ // them into the yet-to-build flattened concat vectors.
+ for (const MachineOperand &BuildVecMO : Def->operands()) {
+ // Skip the definition.
+ if (BuildVecMO.isDef())
+ continue;
+ Ops.push_back(BuildVecMO.getReg());
+ }
+ break;
+ case TargetOpcode::G_IMPLICIT_DEF: {
+ LLT OpType = MRI.getType(Reg);
+ // Keep one undef value for all the undef operands.
+ if (!Undef) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Undef = Builder.buildUndef(OpType.getScalarType());
+ }
+ assert(MRI.getType(Undef->getOperand(0).getReg()) ==
+ OpType.getScalarType() &&
+ "All undefs should have the same type");
+ // Break the undef vector in as many scalar elements as needed
+ // for the flattening.
+ for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
+ EltIdx != EltEnd; ++EltIdx)
+ Ops.push_back(Undef->getOperand(0).getReg());
+ break;
+ }
+ default:
+ return false;
+ }
+ }
+ return true;
+}
+void CombinerHelper::applyCombineConcatVectors(
+ MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
+ // We determined that the concat_vectors can be flattened.
+ // Generate the flattened build_vector.
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ // Note: IsUndef is sort of redundant. We could have determined it by
+ // checking that all Ops are undef. Alternatively, we could have
+ // generated a build_vector of undefs and relied on another combine to
+ // clean that up. For now, given we already gather this information
+ // in tryCombineConcatVectors, just save compile time and issue the
+ // right thing.
+ if (IsUndef)
+ Builder.buildUndef(NewDstReg);
+ else
+ Builder.buildBuildVector(NewDstReg, Ops);
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
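The flattening in hypothetical MIR; note the single scalar undef shared by
every element of the undef operand:

    ; %a:_(<2 x s32>) = G_BUILD_VECTOR %x, %y
    ; %b:_(<2 x s32>) = G_IMPLICIT_DEF
    ; %d:_(<4 x s32>) = G_CONCAT_VECTORS %a, %b
    ; becomes
    ; %u:_(s32) = G_IMPLICIT_DEF
    ; %d:_(<4 x s32>) = G_BUILD_VECTOR %x, %y, %u, %u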
+
+bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
+ SmallVector<Register, 4> Ops;
+ if (matchCombineShuffleVector(MI, Ops)) {
+ applyCombineShuffleVector(MI, Ops);
+ return true;
+ }
+ return false;
+}
+
+bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
+ SmallVectorImpl<Register> &Ops) {
+ assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
+ "Invalid instruction kind");
+ LLT DstType = MRI.getType(MI.getOperand(0).getReg());
+ Register Src1 = MI.getOperand(1).getReg();
+ LLT SrcType = MRI.getType(Src1);
+ unsigned DstNumElts = DstType.getNumElements();
+ unsigned SrcNumElts = SrcType.getNumElements();
+
+ // If the resulting vector is smaller than the size of the source
+ // vectors being concatenated, we won't be able to replace the
+ // shuffle vector with a concat_vectors.
+ //
+ // Note: We may still be able to produce a concat_vectors fed by
+ // extract_vector_elt and so on. It is less clear that would
+ // be better though, so don't bother for now.
+ if (DstNumElts < 2 * SrcNumElts)
+ return false;
+
+ // Check that the shuffle mask can be broken evenly between the
+ // different sources.
+ if (DstNumElts % SrcNumElts != 0)
+ return false;
+
+ // Mask length is a multiple of the source vector length.
+ // Check if the shuffle is some kind of concatenation of the input
+ // vectors.
+ unsigned NumConcat = DstNumElts / SrcNumElts;
+ SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MI.getOperand(3).getShuffleMask(), Mask);
+ for (unsigned i = 0; i != DstNumElts; ++i) {
+ int Idx = Mask[i];
+ // Undef value.
+ if (Idx < 0)
+ continue;
+ // Ensure the indices in each SrcType sized piece are sequential and that
+ // the same source is used for the whole piece.
+ if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
+ (ConcatSrcs[i / SrcNumElts] >= 0 &&
+ ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
+ return false;
+ // Remember which source this index came from.
+ ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
+ }
+
+ // The shuffle is concatenating multiple vectors together.
+ // Collect the different operands for that.
+ Register UndefReg;
+ Register Src2 = MI.getOperand(2).getReg();
+ for (auto Src : ConcatSrcs) {
+ if (Src < 0) {
+ if (!UndefReg) {
+ Builder.setInsertPt(*MI.getParent(), MI);
+ UndefReg = Builder.buildUndef(SrcType).getReg(0);
+ }
+ Ops.push_back(UndefReg);
+ } else if (Src == 0)
+ Ops.push_back(Src1);
+ else
+ Ops.push_back(Src2);
+ }
+ return true;
+}
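An example mask that matches, in hypothetical MIR:

    ; %d:_(<4 x s32>) = G_SHUFFLE_VECTOR %v1:_(<2 x s32>), %v2, shufflemask(0,1,2,3)
    ; elements 0-1 select all of %v1 (source 0) and 2-3 all of %v2 (source 1),
    ; so ConcatSrcs = {0, 1} and Ops = {%v1, %v2}. A piece that is entirely
    ; undef in the mask would contribute a G_IMPLICIT_DEF source instead.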
+
+void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
+ const ArrayRef<Register> Ops) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Builder.setInsertPt(*MI.getParent(), MI);
+ Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
+
+ Builder.buildConcatVectors(NewDstReg, Ops);
+
+ MI.eraseFromParent();
+ replaceRegWith(MRI, DstReg, NewDstReg);
+}
+
namespace {
/// Select a preference between two uses. CurrentUse is the current preference
@@ -279,7 +467,7 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
// up the type and extend so that it uses the preferred use.
if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
- unsigned UseDstReg = UseMI->getOperand(0).getReg();
+ Register UseDstReg = UseMI->getOperand(0).getReg();
MachineOperand &UseSrcMO = UseMI->getOperand(1);
const LLT &UseDstTy = MRI.getType(UseDstReg);
if (UseDstReg != ChosenDstReg) {
@@ -342,8 +530,212 @@ void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
Observer.changedInstr(MI);
}
-bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
- assert(MI.getOpcode() == TargetOpcode::G_BR && "Expected a G_BR");
+bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
+ assert(DefMI.getParent() == UseMI.getParent());
+ if (&DefMI == &UseMI)
+ return false;
+
+ // Walk the block until we find whichever of the two instructions comes
+ // first; DefMI is a predecessor of UseMI iff we reach DefMI first.
+ for (const MachineInstr &CurMI : *DefMI.getParent()) {
+ if (&CurMI == &DefMI)
+ return true;
+ if (&CurMI == &UseMI)
+ return false;
+ }
+
+ llvm_unreachable("Block must contain both DefMI and UseMI");
+}
+
+bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
+ if (MDT)
+ return MDT->dominates(&DefMI, &UseMI);
+ else if (DefMI.getParent() != UseMI.getParent())
+ return false;
+
+ return isPredecessor(DefMI, UseMI);
+}
+
+bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
+ Base = MI.getOperand(1).getReg();
+ MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
+ if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
+ return false;
+
+ LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
+
+ for (auto &Use : MRI.use_instructions(Base)) {
+ if (Use.getOpcode() != TargetOpcode::G_GEP)
+ continue;
+
+ Offset = Use.getOperand(2).getReg();
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
+ << Use);
+ continue;
+ }
+
+ // Make sure the offset calculation is before the potentially indexed op.
+ // FIXME: we really care about dependency here. The offset calculation might
+ // be movable.
+ MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
+ if (!OffsetDef || !dominates(*OffsetDef, MI)) {
+ LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
+ << Use);
+ continue;
+ }
+
+ // FIXME: check whether all uses of Base are load/store with foldable
+ // addressing modes. If so, using the normal addr-modes is better than
+ // forming an indexed one.
+
+ bool MemOpDominatesAddrUses = true;
+ for (auto &GEPUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
+ if (!dominates(MI, GEPUse)) {
+ MemOpDominatesAddrUses = false;
+ break;
+ }
+ }
+
+ if (!MemOpDominatesAddrUses) {
+ LLVM_DEBUG(
+ dbgs() << " Ignoring candidate as memop does not dominate uses: "
+ << Use);
+ continue;
+ }
+
+ LLVM_DEBUG(dbgs() << " Found match: " << Use);
+ Addr = Use.getOperand(0).getReg();
+ return true;
+ }
+
+ return false;
+}
+
+bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
+ Register &Base, Register &Offset) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+#ifndef NDEBUG
+ unsigned Opcode = MI.getOpcode();
+ assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
+ Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
+#endif
+
+ Addr = MI.getOperand(1).getReg();
+ MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_GEP, Addr, MRI);
+ if (!AddrDef || MRI.hasOneUse(Addr))
+ return false;
+
+ Base = AddrDef->getOperand(1).getReg();
+ Offset = AddrDef->getOperand(2).getReg();
+
+ LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
+
+ if (!ForceLegalIndexing &&
+ !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
+ return false;
+ }
+
+ MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
+ if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
+ return false;
+ }
+
+ if (MI.getOpcode() == TargetOpcode::G_STORE) {
+ // Would require a copy.
+ if (Base == MI.getOperand(0).getReg()) {
+ LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
+ return false;
+ }
+
+ // We're expecting one use of Addr in MI, but it could also be the
+ // value stored, which isn't actually dominated by the instruction.
+ if (MI.getOperand(0).getReg() == Addr) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
+ return false;
+ }
+ }
+
+ // FIXME: check whether all uses of the base pointer are constant GEPs. That
+ // might allow us to end base's liveness here by adjusting the constant.
+
+ for (auto &UseMI : MRI.use_instructions(Addr)) {
+ if (!dominates(MI, UseMI)) {
+ LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
+ return false;
+ }
+ }
+
+ return true;
+}
+
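+// A sketch of the transformation this performs, in the pre-indexed case
+// (virtual register names are illustrative only):
+//   %offset:_(s64) = G_CONSTANT i64 8
+//   %addr:_(p0) = G_GEP %base, %offset
+//   %val:_(s64) = G_LOAD %addr
+// becomes, when the target reports the addressing mode as legal:
+//   %val:_(s64), %addr:_(p0) = G_INDEXED_LOAD %base, %offset, 1
+// The trailing immediate distinguishes pre-indexing (1) from post-indexing
+// (0).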
+bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
+ unsigned Opcode = MI.getOpcode();
+ if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
+ Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
+ return false;
+
+ bool IsStore = Opcode == TargetOpcode::G_STORE;
+ Register Addr, Base, Offset;
+ bool IsPre = findPreIndexCandidate(MI, Addr, Base, Offset);
+ if (!IsPre && !findPostIndexCandidate(MI, Addr, Base, Offset))
+ return false;
+
+ unsigned NewOpcode;
+ switch (Opcode) {
+ case TargetOpcode::G_LOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_LOAD;
+ break;
+ case TargetOpcode::G_SEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
+ break;
+ case TargetOpcode::G_ZEXTLOAD:
+ NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
+ break;
+ case TargetOpcode::G_STORE:
+ NewOpcode = TargetOpcode::G_INDEXED_STORE;
+ break;
+ default:
+ llvm_unreachable("Unknown load/store opcode");
+ }
+
+ MachineInstr &AddrDef = *MRI.getUniqueVRegDef(Addr);
+ MachineIRBuilder MIRBuilder(MI);
+ auto MIB = MIRBuilder.buildInstr(NewOpcode);
+ if (IsStore) {
+ MIB.addDef(Addr);
+ MIB.addUse(MI.getOperand(0).getReg());
+ } else {
+ MIB.addDef(MI.getOperand(0).getReg());
+ MIB.addDef(Addr);
+ }
+
+ MIB.addUse(Base);
+ MIB.addUse(Offset);
+ MIB.addImm(IsPre);
+ MI.eraseFromParent();
+ AddrDef.eraseFromParent();
+
+  LLVM_DEBUG(dbgs() << "  Combined to indexed operation");
+ return true;
+}
+
+bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
+ if (MI.getOpcode() != TargetOpcode::G_BR)
+ return false;
+
// Try to match the following:
// bb1:
// %c(s32) = G_ICMP pred, %a, %b
@@ -380,9 +772,14 @@ bool CombinerHelper::matchCombineBr(MachineInstr &MI) {
return true;
}
-bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
- if (!matchCombineBr(MI))
+bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
+ if (!matchElideBrByInvertingCond(MI))
return false;
+ applyElideBrByInvertingCond(MI);
+ return true;
+}
+
+void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
MachineBasicBlock::iterator BrIt(MI);
MachineInstr *BrCond = &*std::prev(BrIt);
@@ -401,11 +798,509 @@ bool CombinerHelper::tryCombineBr(MachineInstr &MI) {
BrCond->getOperand(1).setMBB(BrTarget);
Observer.changedInstr(*BrCond);
MI.eraseFromParent();
+}
+
+static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
+ // On Darwin, -Os means optimize for size without hurting performance, so
+ // only really optimize for size when -Oz (MinSize) is used.
+ if (MF.getTarget().getTargetTriple().isOSDarwin())
+ return MF.getFunction().hasMinSize();
+ return MF.getFunction().hasOptSize();
+}
+
+// Returns a list of types to use for memory op lowering in MemOps. A partial
+// port of findOptimalMemOpLowering in TargetLowering.
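+// For example, a 13-byte copy on a target whose widest scalar is s64 would
+// typically produce {s64, s32, s8}, or an overlapping {s64, s64} pair when
+// misaligned accesses are fast and overlap is allowed.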
+static bool findGISelOptimalMemOpLowering(
+ std::vector<LLT> &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign,
+ unsigned SrcAlign, bool IsMemset, bool ZeroMemset, bool MemcpyStrSrc,
+ bool AllowOverlap, unsigned DstAS, unsigned SrcAS,
+ const AttributeList &FuncAttributes, const TargetLowering &TLI) {
+ // If 'SrcAlign' is zero, that means the memory operation does not need to
+ // load the value, i.e. memset or memcpy from constant string. Otherwise,
+ // it's the inferred alignment of the source. 'DstAlign', on the other hand,
+ // is the specified alignment of the memory operation. If it is zero, that
+ // means it's possible to change the alignment of the destination.
+ // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does
+ // not need to be loaded.
+ if (SrcAlign != 0 && SrcAlign < DstAlign)
+ return false;
+
+ LLT Ty = TLI.getOptimalMemOpLLT(Size, DstAlign, SrcAlign, IsMemset,
+ ZeroMemset, MemcpyStrSrc, FuncAttributes);
+
+ if (Ty == LLT()) {
+ // Use the largest scalar type whose alignment constraints are satisfied.
+ // We only need to check DstAlign here as SrcAlign is always greater or
+ // equal to DstAlign (or zero).
+ Ty = LLT::scalar(64);
+ while (DstAlign && DstAlign < Ty.getSizeInBytes() &&
+ !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, DstAlign))
+ Ty = LLT::scalar(Ty.getSizeInBytes());
+ assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
+ // FIXME: check for the largest legal type we can load/store to.
+ }
+
+ unsigned NumMemOps = 0;
+ while (Size != 0) {
+ unsigned TySize = Ty.getSizeInBytes();
+ while (TySize > Size) {
+ // For now, only use non-vector load / store's for the left-over pieces.
+ LLT NewTy = Ty;
+ // FIXME: check for mem op safety and legality of the types. Not all of
+ // SDAGisms map cleanly to GISel concepts.
+ if (NewTy.isVector())
+ NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
+ NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
+ unsigned NewTySize = NewTy.getSizeInBytes();
+ assert(NewTySize > 0 && "Could not find appropriate type");
+
+ // If the new LLT cannot cover all of the remaining bits, then consider
+ // issuing a (or a pair of) unaligned and overlapping load / store.
+ bool Fast;
+      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
+ MVT VT = getMVTForLLT(Ty);
+ if (NumMemOps && AllowOverlap && NewTySize < Size &&
+ TLI.allowsMisalignedMemoryAccesses(
+ VT, DstAS, DstAlign, MachineMemOperand::MONone, &Fast) &&
+ Fast)
+ TySize = Size;
+ else {
+ Ty = NewTy;
+ TySize = NewTySize;
+ }
+ }
+
+ if (++NumMemOps > Limit)
+ return false;
+
+ MemOps.push_back(Ty);
+ Size -= TySize;
+ }
+
+ return true;
+}
+
+static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
+ if (Ty.isVector())
+ return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
+ Ty.getNumElements());
+ return IntegerType::get(C, Ty.getSizeInBits());
+}
+
+// Get a vectorized representation of the memset value operand, GISel edition.
+static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
+ MachineRegisterInfo &MRI = *MIB.getMRI();
+ unsigned NumBits = Ty.getScalarSizeInBits();
+ auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
+ if (!Ty.isVector() && ValVRegAndVal) {
+ unsigned KnownVal = ValVRegAndVal->Value;
+ APInt Scalar = APInt(8, KnownVal);
+ APInt SplatVal = APInt::getSplat(NumBits, Scalar);
+ return MIB.buildConstant(Ty, SplatVal).getReg(0);
+ }
+ // FIXME: for vector types create a G_BUILD_VECTOR.
+ if (Ty.isVector())
+ return Register();
+
+ // Extend the byte value to the larger type, and then multiply by a magic
+ // value 0x010101... in order to replicate it across every byte.
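+  // For example, an s8 value 0xAB zero-extended to s32 gives 0x000000AB, and
+  // 0x000000AB * 0x01010101 == 0xABABABAB.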
+ LLT ExtType = Ty.getScalarType();
+ auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
+ if (NumBits > 8) {
+ APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
+ auto MagicMI = MIB.buildConstant(ExtType, Magic);
+ Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
+ }
+
+ assert(ExtType == Ty && "Vector memset value type not supported yet");
+ return Val;
+}
+
+bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
+                                    Register Val, unsigned KnownLen,
+                                    unsigned Align, bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memset length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+
+ auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
+ bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Align), 0,
+ /*IsMemset=*/true,
+ /*ZeroMemset=*/IsZeroVal, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(), ~0u,
+ MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+ if (NewAlign > Align) {
+ Align = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Align)
+ MFI.setObjectAlignment(FI, Align);
+ }
+ }
+
+ MachineIRBuilder MIB(MI);
+ // Find the largest store and generate the bit pattern for it.
+ LLT LargestTy = MemOps[0];
+ for (unsigned i = 1; i < MemOps.size(); i++)
+ if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
+ LargestTy = MemOps[i];
+
+ // The memset stored value is always defined as an s8, so in order to make it
+ // work with larger store types we need to repeat the bit pattern across the
+ // wider type.
+ Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
+
+ if (!MemSetValue)
+ return false;
+
+ // Generate the stores. For each store type in the list, we generate the
+ // matching store of that type to the destination address.
+ LLT PtrTy = MRI.getType(Dst);
+ unsigned DstOff = 0;
+ unsigned Size = KnownLen;
+ for (unsigned I = 0; I < MemOps.size(); I++) {
+ LLT Ty = MemOps[I];
+ unsigned TySize = Ty.getSizeInBytes();
+ if (TySize > Size) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
+ assert(I == MemOps.size() - 1 && I != 0);
+ DstOff -= TySize - Size;
+ }
+
+    // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ Register Value = MemSetValue;
+ if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
+ MVT VT = getMVTForLLT(Ty);
+ MVT LargestVT = getMVTForLLT(LargestTy);
+ if (!LargestTy.isVector() && !Ty.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
+ else
+ Value = getMemsetValue(Val, Ty, MIB);
+ if (!Value)
+ return false;
+ }
+
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
+
+ Register Ptr = Dst;
+ if (DstOff != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
+ Ptr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ }
+
+ MIB.buildStore(Value, Ptr, *StoreMMO);
+ DstOff += Ty.getSizeInBytes();
+ Size -= TySize;
+ }
+
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
+ Register Src, unsigned KnownLen,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memcpy length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ // FIXME: infer better src pointer alignment like SelectionDAG does here.
+ // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
+ // if the memcpy is in a tail call position.
+
+ unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
+ SrcAlign,
+ /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/!IsVolatile, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+  // Now we need to emit a load/store pair for each of the types we've
+  // collected. I.e. for each type, generate a load of that width from the
+  // source pointer, then a corresponding store of the loaded value to the
+  // dest buffer. This can result in a sequence of loads and stores of mixed
+  // types, depending on what the target specifies as good types to use.
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ unsigned Size = KnownLen;
+ for (auto CopyTy : MemOps) {
+ // Issuing an unaligned load / store pair that overlaps with the previous
+ // pair. Adjust the offset accordingly.
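+    // E.g. a 10-byte copy lowered as {s64, s64} issues the second pair at
+    // offset 2 rather than 8, re-copying bytes 2..7.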
+ if (CopyTy.getSizeInBytes() > Size)
+ CurrOffset -= CopyTy.getSizeInBytes() - Size;
+
+ // Construct MMOs for the accesses.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ Register Offset;
+ if (CurrOffset != 0) {
+ Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
+ .getReg(0);
+ LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ }
+ auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
+
+ // Create the store.
+ Register StorePtr =
+ CurrOffset == 0 ? Dst : MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ MIB.buildStore(LdVal, StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ Size -= CopyTy.getSizeInBytes();
+ }
+
+ MI.eraseFromParent();
return true;
}
+bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
+ Register Src, unsigned KnownLen,
+ unsigned DstAlign, unsigned SrcAlign,
+ bool IsVolatile) {
+ auto &MF = *MI.getParent()->getParent();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+ auto &DL = MF.getDataLayout();
+ LLVMContext &C = MF.getFunction().getContext();
+
+ assert(KnownLen != 0 && "Have a zero length memmove length!");
+
+ bool DstAlignCanChange = false;
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ bool OptSize = shouldLowerMemFuncForSize(MF);
+ unsigned Alignment = MinAlign(DstAlign, SrcAlign);
+
+ MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
+ if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
+ DstAlignCanChange = true;
+
+ unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
+ std::vector<LLT> MemOps;
+
+ const auto &DstMMO = **MI.memoperands_begin();
+ const auto &SrcMMO = **std::next(MI.memoperands_begin());
+ MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
+ MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
+
+ // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
+  // to a bug in its findOptimalMemOpLowering implementation. For now do the
+ // same thing here.
+ if (!findGISelOptimalMemOpLowering(
+ MemOps, Limit, KnownLen, (DstAlignCanChange ? 0 : Alignment),
+ SrcAlign,
+ /*IsMemset=*/false,
+ /*ZeroMemset=*/false, /*MemcpyStrSrc=*/false,
+ /*AllowOverlap=*/false, DstPtrInfo.getAddrSpace(),
+ SrcPtrInfo.getAddrSpace(), MF.getFunction().getAttributes(), TLI))
+ return false;
+
+ if (DstAlignCanChange) {
+ // Get an estimate of the type from the LLT.
+ Type *IRTy = getTypeForLLT(MemOps[0], C);
+ unsigned NewAlign = (unsigned)DL.getABITypeAlignment(IRTy);
+
+ // Don't promote to an alignment that would require dynamic stack
+ // realignment.
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ if (!TRI->needsStackRealignment(MF))
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
+
+ if (NewAlign > Alignment) {
+ Alignment = NewAlign;
+ unsigned FI = FIDef->getOperand(1).getIndex();
+ // Give the stack frame object a larger alignment if needed.
+ if (MFI.getObjectAlignment(FI) < Alignment)
+ MFI.setObjectAlignment(FI, Alignment);
+ }
+ }
+
+ LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
+
+ MachineIRBuilder MIB(MI);
+ // Memmove requires that we perform the loads first before issuing the stores.
+ // Apart from that, this loop is pretty much doing the same thing as the
+ // memcpy codegen function.
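+  // If the two ranges overlap, e.g. memmove(p + 1, p, 8), interleaving the
+  // stores with the loads could clobber bytes that still need to be read.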
+ unsigned CurrOffset = 0;
+ LLT PtrTy = MRI.getType(Src);
+ SmallVector<Register, 16> LoadVals;
+ for (auto CopyTy : MemOps) {
+ // Construct MMO for the load.
+ auto *LoadMMO =
+ MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ // Create the load.
+ Register LoadPtr = Src;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ LoadPtr = MIB.buildGEP(PtrTy, Src, Offset).getReg(0);
+ }
+ LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+
+ CurrOffset = 0;
+ for (unsigned I = 0; I < MemOps.size(); ++I) {
+ LLT CopyTy = MemOps[I];
+ // Now store the values loaded.
+ auto *StoreMMO =
+ MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
+
+ Register StorePtr = Dst;
+ if (CurrOffset != 0) {
+ auto Offset =
+ MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
+ StorePtr = MIB.buildGEP(PtrTy, Dst, Offset).getReg(0);
+ }
+ MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
+ CurrOffset += CopyTy.getSizeInBytes();
+ }
+ MI.eraseFromParent();
+ return true;
+}
+
+bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
+ // This combine is fairly complex so it's not written with a separate
+ // matcher function.
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
+ assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
+ ID == Intrinsic::memset) &&
+ "Expected a memcpy like intrinsic");
+
+ auto MMOIt = MI.memoperands_begin();
+ const MachineMemOperand *MemOp = *MMOIt;
+ bool IsVolatile = MemOp->isVolatile();
+ // Don't try to optimize volatile.
+ if (IsVolatile)
+ return false;
+
+ unsigned DstAlign = MemOp->getBaseAlignment();
+ unsigned SrcAlign = 0;
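+  // Operand 0 is the intrinsic ID; the destination, source (or memset value)
+  // and length registers follow as operands 1 to 3.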
+ Register Dst = MI.getOperand(1).getReg();
+ Register Src = MI.getOperand(2).getReg();
+ Register Len = MI.getOperand(3).getReg();
+
+ if (ID != Intrinsic::memset) {
+    assert(std::next(MMOIt) != MI.memoperands_end() &&
+           "Expected a second MMO on MI");
+ MemOp = *(++MMOIt);
+ SrcAlign = MemOp->getBaseAlignment();
+ }
+
+ // See if this is a constant length copy
+ auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
+ if (!LenVRegAndVal)
+ return false; // Leave it to the legalizer to lower it to a libcall.
+ unsigned KnownLen = LenVRegAndVal->Value;
+
+ if (KnownLen == 0) {
+ MI.eraseFromParent();
+ return true;
+ }
+
+ if (MaxLen && KnownLen > MaxLen)
+ return false;
+
+  if (ID == Intrinsic::memcpy)
+    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+                          IsVolatile);
+  if (ID == Intrinsic::memmove)
+    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
+                           IsVolatile);
+  if (ID == Intrinsic::memset)
+    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
+ return false;
+}
+
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;
- return tryCombineExtendingLoads(MI);
+ if (tryCombineExtendingLoads(MI))
+ return true;
+ if (tryCombineIndexedLoadStore(MI))
+ return true;
+ return false;
}
diff --git a/lib/CodeGen/GlobalISel/GISelKnownBits.cpp b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
new file mode 100644
index 000000000000..be8efa8795f3
--- /dev/null
+++ b/lib/CodeGen/GlobalISel/GISelKnownBits.cpp
@@ -0,0 +1,383 @@
+//===- lib/CodeGen/GlobalISel/GISelKnownBits.cpp ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// Provides analysis for querying information about KnownBits during GISel
+/// passes.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/CodeGen/GlobalISel/Utils.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
+#include "llvm/CodeGen/TargetOpcodes.h"
+
+#define DEBUG_TYPE "gisel-known-bits"
+
+using namespace llvm;
+
+char llvm::GISelKnownBitsAnalysis::ID = 0;
+
+INITIALIZE_PASS_BEGIN(GISelKnownBitsAnalysis, DEBUG_TYPE,
+                      "Analysis for Computing Known Bits", false, true)
+INITIALIZE_PASS_END(GISelKnownBitsAnalysis, DEBUG_TYPE,
+                    "Analysis for Computing Known Bits", false, true)
+
+GISelKnownBits::GISelKnownBits(MachineFunction &MF)
+ : MF(MF), MRI(MF.getRegInfo()), TL(*MF.getSubtarget().getTargetLowering()),
+ DL(MF.getFunction().getParent()->getDataLayout()) {}
+
+Align GISelKnownBits::inferAlignmentForFrameIdx(int FrameIdx, int Offset,
+ const MachineFunction &MF) {
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ return commonAlignment(Align(MFI.getObjectAlignment(FrameIdx)), Offset);
+ // TODO: How to handle cases with Base + Offset?
+}
+
+MaybeAlign GISelKnownBits::inferPtrAlignment(const MachineInstr &MI) {
+ if (MI.getOpcode() == TargetOpcode::G_FRAME_INDEX) {
+ int FrameIdx = MI.getOperand(1).getIndex();
+ return inferAlignmentForFrameIdx(FrameIdx, 0, *MI.getMF());
+ }
+ return None;
+}
+
+void GISelKnownBits::computeKnownBitsForFrameIndex(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ const MachineInstr &MI = *MRI.getVRegDef(R);
+ computeKnownBitsForAlignment(Known, inferPtrAlignment(MI));
+}
+
+void GISelKnownBits::computeKnownBitsForAlignment(KnownBits &Known,
+ MaybeAlign Alignment) {
+ if (Alignment)
+ // The low bits are known zero if the pointer is aligned.
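+    // E.g. an alignment of 8 guarantees that the low 3 bits are zero.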
+ Known.Zero.setLowBits(Log2(Alignment));
+}
+
+KnownBits GISelKnownBits::getKnownBits(MachineInstr &MI) {
+ return getKnownBits(MI.getOperand(0).getReg());
+}
+
+KnownBits GISelKnownBits::getKnownBits(Register R) {
+ KnownBits Known;
+ LLT Ty = MRI.getType(R);
+ APInt DemandedElts =
+ Ty.isVector() ? APInt::getAllOnesValue(Ty.getNumElements()) : APInt(1, 1);
+ computeKnownBitsImpl(R, Known, DemandedElts);
+ return Known;
+}
+
+bool GISelKnownBits::signBitIsZero(Register R) {
+ LLT Ty = MRI.getType(R);
+ unsigned BitWidth = Ty.getScalarSizeInBits();
+ return maskedValueIsZero(R, APInt::getSignMask(BitWidth));
+}
+
+APInt GISelKnownBits::getKnownZeroes(Register R) {
+ return getKnownBits(R).Zero;
+}
+
+APInt GISelKnownBits::getKnownOnes(Register R) { return getKnownBits(R).One; }
+
+void GISelKnownBits::computeKnownBitsImpl(Register R, KnownBits &Known,
+ const APInt &DemandedElts,
+ unsigned Depth) {
+ MachineInstr &MI = *MRI.getVRegDef(R);
+ unsigned Opcode = MI.getOpcode();
+ LLT DstTy = MRI.getType(R);
+
+ // Handle the case where this is called on a register that does not have a
+ // type constraint (i.e. it has a register class constraint instead). This is
+ // unlikely to occur except by looking through copies but it is possible for
+ // the initial register being queried to be in this state.
+ if (!DstTy.isValid()) {
+ Known = KnownBits();
+ return;
+ }
+
+ unsigned BitWidth = DstTy.getSizeInBits();
+ Known = KnownBits(BitWidth); // Don't know anything
+
+ if (DstTy.isVector())
+ return; // TODO: Handle vectors.
+
+ if (Depth == getMaxDepth())
+ return;
+
+ if (!DemandedElts)
+ return; // No demanded elts, better to assume we don't know anything.
+
+ KnownBits Known2;
+
+ switch (Opcode) {
+ default:
+ TL.computeKnownBitsForTargetInstr(*this, R, Known, DemandedElts, MRI,
+ Depth);
+ break;
+ case TargetOpcode::COPY: {
+ MachineOperand Dst = MI.getOperand(0);
+ MachineOperand Src = MI.getOperand(1);
+    // Look through trivial copies, but don't look through trivial copies of
+    // the form `%1:(s32) = OP %0:gpr32`, because known-bits analysis is
+    // currently unable to determine the bit width of a register class.
+ //
+ // We can't use NoSubRegister by name as it's defined by each target but
+ // it's always defined to be 0 by tablegen.
+ if (Dst.getSubReg() == 0 /*NoSubRegister*/ && Src.getReg().isVirtual() &&
+ Src.getSubReg() == 0 /*NoSubRegister*/ &&
+ MRI.getType(Src.getReg()).isValid()) {
+ // Don't increment Depth for this one since we didn't do any work.
+ computeKnownBitsImpl(Src.getReg(), Known, DemandedElts, Depth);
+ }
+ break;
+ }
+ case TargetOpcode::G_CONSTANT: {
+ auto CstVal = getConstantVRegVal(R, MRI);
+ if (!CstVal)
+ break;
+ Known.One = *CstVal;
+ Known.Zero = ~Known.One;
+ break;
+ }
+ case TargetOpcode::G_FRAME_INDEX: {
+ computeKnownBitsForFrameIndex(R, Known, DemandedElts);
+ break;
+ }
+ case TargetOpcode::G_SUB: {
+ // If low bits are known to be zero in both operands, then we know they are
+ // going to be 0 in the result. Both addition and complement operations
+ // preserve the low zero bits.
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ if (KnownZeroLow == 0)
+ break;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+ Known.Zero.setLowBits(KnownZeroLow);
+ break;
+ }
+ case TargetOpcode::G_XOR: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-0 bits are known if clear or set in both the LHS & RHS.
+ APInt KnownZeroOut = (Known.Zero & Known2.Zero) | (Known.One & Known2.One);
+ // Output known-1 are known to be set if set in only one of the LHS, RHS.
+ Known.One = (Known.Zero & Known2.One) | (Known.One & Known2.Zero);
+ Known.Zero = KnownZeroOut;
+ break;
+ }
+ case TargetOpcode::G_GEP: {
+ // G_GEP is like G_ADD. FIXME: Is this true for all targets?
+ LLT Ty = MRI.getType(MI.getOperand(1).getReg());
+ if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
+ break;
+ LLVM_FALLTHROUGH;
+ }
+ case TargetOpcode::G_ADD: {
+ // Output known-0 bits are known if clear or set in both the low clear bits
+ // common to both LHS & RHS. For example, 8+(X<<3) is known to have the
+ // low 3 bits clear.
+ // Output known-0 bits are also known if the top bits of each input are
+ // known to be clear. For example, if one input has the top 10 bits clear
+ // and the other has the top 8 bits clear, we know the top 7 bits of the
+ // output must be clear.
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ unsigned KnownZeroHigh = Known2.countMinLeadingZeros();
+ unsigned KnownZeroLow = Known2.countMinTrailingZeros();
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ KnownZeroHigh = std::min(KnownZeroHigh, Known2.countMinLeadingZeros());
+ KnownZeroLow = std::min(KnownZeroLow, Known2.countMinTrailingZeros());
+ Known.Zero.setLowBits(KnownZeroLow);
+ if (KnownZeroHigh > 1)
+ Known.Zero.setHighBits(KnownZeroHigh - 1);
+ break;
+ }
+ case TargetOpcode::G_AND: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-1 bits are only known if set in both the LHS & RHS.
+ Known.One &= Known2.One;
+ // Output known-0 are known to be clear if zero in either the LHS | RHS.
+ Known.Zero |= Known2.Zero;
+ break;
+ }
+ case TargetOpcode::G_OR: {
+ // If either the LHS or the RHS are Zero, the result is zero.
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+
+ // Output known-0 bits are only known if clear in both the LHS & RHS.
+ Known.Zero &= Known2.Zero;
+ // Output known-1 are known to be set if set in either the LHS | RHS.
+ Known.One |= Known2.One;
+ break;
+ }
+ case TargetOpcode::G_MUL: {
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known, DemandedElts,
+ Depth + 1);
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // If low bits are zero in either operand, output low known-0 bits.
+ // Also compute a conservative estimate for high known-0 bits.
+ // More trickiness is possible, but this is sufficient for the
+ // interesting case of alignment computation.
+ unsigned TrailZ =
+ Known.countMinTrailingZeros() + Known2.countMinTrailingZeros();
+ unsigned LeadZ =
+ std::max(Known.countMinLeadingZeros() + Known2.countMinLeadingZeros(),
+ BitWidth) -
+ BitWidth;
+
+ Known.resetAll();
+ Known.Zero.setLowBits(std::min(TrailZ, BitWidth));
+ Known.Zero.setHighBits(std::min(LeadZ, BitWidth));
+ break;
+ }
+ case TargetOpcode::G_SELECT: {
+ computeKnownBitsImpl(MI.getOperand(3).getReg(), Known, DemandedElts,
+ Depth + 1);
+ // If we don't know any bits, early out.
+ if (Known.isUnknown())
+ break;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
+ Depth + 1);
+ // Only known if known in both the LHS and RHS.
+ Known.One &= Known2.One;
+ Known.Zero &= Known2.Zero;
+ break;
+ }
+ case TargetOpcode::G_FCMP:
+ case TargetOpcode::G_ICMP: {
+ if (TL.getBooleanContents(DstTy.isVector(),
+ Opcode == TargetOpcode::G_FCMP) ==
+ TargetLowering::ZeroOrOneBooleanContent &&
+ BitWidth > 1)
+ Known.Zero.setBitsFrom(1);
+ break;
+ }
+ case TargetOpcode::G_SEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ // If the sign bit is known to be zero or one, then sext will extend
+ // it to the top bits, else it will just zext.
+ Known = Known.sext(BitWidth);
+ break;
+ }
+ case TargetOpcode::G_ANYEXT: {
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+ Known = Known.zext(BitWidth, true /* ExtendedBitsAreKnownZero */);
+ break;
+ }
+ case TargetOpcode::G_LOAD: {
+ if (MI.hasOneMemOperand()) {
+ const MachineMemOperand *MMO = *MI.memoperands_begin();
+ if (const MDNode *Ranges = MMO->getRanges()) {
+ computeKnownBitsFromRangeMetadata(*Ranges, Known);
+ }
+ }
+ break;
+ }
+ case TargetOpcode::G_ZEXTLOAD: {
+ // Everything above the retrieved bits is zero
+ if (MI.hasOneMemOperand())
+ Known.Zero.setBitsFrom((*MI.memoperands_begin())->getSizeInBits());
+ break;
+ }
+ case TargetOpcode::G_ASHR:
+ case TargetOpcode::G_LSHR:
+ case TargetOpcode::G_SHL: {
+ KnownBits RHSKnown;
+ computeKnownBitsImpl(MI.getOperand(2).getReg(), RHSKnown, DemandedElts,
+ Depth + 1);
+ if (!RHSKnown.isConstant()) {
+ LLVM_DEBUG(
+ MachineInstr *RHSMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+ dbgs() << '[' << Depth << "] Shift not known constant: " << *RHSMI);
+ break;
+ }
+ uint64_t Shift = RHSKnown.getConstant().getZExtValue();
+ LLVM_DEBUG(dbgs() << '[' << Depth << "] Shift is " << Shift << '\n');
+
+ computeKnownBitsImpl(MI.getOperand(1).getReg(), Known, DemandedElts,
+ Depth + 1);
+
+ switch (Opcode) {
+ case TargetOpcode::G_ASHR:
+ Known.Zero = Known.Zero.ashr(Shift);
+ Known.One = Known.One.ashr(Shift);
+ break;
+ case TargetOpcode::G_LSHR:
+ Known.Zero = Known.Zero.lshr(Shift);
+ Known.One = Known.One.lshr(Shift);
+ Known.Zero.setBitsFrom(Known.Zero.getBitWidth() - Shift);
+ break;
+ case TargetOpcode::G_SHL:
+ Known.Zero = Known.Zero.shl(Shift);
+ Known.One = Known.One.shl(Shift);
+ Known.Zero.setBits(0, Shift);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_INTTOPTR:
+ case TargetOpcode::G_PTRTOINT:
+ // Fall through and handle them the same as zext/trunc.
+ LLVM_FALLTHROUGH;
+ case TargetOpcode::G_ZEXT:
+ case TargetOpcode::G_TRUNC: {
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ unsigned SrcBitWidth = SrcTy.isPointer()
+ ? DL.getIndexSizeInBits(SrcTy.getAddressSpace())
+ : SrcTy.getSizeInBits();
+ assert(SrcBitWidth && "SrcBitWidth can't be zero");
+ Known = Known.zextOrTrunc(SrcBitWidth, true);
+ computeKnownBitsImpl(SrcReg, Known, DemandedElts, Depth + 1);
+ Known = Known.zextOrTrunc(BitWidth, true);
+ if (BitWidth > SrcBitWidth)
+ Known.Zero.setBitsFrom(SrcBitWidth);
+ break;
+ }
+ }
+
+ assert(!Known.hasConflict() && "Bits known to be one AND zero?");
+ LLVM_DEBUG(dbgs() << "[" << Depth << "] Compute known bits: " << MI << "["
+ << Depth << "] Computed for: " << MI << "[" << Depth
+ << "] Known: 0x"
+ << (Known.Zero | Known.One).toString(16, false) << "\n"
+ << "[" << Depth << "] Zero: 0x"
+ << Known.Zero.toString(16, false) << "\n"
+ << "[" << Depth << "] One: 0x"
+ << Known.One.toString(16, false) << "\n");
+}
+
+void GISelKnownBitsAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool GISelKnownBitsAnalysis::runOnMachineFunction(MachineFunction &MF) {
+ return false;
+}
diff --git a/lib/CodeGen/GlobalISel/IRTranslator.cpp b/lib/CodeGen/GlobalISel/IRTranslator.cpp
index 6e99bdbd8264..45cef4aca888 100644
--- a/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ b/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -32,6 +32,7 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/StackProtector.h"
#include "llvm/CodeGen/TargetFrameLowering.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
@@ -334,7 +335,7 @@ bool IRTranslator::translateFNeg(const User &U, MachineIRBuilder &MIRBuilder) {
bool IRTranslator::translateCompare(const User &U,
MachineIRBuilder &MIRBuilder) {
- const CmpInst *CI = dyn_cast<CmpInst>(&U);
+ auto *CI = dyn_cast<CmpInst>(&U);
Register Op0 = getOrCreateVReg(*U.getOperand(0));
Register Op1 = getOrCreateVReg(*U.getOperand(1));
Register Res = getOrCreateVReg(U);
@@ -345,11 +346,12 @@ bool IRTranslator::translateCompare(const User &U,
MIRBuilder.buildICmp(Pred, Res, Op0, Op1);
else if (Pred == CmpInst::FCMP_FALSE)
MIRBuilder.buildCopy(
- Res, getOrCreateVReg(*Constant::getNullValue(CI->getType())));
+ Res, getOrCreateVReg(*Constant::getNullValue(U.getType())));
else if (Pred == CmpInst::FCMP_TRUE)
MIRBuilder.buildCopy(
- Res, getOrCreateVReg(*Constant::getAllOnesValue(CI->getType())));
+ Res, getOrCreateVReg(*Constant::getAllOnesValue(U.getType())));
else {
+ assert(CI && "Instruction should be CmpInst");
MIRBuilder.buildInstr(TargetOpcode::G_FCMP, {Res}, {Pred, Op0, Op1},
MachineInstr::copyFlagsFromInstruction(*CI));
}
@@ -588,8 +590,8 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
Cond = MIB.buildICmp(CB.PredInfo.Pred, i1Ty, CondLHS, CondRHS).getReg(0);
} else {
- assert(CB.PredInfo.Pred == CmpInst::ICMP_ULE &&
- "Can only handle ULE ranges");
+ assert(CB.PredInfo.Pred == CmpInst::ICMP_SLE &&
+ "Can only handle SLE ranges");
const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
@@ -598,7 +600,7 @@ void IRTranslator::emitSwitchCase(SwitchCG::CaseBlock &CB,
if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
Register CondRHS = getOrCreateVReg(*CB.CmpRHS);
Cond =
- MIB.buildICmp(CmpInst::ICMP_ULE, i1Ty, CmpOpReg, CondRHS).getReg(0);
+ MIB.buildICmp(CmpInst::ICMP_SLE, i1Ty, CmpOpReg, CondRHS).getReg(0);
} else {
const LLT &CmpTy = MRI->getType(CmpOpReg);
auto Sub = MIB.buildSub({CmpTy}, CmpOpReg, CondLHS);
@@ -728,7 +730,7 @@ bool IRTranslator::lowerSwitchRangeWorkItem(SwitchCG::CaseClusterIt I,
MHS = nullptr;
} else {
// Check I->Low <= Cond <= I->High.
- Pred = CmpInst::ICMP_ULE;
+ Pred = CmpInst::ICMP_SLE;
LHS = I->Low;
MHS = Cond;
RHS = I->High;
@@ -879,7 +881,8 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
return true;
}
-
+ const MDNode *Ranges =
+ Regs.size() == 1 ? LI.getMetadata(LLVMContext::MD_range) : nullptr;
for (unsigned i = 0; i < Regs.size(); ++i) {
Register Addr;
MIRBuilder.materializeGEP(Addr, Base, OffsetTy, Offsets[i] / 8);
@@ -888,7 +891,7 @@ bool IRTranslator::translateLoad(const User &U, MachineIRBuilder &MIRBuilder) {
unsigned BaseAlign = getMemOpAlignment(LI);
auto MMO = MF->getMachineMemOperand(
Ptr, Flags, (MRI->getType(Regs[i]).getSizeInBits() + 7) / 8,
- MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), nullptr,
+ MinAlign(BaseAlign, Offsets[i] / 8), AAMDNodes(), Ranges,
LI.getSyncScopeID(), LI.getOrdering());
MIRBuilder.buildLoad(Regs[i], Addr, *MMO);
}
@@ -1075,36 +1078,29 @@ bool IRTranslator::translateGetElementPtr(const User &U,
}
if (Offset != 0) {
- Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
LLT OffsetTy = getLLTForType(*OffsetIRTy, *DL);
auto OffsetMIB = MIRBuilder.buildConstant({OffsetTy}, Offset);
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, OffsetMIB.getReg(0));
-
- BaseReg = NewBaseReg;
+ BaseReg =
+ MIRBuilder.buildGEP(PtrTy, BaseReg, OffsetMIB.getReg(0)).getReg(0);
Offset = 0;
}
Register IdxReg = getOrCreateVReg(*Idx);
- if (MRI->getType(IdxReg) != OffsetTy) {
- Register NewIdxReg = MRI->createGenericVirtualRegister(OffsetTy);
- MIRBuilder.buildSExtOrTrunc(NewIdxReg, IdxReg);
- IdxReg = NewIdxReg;
- }
+ if (MRI->getType(IdxReg) != OffsetTy)
+ IdxReg = MIRBuilder.buildSExtOrTrunc(OffsetTy, IdxReg).getReg(0);
// N = N + Idx * ElementSize;
// Avoid doing it for ElementSize of 1.
Register GepOffsetReg;
if (ElementSize != 1) {
- GepOffsetReg = MRI->createGenericVirtualRegister(OffsetTy);
auto ElementSizeMIB = MIRBuilder.buildConstant(
getLLTForType(*OffsetIRTy, *DL), ElementSize);
- MIRBuilder.buildMul(GepOffsetReg, ElementSizeMIB.getReg(0), IdxReg);
+ GepOffsetReg =
+ MIRBuilder.buildMul(OffsetTy, ElementSizeMIB, IdxReg).getReg(0);
} else
GepOffsetReg = IdxReg;
- Register NewBaseReg = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(NewBaseReg, BaseReg, GepOffsetReg);
- BaseReg = NewBaseReg;
+ BaseReg = MIRBuilder.buildGEP(PtrTy, BaseReg, GepOffsetReg).getReg(0);
}
}
@@ -1119,54 +1115,51 @@ bool IRTranslator::translateGetElementPtr(const User &U,
return true;
}
-bool IRTranslator::translateMemfunc(const CallInst &CI,
+bool IRTranslator::translateMemFunc(const CallInst &CI,
MachineIRBuilder &MIRBuilder,
- unsigned ID) {
+ Intrinsic::ID ID) {
// If the source is undef, then just emit a nop.
- if (isa<UndefValue>(CI.getArgOperand(1))) {
- switch (ID) {
- case Intrinsic::memmove:
- case Intrinsic::memcpy:
- case Intrinsic::memset:
- return true;
- default:
- break;
- }
- }
-
- LLT SizeTy = getLLTForType(*CI.getArgOperand(2)->getType(), *DL);
- Type *DstTy = CI.getArgOperand(0)->getType();
- if (cast<PointerType>(DstTy)->getAddressSpace() != 0 ||
- SizeTy.getSizeInBits() != DL->getPointerSizeInBits(0))
- return false;
+ if (isa<UndefValue>(CI.getArgOperand(1)))
+ return true;
- SmallVector<CallLowering::ArgInfo, 8> Args;
- for (int i = 0; i < 3; ++i) {
- const auto &Arg = CI.getArgOperand(i);
- Args.emplace_back(getOrCreateVReg(*Arg), Arg->getType());
+ ArrayRef<Register> Res;
+ auto ICall = MIRBuilder.buildIntrinsic(ID, Res, true);
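+  // Add every IR argument except the trailing 'isvolatile' flag; volatility
+  // is conveyed via the memory operands attached below instead.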
+ for (auto AI = CI.arg_begin(), AE = CI.arg_end(); std::next(AI) != AE; ++AI)
+ ICall.addUse(getOrCreateVReg(**AI));
+
+ unsigned DstAlign = 0, SrcAlign = 0;
+ unsigned IsVol =
+ cast<ConstantInt>(CI.getArgOperand(CI.getNumArgOperands() - 1))
+ ->getZExtValue();
+
+ if (auto *MCI = dyn_cast<MemCpyInst>(&CI)) {
+ DstAlign = std::max<unsigned>(MCI->getDestAlignment(), 1);
+ SrcAlign = std::max<unsigned>(MCI->getSourceAlignment(), 1);
+ } else if (auto *MMI = dyn_cast<MemMoveInst>(&CI)) {
+ DstAlign = std::max<unsigned>(MMI->getDestAlignment(), 1);
+ SrcAlign = std::max<unsigned>(MMI->getSourceAlignment(), 1);
+ } else {
+ auto *MSI = cast<MemSetInst>(&CI);
+ DstAlign = std::max<unsigned>(MSI->getDestAlignment(), 1);
}
- const char *Callee;
- switch (ID) {
- case Intrinsic::memmove:
- case Intrinsic::memcpy: {
- Type *SrcTy = CI.getArgOperand(1)->getType();
- if(cast<PointerType>(SrcTy)->getAddressSpace() != 0)
- return false;
- Callee = ID == Intrinsic::memcpy ? "memcpy" : "memmove";
- break;
- }
- case Intrinsic::memset:
- Callee = "memset";
- break;
- default:
- return false;
- }
+ // We need to propagate the tail call flag from the IR inst as an argument.
+ // Otherwise, we have to pessimize and assume later that we cannot tail call
+ // any memory intrinsics.
+ ICall.addImm(CI.isTailCall() ? 1 : 0);
- return CLI->lowerCall(MIRBuilder, CI.getCallingConv(),
- MachineOperand::CreateES(Callee),
- CallLowering::ArgInfo({0}, CI.getType()), Args);
+ // Create mem operands to store the alignment and volatile info.
+  auto VolFlag = IsVol ? MachineMemOperand::MOVolatile
+                       : MachineMemOperand::MONone;
+ ICall.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(CI.getArgOperand(0)),
+ MachineMemOperand::MOStore | VolFlag, 1, DstAlign));
+ if (ID != Intrinsic::memset)
+ ICall.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(CI.getArgOperand(1)),
+ MachineMemOperand::MOLoad | VolFlag, 1, SrcAlign));
+
+ return true;
}
void IRTranslator::getStackGuard(Register DstReg,
@@ -1186,7 +1179,7 @@ void IRTranslator::getStackGuard(Register DstReg,
MachineMemOperand::MODereferenceable;
MachineMemOperand *MemRef =
MF->getMachineMemOperand(MPInfo, Flags, DL->getPointerSizeInBits() / 8,
- DL->getPointerABIAlignment(0));
+ DL->getPointerABIAlignment(0).value());
MIB.setMemRefs({MemRef});
}
@@ -1208,6 +1201,8 @@ unsigned IRTranslator::getSimpleIntrinsicOpcode(Intrinsic::ID ID) {
break;
case Intrinsic::bswap:
return TargetOpcode::G_BSWAP;
+ case Intrinsic::bitreverse:
+ return TargetOpcode::G_BITREVERSE;
case Intrinsic::ceil:
return TargetOpcode::G_FCEIL;
case Intrinsic::cos:
@@ -1383,16 +1378,17 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
if (!V) {
// Currently the optimizer can produce this; insert an undef to
// help debugging. Probably the optimizer should not do this.
- MIRBuilder.buildIndirectDbgValue(0, DI.getVariable(), DI.getExpression());
+ MIRBuilder.buildDirectDbgValue(0, DI.getVariable(), DI.getExpression());
} else if (const auto *CI = dyn_cast<Constant>(V)) {
MIRBuilder.buildConstDbgValue(*CI, DI.getVariable(), DI.getExpression());
} else {
- Register Reg = getOrCreateVReg(*V);
- // FIXME: This does not handle register-indirect values at offset 0. The
- // direct/indirect thing shouldn't really be handled by something as
- // implicit as reg+noreg vs reg+imm in the first palce, but it seems
- // pretty baked in right now.
- MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
+ for (Register Reg : getOrCreateVRegs(*V)) {
+ // FIXME: This does not handle register-indirect values at offset 0. The
+ // direct/indirect thing shouldn't really be handled by something as
+ // implicit as reg+noreg vs reg+imm in the first place, but it seems
+ // pretty baked in right now.
+ MIRBuilder.buildDirectDbgValue(Reg, DI.getVariable(), DI.getExpression());
+ }
}
return true;
}
@@ -1433,7 +1429,7 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
case Intrinsic::memcpy:
case Intrinsic::memmove:
case Intrinsic::memset:
- return translateMemfunc(CI, MIRBuilder, ID);
+ return translateMemFunc(CI, MIRBuilder, ID);
case Intrinsic::eh_typeid_for: {
GlobalValue *GV = ExtractTypeInfo(CI.getArgOperand(0));
Register Reg = getOrCreateVReg(CI);
@@ -1441,18 +1437,12 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
MIRBuilder.buildConstant(Reg, TypeID);
return true;
}
- case Intrinsic::objectsize: {
- // If we don't know by now, we're never going to know.
- const ConstantInt *Min = cast<ConstantInt>(CI.getArgOperand(1));
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
- MIRBuilder.buildConstant(getOrCreateVReg(CI), Min->isZero() ? -1ULL : 0);
- return true;
- }
case Intrinsic::is_constant:
- // If this wasn't constant-folded away by now, then it's not a
- // constant.
- MIRBuilder.buildConstant(getOrCreateVReg(CI), 0);
- return true;
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
case Intrinsic::stackguard:
getStackGuard(getOrCreateVReg(CI), MIRBuilder);
return true;
@@ -1551,6 +1541,46 @@ bool IRTranslator::translateInlineAsm(const CallInst &CI,
return true;
}
+bool IRTranslator::translateCallSite(const ImmutableCallSite &CS,
+ MachineIRBuilder &MIRBuilder) {
+ const Instruction &I = *CS.getInstruction();
+ ArrayRef<Register> Res = getOrCreateVRegs(I);
+
+ SmallVector<ArrayRef<Register>, 8> Args;
+ Register SwiftInVReg = 0;
+ Register SwiftErrorVReg = 0;
+ for (auto &Arg : CS.args()) {
+ if (CLI->supportSwiftError() && isSwiftError(Arg)) {
+ assert(SwiftInVReg == 0 && "Expected only one swift error argument");
+ LLT Ty = getLLTForType(*Arg->getType(), *DL);
+ SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
+ MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
+ &I, &MIRBuilder.getMBB(), Arg));
+ Args.emplace_back(makeArrayRef(SwiftInVReg));
+ SwiftErrorVReg =
+ SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
+ continue;
+ }
+ Args.push_back(getOrCreateVRegs(*Arg));
+ }
+
+ // We don't set HasCalls on MFI here yet because call lowering may decide to
+ // optimize into tail calls. Instead, we defer that to selection where a final
+ // scan is done to check if any instructions are calls.
+ bool Success =
+ CLI->lowerCall(MIRBuilder, CS, Res, Args, SwiftErrorVReg,
+ [&]() { return getOrCreateVReg(*CS.getCalledValue()); });
+
+ // Check if we just inserted a tail call.
+ if (Success) {
+ assert(!HasTailCall && "Can't tail call return twice from block?");
+ const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
+ HasTailCall = TII->isTailCall(*std::prev(MIRBuilder.getInsertPt()));
+ }
+
+ return Success;
+}
+
bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
const CallInst &CI = cast<CallInst>(U);
auto TII = MF->getTarget().getIntrinsicInfo();
@@ -1570,34 +1600,8 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
ID = static_cast<Intrinsic::ID>(TII->getIntrinsicID(F));
}
- if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic) {
- ArrayRef<Register> Res = getOrCreateVRegs(CI);
-
- SmallVector<ArrayRef<Register>, 8> Args;
- Register SwiftInVReg = 0;
- Register SwiftErrorVReg = 0;
- for (auto &Arg: CI.arg_operands()) {
- if (CLI->supportSwiftError() && isSwiftError(Arg)) {
- assert(SwiftInVReg == 0 && "Expected only one swift error argument");
- LLT Ty = getLLTForType(*Arg->getType(), *DL);
- SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
- MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
- &CI, &MIRBuilder.getMBB(), Arg));
- Args.emplace_back(makeArrayRef(SwiftInVReg));
- SwiftErrorVReg =
- SwiftError.getOrCreateVRegDefAt(&CI, &MIRBuilder.getMBB(), Arg);
- continue;
- }
- Args.push_back(getOrCreateVRegs(*Arg));
- }
-
- MF->getFrameInfo().setHasCalls(true);
- bool Success =
- CLI->lowerCall(MIRBuilder, &CI, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*CI.getCalledValue()); });
-
- return Success;
- }
+ if (!F || !F->isIntrinsic() || ID == Intrinsic::not_intrinsic)
+ return translateCallSite(&CI, MIRBuilder);
assert(ID != Intrinsic::not_intrinsic && "unknown intrinsic");
@@ -1615,14 +1619,29 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
if (isa<FPMathOperator>(CI))
MIB->copyIRFlags(CI);
- for (auto &Arg : CI.arg_operands()) {
+ for (auto &Arg : enumerate(CI.arg_operands())) {
// Some intrinsics take metadata parameters. Reject them.
- if (isa<MetadataAsValue>(Arg))
- return false;
- ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg);
- if (VRegs.size() > 1)
+ if (isa<MetadataAsValue>(Arg.value()))
return false;
- MIB.addUse(VRegs[0]);
+
+ // If this is required to be an immediate, don't materialize it in a
+ // register.
+ if (CI.paramHasAttr(Arg.index(), Attribute::ImmArg)) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(Arg.value())) {
+ // imm arguments are more convenient than cimm (and realistically
+ // probably sufficient), so use them.
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ MIB.addImm(CI->getSExtValue());
+ } else {
+ MIB.addFPImm(cast<ConstantFP>(Arg.value()));
+ }
+ } else {
+ ArrayRef<Register> VRegs = getOrCreateVRegs(*Arg.value());
+ if (VRegs.size() > 1)
+ return false;
+ MIB.addUse(VRegs[0]);
+ }
}
// Add a MachineMemOperand if it is a target mem intrinsic.
@@ -1630,13 +1649,14 @@ bool IRTranslator::translateCall(const User &U, MachineIRBuilder &MIRBuilder) {
TargetLowering::IntrinsicInfo Info;
// TODO: Add a GlobalISel version of getTgtMemIntrinsic.
if (TLI.getTgtMemIntrinsic(Info, CI, *MF, ID)) {
- unsigned Align = Info.align;
- if (Align == 0)
- Align = DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext()));
+ MaybeAlign Align = Info.align;
+ if (!Align)
+ Align = MaybeAlign(
+ DL->getABITypeAlignment(Info.memVT.getTypeForEVT(F->getContext())));
uint64_t Size = Info.memVT.getStoreSize();
- MIB.addMemOperand(MF->getMachineMemOperand(MachinePointerInfo(Info.ptrVal),
- Info.flags, Size, Align));
+ MIB.addMemOperand(MF->getMachineMemOperand(
+ MachinePointerInfo(Info.ptrVal), Info.flags, Size, Align->value()));
}
return true;
@@ -1672,30 +1692,7 @@ bool IRTranslator::translateInvoke(const User &U,
MCSymbol *BeginSymbol = Context.createTempSymbol();
MIRBuilder.buildInstr(TargetOpcode::EH_LABEL).addSym(BeginSymbol);
- ArrayRef<Register> Res;
- if (!I.getType()->isVoidTy())
- Res = getOrCreateVRegs(I);
- SmallVector<ArrayRef<Register>, 8> Args;
- Register SwiftErrorVReg = 0;
- Register SwiftInVReg = 0;
- for (auto &Arg : I.arg_operands()) {
- if (CLI->supportSwiftError() && isSwiftError(Arg)) {
- assert(SwiftInVReg == 0 && "Expected only one swift error argument");
- LLT Ty = getLLTForType(*Arg->getType(), *DL);
- SwiftInVReg = MRI->createGenericVirtualRegister(Ty);
- MIRBuilder.buildCopy(SwiftInVReg, SwiftError.getOrCreateVRegUseAt(
- &I, &MIRBuilder.getMBB(), Arg));
- Args.push_back(makeArrayRef(SwiftInVReg));
- SwiftErrorVReg =
- SwiftError.getOrCreateVRegDefAt(&I, &MIRBuilder.getMBB(), Arg);
- continue;
- }
-
- Args.push_back(getOrCreateVRegs(*Arg));
- }
-
- if (!CLI->lowerCall(MIRBuilder, &I, Res, Args, SwiftErrorVReg,
- [&]() { return getOrCreateVReg(*I.getCalledValue()); }))
+ if (!translateCallSite(&I, MIRBuilder))
return false;
MCSymbol *EndSymbol = Context.createTempSymbol();
@@ -1811,36 +1808,25 @@ bool IRTranslator::translateAlloca(const User &U,
Register AllocSize = MRI->createGenericVirtualRegister(IntPtrTy);
Register TySize =
- getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, -DL->getTypeAllocSize(Ty)));
+ getOrCreateVReg(*ConstantInt::get(IntPtrIRTy, DL->getTypeAllocSize(Ty)));
MIRBuilder.buildMul(AllocSize, NumElts, TySize);
- LLT PtrTy = getLLTForType(*AI.getType(), *DL);
- auto &TLI = *MF->getSubtarget().getTargetLowering();
- Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
-
- Register SPTmp = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildCopy(SPTmp, SPReg);
-
- Register AllocTmp = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildGEP(AllocTmp, SPTmp, AllocSize);
-
- // Handle alignment. We have to realign if the allocation granule was smaller
- // than stack alignment, or the specific alloca requires more than stack
- // alignment.
unsigned StackAlign =
MF->getSubtarget().getFrameLowering()->getStackAlignment();
- Align = std::max(Align, StackAlign);
- if (Align > StackAlign || DL->getTypeAllocSize(Ty) % StackAlign != 0) {
- // Round the size of the allocation up to the stack alignment size
- // by add SA-1 to the size. This doesn't overflow because we're computing
- // an address inside an alloca.
- Register AlignedAlloc = MRI->createGenericVirtualRegister(PtrTy);
- MIRBuilder.buildPtrMask(AlignedAlloc, AllocTmp, Log2_32(Align));
- AllocTmp = AlignedAlloc;
- }
+ if (Align <= StackAlign)
+ Align = 0;
+
+ // Round the size of the allocation up to the stack alignment size
+  // by adding SA-1 to the size. This doesn't overflow because we're computing
+ // an address inside an alloca.
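+  // I.e. AlignedAlloc = (AllocSize + StackAlign - 1) & ~(StackAlign - 1).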
+ auto SAMinusOne = MIRBuilder.buildConstant(IntPtrTy, StackAlign - 1);
+ auto AllocAdd = MIRBuilder.buildAdd(IntPtrTy, AllocSize, SAMinusOne,
+ MachineInstr::NoUWrap);
+ auto AlignCst =
+ MIRBuilder.buildConstant(IntPtrTy, ~(uint64_t)(StackAlign - 1));
+ auto AlignedAlloc = MIRBuilder.buildAnd(IntPtrTy, AllocAdd, AlignCst);
- MIRBuilder.buildCopy(SPReg, AllocTmp);
- MIRBuilder.buildCopy(getOrCreateVReg(AI), AllocTmp);
+ MIRBuilder.buildDynStackAlloc(getOrCreateVReg(AI), AlignedAlloc, Align);
MF->getFrameInfo().CreateVariableSizedObject(Align ? Align : 1, &AI);
assert(MF->getFrameInfo().hasVarSizedObjects());
@@ -1926,7 +1912,7 @@ bool IRTranslator::translateShuffleVector(const User &U,
.addDef(getOrCreateVReg(U))
.addUse(getOrCreateVReg(*U.getOperand(0)))
.addUse(getOrCreateVReg(*U.getOperand(1)))
- .addUse(getOrCreateVReg(*U.getOperand(2)));
+ .addShuffleMask(cast<Constant>(U.getOperand(2)));
return true;
}
@@ -1991,7 +1977,6 @@ bool IRTranslator::translateAtomicRMW(const User &U,
unsigned Opcode = 0;
switch (I.getOperation()) {
default:
- llvm_unreachable("Unknown atomicrmw op");
return false;
case AtomicRMWInst::Xchg:
Opcode = TargetOpcode::G_ATOMICRMW_XCHG;
@@ -2026,6 +2011,12 @@ bool IRTranslator::translateAtomicRMW(const User &U,
case AtomicRMWInst::UMin:
Opcode = TargetOpcode::G_ATOMICRMW_UMIN;
break;
+ case AtomicRMWInst::FAdd:
+ Opcode = TargetOpcode::G_ATOMICRMW_FADD;
+ break;
+ case AtomicRMWInst::FSub:
+ Opcode = TargetOpcode::G_ATOMICRMW_FSUB;
+ break;
}
MIRBuilder.buildAtomicRMW(
@@ -2197,6 +2188,20 @@ void IRTranslator::finalizeFunction() {
FuncInfo.clear();
}
+/// Returns true if a BasicBlock \p BB within a variadic function contains a
+/// variadic musttail call.
+static bool checkForMustTailInVarArgFn(bool IsVarArg, const BasicBlock &BB) {
+ if (!IsVarArg)
+ return false;
+
+ // Walk the block backwards, because tail calls usually only appear at the end
+ // of a block.
+ return std::any_of(BB.rbegin(), BB.rend(), [](const Instruction &I) {
+ const auto *CI = dyn_cast<CallInst>(&I);
+ return CI && CI->isMustTailCall();
+ });
+}
+
bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
MF = &CurMF;
const Function &F = MF->getFunction();
@@ -2212,26 +2217,26 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
: TPC->isGISelCSEEnabled();
if (EnableCSE) {
- EntryBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ EntryBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
CSEInfo = &Wrapper.get(TPC->getCSEConfig());
EntryBuilder->setCSEInfo(CSEInfo);
- CurBuilder = make_unique<CSEMIRBuilder>(CurMF);
+ CurBuilder = std::make_unique<CSEMIRBuilder>(CurMF);
CurBuilder->setCSEInfo(CSEInfo);
} else {
- EntryBuilder = make_unique<MachineIRBuilder>();
- CurBuilder = make_unique<MachineIRBuilder>();
+ EntryBuilder = std::make_unique<MachineIRBuilder>();
+ CurBuilder = std::make_unique<MachineIRBuilder>();
}
CLI = MF->getSubtarget().getCallLowering();
CurBuilder->setMF(*MF);
EntryBuilder->setMF(*MF);
MRI = &MF->getRegInfo();
DL = &F.getParent()->getDataLayout();
- ORE = llvm::make_unique<OptimizationRemarkEmitter>(&F);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&F);
FuncInfo.MF = MF;
FuncInfo.BPI = nullptr;
const auto &TLI = *MF->getSubtarget().getTargetLowering();
const TargetMachine &TM = MF->getTarget();
- SL = make_unique<GISelSwitchLowering>(this, FuncInfo);
+ SL = std::make_unique<GISelSwitchLowering>(this, FuncInfo);
SL->init(TLI, TM, *DL);
EnableOpts = TM.getOptLevel() != CodeGenOpt::None && !skipFunction(F);
@@ -2258,6 +2263,9 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
SwiftError.setFunction(CurMF);
SwiftError.createEntriesInEntryBlock(DbgLoc);
+ bool IsVarArg = F.isVarArg();
+ bool HasMustTailInVarArgFn = false;
+
// Create all blocks, in IR order, to preserve the layout.
for (const BasicBlock &BB: F) {
auto *&MBB = BBToMBB[&BB];
@@ -2267,8 +2275,13 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
if (BB.hasAddressTaken())
MBB->setHasAddressTaken();
+
+ if (!HasMustTailInVarArgFn)
+ HasMustTailInVarArgFn = checkForMustTailInVarArgFn(IsVarArg, BB);
}
+ MF->getFrameInfo().setHasMustTailInVarArgFunc(HasMustTailInVarArgFn);
+
// Make our arguments/constants entry block fallthrough to the IR entry block.
EntryBB->addSuccessor(&getMBB(F.front()));
@@ -2286,18 +2299,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
}
}
- // We don't currently support translating swifterror or swiftself functions.
- for (auto &Arg : F.args()) {
- if (Arg.hasSwiftSelfAttr()) {
- OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
- F.getSubprogram(), &F.getEntryBlock());
- R << "unable to lower arguments due to swiftself: "
- << ore::NV("Prototype", F.getType());
- reportTranslationError(*MF, *TPC, *ORE, R);
- return false;
- }
- }
-
if (!CLI->lowerFormalArguments(*EntryBuilder.get(), F, VRegArgs)) {
OptimizationRemarkMissed R("gisel-irtranslator", "GISelFailure",
F.getSubprogram(), &F.getEntryBlock());
@@ -2322,8 +2323,15 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
// Set the insertion point of all the following translations to
// the end of this basic block.
CurBuilder->setMBB(MBB);
-
+ HasTailCall = false;
for (const Instruction &Inst : *BB) {
+ // If we translated a tail call in the last step, then we know
+ // everything after the call is either a return, or something that is
+ // handled by the call itself. (E.g. a lifetime marker or assume
+ // intrinsic.) In this case, we should stop translating the block and
+ // move on.
+ if (HasTailCall)
+ break;
#ifndef NDEBUG
Verifier.setCurrentInst(&Inst);
#endif // ifndef NDEBUG
diff --git a/lib/CodeGen/GlobalISel/InstructionSelect.cpp b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
index 70694fe6b6c8..7c4fd2d140d3 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelect.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelect.cpp
@@ -12,11 +12,14 @@
#include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/Twine.h"
+#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -45,6 +48,7 @@ INITIALIZE_PASS_BEGIN(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
+INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(InstructionSelect, DEBUG_TYPE,
"Select target instructions out of generic instructions",
false, false)
@@ -53,6 +57,8 @@ InstructionSelect::InstructionSelect() : MachineFunctionPass(ID) { }
void InstructionSelect::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<TargetPassConfig>();
+ AU.addRequired<GISelKnownBitsAnalysis>();
+ AU.addPreserved<GISelKnownBitsAnalysis>();
getSelectionDAGFallbackAnalysisUsage(AU);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -64,11 +70,13 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
return false;
LLVM_DEBUG(dbgs() << "Selecting function: " << MF.getName() << '\n');
+ GISelKnownBits &KB = getAnalysis<GISelKnownBitsAnalysis>().get(MF);
const TargetPassConfig &TPC = getAnalysis<TargetPassConfig>();
- const InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
+ InstructionSelector *ISel = MF.getSubtarget().getInstructionSelector();
CodeGenCoverage CoverageInfo;
assert(ISel && "Cannot work without InstructionSelector");
+ ISel->setupMF(MF, KB, CoverageInfo);
// An optimization remark emitter. Used to report failures.
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
@@ -124,7 +132,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
continue;
}
- if (!ISel->select(MI, CoverageInfo)) {
+ if (!ISel->select(MI)) {
// FIXME: It would be nice to dump all inserted instructions. It's
// not obvious how, esp. considering select() can insert after MI.
reportGISelFailure(MF, TPC, MORE, "gisel-select", "cannot select", MI);
@@ -159,10 +167,10 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
--MII;
if (MI.getOpcode() != TargetOpcode::COPY)
continue;
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
- TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(SrcReg) &&
+ Register::isVirtualRegister(DstReg)) {
auto SrcRC = MRI.getRegClass(SrcReg);
auto DstRC = MRI.getRegClass(DstReg);
if (SrcRC == DstRC) {
@@ -179,7 +187,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
// that the size of the now-constrained vreg is unchanged and that it has a
// register class.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned VReg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned VReg = Register::index2VirtReg(I);
MachineInstr *MI = nullptr;
if (!MRI.def_empty(VReg))
@@ -217,6 +225,22 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
auto &TLI = *MF.getSubtarget().getTargetLowering();
TLI.finalizeLowering(MF);
+ // Determine if there are any calls in this machine function. Ported from
+ // SelectionDAG.
+ MachineFrameInfo &MFI = MF.getFrameInfo();
+ for (const auto &MBB : MF) {
+ if (MFI.hasCalls() && MF.hasInlineAsm())
+ break;
+
+ for (const auto &MI : MBB) {
+ if ((MI.isCall() && !MI.isReturn()) || MI.isStackAligningInlineAsm())
+ MFI.setHasCalls(true);
+ if (MI.isInlineAsm())
+ MF.setHasInlineAsm(true);
+ }
+ }
+
LLVM_DEBUG({
dbgs() << "Rules covered by selecting function: " << MF.getName() << ":";
for (auto RuleID : CoverageInfo.covered())
diff --git a/lib/CodeGen/GlobalISel/InstructionSelector.cpp b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
index 2ad35b3a72c9..28143b30d4e8 100644
--- a/lib/CodeGen/GlobalISel/InstructionSelector.cpp
+++ b/lib/CodeGen/GlobalISel/InstructionSelector.cpp
@@ -79,5 +79,5 @@ bool InstructionSelector::isObviouslySafeToFold(MachineInstr &MI,
return true;
return !MI.mayLoadOrStore() && !MI.mayRaiseFPException() &&
- !MI.hasUnmodeledSideEffects() && empty(MI.implicit_operands());
+ !MI.hasUnmodeledSideEffects() && MI.implicit_operands().empty();
}
diff --git a/lib/CodeGen/GlobalISel/Legalizer.cpp b/lib/CodeGen/GlobalISel/Legalizer.cpp
index b5b26bff34bb..1593e21fe07e 100644
--- a/lib/CodeGen/GlobalISel/Legalizer.cpp
+++ b/lib/CodeGen/GlobalISel/Legalizer.cpp
@@ -184,11 +184,11 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
: TPC.isGISelCSEEnabled();
if (EnableCSE) {
- MIRBuilder = make_unique<CSEMIRBuilder>();
+ MIRBuilder = std::make_unique<CSEMIRBuilder>();
CSEInfo = &Wrapper.get(TPC.getCSEConfig());
MIRBuilder->setCSEInfo(CSEInfo);
} else
- MIRBuilder = make_unique<MachineIRBuilder>();
+ MIRBuilder = std::make_unique<MachineIRBuilder>();
// This observer keeps the worklist updated.
LegalizerWorkListManager WorkListObserver(InstList, ArtifactList);
// We want both WorkListObserver as well as CSEInfo to observe all changes.
@@ -206,8 +206,16 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
auto RemoveDeadInstFromLists = [&WrapperObserver](MachineInstr *DeadMI) {
WrapperObserver.erasingInstr(*DeadMI);
};
+ auto stopLegalizing = [&](MachineInstr &MI) {
+ Helper.MIRBuilder.stopObservingChanges();
+ reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
+ "unable to legalize instruction", MI);
+ };
bool Changed = false;
+ SmallVector<MachineInstr *, 128> RetryList;
do {
+ assert(RetryList.empty() && "Expected no instructions in RetryList");
+ unsigned NumArtifacts = ArtifactList.size();
while (!InstList.empty()) {
MachineInstr &MI = *InstList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
@@ -222,14 +230,31 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) {
// Error out if we couldn't legalize this instruction. We may want to
// fall back to DAG ISel instead in the future.
if (Res == LegalizerHelper::UnableToLegalize) {
- Helper.MIRBuilder.stopObservingChanges();
- reportGISelFailure(MF, TPC, MORE, "gisel-legalize",
- "unable to legalize instruction", MI);
+ // Move illegal artifacts to RetryList instead of aborting because
+ // legalizing InstList may generate artifacts that allow
+          // ArtifactCombiner to combine them away.
+ if (isArtifact(MI)) {
+ RetryList.push_back(&MI);
+ continue;
+ }
+ stopLegalizing(MI);
return false;
}
WorkListObserver.printNewInstrs();
Changed |= Res == LegalizerHelper::Legalized;
}
+ // Try to combine the instructions in RetryList again if there
+ // are new artifacts. If not, stop legalizing.
+ if (!RetryList.empty()) {
+ if (ArtifactList.size() > NumArtifacts) {
+ while (!RetryList.empty())
+ ArtifactList.insert(RetryList.pop_back_val());
+ } else {
+ MachineInstr *MI = *RetryList.begin();
+ stopLegalizing(*MI);
+ return false;
+ }
+ }
while (!ArtifactList.empty()) {
MachineInstr &MI = *ArtifactList.pop_back_val();
assert(isPreISelGenericOpcode(MI.getOpcode()) && "Expecting generic opcode");
diff --git a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
index f5cf7fc9bd9b..21512e543878 100644
--- a/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetFrameLowering.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
@@ -171,6 +172,26 @@ bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
return true;
}
+static LLT getGCDType(LLT OrigTy, LLT TargetTy) {
+ if (OrigTy.isVector() && TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy.getElementType());
+ int GCD = greatestCommonDivisor(OrigTy.getNumElements(),
+ TargetTy.getNumElements());
+ return LLT::scalarOrVector(GCD, OrigTy.getElementType());
+ }
+
+ if (OrigTy.isVector() && !TargetTy.isVector()) {
+ assert(OrigTy.getElementType() == TargetTy);
+ return TargetTy;
+ }
+
+ assert(!OrigTy.isVector() && !TargetTy.isVector());
+
+ int GCD = greatestCommonDivisor(OrigTy.getSizeInBits(),
+ TargetTy.getSizeInBits());
+ return LLT::scalar(GCD);
+}
+
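
To make the getGCDType rule above concrete, here is the same arithmetic on plain element and bit counts, using std::gcd in place of the LLVM helper (a sketch, not part of the patch):

    #include <cassert>
    #include <numeric>

    int main() {
      // <4 x s32> vs. <6 x s32>: gcd of the element counts is 2 -> <2 x s32>.
      assert(std::gcd(4, 6) == 2);
      // s64 vs. s48: gcd of the bit widths is 16 -> s16.
      assert(std::gcd(64, 48) == 16);
      return 0;
    }
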
void LegalizerHelper::insertParts(Register DstReg,
LLT ResultTy, LLT PartTy,
ArrayRef<Register> PartRegs,
@@ -219,11 +240,29 @@ void LegalizerHelper::insertParts(Register DstReg,
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
switch (Opcode) {
case TargetOpcode::G_SDIV:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::SDIV_I64 : RTLIB::SDIV_I32;
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ switch (Size) {
+ case 32:
+ return RTLIB::SDIV_I32;
+ case 64:
+ return RTLIB::SDIV_I64;
+ case 128:
+ return RTLIB::SDIV_I128;
+ default:
+ llvm_unreachable("unexpected size");
+ }
case TargetOpcode::G_UDIV:
- assert((Size == 32 || Size == 64) && "Unsupported size");
- return Size == 64 ? RTLIB::UDIV_I64 : RTLIB::UDIV_I32;
+ assert((Size == 32 || Size == 64 || Size == 128) && "Unsupported size");
+ switch (Size) {
+ case 32:
+ return RTLIB::UDIV_I32;
+ case 64:
+ return RTLIB::UDIV_I64;
+ case 128:
+ return RTLIB::UDIV_I128;
+ default:
+ llvm_unreachable("unexpected size");
+ }
case TargetOpcode::G_SREM:
assert((Size == 32 || Size == 64) && "Unsupported size");
return Size == 64 ? RTLIB::SREM_I64 : RTLIB::SREM_I32;
@@ -288,6 +327,35 @@ static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
llvm_unreachable("Unknown libcall function");
}
+/// True if an instruction is in tail position in its caller. Intended for
+/// legalizing libcalls as tail calls when possible.
+static bool isLibCallInTailPosition(MachineInstr &MI) {
+ const Function &F = MI.getParent()->getParent()->getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore NoAlias and NonNull because they don't affect the
+ // call sequence.
+ AttributeList CallerAttrs = F.getAttributes();
+ if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
+ .removeAttribute(Attribute::NoAlias)
+ .removeAttribute(Attribute::NonNull)
+ .hasAttributes())
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
+ CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
+ return false;
+
+ // Only tail call if the following instruction is a standard return.
+ auto &TII = *MI.getMF()->getSubtarget().getInstrInfo();
+ MachineInstr *Next = MI.getNextNode();
+ if (!Next || TII.isTailCall(*Next) || !Next->isReturn())
+ return false;
+
+ return true;
+}
+
LegalizerHelper::LegalizeResult
llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
const CallLowering::ArgInfo &Result,
@@ -296,9 +364,12 @@ llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
const char *Name = TLI.getLibcallName(Libcall);
- MIRBuilder.getMF().getFrameInfo().setHasCalls(true);
- if (!CLI.lowerCall(MIRBuilder, TLI.getLibcallCallingConv(Libcall),
- MachineOperand::CreateES(Name), Result, Args))
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(Libcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = Result;
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
return LegalizerHelper::UnableToLegalize;
return LegalizerHelper::Legalized;
@@ -317,6 +388,74 @@ simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
Args);
}
+LegalizerHelper::LegalizeResult
+llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
+ MachineInstr &MI) {
+ assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
+ auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
+
+ SmallVector<CallLowering::ArgInfo, 3> Args;
+  // Add all the args, except for the last one, which is an imm denoting 'tail'.
+ for (unsigned i = 1; i < MI.getNumOperands() - 1; i++) {
+ Register Reg = MI.getOperand(i).getReg();
+
+    // We need to derive an IR type for call lowering.
+ LLT OpLLT = MRI.getType(Reg);
+ Type *OpTy = nullptr;
+ if (OpLLT.isPointer())
+ OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
+ else
+ OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
+ Args.push_back({Reg, OpTy});
+ }
+
+ auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
+ auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
+ Intrinsic::ID ID = MI.getOperand(0).getIntrinsicID();
+ RTLIB::Libcall RTLibcall;
+ switch (ID) {
+ case Intrinsic::memcpy:
+ RTLibcall = RTLIB::MEMCPY;
+ break;
+ case Intrinsic::memset:
+ RTLibcall = RTLIB::MEMSET;
+ break;
+ case Intrinsic::memmove:
+ RTLibcall = RTLIB::MEMMOVE;
+ break;
+ default:
+ return LegalizerHelper::UnableToLegalize;
+ }
+ const char *Name = TLI.getLibcallName(RTLibcall);
+
+ MIRBuilder.setInstr(MI);
+
+ CallLowering::CallLoweringInfo Info;
+ Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
+ Info.Callee = MachineOperand::CreateES(Name);
+ Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
+ Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() == 1 &&
+ isLibCallInTailPosition(MI);
+
+ std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
+ if (!CLI.lowerCall(MIRBuilder, Info))
+ return LegalizerHelper::UnableToLegalize;
+
+ if (Info.LoweredTailCall) {
+ assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
+ // We must have a return following the call to get past
+ // isLibCallInTailPosition.
+ assert(MI.getNextNode() && MI.getNextNode()->isReturn() &&
+ "Expected instr following MI to be a return?");
+
+ // We lowered a tail call, so the call is now the return from the block.
+ // Delete the old return.
+ MI.getNextNode()->eraseFromParent();
+ }
+
+ return LegalizerHelper::Legalized;
+}
+
static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
Type *FromType) {
auto ToMVT = MVT::getVT(ToType);
@@ -518,6 +657,65 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ // FIXME: support the general case where the requested NarrowTy may not be
+ // the same as the source type. E.g. s128 = sext(s32)
+ if ((SrcTy.getSizeInBits() != SizeOp0 / 2) ||
+ SrcTy.getSizeInBits() != NarrowTy.getSizeInBits()) {
+ LLVM_DEBUG(dbgs() << "Can't narrow sext to type " << NarrowTy << "\n");
+ return UnableToLegalize;
+ }
+
+ // Shift the sign bit of the low register through the high register.
+ auto ShiftAmt =
+ MIRBuilder.buildConstant(LLT::scalar(64), NarrowTy.getSizeInBits() - 1);
+ auto Shift = MIRBuilder.buildAShr(NarrowTy, SrcReg, ShiftAmt);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), {SrcReg, Shift.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_ZEXT: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
+ uint64_t SizeOp1 = SrcTy.getSizeInBits();
+ if (SizeOp0 % SizeOp1 != 0)
+ return UnableToLegalize;
+
+ // Generate a merge where the bottom bits are taken from the source, and
+ // zero everything else.
+ Register ZeroReg = MIRBuilder.buildConstant(SrcTy, 0).getReg(0);
+ unsigned NumParts = SizeOp0 / SizeOp1;
+ SmallVector<Register, 4> Srcs = {MI.getOperand(1).getReg()};
+ for (unsigned Part = 1; Part < NumParts; ++Part)
+ Srcs.push_back(ZeroReg);
+ MIRBuilder.buildMerge(MI.getOperand(0).getReg(), Srcs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case TargetOpcode::G_TRUNC: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
+ if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
+ LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
+ return UnableToLegalize;
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1).getReg());
+ MIRBuilder.buildCopy(MI.getOperand(0).getReg(), Unmerge.getReg(0));
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
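
A scalar model of the new G_SEXT narrowing, assuming the s64 = sext(s32) shape the case handles: the low half is the source itself, and the high half replicates its sign bit with an arithmetic shift by NarrowBits - 1 (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static void sext32to64(int32_t Src, uint32_t &Lo, uint32_t &Hi) {
      Lo = static_cast<uint32_t>(Src);
      Hi = static_cast<uint32_t>(Src >> 31); // the G_ASHR, then G_MERGE_VALUES
    }

    int main() {
      uint32_t Lo, Hi;
      sext32to64(-5, Lo, Hi);
      assert(Lo == 0xFFFFFFFBu && Hi == 0xFFFFFFFFu);
      return 0;
    }
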
case TargetOpcode::G_ADD: {
// FIXME: add support for when SizeOp0 isn't an exact multiple of
// NarrowSize.
@@ -530,15 +728,17 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, Src1Regs);
extractParts(MI.getOperand(2).getReg(), NarrowTy, NumParts, Src2Regs);
- Register CarryIn = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildConstant(CarryIn, 0);
-
+ Register CarryIn;
for (int i = 0; i < NumParts; ++i) {
Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
- MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
- Src2Regs[i], CarryIn);
+ if (i == 0)
+ MIRBuilder.buildUAddo(DstReg, CarryOut, Src1Regs[i], Src2Regs[i]);
+ else {
+ MIRBuilder.buildUAdde(DstReg, CarryOut, Src1Regs[i],
+ Src2Regs[i], CarryIn);
+ }
DstRegs.push_back(DstReg);
CarryIn = CarryOut;
@@ -730,7 +930,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
}
- MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
+ MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
MIRBuilder.buildMerge(MI.getOperand(0).getReg(), DstRegs);
Observer.changedInstr(MI);
MI.eraseFromParent();
@@ -763,6 +963,7 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
CmpInst::Predicate Pred =
static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
+ LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
@@ -771,18 +972,109 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
MIRBuilder.buildICmp(Pred, MI.getOperand(0).getReg(), Or, Zero);
} else {
- const LLT s1 = LLT::scalar(1);
- MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, s1, LHSH, RHSH);
+ MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpHEQ =
- MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, s1, LHSH, RHSH);
+ MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
- ICmpInst::getUnsignedPredicate(Pred), s1, LHSL, RHSL);
+ ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
MIRBuilder.buildSelect(MI.getOperand(0).getReg(), CmpHEQ, CmpLU, CmpH);
}
Observer.changedInstr(MI);
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SEXT_INREG: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ if (!MI.getOperand(2).isImm())
+ return UnableToLegalize;
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+    // So long as the new type has more bits than the bits we're extending, we
+ // don't need to break it apart.
+ if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
+ Observer.changingInstr(MI);
+ // We don't lose any non-extension bits by truncating the src and
+ // sign-extending the dst.
+ MachineOperand &MO1 = MI.getOperand(1);
+ auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1.getReg());
+ MO1.setReg(TruncMIB->getOperand(0).getReg());
+
+ MachineOperand &MO2 = MI.getOperand(0);
+ Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+ MIRBuilder.buildInstr(TargetOpcode::G_SEXT, {MO2.getReg()}, {DstExt});
+ MO2.setReg(DstExt);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
+
+ // Break it apart. Components below the extension point are unmodified. The
+ // component containing the extension point becomes a narrower SEXT_INREG.
+ // Components above it are ashr'd from the component containing the
+ // extension point.
+ if (SizeOp0 % NarrowSize != 0)
+ return UnableToLegalize;
+ int NumParts = SizeOp0 / NarrowSize;
+
+ // List the registers where the destination will be scattered.
+ SmallVector<Register, 2> DstRegs;
+ // List the registers where the source will be split.
+ SmallVector<Register, 2> SrcRegs;
+
+ // Create all the temporary registers.
+ for (int i = 0; i < NumParts; ++i) {
+ Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
+
+ SrcRegs.push_back(SrcReg);
+ }
+
+ // Explode the big arguments into smaller chunks.
+ MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1).getReg());
+
+ Register AshrCstReg =
+ MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
+ ->getOperand(0)
+ .getReg();
+ Register FullExtensionReg = 0;
+ Register PartialExtensionReg = 0;
+
+ // Do the operation on each small part.
+ for (int i = 0; i < NumParts; ++i) {
+ if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
+ DstRegs.push_back(SrcRegs[i]);
+ else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
+ assert(PartialExtensionReg &&
+ "Expected to visit partial extension before full");
+ if (FullExtensionReg) {
+ DstRegs.push_back(FullExtensionReg);
+ continue;
+ }
+ DstRegs.push_back(MIRBuilder
+ .buildInstr(TargetOpcode::G_ASHR, {NarrowTy},
+ {PartialExtensionReg, AshrCstReg})
+ ->getOperand(0)
+ .getReg());
+ FullExtensionReg = DstRegs.back();
+ } else {
+ DstRegs.push_back(
+ MIRBuilder
+ .buildInstr(
+ TargetOpcode::G_SEXT_INREG, {NarrowTy},
+ {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
+ ->getOperand(0)
+ .getReg());
+ PartialExtensionReg = DstRegs.back();
+ }
+ }
+
+ // Gather the destination registers into the final destination.
+ Register DstReg = MI.getOperand(0).getReg();
+ MIRBuilder.buildMerge(DstReg, DstRegs);
+ MI.eraseFromParent();
+ return Legalized;
+ }
}
}
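
A scalar model of the G_SEXT_INREG narrowing above, assuming a 64-bit value split into two 32-bit parts with the extension point at bit 8: the part holding the extension point becomes a narrower sext_inreg, and the part above it is an ashr of that partial result (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static void sextInReg64From8(uint32_t Lo, uint32_t &OutLo, uint32_t &OutHi) {
      // sext_inreg(lo, 8) on the part containing the extension point.
      int32_t Partial = static_cast<int32_t>(Lo << 24) >> 24;
      OutLo = static_cast<uint32_t>(Partial);
      OutHi = static_cast<uint32_t>(Partial >> 31); // ashr fill for the part above
    }

    int main() {
      uint32_t Lo, Hi;
      sextInReg64From8(0x80u, Lo, Hi); // bit 7 set: negative after extension
      assert(Lo == 0xFFFFFF80u && Hi == 0xFFFFFFFFu);
      return 0;
    }
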
@@ -892,7 +1184,7 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
- Register NextResult = I + 1 == NumOps && WideSize == DstSize ? DstReg :
+ Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
MRI.createGenericVirtualRegister(WideTy);
auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
@@ -903,6 +1195,8 @@ LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
if (WideSize > DstSize)
MIRBuilder.buildTrunc(DstReg, ResultReg);
+ else if (DstTy.isPointer())
+ MIRBuilder.buildIntToPtr(DstReg, ResultReg);
MI.eraseFromParent();
return Legalized;
@@ -1218,6 +1512,24 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_BITREVERSE: {
+ Observer.changingInstr(MI);
+
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
+
+ Register DstExt = MRI.createGenericVirtualRegister(WideTy);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ MI.getOperand(0).setReg(DstExt);
+ MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
+
+ auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
+ auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
+ MIRBuilder.buildTrunc(DstReg, Shift);
+ Observer.changedInstr(MI);
+ return Legalized;
+ }
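
A scalar model of the G_BITREVERSE widening above: reversing an s8 value through a wide s32 reverse leaves the interesting bits at the top, so the result is shifted right by DiffBits before truncating (sketch with a naive reference reverse; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static uint32_t bitrev32(uint32_t V) { // naive reference reverse
      uint32_t R = 0;
      for (int I = 0; I < 32; ++I)
        R |= ((V >> I) & 1u) << (31 - I);
      return R;
    }

    static uint8_t bitrev8(uint8_t V) {
      return static_cast<uint8_t>(bitrev32(V) >> 24); // lshr by DiffBits = 24
    }

    int main() {
      assert(bitrev8(0x01) == 0x80);
      return 0;
    }
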
case TargetOpcode::G_ADD:
case TargetOpcode::G_AND:
case TargetOpcode::G_MUL:
@@ -1310,13 +1622,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FPTOSI:
case TargetOpcode::G_FPTOUI:
- if (TypeIdx != 0)
- return UnableToLegalize;
Observer.changingInstr(MI);
- widenScalarDst(MI, WideTy);
+
+ if (TypeIdx == 0)
+ widenScalarDst(MI, WideTy);
+ else
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
+
Observer.changedInstr(MI);
return Legalized;
-
case TargetOpcode::G_SITOFP:
if (TypeIdx != 1)
return UnableToLegalize;
@@ -1483,6 +1797,7 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
case TargetOpcode::G_FMUL:
case TargetOpcode::G_FSUB:
case TargetOpcode::G_FMA:
+ case TargetOpcode::G_FMAD:
case TargetOpcode::G_FNEG:
case TargetOpcode::G_FABS:
case TargetOpcode::G_FCANONICALIZE:
@@ -1553,6 +1868,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_SEXT_INREG:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+
+ Observer.changingInstr(MI);
+ widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
+ widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
+ Observer.changedInstr(MI);
+ return Legalized;
}
}
@@ -1579,6 +1903,9 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_SADDO:
+ case TargetOpcode::G_SSUBO:
+ return lowerSADDO_SSUBO(MI);
case TargetOpcode::G_SMULO:
case TargetOpcode::G_UMULO: {
// Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
@@ -1669,6 +1996,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
MI.eraseFromParent();
return Legalized;
}
+ case TargetOpcode::G_FMAD:
+ return lowerFMad(MI);
case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
Register OldValRes = MI.getOperand(0).getReg();
Register SuccessRes = MI.getOperand(1).getReg();
@@ -1690,11 +2019,57 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
LLT DstTy = MRI.getType(DstReg);
auto &MMO = **MI.memoperands_begin();
- if (DstTy.getSizeInBits() == MMO.getSize() /* in bytes */ * 8) {
- // In the case of G_LOAD, this was a non-extending load already and we're
- // about to lower to the same instruction.
- if (MI.getOpcode() == TargetOpcode::G_LOAD)
+ if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
+ if (MI.getOpcode() == TargetOpcode::G_LOAD) {
+ // This load needs splitting into power of 2 sized loads.
+ if (DstTy.isVector())
return UnableToLegalize;
+ if (isPowerOf2_32(DstTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Our strategy here is to generate anyextending loads for the smaller
+ // types up to next power-2 result type, and then combine the two larger
+ // result values together, before truncating back down to the non-pow-2
+ // type.
+ // E.g. v1 = i24 load =>
+ // v2 = i32 load (2 byte)
+ // v3 = i32 load (1 byte)
+ // v4 = i32 shl v3, 16
+ // v5 = i32 or v4, v2
+ // v1 = i24 trunc v5
+ // By doing this we generate the correct truncate which should get
+ // combined away as an artifact with a matching extend.
+ uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
+ uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
+ &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+
+ LLT PtrTy = MRI.getType(PtrReg);
+ unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
+ LLT AnyExtTy = LLT::scalar(AnyExtSize);
+ Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
+ auto LargeLoad =
+ MIRBuilder.buildLoad(LargeLdReg, PtrReg, *LargeMMO);
+
+ auto OffsetCst =
+ MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+ auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
+ *SmallMMO);
+
+ auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
+ auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
+ auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
+ MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
+ MI.eraseFromParent();
+ return Legalized;
+ }
MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
MI.eraseFromParent();
return Legalized;
@@ -1723,6 +2098,51 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+ case TargetOpcode::G_STORE: {
+ // Lower a non-power of 2 store into multiple pow-2 stores.
+ // E.g. split an i24 store into an i16 store + i8 store.
+ // We do this by first extending the stored value to the next largest power
+ // of 2 type, and then using truncating stores to store the components.
+    // By doing this, as with G_LOAD, we generate an extend that can be
+ // artifact-combined away instead of leaving behind extracts.
+ Register SrcReg = MI.getOperand(0).getReg();
+ Register PtrReg = MI.getOperand(1).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+ MachineMemOperand &MMO = **MI.memoperands_begin();
+ if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
+ return UnableToLegalize;
+ if (SrcTy.isVector())
+ return UnableToLegalize;
+ if (isPowerOf2_32(SrcTy.getSizeInBits()))
+ return UnableToLegalize; // Don't know what we're being asked to do.
+
+ // Extend to the next pow-2.
+ const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
+ auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
+
+ // Obtain the smaller value by shifting away the larger value.
+ uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
+ uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
+ auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
+ auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
+
+ // Generate the GEP and truncating stores.
+ LLT PtrTy = MRI.getType(PtrReg);
+ auto OffsetCst =
+ MIRBuilder.buildConstant(LLT::scalar(64), LargeSplitSize / 8);
+ Register GEPReg = MRI.createGenericVirtualRegister(PtrTy);
+ auto SmallPtr = MIRBuilder.buildGEP(GEPReg, PtrReg, OffsetCst.getReg(0));
+
+ MachineFunction &MF = MIRBuilder.getMF();
+ MachineMemOperand *LargeMMO =
+ MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
+ MachineMemOperand *SmallMMO =
+ MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
+ MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
+ MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
+ MI.eraseFromParent();
+ return Legalized;
+ }
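
A byte-level model of the store split above, assuming a little-endian host: an i24 value becomes an i16 store at offset 0 plus an i8 store of the value shifted right by LargeSplitSize at offset 2 (sketch only; names hypothetical):

    #include <cstdint>
    #include <cstring>

    static void storeI24(uint8_t *P, uint32_t V) {
      uint16_t Large = static_cast<uint16_t>(V);     // truncating i16 store
      uint8_t Small = static_cast<uint8_t>(V >> 16); // lshr by LargeSplitSize
      std::memcpy(P, &Large, 2);                     // offset 0
      std::memcpy(P + 2, &Small, 1);                 // offset LargeSplitSize / 8
    }

    int main() {
      uint8_t Buf[3];
      storeI24(Buf, 0x00ABCDEF);
      return Buf[0] == 0xEF && Buf[1] == 0xCD && Buf[2] == 0xAB ? 0 : 1;
    }
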
case TargetOpcode::G_CTLZ_ZERO_UNDEF:
case TargetOpcode::G_CTTZ_ZERO_UNDEF:
case TargetOpcode::G_CTLZ:
@@ -1797,6 +2217,8 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return lowerUITOFP(MI, TypeIdx, Ty);
case G_SITOFP:
return lowerSITOFP(MI, TypeIdx, Ty);
+ case G_FPTOUI:
+ return lowerFPTOUI(MI, TypeIdx, Ty);
case G_SMIN:
case G_SMAX:
case G_UMIN:
@@ -1807,6 +2229,31 @@ LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
case G_FMINNUM:
case G_FMAXNUM:
return lowerFMinNumMaxNum(MI);
+ case G_UNMERGE_VALUES:
+ return lowerUnmergeValues(MI);
+ case TargetOpcode::G_SEXT_INREG: {
+ assert(MI.getOperand(2).isImm() && "Expected immediate");
+ int64_t SizeInBits = MI.getOperand(2).getImm();
+
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
+
+    auto MIBSz = MIRBuilder.buildConstant(
+        DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
+    MIRBuilder.buildInstr(TargetOpcode::G_SHL, {TmpRes},
+                          {SrcReg, MIBSz->getOperand(0).getReg()});
+    MIRBuilder.buildInstr(TargetOpcode::G_ASHR, {DstReg},
+                          {TmpRes, MIBSz->getOperand(0).getReg()});
+ MI.eraseFromParent();
+ return Legalized;
+ }
+ case G_SHUFFLE_VECTOR:
+ return lowerShuffleVector(MI);
+ case G_DYN_STACKALLOC:
+ return lowerDynStackAlloc(MI);
+ case G_EXTRACT:
+ return lowerExtract(MI);
+ case G_INSERT:
+ return lowerInsert(MI);
}
}
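
The G_SEXT_INREG lowering above is the usual shift-pair identity; a scalar sketch (helper name hypothetical, not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Sign-extend the low SizeInBits bits of V: shift the field to the top,
    // then arithmetic-shift it back down (G_SHL followed by G_ASHR).
    static int64_t sextInReg(int64_t V, unsigned SizeInBits) {
      unsigned Shift = 64 - SizeInBits;
      return static_cast<int64_t>(static_cast<uint64_t>(V) << Shift) >> Shift;
    }

    int main() {
      assert(sextInReg(0xFF, 8) == -1);
      assert(sextInReg(0x7F, 8) == 127);
      return 0;
    }
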
@@ -2283,6 +2730,105 @@ LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
}
LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ const int NumDst = MI.getNumOperands() - 1;
+ const Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+
+ // TODO: Create sequence of extracts.
+ if (DstTy == NarrowTy)
+ return UnableToLegalize;
+
+ LLT GCDTy = getGCDType(SrcTy, NarrowTy);
+ if (DstTy == GCDTy) {
+ // This would just be a copy of the same unmerge.
+ // TODO: Create extracts, pad with undef and create intermediate merges.
+ return UnableToLegalize;
+ }
+
+ auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
+ const int NumUnmerge = Unmerge->getNumOperands() - 1;
+ const int PartsPerUnmerge = NumDst / NumUnmerge;
+
+ for (int I = 0; I != NumUnmerge; ++I) {
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+
+ for (int J = 0; J != PartsPerUnmerge; ++J)
+ MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
+ MIB.addUse(Unmerge.getReg(I));
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::fewerElementsVectorBuildVector(MachineInstr &MI,
+ unsigned TypeIdx,
+ LLT NarrowTy) {
+ assert(TypeIdx == 0 && "not a vector type index");
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(DstReg);
+ LLT SrcTy = DstTy.getElementType();
+
+ int DstNumElts = DstTy.getNumElements();
+ int NarrowNumElts = NarrowTy.getNumElements();
+ int NumConcat = (DstNumElts + NarrowNumElts - 1) / NarrowNumElts;
+ LLT WidenedDstTy = LLT::vector(NarrowNumElts * NumConcat, SrcTy);
+
+ SmallVector<Register, 8> ConcatOps;
+ SmallVector<Register, 8> SubBuildVector;
+
+ Register UndefReg;
+ if (WidenedDstTy != DstTy)
+ UndefReg = MIRBuilder.buildUndef(SrcTy).getReg(0);
+
+ // Create a G_CONCAT_VECTORS of NarrowTy pieces, padding with undef as
+ // necessary.
+ //
+ // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
+ // -> <2 x s16>
+ //
+ // %4:_(s16) = G_IMPLICIT_DEF
+ // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
+ // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
+ // %7:_(<4 x s16>) = G_CONCAT_VECTORS %5, %6
+ // %3:_(<3 x s16>) = G_EXTRACT %7, 0
+ for (int I = 0; I != NumConcat; ++I) {
+ for (int J = 0; J != NarrowNumElts; ++J) {
+ int SrcIdx = NarrowNumElts * I + J;
+
+ if (SrcIdx < DstNumElts) {
+ Register SrcReg = MI.getOperand(SrcIdx + 1).getReg();
+ SubBuildVector.push_back(SrcReg);
+ } else
+ SubBuildVector.push_back(UndefReg);
+ }
+
+ auto BuildVec = MIRBuilder.buildBuildVector(NarrowTy, SubBuildVector);
+ ConcatOps.push_back(BuildVec.getReg(0));
+ SubBuildVector.clear();
+ }
+
+ if (DstTy == WidenedDstTy)
+ MIRBuilder.buildConcatVectors(DstReg, ConcatOps);
+ else {
+ auto Concat = MIRBuilder.buildConcatVectors(WidenedDstTy, ConcatOps);
+ MIRBuilder.buildExtract(DstReg, Concat, 0);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
LLT NarrowTy) {
// FIXME: Don't know how to handle secondary types yet.
@@ -2395,6 +2941,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FDIV:
case G_FREM:
case G_FMA:
+ case G_FMAD:
case G_FPOW:
case G_FEXP:
case G_FEXP2:
@@ -2411,6 +2958,7 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
case G_FSIN:
case G_FSQRT:
case G_BSWAP:
+ case G_BITREVERSE:
case G_SDIV:
case G_SMIN:
case G_SMAX:
@@ -2453,6 +3001,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
case G_PHI:
return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
+ case G_UNMERGE_VALUES:
+ return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
+ case G_BUILD_VECTOR:
+ return fewerElementsVectorBuildVector(MI, TypeIdx, NarrowTy);
case G_LOAD:
case G_STORE:
return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
@@ -2604,11 +3156,11 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
switch (MI.getOpcode()) {
case TargetOpcode::G_SHL: {
// Short: ShAmt < NewBitSize
- auto LoS = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
- auto OrLHS = MIRBuilder.buildShl(HalfTy, InH, Amt);
- auto OrRHS = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
- auto HiS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
+ auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long: ShAmt >= NewBitSize
auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
@@ -2622,41 +3174,25 @@ LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
ResultRegs[1] = Hi.getReg(0);
break;
}
- case TargetOpcode::G_LSHR: {
- // Short: ShAmt < NewBitSize
- auto HiS = MIRBuilder.buildLShr(HalfTy, InH, Amt);
-
- auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
- auto OrRHS = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
- auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
-
- // Long: ShAmt >= NewBitSize
- auto HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
- auto LoL = MIRBuilder.buildLShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
-
- auto Lo = MIRBuilder.buildSelect(
- HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
- auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
-
- ResultRegs[0] = Lo.getReg(0);
- ResultRegs[1] = Hi.getReg(0);
- break;
- }
+ case TargetOpcode::G_LSHR:
case TargetOpcode::G_ASHR: {
// Short: ShAmt < NewBitSize
- auto HiS = MIRBuilder.buildAShr(HalfTy, InH, Amt);
+ auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
- auto OrLHS = MIRBuilder.buildLShr(HalfTy, InL, Amt);
- auto OrRHS = MIRBuilder.buildLShr(HalfTy, InH, AmtLack);
- auto LoS = MIRBuilder.buildOr(HalfTy, OrLHS, OrRHS);
+ auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
+ auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
+ auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
// Long: ShAmt >= NewBitSize
-
- // Sign of Hi part.
- auto HiL = MIRBuilder.buildAShr(
- HalfTy, InH, MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1));
-
- auto LoL = MIRBuilder.buildAShr(HalfTy, InH, AmtExcess); // Lo from Hi part.
+ MachineInstrBuilder HiL;
+ if (MI.getOpcode() == TargetOpcode::G_LSHR) {
+ HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
+ } else {
+ auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
+ HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
+ }
+ auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
+ {InH, AmtExcess}); // Lo from Hi part.
auto Lo = MIRBuilder.buildSelect(
HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
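
A scalar model of the narrowed shift for the G_SHL case above, assuming a 64-bit shift performed on 32-bit halves with Amt < 64; the Amt == 0 branch stands in for the IsZero select (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static void shl64(uint32_t InL, uint32_t InH, unsigned Amt, uint32_t &Lo,
                      uint32_t &Hi) {
      if (Amt == 0) {                       // IsZero: pass the input through
        Lo = InL;
        Hi = InH;
      } else if (Amt < 32) {                // Short: ShAmt < NewBitSize
        Lo = InL << Amt;
        Hi = (InH << Amt) | (InL >> (32 - Amt));
      } else {                              // Long: ShAmt >= NewBitSize
        Lo = 0;
        Hi = InL << (Amt - 32);
      }
    }

    int main() {
      uint32_t Lo, Hi;
      shl64(0x80000001u, 0, 1, Lo, Hi);
      assert(Lo == 2u && Hi == 1u);
      return 0;
    }
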
@@ -2701,12 +3237,22 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
MIRBuilder.setInstr(MI);
unsigned Opc = MI.getOpcode();
switch (Opc) {
- case TargetOpcode::G_IMPLICIT_DEF: {
+ case TargetOpcode::G_IMPLICIT_DEF:
+ case TargetOpcode::G_LOAD: {
+ if (TypeIdx != 0)
+ return UnableToLegalize;
Observer.changingInstr(MI);
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
}
+ case TargetOpcode::G_STORE:
+ if (TypeIdx != 0)
+ return UnableToLegalize;
+ Observer.changingInstr(MI);
+ moreElementsVectorSrc(MI, MoreTy, 0);
+ Observer.changedInstr(MI);
+ return Legalized;
case TargetOpcode::G_AND:
case TargetOpcode::G_OR:
case TargetOpcode::G_XOR:
@@ -2748,6 +3294,26 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
moreElementsVectorDst(MI, MoreTy, 0);
Observer.changedInstr(MI);
return Legalized;
+ case TargetOpcode::G_UNMERGE_VALUES: {
+ if (TypeIdx != 1)
+ return UnableToLegalize;
+
+ LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
+ int NumDst = MI.getNumOperands() - 1;
+ moreElementsVectorSrc(MI, MoreTy, NumDst);
+
+ auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
+ for (int I = 0; I != NumDst; ++I)
+ MIB.addDef(MI.getOperand(I).getReg());
+
+ int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
+ for (int I = NumDst; I != NewNumDst; ++I)
+ MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
+
+ MIB.addUse(MI.getOperand(NumDst).getReg());
+ MI.eraseFromParent();
+ return Legalized;
+ }
case TargetOpcode::G_PHI:
return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
default:
@@ -3310,6 +3876,48 @@ LegalizerHelper::lowerSITOFP(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
return UnableToLegalize;
}
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerFPTOUI(MachineInstr &MI, unsigned TypeIdx, LLT Ty) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+ const LLT S64 = LLT::scalar(64);
+ const LLT S32 = LLT::scalar(32);
+
+ if (SrcTy != S64 && SrcTy != S32)
+ return UnableToLegalize;
+ if (DstTy != S32 && DstTy != S64)
+ return UnableToLegalize;
+
+  // FPTOSI gives the same result as FPTOUI for positive signed integers.
+  // FPTOUI additionally has to handle fp values that convert to unsigned
+  // integers greater than or equal to 2^31 for float or 2^63 for double;
+  // call this bound 2^Exp for brevity.
+
+ APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
+ APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
+ : APFloat::IEEEdouble(),
+ APInt::getNullValue(SrcTy.getSizeInBits()));
+ TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
+
+ MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
+
+ MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
+  // For fp values greater than or equal to Threshold (2^Exp), use FPTOSI on
+  // (Value - 2^Exp) and add 2^Exp back by setting the result's highest bit.
+ MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
+ MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
+ MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
+ MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
+
+ MachineInstrBuilder FCMP =
+ MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, DstTy, Src, Threshold);
+ MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
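
A scalar model of lowerFPTOUI for the f64 to u64 case: values below 2^63 take the plain FPTOSI path, larger ones convert (Value - 2^63) and XOR the top bit back in (sketch only, not the MIR the helper emits):

    #include <cassert>
    #include <cstdint>

    static uint64_t fptoui64(double X) {
      const double Threshold = 9223372036854775808.0; // 2^63
      if (X < Threshold)                              // the FCMP ult + select
        return static_cast<uint64_t>(static_cast<int64_t>(X));
      uint64_t Low = static_cast<uint64_t>(static_cast<int64_t>(X - Threshold));
      return Low ^ (1ULL << 63);                      // set the 2^Exp bit back
    }

    int main() {
      assert(fptoui64(42.0) == 42);
      assert(fptoui64(9223372036854775808.0) == (1ULL << 63));
      return 0;
    }
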
static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
switch (Opc) {
case TargetOpcode::G_SMIN:
@@ -3419,3 +4027,251 @@ LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
MI.eraseFromParent();
return Legalized;
}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
+ // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
+ Register DstReg = MI.getOperand(0).getReg();
+ LLT Ty = MRI.getType(DstReg);
+ unsigned Flags = MI.getFlags();
+
+ auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
+ Flags);
+ MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
+ const unsigned NumDst = MI.getNumOperands() - 1;
+ const Register SrcReg = MI.getOperand(NumDst).getReg();
+ LLT SrcTy = MRI.getType(SrcReg);
+
+ Register Dst0Reg = MI.getOperand(0).getReg();
+ LLT DstTy = MRI.getType(Dst0Reg);
+
+ // Expand scalarizing unmerge as bitcast to integer and shift.
+ if (!DstTy.isVector() && SrcTy.isVector() &&
+ SrcTy.getElementType() == DstTy) {
+ LLT IntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Register Cast = MIRBuilder.buildBitcast(IntTy, SrcReg).getReg(0);
+
+ MIRBuilder.buildTrunc(Dst0Reg, Cast);
+
+ const unsigned DstSize = DstTy.getSizeInBits();
+ unsigned Offset = DstSize;
+ for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
+ auto Shift = MIRBuilder.buildLShr(IntTy, Cast, ShiftAmt);
+ MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
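
A scalar model of the unmerge expansion above for a <4 x s16> source bitcast to s64: each destination is recovered by a shift and a truncate (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static void unmerge4x16(uint64_t Cast, uint16_t Out[4]) {
      Out[0] = static_cast<uint16_t>(Cast); // plain trunc for the first piece
      for (unsigned I = 1; I != 4; ++I)
        Out[I] = static_cast<uint16_t>(Cast >> (16 * I)); // lshr + trunc
    }

    int main() {
      uint16_t Out[4];
      unmerge4x16(0x4444333322221111ULL, Out);
      assert(Out[0] == 0x1111 && Out[3] == 0x4444);
      return 0;
    }
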
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register Src0Reg = MI.getOperand(1).getReg();
+ Register Src1Reg = MI.getOperand(2).getReg();
+ LLT Src0Ty = MRI.getType(Src0Reg);
+ LLT DstTy = MRI.getType(DstReg);
+ LLT IdxTy = LLT::scalar(32);
+
+ const Constant *ShufMask = MI.getOperand(3).getShuffleMask();
+
+ SmallVector<int, 32> Mask;
+ ShuffleVectorInst::getShuffleMask(ShufMask, Mask);
+
+ if (DstTy.isScalar()) {
+ if (Src0Ty.isVector())
+ return UnableToLegalize;
+
+ // This is just a SELECT.
+ assert(Mask.size() == 1 && "Expected a single mask element");
+ Register Val;
+ if (Mask[0] < 0 || Mask[0] > 1)
+ Val = MIRBuilder.buildUndef(DstTy).getReg(0);
+ else
+ Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
+ MIRBuilder.buildCopy(DstReg, Val);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ Register Undef;
+ SmallVector<Register, 32> BuildVec;
+ LLT EltTy = DstTy.getElementType();
+
+ for (int Idx : Mask) {
+ if (Idx < 0) {
+ if (!Undef.isValid())
+ Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
+ BuildVec.push_back(Undef);
+ continue;
+ }
+
+ if (Src0Ty.isScalar()) {
+ BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
+ } else {
+ int NumElts = Src0Ty.getNumElements();
+ Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
+ int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
+ auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
+ auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
+ BuildVec.push_back(Extract.getReg(0));
+ }
+ }
+
+ MIRBuilder.buildBuildVector(DstReg, BuildVec);
+ MI.eraseFromParent();
+ return Legalized;
+}
+
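
A scalar model of the shuffle expansion above: each result element is extracted from one of the two sources according to the mask, and a negative index produces an undef slot, modeled here as 0 (sketch only; names hypothetical):

    #include <cassert>
    #include <vector>

    static std::vector<int> shuffle(const std::vector<int> &A,
                                    const std::vector<int> &B,
                                    const std::vector<int> &Mask) {
      std::vector<int> R;
      const int N = static_cast<int>(A.size());
      for (int Idx : Mask) {
        if (Idx < 0) {
          R.push_back(0); // undef slot (G_IMPLICIT_DEF element)
          continue;
        }
        R.push_back(Idx < N ? A[Idx] : B[Idx - N]); // extract-vector-element
      }
      return R; // the final G_BUILD_VECTOR
    }

    int main() {
      assert((shuffle({1, 2}, {3, 4}, {0, 3}) == std::vector<int>{1, 4}));
      return 0;
    }
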
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register AllocSize = MI.getOperand(1).getReg();
+ unsigned Align = MI.getOperand(2).getImm();
+
+ const auto &MF = *MI.getMF();
+ const auto &TLI = *MF.getSubtarget().getTargetLowering();
+
+ LLT PtrTy = MRI.getType(Dst);
+ LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
+
+ Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
+ auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
+ SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
+
+ // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
+ // have to generate an extra instruction to negate the alloc and then use
+ // G_GEP to add the negative offset.
+ auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
+ if (Align) {
+ APInt AlignMask(IntPtrTy.getSizeInBits(), Align, true);
+ AlignMask.negate();
+ auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
+ Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
+ }
+
+ SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
+ MIRBuilder.buildCopy(SPReg, SPTmp);
+ MIRBuilder.buildCopy(Dst, SPTmp);
+
+ MI.eraseFromParent();
+ return Legalized;
+}
+
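
A scalar model of the dynamic stack allocation lowering above, assuming a downward-growing stack: subtract the size from SP, then clear the low bits when a power-of-two alignment is requested (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static uint64_t dynAlloca(uint64_t SP, uint64_t Size, uint64_t Align) {
      uint64_t P = SP - Size; // G_SUB on the int-typed copy of SP
      if (Align)
        P &= ~(Align - 1);    // G_AND with the negated alignment mask
      return P;               // written back as both the new SP and the result
    }

    int main() {
      assert(dynAlloca(0x1000, 20, 16) == 0xFE0);
      return 0;
    }
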
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerExtract(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ unsigned Offset = MI.getOperand(2).getImm();
+
+ LLT DstTy = MRI.getType(Dst);
+ LLT SrcTy = MRI.getType(Src);
+
+ if (DstTy.isScalar() &&
+ (SrcTy.isScalar() ||
+ (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
+ LLT SrcIntTy = SrcTy;
+ if (!SrcTy.isScalar()) {
+ SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
+ Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
+ }
+
+ if (Offset == 0)
+ MIRBuilder.buildTrunc(Dst, Src);
+ else {
+ auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
+ auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
+ MIRBuilder.buildTrunc(Dst, Shr);
+ }
+
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
+LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
+ Register Dst = MI.getOperand(0).getReg();
+ Register Src = MI.getOperand(1).getReg();
+ Register InsertSrc = MI.getOperand(2).getReg();
+ uint64_t Offset = MI.getOperand(3).getImm();
+
+ LLT DstTy = MRI.getType(Src);
+ LLT InsertTy = MRI.getType(InsertSrc);
+
+ if (InsertTy.isScalar() &&
+ (DstTy.isScalar() ||
+ (DstTy.isVector() && DstTy.getElementType() == InsertTy))) {
+ LLT IntDstTy = DstTy;
+ if (!DstTy.isScalar()) {
+ IntDstTy = LLT::scalar(DstTy.getSizeInBits());
+ Src = MIRBuilder.buildBitcast(IntDstTy, Src).getReg(0);
+ }
+
+ Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
+ if (Offset != 0) {
+ auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
+ ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
+ }
+
+ APInt MaskVal = ~APInt::getBitsSet(DstTy.getSizeInBits(), Offset,
+ InsertTy.getSizeInBits());
+
+ auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
+ auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
+ auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
+
+ MIRBuilder.buildBitcast(Dst, Or);
+ MI.eraseFromParent();
+ return Legalized;
+ }
+
+ return UnableToLegalize;
+}
+
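
A scalar model of the insert lowering above: clear the destination bit range with a mask, then OR in the shifted value; the final AND with the mask stands in for the zero-extension of InsertSrc (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static uint64_t insertBits(uint64_t Dst, uint64_t Src, unsigned Offset,
                               unsigned Width) {
      uint64_t Field = Width < 64 ? (1ULL << Width) - 1 : ~0ULL;
      uint64_t Mask = Field << Offset;                 // bits being replaced
      return (Dst & ~Mask) | ((Src << Offset) & Mask); // and-not, then or
    }

    int main() {
      assert(insertBits(0xFFFFFFFFu, 0xAB, 8, 8) == 0xFFFFABFFu);
      return 0;
    }
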
+LegalizerHelper::LegalizeResult
+LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
+ Register Dst0 = MI.getOperand(0).getReg();
+ Register Dst1 = MI.getOperand(1).getReg();
+ Register LHS = MI.getOperand(2).getReg();
+ Register RHS = MI.getOperand(3).getReg();
+ const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
+
+ LLT Ty = MRI.getType(Dst0);
+ LLT BoolTy = MRI.getType(Dst1);
+
+ if (IsAdd)
+ MIRBuilder.buildAdd(Dst0, LHS, RHS);
+ else
+ MIRBuilder.buildSub(Dst0, LHS, RHS);
+
+ // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
+
+ auto Zero = MIRBuilder.buildConstant(Ty, 0);
+
+ // For an addition, the result should be less than one of the operands (LHS)
+ // if and only if the other operand (RHS) is negative, otherwise there will
+ // be overflow.
+ // For a subtraction, the result should be less than one of the operands
+ // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
+ // otherwise there will be overflow.
+ auto ResultLowerThanLHS =
+ MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
+ auto ConditionRHS = MIRBuilder.buildICmp(
+ IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
+
+ MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
+ MI.eraseFromParent();
+ return Legalized;
+}
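
A scalar model of the overflow test in lowerSADDO_SSUBO for the add case: signed overflow occurred exactly when (result < LHS) disagrees with (RHS < 0), which is what the final G_XOR computes (sketch only; names hypothetical):

    #include <cassert>
    #include <cstdint>

    static bool saddo(int64_t L, int64_t R, int64_t &Res) {
      Res = static_cast<int64_t>(static_cast<uint64_t>(L) +
                                 static_cast<uint64_t>(R)); // wrapping G_ADD
      return (Res < L) != (R < 0);                          // the final G_XOR
    }

    int main() {
      int64_t Res;
      assert(!saddo(1, 2, Res) && Res == 3);
      assert(saddo(INT64_MAX, 1, Res)); // wraps negative: overflow detected
      return 0;
    }
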
diff --git a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
index 6e1de95b3277..70045512fae5 100644
--- a/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
+++ b/lib/CodeGen/GlobalISel/LegalizerInfo.cpp
@@ -215,7 +215,30 @@ bool LegalizeRuleSet::verifyTypeIdxsCoverage(unsigned NumTypeIdxs) const {
return true;
}
const bool AllCovered = (FirstUncovered >= NumTypeIdxs);
- LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ if (NumTypeIdxs > 0)
+ LLVM_DEBUG(dbgs() << ".. the first uncovered type index: " << FirstUncovered
+ << ", " << (AllCovered ? "OK" : "FAIL") << "\n");
+ return AllCovered;
+#else
+ return true;
+#endif
+}
+
+bool LegalizeRuleSet::verifyImmIdxsCoverage(unsigned NumImmIdxs) const {
+#ifndef NDEBUG
+ if (Rules.empty()) {
+ LLVM_DEBUG(
+ dbgs() << ".. imm index coverage check SKIPPED: no rules defined\n");
+ return true;
+ }
+ const int64_t FirstUncovered = ImmIdxsCovered.find_first_unset();
+ if (FirstUncovered < 0) {
+ LLVM_DEBUG(dbgs() << ".. imm index coverage check SKIPPED:"
+ " user-defined predicate detected\n");
+ return true;
+ }
+ const bool AllCovered = (FirstUncovered >= NumImmIdxs);
+ LLVM_DEBUG(dbgs() << ".. the first uncovered imm index: " << FirstUncovered
<< ", " << (AllCovered ? "OK" : "FAIL") << "\n");
return AllCovered;
#else
@@ -387,8 +410,6 @@ unsigned LegalizerInfo::getActionDefinitionsIdx(unsigned Opcode) const {
LLVM_DEBUG(dbgs() << ".. opcode " << Opcode << " is aliased to " << Alias
<< "\n");
OpcodeIdx = getOpcodeIdxForOpcode(Alias);
- LLVM_DEBUG(dbgs() << ".. opcode " << Alias << " is aliased to "
- << RulesForOpcode[OpcodeIdx].getAlias() << "\n");
assert(RulesForOpcode[OpcodeIdx].getAlias() == 0 && "Cannot chain aliases");
}
@@ -412,7 +433,7 @@ LegalizeRuleSet &LegalizerInfo::getActionDefinitionsBuilder(
std::initializer_list<unsigned> Opcodes) {
unsigned Representative = *Opcodes.begin();
- assert(!empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
+ assert(!llvm::empty(Opcodes) && Opcodes.begin() + 1 != Opcodes.end() &&
"Initializer list must have at least two opcodes");
for (auto I = Opcodes.begin() + 1, E = Opcodes.end(); I != E; ++I)
@@ -677,12 +698,23 @@ void LegalizerInfo::verify(const MCInstrInfo &MII) const {
? std::max(OpInfo.getGenericTypeIndex() + 1U, Acc)
: Acc;
});
+ const unsigned NumImmIdxs = std::accumulate(
+ MCID.opInfo_begin(), MCID.opInfo_end(), 0U,
+ [](unsigned Acc, const MCOperandInfo &OpInfo) {
+ return OpInfo.isGenericImm()
+ ? std::max(OpInfo.getGenericImmIndex() + 1U, Acc)
+ : Acc;
+ });
LLVM_DEBUG(dbgs() << MII.getName(Opcode) << " (opcode " << Opcode
<< "): " << NumTypeIdxs << " type ind"
- << (NumTypeIdxs == 1 ? "ex" : "ices") << "\n");
+ << (NumTypeIdxs == 1 ? "ex" : "ices") << ", "
+ << NumImmIdxs << " imm ind"
+ << (NumImmIdxs == 1 ? "ex" : "ices") << "\n");
const LegalizeRuleSet &RuleSet = getActionDefinitions(Opcode);
if (!RuleSet.verifyTypeIdxsCoverage(NumTypeIdxs))
FailedOpcodes.push_back(Opcode);
+ else if (!RuleSet.verifyImmIdxsCoverage(NumImmIdxs))
+ FailedOpcodes.push_back(Opcode);
}
if (!FailedOpcodes.empty()) {
errs() << "The following opcodes have ill-defined legalization rules:";
diff --git a/lib/CodeGen/GlobalISel/Localizer.cpp b/lib/CodeGen/GlobalISel/Localizer.cpp
index 3592409710a7..f882ecbf5db3 100644
--- a/lib/CodeGen/GlobalISel/Localizer.cpp
+++ b/lib/CodeGen/GlobalISel/Localizer.cpp
@@ -79,7 +79,7 @@ bool Localizer::shouldLocalize(const MachineInstr &MI) {
return true;
case TargetOpcode::G_GLOBAL_VALUE: {
unsigned RematCost = TTI->getGISelRematGlobalCost();
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
unsigned MaxUses = maxUses(RematCost);
if (MaxUses == UINT_MAX)
return true; // Remats are "free" so always localize.
@@ -121,7 +121,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LLVM_DEBUG(dbgs() << "Should localize: " << MI);
assert(MI.getDesc().getNumDefs() == 1 &&
"More than one definition not supported yet");
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
// Check if all the users of MI are local.
// We are going to invalidate the list of use operands, so we
// can't use a range iterator here.
@@ -151,7 +151,7 @@ bool Localizer::localizeInterBlock(MachineFunction &MF,
LocalizedMI);
// Set a new register for the definition.
- unsigned NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
+ Register NewReg = MRI->createGenericVirtualRegister(MRI->getType(Reg));
MRI->setRegClassOrRegBank(NewReg, MRI->getRegClassOrRegBank(Reg));
LocalizedMI->getOperand(0).setReg(NewReg);
NewVRegIt =
@@ -177,7 +177,7 @@ bool Localizer::localizeIntraBlock(LocalizedSetVecT &LocalizedInstrs) {
// many users, but this case may be better served by regalloc improvements.
for (MachineInstr *MI : LocalizedInstrs) {
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
MachineBasicBlock &MBB = *MI->getParent();
// All of the user MIs of this reg.
SmallPtrSet<MachineInstr *, 32> Users;
@@ -220,5 +220,6 @@ bool Localizer::runOnMachineFunction(MachineFunction &MF) {
LocalizedSetVecT LocalizedInstrs;
bool Changed = localizeInterBlock(MF, LocalizedInstrs);
- return Changed |= localizeIntraBlock(LocalizedInstrs);
+ Changed |= localizeIntraBlock(LocalizedInstrs);
+ return Changed;
}
diff --git a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
index b7a73326b85c..df770f6664ca 100644
--- a/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
+++ b/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp
@@ -107,9 +107,13 @@ MachineIRBuilder::buildIndirectDbgValue(Register Reg, const MDNode *Variable,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
+ // DBG_VALUE insts now carry IR-level indirection in their DIExpression
+ // rather than encoding it in the instruction itself.
+ const DIExpression *DIExpr = cast<DIExpression>(Expr);
+ DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return insertInstr(BuildMI(getMF(), getDL(),
getTII().get(TargetOpcode::DBG_VALUE),
- /*IsIndirect*/ true, Reg, Variable, Expr));
+ /*IsIndirect*/ false, Reg, Variable, DIExpr));
}
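// Editor's note (not part of this patch): roughly, this changes the emitted
// MIR from an indirect DBG_VALUE to a direct one with a DW_OP_deref folded
// into the expression, e.g. (variable and register names hypothetical):
//   before: DBG_VALUE $rdi, 0, !"x", !DIExpression()
//   after:  DBG_VALUE $rdi, $noreg, !"x", !DIExpression(DW_OP_deref)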
MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
@@ -120,11 +124,15 @@ MachineInstrBuilder MachineIRBuilder::buildFIDbgValue(int FI,
assert(
cast<DILocalVariable>(Variable)->isValidLocationForIntrinsic(getDL()) &&
"Expected inlined-at fields to agree");
+ // DBG_VALUE insts now carry IR-level indirection in their DIExpression
+ // rather than encoding it in the instruction itself.
+ const DIExpression *DIExpr = cast<DIExpression>(Expr);
+ DIExpr = DIExpression::append(DIExpr, {dwarf::DW_OP_deref});
return buildInstr(TargetOpcode::DBG_VALUE)
.addFrameIndex(FI)
- .addImm(0)
+ .addReg(0)
.addMetadata(Variable)
- .addMetadata(Expr);
+ .addMetadata(DIExpr);
}
MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
@@ -148,7 +156,7 @@ MachineInstrBuilder MachineIRBuilder::buildConstDbgValue(const Constant &C,
MIB.addReg(0U);
}
- return MIB.addImm(0).addMetadata(Variable).addMetadata(Expr);
+ return MIB.addReg(0).addMetadata(Variable).addMetadata(Expr);
}
MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
@@ -160,6 +168,17 @@ MachineInstrBuilder MachineIRBuilder::buildDbgLabel(const MDNode *Label) {
return MIB.addMetadata(Label);
}
+MachineInstrBuilder MachineIRBuilder::buildDynStackAlloc(const DstOp &Res,
+ const SrcOp &Size,
+ unsigned Align) {
+ assert(Res.getLLTTy(*getMRI()).isPointer() && "expected ptr dst type");
+ auto MIB = buildInstr(TargetOpcode::G_DYN_STACKALLOC);
+ Res.addDefToMIB(*getMRI(), MIB);
+ Size.addSrcToMIB(MIB);
+ MIB.addImm(Align);
+ return MIB;
+}
+
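// Usage sketch for the new helper (assumed builder state and types, not part
// of this patch): allocate 128 bytes on the stack with 16-byte alignment.
LLT S64 = LLT::scalar(64);
LLT P0 = LLT::pointer(0, 64);
auto Size = MIRBuilder.buildConstant(S64, 128);
auto Ptr = MIRBuilder.buildDynStackAlloc(P0, Size, /*Align=*/16);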
MachineInstrBuilder MachineIRBuilder::buildFrameIndex(const DstOp &Res,
int Idx) {
assert(Res.getLLTTy(*getMRI()).isPointer() && "invalid operand type");
@@ -207,11 +226,7 @@ MachineInstrBuilder MachineIRBuilder::buildGEP(const DstOp &Res,
Res.getLLTTy(*getMRI()) == Op0.getLLTTy(*getMRI()) && "type mismatch");
assert(Op1.getLLTTy(*getMRI()).isScalar() && "invalid offset type");
- auto MIB = buildInstr(TargetOpcode::G_GEP);
- Res.addDefToMIB(*getMRI(), MIB);
- Op0.addSrcToMIB(MIB);
- Op1.addSrcToMIB(MIB);
- return MIB;
+ return buildInstr(TargetOpcode::G_GEP, {Res}, {Op0, Op1});
}
Optional<MachineInstrBuilder>
@@ -697,17 +712,19 @@ MachineInstrBuilder MachineIRBuilder::buildICmp(CmpInst::Predicate Pred,
MachineInstrBuilder MachineIRBuilder::buildFCmp(CmpInst::Predicate Pred,
const DstOp &Res,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1});
+ return buildInstr(TargetOpcode::G_FCMP, Res, {Pred, Op0, Op1}, Flags);
}
MachineInstrBuilder MachineIRBuilder::buildSelect(const DstOp &Res,
const SrcOp &Tst,
const SrcOp &Op0,
- const SrcOp &Op1) {
+ const SrcOp &Op1,
+ Optional<unsigned> Flags) {
- return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1});
+ return buildInstr(TargetOpcode::G_SELECT, {Res}, {Tst, Op0, Op1}, Flags);
}
MachineInstrBuilder
@@ -774,26 +791,28 @@ MachineIRBuilder::buildAtomicCmpXchg(Register OldValRes, Register Addr,
.addMemOperand(&MMO);
}
-MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(unsigned Opcode,
- Register OldValRes,
- Register Addr,
- Register Val,
- MachineMemOperand &MMO) {
+MachineInstrBuilder MachineIRBuilder::buildAtomicRMW(
+ unsigned Opcode, const DstOp &OldValRes,
+ const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+
#ifndef NDEBUG
- LLT OldValResTy = getMRI()->getType(OldValRes);
- LLT AddrTy = getMRI()->getType(Addr);
- LLT ValTy = getMRI()->getType(Val);
+ LLT OldValResTy = OldValRes.getLLTTy(*getMRI());
+ LLT AddrTy = Addr.getLLTTy(*getMRI());
+ LLT ValTy = Val.getLLTTy(*getMRI());
assert(OldValResTy.isScalar() && "invalid operand type");
assert(AddrTy.isPointer() && "invalid operand type");
assert(ValTy.isValid() && "invalid operand type");
assert(OldValResTy == ValTy && "type mismatch");
+ assert(MMO.isAtomic() && "not atomic mem operand");
#endif
- return buildInstr(Opcode)
- .addDef(OldValRes)
- .addUse(Addr)
- .addUse(Val)
- .addMemOperand(&MMO);
+ auto MIB = buildInstr(Opcode);
+ OldValRes.addDefToMIB(*getMRI(), MIB);
+ Addr.addSrcToMIB(MIB);
+ Val.addSrcToMIB(MIB);
+ MIB.addMemOperand(&MMO);
+ return MIB;
}
MachineInstrBuilder
@@ -865,6 +884,21 @@ MachineIRBuilder::buildAtomicRMWUmin(Register OldValRes, Register Addr,
}
MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFAdd(
+ const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FADD, OldValRes, Addr, Val,
+ MMO);
+}
+
+MachineInstrBuilder
+MachineIRBuilder::buildAtomicRMWFSub(const DstOp &OldValRes, const SrcOp &Addr, const SrcOp &Val,
+ MachineMemOperand &MMO) {
+ return buildAtomicRMW(TargetOpcode::G_ATOMICRMW_FSUB, OldValRes, Addr, Val,
+ MMO);
+}
+
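// Usage sketch (AddrReg, ValReg and MachineMemOperand MMO are assumed to
// exist; not part of this patch): atomically add a 32-bit value at AddrReg,
// yielding the old value as a new virtual register.
auto OldVal =
    MIRBuilder.buildAtomicRMWFAdd(LLT::scalar(32), AddrReg, ValReg, MMO);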
+MachineInstrBuilder
MachineIRBuilder::buildFence(unsigned Ordering, unsigned Scope) {
return buildInstr(TargetOpcode::G_FENCE)
.addImm(Ordering)
@@ -1037,8 +1071,11 @@ MachineInstrBuilder MachineIRBuilder::buildInstr(unsigned Opc,
"input operands do not cover output register");
if (SrcOps.size() == 1)
return buildCast(DstOps[0], SrcOps[0]);
- if (DstOps[0].getLLTTy(*getMRI()).isVector())
- return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ if (DstOps[0].getLLTTy(*getMRI()).isVector()) {
+ if (SrcOps[0].getLLTTy(*getMRI()).isVector())
+ return buildInstr(TargetOpcode::G_CONCAT_VECTORS, DstOps, SrcOps);
+ return buildInstr(TargetOpcode::G_BUILD_VECTOR, DstOps, SrcOps);
+ }
break;
}
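// Editor's sketch of the effect (registers A..D, Lo, Hi hypothetical; not
// part of this patch): buildMerge now selects the right opcode for vector
// destinations based on the source types.
LLT V4S32 = LLT::vector(4, 32);
auto BV = MIRBuilder.buildMerge(V4S32, {A, B, C, D}); // scalars -> G_BUILD_VECTOR
auto CV = MIRBuilder.buildMerge(V4S32, {Lo, Hi});     // <2 x s32> -> G_CONCAT_VECTORS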
case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
diff --git a/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
index 42be88fcf947..f0e35c65c53b 100644
--- a/lib/CodeGen/GlobalISel/RegBankSelect.cpp
+++ b/lib/CodeGen/GlobalISel/RegBankSelect.cpp
@@ -92,7 +92,7 @@ void RegBankSelect::init(MachineFunction &MF) {
MBPI = nullptr;
}
MIRBuilder.setMF(MF);
- MORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -139,7 +139,7 @@ bool RegBankSelect::repairReg(
"need new vreg for each breakdown");
// An empty range of new register means no repairing.
- assert(!empty(NewVRegs) && "We should not have to repair");
+ assert(!NewVRegs.empty() && "We should not have to repair");
MachineInstr *MI;
if (ValMapping.NumBreakDowns == 1) {
@@ -154,7 +154,7 @@ bool RegBankSelect::repairReg(
std::swap(Src, Dst);
assert((RepairPt.getNumInsertPoints() == 1 ||
- TargetRegisterInfo::isPhysicalRegister(Dst)) &&
+ Register::isPhysicalRegister(Dst)) &&
"We are about to create several defs for Dst");
// Build the instruction used to repair, then clone it at the right
@@ -398,7 +398,7 @@ void RegBankSelect::tryAvoidingSplit(
// Check if this is a physical or virtual register.
Register Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// We are going to split every outgoing edges.
// Check that this is possible.
// FIXME: The machine representation is currently broken
@@ -687,8 +687,9 @@ bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
// iterator before hand.
MachineInstr &MI = *MII++;
- // Ignore target-specific instructions: they should use proper regclasses.
- if (isTargetSpecificOpcode(MI.getOpcode()))
+ // Ignore target-specific post-isel instructions: they should use proper
+ // regclasses.
+ if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
continue;
if (!assignInstr(MI)) {
diff --git a/lib/CodeGen/GlobalISel/RegisterBank.cpp b/lib/CodeGen/GlobalISel/RegisterBank.cpp
index 4e41f338934d..fc9c802693ab 100644
--- a/lib/CodeGen/GlobalISel/RegisterBank.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBank.cpp
@@ -12,6 +12,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "registerbank"
diff --git a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
index 159422e38878..3fcc55286beb 100644
--- a/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
+++ b/lib/CodeGen/GlobalISel/RegisterBankInfo.cpp
@@ -82,7 +82,7 @@ bool RegisterBankInfo::verify(const TargetRegisterInfo &TRI) const {
const RegisterBank *
RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return &getRegBankFromRegClass(getMinimalPhysRegClass(Reg, TRI));
assert(Reg && "NoRegister does not have a register bank");
@@ -97,8 +97,7 @@ RegisterBankInfo::getRegBank(Register Reg, const MachineRegisterInfo &MRI,
const TargetRegisterClass &
RegisterBankInfo::getMinimalPhysRegClass(Register Reg,
const TargetRegisterInfo &TRI) const {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
- "Reg must be a physreg");
+ assert(Register::isPhysicalRegister(Reg) && "Reg must be a physreg");
const auto &RegRCIt = PhysRegMinimalRCs.find(Reg);
if (RegRCIt != PhysRegMinimalRCs.end())
return *RegRCIt->second;
@@ -284,7 +283,7 @@ RegisterBankInfo::getPartialMapping(unsigned StartIdx, unsigned Length,
++NumPartialMappingsCreated;
auto &PartMapping = MapOfPartialMappings[Hash];
- PartMapping = llvm::make_unique<PartialMapping>(StartIdx, Length, RegBank);
+ PartMapping = std::make_unique<PartialMapping>(StartIdx, Length, RegBank);
return *PartMapping;
}
@@ -318,7 +317,7 @@ RegisterBankInfo::getValueMapping(const PartialMapping *BreakDown,
++NumValueMappingsCreated;
auto &ValMapping = MapOfValueMappings[Hash];
- ValMapping = llvm::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
+ ValMapping = std::make_unique<ValueMapping>(BreakDown, NumBreakDowns);
return *ValMapping;
}
@@ -342,7 +341,7 @@ RegisterBankInfo::getOperandsMapping(Iterator Begin, Iterator End) const {
// mapping, because we use the pointer of the ValueMapping
// to hash and we expect them to uniquely identify an instance
// of value mapping.
- Res = llvm::make_unique<ValueMapping[]>(std::distance(Begin, End));
+ Res = std::make_unique<ValueMapping[]>(std::distance(Begin, End));
unsigned Idx = 0;
for (Iterator It = Begin; It != End; ++It, ++Idx) {
const ValueMapping *ValMap = *It;
@@ -392,7 +391,7 @@ RegisterBankInfo::getInstructionMappingImpl(
++NumInstructionMappingsCreated;
auto &InstrMapping = MapOfInstructionMappings[Hash];
- InstrMapping = llvm::make_unique<InstructionMapping>(
+ InstrMapping = std::make_unique<InstructionMapping>(
ID, Cost, OperandsMapping, NumOperands);
return *InstrMapping;
}
@@ -456,7 +455,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
"This mapping is too complex for this function");
iterator_range<SmallVectorImpl<Register>::const_iterator> NewRegs =
OpdMapper.getVRegs(OpIdx);
- if (empty(NewRegs)) {
+ if (NewRegs.empty()) {
LLVM_DEBUG(dbgs() << " has not been repaired, nothing to be done\n");
continue;
}
@@ -489,7 +488,7 @@ void RegisterBankInfo::applyDefaultMapping(const OperandsMapper &OpdMapper) {
unsigned RegisterBankInfo::getSizeInBits(Register Reg,
const MachineRegisterInfo &MRI,
const TargetRegisterInfo &TRI) const {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
diff --git a/lib/CodeGen/GlobalISel/Utils.cpp b/lib/CodeGen/GlobalISel/Utils.cpp
index 766ea1d60bac..45618d7992ad 100644
--- a/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/lib/CodeGen/GlobalISel/Utils.cpp
@@ -43,10 +43,9 @@ unsigned llvm::constrainOperandRegClass(
const RegisterBankInfo &RBI, MachineInstr &InsertPt,
const TargetRegisterClass &RegClass, const MachineOperand &RegMO,
unsigned OpIdx) {
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "PhysReg not implemented");
+ assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
unsigned ConstrainedReg = constrainRegToClass(MRI, TII, RBI, Reg, RegClass);
// If we created a new virtual register because the class is not compatible
@@ -73,10 +72,9 @@ unsigned llvm::constrainOperandRegClass(
MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
const RegisterBankInfo &RBI, MachineInstr &InsertPt, const MCInstrDesc &II,
const MachineOperand &RegMO, unsigned OpIdx) {
- unsigned Reg = RegMO.getReg();
+ Register Reg = RegMO.getReg();
// Assume physical registers are properly constrained.
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "PhysReg not implemented");
+ assert(Register::isVirtualRegister(Reg) && "PhysReg not implemented");
const TargetRegisterClass *RegClass = TII.getRegClass(II, OpIdx, &TRI, MF);
// Some of the target independent instructions, like COPY, may not impose any
@@ -130,9 +128,9 @@ bool llvm::constrainSelectedInstRegOperands(MachineInstr &I,
LLVM_DEBUG(dbgs() << "Converting operand: " << MO << '\n');
assert(MO.isReg() && "Unsupported non-reg operand");
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Physical registers don't need to be constrained.
- if (TRI.isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
// Register operands with a value of 0 (e.g. predicate operands) don't need
@@ -170,9 +168,8 @@ bool llvm::isTriviallyDead(const MachineInstr &MI,
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI.use_nodbg_empty(Reg))
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg) || !MRI.use_nodbg_empty(Reg))
return false;
}
return true;
@@ -219,11 +216,33 @@ Optional<int64_t> llvm::getConstantVRegVal(unsigned VReg,
}
Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
- unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs) {
+ unsigned VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs,
+ bool HandleFConstant) {
SmallVector<std::pair<unsigned, unsigned>, 4> SeenOpcodes;
MachineInstr *MI;
- while ((MI = MRI.getVRegDef(VReg)) &&
- MI->getOpcode() != TargetOpcode::G_CONSTANT && LookThroughInstrs) {
+ auto IsConstantOpcode = [HandleFConstant](unsigned Opcode) {
+ return Opcode == TargetOpcode::G_CONSTANT ||
+ (HandleFConstant && Opcode == TargetOpcode::G_FCONSTANT);
+ };
+ auto GetImmediateValue = [HandleFConstant,
+ &MRI](const MachineInstr &MI) -> Optional<APInt> {
+ const MachineOperand &CstVal = MI.getOperand(1);
+ if (!CstVal.isImm() && !CstVal.isCImm() &&
+ (!HandleFConstant || !CstVal.isFPImm()))
+ return None;
+ if (!CstVal.isFPImm()) {
+ unsigned BitWidth =
+ MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+ APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
+ : CstVal.getCImm()->getValue();
+ assert(Val.getBitWidth() == BitWidth &&
+ "Value bitwidth doesn't match definition type");
+ return Val;
+ }
+ return CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
+ };
+ while ((MI = MRI.getVRegDef(VReg)) && !IsConstantOpcode(MI->getOpcode()) &&
+ LookThroughInstrs) {
switch (MI->getOpcode()) {
case TargetOpcode::G_TRUNC:
case TargetOpcode::G_SEXT:
@@ -235,7 +254,7 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
break;
case TargetOpcode::COPY:
VReg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VReg))
+ if (Register::isPhysicalRegister(VReg))
return None;
break;
case TargetOpcode::G_INTTOPTR:
@@ -245,16 +264,13 @@ Optional<ValueAndVReg> llvm::getConstantVRegValWithLookThrough(
return None;
}
}
- if (!MI || MI->getOpcode() != TargetOpcode::G_CONSTANT ||
- (!MI->getOperand(1).isImm() && !MI->getOperand(1).isCImm()))
+ if (!MI || !IsConstantOpcode(MI->getOpcode()))
return None;
- const MachineOperand &CstVal = MI->getOperand(1);
- unsigned BitWidth = MRI.getType(MI->getOperand(0).getReg()).getSizeInBits();
- APInt Val = CstVal.isImm() ? APInt(BitWidth, CstVal.getImm())
- : CstVal.getCImm()->getValue();
- assert(Val.getBitWidth() == BitWidth &&
- "Value bitwidth doesn't match definition type");
+ Optional<APInt> MaybeVal = GetImmediateValue(*MI);
+ if (!MaybeVal)
+ return None;
+ APInt &Val = *MaybeVal;
while (!SeenOpcodes.empty()) {
std::pair<unsigned, unsigned> OpcodeAndSize = SeenOpcodes.pop_back_val();
switch (OpcodeAndSize.first) {
@@ -291,7 +307,7 @@ llvm::MachineInstr *llvm::getDefIgnoringCopies(Register Reg,
if (!DstTy.isValid())
return nullptr;
while (DefMI->getOpcode() == TargetOpcode::COPY) {
- unsigned SrcReg = DefMI->getOperand(1).getReg();
+ Register SrcReg = DefMI->getOperand(1).getReg();
auto SrcTy = MRI.getType(SrcReg);
if (!SrcTy.isValid() || SrcTy != DstTy)
break;
@@ -395,6 +411,40 @@ bool llvm::isKnownNeverNaN(Register Val, const MachineRegisterInfo &MRI,
return false;
}
+Optional<APInt> llvm::ConstantFoldExtOp(unsigned Opcode, const unsigned Op1,
+ uint64_t Imm,
+ const MachineRegisterInfo &MRI) {
+ auto MaybeOp1Cst = getConstantVRegVal(Op1, MRI);
+ if (MaybeOp1Cst) {
+ LLT Ty = MRI.getType(Op1);
+ APInt C1(Ty.getSizeInBits(), *MaybeOp1Cst, true);
+ switch (Opcode) {
+ default:
+ break;
+ case TargetOpcode::G_SEXT_INREG:
+ return C1.trunc(Imm).sext(C1.getBitWidth());
+ }
+ }
+ return None;
+}
+
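// Worked example of the G_SEXT_INREG fold above (values hypothetical, not
// part of this patch): folding %r:s32 = G_SEXT_INREG %c, 8 where
// %c = G_CONSTANT i32 255.
APInt C1(32, 255);
APInt Folded = C1.trunc(8).sext(32); // 0xFF -> s8 -1 -> s32 0xFFFFFFFF (-1)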
void llvm::getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU) {
AU.addPreserved<StackProtector>();
}
+
+MVT llvm::getMVTForLLT(LLT Ty) {
+ if (!Ty.isVector())
+ return MVT::getIntegerVT(Ty.getSizeInBits());
+
+ return MVT::getVectorVT(
+ MVT::getIntegerVT(Ty.getElementType().getSizeInBits()),
+ Ty.getNumElements());
+}
+
+LLT llvm::getLLTForMVT(MVT Ty) {
+ if (!Ty.isVector())
+ return LLT::scalar(Ty.getSizeInBits());
+
+ return LLT::vector(Ty.getVectorNumElements(),
+ Ty.getVectorElementType().getSizeInBits());
+}
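// Round-trip sketch for the new helpers (not part of this patch). Note that
// only sizes and element counts survive: pointer and float types both map to
// integer types.
MVT VT = getMVTForLLT(LLT::vector(4, 32)); // MVT::v4i32
LLT Ty = getLLTForMVT(MVT::v4i32);         // <4 x s32>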
diff --git a/lib/CodeGen/GlobalMerge.cpp b/lib/CodeGen/GlobalMerge.cpp
index 09201c2e7bae..d4fa45fcb405 100644
--- a/lib/CodeGen/GlobalMerge.cpp
+++ b/lib/CodeGen/GlobalMerge.cpp
@@ -456,14 +456,14 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
bool HasExternal = false;
StringRef FirstExternalName;
- unsigned MaxAlign = 1;
+ Align MaxAlign;
unsigned CurIdx = 0;
for (j = i; j != -1; j = GlobalSet.find_next(j)) {
Type *Ty = Globals[j]->getValueType();
// Make sure we use the same alignment AsmPrinter would use.
- unsigned Align = DL.getPreferredAlignment(Globals[j]);
- unsigned Padding = alignTo(MergedSize, Align) - MergedSize;
+ Align Alignment(DL.getPreferredAlignment(Globals[j]));
+ unsigned Padding = alignTo(MergedSize, Alignment) - MergedSize;
MergedSize += Padding;
MergedSize += DL.getTypeAllocSize(Ty);
if (MergedSize > MaxOffset) {
@@ -478,7 +478,7 @@ bool GlobalMerge::doMerge(const SmallVectorImpl<GlobalVariable *> &Globals,
Inits.push_back(Globals[j]->getInitializer());
StructIdxs.push_back(CurIdx++);
- MaxAlign = std::max(MaxAlign, Align);
+ MaxAlign = std::max(MaxAlign, Alignment);
if (Globals[j]->hasExternalLinkage() && !HasExternal) {
HasExternal = true;
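// Editor's sketch of the Align semantics adopted above (not part of this
// patch): Align is a non-zero power of two, default-constructed to 1.
Align A(16);
uint64_t Padded = alignTo(13, A);          // 16
Align Max = std::max(Align(4), Align(16)); // Align(16)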
diff --git a/lib/CodeGen/HardwareLoops.cpp b/lib/CodeGen/HardwareLoops.cpp
index 5f57cabbe865..6a0f98d2e2b4 100644
--- a/lib/CodeGen/HardwareLoops.cpp
+++ b/lib/CodeGen/HardwareLoops.cpp
@@ -183,7 +183,7 @@ bool HardwareLoops::runOnFunction(Function &F) {
TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
DL = &F.getParent()->getDataLayout();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI() : nullptr;
+ LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
M = F.getParent();
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index b17a253fe23f..d9caa5660695 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -285,14 +285,113 @@ namespace {
Prediction);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
- unsigned TCycle, unsigned TExtra,
- MachineBasicBlock &FBB,
- unsigned FCycle, unsigned FExtra,
- BranchProbability Prediction) const {
- return TCycle > 0 && FCycle > 0 &&
- TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
- Prediction);
+ bool MeetIfcvtSizeLimit(BBInfo &TBBInfo, BBInfo &FBBInfo,
+ MachineBasicBlock &CommBB, unsigned Dups,
+ BranchProbability Prediction, bool Forked) const {
+ const MachineFunction &MF = *TBBInfo.BB->getParent();
+ if (MF.getFunction().hasMinSize()) {
+ MachineBasicBlock::iterator TIB = TBBInfo.BB->begin();
+ MachineBasicBlock::iterator FIB = FBBInfo.BB->begin();
+ MachineBasicBlock::iterator TIE = TBBInfo.BB->end();
+ MachineBasicBlock::iterator FIE = FBBInfo.BB->end();
+
+ unsigned Dups1, Dups2;
+ if (!CountDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TBBInfo.BB, *FBBInfo.BB,
+ /*SkipUnconditionalBranches*/ true))
+ llvm_unreachable("should already have been checked by ValidDiamond");
+
+ unsigned BranchBytes = 0;
+ unsigned CommonBytes = 0;
+
+ // Count common instructions at the start of the true and false blocks.
+ for (auto &I : make_range(TBBInfo.BB->begin(), TIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ for (auto &I : make_range(FBBInfo.BB->begin(), FIB)) {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+
+ // Count instructions at the end of the true and false blocks, after
+ // the ones we plan to predicate. Analyzable branches will be removed
+ // (unless this is a forked diamond), and all other instructions are
+ // common between the two blocks.
+ for (auto &I : make_range(TIE, TBBInfo.BB->end())) {
+ if (I.isBranch() && TBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : make_range(FIE, FBBInfo.BB->end())) {
+ if (I.isBranch() && FBBInfo.IsBrAnalyzable && !Forked) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ } else {
+ LLVM_DEBUG(dbgs() << "Common inst: " << I);
+ CommonBytes += TII->getInstSizeInBytes(I);
+ }
+ }
+ for (auto &I : CommBB.terminators()) {
+ if (I.isBranch()) {
+ LLVM_DEBUG(dbgs() << "Saving branch: " << I);
+ BranchBytes += TII->predictBranchSizeForIfCvt(I);
+ }
+ }
+
+ // The common instructions in one branch will be eliminated, halving
+ // their code size.
+ CommonBytes /= 2;
+
+ // Count the instructions which we need to predicate.
+ unsigned NumPredicatedInstructions = 0;
+ for (auto &I : make_range(TIB, TIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+ for (auto &I : make_range(FIB, FIE)) {
+ if (!I.isDebugInstr()) {
+ LLVM_DEBUG(dbgs() << "Predicating: " << I);
+ NumPredicatedInstructions++;
+ }
+ }
+
+ // Even though we're optimizing for size at the expense of performance,
+ // avoid creating really long predicated blocks.
+ if (NumPredicatedInstructions > 15)
+ return false;
+
+ // Some targets (e.g. Thumb2) need to insert extra instructions to
+ // start predicated blocks.
+ unsigned ExtraPredicateBytes = TII->extraSizeToPredicateInstructions(
+ MF, NumPredicatedInstructions);
+
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(BranchBytes=" << BranchBytes
+ << ", CommonBytes=" << CommonBytes
+ << ", NumPredicatedInstructions="
+ << NumPredicatedInstructions
+ << ", ExtraPredicateBytes=" << ExtraPredicateBytes
+ << ")\n");
+ return (BranchBytes + CommonBytes) > ExtraPredicateBytes;
+ } else {
+ unsigned TCycle = TBBInfo.NonPredSize + TBBInfo.ExtraCost - Dups;
+ unsigned FCycle = FBBInfo.NonPredSize + FBBInfo.ExtraCost - Dups;
+ bool Res = TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(
+ *TBBInfo.BB, TCycle, TBBInfo.ExtraCost2, *FBBInfo.BB,
+ FCycle, FBBInfo.ExtraCost2, Prediction);
+ LLVM_DEBUG(dbgs() << "MeetIfcvtSizeLimit(TCycle=" << TCycle
+ << ", FCycle=" << FCycle
+ << ", TExtra=" << TBBInfo.ExtraCost2 << ", FExtra="
+ << FBBInfo.ExtraCost2 << ") = " << Res << "\n");
+ return Res;
+ }
}
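// Editor's worked example of the minsize heuristic above (hypothetical byte
// counts, not part of this patch): suppose the blocks share 16 bytes of
// common instructions (CommonBytes = 8 after halving) and if-conversion
// removes 4 bytes of branches (BranchBytes = 4). Then the test
//   (BranchBytes + CommonBytes) > ExtraPredicateBytes
//   (4 + 8) > ExtraPredicateBytes
// accepts the conversion whenever the predication overhead is at most 11
// bytes.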
/// Returns true if Block ends without a terminator.
@@ -356,8 +455,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (!PreRegAlloc) {
// Tail merging tends to expose more if-conversion opportunities.
BranchFolder BF(true, false, MBFI, *MBPI);
- BFChange = BF.OptimizeFunction(MF, TII, ST.getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ BFChange = BF.OptimizeFunction(
+ MF, TII, ST.getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
LLVM_DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'"
@@ -496,8 +597,10 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
if (MadeChange && IfCvtBranchFold) {
BranchFolder BF(false, false, MBFI, *MBPI);
- BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>());
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ BF.OptimizeFunction(
+ MF, TII, MF.getSubtarget().getRegisterInfo(),
+ MMIWP ? &MMIWP->getMMI() : nullptr);
}
MadeChange |= BFChange;
@@ -569,6 +672,9 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const {
Dups = 0;
+ if (TrueBBI.BB == FalseBBI.BB)
+ return false;
+
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -835,6 +941,8 @@ bool IfConverter::ValidForkedDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
@@ -892,6 +1000,8 @@ bool IfConverter::ValidDiamond(
TrueBBICalc.BB = TrueBBI.BB;
FalseBBICalc.BB = FalseBBI.BB;
+ TrueBBICalc.IsBrAnalyzable = TrueBBI.IsBrAnalyzable;
+ FalseBBICalc.IsBrAnalyzable = FalseBBI.IsBrAnalyzable;
if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
return false;
// The size is used to decide whether to if-convert, and the shared portions
@@ -912,6 +1022,12 @@ void IfConverter::AnalyzeBranches(BBInfo &BBI) {
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ if (!BBI.IsBrAnalyzable) {
+ BBI.TrueBB = nullptr;
+ BBI.FalseBB = nullptr;
+ BBI.BrCond.clear();
+ }
+
SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
BBI.IsBrReversible = (RevCond.size() == 0) ||
!TII->reverseBranchCondition(RevCond);
@@ -1173,13 +1289,9 @@ void IfConverter::AnalyzeBlock(
if (CanRevCond) {
BBInfo TrueBBICalc, FalseBBICalc;
- auto feasibleDiamond = [&]() {
- bool MeetsSize = MeetIfcvtSizeLimit(
- *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
- TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
- *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
- FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
- Prediction);
+ auto feasibleDiamond = [&](bool Forked) {
+ bool MeetsSize = MeetIfcvtSizeLimit(TrueBBICalc, FalseBBICalc, *BB,
+ Dups + Dups2, Prediction, Forked);
bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
/* IsTriangle */ false, /* RevCond */ false,
/* hasCommonTail */ true);
@@ -1191,7 +1303,7 @@ void IfConverter::AnalyzeBlock(
if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(false)) {
// Diamond:
// EBB
// / \_
@@ -1200,14 +1312,14 @@ void IfConverter::AnalyzeBlock(
// \ /
// TailBB
// Note TailBB can be empty.
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ Tokens.push_back(std::make_unique<IfcvtToken>(
BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
(bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
Enqueued = true;
}
} else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
TrueBBICalc, FalseBBICalc)) {
- if (feasibleDiamond()) {
+ if (feasibleDiamond(true)) {
// ForkedDiamond:
// if TBB and FBB have a common tail that includes their conditional
// branch instructions, then we can If Convert this pattern.
@@ -1218,7 +1330,7 @@ void IfConverter::AnalyzeBlock(
// / \ / \
// FalseBB TrueBB FalseBB
//
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ Tokens.push_back(std::make_unique<IfcvtToken>(
BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
(bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
Enqueued = true;
@@ -1238,7 +1350,7 @@ void IfConverter::AnalyzeBlock(
// | /
// FBB
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangle, TNeedSub, Dups));
Enqueued = true;
}
@@ -1247,7 +1359,7 @@ void IfConverter::AnalyzeBlock(
TrueBBI.ExtraCost2, Prediction) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
@@ -1263,7 +1375,7 @@ void IfConverter::AnalyzeBlock(
// |
// FBB
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICSimple, TNeedSub, Dups));
Enqueued = true;
}
@@ -1275,7 +1387,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
- Tokens.push_back(llvm::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
+ Tokens.push_back(std::make_unique<IfcvtToken>(BBI, ICTriangleFalse,
FNeedSub, Dups));
Enqueued = true;
}
@@ -1287,7 +1399,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
@@ -1297,7 +1409,7 @@ void IfConverter::AnalyzeBlock(
FalseBBI.ExtraCost2, Prediction.getCompl()) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(
- llvm::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
+ std::make_unique<IfcvtToken>(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
}
}
@@ -1730,6 +1842,11 @@ bool IfConverter::IfConvertDiamondCommon(
++i;
}
while (NumDups1 != 0) {
+ // Since this instruction is going to be deleted, update the call
+ // site info state if it is a call instruction.
+ if (DI2->isCall(MachineInstr::IgnoreBundle))
+ MBB2.getParent()->eraseCallSiteInfo(&*DI2);
+
++DI2;
if (DI2 == MBB2.end())
break;
@@ -1758,14 +1875,27 @@ bool IfConverter::IfConvertDiamondCommon(
if (!BBI1->IsBrAnalyzable)
verifySameBranchInstructions(&MBB1, &MBB2);
#endif
- BBI1->NonPredSize -= TII->removeBranch(*BBI1->BB);
- // Remove duplicated instructions.
+ // Remove duplicated instructions from the tail of MBB1: any branch
+ // instructions, and the common instructions counted by NumDups2.
DI1 = MBB1.end();
+ while (DI1 != MBB1.begin()) {
+ MachineBasicBlock::iterator Prev = std::prev(DI1);
+ if (!Prev->isBranch() && !Prev->isDebugInstr())
+ break;
+ DI1 = Prev;
+ }
for (unsigned i = 0; i != NumDups2; ) {
// NumDups2 only counted non-dbg_value instructions, so this won't
// run off the head of the list.
assert(DI1 != MBB1.begin());
+
--DI1;
+
+ // Since this instruction is going to be deleted, update the call
+ // site info state if it is a call instruction.
+ if (DI1->isCall(MachineInstr::IgnoreBundle))
+ MBB1.getParent()->eraseCallSiteInfo(&*DI1);
+
// Skip dbg_value instructions.
if (!DI1->isDebugInstr())
++i;
@@ -1815,7 +1945,7 @@ bool IfConverter::IfConvertDiamondCommon(
for (const MachineOperand &MO : FI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -1983,7 +2113,7 @@ static bool MaySpeculate(const MachineInstr &MI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef() && !LaterRedefs.count(Reg))
@@ -2050,6 +2180,10 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
break;
MachineInstr *MI = MF.CloneMachineInstr(&I);
+ // Make a copy of the call site info.
+ if (MI->isCall(MachineInstr::IgnoreBundle))
+ MF.copyCallSiteInfo(&I, MI);
+
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
unsigned ExtraPredCost = TII->getPredicationCost(I);
diff --git a/lib/CodeGen/ImplicitNullChecks.cpp b/lib/CodeGen/ImplicitNullChecks.cpp
index 1e82ea659617..b7dcaec90106 100644
--- a/lib/CodeGen/ImplicitNullChecks.cpp
+++ b/lib/CodeGen/ImplicitNullChecks.cpp
@@ -278,12 +278,12 @@ bool ImplicitNullChecks::canReorder(const MachineInstr *A,
if (!(MOA.isReg() && MOA.getReg()))
continue;
- unsigned RegA = MOA.getReg();
+ Register RegA = MOA.getReg();
for (auto MOB : B->operands()) {
if (!(MOB.isReg() && MOB.getReg()))
continue;
- unsigned RegB = MOB.getReg();
+ Register RegB = MOB.getReg();
if (TRI->regsOverlap(RegA, RegB) && (MOA.isDef() || MOB.isDef()))
return false;
@@ -517,7 +517,7 @@ bool ImplicitNullChecks::analyzeBlockForNullChecks(
//
// we must ensure that there are no instructions between the 'test' and
// conditional jump that modify %rax.
- const unsigned PointerReg = MBP.LHS.getReg();
+ const Register PointerReg = MBP.LHS.getReg();
assert(MBP.ConditionDef->getParent() == &MBB && "Should be in basic block");
@@ -689,7 +689,7 @@ void ImplicitNullChecks::rewriteNullChecks(
for (const MachineOperand &MO : FaultingInstr->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg || MBB->isLiveIn(Reg))
continue;
MBB->addLiveIn(Reg);
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index 41ae8061a917..2408f18678e4 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
#include "Spiller.h"
#include "SplitKit.h"
#include "llvm/ADT/ArrayRef.h"
@@ -26,6 +25,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -346,8 +346,7 @@ void InlineSpiller::collectRegsToSpill() {
}
bool InlineSpiller::isSibling(unsigned Reg) {
- return TargetRegisterInfo::isVirtualRegister(Reg) &&
- VRM.getOriginal(Reg) == Original;
+ return Register::isVirtualRegister(Reg) && VRM.getOriginal(Reg) == Original;
}
/// It is beneficial to spill to earlier place in the same BB in case
@@ -377,7 +376,7 @@ bool InlineSpiller::hoistSpillInsideBB(LiveInterval &SpillLI,
assert(VNI && VNI->def == Idx.getRegSlot() && "Not defined by copy");
#endif
- unsigned SrcReg = CopyMI.getOperand(1).getReg();
+ Register SrcReg = CopyMI.getOperand(1).getReg();
LiveInterval &SrcLI = LIS.getInterval(SrcReg);
VNInfo *SrcVNI = SrcLI.getVNInfoAt(Idx);
LiveQueryResult SrcQ = SrcLI.Query(Idx);
@@ -845,9 +844,8 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
for (MIBundleOperands MO(*MI); MO.isValid(); ++MO) {
if (!MO->isReg())
continue;
- unsigned Reg = MO->getReg();
- if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) ||
- MRI.isReserved(Reg)) {
+ Register Reg = MO->getReg();
+ if (!Reg || Register::isVirtualRegister(Reg) || MRI.isReserved(Reg)) {
continue;
}
// Skip non-Defs, including undef uses and internal reads.
@@ -869,7 +867,7 @@ foldMemoryOperand(ArrayRef<std::pair<MachineInstr *, unsigned>> Ops,
--NumSpills;
LIS.ReplaceMachineInstrInMaps(*MI, *FoldMI);
if (MI->isCall())
- MI->getMF()->updateCallSiteInfo(MI, FoldMI);
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
// Insert any new instructions other than FoldMI into the LIS maps.
@@ -1111,8 +1109,8 @@ void InlineSpiller::spillAll() {
void InlineSpiller::spill(LiveRangeEdit &edit) {
++NumSpilledRanges;
Edit = &edit;
- assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
- && "Trying to spill a stack slot.");
+ assert(!Register::isStackSlot(edit.getReg()) &&
+ "Trying to spill a stack slot.");
// Share a stack slot among all descendants of Original.
Original = VRM.getOriginal(edit.getReg());
StackSlot = VRM.getStackSlot(Original);
@@ -1147,7 +1145,7 @@ void HoistSpillHelper::addToMergeableSpills(MachineInstr &Spill, int StackSlot,
// save a copy of LiveInterval in StackSlotToOrigLI because the original
// LiveInterval may be cleared after all its references are spilled.
if (StackSlotToOrigLI.find(StackSlot) == StackSlotToOrigLI.end()) {
- auto LI = llvm::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
+ auto LI = std::make_unique<LiveInterval>(OrigLI.reg, OrigLI.weight);
LI->assign(OrigLI, Allocator);
StackSlotToOrigLI[StackSlot] = std::move(LI);
}
@@ -1459,7 +1457,7 @@ void HoistSpillHelper::hoistAllSpills() {
LiveRangeEdit Edit(nullptr, NewVRegs, MF, LIS, &VRM, this);
for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
unsigned Original = VRM.getPreSplitReg(Reg);
if (!MRI.def_empty(Reg))
Virt2SiblingsMap[Original].insert(Reg);
diff --git a/lib/CodeGen/InterleavedLoadCombinePass.cpp b/lib/CodeGen/InterleavedLoadCombinePass.cpp
index 9525da849e2a..770c4952d169 100644
--- a/lib/CodeGen/InterleavedLoadCombinePass.cpp
+++ b/lib/CodeGen/InterleavedLoadCombinePass.cpp
@@ -940,8 +940,8 @@ public:
/// \param V input value
/// \param Result result polynomial
static void computePolynomial(Value &V, Polynomial &Result) {
- if (isa<BinaryOperator>(&V))
- computePolynomialBinOp(*dyn_cast<BinaryOperator>(&V), Result);
+ if (auto *BO = dyn_cast<BinaryOperator>(&V))
+ computePolynomialBinOp(*BO, Result);
else
Result = Polynomial(&V);
}
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 886ae7e94adb..1c362aec6e67 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -96,14 +96,15 @@ LLVMTargetMachine::getTargetTransformInfo(const Function &F) {
/// addPassesToX helper drives creation and initialization of TargetPassConfig.
static TargetPassConfig *
addPassesToGenerateCode(LLVMTargetMachine &TM, PassManagerBase &PM,
- bool DisableVerify, MachineModuleInfo &MMI) {
+ bool DisableVerify,
+ MachineModuleInfoWrapperPass &MMIWP) {
// Targets may override createPassConfig to provide a target-specific
// subclass.
TargetPassConfig *PassConfig = TM.createPassConfig(PM);
// Set PassConfig options provided by TargetMachine.
PassConfig->setDisableVerify(DisableVerify);
PM.add(PassConfig);
- PM.add(&MMI);
+ PM.add(&MMIWP);
if (PassConfig->addISelPasses())
return nullptr;
@@ -139,7 +140,7 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
std::unique_ptr<MCAsmBackend> MAB(
getTarget().createMCAsmBackend(STI, MRI, Options.MCOptions));
- auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
+ auto FOut = std::make_unique<formatted_raw_ostream>(Out);
MCStreamer *S = getTarget().createAsmStreamer(
Context, std::move(FOut), Options.MCOptions.AsmVerbose,
Options.MCOptions.MCUseDwarfDirectory, InstPrinter, std::move(MCE),
@@ -186,17 +187,15 @@ bool LLVMTargetMachine::addAsmPrinter(PassManagerBase &PM,
return false;
}
-bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
- raw_pwrite_stream &Out,
- raw_pwrite_stream *DwoOut,
- CodeGenFileType FileType,
- bool DisableVerify,
- MachineModuleInfo *MMI) {
+bool LLVMTargetMachine::addPassesToEmitFile(
+ PassManagerBase &PM, raw_pwrite_stream &Out, raw_pwrite_stream *DwoOut,
+ CodeGenFileType FileType, bool DisableVerify,
+ MachineModuleInfoWrapperPass *MMIWP) {
// Add common CodeGen passes.
- if (!MMI)
- MMI = new MachineModuleInfo(this);
+ if (!MMIWP)
+ MMIWP = new MachineModuleInfoWrapperPass(this);
TargetPassConfig *PassConfig =
- addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
if (!PassConfig)
return true;
@@ -206,12 +205,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// testing to be meaningful, we need to ensure that the symbols created
// are MCSymbolXCOFF variants, which requires that
// the TargetLoweringObjectFile instance has been initialized.
- MCContext &Ctx = MMI->getContext();
+ MCContext &Ctx = MMIWP->getMMI().getContext();
const_cast<TargetLoweringObjectFile &>(*this->getObjFileLowering())
.Initialize(Ctx, *this);
}
PM.add(createPrintMIRPass(Out));
- } else if (addAsmPrinter(PM, Out, DwoOut, FileType, MMI->getContext()))
+ } else if (addAsmPrinter(PM, Out, DwoOut, FileType,
+ MMIWP->getMMI().getContext()))
return true;
PM.add(createFreeMachineFunctionPass());
@@ -227,15 +227,15 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, MCContext *&Ctx,
raw_pwrite_stream &Out,
bool DisableVerify) {
// Add common CodeGen passes.
- MachineModuleInfo *MMI = new MachineModuleInfo(this);
+ MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(this);
TargetPassConfig *PassConfig =
- addPassesToGenerateCode(*this, PM, DisableVerify, *MMI);
+ addPassesToGenerateCode(*this, PM, DisableVerify, *MMIWP);
if (!PassConfig)
return true;
assert(TargetPassConfig::willCompleteCodeGenPipeline() &&
"Cannot emit MC with limited codegen pipeline");
- Ctx = &MMI->getContext();
+ Ctx = &MMIWP->getMMI().getContext();
if (Options.MCOptions.MCSaveTempLabels)
Ctx->setAllowTemporaryLabels(false);
diff --git a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
index 200ac0ba15bf..cef5085ae079 100644
--- a/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
+++ b/lib/CodeGen/LazyMachineBlockFrequencyInfo.cpp
@@ -73,18 +73,18 @@ LazyMachineBlockFrequencyInfoPass::calculateIfNotAvailable() const {
if (!MDT) {
LLVM_DEBUG(dbgs() << "Building DominatorTree on the fly\n");
- OwnedMDT = make_unique<MachineDominatorTree>();
+ OwnedMDT = std::make_unique<MachineDominatorTree>();
OwnedMDT->getBase().recalculate(*MF);
MDT = OwnedMDT.get();
}
// Generate LoopInfo from it.
- OwnedMLI = make_unique<MachineLoopInfo>();
+ OwnedMLI = std::make_unique<MachineLoopInfo>();
OwnedMLI->getBase().analyze(MDT->getBase());
MLI = OwnedMLI.get();
}
- OwnedMBFI = make_unique<MachineBlockFrequencyInfo>();
+ OwnedMBFI = std::make_unique<MachineBlockFrequencyInfo>();
OwnedMBFI->calculate(*MF, MBPI, *MLI);
return *OwnedMBFI.get();
}
diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp
index 503821537ed9..ac3ef0e709f3 100644
--- a/lib/CodeGen/LexicalScopes.cpp
+++ b/lib/CodeGen/LexicalScopes.cpp
@@ -21,6 +21,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
#include "llvm/IR/Metadata.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
diff --git a/lib/CodeGen/LiveDebugValues.cpp b/lib/CodeGen/LiveDebugValues.cpp
index a669e64692b9..f1b237d83e8c 100644
--- a/lib/CodeGen/LiveDebugValues.cpp
+++ b/lib/CodeGen/LiveDebugValues.cpp
@@ -7,14 +7,23 @@
//===----------------------------------------------------------------------===//
///
/// This pass implements a data flow analysis that propagates debug location
-/// information by inserting additional DBG_VALUE instructions into the machine
-/// instruction stream. The pass internally builds debug location liveness
-/// ranges to determine the points where additional DBG_VALUEs need to be
-/// inserted.
+/// information by inserting additional DBG_VALUE insts into the machine
+/// instruction stream. Before running, each DBG_VALUE inst corresponds to a
+/// source assignment of a variable. Afterwards, a DBG_VALUE inst specifies a
+/// variable location for the current basic block (see SourceLevelDebugging.rst).
///
/// This is a separate pass from DbgValueHistoryCalculator to facilitate
/// testing and improve modularity.
///
+/// Each variable location is represented by a VarLoc object that identifies the
+/// source variable, its current machine-location, and the DBG_VALUE inst that
+/// specifies the location. Each VarLoc is indexed in the (function-scope)
+/// VarLocMap, giving each VarLoc a unique index. Rather than operate directly
+/// on machine locations, the dataflow analysis in this pass identifies
+/// locations by their index in the VarLocMap, meaning all the variable
+/// locations in a block can be described by a sparse vector of VarLocMap
+/// indexes.
+///
//===----------------------------------------------------------------------===//
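// Editor's sketch of the index-based location sets described above (the
// index value is hypothetical; not part of this patch):
#include "llvm/ADT/SparseBitVector.h"
llvm::SparseBitVector<> InLocs, PredOutLocs;
unsigned ID = 42;      // index of some VarLoc in the VarLocMap
InLocs.set(ID);        // that variable location becomes live
InLocs &= PredOutLocs; // join: intersect with a predecessor's out-set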
#include "llvm/ADT/DenseMap.h"
@@ -68,6 +77,7 @@ using namespace llvm;
#define DEBUG_TYPE "livedebugvalues"
STATISTIC(NumInserted, "Number of DBG_VALUE instructions inserted");
+STATISTIC(NumRemoved, "Number of DBG_VALUE instructions removed");
// If @MI is a DBG_VALUE whose debug value is described by a defined
// register, returns the number of that register; otherwise, returns 0.
@@ -179,8 +189,16 @@ private:
}
};
+ /// Identity of the variable at this location.
const DebugVariable Var;
- const MachineInstr &MI; ///< Only used for cloning a new DBG_VALUE.
+
+ /// The expression applied to this location.
+ const DIExpression *Expr;
+
+ /// DBG_VALUE to clone var/expr information from if this location
+ /// is moved.
+ const MachineInstr &MI;
+
mutable UserValueScopes UVS;
enum VarLocKind {
InvalidKind = 0,
@@ -201,9 +219,9 @@ private:
const ConstantInt *CImm;
} Loc;
- VarLoc(const MachineInstr &MI, LexicalScopes &LS,
- VarLocKind K = InvalidKind)
- : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS){
+ VarLoc(const MachineInstr &MI, LexicalScopes &LS)
+ : Var(MI), Expr(MI.getDebugExpression()), MI(MI),
+ UVS(MI.getDebugLoc(), LS) {
static_assert((sizeof(Loc) == sizeof(uint64_t)),
"hash does not cover all members of Loc");
assert(MI.isDebugValue() && "not a DBG_VALUE");
@@ -225,17 +243,78 @@ private:
"entry values must be register locations");
}
- /// The constructor for spill locations.
- VarLoc(const MachineInstr &MI, unsigned SpillBase, int SpillOffset,
- LexicalScopes &LS)
- : Var(MI), MI(MI), UVS(MI.getDebugLoc(), LS) {
- assert(MI.isDebugValue() && "not a DBG_VALUE");
- assert(MI.getNumOperands() == 4 && "malformed DBG_VALUE");
- Kind = SpillLocKind;
- Loc.SpillLocation = {SpillBase, SpillOffset};
+ /// Take the variable and machine-location in DBG_VALUE MI, and build an
+ /// entry location using the given expression.
+ static VarLoc CreateEntryLoc(const MachineInstr &MI, LexicalScopes &LS,
+ const DIExpression *EntryExpr) {
+ VarLoc VL(MI, LS);
+ VL.Kind = EntryValueKind;
+ VL.Expr = EntryExpr;
+ return VL;
+ }
+
+ /// Copy the register location in DBG_VALUE MI, updating the register to
+ /// be NewReg.
+ static VarLoc CreateCopyLoc(const MachineInstr &MI, LexicalScopes &LS,
+ unsigned NewReg) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Loc.RegNo = NewReg;
+ return VL;
+ }
+
+ /// Take the variable described by DBG_VALUE MI, and create a VarLoc
+ /// locating it in the specified spill location.
+ static VarLoc CreateSpillLoc(const MachineInstr &MI, unsigned SpillBase,
+ int SpillOffset, LexicalScopes &LS) {
+ VarLoc VL(MI, LS);
+ assert(VL.Kind == RegisterKind);
+ VL.Kind = SpillLocKind;
+ VL.Loc.SpillLocation = {SpillBase, SpillOffset};
+ return VL;
}
- // Is the Loc field a constant or constant object?
+ /// Create a DBG_VALUE representing this VarLoc in the given function.
+ /// Copies variable-specific information such as DILocalVariable and
+ /// inlining information from the original DBG_VALUE instruction, which may
+ /// have been several transfers ago.
+ MachineInstr *BuildDbgValue(MachineFunction &MF) const {
+ const DebugLoc &DbgLoc = MI.getDebugLoc();
+ bool Indirect = MI.isIndirectDebugValue();
+ const auto &IID = MI.getDesc();
+ const DILocalVariable *Var = MI.getDebugVariable();
+ const DIExpression *DIExpr = MI.getDebugExpression();
+
+ switch (Kind) {
+ case EntryValueKind:
+ // An entry value is a register location -- but with an updated
+ // expression.
+ return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, Expr);
+ case RegisterKind:
+ // Register locations are like the source DBG_VALUE, but with the
+ // register number from this VarLoc.
+ return BuildMI(MF, DbgLoc, IID, Indirect, Loc.RegNo, Var, DIExpr);
+ case SpillLocKind: {
+ // Spills are indirect DBG_VALUEs, with a base register and offset.
+ // Use the original DBG_VALUE's expression to build the spill location
+ // on top of. FIXME: spill locations created before this pass runs
+ // are not recognized, and not handled here.
+ auto *SpillExpr = DIExpression::prepend(
+ DIExpr, DIExpression::ApplyOffset, Loc.SpillLocation.SpillOffset);
+ unsigned Base = Loc.SpillLocation.SpillBase;
+ return BuildMI(MF, DbgLoc, IID, true, Base, Var, SpillExpr);
+ }
+ case ImmediateKind: {
+ MachineOperand MO = MI.getOperand(0);
+ return BuildMI(MF, DbgLoc, IID, Indirect, MO, Var, DIExpr);
+ }
+ case InvalidKind:
+ llvm_unreachable("Tried to produce DBG_VALUE for invalid VarLoc");
+ }
+ llvm_unreachable("Unrecognized LiveDebugValues.VarLoc.Kind enum");
+ }
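// Editor's note (hypothetical frame setup; not part of this patch): for a
// value spilled at [frame base - 8], the SpillLocKind case above produces an
// indirect DBG_VALUE whose expression carries the offset:
//   DBG_VALUE $rbp, 0, !"x", !DIExpression(DW_OP_constu, 8, DW_OP_minus)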
+
+ /// Is the Loc field a constant or constant object?
bool isConstant() const { return Kind == ImmediateKind; }
/// If this variable is described by a register, return it,
@@ -251,18 +330,42 @@ private:
bool dominates(MachineBasicBlock &MBB) const { return UVS.dominates(&MBB); }
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- LLVM_DUMP_METHOD void dump() const { MI.dump(); }
+ // TRI can be null.
+ void dump(const TargetRegisterInfo *TRI, raw_ostream &Out = dbgs()) const {
+ dbgs() << "VarLoc(";
+ switch (Kind) {
+ case RegisterKind:
+ case EntryValueKind:
+ dbgs() << printReg(Loc.RegNo, TRI);
+ break;
+ case SpillLocKind:
+ dbgs() << printReg(Loc.SpillLocation.SpillBase, TRI);
+ dbgs() << "[" << Loc.SpillLocation.SpillOffset << "]";
+ break;
+ case ImmediateKind:
+ dbgs() << Loc.Immediate;
+ break;
+ case InvalidKind:
+ llvm_unreachable("Invalid VarLoc in dump method");
+ }
+
+ dbgs() << ", \"" << Var.getVar()->getName() << "\", " << *Expr << ", ";
+ if (Var.getInlinedAt())
+ dbgs() << "!" << Var.getInlinedAt()->getMetadataID() << ")\n";
+ else
+ dbgs() << "(null))\n";
+ }
#endif
bool operator==(const VarLoc &Other) const {
return Kind == Other.Kind && Var == Other.Var &&
- Loc.Hash == Other.Loc.Hash;
+ Loc.Hash == Other.Loc.Hash && Expr == Other.Expr;
}
/// This operator guarantees that VarLocs are sorted by Variable first.
bool operator<(const VarLoc &Other) const {
- return std::tie(Var, Kind, Loc.Hash) <
- std::tie(Other.Var, Other.Kind, Other.Loc.Hash);
+ return std::tie(Var, Kind, Loc.Hash, Expr) <
+ std::tie(Other.Var, Other.Kind, Other.Loc.Hash, Other.Expr);
}
};
@@ -271,8 +374,8 @@ private:
using VarLocSet = SparseBitVector<>;
using VarLocInMBB = SmallDenseMap<const MachineBasicBlock *, VarLocSet>;
struct TransferDebugPair {
- MachineInstr *TransferInst;
- MachineInstr *DebugInst;
+ MachineInstr *TransferInst; /// Instruction where this transfer occurs.
+ unsigned LocationID; /// Location number for the transfer dest.
};
using TransferMap = SmallVector<TransferDebugPair, 4>;
@@ -320,6 +423,14 @@ private:
Vars.insert({Var, VarLocID});
}
+ /// Insert a set of ranges.
+ void insertFromLocSet(const VarLocSet &ToLoad, const VarLocMap &Map) {
+ for (unsigned Id : ToLoad) {
+ const VarLoc &Var = Map[Id];
+ insert(Id, Var.Var);
+ }
+ }
+
/// Empty the set.
void clear() {
VarLocs.clear();
@@ -333,8 +444,18 @@ private:
}
};
- bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF,
- unsigned &Reg);
+ /// Tests whether this instruction is a spill to a stack location.
+ bool isSpillInstruction(const MachineInstr &MI, MachineFunction *MF);
+
+ /// Decide if @MI is a spill instruction and return true if it is. We use 2
+ /// criteria to make this decision:
+ /// - Is this instruction a store to a spill slot?
+ /// - Is there a register operand that is both used and killed?
+ /// TODO: Store optimization can fold spills into other stores (including
+ /// other spills). We do not yet handle instructions with more than one
+ /// memory operand.
+ bool isLocationSpill(const MachineInstr &MI, MachineFunction *MF,
+ unsigned &Reg);
+
/// If a given instruction is identified as a restore, return the restored
/// spill location and set \p Reg to the restored register.
Optional<VarLoc::SpillLoc> isRestoreInstruction(const MachineInstr &MI,
@@ -361,13 +482,13 @@ private:
void transferRegisterDef(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocMap &VarLocIDs, TransferMap &Transfers,
DebugParamMap &DebugEntryVals);
- bool transferTerminatorInst(MachineInstr &MI, OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
+ bool transferTerminator(MachineBasicBlock *MBB, OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs, const VarLocMap &VarLocIDs);
- bool process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+ void process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- bool transferChanges, OverlapMap &OverlapFragments,
+ OverlapMap &OverlapFragments,
VarToFragments &SeenFragments);
void accumulateFragmentMap(MachineInstr &MI, VarToFragments &SeenFragments,
@@ -376,7 +497,12 @@ private:
bool join(MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks);
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
+ VarLocInMBB &PendingInLocs);
+
+ /// Create DBG_VALUE insts for inlocs that have been propagated but
+ /// had their instruction creation deferred.
+ void flushPendingLocs(VarLocInMBB &PendingInLocs, VarLocMap &VarLocIDs);
bool ExtendRanges(MachineFunction &MF);
@@ -518,7 +644,7 @@ void LiveDebugValues::printVarLocInMBB(const MachineFunction &MF,
const VarLoc &VL = VarLocIDs[VLL];
Out << " Var: " << VL.Var.getVar()->getName();
Out << " MI: ";
- VL.dump();
+ VL.dump(TRI, Out);
}
}
Out << "\n";
@@ -567,11 +693,7 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI,
ID = VarLocIDs.insert(VL);
OpenRanges.insert(ID, VL.Var);
} else if (MI.hasOneMemOperand()) {
- // It's a stack spill -- fetch spill base and offset.
- VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
- VarLoc VL(MI, SpillLocation.SpillBase, SpillLocation.SpillOffset, LS);
- ID = VarLocIDs.insert(VL);
- OpenRanges.insert(ID, VL.Var);
+ llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?");
} else {
// This must be an undefined location. We should leave OpenRanges closed.
assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 &&
@@ -585,7 +707,6 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
TransferMap &Transfers,
DebugParamMap &DebugEntryVals,
SparseBitVector<> &KillSet) {
- MachineFunction *MF = MI.getParent()->getParent();
for (unsigned ID : KillSet) {
if (!VarLocIDs[ID].Var.getVar()->isParameter())
continue;
@@ -600,20 +721,12 @@ void LiveDebugValues::emitEntryValues(MachineInstr &MI,
auto ParamDebugInstr = DebugEntryVals[CurrDebugInstr->getDebugVariable()];
DIExpression *NewExpr = DIExpression::prepend(
ParamDebugInstr->getDebugExpression(), DIExpression::EntryValue);
- MachineInstr *EntryValDbgMI =
- BuildMI(*MF, ParamDebugInstr->getDebugLoc(), ParamDebugInstr->getDesc(),
- ParamDebugInstr->isIndirectDebugValue(),
- ParamDebugInstr->getOperand(0).getReg(),
- ParamDebugInstr->getDebugVariable(), NewExpr);
-
- if (ParamDebugInstr->isIndirectDebugValue())
- EntryValDbgMI->getOperand(1).setImm(
- ParamDebugInstr->getOperand(1).getImm());
-
- Transfers.push_back({&MI, EntryValDbgMI});
- VarLoc VL(*EntryValDbgMI, LS);
- unsigned EntryValLocID = VarLocIDs.insert(VL);
- OpenRanges.insert(EntryValLocID, VL.Var);
+
+ VarLoc EntryLoc = VarLoc::CreateEntryLoc(*ParamDebugInstr, LS, NewExpr);
+
+ unsigned EntryValLocID = VarLocIDs.insert(EntryLoc);
+ Transfers.push_back({&MI, EntryValLocID});
+ OpenRanges.insert(EntryValLocID, EntryLoc.Var);
}
}
@@ -627,21 +740,19 @@ void LiveDebugValues::insertTransferDebugPair(
VarLocMap &VarLocIDs, unsigned OldVarID, TransferKind Kind,
unsigned NewReg) {
const MachineInstr *DebugInstr = &VarLocIDs[OldVarID].MI;
- MachineFunction *MF = MI.getParent()->getParent();
- MachineInstr *NewDebugInstr;
auto ProcessVarLoc = [&MI, &OpenRanges, &Transfers, &DebugInstr,
- &VarLocIDs](VarLoc &VL, MachineInstr *NewDebugInstr) {
+ &VarLocIDs](VarLoc &VL) {
unsigned LocId = VarLocIDs.insert(VL);
// Close this variable's previous location range.
DebugVariable V(*DebugInstr);
OpenRanges.erase(V);
+ // Record the new location as an open range, and a postponed transfer
+ // inserting a DBG_VALUE for this location.
OpenRanges.insert(LocId, VL.Var);
- // The newly created DBG_VALUE instruction NewDebugInstr must be inserted
- // after MI. Keep track of the pairing.
- TransferDebugPair MIP = {&MI, NewDebugInstr};
+ TransferDebugPair MIP = {&MI, LocId};
Transfers.push_back(MIP);
};
@@ -653,37 +764,25 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a copy of a debug value");
// Create a DBG_VALUE instruction to describe the Var in its new
// register location.
- NewDebugInstr = BuildMI(
- *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(),
- DebugInstr->isIndirectDebugValue(), NewReg,
- DebugInstr->getDebugVariable(), DebugInstr->getDebugExpression());
- if (DebugInstr->isIndirectDebugValue())
- NewDebugInstr->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
- VarLoc VL(*NewDebugInstr, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register copy: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for register copy:";
+ VL.dump(TRI);
+ });
return;
}
case TransferKind::TransferSpill: {
// Create a DBG_VALUE instruction to describe the Var in its spilled
// location.
VarLoc::SpillLoc SpillLocation = extractSpillBaseRegAndOffset(MI);
- auto *SpillExpr = DIExpression::prepend(DebugInstr->getDebugExpression(),
- DIExpression::ApplyOffset,
- SpillLocation.SpillOffset);
- NewDebugInstr = BuildMI(
- *MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), true,
- SpillLocation.SpillBase, DebugInstr->getDebugVariable(), SpillExpr);
- VarLoc VL(*NewDebugInstr, SpillLocation.SpillBase,
- SpillLocation.SpillOffset, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for spill: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ VarLoc VL = VarLoc::CreateSpillLoc(*DebugInstr, SpillLocation.SpillBase,
+ SpillLocation.SpillOffset, LS);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for spill:";
+ VL.dump(TRI);
+ });
return;
}
case TransferKind::TransferRestore: {
@@ -691,15 +790,14 @@ void LiveDebugValues::insertTransferDebugPair(
"No register supplied when handling a restore of a debug value");
MachineFunction *MF = MI.getMF();
DIBuilder DIB(*const_cast<Function &>(MF->getFunction()).getParent());
- NewDebugInstr =
- BuildMI(*MF, DebugInstr->getDebugLoc(), DebugInstr->getDesc(), false,
- NewReg, DebugInstr->getDebugVariable(), DIB.createExpression());
- VarLoc VL(*NewDebugInstr, LS);
- ProcessVarLoc(VL, NewDebugInstr);
- LLVM_DEBUG(dbgs() << "Creating DBG_VALUE inst for register restore: ";
- NewDebugInstr->print(dbgs(), /*IsStandalone*/false,
- /*SkipOpers*/false, /*SkipDebugLoc*/false,
- /*AddNewLine*/true, TII));
+ // DebugInstr refers to the pre-spill location, therefore we can reuse
+ // its expression.
+ VarLoc VL = VarLoc::CreateCopyLoc(*DebugInstr, LS, NewReg);
+ ProcessVarLoc(VL);
+ LLVM_DEBUG({
+ dbgs() << "Creating VarLoc for restore:";
+ VL.dump(TRI);
+ });
return;
}
}
@@ -719,7 +817,7 @@ void LiveDebugValues::transferRegisterDef(
// instructions never clobber SP, because some backends (e.g., AArch64)
// never list SP in the regmask.
if (MO.isReg() && MO.isDef() && MO.getReg() &&
- TRI->isPhysicalRegister(MO.getReg()) &&
+ Register::isPhysicalRegister(MO.getReg()) &&
!(MI.isCall() && MO.getReg() == SP)) {
// Remove ranges of all aliased registers.
for (MCRegAliasIterator RAI(MO.getReg(), TRI, true); RAI.isValid(); ++RAI)
@@ -748,16 +846,8 @@ void LiveDebugValues::transferRegisterDef(
}
}
-/// Decide if @MI is a spill instruction and return true if it is. We use 2
-/// criteria to make this decision:
-/// - Is this instruction a store to a spill slot?
-/// - Is there a register operand that is both used and killed?
-/// TODO: Store optimization can fold spills into other stores (including
-/// other spills). We do not handle this yet (more than one memory operand).
bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
- MachineFunction *MF, unsigned &Reg) {
- SmallVector<const MachineMemOperand*, 1> Accesses;
-
+ MachineFunction *MF) {
// TODO: Handle multiple stores folded into one.
if (!MI.hasOneMemOperand())
return false;
@@ -766,6 +856,14 @@ bool LiveDebugValues::isSpillInstruction(const MachineInstr &MI,
return false; // This is not a spill instruction, since no valid size was
// returned from either function.
+ return true;
+}
+
+bool LiveDebugValues::isLocationSpill(const MachineInstr &MI,
+ MachineFunction *MF, unsigned &Reg) {
+ if (!isSpillInstruction(MI, MF))
+ return false;
+
auto isKilledReg = [&](const MachineOperand MO, unsigned &Reg) {
if (!MO.isReg() || !MO.isUse()) {
Reg = 0;
@@ -834,7 +932,37 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Examining instruction: "; MI.dump(););
- if (isSpillInstruction(MI, MF, Reg)) {
+ // First, if there are any DBG_VALUEs pointing at a spill slot that is
+ // written to, then close the variable location. The value in memory
+ // will have changed.
+ VarLocSet KillSet;
+ if (isSpillInstruction(MI, MF)) {
+ Loc = extractSpillBaseRegAndOffset(MI);
+ for (unsigned ID : OpenRanges.getVarLocs()) {
+ const VarLoc &VL = VarLocIDs[ID];
+ if (VL.Kind == VarLoc::SpillLocKind && VL.Loc.SpillLocation == *Loc) {
+ // This location is overwritten by the current instruction -- terminate
+ // the open range, and insert an explicit DBG_VALUE $noreg.
+ //
+ // Doing this at a later stage would require re-interpreting all
+ // DBG_VALUEs and DIExpressions to identify whether they point at
+ // memory, and then analyzing all memory writes to see if they
+ // overwrite that memory, which is expensive.
+ //
+ // At this stage, we already know which DBG_VALUEs are for spills and
+ // where they are located; it's best to handle overwrites now.
+ KillSet.set(ID);
+ VarLoc UndefVL = VarLoc::CreateCopyLoc(VL.MI, LS, 0);
+ unsigned UndefLocID = VarLocIDs.insert(UndefVL);
+ Transfers.push_back({&MI, UndefLocID});
+ }
+ }
+ OpenRanges.erase(KillSet, VarLocIDs);
+ }
+
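
As a simplified standalone illustration of the overwrite check above, using plain standard-library containers in place of VarLocMap/VarLocSet (a sketch, not the pass's types):

    #include <map>
    #include <set>
    #include <utility>

    using SpillSlot = std::pair<unsigned, int>; // (base register, offset)

    // Close every open location that lives in the slot a store just wrote.
    // Returns the closed location IDs so the caller can emit an explicit
    // "location is now undefined" marker for each of them.
    std::set<unsigned>
    closeOverwritten(const std::map<unsigned, SpillSlot> &Open,
                     const SpillSlot &Written) {
      std::set<unsigned> Killed;
      for (const auto &Entry : Open)
        if (Entry.second == Written) // same slot the store just wrote
          Killed.insert(Entry.first);
      return Killed;
    }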
+ // Try to recognize spill and restore instructions that may create a new
+ // variable location.
+ if (isLocationSpill(MI, MF, Reg)) {
TKind = TransferKind::TransferSpill;
LLVM_DEBUG(dbgs() << "Recognized as spill: "; MI.dump(););
LLVM_DEBUG(dbgs() << "Register: " << Reg << " " << printReg(Reg, TRI)
@@ -854,6 +982,7 @@ void LiveDebugValues::transferSpillOrRestoreInst(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "Spilling Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
} else if (TKind == TransferKind::TransferRestore &&
+ VarLocIDs[ID].Kind == VarLoc::SpillLocKind &&
VarLocIDs[ID].Loc.SpillLocation == *Loc) {
LLVM_DEBUG(dbgs() << "Restoring Register " << printReg(Reg, TRI) << '('
<< VarLocIDs[ID].Var.getVar()->getName() << ")\n");
@@ -885,8 +1014,8 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
return false;
};
- unsigned SrcReg = SrcRegOp->getReg();
- unsigned DestReg = DestRegOp->getReg();
+ Register SrcReg = SrcRegOp->getReg();
+ Register DestReg = DestRegOp->getReg();
// We want to recognize instructions where destination register is callee
// saved register. If register that could be clobbered by the call is
@@ -906,26 +1035,20 @@ void LiveDebugValues::transferRegisterCopy(MachineInstr &MI,
}
/// Terminate all open ranges at the end of the current basic block.
-bool LiveDebugValues::transferTerminatorInst(MachineInstr &MI,
- OpenRangesSet &OpenRanges,
- VarLocInMBB &OutLocs,
- const VarLocMap &VarLocIDs) {
+bool LiveDebugValues::transferTerminator(MachineBasicBlock *CurMBB,
+ OpenRangesSet &OpenRanges,
+ VarLocInMBB &OutLocs,
+ const VarLocMap &VarLocIDs) {
bool Changed = false;
- const MachineBasicBlock *CurMBB = MI.getParent();
- if (!(MI.isTerminator() || (&MI == &CurMBB->back())))
- return false;
-
- if (OpenRanges.empty())
- return false;
LLVM_DEBUG(for (unsigned ID
: OpenRanges.getVarLocs()) {
// Copy OpenRanges to OutLocs, if not already present.
dbgs() << "Add to OutLocs in MBB #" << CurMBB->getNumber() << ": ";
- VarLocIDs[ID].dump();
+ VarLocIDs[ID].dump(TRI);
});
VarLocSet &VLS = OutLocs[CurMBB];
- Changed = VLS |= OpenRanges.getVarLocs();
+ Changed = VLS != OpenRanges.getVarLocs();
// New OutLocs set may be different due to spill, restore or register
// copy instruction processing.
if (Changed)
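
Previously `Changed = VLS |= OpenRanges.getVarLocs()` could only ever grow the out-set, so a location invalidated on a later visit would stick around; the compare-then-assign form also notices (and applies) shrinkage, which the removal path in join() now relies on. In miniature, with std::set standing in for the sparse bit vector:

    #include <set>

    using LocSet = std::set<unsigned>;

    // Old behaviour: monotonic union; Changed fires only on growth.
    bool unionInto(LocSet &Out, const LocSet &Open) {
      size_t Before = Out.size();
      Out.insert(Open.begin(), Open.end());
      return Out.size() != Before;
    }

    // New behaviour: detect any difference, then overwrite, so the
    // out-set can also shrink when a location was invalidated.
    bool overwrite(LocSet &Out, const LocSet &Open) {
      bool Changed = Out != Open;
      if (Changed)
        Out = Open;
      return Changed;
    }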
@@ -995,26 +1118,17 @@ void LiveDebugValues::accumulateFragmentMap(MachineInstr &MI,
}
/// This routine creates OpenRanges and OutLocs.
-bool LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
+void LiveDebugValues::process(MachineInstr &MI, OpenRangesSet &OpenRanges,
VarLocInMBB &OutLocs, VarLocMap &VarLocIDs,
- TransferMap &Transfers, DebugParamMap &DebugEntryVals,
- bool transferChanges,
+ TransferMap &Transfers,
+ DebugParamMap &DebugEntryVals,
OverlapMap &OverlapFragments,
VarToFragments &SeenFragments) {
- bool Changed = false;
transferDebugValue(MI, OpenRanges, VarLocIDs);
transferRegisterDef(MI, OpenRanges, VarLocIDs, Transfers,
DebugEntryVals);
- if (transferChanges) {
- transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
- transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
- } else {
- // Build up a map of overlapping fragments on the first run through.
- if (MI.isDebugValue())
- accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
- }
- Changed = transferTerminatorInst(MI, OpenRanges, OutLocs, VarLocIDs);
- return Changed;
+ transferRegisterCopy(MI, OpenRanges, VarLocIDs, Transfers);
+ transferSpillOrRestoreInst(MI, OpenRanges, VarLocIDs, Transfers);
}
/// This routine joins the analysis results of all incoming edges in @MBB by
@@ -1024,7 +1138,8 @@ bool LiveDebugValues::join(
MachineBasicBlock &MBB, VarLocInMBB &OutLocs, VarLocInMBB &InLocs,
const VarLocMap &VarLocIDs,
SmallPtrSet<const MachineBasicBlock *, 16> &Visited,
- SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks) {
+ SmallPtrSetImpl<const MachineBasicBlock *> &ArtificialBlocks,
+ VarLocInMBB &PendingInLocs) {
LLVM_DEBUG(dbgs() << "join MBB: " << MBB.getNumber() << "\n");
bool Changed = false;
@@ -1034,9 +1149,11 @@ bool LiveDebugValues::join(
// can be joined.
int NumVisited = 0;
for (auto p : MBB.predecessors()) {
- // Ignore unvisited predecessor blocks. As we are processing
- // the blocks in reverse post-order any unvisited block can
- // be considered to not remove any incoming values.
+ // Ignore backedges if we have not visited the predecessor yet. As the
+ // predecessor hasn't yet had locations propagated into it, most locations
+ // will not yet be valid, so treat them as all being uninitialized and
+ // potentially valid. If a location guessed to be correct here is
+ // invalidated later, we will remove it when we revisit this block.
if (!Visited.count(p)) {
LLVM_DEBUG(dbgs() << " ignoring unvisited pred MBB: " << p->getNumber()
<< "\n");
@@ -1086,44 +1203,59 @@ bool LiveDebugValues::join(
// is the entry block which has no predecessor.
assert((NumVisited || MBB.pred_empty()) &&
"Should have processed at least one predecessor");
- if (InLocsT.empty())
- return false;
VarLocSet &ILS = InLocs[&MBB];
+ VarLocSet &Pending = PendingInLocs[&MBB];
- // Insert DBG_VALUE instructions, if not already inserted.
+ // New locations will have DBG_VALUE insts inserted at the start of the
+ // block, after location propagation has finished. Record the insertions
+ // that we need to perform in the Pending set.
VarLocSet Diff = InLocsT;
Diff.intersectWithComplement(ILS);
for (auto ID : Diff) {
- // This VarLoc is not found in InLocs i.e. it is not yet inserted. So, a
- // new range is started for the var from the mbb's beginning by inserting
- // a new DBG_VALUE. process() will end this range however appropriate.
- const VarLoc &DiffIt = VarLocIDs[ID];
- const MachineInstr *DebugInstr = &DiffIt.MI;
- MachineInstr *MI = nullptr;
- if (DiffIt.isConstant()) {
- MachineOperand MO(DebugInstr->getOperand(0));
- MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
- DebugInstr->getDesc(), false, MO,
- DebugInstr->getDebugVariable(),
- DebugInstr->getDebugExpression());
- } else {
- MI = BuildMI(MBB, MBB.instr_begin(), DebugInstr->getDebugLoc(),
- DebugInstr->getDesc(), DebugInstr->isIndirectDebugValue(),
- DebugInstr->getOperand(0).getReg(),
- DebugInstr->getDebugVariable(),
- DebugInstr->getDebugExpression());
- if (DebugInstr->isIndirectDebugValue())
- MI->getOperand(1).setImm(DebugInstr->getOperand(1).getImm());
- }
- LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ Pending.set(ID);
ILS.set(ID);
++NumInserted;
Changed = true;
}
+
+ // We may have lost locations by learning about a predecessor that either
+ // loses or moves a variable. Find any locations in ILS that are not in the
+ // new in-locations, and delete those.
+ VarLocSet Removed = ILS;
+ Removed.intersectWithComplement(InLocsT);
+ for (auto ID : Removed) {
+ Pending.reset(ID);
+ ILS.reset(ID);
+ ++NumRemoved;
+ Changed = true;
+ }
+
return Changed;
}
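
Roughly, this join is an intersection over visited predecessors, with unvisited (back-edge) predecessors treated optimistically; a later revisit corrects any wrong guess. A compact sketch under those simplified assumptions (it ignores the artificial-block and same-variable/different-location handling the real join performs):

    #include <algorithm>
    #include <iterator>
    #include <optional>
    #include <set>
    #include <vector>

    using LocSet = std::set<unsigned>;

    LocSet joinPreds(const std::vector<const LocSet *> &PredOutLocs) {
      // nullptr marks an unvisited predecessor: optimistically assume it
      // agrees with everyone else for now.
      std::optional<LocSet> In;
      for (const LocSet *Out : PredOutLocs) {
        if (!Out)
          continue;
        if (!In) {
          In = *Out;
          continue;
        }
        LocSet Tmp;
        std::set_intersection(Out->begin(), Out->end(), In->begin(),
                              In->end(), std::inserter(Tmp, Tmp.begin()));
        *In = std::move(Tmp);
      }
      return In.value_or(LocSet());
    }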
+void LiveDebugValues::flushPendingLocs(VarLocInMBB &PendingInLocs,
+ VarLocMap &VarLocIDs) {
+ // PendingInLocs records all locations propagated into blocks, which have
+ // not had DBG_VALUE insts created. Go through and create those insts now.
+ for (auto &Iter : PendingInLocs) {
+ // Map is keyed on a constant pointer, unwrap it so we can insert insts.
+ auto &MBB = const_cast<MachineBasicBlock &>(*Iter.first);
+ VarLocSet &Pending = Iter.second;
+
+ for (unsigned ID : Pending) {
+ // The ID location is live-in to MBB -- work out what kind of machine
+ // location it is and create a DBG_VALUE.
+ const VarLoc &DiffIt = VarLocIDs[ID];
+ MachineInstr *MI = DiffIt.BuildDbgValue(*MBB.getParent());
+ MBB.insert(MBB.instr_begin(), MI);
+
+ (void)MI;
+ LLVM_DEBUG(dbgs() << "Inserted: "; MI->dump(););
+ }
+ }
+}
+
/// Calculate the liveness information for the given machine function and
/// extend ranges across basic blocks.
bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
@@ -1140,6 +1272,9 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
VarLocInMBB OutLocs; // Ranges that exist beyond bb.
VarLocInMBB InLocs; // Ranges that are incoming after joining.
TransferMap Transfers; // DBG_VALUEs associated with spills.
+ VarLocInMBB PendingInLocs; // Ranges that are incoming after joining, but
+ // for which creating DBG_VALUE insts has been
+ // deferred until a fixed point is reached.
VarToFragments SeenFragments;
@@ -1156,8 +1291,6 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
std::greater<unsigned int>>
Pending;
- enum : bool { dontTransferChanges = false, transferChanges = true };
-
// Besides parameter's modification, check whether a DBG_VALUE is inlined
// in order to deduce whether the variable that it tracks comes from
// a different function. If that is the case we can't track its entry value.
@@ -1169,7 +1302,7 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
const TargetLowering *TLI = MF.getSubtarget().getTargetLowering();
unsigned SP = TLI->getStackPointerRegisterToSaveRestore();
- unsigned FP = TRI->getFrameRegister(MF);
+ Register FP = TRI->getFrameRegister(MF);
auto IsRegOtherThanSPAndFP = [&](const MachineOperand &Op) -> bool {
return Op.isReg() && Op.getReg() != SP && Op.getReg() != FP;
};
@@ -1195,23 +1328,14 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
!MI.getDebugExpression()->isFragment())
DebugEntryVals[MI.getDebugVariable()] = &MI;
- // Initialize every mbb with OutLocs.
- // We are not looking at any spill instructions during the initial pass
- // over the BBs. The LiveDebugVariables pass has already created DBG_VALUE
- // instructions for spills of registers that are known to be user variables
- // within the BB in which the spill occurs.
+ // Initialize per-block structures and scan for fragment overlaps.
for (auto &MBB : MF) {
+ PendingInLocs[&MBB] = VarLocSet();
+
for (auto &MI : MBB) {
- process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers, DebugEntryVals,
- dontTransferChanges, OverlapFragments, SeenFragments);
- }
- // Add any entry DBG_VALUE instructions necessitated by parameter
- // clobbering.
- for (auto &TR : Transfers) {
- MBB.insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
- TR.DebugInst);
+ if (MI.isDebugValue())
+ accumulateFragmentMap(MI, SeenFragments, OverlapFragments);
}
- Transfers.clear();
}
auto hasNonArtificialLocation = [](const MachineInstr &MI) -> bool {
@@ -1248,26 +1372,21 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
while (!Worklist.empty()) {
MachineBasicBlock *MBB = OrderToBB[Worklist.top()];
Worklist.pop();
- MBBJoined =
- join(*MBB, OutLocs, InLocs, VarLocIDs, Visited, ArtificialBlocks);
- Visited.insert(MBB);
+ MBBJoined = join(*MBB, OutLocs, InLocs, VarLocIDs, Visited,
+ ArtificialBlocks, PendingInLocs);
+ MBBJoined |= Visited.insert(MBB).second;
if (MBBJoined) {
MBBJoined = false;
Changed = true;
// Now that we have started to extend ranges across BBs we need to
// examine spill instructions to see whether they spill registers that
// correspond to user variables.
+ // First load any pending inlocs.
+ OpenRanges.insertFromLocSet(PendingInLocs[MBB], VarLocIDs);
for (auto &MI : *MBB)
- OLChanged |=
process(MI, OpenRanges, OutLocs, VarLocIDs, Transfers,
- DebugEntryVals, transferChanges, OverlapFragments,
- SeenFragments);
-
- // Add any DBG_VALUE instructions necessitated by spills.
- for (auto &TR : Transfers)
- MBB->insertAfter(MachineBasicBlock::iterator(*TR.TransferInst),
- TR.DebugInst);
- Transfers.clear();
+ DebugEntryVals, OverlapFragments, SeenFragments);
+ OLChanged |= transferTerminator(MBB, OpenRanges, OutLocs, VarLocIDs);
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs,
"OutLocs after propagating", dbgs()));
@@ -1289,6 +1408,19 @@ bool LiveDebugValues::ExtendRanges(MachineFunction &MF) {
assert(Pending.empty() && "Pending should be empty");
}
+ // Add any DBG_VALUE instructions created by location transfers.
+ for (auto &TR : Transfers) {
+ MachineBasicBlock *MBB = TR.TransferInst->getParent();
+ const VarLoc &VL = VarLocIDs[TR.LocationID];
+ MachineInstr *MI = VL.BuildDbgValue(MF);
+ MBB->insertAfterBundle(TR.TransferInst->getIterator(), MI);
+ }
+ Transfers.clear();
+
+ // Deferred inlocs will not have had any DBG_VALUE insts created; do
+ // that now.
+ flushPendingLocs(PendingInLocs, VarLocIDs);
+
LLVM_DEBUG(printVarLocInMBB(MF, OutLocs, VarLocIDs, "Final OutLocs", dbgs()));
LLVM_DEBUG(printVarLocInMBB(MF, InLocs, VarLocIDs, "Final InLocs", dbgs()));
return Changed;
@@ -1308,7 +1440,7 @@ bool LiveDebugValues::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getSubtarget().getInstrInfo();
TFI = MF.getSubtarget().getFrameLowering();
TFI->determineCalleeSaves(MF, CalleeSavedRegs,
- make_unique<RegScavenger>().get());
+ std::make_unique<RegScavenger>().get());
LS.initialize(MF);
bool Changed = ExtendRanges(MF);
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
index 656ec7d4bdfd..2dd462fc72b3 100644
--- a/lib/CodeGen/LiveDebugVariables.cpp
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -99,28 +99,27 @@ enum : unsigned { UndefLocNo = ~0U };
/// usage of the location.
class DbgValueLocation {
public:
- DbgValueLocation(unsigned LocNo, bool WasIndirect)
- : LocNo(LocNo), WasIndirect(WasIndirect) {
+ DbgValueLocation(unsigned LocNo) : LocNo(LocNo) {
static_assert(sizeof(*this) == sizeof(unsigned), "bad bitfield packing");
assert(locNo() == LocNo && "location truncation");
}
- DbgValueLocation() : LocNo(0), WasIndirect(0) {}
+ DbgValueLocation() : LocNo(0) {}
unsigned locNo() const {
// Fix up the undef location number, which gets truncated.
return LocNo == INT_MAX ? UndefLocNo : LocNo;
}
- bool wasIndirect() const { return WasIndirect; }
bool isUndef() const { return locNo() == UndefLocNo; }
DbgValueLocation changeLocNo(unsigned NewLocNo) const {
- return DbgValueLocation(NewLocNo, WasIndirect);
+ return DbgValueLocation(NewLocNo);
}
friend inline bool operator==(const DbgValueLocation &LHS,
const DbgValueLocation &RHS) {
- return LHS.LocNo == RHS.LocNo && LHS.WasIndirect == RHS.WasIndirect;
+ return LHS.LocNo == RHS.LocNo;
}
friend inline bool operator!=(const DbgValueLocation &LHS,
@@ -129,8 +128,7 @@ public:
}
private:
- unsigned LocNo : 31;
- unsigned WasIndirect : 1;
+ unsigned LocNo;
};
/// Map of where a user value is live, and its location.
@@ -144,22 +142,51 @@ namespace {
class LDVImpl;
+/// A UserValue is uniquely identified by the source variable it refers to
+/// (Variable), the expression describing how to get the value (Expression),
+/// and the specific usage (InlinedAt). InlinedAt differentiates between
+/// inlined and non-inlined occurrences of a function, and between multiple
+/// inlined instances in the same scope.
+/// FIXME: The only part of the Expression which matters for UserValue
+/// identification is the fragment part.
+class UserValueIdentity {
+private:
+ /// The debug info variable we are part of.
+ const DILocalVariable *Variable;
+ /// Any complex address expression.
+ const DIExpression *Expression;
+ /// Function usage identification.
+ const DILocation *InlinedAt;
+
+public:
+ UserValueIdentity(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA)
+ : Variable(Var), Expression(Expr), InlinedAt(IA) {}
+
+ bool match(const DILocalVariable *Var, const DIExpression *Expr,
+ const DILocation *IA) const {
+ // FIXME: The fragment should be part of the identity, but not
+ // other things in the expression like stack values.
+ return Var == Variable && Expr == Expression && IA == InlinedAt;
+ }
+
+ bool match(const UserValueIdentity &Other) const {
+ return match(Other.Variable, Other.Expression, Other.InlinedAt);
+ }
+
+ unsigned hash_value() const {
+ return hash_combine(Variable, Expression, InlinedAt);
+ }
+};
+
/// A user value is a part of a debug info user variable.
///
/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
/// holds part of a user variable. The part is identified by a byte offset.
-///
-/// UserValues are grouped into equivalence classes for easier searching. Two
-/// user values are related if they refer to the same variable, or if they are
-/// held by the same virtual register. The equivalence class is the transitive
-/// closure of that relation.
class UserValue {
const DILocalVariable *Variable; ///< The debug info variable we are part of.
const DIExpression *Expression; ///< Any complex address expression.
DebugLoc dl; ///< The debug location for the variable. This is
///< used by dwarf writer to find lexical scope.
- UserValue *leader; ///< Equivalence class leader.
- UserValue *next = nullptr; ///< Next value in equivalence class, or null.
/// Numbered locations referenced by locmap.
SmallVector<MachineOperand, 4> locations;
@@ -180,49 +207,15 @@ class UserValue {
LiveIntervals &LIS);
public:
+ UserValue(const UserValue &) = delete;
+
/// Create a new UserValue.
UserValue(const DILocalVariable *var, const DIExpression *expr, DebugLoc L,
LocMap::Allocator &alloc)
- : Variable(var), Expression(expr), dl(std::move(L)), leader(this),
- locInts(alloc) {}
-
- /// Get the leader of this value's equivalence class.
- UserValue *getLeader() {
- UserValue *l = leader;
- while (l != l->leader)
- l = l->leader;
- return leader = l;
- }
+ : Variable(var), Expression(expr), dl(std::move(L)), locInts(alloc) {}
- /// Return the next UserValue in the equivalence class.
- UserValue *getNext() const { return next; }
-
- /// Does this UserValue match the parameters?
- bool match(const DILocalVariable *Var, const DIExpression *Expr,
- const DILocation *IA) const {
- // FIXME: The fragment should be part of the equivalence class, but not
- // other things in the expression like stack values.
- return Var == Variable && Expr == Expression && dl->getInlinedAt() == IA;
- }
-
- /// Merge equivalence classes.
- static UserValue *merge(UserValue *L1, UserValue *L2) {
- L2 = L2->getLeader();
- if (!L1)
- return L2;
- L1 = L1->getLeader();
- if (L1 == L2)
- return L1;
- // Splice L2 before L1's members.
- UserValue *End = L2;
- while (End->next) {
- End->leader = L1;
- End = End->next;
- }
- End->leader = L1;
- End->next = L1->next;
- L1->next = L2;
- return L1;
+ UserValueIdentity getId() {
+ return UserValueIdentity(Variable, Expression, dl->getInlinedAt());
}
/// Return the location number that matches Loc.
@@ -261,8 +254,8 @@ public:
void mapVirtRegs(LDVImpl *LDV);
/// Add a definition point to this value.
- void addDef(SlotIndex Idx, const MachineOperand &LocMO, bool IsIndirect) {
- DbgValueLocation Loc(getLocationNo(LocMO), IsIndirect);
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ DbgValueLocation Loc(getLocationNo(LocMO));
// Add a singular (Idx,Idx) -> Loc mapping.
LocMap::iterator I = locInts.find(Idx);
if (!I.valid() || I.start() != Idx)
@@ -297,11 +290,10 @@ public:
///
/// \param LI Scan for copies of the value in LI->reg.
/// \param LocNo Location number of LI->reg.
- /// \param WasIndirect Indicates if the original use of LI->reg was indirect
/// \param Kills Points where the range of LocNo could be extended.
/// \param [in,out] NewDefs Append (Idx, LocNo) of inserted defs here.
void addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, unsigned LocNo,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS);
@@ -335,7 +327,29 @@ public:
void print(raw_ostream &, const TargetRegisterInfo *);
};
+} // namespace
+namespace llvm {
+template <> struct DenseMapInfo<UserValueIdentity> {
+ static UserValueIdentity getEmptyKey() {
+ auto Key = DenseMapInfo<DILocalVariable *>::getEmptyKey();
+ return UserValueIdentity(Key, nullptr, nullptr);
+ }
+ static UserValueIdentity getTombstoneKey() {
+ auto Key = DenseMapInfo<DILocalVariable *>::getTombstoneKey();
+ return UserValueIdentity(Key, nullptr, nullptr);
+ }
+ static unsigned getHashValue(const UserValueIdentity &Val) {
+ return Val.hash_value();
+ }
+ static bool isEqual(const UserValueIdentity &LHS,
+ const UserValueIdentity &RHS) {
+ return LHS.match(RHS);
+ }
+};
+} // namespace llvm
+
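
For readers unfamiliar with DenseMapInfo: it is the LLVM analog of supplying a hasher and an equality predicate to std::unordered_map, plus two reserved sentinel key values (empty and tombstone) that must never collide with a real key, hence the DILocalVariable sentinels above. A rough standard-library rendering of the same identity key (a sketch, not the LLVM code):

    #include <cstddef>
    #include <functional>
    #include <unordered_map>

    struct MiniIdentity {
      const void *Variable;
      const void *Expression;
      const void *InlinedAt;
      bool operator==(const MiniIdentity &O) const {
        return Variable == O.Variable && Expression == O.Expression &&
               InlinedAt == O.InlinedAt;
      }
    };

    struct MiniIdentityHash {
      size_t operator()(const MiniIdentity &V) const {
        // Same idea as hash_combine over the three pointers.
        size_t H = std::hash<const void *>()(V.Variable);
        H = H * 31 + std::hash<const void *>()(V.Expression);
        H = H * 31 + std::hash<const void *>()(V.InlinedAt);
        return H;
      }
    };

    // std::unordered_map needs no sentinel keys; DenseMap additionally
    // requires getEmptyKey()/getTombstoneKey().
    using MiniUVMap = std::unordered_map<MiniIdentity, int, MiniIdentityHash>;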
+namespace {
/// A user label is a part of a debug info user label.
class UserLabel {
const DILabel *Label; ///< The debug info label we are part of.
@@ -387,20 +401,20 @@ class LDVImpl {
/// All allocated UserLabel instances.
SmallVector<std::unique_ptr<UserLabel>, 2> userLabels;
- /// Map virtual register to eq class leader.
- using VRMap = DenseMap<unsigned, UserValue *>;
- VRMap virtRegToEqClass;
+ /// Map virtual register to UserValues which use it.
+ using VRMap = DenseMap<unsigned, SmallVector<UserValue *, 4>>;
+ VRMap VirtRegToUserVals;
- /// Map user variable to eq class leader.
- using UVMap = DenseMap<const DILocalVariable *, UserValue *>;
- UVMap userVarMap;
+ /// Map unique UserValue identity to UserValue.
+ using UVMap = DenseMap<UserValueIdentity, UserValue *>;
+ UVMap UserVarMap;
/// Find or create a UserValue.
UserValue *getUserValue(const DILocalVariable *Var, const DIExpression *Expr,
const DebugLoc &DL);
- /// Find the EC leader for VirtReg or null.
- UserValue *lookupVirtReg(unsigned VirtReg);
+ /// Find the UserValues for VirtReg or null.
+ SmallVectorImpl<UserValue *> *lookupVirtReg(unsigned VirtReg);
/// Add DBG_VALUE instruction to our maps.
///
@@ -440,8 +454,8 @@ public:
MF = nullptr;
userValues.clear();
userLabels.clear();
- virtRegToEqClass.clear();
- userVarMap.clear();
+ VirtRegToUserVals.clear();
+ UserVarMap.clear();
// Make sure we call emitDebugValues if the machine function was modified.
assert((!ModifiedMF || EmitDone) &&
"Dbg values are not emitted in LDV");
@@ -449,8 +463,8 @@ public:
ModifiedMF = false;
}
- /// Map virtual register to an equivalence class.
- void mapVirtReg(unsigned VirtReg, UserValue *EC);
+ /// Map virtual register to a UserValue.
+ void mapVirtReg(unsigned VirtReg, UserValue *UV);
/// Replace all references to OldReg with NewRegs.
void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs);
@@ -521,8 +535,6 @@ void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
OS << "undef";
else {
OS << I.value().locNo();
- if (I.value().wasIndirect())
- OS << " ind";
}
}
for (unsigned i = 0, e = locations.size(); i != e; ++i) {
@@ -554,37 +566,33 @@ void LDVImpl::print(raw_ostream &OS) {
void UserValue::mapVirtRegs(LDVImpl *LDV) {
for (unsigned i = 0, e = locations.size(); i != e; ++i)
if (locations[i].isReg() &&
- TargetRegisterInfo::isVirtualRegister(locations[i].getReg()))
+ Register::isVirtualRegister(locations[i].getReg()))
LDV->mapVirtReg(locations[i].getReg(), this);
}
UserValue *LDVImpl::getUserValue(const DILocalVariable *Var,
const DIExpression *Expr, const DebugLoc &DL) {
- UserValue *&Leader = userVarMap[Var];
- if (Leader) {
- UserValue *UV = Leader->getLeader();
- Leader = UV;
- for (; UV; UV = UV->getNext())
- if (UV->match(Var, Expr, DL->getInlinedAt()))
- return UV;
- }
+ auto Ident = UserValueIdentity(Var, Expr, DL->getInlinedAt());
+ UserValue *&UVEntry = UserVarMap[Ident];
- userValues.push_back(
- llvm::make_unique<UserValue>(Var, Expr, DL, allocator));
- UserValue *UV = userValues.back().get();
- Leader = UserValue::merge(Leader, UV);
- return UV;
+ if (UVEntry)
+ return UVEntry;
+
+ userValues.push_back(std::make_unique<UserValue>(Var, Expr, DL, allocator));
+ return UVEntry = userValues.back().get();
}
-void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
- UserValue *&Leader = virtRegToEqClass[VirtReg];
- Leader = UserValue::merge(Leader, EC);
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *UV) {
+ assert(Register::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ assert(UserVarMap.find(UV->getId()) != UserVarMap.end() &&
+ "UserValue should exist in UserVarMap");
+ VirtRegToUserVals[VirtReg].push_back(UV);
}
-UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
- if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
- return UV->getLeader();
+SmallVectorImpl<UserValue *> *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ VRMap::iterator Itr = VirtRegToUserVals.find(VirtReg);
+ if (Itr != VirtRegToUserVals.end())
+ return &Itr->getSecond();
return nullptr;
}
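
With the equivalence-class machinery gone, one virtual register simply maps to several UserValues. Shape-wise, with hypothetical simplified types in place of the LLVM ones:

    #include <unordered_map>
    #include <vector>

    struct MiniUserValue { /* variable, expression, intervals ... */ };

    using MiniVRMap =
        std::unordered_map<unsigned, std::vector<MiniUserValue *>>;

    // Registration appends; lookup returns all users of the vreg, or null.
    void mapVirtReg(MiniVRMap &M, unsigned VReg, MiniUserValue *UV) {
      M[VReg].push_back(UV);
    }

    const std::vector<MiniUserValue *> *lookupVirtReg(const MiniVRMap &M,
                                                      unsigned VReg) {
      auto It = M.find(VReg);
      return It == M.end() ? nullptr : &It->second;
    }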
@@ -606,8 +614,8 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
// could be removed or replaced by asserts.
bool Discard = false;
if (MI.getOperand(0).isReg() &&
- TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg())) {
- const unsigned Reg = MI.getOperand(0).getReg();
+ Register::isVirtualRegister(MI.getOperand(0).getReg())) {
+ const Register Reg = MI.getOperand(0).getReg();
if (!LIS->hasInterval(Reg)) {
// The DBG_VALUE is described by a virtual register that does not have a
// live interval. Discard the DBG_VALUE.
@@ -631,19 +639,18 @@ bool LDVImpl::handleDebugValue(MachineInstr &MI, SlotIndex Idx) {
}
// Get or create the UserValue for (variable,offset) here.
- bool IsIndirect = MI.getOperand(1).isImm();
- if (IsIndirect)
- assert(MI.getOperand(1).getImm() == 0 && "DBG_VALUE with nonzero offset");
+ assert(!MI.getOperand(1).isImm() && "DBG_VALUE with indirect flag before "
+ "LiveDebugVariables");
const DILocalVariable *Var = MI.getDebugVariable();
const DIExpression *Expr = MI.getDebugExpression();
UserValue *UV =
getUserValue(Var, Expr, MI.getDebugLoc());
if (!Discard)
- UV->addDef(Idx, MI.getOperand(0), IsIndirect);
+ UV->addDef(Idx, MI.getOperand(0));
else {
MachineOperand MO = MachineOperand::CreateReg(0U, false);
MO.setIsDebug();
- UV->addDef(Idx, MO, false);
+ UV->addDef(Idx, MO);
}
return true;
}
@@ -666,7 +673,7 @@ bool LDVImpl::handleDebugLabel(MachineInstr &MI, SlotIndex Idx) {
}
}
if (!Found)
- userLabels.push_back(llvm::make_unique<UserLabel>(Label, DL, Idx));
+ userLabels.push_back(std::make_unique<UserLabel>(Label, DL, Idx));
return true;
}
@@ -751,14 +758,14 @@ void UserValue::extendDef(SlotIndex Idx, DbgValueLocation Loc, LiveRange *LR,
}
void UserValue::addDefsFromCopies(
- LiveInterval *LI, unsigned LocNo, bool WasIndirect,
+ LiveInterval *LI, unsigned LocNo,
const SmallVectorImpl<SlotIndex> &Kills,
SmallVectorImpl<std::pair<SlotIndex, DbgValueLocation>> &NewDefs,
MachineRegisterInfo &MRI, LiveIntervals &LIS) {
if (Kills.empty())
return;
// Don't track copies from physregs, there are too many uses.
- if (!TargetRegisterInfo::isVirtualRegister(LI->reg))
+ if (!Register::isVirtualRegister(LI->reg))
return;
// Collect all the (vreg, valno) pairs that are copies of LI.
@@ -768,13 +775,13 @@ void UserValue::addDefsFromCopies(
// Copies of the full value.
if (MO.getSubReg() || !MI->isCopy())
continue;
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
// Don't follow copies to physregs. These are usually setting up call
// arguments, and the argument registers are always call clobbered. We are
// better off in the source register which could be a callee-saved register,
// or it could be spilled.
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (!Register::isVirtualRegister(DstReg))
continue;
// Is LocNo extended to reach this copy? If not, another def may be blocking
@@ -815,7 +822,7 @@ void UserValue::addDefsFromCopies(
MachineInstr *CopyMI = LIS.getInstructionFromIndex(DstVNI->def);
assert(CopyMI && CopyMI->isCopy() && "Bad copy value");
unsigned LocNo = getLocationNo(CopyMI->getOperand(0));
- DbgValueLocation NewLoc(LocNo, WasIndirect);
+ DbgValueLocation NewLoc(LocNo);
I.insert(Idx, Idx.getNextSlot(), NewLoc);
NewDefs.push_back(std::make_pair(Idx, NewLoc));
break;
@@ -845,7 +852,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
}
// Register locations are constrained to where the register value is live.
- if (TargetRegisterInfo::isVirtualRegister(LocMO.getReg())) {
+ if (Register::isVirtualRegister(LocMO.getReg())) {
LiveInterval *LI = nullptr;
const VNInfo *VNI = nullptr;
if (LIS.hasInterval(LocMO.getReg())) {
@@ -863,8 +870,7 @@ void UserValue::computeIntervals(MachineRegisterInfo &MRI,
// sub-register in that regclass). For now, simply skip handling copies if
// a sub-register is involved.
if (LI && !LocMO.getSubReg())
- addDefsFromCopies(LI, Loc.locNo(), Loc.wasIndirect(), Kills, Defs, MRI,
- LIS);
+ addDefsFromCopies(LI, Loc.locNo(), Kills, Defs, MRI, LIS);
continue;
}
@@ -1123,16 +1129,18 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs,
void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) {
bool DidChange = false;
- for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext())
- DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
+ if (auto *UserVals = lookupVirtReg(OldReg))
+ for (auto *UV : *UserVals)
+ DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS);
if (!DidChange)
return;
// Map all of the new virtual registers.
- UserValue *UV = lookupVirtReg(OldReg);
- for (unsigned i = 0; i != NewRegs.size(); ++i)
- mapVirtReg(NewRegs[i], UV);
+ if (auto *UserVals = lookupVirtReg(OldReg))
+ for (auto *UV : *UserVals)
+ for (unsigned i = 0; i != NewRegs.size(); ++i)
+ mapVirtReg(NewRegs[i], UV);
}
void LiveDebugVariables::
@@ -1161,10 +1169,10 @@ void UserValue::rewriteLocations(VirtRegMap &VRM, const MachineFunction &MF,
MachineOperand Loc = locations[I];
// Only virtual registers are rewritten.
if (Loc.isReg() && Loc.getReg() &&
- TargetRegisterInfo::isVirtualRegister(Loc.getReg())) {
- unsigned VirtReg = Loc.getReg();
+ Register::isVirtualRegister(Loc.getReg())) {
+ Register VirtReg = Loc.getReg();
if (VRM.isAssignedReg(VirtReg) &&
- TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ Register::isPhysicalRegister(VRM.getPhys(VirtReg))) {
// This can create a %noreg operand in rare cases when the sub-register
// index is no longer available. That means the user value is in a
// non-existent sub-register, and %noreg is exactly what we want.
@@ -1258,7 +1266,7 @@ findNextInsertLocation(MachineBasicBlock *MBB,
const TargetRegisterInfo &TRI) {
if (!LocMO.isReg())
return MBB->instr_end();
- unsigned Reg = LocMO.getReg();
+ Register Reg = LocMO.getReg();
// Find the next instruction in the MBB that defines the register Reg.
while (I != MBB->end() && !I->isTerminator()) {
@@ -1302,21 +1310,14 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex StartIdx,
// that the original virtual register was a pointer. Also, add the stack slot
// offset for the spilled register to the expression.
const DIExpression *Expr = Expression;
- uint8_t DIExprFlags = DIExpression::ApplyOffset;
- bool IsIndirect = Loc.wasIndirect();
- if (Spilled) {
- if (IsIndirect)
- DIExprFlags |= DIExpression::DerefAfter;
- Expr =
- DIExpression::prepend(Expr, DIExprFlags, SpillOffset);
- IsIndirect = true;
- }
+ if (Spilled)
+ Expr = DIExpression::prepend(Expr, DIExpression::ApplyOffset, SpillOffset);
assert((!Spilled || MO.isFI()) && "a spilled location must be a frame index");
do {
BuildMI(*MBB, I, getDebugLoc(), TII.get(TargetOpcode::DBG_VALUE),
- IsIndirect, MO, Variable, Expr);
+ Spilled, MO, Variable, Expr);
// Continue and insert DBG_VALUES after every redefinition of register
// associated with the debug value within the range
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 70b2a77fe800..54ac46f2e7ce 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -886,7 +886,7 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
const TargetRegisterInfo &TRI) {
// Phys reg should not be tracked at subreg level.
// Same for noreg (Reg == 0).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) || !Reg)
+ if (!Register::isVirtualRegister(Reg) || !Reg)
return;
// Remove the values that don't define those lanes.
SmallVector<VNInfo *, 8> ToBeRemoved;
@@ -917,7 +917,8 @@ static void stripValuesNotDefiningMask(unsigned Reg, LiveInterval::SubRange &SR,
for (VNInfo *VNI : ToBeRemoved)
SR.removeValNo(VNI);
- assert(!SR.empty() && "At least one value should be defined by this mask");
+ // If the subrange is empty at this point, the MIR is invalid. Do not assert
+ // and let the verifier catch this case.
}
void LiveInterval::refineSubRanges(
@@ -967,7 +968,7 @@ void LiveInterval::computeSubRangeUndefs(SmallVectorImpl<SlotIndex> &Undefs,
LaneBitmask LaneMask,
const MachineRegisterInfo &MRI,
const SlotIndexes &Indexes) const {
- assert(TargetRegisterInfo::isVirtualRegister(reg));
+ assert(Register::isVirtualRegister(reg));
LaneBitmask VRegMask = MRI.getMaxLaneMaskForVReg(reg);
assert((VRegMask & LaneMask).any());
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
diff --git a/lib/CodeGen/LiveIntervals.cpp b/lib/CodeGen/LiveIntervals.cpp
index aa85569063b3..2989930ad093 100644
--- a/lib/CodeGen/LiveIntervals.cpp
+++ b/lib/CodeGen/LiveIntervals.cpp
@@ -14,7 +14,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveIntervals.h"
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -22,6 +21,7 @@
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -108,7 +108,7 @@ LiveIntervals::~LiveIntervals() {
void LiveIntervals::releaseMemory() {
// Free the live intervals themselves.
for (unsigned i = 0, e = VirtRegIntervals.size(); i != e; ++i)
- delete VirtRegIntervals[TargetRegisterInfo::index2VirtReg(i)];
+ delete VirtRegIntervals[Register::index2VirtReg(i)];
VirtRegIntervals.clear();
RegMaskSlots.clear();
RegMaskBits.clear();
@@ -161,7 +161,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
// Dump the virtregs.
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (hasInterval(Reg))
OS << getInterval(Reg) << '\n';
}
@@ -186,7 +186,7 @@ LLVM_DUMP_METHOD void LiveIntervals::dumpInstrs() const {
#endif
LiveInterval* LiveIntervals::createInterval(unsigned reg) {
- float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? huge_valf : 0.0F;
+ float Weight = Register::isPhysicalRegister(reg) ? huge_valf : 0.0F;
return new LiveInterval(reg, Weight);
}
@@ -201,7 +201,7 @@ void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) {
void LiveIntervals::computeVirtRegs() {
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
createAndComputeVirtRegInterval(Reg);
@@ -441,8 +441,8 @@ void LiveIntervals::extendSegmentsToUses(LiveRange &Segments,
bool LiveIntervals::shrinkToUses(LiveInterval *li,
SmallVectorImpl<MachineInstr*> *dead) {
LLVM_DEBUG(dbgs() << "Shrink: " << *li << '\n');
- assert(TargetRegisterInfo::isVirtualRegister(li->reg)
- && "Can only shrink virtual registers");
+ assert(Register::isVirtualRegister(li->reg) &&
+ "Can only shrink virtual registers");
// Shrink subregister live ranges.
bool NeedsCleanup = false;
@@ -541,8 +541,8 @@ bool LiveIntervals::computeDeadValues(LiveInterval &LI,
void LiveIntervals::shrinkToUses(LiveInterval::SubRange &SR, unsigned Reg) {
LLVM_DEBUG(dbgs() << "Shrink: " << SR << '\n');
- assert(TargetRegisterInfo::isVirtualRegister(Reg)
- && "Can only shrink virtual registers");
+ assert(Register::isVirtualRegister(Reg) &&
+ "Can only shrink virtual registers");
// Find all the values used, including PHI kills.
ShrinkToUsesWorkList WorkList;
@@ -688,7 +688,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) {
LiveRange::const_iterator>, 4> SRs;
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
const LiveInterval &LI = getInterval(Reg);
@@ -986,10 +986,10 @@ public:
MO.setIsKill(false);
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LiveInterval &LI = LIS.getInterval(Reg);
if (LI.hasSubRanges()) {
unsigned SubReg = MO.getSubReg();
@@ -1023,7 +1023,7 @@ private:
return;
LLVM_DEBUG({
dbgs() << " ";
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
dbgs() << printReg(Reg);
if (LaneMask.any())
dbgs() << " L" << PrintLaneMask(LaneMask);
@@ -1288,6 +1288,20 @@ private:
const SlotIndex SplitPos = NewIdxDef;
OldIdxVNI = OldIdxIn->valno;
+ SlotIndex NewDefEndPoint = std::next(NewIdxIn)->end;
+ if (OldIdxIn != LR.begin() &&
+ SlotIndex::isEarlierInstr(NewIdx, std::prev(OldIdxIn)->end)) {
+ // If the segment before OldIdx read a value defined earlier than
+ // NewIdx, the moved instruction also reads and forwards that
+ // value. Extend the lifetime of the new def point.
+
+ // Extend to where the previous range started, unless there is
+ // another redef first.
+ NewDefEndPoint = std::min(OldIdxIn->start,
+ std::next(NewIdxOut)->start);
+ }
+
// Merge the OldIdxIn and OldIdxOut segments into OldIdxOut.
OldIdxOut->valno->def = OldIdxIn->start;
*OldIdxOut = LiveRange::Segment(OldIdxIn->start, OldIdxOut->end,
@@ -1305,7 +1319,8 @@ private:
// There is no gap between NewSegment and its predecessor.
*NewSegment = LiveRange::Segment(Next->start, SplitPos,
Next->valno);
- *Next = LiveRange::Segment(SplitPos, Next->end, OldIdxVNI);
+
+ *Next = LiveRange::Segment(SplitPos, NewDefEndPoint, OldIdxVNI);
Next->valno->def = SplitPos;
} else {
// There is a gap between NewSegment and its predecessor
@@ -1384,7 +1399,7 @@ private:
// Return the last use of reg between NewIdx and OldIdx.
SlotIndex findLastUseBefore(SlotIndex Before, unsigned Reg,
LaneBitmask LaneMask) {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
SlotIndex LastUse = Before;
for (MachineOperand &MO : MRI.use_nodbg_operands(Reg)) {
if (MO.isUndef())
@@ -1429,7 +1444,7 @@ private:
// Check if MII uses Reg.
for (MIBundleOperands MO(*MII); MO.isValid(); ++MO)
if (MO->isReg() && !MO->isUndef() &&
- TargetRegisterInfo::isPhysicalRegister(MO->getReg()) &&
+ Register::isPhysicalRegister(MO->getReg()) &&
TRI.hasRegUnit(MO->getReg(), Reg))
return Idx.getRegSlot();
}
@@ -1439,7 +1454,10 @@ private:
};
void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
- assert(!MI.isBundled() && "Can't handle bundled instructions yet.");
+ // It is fine to move a bundle as a whole, but not an individual instruction
+ // inside it.
+ assert((!MI.isBundled() || MI.getOpcode() == TargetOpcode::BUNDLE) &&
+ "Cannot move instruction in bundle");
SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
Indexes->removeMachineInstrFromMaps(MI);
SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(MI);
@@ -1582,8 +1600,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(),
MOE = MI.operands_end();
MOI != MOE; ++MOI) {
- if (MOI->isReg() &&
- TargetRegisterInfo::isVirtualRegister(MOI->getReg()) &&
+ if (MOI->isReg() && Register::isVirtualRegister(MOI->getReg()) &&
!hasInterval(MOI->getReg())) {
createAndComputeVirtRegInterval(MOI->getReg());
}
@@ -1591,7 +1608,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB,
}
for (unsigned Reg : OrigRegs) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
LiveInterval &LI = getInterval(Reg);
@@ -1642,7 +1659,7 @@ void LiveIntervals::splitSeparateComponents(LiveInterval &LI,
unsigned Reg = LI.reg;
const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
for (unsigned I = 1; I < NumComp; ++I) {
- unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ Register NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = createEmptyInterval(NewVReg);
SplitLIs.push_back(&NewLI);
}
diff --git a/lib/CodeGen/LivePhysRegs.cpp b/lib/CodeGen/LivePhysRegs.cpp
index cd3d248ac878..c2a1cc7c6490 100644
--- a/lib/CodeGen/LivePhysRegs.cpp
+++ b/lib/CodeGen/LivePhysRegs.cpp
@@ -46,8 +46,8 @@ void LivePhysRegs::removeDefs(const MachineInstr &MI) {
if (O->isReg()) {
if (!O->isDef() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -60,8 +60,8 @@ void LivePhysRegs::addUses(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -86,8 +86,8 @@ void LivePhysRegs::stepForward(const MachineInstr &MI,
// Remove killed registers from the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg() && !O->isDebug()) {
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (O->isDef()) {
// Note, dead defs are still recorded. The caller should decide how to
@@ -292,10 +292,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
if (!MO->isReg() || !MO->isDef() || MO->isDebug())
continue;
- unsigned Reg = MO->getReg();
+ Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsDead(IsNotLive);
@@ -309,10 +309,10 @@ void llvm::recomputeLivenessFlags(MachineBasicBlock &MBB) {
if (!MO->isReg() || !MO->readsReg() || MO->isDebug())
continue;
- unsigned Reg = MO->getReg();
+ Register Reg = MO->getReg();
if (Reg == 0)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+ assert(Register::isPhysicalRegister(Reg));
bool IsNotLive = LiveRegs.available(MRI, Reg);
MO->setIsKill(IsNotLive);
diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp
index d670f28df6ba..24b57be0da00 100644
--- a/lib/CodeGen/LiveRangeCalc.cpp
+++ b/lib/CodeGen/LiveRangeCalc.cpp
@@ -10,7 +10,7 @@
//
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
@@ -372,8 +372,7 @@ bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
report_fatal_error("Use not jointly dominated by defs.");
}
- if (TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- !MBB->isLiveIn(PhysReg)) {
+ if (Register::isPhysicalRegister(PhysReg) && !MBB->isLiveIn(PhysReg)) {
MBB->getParent()->verify();
const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
errs() << "The register " << printReg(PhysReg, TRI)
diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h
deleted file mode 100644
index 11aea5a3b016..000000000000
--- a/lib/CodeGen/LiveRangeCalc.h
+++ /dev/null
@@ -1,297 +0,0 @@
-//===- LiveRangeCalc.h - Calculate live ranges ------------------*- C++ -*-===//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// The LiveRangeCalc class can be used to compute live ranges from scratch. It
-// caches information about values in the CFG to speed up repeated operations
-// on the same live range. The cache can be shared by non-overlapping live
-// ranges. SplitKit uses that when computing the live range of split products.
-//
-// A low-level interface is available to clients that know where a variable is
-// live, but don't know which value it has at every point. LiveRangeCalc will
-// propagate values down the dominator tree, and even insert PHI-defs where
-// needed. SplitKit uses this faster interface when possible.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_LIB_CODEGEN_LIVERANGECALC_H
-#define LLVM_LIB_CODEGEN_LIVERANGECALC_H
-
-#include "llvm/ADT/ArrayRef.h"
-#include "llvm/ADT/BitVector.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/IndexedMap.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/CodeGen/LiveInterval.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/SlotIndexes.h"
-#include "llvm/MC/LaneBitmask.h"
-#include <utility>
-
-namespace llvm {
-
-template <class NodeT> class DomTreeNodeBase;
-class MachineDominatorTree;
-class MachineFunction;
-class MachineRegisterInfo;
-
-using MachineDomTreeNode = DomTreeNodeBase<MachineBasicBlock>;
-
-class LiveRangeCalc {
- const MachineFunction *MF = nullptr;
- const MachineRegisterInfo *MRI = nullptr;
- SlotIndexes *Indexes = nullptr;
- MachineDominatorTree *DomTree = nullptr;
- VNInfo::Allocator *Alloc = nullptr;
-
- /// LiveOutPair - A value and the block that defined it. The domtree node is
- /// redundant, it can be computed as: MDT[Indexes.getMBBFromIndex(VNI->def)].
- using LiveOutPair = std::pair<VNInfo *, MachineDomTreeNode *>;
-
- /// LiveOutMap - Map basic blocks to the value leaving the block.
- using LiveOutMap = IndexedMap<LiveOutPair, MBB2NumberFunctor>;
-
- /// Bit vector of active entries in LiveOut, also used as a visited set by
- /// findReachingDefs. One entry per basic block, indexed by block number.
- /// This is kept as a separate bit vector because it can be cleared quickly
- /// when switching live ranges.
- BitVector Seen;
-
- /// Map each LiveRange to a pair of bit vectors over blocks: the blocks in
- /// which the range is defined on entry, and those in which it is undefined
- /// on entry. A block is defined on entry if there is a path from at least
- /// one of the defs in the live range to the entry of the block, and
- /// conversely, a block is undefined on entry if no such path exists (i.e. no
- /// definition reaches the entry of the block). A single LiveRangeCalc
- /// object is used to track live-out information for multiple registers
- /// in live range splitting (which is ok, since the live ranges of these
- /// registers do not overlap), but the defined/undefined information must
- /// be kept separate for each individual range.
- /// By convention, EntryInfoMap[&LR] = { Defined, Undefined }.
- using EntryInfoMap = DenseMap<LiveRange *, std::pair<BitVector, BitVector>>;
- EntryInfoMap EntryInfos;
-
- /// Map each basic block where a live range is live out to the live-out value
- /// and its defining block.
- ///
- /// For every basic block, MBB, one of these conditions shall be true:
- ///
- /// 1. !Seen.count(MBB->getNumber())
- /// Blocks without a Seen bit are ignored.
- /// 2. LiveOut[MBB].second.getNode() == MBB
- /// The live-out value is defined in MBB.
- /// 3. forall P in preds(MBB): LiveOut[P] == LiveOut[MBB]
- /// The live-out value passes through MBB. All predecessors must carry
- /// the same value.
- ///
- /// The domtree node may be null; it can be computed when needed.
- ///
- /// The map can be shared by multiple live ranges as long as no two are
- /// live-out of the same block.
- LiveOutMap Map;
-
- /// LiveInBlock - Information about a basic block where a live range is known
- /// to be live-in, but the value has not yet been determined.
- struct LiveInBlock {
- // The live range set that is live-in to this block. The algorithms can
- // handle multiple non-overlapping live ranges simultaneously.
- LiveRange &LR;
-
- // DomNode - Dominator tree node for the block.
- // Cleared when the final value has been determined and LI has been updated.
- MachineDomTreeNode *DomNode;
-
- // Position in block where the live-in range ends, or SlotIndex() if the
- // range passes through the block. When the final value has been
- // determined, the range from the block start to Kill will be added to LI.
- SlotIndex Kill;
-
- // Live-in value filled in by updateSSA once it is known.
- VNInfo *Value = nullptr;
-
- LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill)
- : LR(LR), DomNode(node), Kill(kill) {}
- };
-
- /// LiveIn - Work list of blocks where the live-in value has yet to be
- /// determined. This list is typically computed by findReachingDefs() and
- /// used as a work list by updateSSA(). The low-level interface may also be
- /// used to add entries directly.
- SmallVector<LiveInBlock, 16> LiveIn;
-
- /// Check if the entry to block @p MBB can be reached by any of the defs
- /// in @p LR. Return true if none of the defs reach the entry to @p MBB.
- bool isDefOnEntry(LiveRange &LR, ArrayRef<SlotIndex> Undefs,
- MachineBasicBlock &MBB, BitVector &DefOnEntry,
- BitVector &UndefOnEntry);
-
- /// Find the set of defs that can reach @p Kill. @p Kill must belong to
- /// @p UseMBB.
- ///
- /// If exactly one def can reach @p UseMBB, and the def dominates @p Kill,
- /// all paths from the def to @p UseMBB are added to @p LR, and the function
- /// returns true.
- ///
- /// If multiple values can reach @p UseMBB, the blocks that need @p LR to be
- /// live in are added to the LiveIn array, and the function returns false.
- ///
- /// The array @p Undef provides the locations where the range @p LR becomes
- /// undefined by <def,read-undef> operands on other subranges. If @p Undef
- /// is non-empty and @p Kill is jointly dominated only by the entries of
- /// @p Undef, the function returns false.
- ///
- /// PhysReg, when set, is used to verify live-in lists on basic blocks.
- bool findReachingDefs(LiveRange &LR, MachineBasicBlock &UseMBB,
- SlotIndex Use, unsigned PhysReg,
- ArrayRef<SlotIndex> Undefs);
-
- /// updateSSA - Compute the values that will be live in to all requested
- /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form.
- ///
- /// Every live-in block must be jointly dominated by the added live-out
- /// blocks. No values are read from the live ranges.
- void updateSSA();
-
- /// Transfer information from the LiveIn vector to the live ranges and update
- /// the given @p LiveOuts.
- void updateFromLiveIns();
-
- /// Extend the live range of @p LR to reach all uses of Reg.
- ///
- /// If @p LR is a main range, or if @p LI is null, then all uses must be
- /// jointly dominated by the definitions from @p LR. If @p LR is a subrange
- /// of the live interval @p LI, corresponding to lane mask @p LaneMask,
- /// all uses must be jointly dominated by the definitions from @p LR
- /// together with definitions of other lanes where @p LR becomes undefined
- /// (via <def,read-undef> operands).
- /// If @p LR is a main range, the @p LaneMask should be set to ~0, i.e.
- /// LaneBitmask::getAll().
- void extendToUses(LiveRange &LR, unsigned Reg, LaneBitmask LaneMask,
- LiveInterval *LI = nullptr);
-
- /// Reset Map and Seen fields.
- void resetLiveOutMap();
-
-public:
- LiveRangeCalc() = default;
-
- //===--------------------------------------------------------------------===//
- // High-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Calculate live ranges from scratch.
- //
-
- /// reset - Prepare caches for a new set of non-overlapping live ranges. The
- /// caches must be reset before attempting calculations with a live range
- /// that may overlap a previously computed live range, and before the first
- /// live range in a function. If live ranges are not known to be
- /// non-overlapping, call reset before each.
- void reset(const MachineFunction *mf, SlotIndexes *SI,
- MachineDominatorTree *MDT, VNInfo::Allocator *VNIA);
-
- //===--------------------------------------------------------------------===//
- // Mid-level interface.
- //===--------------------------------------------------------------------===//
- //
- // Modify existing live ranges.
- //
-
- /// Extend the live range of @p LR to reach @p Use.
- ///
- /// The existing values in @p LR must be live so they jointly dominate @p Use.
- /// If @p Use is not dominated by a single existing value, PHI-defs are
- /// inserted as required to preserve SSA form.
- ///
- /// PhysReg, when set, is used to verify live-in lists on basic blocks.
- void extend(LiveRange &LR, SlotIndex Use, unsigned PhysReg,
- ArrayRef<SlotIndex> Undefs);
-
- /// createDeadDefs - Create a dead def in @p LR for every def operand of Reg.
- /// Each instruction defining Reg gets a new VNInfo with a corresponding
- /// minimal live range.
- void createDeadDefs(LiveRange &LR, unsigned Reg);
-
- /// Extend the live range of @p LR to reach all uses of Reg.
- ///
- /// All uses must be jointly dominated by existing liveness. PHI-defs are
- /// inserted as needed to preserve SSA form.
- void extendToUses(LiveRange &LR, unsigned PhysReg) {
- extendToUses(LR, PhysReg, LaneBitmask::getAll());
- }
-
- /// Calculates liveness for the register specified in live interval @p LI.
- /// Creates subregister live ranges as needed if subreg liveness tracking is
- /// enabled.
- void calculate(LiveInterval &LI, bool TrackSubRegs);
-
- /// For live interval \p LI with correct SubRanges construct matching
- /// information for the main live range. Expects the main live range to not
- /// have any segments or value numbers.
- void constructMainRangeFromSubranges(LiveInterval &LI);
-
- //===--------------------------------------------------------------------===//
- // Low-level interface.
- //===--------------------------------------------------------------------===//
- //
- // These functions can be used to compute live ranges where the live-in and
- // live-out blocks are already known, but the SSA value in each block is
- // unknown.
- //
- // After calling reset(), add known live-out values and known live-in blocks.
- // Then call calculateValues() to compute the actual value that is
- // live-in to each block, and add liveness to the live ranges.
- //
-
- /// setLiveOutValue - Indicate that VNI is live out from MBB. The
- /// calculateValues() function will not add liveness for MBB, the caller
- /// should take care of that.
- ///
- /// VNI may be null only if MBB is a live-through block also passed to
- /// addLiveInBlock().
- void setLiveOutValue(MachineBasicBlock *MBB, VNInfo *VNI) {
- Seen.set(MBB->getNumber());
- Map[MBB] = LiveOutPair(VNI, nullptr);
- }
-
- /// addLiveInBlock - Add a block with an unknown live-in value. This
- /// function can only be called once per basic block. Once the live-in value
- /// has been determined, calculateValues() will add liveness to LI.
- ///
- /// @param LR The live range that is live-in to the block.
- /// @param DomNode The domtree node for the block.
- /// @param Kill Index in block where LI is killed. If the value is
- /// live-through, set Kill = SlotIndex() and also call
- /// setLiveOutValue(MBB, 0).
- void addLiveInBlock(LiveRange &LR,
- MachineDomTreeNode *DomNode,
- SlotIndex Kill = SlotIndex()) {
- LiveIn.push_back(LiveInBlock(LR, DomNode, Kill));
- }
-
- /// calculateValues - Calculate the value that will be live-in to each block
- /// added with addLiveInBlock. Add PHI-def values as needed to preserve SSA
- /// form. Add liveness to all live-in blocks up to the Kill point, or the
- /// whole block for live-through blocks.
- ///
- /// Every predecessor of a live-in block must have been given a value with
- /// setLiveOutValue; the value may be null for live-through blocks.
- void calculateValues();
-
- /// A diagnostic function to check if the end of the block @p MBB is
- /// jointly dominated by the blocks corresponding to the slot indices
- /// in @p Defs. This function is mainly for use in self-verification
- /// checks.
- LLVM_ATTRIBUTE_UNUSED
- static bool isJointlyDominated(const MachineBasicBlock *MBB,
- ArrayRef<SlotIndex> Defs,
- const SlotIndexes &Indexes);
-};
-
-} // end namespace llvm
-
-#endif // LLVM_LIB_CODEGEN_LIVERANGECALC_H
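The header's three interface tiers survive the move to llvm/CodeGen/LiveRangeCalc.h unchanged. As a reading aid, here is a minimal sketch of how a client might drive the low-level interface documented above; the MachineFunction, SlotIndexes, dominator tree, VNInfo allocator, and the LR/PredMBB/VNI/UseMBB/KillIdx values are all assumed to come from the enclosing pass:

  // Sketch only: all inputs are assumed to be provided by the enclosing pass.
  LiveRangeCalc LRC;
  LRC.reset(&MF, Indexes, DomTree, &VNAlloc);

  // Record the value leaving each predecessor block...
  LRC.setLiveOutValue(PredMBB, VNI);

  // ...and register the blocks that need a live-in value computed for LR.
  LRC.addLiveInBlock(LR, DomTree->getNode(UseMBB), KillIdx);

  // Insert PHI-defs where needed and fill in liveness up to each Kill point.
  LRC.calculateValues();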
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
index 882e562ba95c..34bac082bcd7 100644
--- a/lib/CodeGen/LiveRangeEdit.cpp
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -32,7 +32,7 @@ void LiveRangeEdit::Delegate::anchor() { }
LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
bool createSubRanges) {
- unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM)
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
@@ -52,7 +52,7 @@ LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg,
}
unsigned LiveRangeEdit::createFrom(unsigned OldReg) {
- unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
+ Register VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg));
if (VRM) {
VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg));
}
@@ -114,7 +114,7 @@ bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
continue;
// We can't remat physreg uses, unless it is a constant.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
+ if (Register::isPhysicalRegister(MO.getReg())) {
if (MRI.isConstantPhysReg(MO.getReg()))
continue;
return false;
@@ -232,7 +232,7 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI,
LLVM_DEBUG(dbgs() << " folded: " << *FoldMI);
LIS.ReplaceMachineInstrInMaps(*UseMI, *FoldMI);
if (UseMI->isCall())
- UseMI->getMF()->updateCallSiteInfo(UseMI, FoldMI);
+ UseMI->getMF()->moveCallSiteInfo(UseMI, FoldMI);
UseMI->eraseFromParent();
DefMI->addRegisterDead(LI->reg, nullptr);
Dead.push_back(DefMI);
@@ -308,8 +308,8 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
MOE = MI->operands_end(); MOI != MOE; ++MOI) {
if (!MOI->isReg())
continue;
- unsigned Reg = MOI->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MOI->getReg();
+ if (!Register::isVirtualRegister(Reg)) {
// Check if MI reads any unreserved physregs.
if (Reg && MOI->readsReg() && !MRI.isReserved(Reg))
ReadsPhysRegs = true;
@@ -349,7 +349,7 @@ void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink,
// Remove all operands that aren't physregs.
for (unsigned i = MI->getNumOperands(); i; --i) {
const MachineOperand &MO = MI->getOperand(i-1);
- if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg()))
continue;
MI->RemoveOperand(i-1);
}
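The pattern in this and the following files is mechanical: unsigned register variables become llvm::Register, and the static TargetRegisterInfo::isVirtualRegister / isPhysicalRegister predicates become their Register equivalents. A rough sketch of what the predicates distinguish; MO is assumed to be a register MachineOperand:

  Register Reg = MO.getReg();  // converts to/from unsigned implicitly
  if (Register::isVirtualRegister(Reg)) {
    // Virtual registers (%0, %1, ...) map to a dense index.
    unsigned Idx = Register::virtReg2Index(Reg);
    (void)Idx;
  } else if (Register::isPhysicalRegister(Reg)) {
    // Physical registers are small target-defined numbers; 0 means
    // "no register".
  }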
diff --git a/lib/CodeGen/LiveRangeShrink.cpp b/lib/CodeGen/LiveRangeShrink.cpp
index 8818f1ce0ad9..cbf112ee2bd5 100644
--- a/lib/CodeGen/LiveRangeShrink.cpp
+++ b/lib/CodeGen/LiveRangeShrink.cpp
@@ -172,10 +172,10 @@ bool LiveRangeShrink::runOnMachineFunction(MachineFunction &MF) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || MO.isDead() || MO.isDebug())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Do not move the instruction if it def/uses a physical register,
// unless it is a constant physical register or a noreg.
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (!Register::isVirtualRegister(Reg)) {
if (!Reg || MRI.isConstantPhysReg(Reg))
continue;
Insert = nullptr;
diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp
index ce99e5535c25..72c79e5f8a75 100644
--- a/lib/CodeGen/LiveRegMatrix.cpp
+++ b/lib/CodeGen/LiveRegMatrix.cpp
@@ -118,7 +118,7 @@ void LiveRegMatrix::assign(LiveInterval &VirtReg, unsigned PhysReg) {
}
void LiveRegMatrix::unassign(LiveInterval &VirtReg) {
- unsigned PhysReg = VRM->getPhys(VirtReg.reg);
+ Register PhysReg = VRM->getPhys(VirtReg.reg);
LLVM_DEBUG(dbgs() << "unassigning " << printReg(VirtReg.reg, TRI) << " from "
<< printReg(PhysReg, TRI) << ':');
VRM->clearVirt(VirtReg.reg);
diff --git a/lib/CodeGen/LiveRegUnits.cpp b/lib/CodeGen/LiveRegUnits.cpp
index 6afb7fb7aa11..97763def1f40 100644
--- a/lib/CodeGen/LiveRegUnits.cpp
+++ b/lib/CodeGen/LiveRegUnits.cpp
@@ -47,8 +47,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
if (O->isReg()) {
if (!O->isDef() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
removeReg(Reg);
} else if (O->isRegMask())
@@ -59,8 +59,8 @@ void LiveRegUnits::stepBackward(const MachineInstr &MI) {
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (!O->isReg() || !O->readsReg() || O->isDebug())
continue;
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
addReg(Reg);
}
@@ -70,8 +70,8 @@ void LiveRegUnits::accumulate(const MachineInstr &MI) {
// Add defs, uses and regmask clobbers to the set.
for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
if (O->isReg()) {
- unsigned Reg = O->getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = O->getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (!O->isDef() && !O->readsReg())
continue;
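All three LiveRegUnits walks above (stepBackward, the read-operands loop, and accumulate) get the same Register treatment. For context, a typical backward liveness scan with this class looks roughly like the following sketch; SomePhysReg stands in for a concrete target register and MBB for the block being scanned:

  LiveRegUnits Units(*TRI);
  Units.addLiveOuts(MBB);            // seed with the registers live out of MBB
  for (MachineInstr &MI : llvm::reverse(MBB)) {
    Units.stepBackward(MI);          // clear defs, then add uses
    if (Units.available(SomePhysReg)) {
      // SomePhysReg is free immediately before MI.
    }
  }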
diff --git a/lib/CodeGen/LiveStacks.cpp b/lib/CodeGen/LiveStacks.cpp
index f55977d72723..8df84ebf4f06 100644
--- a/lib/CodeGen/LiveStacks.cpp
+++ b/lib/CodeGen/LiveStacks.cpp
@@ -58,9 +58,10 @@ LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
assert(Slot >= 0 && "Spill slot index must be >= 0");
SS2IntervalMap::iterator I = S2IMap.find(Slot);
if (I == S2IMap.end()) {
- I = S2IMap.emplace(std::piecewise_construct, std::forward_as_tuple(Slot),
- std::forward_as_tuple(
- TargetRegisterInfo::index2StackSlot(Slot), 0.0F))
+ I = S2IMap
+ .emplace(
+ std::piecewise_construct, std::forward_as_tuple(Slot),
+ std::forward_as_tuple(Register::index2StackSlot(Slot), 0.0F))
.first;
S2RCMap.insert(std::make_pair(Slot, RC));
} else {
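The reflowed emplace is purely formatting, but the idiom deserves a note: std::piecewise_construct lets the map build the LiveInterval in place from (Register::index2StackSlot(Slot), 0.0F) instead of copying or moving a temporary. A self-contained illustration of the same idiom with a made-up Widget type:

  #include <map>
  #include <tuple>
  #include <utility>

  struct Widget {
    Widget(int Id, float Weight) : Id(Id), Weight(Weight) {}
    int Id;
    float Weight;
  };

  int main() {
    std::map<int, Widget> M;
    // Construct the key from (42) and the Widget from (42, 0.5f), in place.
    auto It = M.emplace(std::piecewise_construct, std::forward_as_tuple(42),
                        std::forward_as_tuple(42, 0.5f))
                  .first;
    return It->second.Id;
  }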
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index aaff982ef1b0..9bd55c6f750f 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -26,6 +26,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
@@ -82,7 +83,7 @@ LLVM_DUMP_METHOD void LiveVariables::VarInfo::dump() const {
/// getVarInfo - Get (possibly creating) a VarInfo object for the given vreg.
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
- assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
+ assert(Register::isVirtualRegister(RegIdx) &&
"getVarInfo: not a virtual register!");
VirtRegInfo.grow(RegIdx);
return VirtRegInfo[RegIdx];
@@ -214,7 +215,7 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
MachineOperand &MO = LastDef->getOperand(i);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
continue;
- unsigned DefReg = MO.getReg();
+ Register DefReg = MO.getReg();
if (TRI->isSubRegister(Reg, DefReg)) {
for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true);
SubRegs.isValid(); ++SubRegs)
@@ -519,10 +520,9 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
}
if (!MO.isReg() || MO.getReg() == 0)
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MO.isUse()) {
- if (!(TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- MRI->isReserved(MOReg)))
+ if (!(Register::isPhysicalRegister(MOReg) && MRI->isReserved(MOReg)))
MO.setIsKill(false);
if (MO.readsReg())
UseRegs.push_back(MOReg);
@@ -530,8 +530,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
assert(MO.isDef());
// FIXME: We should not remove any dead flags. However the MIPS RDDSP
// instruction needs it at the moment: http://llvm.org/PR27116.
- if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- !MRI->isReserved(MOReg))
+ if (Register::isPhysicalRegister(MOReg) && !MRI->isReserved(MOReg))
MO.setIsDead(false);
DefRegs.push_back(MOReg);
}
@@ -541,7 +540,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
// Process all uses.
for (unsigned i = 0, e = UseRegs.size(); i != e; ++i) {
unsigned MOReg = UseRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ if (Register::isVirtualRegister(MOReg))
HandleVirtRegUse(MOReg, MBB, MI);
else if (!MRI->isReserved(MOReg))
HandlePhysRegUse(MOReg, MI);
@@ -554,7 +553,7 @@ void LiveVariables::runOnInstr(MachineInstr &MI,
// Process all defs.
for (unsigned i = 0, e = DefRegs.size(); i != e; ++i) {
unsigned MOReg = DefRegs[i];
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ if (Register::isVirtualRegister(MOReg))
HandleVirtRegDef(MOReg, MI);
else if (!MRI->isReserved(MOReg))
HandlePhysRegDef(MOReg, &MI, Defs);
@@ -566,7 +565,7 @@ void LiveVariables::runOnBlock(MachineBasicBlock *MBB, const unsigned NumRegs) {
// Mark live-in registers as live-in.
SmallVector<unsigned, 4> Defs;
for (const auto &LI : MBB->liveins()) {
- assert(TargetRegisterInfo::isPhysicalRegister(LI.PhysReg) &&
+ assert(Register::isPhysicalRegister(LI.PhysReg) &&
"Cannot have a live-in virtual register!");
HandlePhysRegDef(LI.PhysReg, nullptr, Defs);
}
@@ -654,7 +653,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
- const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ const unsigned Reg = Register::index2VirtReg(i);
for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
@@ -692,8 +691,8 @@ void LiveVariables::removeVirtualRegistersKilled(MachineInstr &MI) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isReg() && MO.isKill()) {
MO.setIsKill(false);
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
bool removed = getVarInfo(Reg).removeKill(MI);
assert(removed && "kill not in register's VarInfo?");
(void)removed;
@@ -783,7 +782,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
for (; BBI != BBE; ++BBI) {
for (MachineInstr::mop_iterator I = BBI->operands_begin(),
E = BBI->operands_end(); I != E; ++I) {
- if (I->isReg() && TargetRegisterInfo::isVirtualRegister(I->getReg())) {
+ if (I->isReg() && Register::isVirtualRegister(I->getReg())) {
if (I->isDef())
Defs.insert(I->getReg());
else if (I->isKill())
@@ -794,7 +793,7 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
// Update info for all live variables
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
// If the register is defined in the successor it can't be live in BB.
if (Defs.count(Reg))
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index b14d76a585f7..2392d4d00b56 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -261,7 +261,7 @@ void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Remember how big this blob of stack space is
MFI.setLocalFrameSize(Offset);
- MFI.setLocalFrameMaxAlign(MaxAlign);
+ MFI.setLocalFrameMaxAlign(assumeAligned(MaxAlign));
}
static inline bool
@@ -351,6 +351,14 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
assert(MFI.isObjectPreAllocated(FrameIdx) &&
"Only pre-allocated locals expected!");
+ // We need to keep the references to the stack protector slot through frame
+ // index operands so that it gets resolved by PEI rather than this pass.
+ // This avoids accesses to the stack protector though virtual base
+ // registers, and forces PEI to address it using fp/sp/bp.
+ if (MFI.hasStackProtectorIndex() &&
+ FrameIdx == MFI.getStackProtectorIndex())
+ continue;
+
LLVM_DEBUG(dbgs() << "Considering: " << MI);
unsigned idx = 0;
diff --git a/lib/CodeGen/LowerEmuTLS.cpp b/lib/CodeGen/LowerEmuTLS.cpp
index c8cf6abda4fc..ed48365b0102 100644
--- a/lib/CodeGen/LowerEmuTLS.cpp
+++ b/lib/CodeGen/LowerEmuTLS.cpp
@@ -142,7 +142,7 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
assert(EmuTlsTmplVar && "Failed to create emulated TLS initializer");
EmuTlsTmplVar->setConstant(true);
EmuTlsTmplVar->setInitializer(const_cast<Constant*>(InitValue));
- EmuTlsTmplVar->setAlignment(GVAlignment);
+ EmuTlsTmplVar->setAlignment(Align(GVAlignment));
copyLinkageVisibility(M, GV, EmuTlsTmplVar);
}
@@ -155,9 +155,8 @@ bool LowerEmuTLS::addEmuTlsVar(Module &M, const GlobalVariable *GV) {
ArrayRef<Constant*> ElementValueArray(ElementValues, 4);
EmuTlsVar->setInitializer(
ConstantStruct::get(EmuTlsVarType, ElementValueArray));
- unsigned MaxAlignment = std::max(
- DL.getABITypeAlignment(WordType),
- DL.getABITypeAlignment(VoidPtrType));
+ Align MaxAlignment(std::max(DL.getABITypeAlignment(WordType),
+ DL.getABITypeAlignment(VoidPtrType)));
EmuTlsVar->setAlignment(MaxAlignment);
return true;
}
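Both hunks here belong to the wider migration from raw unsigned alignments to the llvm::Align type, which wraps a known power-of-two value and asserts on zero; assumeAligned, used in the LocalStackSlotAllocation hunk above, instead maps zero to Align(1). A small sketch of the API as it is used in these changes:

  #include "llvm/Support/Alignment.h"
  #include <algorithm>
  #include <cstdint>

  uint64_t maxAlignValue() {
    llvm::Align A(16);                // must be a power of two; Align(0) asserts
    llvm::Align B(8);
    llvm::Align Max = std::max(A, B); // Align is totally ordered
    return Max.value();               // back to a plain integer when needed
  }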
diff --git a/lib/CodeGen/MIRCanonicalizerPass.cpp b/lib/CodeGen/MIRCanonicalizerPass.cpp
index f49bc854e23f..c9bb5461aa3c 100644
--- a/lib/CodeGen/MIRCanonicalizerPass.cpp
+++ b/lib/CodeGen/MIRCanonicalizerPass.cpp
@@ -23,12 +23,14 @@
//
//===----------------------------------------------------------------------===//
+#include "MIRVRegNamerUtils.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <queue>
@@ -71,28 +73,6 @@ public:
} // end anonymous namespace
-enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
-class TypedVReg {
- VRType type;
- unsigned reg;
-
-public:
- TypedVReg(unsigned reg) : type(RSE_Reg), reg(reg) {}
- TypedVReg(VRType type) : type(type), reg(~0U) {
- assert(type != RSE_Reg && "Expected a non-register type.");
- }
-
- bool isReg() const { return type == RSE_Reg; }
- bool isFrameIndex() const { return type == RSE_FrameIndex; }
- bool isCandidate() const { return type == RSE_NewCandidate; }
-
- VRType getType() const { return type; }
- unsigned getReg() const {
- assert(this->isReg() && "Expected a virtual or physical register.");
- return reg;
- }
-};
-
char MIRCanonicalizer::ID;
char &llvm::MIRCanonicalizerID = MIRCanonicalizer::ID;
@@ -190,7 +170,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
if (!MO.isReg())
continue;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (Register::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
@@ -207,7 +187,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
continue;
MachineOperand &MO = II->getOperand(0);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
if (!MO.isDef())
continue;
@@ -220,7 +200,7 @@ static bool rescheduleCanonically(unsigned &PseudoIdempotentInstCount,
}
if (II->getOperand(i).isReg()) {
- if (!TargetRegisterInfo::isVirtualRegister(II->getOperand(i).getReg()))
+ if (!Register::isVirtualRegister(II->getOperand(i).getReg()))
if (llvm::find(PhysRegDefs, II->getOperand(i).getReg()) ==
PhysRegDefs.end()) {
continue;
@@ -340,12 +320,12 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
if (!MI->getOperand(1).isReg())
continue;
- const unsigned Dst = MI->getOperand(0).getReg();
- const unsigned Src = MI->getOperand(1).getReg();
+ const Register Dst = MI->getOperand(0).getReg();
+ const Register Src = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Dst))
+ if (!Register::isVirtualRegister(Dst))
continue;
- if (!TargetRegisterInfo::isVirtualRegister(Src))
+ if (!Register::isVirtualRegister(Src))
continue;
// Not folding COPY instructions if regbankselect has not set the RCs.
// Why are we only considering Register Classes? Because the verifier
@@ -370,258 +350,6 @@ static bool propagateLocalCopies(MachineBasicBlock *MBB) {
return Changed;
}
-/// Here we find our candidates. What makes an interesting candidate?
-/// An candidate for a canonicalization tree root is normally any kind of
-/// instruction that causes side effects such as a store to memory or a copy to
-/// a physical register or a return instruction. We use these as an expression
-/// tree root that we walk inorder to build a canonical walk which should result
-/// in canoncal vreg renaming.
-static std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
- std::vector<MachineInstr *> Candidates;
- MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
-
- for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
- MachineInstr *MI = &*II;
-
- bool DoesMISideEffect = false;
-
- if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
- const unsigned Dst = MI->getOperand(0).getReg();
- DoesMISideEffect |= !TargetRegisterInfo::isVirtualRegister(Dst);
-
- for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
- if (DoesMISideEffect)
- break;
- DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
- }
- }
-
- if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
- continue;
-
- LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
- Candidates.push_back(MI);
- }
-
- return Candidates;
-}
-
-static void doCandidateWalk(std::vector<TypedVReg> &VRegs,
- std::queue<TypedVReg> &RegQueue,
- std::vector<MachineInstr *> &VisitedMIs,
- const MachineBasicBlock *MBB) {
-
- const MachineFunction &MF = *MBB->getParent();
- const MachineRegisterInfo &MRI = MF.getRegInfo();
-
- while (!RegQueue.empty()) {
-
- auto TReg = RegQueue.front();
- RegQueue.pop();
-
- if (TReg.isFrameIndex()) {
- LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
- VRegs.push_back(TypedVReg(RSE_FrameIndex));
- continue;
- }
-
- assert(TReg.isReg() && "Expected vreg or physreg.");
- unsigned Reg = TReg.getReg();
-
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
- LLVM_DEBUG({
- dbgs() << "Popping vreg ";
- MRI.def_begin(Reg)->dump();
- dbgs() << "\n";
- });
-
- if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
- return TR.isReg() && TR.getReg() == Reg;
- })) {
- VRegs.push_back(TypedVReg(Reg));
- }
- } else {
- LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
- VRegs.push_back(TypedVReg(Reg));
- continue;
- }
-
- for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
- MachineInstr *Def = RI->getParent();
-
- if (Def->getParent() != MBB)
- continue;
-
- if (llvm::any_of(VisitedMIs,
- [&](const MachineInstr *VMI) { return Def == VMI; })) {
- break;
- }
-
- LLVM_DEBUG({
- dbgs() << "\n========================\n";
- dbgs() << "Visited MI: ";
- Def->dump();
- dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
- dbgs() << "\n========================\n";
- });
- VisitedMIs.push_back(Def);
- for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
-
- MachineOperand &MO = Def->getOperand(I);
- if (MO.isFI()) {
- LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
- RegQueue.push(TypedVReg(RSE_FrameIndex));
- }
-
- if (!MO.isReg())
- continue;
- RegQueue.push(TypedVReg(MO.getReg()));
- }
- }
- }
-}
-
-namespace {
-class NamedVRegCursor {
- MachineRegisterInfo &MRI;
- unsigned virtualVRegNumber;
-
-public:
- NamedVRegCursor(MachineRegisterInfo &MRI) : MRI(MRI), virtualVRegNumber(0) {}
-
- void SkipVRegs() {
- unsigned VRegGapIndex = 1;
- if (!virtualVRegNumber) {
- VRegGapIndex = 0;
- virtualVRegNumber = MRI.createIncompleteVirtualRegister();
- }
- const unsigned VR_GAP = (++VRegGapIndex * 1000);
-
- unsigned I = virtualVRegNumber;
- const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
-
- virtualVRegNumber = E;
- }
-
- unsigned getVirtualVReg() const { return virtualVRegNumber; }
-
- unsigned incrementVirtualVReg(unsigned incr = 1) {
- virtualVRegNumber += incr;
- return virtualVRegNumber;
- }
-
- unsigned createVirtualRegister(unsigned VReg) {
- if (!virtualVRegNumber)
- SkipVRegs();
- std::string S;
- raw_string_ostream OS(S);
- OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
- OS.flush();
- virtualVRegNumber++;
- if (auto RC = MRI.getRegClassOrNull(VReg))
- return MRI.createVirtualRegister(RC, OS.str());
- return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
- }
-};
-} // namespace
-
-static std::map<unsigned, unsigned>
-GetVRegRenameMap(const std::vector<TypedVReg> &VRegs,
- const std::vector<unsigned> &renamedInOtherBB,
- MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
- std::map<unsigned, unsigned> VRegRenameMap;
- bool FirstCandidate = true;
-
- for (auto &vreg : VRegs) {
- if (vreg.isFrameIndex()) {
- // We skip one vreg for any frame index because there is a good chance
- // (especially when comparing SelectionDAG to GlobalISel generated MIR)
- // that in the other file we are just getting an incoming vreg that comes
- // from a copy from a frame index. So it's safe to skip by one.
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
- continue;
- } else if (vreg.isCandidate()) {
-
- // After the first candidate, for every subsequent candidate, we skip mod
- // 10 registers so that the candidates are more likely to start at the
- // same vreg number making it more likely that the canonical walk from the
- // candidate insruction. We don't need to skip from the first candidate of
- // the BasicBlock because we already skip ahead several vregs for each BB.
- unsigned LastRenameReg = NVC.getVirtualVReg();
- if (FirstCandidate)
- NVC.incrementVirtualVReg(LastRenameReg % 10);
- FirstCandidate = false;
- continue;
- } else if (!TargetRegisterInfo::isVirtualRegister(vreg.getReg())) {
- unsigned LastRenameReg = NVC.incrementVirtualVReg();
- (void)LastRenameReg;
- LLVM_DEBUG({
- dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
- });
- continue;
- }
-
- auto Reg = vreg.getReg();
- if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
- LLVM_DEBUG(dbgs() << "Vreg " << Reg
- << " already renamed in other BB.\n";);
- continue;
- }
-
- auto Rename = NVC.createVirtualRegister(Reg);
-
- if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
- LLVM_DEBUG(dbgs() << "Mapping vreg ";);
- if (MRI.reg_begin(Reg) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Reg;);
- }
- LLVM_DEBUG(dbgs() << " to ";);
- if (MRI.reg_begin(Rename) != MRI.reg_end()) {
- LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
- } else {
- LLVM_DEBUG(dbgs() << Rename;);
- }
- LLVM_DEBUG(dbgs() << "\n";);
-
- VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
- }
- }
-
- return VRegRenameMap;
-}
-
-static bool doVRegRenaming(std::vector<unsigned> &RenamedInOtherBB,
- const std::map<unsigned, unsigned> &VRegRenameMap,
- MachineRegisterInfo &MRI) {
- bool Changed = false;
- for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
-
- auto VReg = I->first;
- auto Rename = I->second;
-
- RenamedInOtherBB.push_back(Rename);
-
- std::vector<MachineOperand *> RenameMOs;
- for (auto &MO : MRI.reg_operands(VReg)) {
- RenameMOs.push_back(&MO);
- }
-
- for (auto *MO : RenameMOs) {
- Changed = true;
- MO->setReg(Rename);
-
- if (!MO->isDef())
- MO->setIsKill(false);
- }
- }
-
- return Changed;
-}
-
static bool doDefKillClear(MachineBasicBlock *MBB) {
bool Changed = false;
@@ -646,9 +374,7 @@ static bool doDefKillClear(MachineBasicBlock *MBB) {
static bool runOnBasicBlock(MachineBasicBlock *MBB,
std::vector<StringRef> &bbNames,
- std::vector<unsigned> &renamedInOtherBB,
- unsigned &basicBlockNum, unsigned &VRegGapIndex,
- NamedVRegCursor &NVC) {
+ unsigned &basicBlockNum, NamedVRegCursor &NVC) {
if (CanonicalizeBasicBlockNumber != ~0U) {
if (CanonicalizeBasicBlockNumber != basicBlockNum++)
@@ -687,74 +413,20 @@ static bool runOnBasicBlock(MachineBasicBlock *MBB,
Changed |= rescheduleCanonically(IdempotentInstCount, MBB);
LLVM_DEBUG(dbgs() << "MBB After Scheduling:\n"; MBB->dump(););
- std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
- std::vector<MachineInstr *> VisitedMIs;
- llvm::copy(Candidates, std::back_inserter(VisitedMIs));
-
- std::vector<TypedVReg> VRegs;
- for (auto candidate : Candidates) {
- VRegs.push_back(TypedVReg(RSE_NewCandidate));
-
- std::queue<TypedVReg> RegQueue;
-
- // Here we walk the vreg operands of a non-root node along our walk.
- // The root nodes are the original candidates (stores normally).
- // These are normally not the root nodes (except for the case of copies to
- // physical registers).
- for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
- if (candidate->mayStore() || candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
- RegQueue.push(TypedVReg(MO.getReg()));
- }
-
- // Here we walk the root candidates. We start from the 0th operand because
- // the root is normally a store to a vreg.
- for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
-
- if (!candidate->mayStore() && !candidate->isBranch())
- break;
-
- MachineOperand &MO = candidate->getOperand(i);
-
- // TODO: Do we want to only add vregs here?
- if (!MO.isReg() && !MO.isFI())
- continue;
-
- LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
-
- RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
- : TypedVReg(RSE_FrameIndex));
- }
-
- doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
- }
-
- // If we have populated no vregs to rename then bail.
- // The rest of this function does the vreg remapping.
- if (VRegs.size() == 0)
- return Changed;
-
- auto VRegRenameMap = GetVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
- Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+ Changed |= NVC.renameVRegs(MBB);
// Here we renumber the def vregs for the idempotent instructions from the top
// of the MachineBasicBlock so that they are named in the order that we sorted
// them alphabetically. Eventually we won't need SkipVRegs because we will use
// named vregs instead.
if (IdempotentInstCount)
- NVC.SkipVRegs();
+ NVC.skipVRegs();
auto MII = MBB->begin();
for (unsigned i = 0; i < IdempotentInstCount && MII != MBB->end(); ++i) {
MachineInstr &MI = *MII++;
Changed = true;
- unsigned vRegToRename = MI.getOperand(0).getReg();
+ Register vRegToRename = MI.getOperand(0).getReg();
auto Rename = NVC.createVirtualRegister(vRegToRename);
std::vector<MachineOperand *> RenameMOs;
@@ -799,9 +471,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
<< "\n\n================================================\n\n";);
std::vector<StringRef> BBNames;
- std::vector<unsigned> RenamedInOtherBB;
- unsigned GapIdx = 0;
unsigned BBNum = 0;
bool Changed = false;
@@ -809,8 +479,7 @@ bool MIRCanonicalizer::runOnMachineFunction(MachineFunction &MF) {
MachineRegisterInfo &MRI = MF.getRegInfo();
NamedVRegCursor NVC(MRI);
for (auto MBB : RPOList)
- Changed |=
- runOnBasicBlock(MBB, BBNames, RenamedInOtherBB, BBNum, GapIdx, NVC);
+ Changed |= runOnBasicBlock(MBB, BBNames, BBNum, NVC);
return Changed;
}
diff --git a/lib/CodeGen/MIRNamerPass.cpp b/lib/CodeGen/MIRNamerPass.cpp
new file mode 100644
index 000000000000..9d719f3917ce
--- /dev/null
+++ b/lib/CodeGen/MIRNamerPass.cpp
@@ -0,0 +1,77 @@
+//===----------------------- MIRNamer.cpp - MIR Namer ---------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The purpose of this pass is to rename virtual register operands with the
+// goal of making MIR tests easier to author and to read. This pass reuses
+// the vreg renamer used by MIRCanonicalizerPass.
+//
+// Basic Usage:
+//
+// llc -o - -run-pass mir-namer example.mir
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace llvm {
+extern char &MIRNamerID;
+} // namespace llvm
+
+#define DEBUG_TYPE "mir-namer"
+
+namespace {
+
+class MIRNamer : public MachineFunctionPass {
+public:
+ static char ID;
+ MIRNamer() : MachineFunctionPass(ID) {}
+
+ StringRef getPassName() const override {
+ return "Rename virtual register operands";
+ }
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ bool Changed = false;
+
+ if (MF.empty())
+ return Changed;
+
+ NamedVRegCursor NVC(MF.getRegInfo());
+
+ ReversePostOrderTraversal<MachineBasicBlock *> RPOT(&*MF.begin());
+ for (auto &MBB : RPOT)
+ Changed |= NVC.renameVRegs(MBB);
+
+ return Changed;
+ }
+};
+
+} // end anonymous namespace
+
+char MIRNamer::ID;
+
+char &llvm::MIRNamerID = MIRNamer::ID;
+
+INITIALIZE_PASS_BEGIN(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
+
+INITIALIZE_PASS_END(MIRNamer, "mir-namer", "Rename Register Operands", false,
+ false)
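To make the effect concrete: where an unnamed def would previously print with a bare number, the renamer gives it a name derived from the cursor's counter. On a hypothetical AArch64 line, a def such as

  %4:gpr32 = MOVi32imm 1

might come out as

  %namedVReg1004:gpr32 = MOVi32imm 1

after the pass; the exact number depends on the cursor state, so both the opcode and the number are illustrative only.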
diff --git a/lib/CodeGen/MIRParser/MILexer.cpp b/lib/CodeGen/MIRParser/MILexer.cpp
index 4899bd3f5811..ad5c617623f2 100644
--- a/lib/CodeGen/MIRParser/MILexer.cpp
+++ b/lib/CodeGen/MIRParser/MILexer.cpp
@@ -249,6 +249,7 @@ static MIToken::TokenKind getIdentifierKind(StringRef Identifier) {
.Case("successors", MIToken::kw_successors)
.Case("floatpred", MIToken::kw_floatpred)
.Case("intpred", MIToken::kw_intpred)
+ .Case("shufflemask", MIToken::kw_shufflemask)
.Case("pre-instr-symbol", MIToken::kw_pre_instr_symbol)
.Case("post-instr-symbol", MIToken::kw_post_instr_symbol)
.Case("unknown-size", MIToken::kw_unknown_size)
diff --git a/lib/CodeGen/MIRParser/MILexer.h b/lib/CodeGen/MIRParser/MILexer.h
index 0fe3f9f706db..200f9d026cc8 100644
--- a/lib/CodeGen/MIRParser/MILexer.h
+++ b/lib/CodeGen/MIRParser/MILexer.h
@@ -117,6 +117,7 @@ struct MIToken {
kw_successors,
kw_floatpred,
kw_intpred,
+ kw_shufflemask,
kw_pre_instr_symbol,
kw_post_instr_symbol,
kw_unknown_size,
@@ -146,6 +147,7 @@ struct MIToken {
IntegerLiteral,
FloatingPointLiteral,
HexLiteral,
+ VectorLiteral,
VirtualRegister,
ConstantPoolItem,
JumpTableIndex,
diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp
index c0b800a0b870..6498acc9fa51 100644
--- a/lib/CodeGen/MIRParser/MIParser.cpp
+++ b/lib/CodeGen/MIRParser/MIParser.cpp
@@ -451,6 +451,7 @@ public:
bool parseBlockAddressOperand(MachineOperand &Dest);
bool parseIntrinsicOperand(MachineOperand &Dest);
bool parsePredicateOperand(MachineOperand &Dest);
+ bool parseShuffleMaskOperand(MachineOperand &Dest);
bool parseTargetIndexOperand(MachineOperand &Dest);
bool parseCustomRegisterMaskOperand(MachineOperand &Dest);
bool parseLiveoutRegisterMaskOperand(MachineOperand &Dest);
@@ -640,7 +641,7 @@ bool MIParser::parseBasicBlockDefinition(
return error(Loc, Twine("redefinition of machine basic block with id #") +
Twine(ID));
if (Alignment)
- MBB->setAlignment(Alignment);
+ MBB->setAlignment(Align(Alignment));
if (HasAddressTaken)
MBB->setHasAddressTaken();
MBB->setIsEHPad(IsLandingPad);
@@ -1078,7 +1079,7 @@ static const char *printImplicitRegisterFlag(const MachineOperand &MO) {
static std::string getRegisterName(const TargetRegisterInfo *TRI,
unsigned Reg) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "expected phys reg");
+ assert(Register::isPhysicalRegister(Reg) && "expected phys reg");
return StringRef(TRI->getName(Reg)).lower();
}
@@ -1408,11 +1409,11 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (Token.is(MIToken::dot)) {
if (parseSubRegisterIndex(SubReg))
return true;
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("subregister index expects a virtual register");
}
if (Token.is(MIToken::colon)) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("register class specification expects a virtual register");
lex();
if (parseRegisterClassOrBank(*RegInfo))
@@ -1436,12 +1437,13 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
return error("inconsistent type for generic virtual register");
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
MRI.setType(Reg, Ty);
}
}
} else if (consumeIfPresent(MIToken::lparen)) {
// Virtual registers may have a type with GlobalISel.
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return error("unexpected type on physical register");
LLT Ty;
@@ -1454,8 +1456,9 @@ bool MIParser::parseRegisterOperand(MachineOperand &Dest,
if (MRI.getType(Reg).isValid() && MRI.getType(Reg) != Ty)
return error("inconsistent type for generic virtual register");
+ MRI.setRegClassOrRegBank(Reg, static_cast<RegisterBank *>(nullptr));
MRI.setType(Reg, Ty);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (Register::isVirtualRegister(Reg)) {
// Generic virtual registers must have a type.
// If we end up here this means the type hasn't been specified and
// this is bad!
@@ -2285,6 +2288,49 @@ bool MIParser::parsePredicateOperand(MachineOperand &Dest) {
return false;
}
+bool MIParser::parseShuffleMaskOperand(MachineOperand &Dest) {
+ assert(Token.is(MIToken::kw_shufflemask));
+
+ lex();
+ if (expectAndConsume(MIToken::lparen))
+ return error("expected syntax shufflemask(<integer or undef>, ...)");
+
+ SmallVector<Constant *, 32> ShufMask;
+ LLVMContext &Ctx = MF.getFunction().getContext();
+ Type *I32Ty = Type::getInt32Ty(Ctx);
+
+ bool AllZero = true;
+ bool AllUndef = true;
+
+ do {
+ if (Token.is(MIToken::kw_undef)) {
+ ShufMask.push_back(UndefValue::get(I32Ty));
+ AllZero = false;
+ } else if (Token.is(MIToken::IntegerLiteral)) {
+ AllUndef = false;
+ const APSInt &Int = Token.integerValue();
+ if (!Int.isNullValue())
+ AllZero = false;
+ ShufMask.push_back(ConstantInt::get(I32Ty, Int.getExtValue()));
+ } else
+ return error("expected integer constant");
+
+ lex();
+ } while (consumeIfPresent(MIToken::comma));
+
+ if (expectAndConsume(MIToken::rparen))
+ return error("shufflemask should be terminated by ')'.");
+
+ if (AllZero || AllUndef) {
+ VectorType *VT = VectorType::get(I32Ty, ShufMask.size());
+ Constant *C = AllZero ? Constant::getNullValue(VT) : UndefValue::get(VT);
+ Dest = MachineOperand::CreateShuffleMask(C);
+ } else
+ Dest = MachineOperand::CreateShuffleMask(ConstantVector::get(ShufMask));
+
+ return false;
+}
+
bool MIParser::parseTargetIndexOperand(MachineOperand &Dest) {
assert(Token.is(MIToken::kw_target_index));
lex();
@@ -2432,6 +2478,8 @@ bool MIParser::parseMachineOperand(MachineOperand &Dest,
case MIToken::kw_floatpred:
case MIToken::kw_intpred:
return parsePredicateOperand(Dest);
+ case MIToken::kw_shufflemask:
+ return parseShuffleMaskOperand(Dest);
case MIToken::Error:
return true;
case MIToken::Identifier:
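Putting the lexer keyword, the token kinds, and parseShuffleMaskOperand together, the concrete syntax the parser now accepts looks roughly like this MIR fragment (the opcode and register types are illustrative, not taken from a test in this patch):

  %2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 1, undef, 3)

An all-zero or all-undef mask is collapsed to Constant::getNullValue or UndefValue of the vector type, per the AllZero/AllUndef flags above.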
diff --git a/lib/CodeGen/MIRParser/MIRParser.cpp b/lib/CodeGen/MIRParser/MIRParser.cpp
index b242934def80..55fac93d8991 100644
--- a/lib/CodeGen/MIRParser/MIRParser.cpp
+++ b/lib/CodeGen/MIRParser/MIRParser.cpp
@@ -216,7 +216,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
return nullptr;
// Create an empty module when the MIR file is empty.
NoMIRDocuments = true;
- return llvm::make_unique<Module>(Filename, Context);
+ return std::make_unique<Module>(Filename, Context);
}
std::unique_ptr<Module> M;
@@ -236,7 +236,7 @@ std::unique_ptr<Module> MIRParserImpl::parseIRModule() {
NoMIRDocuments = true;
} else {
// Create a new, empty module.
- M = llvm::make_unique<Module>(Filename, Context);
+ M = std::make_unique<Module>(Filename, Context);
NoLLVMIR = true;
}
return M;
@@ -306,7 +306,7 @@ bool MIRParserImpl::parseMachineFunction(Module &M, MachineModuleInfo &MMI) {
static bool isSSA(const MachineFunction &MF) {
const MachineRegisterInfo &MRI = MF.getRegInfo();
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (!MRI.hasOneDef(Reg) && !MRI.def_empty(Reg))
return false;
}
@@ -355,10 +355,10 @@ bool MIRParserImpl::initializeCallSiteInfo(
if (MILoc.Offset >= CallB->size())
return error(Twine(MF.getName()) +
Twine(" call instruction offset out of range.") +
- "Unable to reference instruction at bb: " +
+ " Unable to reference instruction at bb: " +
Twine(MILoc.BlockNum) + " at offset:" + Twine(MILoc.Offset));
- auto CallI = std::next(CallB->begin(), MILoc.Offset);
- if (!CallI->isCall())
+ auto CallI = std::next(CallB->instr_begin(), MILoc.Offset);
+ if (!CallI->isCall(MachineInstr::IgnoreBundle))
return error(Twine(MF.getName()) +
Twine(" call site info should reference call "
"instruction. Instruction at bb:") +
@@ -393,7 +393,7 @@ MIRParserImpl::initializeMachineFunction(const yaml::MachineFunction &YamlMF,
}
if (YamlMF.Alignment)
- MF.setAlignment(YamlMF.Alignment);
+ MF.setAlignment(Align(YamlMF.Alignment));
MF.setExposesReturnsTwice(YamlMF.ExposesReturnsTwice);
MF.setHasWinCFI(YamlMF.HasWinCFI);
@@ -949,6 +949,6 @@ llvm::createMIRParser(std::unique_ptr<MemoryBuffer> Contents,
"Can't read MIR with a Context that discards named Values")));
return nullptr;
}
- return llvm::make_unique<MIRParser>(
- llvm::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
+ return std::make_unique<MIRParser>(
+ std::make_unique<MIRParserImpl>(std::move(Contents), Filename, Context));
}
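The instr_begin()/isCall(MachineInstr::IgnoreBundle) change matters once instructions are bundled: the plain block iterator advances one whole bundle at a time, so an offset counted over individual instructions would land on the wrong one. A sketch of the two iteration granularities, assuming a block MBB:

  // Top-level view: one step per bundle (a bundle header counts once).
  for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ++I) {
  }

  // Instruction view: every instruction, including those inside bundles.
  for (MachineBasicBlock::instr_iterator I = MBB.instr_begin(),
                                         E = MBB.instr_end();
       I != E; ++I) {
  }

The matching printer change in MIRPrinter.cpp below computes the stored offset with instr_begin() for the same reason.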
diff --git a/lib/CodeGen/MIRPrinter.cpp b/lib/CodeGen/MIRPrinter.cpp
index 0a95a0ced0f5..1a4e21ac06a9 100644
--- a/lib/CodeGen/MIRPrinter.cpp
+++ b/lib/CodeGen/MIRPrinter.cpp
@@ -197,7 +197,7 @@ void MIRPrinter::print(const MachineFunction &MF) {
yaml::MachineFunction YamlMF;
YamlMF.Name = MF.getName();
- YamlMF.Alignment = MF.getAlignment();
+ YamlMF.Alignment = MF.getAlignment().value();
YamlMF.ExposesReturnsTwice = MF.exposesReturnsTwice();
YamlMF.HasWinCFI = MF.hasWinCFI();
@@ -290,7 +290,7 @@ void MIRPrinter::convert(yaml::MachineFunction &MF,
// Print the virtual register definitions.
for (unsigned I = 0, E = RegInfo.getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
yaml::VirtualRegisterDefinition VReg;
VReg.ID = I;
if (RegInfo.getVRegName(Reg) != "")
@@ -473,10 +473,11 @@ void MIRPrinter::convertCallSiteObjects(yaml::MachineFunction &YMF,
yaml::CallSiteInfo::MachineInstrLoc CallLocation;
// Prepare instruction position.
- MachineBasicBlock::const_iterator CallI = CSInfo.first->getIterator();
+ MachineBasicBlock::const_instr_iterator CallI = CSInfo.first->getIterator();
CallLocation.BlockNum = CallI->getParent()->getNumber();
// Get call instruction offset from the beginning of block.
- CallLocation.Offset = std::distance(CallI->getParent()->begin(), CallI);
+ CallLocation.Offset =
+ std::distance(CallI->getParent()->instr_begin(), CallI);
YmlCS.CallLocation = CallLocation;
// Construct call arguments and their forwarding register info.
for (auto ArgReg : CSInfo.second) {
@@ -628,9 +629,9 @@ void MIPrinter::print(const MachineBasicBlock &MBB) {
OS << "landing-pad";
HasAttributes = true;
}
- if (MBB.getAlignment()) {
+ if (MBB.getAlignment() != Align::None()) {
OS << (HasAttributes ? ", " : " (");
- OS << "align " << MBB.getAlignment();
+ OS << "align " << MBB.getAlignment().value();
HasAttributes = true;
}
if (HasAttributes)
@@ -842,7 +843,8 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx,
case MachineOperand::MO_CFIIndex:
case MachineOperand::MO_IntrinsicID:
case MachineOperand::MO_Predicate:
- case MachineOperand::MO_BlockAddress: {
+ case MachineOperand::MO_BlockAddress:
+ case MachineOperand::MO_ShuffleMask: {
unsigned TiedOperandIdx = 0;
if (ShouldPrintRegisterTies && Op.isReg() && Op.isTied() && !Op.isDef())
TiedOperandIdx = Op.getParent()->findTiedOperandIdx(OpIdx);
diff --git a/lib/CodeGen/MIRVRegNamerUtils.cpp b/lib/CodeGen/MIRVRegNamerUtils.cpp
new file mode 100644
index 000000000000..6629000f468f
--- /dev/null
+++ b/lib/CodeGen/MIRVRegNamerUtils.cpp
@@ -0,0 +1,348 @@
+//===---------- MIRVRegNamerUtils.cpp - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "MIRVRegNamerUtils.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "mir-vregnamer-utils"
+
+namespace {
+
+// TypedVReg and VRType are used to tell the renamer what to do at points in a
+// sequence of values to be renamed. A TypedVReg can contain an actual VReg or
+// a FrameIndex, or it can mark the start of the next candidate (a
+// side-effecting instruction). This tells the renamer to increment to the
+// next vreg name, or to skip modulo some skip-gap value.
+enum VRType { RSE_Reg = 0, RSE_FrameIndex, RSE_NewCandidate };
+class TypedVReg {
+ VRType Type;
+ Register Reg;
+
+public:
+ TypedVReg(Register Reg) : Type(RSE_Reg), Reg(Reg) {}
+ TypedVReg(VRType Type) : Type(Type), Reg(~0U) {
+ assert(Type != RSE_Reg && "Expected a non-Register Type.");
+ }
+
+ bool isReg() const { return Type == RSE_Reg; }
+ bool isFrameIndex() const { return Type == RSE_FrameIndex; }
+ bool isCandidate() const { return Type == RSE_NewCandidate; }
+
+ VRType getType() const { return Type; }
+ Register getReg() const {
+ assert(this->isReg() && "Expected a virtual or physical Register.");
+ return Reg;
+ }
+};
+
+/// Here we find our candidates. What makes an interesting candidate?
+/// A candidate for a canonicalization tree root is normally any kind of
+/// instruction that causes side effects such as a store to memory or a copy to
+/// a physical register or a return instruction. We use these as an expression
+/// tree root that we walk in order to build a canonical walk which should
+/// result in canonical vreg renaming.
+std::vector<MachineInstr *> populateCandidates(MachineBasicBlock *MBB) {
+ std::vector<MachineInstr *> Candidates;
+ MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
+
+ for (auto II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
+ MachineInstr *MI = &*II;
+
+ bool DoesMISideEffect = false;
+
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg()) {
+ const Register Dst = MI->getOperand(0).getReg();
+ DoesMISideEffect |= !Register::isVirtualRegister(Dst);
+
+ for (auto UI = MRI.use_begin(Dst); UI != MRI.use_end(); ++UI) {
+ if (DoesMISideEffect)
+ break;
+ DoesMISideEffect |= (UI->getParent()->getParent() != MI->getParent());
+ }
+ }
+
+ if (!MI->mayStore() && !MI->isBranch() && !DoesMISideEffect)
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Found Candidate: "; MI->dump(););
+ Candidates.push_back(MI);
+ }
+
+ return Candidates;
+}
+
+void doCandidateWalk(std::vector<TypedVReg> &VRegs,
+ std::queue<TypedVReg> &RegQueue,
+ std::vector<MachineInstr *> &VisitedMIs,
+ const MachineBasicBlock *MBB) {
+
+ const MachineFunction &MF = *MBB->getParent();
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ while (!RegQueue.empty()) {
+
+ auto TReg = RegQueue.front();
+ RegQueue.pop();
+
+ if (TReg.isFrameIndex()) {
+ LLVM_DEBUG(dbgs() << "Popping frame index.\n";);
+ VRegs.push_back(TypedVReg(RSE_FrameIndex));
+ continue;
+ }
+
+ assert(TReg.isReg() && "Expected vreg or physreg.");
+ Register Reg = TReg.getReg();
+
+ if (Register::isVirtualRegister(Reg)) {
+ LLVM_DEBUG({
+ dbgs() << "Popping vreg ";
+ MRI.def_begin(Reg)->dump();
+ dbgs() << "\n";
+ });
+
+ if (!llvm::any_of(VRegs, [&](const TypedVReg &TR) {
+ return TR.isReg() && TR.getReg() == Reg;
+ })) {
+ VRegs.push_back(TypedVReg(Reg));
+ }
+ } else {
+ LLVM_DEBUG(dbgs() << "Popping physreg.\n";);
+ VRegs.push_back(TypedVReg(Reg));
+ continue;
+ }
+
+ for (auto RI = MRI.def_begin(Reg), RE = MRI.def_end(); RI != RE; ++RI) {
+ MachineInstr *Def = RI->getParent();
+
+ if (Def->getParent() != MBB)
+ continue;
+
+ if (llvm::any_of(VisitedMIs,
+ [&](const MachineInstr *VMI) { return Def == VMI; })) {
+ break;
+ }
+
+ LLVM_DEBUG({
+ dbgs() << "\n========================\n";
+ dbgs() << "Visited MI: ";
+ Def->dump();
+ dbgs() << "BB Name: " << Def->getParent()->getName() << "\n";
+ dbgs() << "\n========================\n";
+ });
+ VisitedMIs.push_back(Def);
+ for (unsigned I = 1, E = Def->getNumOperands(); I != E; ++I) {
+
+ MachineOperand &MO = Def->getOperand(I);
+ if (MO.isFI()) {
+ LLVM_DEBUG(dbgs() << "Pushing frame index.\n";);
+ RegQueue.push(TypedVReg(RSE_FrameIndex));
+ }
+
+ if (!MO.isReg())
+ continue;
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+ }
+ }
+}
+
+std::map<unsigned, unsigned>
+getVRegRenameMap(const std::vector<TypedVReg> &VRegs,
+ const std::vector<Register> &renamedInOtherBB,
+ MachineRegisterInfo &MRI, NamedVRegCursor &NVC) {
+ std::map<unsigned, unsigned> VRegRenameMap;
+ bool FirstCandidate = true;
+
+ for (auto &vreg : VRegs) {
+ if (vreg.isFrameIndex()) {
+ // We skip one vreg for any frame index because there is a good chance
+ // (especially when comparing SelectionDAG to GlobalISel generated MIR)
+ // that in the other file we are just getting an incoming vreg that comes
+ // from a copy from a frame index. So it's safe to skip by one.
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG(dbgs() << "Skipping rename for FI " << LastRenameReg << "\n";);
+ continue;
+ } else if (vreg.isCandidate()) {
+
+ // After the first candidate, for every subsequent candidate, we skip mod
+ // 10 registers so that the candidates are more likely to start at the
+ // same vreg number, making it more likely that the canonical walks from
+ // matching candidate instructions line up. We don't need to skip from the
+ // first candidate of the BasicBlock because we already skip ahead several
+ // vregs for each BB.
+ unsigned LastRenameReg = NVC.getVirtualVReg();
+ if (FirstCandidate)
+ NVC.incrementVirtualVReg(LastRenameReg % 10);
+ FirstCandidate = false;
+ continue;
+ } else if (!Register::isVirtualRegister(vreg.getReg())) {
+ unsigned LastRenameReg = NVC.incrementVirtualVReg();
+ (void)LastRenameReg;
+ LLVM_DEBUG({
+ dbgs() << "Skipping rename for Phys Reg " << LastRenameReg << "\n";
+ });
+ continue;
+ }
+
+ auto Reg = vreg.getReg();
+ if (llvm::find(renamedInOtherBB, Reg) != renamedInOtherBB.end()) {
+ LLVM_DEBUG(dbgs() << "Vreg " << Reg
+ << " already renamed in other BB.\n";);
+ continue;
+ }
+
+ auto Rename = NVC.createVirtualRegister(Reg);
+
+ if (VRegRenameMap.find(Reg) == VRegRenameMap.end()) {
+ LLVM_DEBUG(dbgs() << "Mapping vreg ";);
+ if (MRI.reg_begin(Reg) != MRI.reg_end()) {
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Reg); foo->dump(););
+ } else {
+ LLVM_DEBUG(dbgs() << Reg;);
+ }
+ LLVM_DEBUG(dbgs() << " to ";);
+ if (MRI.reg_begin(Rename) != MRI.reg_end()) {
+ LLVM_DEBUG(auto foo = &*MRI.reg_begin(Rename); foo->dump(););
+ } else {
+ LLVM_DEBUG(dbgs() << Rename;);
+ }
+ LLVM_DEBUG(dbgs() << "\n";);
+
+ VRegRenameMap.insert(std::pair<unsigned, unsigned>(Reg, Rename));
+ }
+ }
+
+ return VRegRenameMap;
+}
+
+bool doVRegRenaming(std::vector<Register> &renamedInOtherBB,
+ const std::map<unsigned, unsigned> &VRegRenameMap,
+ MachineRegisterInfo &MRI) {
+ bool Changed = false;
+ for (auto I = VRegRenameMap.begin(), E = VRegRenameMap.end(); I != E; ++I) {
+
+ auto VReg = I->first;
+ auto Rename = I->second;
+
+ renamedInOtherBB.push_back(Rename);
+
+ std::vector<MachineOperand *> RenameMOs;
+ for (auto &MO : MRI.reg_operands(VReg)) {
+ RenameMOs.push_back(&MO);
+ }
+
+ for (auto *MO : RenameMOs) {
+ Changed = true;
+ MO->setReg(Rename);
+
+ if (!MO->isDef())
+ MO->setIsKill(false);
+ }
+ }
+
+ return Changed;
+}
+
+bool renameVRegs(MachineBasicBlock *MBB,
+ std::vector<Register> &renamedInOtherBB,
+ NamedVRegCursor &NVC) {
+ bool Changed = false;
+ MachineFunction &MF = *MBB->getParent();
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+
+ std::vector<MachineInstr *> Candidates = populateCandidates(MBB);
+ std::vector<MachineInstr *> VisitedMIs;
+ llvm::copy(Candidates, std::back_inserter(VisitedMIs));
+
+ std::vector<TypedVReg> VRegs;
+ for (auto candidate : Candidates) {
+ VRegs.push_back(TypedVReg(RSE_NewCandidate));
+
+ std::queue<TypedVReg> RegQueue;
+
+ // Here we enqueue the vreg operands of candidates that are not stores or
+ // branches. The root nodes of the walk are normally the original
+ // candidates (stores); the candidates handled here are the exceptions,
+ // such as copies to physical registers.
+ for (unsigned i = 1; i < candidate->getNumOperands(); i++) {
+ if (candidate->mayStore() || candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Enqueue register"; MO.dump(); dbgs() << "\n";);
+ RegQueue.push(TypedVReg(MO.getReg()));
+ }
+
+ // Here we walk the root candidates. We start from the 0th operand because
+ // the root is normally a store to a vreg.
+ for (unsigned i = 0; i < candidate->getNumOperands(); i++) {
+
+ if (!candidate->mayStore() && !candidate->isBranch())
+ break;
+
+ MachineOperand &MO = candidate->getOperand(i);
+
+ // TODO: Do we want to only add vregs here?
+ if (!MO.isReg() && !MO.isFI())
+ continue;
+
+ LLVM_DEBUG(dbgs() << "Enqueue Reg/FI"; MO.dump(); dbgs() << "\n";);
+
+ RegQueue.push(MO.isReg() ? TypedVReg(MO.getReg())
+ : TypedVReg(RSE_FrameIndex));
+ }
+
+ doCandidateWalk(VRegs, RegQueue, VisitedMIs, MBB);
+ }
+
+ // If we have collected no vregs to rename then bail.
+ // The rest of this function does the vreg remapping.
+ if (VRegs.size() == 0)
+ return Changed;
+
+ auto VRegRenameMap = getVRegRenameMap(VRegs, renamedInOtherBB, MRI, NVC);
+ Changed |= doVRegRenaming(renamedInOtherBB, VRegRenameMap, MRI);
+ return Changed;
+}
+} // anonymous namespace
+
+void NamedVRegCursor::skipVRegs() {
+ unsigned VRegGapIndex = 1;
+ if (!virtualVRegNumber) {
+ VRegGapIndex = 0;
+ virtualVRegNumber = MRI.createIncompleteVirtualRegister();
+ }
+ const unsigned VR_GAP = (++VRegGapIndex * SkipGapSize);
+
+ unsigned I = virtualVRegNumber;
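+ // Round up past I to the next multiple of VR_GAP, leaving at least one
+ // full gap so walks from different blocks land on well-separated names.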
+ const unsigned E = (((I + VR_GAP) / VR_GAP) + 1) * VR_GAP;
+
+ virtualVRegNumber = E;
+}
+
+unsigned NamedVRegCursor::createVirtualRegister(unsigned VReg) {
+ if (!virtualVRegNumber)
+ skipVRegs();
+ std::string S;
+ raw_string_ostream OS(S);
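+ // Mask off the high bit that tags virtual registers so the printed name
+ // uses the plain index.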
+ OS << "namedVReg" << (virtualVRegNumber & ~0x80000000);
+ OS.flush();
+ virtualVRegNumber++;
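+ // Preserve the register class when the vreg has one; generic vregs (e.g.
+ // from GlobalISel) carry an LLT type instead, so clone that.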
+ if (auto RC = MRI.getRegClassOrNull(VReg))
+ return MRI.createVirtualRegister(RC, OS.str());
+ return MRI.createGenericVirtualRegister(MRI.getType(VReg), OS.str());
+}
+
+bool NamedVRegCursor::renameVRegs(MachineBasicBlock *MBB) {
+ return ::renameVRegs(MBB, RenamedInOtherBB, *this);
+}
diff --git a/lib/CodeGen/MIRVRegNamerUtils.h b/lib/CodeGen/MIRVRegNamerUtils.h
new file mode 100644
index 000000000000..c5b52a968538
--- /dev/null
+++ b/lib/CodeGen/MIRVRegNamerUtils.h
@@ -0,0 +1,91 @@
+
+//===------------ MIRVRegNamerUtils.h - MIR VReg Renaming Utilities -------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// These utilities abstract out the parts of the MIRCanon pass that are
+// responsible for renaming virtual registers, so that the code can be shared
+// with a MIRVRegNamer pass that would be the analog of the opt -instnamer
+// pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+#define LLVM_LIB_CODEGEN_MIRVREGNAMERUTILS_H
+
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <queue>
+
+namespace llvm {
+
+/// NamedVRegCursor - The cursor is an object that keeps track of what the next
+/// vreg name should be. It does the bookkeeping to determine when to skip the
+/// index value and by how much, or whether the next vreg name should simply be
+/// an increment of the previous one.
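+///
+/// A minimal usage sketch, one cursor per MachineFunction (the Changed flag
+/// here is illustrative, not part of the API):
+/// \code
+///   NamedVRegCursor NVC(MF.getRegInfo());
+///   for (MachineBasicBlock &MBB : MF)
+///     Changed |= NVC.renameVRegs(&MBB);
+/// \endcode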
+class NamedVRegCursor {
+ MachineRegisterInfo &MRI;
+
+ /// virtualVRegNumber - Bookkeeping of the last vreg position.
+ unsigned virtualVRegNumber;
+
+ /// SkipGapSize - Used to calculate a modulo amount to skip by after every
+ /// sequence of instructions starting from a given side-effecting
+ /// MachineInstruction for a given MachineBasicBlock. The general idea is
+ /// that for a given program compiled with two different opt pipelines,
+ /// there shouldn't be more than a SkipGapSize difference in how many vregs
+ /// are in play between the two. By rounding up to the next multiple of
+ /// SkipGapSize after every def-use graph of vregs we rename, we have a high
+ /// chance of landing on the same name for two matching side-effects across
+ /// the two compilation outcomes.
+ const unsigned SkipGapSize;
+
+ /// RenamedInOtherBB - VRegs that we have already renamed, i.e. breadcrumbs.
+ std::vector<Register> RenamedInOtherBB;
+
+public:
+ NamedVRegCursor() = delete;
+ /// A SkipGapSize of 1000 was a good heuristic when the MIRCanonicalizerPass
+ /// was written. Adjust as needed.
+ NamedVRegCursor(MachineRegisterInfo &MRI, unsigned SkipGapSize = 1000)
+ : MRI(MRI), virtualVRegNumber(0), SkipGapSize(SkipGapSize) {}
+
+ /// skipVRegs - Skips the index ahead modulo a gap value. Indices are used
+ /// to produce the next vreg name.
+ void skipVRegs();
+
+ unsigned getVirtualVReg() const { return virtualVRegNumber; }
+
+ /// incrementVirtualVReg - This increments an index value that is used to
+ /// create a new vreg name. This is not a Register.
+ unsigned incrementVirtualVReg(unsigned incr = 1) {
+ virtualVRegNumber += incr;
+ return virtualVRegNumber;
+ }
+
+ /// createVirtualRegister - Given an existing vreg, create a named vreg to
+ /// take its place.
+ unsigned createVirtualRegister(unsigned VReg);
+
+ /// renameVRegs - For a given MachineBasicBlock, scan for side-effecting
+ /// instructions, walk the def-use graph from each side-effecting root (in
+ /// sorted root order) and rename the encountered vregs in the def-use graph
+ /// in a canonical ordering. This method maintains the bookkeeping for which
+ /// vregs were already renamed, in RenamedInOtherBB.
+ /// \returns true if any vregs were renamed.
+ bool renameVRegs(MachineBasicBlock *MBB);
+};
+
+} // namespace llvm
+
+#endif
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 4d29e883d879..854bef3aab05 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -39,6 +39,12 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
+static cl::opt<bool> PrintSlotIndexes(
+ "print-slotindexes",
+ cl::desc("When printing machine IR, annotate instructions and blocks with "
+ "SlotIndexes when available"),
+ cl::init(true), cl::Hidden);
+
MachineBasicBlock::MachineBasicBlock(MachineFunction &MF, const BasicBlock *B)
: BB(B), Number(-1), xParent(&MF) {
Insts.Parent = this;
@@ -291,7 +297,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
return;
}
- if (Indexes)
+ if (Indexes && PrintSlotIndexes)
OS << Indexes->getMBBStartIdx(this) << '\t';
OS << "bb." << getNumber();
@@ -320,9 +326,9 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "landing-pad";
HasAttributes = true;
}
- if (getAlignment()) {
+ if (getAlignment() != Align::None()) {
OS << (HasAttributes ? ", " : " (");
- OS << "align " << getAlignment();
+ OS << "align " << Log2(getAlignment());
HasAttributes = true;
}
if (HasAttributes)
@@ -402,7 +408,7 @@ void MachineBasicBlock::print(raw_ostream &OS, ModuleSlotTracker &MST,
bool IsInBundle = false;
for (const MachineInstr &MI : instrs()) {
- if (Indexes) {
+ if (Indexes && PrintSlotIndexes) {
if (Indexes->hasIndex(MI))
OS << Indexes->getInstructionIndex(MI);
OS << '\t';
@@ -484,9 +490,9 @@ void MachineBasicBlock::sortUniqueLiveIns() {
}
unsigned
-MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
+MachineBasicBlock::addLiveIn(MCRegister PhysReg, const TargetRegisterClass *RC) {
assert(getParent() && "MBB must be inserted in function");
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Expected physreg");
+ assert(PhysReg.isPhysical() && "Expected physreg");
assert(RC && "Register class is required");
assert((isEHPad() || this == &getParent()->front()) &&
"Only the entry block and landing pads can have physreg live ins");
@@ -500,14 +506,14 @@ MachineBasicBlock::addLiveIn(MCPhysReg PhysReg, const TargetRegisterClass *RC) {
if (LiveIn)
for (;I != E && I->isCopy(); ++I)
if (I->getOperand(1).getReg() == PhysReg) {
- unsigned VirtReg = I->getOperand(0).getReg();
+ Register VirtReg = I->getOperand(0).getReg();
if (!MRI.constrainRegClass(VirtReg, RC))
llvm_unreachable("Incompatible live-in register class.");
return VirtReg;
}
// No luck, create a virtual register.
- unsigned VirtReg = MRI.createVirtualRegister(RC);
+ Register VirtReg = MRI.createVirtualRegister(RC);
BuildMI(*this, I, DebugLoc(), TII.get(TargetOpcode::COPY), VirtReg)
.addReg(PhysReg, RegState::Kill);
if (!LiveIn)
@@ -772,7 +778,8 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *FromMBB) {
while (!FromMBB->succ_empty()) {
MachineBasicBlock *Succ = *FromMBB->succ_begin();
- // If probability list is empty it means we don't use it (disabled optimization).
+ // If probability list is empty it means we don't use it (disabled
+ // optimization).
if (!FromMBB->Probs.empty()) {
auto Prob = *FromMBB->Probs.begin();
addSuccessor(Succ, Prob);
@@ -798,13 +805,7 @@ MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB) {
FromMBB->removeSuccessor(Succ);
// Fix up any PHI nodes in the successor.
- for (MachineBasicBlock::instr_iterator MI = Succ->instr_begin(),
- ME = Succ->instr_end(); MI != ME && MI->isPHI(); ++MI)
- for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.getMBB() == FromMBB)
- MO.setMBB(this);
- }
+ Succ->replacePhiUsesWith(FromMBB, this);
}
normalizeSuccProbs();
}
@@ -907,8 +908,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!OI->isReg() || OI->getReg() == 0 ||
!OI->isUse() || !OI->isKill() || OI->isUndef())
continue;
- unsigned Reg = OI->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
+ Register Reg = OI->getReg();
+ if (Register::isPhysicalRegister(Reg) ||
LV->getVarInfo(Reg).removeKill(*MI)) {
KilledRegs.push_back(Reg);
LLVM_DEBUG(dbgs() << "Removing terminator kill: " << *MI);
@@ -928,7 +929,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
if (!OI->isReg() || OI->getReg() == 0)
continue;
- unsigned Reg = OI->getReg();
+ Register Reg = OI->getReg();
if (!is_contained(UsedRegs, Reg))
UsedRegs.push_back(Reg);
}
@@ -979,13 +980,8 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
}
}
- // Fix PHI nodes in Succ so they refer to NMBB instead of this
- for (MachineBasicBlock::instr_iterator
- i = Succ->instr_begin(),e = Succ->instr_end();
- i != e && i->isPHI(); ++i)
- for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
- if (i->getOperand(ni+1).getMBB() == this)
- i->getOperand(ni+1).setMBB(NMBB);
+ // Fix PHI nodes in Succ so they refer to NMBB instead of this.
+ Succ->replacePhiUsesWith(this, NMBB);
// Inherit live-ins from the successor
for (const auto &LI : Succ->liveins())
@@ -1000,7 +996,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (instr_iterator I = instr_end(), E = instr_begin(); I != E;) {
if (!(--I)->addRegisterKilled(Reg, TRI, /* AddIfNotFound= */ false))
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
LV->getVarInfo(Reg).Kills.push_back(&*I);
LLVM_DEBUG(dbgs() << "Restored terminator kill: " << *I);
break;
@@ -1033,7 +1029,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) {
if (I->getOperand(ni+1).getMBB() == NMBB) {
MachineOperand &MO = I->getOperand(ni);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
PHISrcRegs.insert(Reg);
if (MO.isUndef())
continue;
@@ -1049,7 +1045,7 @@ MachineBasicBlock *MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ,
MachineRegisterInfo *MRI = &getParent()->getRegInfo();
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg))
continue;
@@ -1217,6 +1213,16 @@ void MachineBasicBlock::ReplaceUsesOfBlockWith(MachineBasicBlock *Old,
replaceSuccessor(Old, New);
}
+void MachineBasicBlock::replacePhiUsesWith(MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
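+ // PHI operands come in (value, predecessor) pairs after the def, so the
+ // MBB operands sit at every other index starting from 2.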
+ for (MachineInstr &MI : phis())
+ for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (MO.getMBB() == Old)
+ MO.setMBB(New);
+ }
+}
+
/// Various pieces of code can cause excess edges in the CFG to be inserted. If
/// we have proven that MBB can only branch to DestA and DestB, remove any other
/// MBB successors from the CFG. DestA and DestB can be null.
diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp
index 639b588766a1..ac19bc0bd8ea 100644
--- a/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/lib/CodeGen/MachineBlockPlacement.cpp
@@ -79,16 +79,17 @@ STATISTIC(CondBranchTakenFreq,
STATISTIC(UncondBranchTakenFreq,
"Potential frequency of taking unconditional branches");
-static cl::opt<unsigned> AlignAllBlock("align-all-blocks",
- cl::desc("Force the alignment of all "
- "blocks in the function."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllBlock(
+ "align-all-blocks",
+ cl::desc("Force the alignment of all blocks in the function in log2 format "
+ "(e.g 4 means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
static cl::opt<unsigned> AlignAllNonFallThruBlocks(
"align-all-nofallthru-blocks",
- cl::desc("Force the alignment of all "
- "blocks that have no fall-through predecessors (i.e. don't add "
- "nops that are executed)."),
+ cl::desc("Force the alignment of all blocks that have no fall-through "
+ "predecessors (i.e. don't add nops that are executed). In log2 "
+ "format (e.g 4 means align on 16B boundaries)."),
cl::init(0), cl::Hidden);
// FIXME: Find a good default for this flag and remove the flag.
@@ -2763,8 +2764,8 @@ void MachineBlockPlacement::alignBlocks() {
if (!L)
continue;
- unsigned Align = TLI->getPrefLoopAlignment(L);
- if (!Align)
+ const Align Align = TLI->getPrefLoopAlignment(L);
+ if (Align == 1)
continue; // Don't care about loop alignment.
// If the block is cold relative to the function entry don't waste space
@@ -2981,7 +2982,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
F = &MF;
MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
- MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
+ MBFI = std::make_unique<BranchFolder::MBFIWrapper>(
getAnalysis<MachineBlockFrequencyInfo>());
MLI = &getAnalysis<MachineLoopInfo>();
TII = MF.getSubtarget().getInstrInfo();
@@ -3038,8 +3039,9 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
*MBPI, TailMergeSize);
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
if (BF.OptimizeFunction(MF, TII, MF.getSubtarget().getRegisterInfo(),
- getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
+ MMIWP ? &MMIWP->getMMI() : nullptr, MLI,
/*AfterPlacement=*/true)) {
// Redo the layout if tail merging creates/removes/moves blocks.
BlockToChain.clear();
@@ -3062,14 +3064,14 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (AlignAllBlock)
// Align all of the blocks in the function to a specific alignment.
for (MachineBasicBlock &MBB : MF)
- MBB.setAlignment(AlignAllBlock);
+ MBB.setAlignment(Align(1ULL << AlignAllBlock));
else if (AlignAllNonFallThruBlocks) {
// Align all of the blocks that have no fall-through predecessors to a
// specific alignment.
for (auto MBI = std::next(MF.begin()), MBE = MF.end(); MBI != MBE; ++MBI) {
auto LayoutPred = std::prev(MBI);
if (!LayoutPred->isSuccessor(&*MBI))
- MBI->setAlignment(AlignAllNonFallThruBlocks);
+ MBI->setAlignment(Align(1ULL << AlignAllNonFallThruBlocks));
}
}
if (ViewBlockLayoutWithBFI != GVDT_None &&
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 2df6d40d9293..d9bd32b2fbab 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -21,6 +21,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -66,6 +67,7 @@ namespace {
AliasAnalysis *AA;
MachineDominatorTree *DT;
MachineRegisterInfo *MRI;
+ MachineBlockFrequencyInfo *MBFI;
public:
static char ID; // Pass identification
@@ -83,6 +85,8 @@ namespace {
AU.addPreservedID(MachineLoopInfoID);
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineBlockFrequencyInfo>();
+ AU.addPreserved<MachineBlockFrequencyInfo>();
}
void releaseMemory() override {
@@ -133,6 +137,11 @@ namespace {
bool isPRECandidate(MachineInstr *MI);
bool ProcessBlockPRE(MachineDominatorTree *MDT, MachineBasicBlock *MBB);
bool PerformSimplePRE(MachineDominatorTree *DT);
+ /// Heuristics to see if it's profitable to move common computations of MBB
+ /// and MBB1 to CandidateBB.
+ bool isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1);
};
} // end anonymous namespace
@@ -158,15 +167,15 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
for (MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
bool OnlyOneUse = MRI->hasOneNonDBGUse(Reg);
MachineInstr *DefMI = MRI->getVRegDef(Reg);
if (!DefMI->isCopy())
continue;
- unsigned SrcReg = DefMI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = DefMI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
continue;
if (DefMI->getOperand(0).getSubReg())
continue;
@@ -189,14 +198,16 @@ bool MachineCSE::PerformTrivialCopyPropagation(MachineInstr *MI,
LLVM_DEBUG(dbgs() << "Coalescing: " << *DefMI);
LLVM_DEBUG(dbgs() << "*** to: " << *MI);
- // Update matching debug values.
- DefMI->changeDebugValuesDefReg(SrcReg);
-
// Propagate SrcReg of copies to MI.
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
// Coalesce single use copies.
if (OnlyOneUse) {
+ // If (and only if) we've eliminated all uses of the copy, also
+ // copy-propagate to any debug-users of MI, or they'll be left using
+ // an undefined value.
+ DefMI->changeDebugValuesDefReg(SrcReg);
+
DefMI->eraseFromParent();
++NumCoalesces;
}
@@ -271,10 +282,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
continue;
// Reading either caller preserved or constant physregs is ok.
if (!isCallerPreservedOrConstPhysReg(Reg, *MI->getMF(), *TRI))
@@ -290,10 +301,10 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
const MachineOperand &MO = MOP.value();
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
continue;
// Check against PhysRefs even if the def is "dead".
if (PhysRefs.count(Reg))
@@ -367,8 +378,8 @@ bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
return false;
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (Register::isVirtualRegister(MOReg))
continue;
if (PhysRefs.count(MOReg))
return false;
@@ -424,8 +435,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// If CSReg is used at all uses of Reg, CSE should not increase register
// pressure of CSReg.
bool MayIncreasePressure = true;
- if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
- TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(CSReg) && Register::isVirtualRegister(Reg)) {
MayIncreasePressure = false;
SmallPtrSet<MachineInstr*, 8> CSUses;
for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
@@ -453,8 +463,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
// of the redundant computation are copies, do not cse.
bool HasVRegUse = false;
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isUse() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && MO.isUse() && Register::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
break;
}
@@ -586,8 +595,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned OldReg = MO.getReg();
- unsigned NewReg = CSMI->getOperand(i).getReg();
+ Register OldReg = MO.getReg();
+ Register NewReg = CSMI->getOperand(i).getReg();
// Go through implicit defs of CSMI and MI, if a def is not dead at MI,
// we should make sure it is not dead at CSMI.
@@ -604,8 +613,8 @@ bool MachineCSE::ProcessBlockCSE(MachineBasicBlock *MBB) {
continue;
}
- assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
- TargetRegisterInfo::isVirtualRegister(NewReg) &&
+ assert(Register::isVirtualRegister(OldReg) &&
+ Register::isVirtualRegister(NewReg) &&
"Do not CSE physical register defs!");
if (!isProfitableToCSE(NewReg, OldReg, CSMI->getParent(), MI)) {
@@ -769,11 +778,11 @@ bool MachineCSE::isPRECandidate(MachineInstr *MI) {
return false;
for (auto def : MI->defs())
- if (!TRI->isVirtualRegister(def.getReg()))
+ if (!Register::isVirtualRegister(def.getReg()))
return false;
for (auto use : MI->uses())
- if (use.isReg() && !TRI->isVirtualRegister(use.getReg()))
+ if (use.isReg() && !Register::isVirtualRegister(use.getReg()))
return false;
return true;
@@ -802,6 +811,9 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
if (!CMBB->isLegalToHoistInto())
continue;
+ if (!isProfitableToHoistInto(CMBB, MBB, MBB1))
+ continue;
+
// Two instrs are partial redundant if their basic blocks are reachable
// from one to another but one doesn't dominate another.
if (CMBB != MBB1) {
@@ -812,8 +824,8 @@ bool MachineCSE::ProcessBlockPRE(MachineDominatorTree *DT,
assert(MI->getOperand(0).isDef() &&
"First operand of instr with one explicit def must be this def");
- unsigned VReg = MI->getOperand(0).getReg();
- unsigned NewReg = MRI->cloneVirtualRegister(VReg);
+ Register VReg = MI->getOperand(0).getReg();
+ Register NewReg = MRI->cloneVirtualRegister(VReg);
if (!isProfitableToCSE(NewReg, VReg, CMBB, MI))
continue;
MachineInstr &NewMI =
@@ -854,6 +866,18 @@ bool MachineCSE::PerformSimplePRE(MachineDominatorTree *DT) {
return Changed;
}
+bool MachineCSE::isProfitableToHoistInto(MachineBasicBlock *CandidateBB,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *MBB1) {
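+ // When optimizing for minimum size, hoisting always pays: it replaces two
+ // copies of the computation with one.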
+ if (CandidateBB->getParent()->getFunction().hasMinSize())
+ return true;
+ assert(DT->dominates(CandidateBB, MBB) && "CandidateBB should dominate MBB");
+ assert(DT->dominates(CandidateBB, MBB1) &&
+ "CandidateBB should dominate MBB1");
+ return MBFI->getBlockFreq(CandidateBB) <=
+ MBFI->getBlockFreq(MBB) + MBFI->getBlockFreq(MBB1);
+}
+
bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(MF.getFunction()))
return false;
@@ -863,6 +887,7 @@ bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
DT = &getAnalysis<MachineDominatorTree>();
+ MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
LookAheadLimit = TII->getMachineCSELookAheadLimit();
bool ChangedPRE, ChangedCSE;
ChangedPRE = PerformSimplePRE(DT);
diff --git a/lib/CodeGen/MachineCombiner.cpp b/lib/CodeGen/MachineCombiner.cpp
index 0584ec0bd2b3..e9f462fd1b37 100644
--- a/lib/CodeGen/MachineCombiner.cpp
+++ b/lib/CodeGen/MachineCombiner.cpp
@@ -137,7 +137,7 @@ void MachineCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
MachineInstr *MachineCombiner::getOperandDef(const MachineOperand &MO) {
MachineInstr *DefInstr = nullptr;
// We need a virtual register definition.
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
DefInstr = MRI->getUniqueVRegDef(MO.getReg());
// PHI's have no depth etc.
if (DefInstr && DefInstr->isPHI())
@@ -168,7 +168,7 @@ MachineCombiner::getDepth(SmallVectorImpl<MachineInstr *> &InsInstrs,
unsigned IDepth = 0;
for (const MachineOperand &MO : InstrPtr->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
continue;
if (!MO.isUse())
continue;
@@ -223,7 +223,7 @@ unsigned MachineCombiner::getLatency(MachineInstr *Root, MachineInstr *NewRoot,
for (const MachineOperand &MO : NewRoot->operands()) {
// Check for virtual register operand.
- if (!(MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())))
+ if (!(MO.isReg() && Register::isVirtualRegister(MO.getReg())))
continue;
if (!MO.isDef())
continue;
diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp
index 9fc12ac89e12..ebe76e31dca9 100644
--- a/lib/CodeGen/MachineCopyPropagation.cpp
+++ b/lib/CodeGen/MachineCopyPropagation.cpp
@@ -119,8 +119,8 @@ public:
void trackCopy(MachineInstr *MI, const TargetRegisterInfo &TRI) {
assert(MI->isCopy() && "Tracking non-copy?");
- unsigned Def = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
// Remember Def is defined by the copy.
for (MCRegUnitIterator RUI(Def, &TRI); RUI.isValid(); ++RUI)
@@ -163,8 +163,8 @@ public:
// Check that the available copy isn't clobbered by any regmasks between
// itself and the destination.
- unsigned AvailSrc = AvailCopy->getOperand(1).getReg();
- unsigned AvailDef = AvailCopy->getOperand(0).getReg();
+ Register AvailSrc = AvailCopy->getOperand(1).getReg();
+ Register AvailDef = AvailCopy->getOperand(0).getReg();
for (const MachineInstr &MI :
make_range(AvailCopy->getIterator(), DestCopy.getIterator()))
for (const MachineOperand &MO : MI.operands())
@@ -205,8 +205,11 @@ public:
}
private:
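+ // Distinguishes reads from debug instructions (e.g. DBG_VALUE), which must
+ // not keep an otherwise-dead copy alive, from reads in ordinary code.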
+ typedef enum { DebugUse = false, RegularUse = true } DebugType;
+
void ClobberRegister(unsigned Reg);
- void ReadRegister(unsigned Reg);
+ void ReadRegister(unsigned Reg, MachineInstr &Reader,
+ DebugType DT);
void CopyPropagateBlock(MachineBasicBlock &MBB);
bool eraseIfRedundant(MachineInstr &Copy, unsigned Src, unsigned Def);
void forwardUses(MachineInstr &MI);
@@ -217,6 +220,9 @@ private:
/// Candidates for deletion.
SmallSetVector<MachineInstr *, 8> MaybeDeadCopies;
+ /// Multimap tracking debug users in current BB
+ DenseMap<MachineInstr*, SmallVector<MachineInstr*, 2>> CopyDbgUsers;
+
CopyTracker Tracker;
bool Changed;
@@ -231,13 +237,19 @@ char &llvm::MachineCopyPropagationID = MachineCopyPropagation::ID;
INITIALIZE_PASS(MachineCopyPropagation, DEBUG_TYPE,
"Machine Copy Propagation Pass", false, false)
-void MachineCopyPropagation::ReadRegister(unsigned Reg) {
+void MachineCopyPropagation::ReadRegister(unsigned Reg, MachineInstr &Reader,
+ DebugType DT) {
// If 'Reg' is defined by a copy, the copy is no longer a candidate
- // for elimination.
+ // for elimination. If a copy is "read" by a debug user, record the user
+ // for propagation.
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI) {
if (MachineInstr *Copy = Tracker.findCopyForUnit(*RUI, *TRI)) {
- LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
- MaybeDeadCopies.remove(Copy);
+ if (DT == RegularUse) {
+ LLVM_DEBUG(dbgs() << "MCP: Copy is used - not dead: "; Copy->dump());
+ MaybeDeadCopies.remove(Copy);
+ } else {
+ CopyDbgUsers[Copy].push_back(&Reader);
+ }
}
}
}
@@ -250,8 +262,8 @@ void MachineCopyPropagation::ReadRegister(unsigned Reg) {
/// isNopCopy("ecx = COPY eax", AH, CL) == false
static bool isNopCopy(const MachineInstr &PreviousCopy, unsigned Src,
unsigned Def, const TargetRegisterInfo *TRI) {
- unsigned PreviousSrc = PreviousCopy.getOperand(1).getReg();
- unsigned PreviousDef = PreviousCopy.getOperand(0).getReg();
+ Register PreviousSrc = PreviousCopy.getOperand(1).getReg();
+ Register PreviousDef = PreviousCopy.getOperand(0).getReg();
if (Src == PreviousSrc) {
assert(Def == PreviousDef);
return true;
@@ -288,7 +300,7 @@ bool MachineCopyPropagation::eraseIfRedundant(MachineInstr &Copy, unsigned Src,
// Copy was redundantly redefining either Src or Def. Remove earlier kill
// flags between Copy and PrevCopy because the value will be reused now.
assert(Copy.isCopy());
- unsigned CopyDef = Copy.getOperand(0).getReg();
+ Register CopyDef = Copy.getOperand(0).getReg();
assert(CopyDef == Src || CopyDef == Def);
for (MachineInstr &MI :
make_range(PrevCopy->getIterator(), Copy.getIterator()))
@@ -307,7 +319,7 @@ bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy,
const MachineInstr &UseI,
unsigned UseIdx) {
- unsigned CopySrcReg = Copy.getOperand(1).getReg();
+ Register CopySrcReg = Copy.getOperand(1).getReg();
// If the new register meets the opcode register constraints, then allow
// forwarding.
@@ -398,9 +410,9 @@ void MachineCopyPropagation::forwardUses(MachineInstr &MI) {
if (!Copy)
continue;
- unsigned CopyDstReg = Copy->getOperand(0).getReg();
+ Register CopyDstReg = Copy->getOperand(0).getReg();
const MachineOperand &CopySrc = Copy->getOperand(1);
- unsigned CopySrcReg = CopySrc.getReg();
+ Register CopySrcReg = CopySrc.getReg();
// FIXME: Don't handle partial uses of wider COPYs yet.
if (MOUse.getReg() != CopyDstReg) {
@@ -456,11 +468,11 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Analyze copies (which don't overlap themselves).
if (MI->isCopy() && !TRI->regsOverlap(MI->getOperand(0).getReg(),
MI->getOperand(1).getReg())) {
- unsigned Def = MI->getOperand(0).getReg();
- unsigned Src = MI->getOperand(1).getReg();
+ Register Def = MI->getOperand(0).getReg();
+ Register Src = MI->getOperand(1).getReg();
- assert(!TargetRegisterInfo::isVirtualRegister(Def) &&
- !TargetRegisterInfo::isVirtualRegister(Src) &&
+ assert(!Register::isVirtualRegister(Def) &&
+ !Register::isVirtualRegister(Src) &&
"MachineCopyPropagation should be run after register allocation!");
// The two copies cancel out and the source of the first copy
@@ -488,14 +500,14 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// If Src is defined by a previous copy, the previous copy cannot be
// eliminated.
- ReadRegister(Src);
+ ReadRegister(Src, *MI, RegularUse);
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- ReadRegister(Reg);
+ ReadRegister(Reg, *MI, RegularUse);
}
LLVM_DEBUG(dbgs() << "MCP: Copy is a deletion candidate: "; MI->dump());
@@ -515,7 +527,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
for (const MachineOperand &MO : MI->implicit_operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
Tracker.clobberRegister(Reg, *TRI);
@@ -529,12 +541,12 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
// Clobber any earlyclobber regs first.
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isEarlyClobber()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If we have a tied earlyclobber, that means it is also read by this
// instruction, so we need to make sure we don't remove it as dead
// later.
if (MO.isTied())
- ReadRegister(Reg);
+ ReadRegister(Reg, *MI, RegularUse);
Tracker.clobberRegister(Reg, *TRI);
}
@@ -548,18 +560,18 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
RegMask = &MO;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(!TargetRegisterInfo::isVirtualRegister(Reg) &&
+ assert(!Register::isVirtualRegister(Reg) &&
"MachineCopyPropagation should be run after register allocation!");
if (MO.isDef() && !MO.isEarlyClobber()) {
Defs.push_back(Reg);
continue;
- } else if (!MO.isDebug() && MO.readsReg())
- ReadRegister(Reg);
+ } else if (MO.readsReg())
+ ReadRegister(Reg, *MI, MO.isDebug() ? DebugUse : RegularUse);
}
// The instruction has a register mask operand which means that it clobbers
@@ -571,7 +583,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDeadCopies.begin();
DI != MaybeDeadCopies.end();) {
MachineInstr *MaybeDead = *DI;
- unsigned Reg = MaybeDead->getOperand(0).getReg();
+ Register Reg = MaybeDead->getOperand(0).getReg();
assert(!MRI->isReserved(Reg));
if (!RegMask->clobbersPhysReg(Reg)) {
@@ -609,9 +621,10 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
MaybeDead->dump());
assert(!MRI->isReserved(MaybeDead->getOperand(0).getReg()));
- // Update matching debug values.
+ // Update matching debug values, if any.
assert(MaybeDead->isCopy());
- MaybeDead->changeDebugValuesDefReg(MaybeDead->getOperand(1).getReg());
+ unsigned SrcReg = MaybeDead->getOperand(1).getReg();
+ MRI->updateDbgUsersToReg(SrcReg, CopyDbgUsers[MaybeDead]);
MaybeDead->eraseFromParent();
Changed = true;
@@ -620,6 +633,7 @@ void MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) {
}
MaybeDeadCopies.clear();
+ CopyDbgUsers.clear();
Tracker.clear();
}
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 1dfba8638c22..706c706d7527 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -18,12 +18,15 @@
using namespace llvm;
+namespace llvm {
// Always verify dominfo if expensive checking is enabled.
#ifdef EXPENSIVE_CHECKS
-static bool VerifyMachineDomInfo = true;
+bool VerifyMachineDomInfo = true;
#else
-static bool VerifyMachineDomInfo = false;
+bool VerifyMachineDomInfo = false;
#endif
+} // namespace llvm
+
static cl::opt<bool, true> VerifyMachineDomInfoX(
"verify-machine-dom-info", cl::location(VerifyMachineDomInfo), cl::Hidden,
cl::desc("Verify machine dominator info (time consuming)"));
@@ -64,21 +67,11 @@ void MachineDominatorTree::releaseMemory() {
}
void MachineDominatorTree::verifyAnalysis() const {
- if (DT && VerifyMachineDomInfo) {
- MachineFunction &F = *getRoot()->getParent();
-
- DomTreeBase<MachineBasicBlock> OtherDT;
- OtherDT.recalculate(F);
- if (getRootNode()->getBlock() != OtherDT.getRootNode()->getBlock() ||
- DT->compare(OtherDT)) {
- errs() << "MachineDominatorTree for function " << F.getName()
- << " is not up to date!\nComputed:\n";
- DT->print(errs());
- errs() << "\nActual:\n";
- OtherDT.print(errs());
+ if (DT && VerifyMachineDomInfo)
+ if (!DT->verify(DomTreeT::VerificationLevel::Basic)) {
+ errs() << "MachineDominatorTree verification failed\n";
abort();
}
- }
}
void MachineDominatorTree::print(raw_ostream &OS, const Module*) const {
diff --git a/lib/CodeGen/MachineFrameInfo.cpp b/lib/CodeGen/MachineFrameInfo.cpp
index bae3a4333bda..604f5145b1a0 100644
--- a/lib/CodeGen/MachineFrameInfo.cpp
+++ b/lib/CodeGen/MachineFrameInfo.cpp
@@ -28,25 +28,26 @@
using namespace llvm;
-void MachineFrameInfo::ensureMaxAlignment(unsigned Align) {
+void MachineFrameInfo::ensureMaxAlignment(Align Alignment) {
if (!StackRealignable)
- assert(Align <= StackAlignment &&
- "For targets without stack realignment, Align is out of limit!");
- if (MaxAlignment < Align) MaxAlignment = Align;
+ assert(Alignment <= StackAlignment &&
+ "For targets without stack realignment, Alignment is out of limit!");
+ if (MaxAlignment < Alignment)
+ MaxAlignment = Alignment;
}
/// Clamp the alignment if requested and emit a warning.
-static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
- unsigned StackAlign) {
- if (!ShouldClamp || Align <= StackAlign)
- return Align;
- LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Align
- << " exceeds the stack alignment " << StackAlign
+static inline Align clampStackAlignment(bool ShouldClamp, Align Alignment,
+ Align StackAlignment) {
+ if (!ShouldClamp || Alignment <= StackAlignment)
+ return Alignment;
+ LLVM_DEBUG(dbgs() << "Warning: requested alignment " << Alignment.value()
+ << " exceeds the stack alignment " << StackAlignment.value()
<< " when stack realignment is off" << '\n');
- return StackAlign;
+ return StackAlignment;
}
-int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
+int MachineFrameInfo::CreateStackObject(uint64_t Size, Align Alignment,
bool IsSpillSlot,
const AllocaInst *Alloca,
uint8_t StackID) {
@@ -61,8 +62,7 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment,
return Index;
}
-int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
- unsigned Alignment) {
+int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, Align Alignment) {
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
CreateStackObject(Size, Alignment, true);
int Index = (int)Objects.size() - NumFixedObjects - 1;
@@ -70,7 +70,7 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size,
return Index;
}
-int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment,
+int MachineFrameInfo::CreateVariableSizedObject(Align Alignment,
const AllocaInst *Alloca) {
HasVarSizedObjects = true;
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
@@ -88,7 +88,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
// object is 16-byte aligned. Note that unlike the non-fixed case, if the
// stack needs realignment, we can't assume that the stack will in fact be
// aligned.
- unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -100,7 +101,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
int MachineFrameInfo::CreateFixedSpillStackObject(uint64_t Size,
int64_t SPOffset,
bool IsImmutable) {
- unsigned Alignment = MinAlign(SPOffset, ForcedRealign ? 1 : StackAlignment);
+ Align Alignment =
+ commonAlignment(ForcedRealign ? Align::None() : StackAlignment, SPOffset);
Alignment = clampStackAlignment(!StackRealignable, Alignment, StackAlignment);
Objects.insert(Objects.begin(),
StackObject(Size, Alignment, SPOffset, IsImmutable,
@@ -232,7 +234,7 @@ void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
OS << "variable sized";
else
OS << "size=" << SO.Size;
- OS << ", align=" << SO.Alignment;
+ OS << ", align=" << SO.Alignment.value();
if (i < NumFixedObjects)
OS << ", fixed";
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 4df5ce2dcedc..7d2ee230ca9f 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -78,10 +78,11 @@ using namespace llvm;
#define DEBUG_TYPE "codegen"
-static cl::opt<unsigned>
-AlignAllFunctions("align-all-functions",
- cl::desc("Force the alignment of all functions."),
- cl::init(0), cl::Hidden);
+static cl::opt<unsigned> AlignAllFunctions(
+ "align-all-functions",
+ cl::desc("Force the alignment of all functions in log2 format (e.g. 4 "
+ "means align on 16B boundaries)."),
+ cl::init(0), cl::Hidden);
static const char *getPropertyName(MachineFunctionProperties::Property Prop) {
using P = MachineFunctionProperties::Property;
@@ -181,7 +182,7 @@ void MachineFunction::init() {
STI->getTargetLowering()->getPrefFunctionAlignment());
if (AlignAllFunctions)
- Alignment = AlignAllFunctions;
+ Alignment = Align(1ULL << AlignAllFunctions);
JumpTableInfo = nullptr;
@@ -200,7 +201,7 @@ void MachineFunction::init() {
"Target-incompatible DataLayout attached\n");
PSVManager =
- llvm::make_unique<PseudoSourceValueManager>(*(getSubtarget().
+ std::make_unique<PseudoSourceValueManager>(*(getSubtarget().
getInstrInfo()));
}
@@ -823,30 +824,47 @@ try_next:;
return FilterID;
}
-void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I, MDNode *MD) {
+void MachineFunction::addCodeViewHeapAllocSite(MachineInstr *I,
+ const MDNode *MD) {
MCSymbol *BeginLabel = Ctx.createTempSymbol("heapallocsite", true);
MCSymbol *EndLabel = Ctx.createTempSymbol("heapallocsite", true);
I->setPreInstrSymbol(*this, BeginLabel);
I->setPostInstrSymbol(*this, EndLabel);
- DIType *DI = dyn_cast<DIType>(MD);
+ const DIType *DI = dyn_cast<DIType>(MD);
CodeViewHeapAllocSites.push_back(std::make_tuple(BeginLabel, EndLabel, DI));
}
-void MachineFunction::updateCallSiteInfo(const MachineInstr *Old,
- const MachineInstr *New) {
- if (!Target.Options.EnableDebugEntryValues || Old == New)
- return;
+void MachineFunction::moveCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(New->isCall() && "Call site info refers only to call instructions!");
- assert(Old->isCall() && (!New || New->isCall()) &&
- "Call site info referes only to call instructions!");
- CallSiteInfoMap::iterator CSIt = CallSitesInfo.find(Old);
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
if (CSIt == CallSitesInfo.end())
return;
+
CallSiteInfo CSInfo = std::move(CSIt->second);
CallSitesInfo.erase(CSIt);
- if (New)
- CallSitesInfo[New] = CSInfo;
+ CallSitesInfo[New] = CSInfo;
+}
+
+void MachineFunction::eraseCallSiteInfo(const MachineInstr *MI) {
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(MI);
+ if (CSIt == CallSitesInfo.end())
+ return;
+ CallSitesInfo.erase(CSIt);
+}
+
+void MachineFunction::copyCallSiteInfo(const MachineInstr *Old,
+ const MachineInstr *New) {
+ assert(New->isCall() && "Call site info refers only to call instructions!");
+
+ CallSiteInfoMap::iterator CSIt = getCallSiteInfo(Old);
+ if (CSIt == CallSitesInfo.end())
+ return;
+
+ CallSiteInfo CSInfo = CSIt->second;
+ CallSitesInfo[New] = CSInfo;
}
/// \}
@@ -881,13 +899,13 @@ unsigned MachineJumpTableInfo::getEntryAlignment(const DataLayout &TD) const {
// alignment.
switch (getEntryKind()) {
case MachineJumpTableInfo::EK_BlockAddress:
- return TD.getPointerABIAlignment(0);
+ return TD.getPointerABIAlignment(0).value();
case MachineJumpTableInfo::EK_GPRel64BlockAddress:
- return TD.getABIIntegerTypeAlignment(64);
+ return TD.getABIIntegerTypeAlignment(64).value();
case MachineJumpTableInfo::EK_GPRel32BlockAddress:
case MachineJumpTableInfo::EK_LabelDifference32:
case MachineJumpTableInfo::EK_Custom32:
- return TD.getABIIntegerTypeAlignment(32);
+ return TD.getABIIntegerTypeAlignment(32).value();
case MachineJumpTableInfo::EK_Inline:
return 1;
}
diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp
index 0da4cf3fc90c..03149aa7db4a 100644
--- a/lib/CodeGen/MachineFunctionPass.cpp
+++ b/lib/CodeGen/MachineFunctionPass.cpp
@@ -41,7 +41,7 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
if (F.hasAvailableExternallyLinkage())
return false;
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(F);
MachineFunctionProperties &MFProps = MF.getProperties();
@@ -101,8 +101,8 @@ bool MachineFunctionPass::runOnFunction(Function &F) {
}
void MachineFunctionPass::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
// MachineFunctionPass preserves all LLVM IR passes, but there's no
// high-level way to express this. Instead, just list a bunch of
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index e5c398a2d10c..fec20b2b1a05 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -636,8 +636,8 @@ bool MachineInstr::isIdenticalTo(const MachineInstr &Other,
if (Check == IgnoreDefs)
continue;
else if (Check == IgnoreVRegDefs) {
- if (!TargetRegisterInfo::isVirtualRegister(MO.getReg()) ||
- !TargetRegisterInfo::isVirtualRegister(OMO.getReg()))
+ if (!Register::isVirtualRegister(MO.getReg()) ||
+ !Register::isVirtualRegister(OMO.getReg()))
if (!MO.isIdenticalTo(OMO))
return false;
} else {
@@ -692,8 +692,8 @@ void MachineInstr::eraseFromParentAndMarkDBGValuesForRemoval() {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Reg.isVirtual())
continue;
MRI.markUsesInDebugValueAsUndef(Reg);
}
@@ -832,6 +832,10 @@ const DIExpression *MachineInstr::getDebugExpression() const {
return cast<DIExpression>(getOperand(3).getMetadata());
}
+bool MachineInstr::isDebugEntryValue() const {
+ return isDebugValue() && getDebugExpression()->isEntryValue();
+}
+
const TargetRegisterClass*
MachineInstr::getRegClassConstraint(unsigned OpIdx,
const TargetInstrInfo *TII,
@@ -873,7 +877,7 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx,
}
const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
- unsigned Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
+ Register Reg, const TargetRegisterClass *CurRC, const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI, bool ExploreBundle) const {
// Check every operands inside the bundle if we have
// been asked to.
@@ -890,7 +894,7 @@ const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVReg(
}
const TargetRegisterClass *MachineInstr::getRegClassConstraintEffectForVRegImpl(
- unsigned OpIdx, unsigned Reg, const TargetRegisterClass *CurRC,
+ unsigned OpIdx, Register Reg, const TargetRegisterClass *CurRC,
const TargetInstrInfo *TII, const TargetRegisterInfo *TRI) const {
assert(CurRC && "Invalid initial register class");
// Check if Reg is constrained by some of its use/def from MI.
@@ -933,7 +937,7 @@ unsigned MachineInstr::getBundleSize() const {
/// Returns true if the MachineInstr has an implicit-use operand of exactly
/// the given register (not considering sub/super-registers).
-bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
+bool MachineInstr::hasRegisterImplicitUseOperand(Register Reg) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (MO.isReg() && MO.isUse() && MO.isImplicit() && MO.getReg() == Reg)
@@ -946,12 +950,12 @@ bool MachineInstr::hasRegisterImplicitUseOperand(unsigned Reg) const {
/// the specific register or -1 if it is not found. It further tightens
/// the search criteria to a use that kills the register if isKill is true.
int MachineInstr::findRegisterUseOperandIdx(
- unsigned Reg, bool isKill, const TargetRegisterInfo *TRI) const {
+ Register Reg, bool isKill, const TargetRegisterInfo *TRI) const {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MOReg == Reg || (TRI && Reg && MOReg && TRI->regsOverlap(MOReg, Reg)))
@@ -965,7 +969,7 @@ int MachineInstr::findRegisterUseOperandIdx(
/// indicating if this instruction reads or writes Reg. This also considers
/// partial defines.
std::pair<bool,bool>
-MachineInstr::readsWritesVirtualRegister(unsigned Reg,
+MachineInstr::readsWritesVirtualRegister(Register Reg,
SmallVectorImpl<unsigned> *Ops) const {
bool PartDef = false; // Partial redefine.
bool FullDef = false; // Full define.
@@ -994,9 +998,9 @@ MachineInstr::readsWritesVirtualRegister(unsigned Reg,
/// that are not dead are skipped. If TargetRegisterInfo is non-null, then it
/// also checks if there is a def of a super-register.
int
-MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
+MachineInstr::findRegisterDefOperandIdx(Register Reg, bool isDead, bool Overlap,
const TargetRegisterInfo *TRI) const {
- bool isPhys = TargetRegisterInfo::isPhysicalRegister(Reg);
+ bool isPhys = Register::isPhysicalRegister(Reg);
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
// Accept regmask operands when Overlap is set.
@@ -1005,10 +1009,9 @@ MachineInstr::findRegisterDefOperandIdx(unsigned Reg, bool isDead, bool Overlap,
return i;
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
bool Found = (MOReg == Reg);
- if (!Found && TRI && isPhys &&
- TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ if (!Found && TRI && isPhys && Register::isPhysicalRegister(MOReg)) {
if (Overlap)
Found = TRI->regsOverlap(MOReg, Reg);
else
@@ -1142,10 +1145,10 @@ void MachineInstr::clearKillInfo() {
}
}
-void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,
+void MachineInstr::substituteRegister(Register FromReg, Register ToReg,
unsigned SubIdx,
const TargetRegisterInfo &RegInfo) {
- if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (Register::isPhysicalRegister(ToReg)) {
if (SubIdx)
ToReg = RegInfo.getSubReg(ToReg, SubIdx);
for (MachineOperand &MO : operands()) {
@@ -1165,7 +1168,7 @@ void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg,
/// isSafeToMove - Return true if it is safe to move this instruction. If
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
-bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
+bool MachineInstr::isSafeToMove(AAResults *AA, bool &SawStore) const {
// Ignore stuff that we obviously can't move.
//
// Treat volatile loads as stores. This is not strictly necessary for
@@ -1194,7 +1197,7 @@ bool MachineInstr::isSafeToMove(AliasAnalysis *AA, bool &SawStore) const {
return true;
}
-bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
+bool MachineInstr::mayAlias(AAResults *AA, const MachineInstr &Other,
bool UseTBAA) const {
const MachineFunction *MF = getMF();
const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
@@ -1206,7 +1209,7 @@ bool MachineInstr::mayAlias(AliasAnalysis *AA, const MachineInstr &Other,
return false;
// Let the target decide if memory accesses cannot possibly overlap.
- if (TII->areMemAccessesTriviallyDisjoint(*this, Other, AA))
+ if (TII->areMemAccessesTriviallyDisjoint(*this, Other))
return false;
// FIXME: Need to handle multiple memory operands to support all targets.
@@ -1312,7 +1315,7 @@ bool MachineInstr::hasOrderedMemoryRef() const {
/// isDereferenceableInvariantLoad - Return true if this instruction will never
/// trap and is loading from a location whose value is invariant across a run of
/// this function.
-bool MachineInstr::isDereferenceableInvariantLoad(AliasAnalysis *AA) const {
+bool MachineInstr::isDereferenceableInvariantLoad(AAResults *AA) const {
// If the instruction doesn't load at all, it isn't an invariant load.
if (!mayLoad())
return false;
@@ -1364,7 +1367,7 @@ unsigned MachineInstr::isConstantValuePHI() const {
assert(getNumOperands() >= 3 &&
"It's illegal to have a PHI without source operands");
- unsigned Reg = getOperand(1).getReg();
+ Register Reg = getOperand(1).getReg();
for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
if (getOperand(i).getReg() != Reg)
return 0;
@@ -1726,7 +1729,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
MFI = &MF->getFrameInfo();
Context = &MF->getFunction().getContext();
} else {
- CtxPtr = llvm::make_unique<LLVMContext>();
+ CtxPtr = std::make_unique<LLVMContext>();
Context = CtxPtr.get();
}
@@ -1780,10 +1783,10 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << '\n';
}
-bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
+bool MachineInstr::addRegisterKilled(Register IncomingReg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg);
+ bool isPhysReg = Register::isPhysicalRegister(IncomingReg);
bool hasAliases = isPhysReg &&
MCRegAliasIterator(IncomingReg, RegInfo, false).isValid();
bool Found = false;
@@ -1799,7 +1802,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
if (MO.isDebug())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -1814,8 +1817,7 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
MO.setIsKill();
Found = true;
}
- } else if (hasAliases && MO.isKill() &&
- TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ } else if (hasAliases && MO.isKill() && Register::isPhysicalRegister(Reg)) {
// A super-register kill already exists.
if (RegInfo->isSuperRegister(IncomingReg, Reg))
return true;
@@ -1847,23 +1849,23 @@ bool MachineInstr::addRegisterKilled(unsigned IncomingReg,
return Found;
}
-void MachineInstr::clearRegisterKills(unsigned Reg,
+void MachineInstr::clearRegisterKills(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!Register::isPhysicalRegister(Reg))
RegInfo = nullptr;
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isUse() || !MO.isKill())
continue;
- unsigned OpReg = MO.getReg();
+ Register OpReg = MO.getReg();
if ((RegInfo && RegInfo->regsOverlap(Reg, OpReg)) || Reg == OpReg)
MO.setIsKill(false);
}
}
-bool MachineInstr::addRegisterDead(unsigned Reg,
+bool MachineInstr::addRegisterDead(Register Reg,
const TargetRegisterInfo *RegInfo,
bool AddIfNotFound) {
- bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg);
+ bool isPhysReg = Register::isPhysicalRegister(Reg);
bool hasAliases = isPhysReg &&
MCRegAliasIterator(Reg, RegInfo, false).isValid();
bool Found = false;
@@ -1872,7 +1874,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
MachineOperand &MO = getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
@@ -1880,7 +1882,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
MO.setIsDead();
Found = true;
} else if (hasAliases && MO.isDead() &&
- TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ Register::isPhysicalRegister(MOReg)) {
// There exists a super-register that's marked dead.
if (RegInfo->isSuperRegister(Reg, MOReg))
return true;
@@ -1913,7 +1915,7 @@ bool MachineInstr::addRegisterDead(unsigned Reg,
return true;
}
-void MachineInstr::clearRegisterDeads(unsigned Reg) {
+void MachineInstr::clearRegisterDeads(Register Reg) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg)
continue;
@@ -1921,7 +1923,7 @@ void MachineInstr::clearRegisterDeads(unsigned Reg) {
}
}
-void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
+void MachineInstr::setRegisterDefReadUndef(Register Reg, bool IsUndef) {
for (MachineOperand &MO : operands()) {
if (!MO.isReg() || !MO.isDef() || MO.getReg() != Reg || MO.getSubReg() == 0)
continue;
@@ -1929,9 +1931,9 @@ void MachineInstr::setRegisterDefReadUndef(unsigned Reg, bool IsUndef) {
}
}
-void MachineInstr::addRegisterDefined(unsigned Reg,
+void MachineInstr::addRegisterDefined(Register Reg,
const TargetRegisterInfo *RegInfo) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
MachineOperand *MO = findRegisterDefOperand(Reg, false, false, RegInfo);
if (MO)
return;
@@ -1947,7 +1949,7 @@ void MachineInstr::addRegisterDefined(unsigned Reg,
true /*IsImp*/));
}
-void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
+void MachineInstr::setPhysRegsDeadExcept(ArrayRef<Register> UsedRegs,
const TargetRegisterInfo &TRI) {
bool HasRegMask = false;
for (MachineOperand &MO : operands()) {
@@ -1956,18 +1958,19 @@ void MachineInstr::setPhysRegsDeadExcept(ArrayRef<unsigned> UsedRegs,
continue;
}
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg.isPhysical())
+ continue;
// If there are no uses, including partial uses, the def is dead.
if (llvm::none_of(UsedRegs,
- [&](unsigned Use) { return TRI.regsOverlap(Use, Reg); }))
+ [&](MCRegister Use) { return TRI.regsOverlap(Use, Reg); }))
MO.setIsDead();
}
// This is a call with a register mask operand.
// Mask clobbers are always dead, so add defs for the non-dead defines.
if (HasRegMask)
- for (ArrayRef<unsigned>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
+ for (ArrayRef<Register>::iterator I = UsedRegs.begin(), E = UsedRegs.end();
I != E; ++I)
addRegisterDefined(*I, &TRI);
}
@@ -1979,8 +1982,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
HashComponents.reserve(MI->getNumOperands() + 1);
HashComponents.push_back(MI->getOpcode());
for (const MachineOperand &MO : MI->operands()) {
- if (MO.isReg() && MO.isDef() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && MO.isDef() && Register::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
HashComponents.push_back(hash_value(MO));
@@ -2012,7 +2014,7 @@ void MachineInstr::emitError(StringRef Msg) const {
MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
const MCInstrDesc &MCID, bool IsIndirect,
- unsigned Reg, const MDNode *Variable,
+ Register Reg, const MDNode *Variable,
const MDNode *Expr) {
assert(isa<DILocalVariable>(Variable) && "not a variable");
assert(cast<DIExpression>(Expr)->isValid() && "not an expression");
@@ -2048,7 +2050,7 @@ MachineInstrBuilder llvm::BuildMI(MachineFunction &MF, const DebugLoc &DL,
MachineInstrBuilder llvm::BuildMI(MachineBasicBlock &BB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, const MCInstrDesc &MCID,
- bool IsIndirect, unsigned Reg,
+ bool IsIndirect, Register Reg,
const MDNode *Variable, const MDNode *Expr) {
MachineFunction &MF = *BB.getParent();
MachineInstr *MI = BuildMI(MF, DL, MCID, IsIndirect, Reg, Variable, Expr);
@@ -2118,10 +2120,24 @@ void MachineInstr::collectDebugValues(
}
}
-void MachineInstr::changeDebugValuesDefReg(unsigned Reg) {
+void MachineInstr::changeDebugValuesDefReg(Register Reg) {
// Collect matching debug values.
SmallVector<MachineInstr *, 2> DbgValues;
- collectDebugValues(DbgValues);
+
+ if (!getOperand(0).isReg())
+ return;
+
+ unsigned DefReg = getOperand(0).getReg();
+ auto *MRI = getRegInfo();
+ for (auto &MO : MRI->use_operands(DefReg)) {
+ auto *DI = MO.getParent();
+ if (!DI->isDebugValue())
+ continue;
+ if (DI->getOperand(0).isReg() &&
+ DI->getOperand(0).getReg() == DefReg) {
+ DbgValues.push_back(DI);
+ }
+ }
// Propagate Reg to debug value instructions.
for (auto *DBI : DbgValues)
  DBI->getOperand(0).setReg(Reg);
}
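The bulk of this patch is the mechanical migration from raw unsigned register numbers to the Register wrapper. As a rough sketch of the resulting idiom (the operand MO and the helper are assumptions for illustration, not taken from the patch), Register converts implicitly to and from unsigned, so existing arithmetic and container keys keep working, while the old static TargetRegisterInfo predicates become members:

    #include "llvm/CodeGen/MachineOperand.h"
    #include "llvm/CodeGen/Register.h"

    // Classify a register operand; previously spelled with the
    // TargetRegisterInfo::is{Virtual,Physical}Register statics.
    static const char *classifyReg(const llvm::MachineOperand &MO) {
      llvm::Register Reg = MO.getReg();
      if (Reg.isVirtual())
        return "virtual";
      if (Reg.isPhysical())
        return "physical";
      return "noreg"; // a default-constructed Register compares equal to 0
    }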
diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp
index 32e266e9401e..feb849ced353 100644
--- a/lib/CodeGen/MachineInstrBundle.cpp
+++ b/lib/CodeGen/MachineInstrBundle.cpp
@@ -154,10 +154,10 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
continue;
}
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+
if (LocalDefSet.count(Reg)) {
MO.setIsInternalRead();
if (MO.isKill())
@@ -177,7 +177,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
MachineOperand &MO = *Defs[i];
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -194,7 +194,7 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB,
DeadDefSet.erase(Reg);
}
- if (!MO.isDead()) {
+ if (!MO.isDead() && Register::isPhysicalRegister(Reg)) {
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) {
unsigned SubReg = *SubRegs;
if (LocalDefSet.insert(SubReg).second)
@@ -316,7 +316,7 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
bool AllDefsDead = true;
PhysRegInfo PRI = {false, false, false, false, false, false, false, false};
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(Register::isPhysicalRegister(Reg) &&
"analyzePhysReg not given a physical register!");
for (; isValid(); ++*this) {
MachineOperand &MO = deref();
@@ -329,8 +329,8 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg,
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
- if (!MOReg || !TargetRegisterInfo::isPhysicalRegister(MOReg))
+ Register MOReg = MO.getReg();
+ if (!MOReg || !Register::isPhysicalRegister(MOReg))
continue;
if (!TRI->regsOverlap(MOReg, Reg))
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1107e609c258..6a898ff6ef88 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -153,7 +153,6 @@ namespace {
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AAResultsWrapperPass>();
AU.addPreserved<MachineLoopInfo>();
- AU.addPreserved<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -424,10 +423,10 @@ void MachineLICMBase::ProcessMI(MachineInstr *MI,
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(Register::isPhysicalRegister(Reg) &&
"Not expecting virtual register!");
if (!MO.isDef()) {
@@ -526,7 +525,7 @@ void MachineLICMBase::HoistRegionPostRA() {
for (const MachineOperand &MO : TI->operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
@@ -554,7 +553,7 @@ void MachineLICMBase::HoistRegionPostRA() {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || MO.isDef() || !MO.getReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (PhysRegDefs.test(Reg) ||
PhysRegClobbers.test(Reg)) {
// If it's using a non-loop-invariant register, then it's obviously
@@ -852,8 +851,8 @@ MachineLICMBase::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
// FIXME: It seems bad to use RegSeen only for some of these calculations.
@@ -922,12 +921,12 @@ static bool isInvariantStore(const MachineInstr &MI,
// Check that all register operands are caller-preserved physical registers.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// If operand is a virtual register, check if it comes from a copy of a
// physical register.
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
Reg = TRI->lookThruCopyLike(MO.getReg(), MRI);
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return false;
if (!TRI->isCallerPreservedPhysReg(Reg, *MI.getMF()))
return false;
@@ -955,17 +954,17 @@ static bool isCopyFeedingInvariantStore(const MachineInstr &MI,
const MachineFunction *MF = MI.getMF();
// Check that we are copying a constant physical register.
- unsigned CopySrcReg = MI.getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(CopySrcReg))
+ Register CopySrcReg = MI.getOperand(1).getReg();
+ if (Register::isVirtualRegister(CopySrcReg))
return false;
if (!TRI->isCallerPreservedPhysReg(CopySrcReg, *MF))
return false;
- unsigned CopyDstReg = MI.getOperand(0).getReg();
+ Register CopyDstReg = MI.getOperand(0).getReg();
// Check if any of the uses of the copy are invariant stores.
- assert (TargetRegisterInfo::isVirtualRegister(CopyDstReg) &&
- "copy dst is not a virtual reg");
+ assert(Register::isVirtualRegister(CopyDstReg) &&
+ "copy dst is not a virtual reg");
for (MachineInstr &UseMI : MRI->use_instructions(CopyDstReg)) {
if (UseMI.mayStore() && isInvariantStore(UseMI, TRI, MRI))
@@ -1010,11 +1009,11 @@ bool MachineLICMBase::IsLoopInvariantInst(MachineInstr &I) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
// Don't hoist an instruction that uses or defines a physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1061,8 +1060,8 @@ bool MachineLICMBase::HasLoopPHIUse(const MachineInstr *MI) const {
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
// A PHI may cause a copy to be inserted.
@@ -1104,7 +1103,7 @@ bool MachineLICMBase::HasHighOperandLatency(MachineInstr &MI,
const MachineOperand &MO = UseMI.getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MOReg != Reg)
continue;
@@ -1132,8 +1131,8 @@ bool MachineLICMBase::IsCheapInstruction(MachineInstr &MI) const {
if (!DefMO.isReg() || !DefMO.isDef())
continue;
--NumDefs;
- unsigned Reg = DefMO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = DefMO.getReg();
+ if (Register::isPhysicalRegister(Reg))
continue;
if (!TII->hasLowDefLatency(SchedModel, MI, i))
@@ -1225,8 +1224,8 @@ bool MachineLICMBase::IsProfitableToHoist(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || MO.isImplicit())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
LLVM_DEBUG(dbgs() << "Hoist High Latency: " << MI);
@@ -1304,7 +1303,7 @@ MachineInstr *MachineLICMBase::ExtractHoistableLoad(MachineInstr *MI) {
MachineFunction &MF = *MI->getMF();
const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
// Ok, we're unfolding. Create a temporary register and do the unfold.
- unsigned Reg = MRI->createVirtualRegister(RC);
+ Register Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
bool Success = TII->unfoldMemoryOperand(MF, *MI, Reg,
@@ -1378,20 +1377,20 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
// Physical registers may not differ here.
assert((!MO.isReg() || MO.getReg() == 0 ||
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ !Register::isPhysicalRegister(MO.getReg()) ||
MO.getReg() == Dup->getOperand(i).getReg()) &&
"Instructions with different phys regs are not identical!");
if (MO.isReg() && MO.isDef() &&
- !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ !Register::isPhysicalRegister(MO.getReg()))
Defs.push_back(i);
}
SmallVector<const TargetRegisterClass*, 2> OrigRCs;
for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
unsigned Idx = Defs[i];
- unsigned Reg = MI->getOperand(Idx).getReg();
- unsigned DupReg = Dup->getOperand(Idx).getReg();
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
OrigRCs.push_back(MRI->getRegClass(DupReg));
if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
@@ -1403,8 +1402,8 @@ bool MachineLICMBase::EliminateCSE(MachineInstr *MI,
}
for (unsigned Idx : Defs) {
- unsigned Reg = MI->getOperand(Idx).getReg();
- unsigned DupReg = Dup->getOperand(Idx).getReg();
+ Register Reg = MI->getOperand(Idx).getReg();
+ Register DupReg = Dup->getOperand(Idx).getReg();
MRI->replaceRegWith(Reg, DupReg);
MRI->clearKillFlags(DupReg);
}
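For reference, the isInvariantStore/isCopyFeedingInvariantStore pair above hinges on the look-through step: a virtual register only qualifies if it ultimately copies a caller-preserved physical register. A minimal sketch of that check in isolation, with MI, TRI and MRI assumed to come from the surrounding pass:

    // Follow copy-like instructions back to the source register; only a
    // caller-preserved physical register makes the store invariant.
    llvm::Register Reg = MI.getOperand(1).getReg();
    if (Reg.isVirtual())
      Reg = TRI->lookThruCopyLike(Reg, MRI);
    bool Invariant =
        !Reg.isVirtual() && TRI->isCallerPreservedPhysReg(Reg, *MI.getMF());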
diff --git a/lib/CodeGen/MachineLoopUtils.cpp b/lib/CodeGen/MachineLoopUtils.cpp
new file mode 100644
index 000000000000..e074b76082f0
--- /dev/null
+++ b/lib/CodeGen/MachineLoopUtils.cpp
@@ -0,0 +1,132 @@
+//=- MachineLoopUtils.cpp - Functions for manipulating loops ----------------=//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+using namespace llvm;
+
+namespace {
+// MI's parent and BB are clones of each other. Find the equivalent copy of MI
+// in BB.
+MachineInstr &findEquivalentInstruction(MachineInstr &MI,
+ MachineBasicBlock *BB) {
+ MachineBasicBlock *PB = MI.getParent();
+ unsigned Offset = std::distance(
+ PB->instr_begin(), MachineBasicBlock::instr_iterator(MI));
+ return *std::next(BB->instr_begin(), Offset);
+}
+} // namespace
+
+MachineBasicBlock *llvm::PeelSingleBlockLoop(LoopPeelDirection Direction,
+ MachineBasicBlock *Loop,
+ MachineRegisterInfo &MRI,
+ const TargetInstrInfo *TII) {
+ MachineFunction &MF = *Loop->getParent();
+ MachineBasicBlock *Preheader = *Loop->pred_begin();
+ if (Preheader == Loop)
+ Preheader = *std::next(Loop->pred_begin());
+ MachineBasicBlock *Exit = *Loop->succ_begin();
+ if (Exit == Loop)
+ Exit = *std::next(Loop->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(Loop->getBasicBlock());
+ if (Direction == LPD_Front)
+ MF.insert(Loop->getIterator(), NewBB);
+ else
+ MF.insert(std::next(Loop->getIterator()), NewBB);
+
+ // FIXME: Add DenseMapInfo trait for Register so we can use it as a key.
+ DenseMap<unsigned, Register> Remaps;
+ auto InsertPt = NewBB->end();
+ for (MachineInstr &MI : *Loop) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&MI);
+ NewBB->insert(InsertPt, NewMI);
+ for (MachineOperand &MO : NewMI->defs()) {
+ Register OrigR = MO.getReg();
+ if (OrigR.isPhysical())
+ continue;
+ Register &R = Remaps[OrigR];
+ R = MRI.createVirtualRegister(MRI.getRegClass(OrigR));
+ MO.setReg(R);
+
+ if (Direction == LPD_Back) {
+ // Replace all uses outside the original loop with the new register.
+ // FIXME: is the use_iterator stable enough to mutate register uses
+ // while iterating?
+ SmallVector<MachineOperand *, 4> Uses;
+ for (auto &Use : MRI.use_operands(OrigR))
+ if (Use.getParent()->getParent() != Loop)
+ Uses.push_back(&Use);
+ for (auto *Use : Uses) {
+ MRI.constrainRegClass(R, MRI.getRegClass(Use->getReg()));
+ Use->setReg(R);
+ }
+ }
+ }
+ }
+
+ for (auto I = NewBB->getFirstNonPHI(); I != NewBB->end(); ++I)
+ for (MachineOperand &MO : I->uses())
+ if (MO.isReg() && Remaps.count(MO.getReg()))
+ MO.setReg(Remaps[MO.getReg()]);
+
+ for (auto I = NewBB->begin(); I->isPHI(); ++I) {
+ MachineInstr &MI = *I;
+ unsigned LoopRegIdx = 3, InitRegIdx = 1;
+ if (MI.getOperand(2).getMBB() != Preheader)
+ std::swap(LoopRegIdx, InitRegIdx);
+ MachineInstr &OrigPhi = findEquivalentInstruction(MI, Loop);
+ assert(OrigPhi.isPHI());
+ if (Direction == LPD_Front) {
+ // When peeling front, we are only left with the initial value from the
+ // preheader.
+ Register R = MI.getOperand(LoopRegIdx).getReg();
+ if (Remaps.count(R))
+ R = Remaps[R];
+ OrigPhi.getOperand(InitRegIdx).setReg(R);
+ MI.RemoveOperand(LoopRegIdx + 1);
+ MI.RemoveOperand(LoopRegIdx + 0);
+ } else {
+ // When peeling back, the initial value is the loop-carried value from
+ // the original loop.
+ Register LoopReg = OrigPhi.getOperand(LoopRegIdx).getReg();
+ MI.getOperand(LoopRegIdx).setReg(LoopReg);
+ MI.RemoveOperand(InitRegIdx + 1);
+ MI.RemoveOperand(InitRegIdx + 0);
+ }
+ }
+
+ DebugLoc DL;
+ if (Direction == LPD_Front) {
+ Preheader->replaceSuccessor(Loop, NewBB);
+ NewBB->addSuccessor(Loop);
+ Loop->replacePhiUsesWith(Preheader, NewBB);
+ if (TII->removeBranch(*Preheader) > 0)
+ TII->insertBranch(*Preheader, NewBB, nullptr, {}, DL);
+ TII->removeBranch(*NewBB);
+ TII->insertBranch(*NewBB, Loop, nullptr, {}, DL);
+ } else {
+ Loop->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(Loop, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*Loop, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*Loop);
+ TII->insertBranch(*Loop, TBB == Exit ? NewBB : TBB,
+ FBB == Exit ? NewBB : FBB, Cond, DL);
+ if (TII->removeBranch(*NewBB) > 0)
+ TII->insertBranch(*NewBB, Exit, nullptr, {}, DL);
+ }
+
+ return NewBB;
+}
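This utility is consumed by the PeelingModuloScheduleExpander wired into the pipeliner later in this diff; a minimal sketch of a direct call site follows (the helper and its caller are hypothetical, and the loop must already have the single-preheader, single-exit shape the utility assumes):

    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/CodeGen/MachineLoopUtils.h"
    #include "llvm/CodeGen/TargetSubtargetInfo.h"

    // Peel one iteration off the front of a single-block loop.
    static llvm::MachineBasicBlock *peelFront(llvm::MachineBasicBlock *LoopBB) {
      llvm::MachineFunction &MF = *LoopBB->getParent();
      const llvm::TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
      return llvm::PeelSingleBlockLoop(llvm::LPD_Front, LoopBB,
                                       MF.getRegInfo(), TII);
    }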
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index aadcd7319799..e0b4e9cac229 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -36,11 +36,6 @@
using namespace llvm;
using namespace llvm::dwarf;
-// Handle the Pass registration stuff necessary to use DataLayout's.
-INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false)
-char MachineModuleInfo::ID = 0;
-
// Out of line virtual method.
MachineModuleInfoImpl::~MachineModuleInfoImpl() = default;
@@ -121,7 +116,7 @@ ArrayRef<MCSymbol *> MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
BBCallbacks.back().setMap(this);
Entry.Index = BBCallbacks.size() - 1;
Entry.Fn = BB->getParent();
- Entry.Symbols.push_back(Context.createTempSymbol());
+ Entry.Symbols.push_back(Context.createTempSymbol(!BB->hasAddressTaken()));
return Entry.Symbols;
}
@@ -193,27 +188,15 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
Map->UpdateForRAUWBlock(cast<BasicBlock>(getValPtr()), cast<BasicBlock>(V2));
}
-MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
- : ImmutablePass(ID), TM(*TM),
- Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
- TM->getObjFileLowering(), nullptr, false) {
- initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
-}
-
-MachineModuleInfo::~MachineModuleInfo() = default;
-
-bool MachineModuleInfo::doInitialization(Module &M) {
+void MachineModuleInfo::initialize() {
ObjFileMMI = nullptr;
CurCallSite = 0;
UsesMSVCFloatingPoint = UsesMorestackAddr = false;
HasSplitStack = HasNosplitStack = false;
AddrLabelSymbols = nullptr;
- TheModule = &M;
- DbgInfoAvailable = !llvm::empty(M.debug_compile_units());
- return false;
}
-bool MachineModuleInfo::doFinalization(Module &M) {
+void MachineModuleInfo::finalize() {
Personalities.clear();
delete AddrLabelSymbols;
@@ -223,10 +206,30 @@ bool MachineModuleInfo::doFinalization(Module &M) {
delete ObjFileMMI;
ObjFileMMI = nullptr;
+}
- return false;
+MachineModuleInfo::MachineModuleInfo(MachineModuleInfo &&MMI)
+ : TM(std::move(MMI.TM)),
+ Context(MMI.TM.getMCAsmInfo(), MMI.TM.getMCRegisterInfo(),
+ MMI.TM.getObjFileLowering(), nullptr, nullptr, false) {
+ ObjFileMMI = MMI.ObjFileMMI;
+ CurCallSite = MMI.CurCallSite;
+ UsesMSVCFloatingPoint = MMI.UsesMSVCFloatingPoint;
+ UsesMorestackAddr = MMI.UsesMorestackAddr;
+ HasSplitStack = MMI.HasSplitStack;
+ HasNosplitStack = MMI.HasNosplitStack;
+ AddrLabelSymbols = MMI.AddrLabelSymbols;
+ TheModule = MMI.TheModule;
}
+MachineModuleInfo::MachineModuleInfo(const LLVMTargetMachine *TM)
+ : TM(*TM), Context(TM->getMCAsmInfo(), TM->getMCRegisterInfo(),
+ TM->getObjFileLowering(), nullptr, nullptr, false) {
+ initialize();
+}
+
+MachineModuleInfo::~MachineModuleInfo() { finalize(); }
+
//===- Address of Block Management ----------------------------------------===//
ArrayRef<MCSymbol *>
@@ -305,12 +308,13 @@ public:
FreeMachineFunction() : FunctionPass(ID) {}
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
}
bool runOnFunction(Function &F) override {
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI =
+ getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MMI.deleteMachineFunctionFor(F);
return true;
}
@@ -327,3 +331,36 @@ char FreeMachineFunction::ID;
FunctionPass *llvm::createFreeMachineFunctionPass() {
return new FreeMachineFunction();
}
+
+MachineModuleInfoWrapperPass::MachineModuleInfoWrapperPass(
+ const LLVMTargetMachine *TM)
+ : ImmutablePass(ID), MMI(TM) {
+ initializeMachineModuleInfoWrapperPassPass(*PassRegistry::getPassRegistry());
+}
+
+// Handle the pass registration boilerplate for the wrapper pass.
+INITIALIZE_PASS(MachineModuleInfoWrapperPass, "machinemoduleinfo",
+ "Machine Module Information", false, false)
+char MachineModuleInfoWrapperPass::ID = 0;
+
+bool MachineModuleInfoWrapperPass::doInitialization(Module &M) {
+ MMI.initialize();
+ MMI.TheModule = &M;
+ MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ return false;
+}
+
+bool MachineModuleInfoWrapperPass::doFinalization(Module &M) {
+ MMI.finalize();
+ return false;
+}
+
+AnalysisKey MachineModuleAnalysis::Key;
+
+MachineModuleInfo MachineModuleAnalysis::run(Module &M,
+ ModuleAnalysisManager &) {
+ MachineModuleInfo MMI(TM);
+ MMI.TheModule = &M;
+ MMI.DbgInfoAvailable = !M.debug_compile_units().empty();
+ return MMI;
+}
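The client-side effect of this split shows up in the MachineOutliner hunks later in this diff: MachineModuleInfo is no longer a pass itself, so legacy-PM code requests the wrapper and unwraps it, while the new PM obtains it from MachineModuleAnalysis. A minimal legacy-PM sketch, with MyModulePass hypothetical:

    // getAnalysisUsage: depend on the wrapper, not on MachineModuleInfo.
    void MyModulePass::getAnalysisUsage(llvm::AnalysisUsage &AU) const {
      AU.addRequired<llvm::MachineModuleInfoWrapperPass>();
      AU.addPreserved<llvm::MachineModuleInfoWrapperPass>();
    }

    bool MyModulePass::runOnModule(llvm::Module &M) {
      llvm::MachineModuleInfo &MMI =
          getAnalysis<llvm::MachineModuleInfoWrapperPass>().getMMI();
      // ... query MMI, e.g. MMI.getMachineFunction(F) for some F in M.
      return false;
    }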
diff --git a/lib/CodeGen/MachineOperand.cpp b/lib/CodeGen/MachineOperand.cpp
index 4fa4ea7f6cf5..8b19501ec3cf 100644
--- a/lib/CodeGen/MachineOperand.cpp
+++ b/lib/CodeGen/MachineOperand.cpp
@@ -49,7 +49,7 @@ static MachineFunction *getMFIfAvailable(MachineOperand &MO) {
getMFIfAvailable(const_cast<const MachineOperand &>(MO)));
}
-void MachineOperand::setReg(unsigned Reg) {
+void MachineOperand::setReg(Register Reg) {
if (getReg() == Reg)
return; // No change.
@@ -71,9 +71,9 @@ void MachineOperand::setReg(unsigned Reg) {
SmallContents.RegNo = Reg;
}
-void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
+void MachineOperand::substVirtReg(Register Reg, unsigned SubIdx,
const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Reg.isVirtual());
if (SubIdx && getSubReg())
SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg());
setReg(Reg);
@@ -81,8 +81,8 @@ void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
setSubReg(SubIdx);
}
-void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) {
- assert(TargetRegisterInfo::isPhysicalRegister(Reg));
+void MachineOperand::substPhysReg(MCRegister Reg,
+ const TargetRegisterInfo &TRI) {
+ assert(Reg.isPhysical());
if (getSubReg()) {
Reg = TRI.getSubReg(Reg, getSubReg());
// Note that getSubReg() may return 0 if the sub-register doesn't exist.
@@ -114,7 +114,7 @@ void MachineOperand::setIsDef(bool Val) {
bool MachineOperand::isRenamable() const {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ assert(Register::isPhysicalRegister(getReg()) &&
"isRenamable should only be checked on physical registers");
if (!IsRenamable)
return false;
@@ -132,7 +132,7 @@ bool MachineOperand::isRenamable() const {
void MachineOperand::setIsRenamable(bool Val) {
assert(isReg() && "Wrong MachineOperand accessor");
- assert(TargetRegisterInfo::isPhysicalRegister(getReg()) &&
+ assert(Register::isPhysicalRegister(getReg()) &&
"setIsRenamable should only be called on physical registers");
IsRenamable = Val;
}
@@ -169,7 +169,7 @@ void MachineOperand::ChangeToFPImmediate(const ConstantFP *FPImm) {
}
void MachineOperand::ChangeToES(const char *SymName,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into an external symbol");
@@ -182,7 +182,7 @@ void MachineOperand::ChangeToES(const char *SymName,
}
void MachineOperand::ChangeToGA(const GlobalValue *GV, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a global address");
@@ -215,7 +215,7 @@ void MachineOperand::ChangeToFrameIndex(int Idx) {
}
void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((!isReg() || !isTied()) &&
"Cannot change a tied operand into a FrameIndex");
@@ -230,7 +230,7 @@ void MachineOperand::ChangeToTargetIndex(unsigned Idx, int64_t Offset,
/// ChangeToRegister - Replace this operand with a new register operand of
/// the specified value. If an operand is known to be a register already,
/// the setReg method should be used.
-void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
+void MachineOperand::ChangeToRegister(Register Reg, bool isDef, bool isImp,
bool isKill, bool isDead, bool isUndef,
bool isDebug) {
MachineRegisterInfo *RegInfo = nullptr;
@@ -333,6 +333,8 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
return getIntrinsicID() == Other.getIntrinsicID();
case MachineOperand::MO_Predicate:
return getPredicate() == Other.getPredicate();
+ case MachineOperand::MO_ShuffleMask:
+ return getShuffleMask() == Other.getShuffleMask();
}
llvm_unreachable("Invalid machine operand type");
}
@@ -381,6 +383,8 @@ hash_code llvm::hash_value(const MachineOperand &MO) {
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getIntrinsicID());
case MachineOperand::MO_Predicate:
return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getPredicate());
+ case MachineOperand::MO_ShuffleMask:
+ return hash_combine(MO.getType(), MO.getTargetFlags(), MO.getShuffleMask());
}
llvm_unreachable("Invalid machine operand type");
}
@@ -425,12 +429,10 @@ static void printCFIRegister(unsigned DwarfReg, raw_ostream &OS,
return;
}
- int Reg = TRI->getLLVMRegNum(DwarfReg, true);
- if (Reg == -1) {
+ if (Optional<unsigned> Reg = TRI->getLLVMRegNum(DwarfReg, true))
+ OS << printReg(*Reg, TRI);
+ else
OS << "<badreg>";
- return;
- }
- OS << printReg(Reg, TRI);
}
static void printIRBlockReference(raw_ostream &OS, const BasicBlock &BB,
@@ -746,7 +748,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
printTargetFlags(OS, *this);
switch (getType()) {
case MachineOperand::MO_Register: {
- unsigned Reg = getReg();
+ Register Reg = getReg();
if (isImplicit())
OS << (isDef() ? "implicit-def " : "implicit ");
else if (PrintDef && isDef())
@@ -762,13 +764,13 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << "undef ";
if (isEarlyClobber())
OS << "early-clobber ";
- if (TargetRegisterInfo::isPhysicalRegister(getReg()) && isRenamable())
+ if (Register::isPhysicalRegister(getReg()) && isRenamable())
OS << "renamable ";
// isDebug() is exactly true for register operands of a DBG_VALUE. So we
// simply infer it when parsing and do not need to print it.
const MachineRegisterInfo *MRI = nullptr;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
MRI = &MF->getRegInfo();
}
@@ -783,7 +785,7 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
OS << ".subreg" << SubReg;
}
// Print the register class / bank.
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (const MachineFunction *MF = getMFIfAvailable(*this)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (IsStandalone || !PrintDef || MRI.def_empty(Reg)) {
@@ -936,6 +938,20 @@ void MachineOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
<< CmpInst::getPredicateName(Pred) << ')';
break;
}
+ case MachineOperand::MO_ShuffleMask:
+ OS << "shufflemask(";
+ const Constant *C = getShuffleMask();
+ const int NumElts = C->getType()->getVectorNumElements();
+
+ StringRef Separator;
+ for (int I = 0; I != NumElts; ++I) {
+ OS << Separator;
+ C->getAggregateElement(I)->printAsOperand(OS, false, MST);
+ Separator = ", ";
+ }
+
+ OS << ')';
+ break;
}
}
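Since printAsOperand is called with PrintType set to false, each mask element prints as a bare value, with undef elements rendered literally; an illustrative (not test-derived) four-element operand would print as:

    shufflemask(0, 2, undef, 3)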
@@ -963,7 +979,8 @@ bool MachinePointerInfo::isDereferenceable(unsigned Size, LLVMContext &C,
return false;
return isDereferenceableAndAlignedPointer(
- BasePtr, 1, APInt(DL.getPointerSizeInBits(), Offset + Size), DL);
+ BasePtr, Align::None(), APInt(DL.getPointerSizeInBits(), Offset + Size),
+ DL);
}
/// getConstantPool - Return a MachinePointerInfo record that refers to the
@@ -1049,17 +1066,6 @@ uint64_t MachineMemOperand::getAlignment() const {
return MinAlign(getBaseAlignment(), getOffset());
}
-void MachineMemOperand::print(raw_ostream &OS) const {
- ModuleSlotTracker DummyMST(nullptr);
- print(OS, DummyMST);
-}
-
-void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const {
- SmallVector<StringRef, 0> SSNs;
- LLVMContext Ctx;
- print(OS, MST, SSNs, Ctx, nullptr, nullptr);
-}
-
void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST,
SmallVectorImpl<StringRef> &SSNs,
const LLVMContext &Context,
diff --git a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
index 27db9106b337..b82403ae1b85 100644
--- a/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
+++ b/lib/CodeGen/MachineOptimizationRemarkEmitter.cpp
@@ -76,7 +76,7 @@ bool MachineOptimizationRemarkEmitterPass::runOnMachineFunction(
else
MBFI = nullptr;
- ORE = llvm::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
+ ORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
return false;
}
diff --git a/lib/CodeGen/MachineOutliner.cpp b/lib/CodeGen/MachineOutliner.cpp
index 80a235aeaa5c..8cd66825a58a 100644
--- a/lib/CodeGen/MachineOutliner.cpp
+++ b/lib/CodeGen/MachineOutliner.cpp
@@ -846,8 +846,8 @@ struct MachineOutliner : public ModulePass {
StringRef getPassName() const override { return "Machine Outliner"; }
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.addRequired<MachineModuleInfo>();
- AU.addPreserved<MachineModuleInfo>();
+ AU.addRequired<MachineModuleInfoWrapperPass>();
+ AU.addPreserved<MachineModuleInfoWrapperPass>();
AU.setPreservesAll();
ModulePass::getAnalysisUsage(AU);
}
@@ -1128,7 +1128,7 @@ MachineOutliner::createOutlinedFunction(Module &M, OutlinedFunction &OF,
IRBuilder<> Builder(EntryBB);
Builder.CreateRetVoid();
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
MachineFunction &MF = MMI.getOrCreateMachineFunction(*F);
MachineBasicBlock &MBB = *MF.CreateMachineBasicBlock();
const TargetSubtargetInfo &STI = MF.getSubtarget();
@@ -1260,7 +1260,7 @@ bool MachineOutliner::outline(Module &M,
true /* isImp = true */));
}
if (MI.isCall())
- MI.getMF()->updateCallSiteInfo(&MI);
+ MI.getMF()->eraseCallSiteInfo(&MI);
};
// Copy over the defs in the outlined range.
// First inst in outlined range <-- Anything that's defined in this
@@ -1303,6 +1303,12 @@ void MachineOutliner::populateMapper(InstructionMapper &Mapper, Module &M,
if (F.empty())
continue;
+ // Disable outlining from noreturn functions right now. Noreturn requires
+ // special handling for the case where what we are outlining could be a
+ // tail call.
+ if (F.hasFnAttribute(Attribute::NoReturn))
+ continue;
+
// There's something in F. Check if it has a MachineFunction associated with
// it.
MachineFunction *MF = MMI.getMachineFunction(F);
@@ -1421,7 +1427,7 @@ bool MachineOutliner::runOnModule(Module &M) {
if (M.empty())
return false;
- MachineModuleInfo &MMI = getAnalysis<MachineModuleInfo>();
+ MachineModuleInfo &MMI = getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
// If the user passed -enable-machine-outliner=always or
// -enable-machine-outliner, the pass will run on all functions in the module.
diff --git a/lib/CodeGen/MachinePipeliner.cpp b/lib/CodeGen/MachinePipeliner.cpp
index 54df522d371a..89c9f6093a97 100644
--- a/lib/CodeGen/MachinePipeliner.cpp
+++ b/lib/CodeGen/MachinePipeliner.cpp
@@ -56,6 +56,7 @@
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachinePipeliner.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/ModuloSchedule.h"
#include "llvm/CodeGen/RegisterPressure.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleDAGMutation.h"
@@ -153,6 +154,17 @@ static cl::opt<bool> SwpShowResMask("pipeliner-show-mask", cl::Hidden,
static cl::opt<bool> SwpDebugResource("pipeliner-dbg-res", cl::Hidden,
cl::init(false));
+static cl::opt<bool> EmitTestAnnotations(
+ "pipeliner-annotate-for-testing", cl::Hidden, cl::init(false),
+ cl::desc("Instead of emitting the pipelined code, annotate instructions "
+ "with the generated schedule for feeding into the "
+ "-modulo-schedule-test pass"));
+
+static cl::opt<bool> ExperimentalCodeGen(
+ "pipeliner-experimental-cg", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Use the experimental peeling code generator for software pipelining"));
+
namespace llvm {
// A command line option to enable the CopyToPhi DAG mutation.
@@ -314,7 +326,7 @@ bool MachinePipeliner::canPipelineLoop(MachineLoop &L) {
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- if (TII->analyzeLoop(L, LI.LoopInductionVar, LI.LoopCompare)) {
+ if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
LLVM_DEBUG(
dbgs() << "Unable to analyzeLoop, can NOT pipeline current Loop\n");
NumFailLoop++;
@@ -349,7 +361,7 @@ void MachinePipeliner::preprocessPhiNodes(MachineBasicBlock &B) {
// If the operand uses a subregister, replace it with a new register
// without subregisters, and generate a copy to the new register.
- unsigned NewReg = MRI.createVirtualRegister(RC);
+ Register NewReg = MRI.createVirtualRegister(RC);
MachineBasicBlock &PredB = *PI.getOperand(i+1).getMBB();
MachineBasicBlock::iterator At = PredB.getFirstTerminator();
const DebugLoc &DL = PredB.findDebugLoc(At);
@@ -515,14 +527,49 @@ void SwingSchedulerDAG::schedule() {
return;
}
- generatePipelinedLoop(Schedule);
+ // Generate the schedule as a ModuloSchedule.
+ DenseMap<MachineInstr *, int> Cycles, Stages;
+ std::vector<MachineInstr *> OrderedInsts;
+ for (int Cycle = Schedule.getFirstCycle(); Cycle <= Schedule.getFinalCycle();
+ ++Cycle) {
+ for (SUnit *SU : Schedule.getInstructions(Cycle)) {
+ OrderedInsts.push_back(SU->getInstr());
+ Cycles[SU->getInstr()] = Cycle;
+ Stages[SU->getInstr()] = Schedule.stageScheduled(SU);
+ }
+ }
+ DenseMap<MachineInstr *, std::pair<unsigned, int64_t>> NewInstrChanges;
+ for (auto &KV : NewMIs) {
+ Cycles[KV.first] = Cycles[KV.second];
+ Stages[KV.first] = Stages[KV.second];
+ NewInstrChanges[KV.first] = InstrChanges[getSUnit(KV.first)];
+ }
+
+ ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
+ std::move(Stages));
+ if (EmitTestAnnotations) {
+ assert(NewInstrChanges.empty() &&
+ "Cannot serialize a schedule with InstrChanges!");
+ ModuloScheduleTestAnnotater MSTI(MF, MS);
+ MSTI.annotate();
+ return;
+ }
+ // The experimental code generator can't work if there are InstChanges.
+ if (ExperimentalCodeGen && NewInstrChanges.empty()) {
+ PeelingModuloScheduleExpander MSE(MF, MS, &LIS);
+ MSE.expand();
+ } else {
+ ModuloScheduleExpander MSE(MF, MS, LIS, std::move(NewInstrChanges));
+ MSE.expand();
+ MSE.cleanup();
+ }
++NumPipelined;
}
/// Clean up after the software pipeliner runs.
void SwingSchedulerDAG::finishBlock() {
- for (MachineInstr *I : NewMIs)
- MF.DeleteMachineInstr(I);
+ for (auto &KV : NewMIs)
+ MF.DeleteMachineInstr(KV.second);
NewMIs.clear();
// Call the superclass.
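Of the three code-generation paths selected above, the -pipeliner-annotate-for-testing route is the one that changes the pass's observable output: instead of expanding the schedule, it records it on the instructions for the -modulo-schedule-test pass named in the flag's description. A sketch of that path in isolation, using the names from the hunk above with the surrounding function assumed:

    // Build the schedule object, then serialize it as annotations rather
    // than expanding prolog/kernel/epilog blocks.
    ModuloSchedule MS(MF, &Loop, std::move(OrderedInsts), std::move(Cycles),
                      std::move(Stages));
    ModuloScheduleTestAnnotater MSTI(MF, MS);
    MSTI.annotate(); // leaves the original loop in place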
@@ -546,14 +593,6 @@ static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
}
-/// Return the Phi register value that comes from the incoming block.
-static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
- for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
- if (Phi.getOperand(i + 1).getMBB() != LoopBB)
- return Phi.getOperand(i).getReg();
- return 0;
-}
-
/// Return the Phi register value that comes from the loop block.
static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
@@ -658,7 +697,7 @@ void SwingSchedulerDAG::addLoopCarriedDependences(AliasAnalysis *AA) {
TII->getMemOperandWithOffset(MI, BaseOp2, Offset2, TRI)) {
if (BaseOp1->isIdenticalTo(*BaseOp2) &&
(int)Offset1 < (int)Offset2) {
- assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI, AA) &&
+ assert(TII->areMemAccessesTriviallyDisjoint(LdMI, MI) &&
"What happened to the chain edge?");
SDep Dep(Load, SDep::Barrier);
Dep.setLatency(1);
@@ -730,7 +769,7 @@ void SwingSchedulerDAG::updatePhiDependences() {
MOI != MOE; ++MOI) {
if (!MOI->isReg())
continue;
- unsigned Reg = MOI->getReg();
+ Register Reg = MOI->getReg();
if (MOI->isDef()) {
// If the register is used by a Phi, then create an anti dependence.
for (MachineRegisterInfo::use_instr_iterator
@@ -809,7 +848,7 @@ void SwingSchedulerDAG::changeDependences() {
continue;
// Get the MI and SUnit for the instruction that defines the original base.
- unsigned OrigBase = I.getInstr()->getOperand(BasePos).getReg();
+ Register OrigBase = I.getInstr()->getOperand(BasePos).getReg();
MachineInstr *DefMI = MRI.getUniqueVRegDef(OrigBase);
if (!DefMI)
continue;
@@ -958,7 +997,7 @@ struct FuncUnitSorter {
unsigned F1 = 0, F2 = 0;
unsigned MFUs1 = minFuncUnits(IS1, F1);
unsigned MFUs2 = minFuncUnits(IS2, F2);
- if (MFUs1 == 1 && MFUs2 == 1)
+ if (MFUs1 == MFUs2)
return Resources.lookup(F1) < Resources.lookup(F2);
return MFUs1 > MFUs2;
}
@@ -1514,8 +1553,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
continue;
for (const MachineOperand &MO : MI->operands())
if (MO.isReg() && MO.isUse()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg))
Uses.insert(Reg);
else if (MRI.isAllocatable(Reg))
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
@@ -1525,8 +1564,8 @@ static void computeLiveOuts(MachineFunction &MF, RegPressureTracker &RPTracker,
for (SUnit *SU : NS)
for (const MachineOperand &MO : SU->getInstr()->operands())
if (MO.isReg() && MO.isDef() && !MO.isDead()) {
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isVirtualRegister(Reg)) {
if (!Uses.count(Reg))
LiveOutRegs.push_back(RegisterMaskPair(Reg,
LaneBitmask::getNone()));
@@ -2012,836 +2051,6 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
return scheduleFound && Schedule.getMaxStageCount() > 0;
}
-/// Given a schedule for the loop, generate a new version of the loop,
-/// and replace the old version. This function generates a prolog
-/// that contains the initial iterations in the pipeline, and kernel
-/// loop, and the epilogue that contains the code for the final
-/// iterations.
-void SwingSchedulerDAG::generatePipelinedLoop(SMSchedule &Schedule) {
- // Create a new basic block for the kernel and add it to the CFG.
- MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
-
- unsigned MaxStageCount = Schedule.getMaxStageCount();
-
- // Remember the registers that are used in different stages. The index is
- // the iteration, or stage, that the instruction is scheduled in. This is
- // a map between register names in the original block and the names created
- // in each stage of the pipelined loop.
- ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
- InstrMapTy InstrMap;
-
- SmallVector<MachineBasicBlock *, 4> PrologBBs;
-
- MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
- assert(PreheaderBB != nullptr &&
- "Need to add code to handle loops w/o preheader");
- // Generate the prolog instructions that set up the pipeline.
- generateProlog(Schedule, MaxStageCount, KernelBB, VRMap, PrologBBs);
- MF.insert(BB->getIterator(), KernelBB);
-
- // Rearrange the instructions to generate the new, pipelined loop,
- // and update register names as needed.
- for (int Cycle = Schedule.getFirstCycle(),
- LastCycle = Schedule.getFinalCycle();
- Cycle <= LastCycle; ++Cycle) {
- std::deque<SUnit *> &CycleInstrs = Schedule.getInstructions(Cycle);
- // This inner loop schedules each instruction in the cycle.
- for (SUnit *CI : CycleInstrs) {
- if (CI->getInstr()->isPHI())
- continue;
- unsigned StageNum = Schedule.stageScheduled(getSUnit(CI->getInstr()));
- MachineInstr *NewMI = cloneInstr(CI->getInstr(), MaxStageCount, StageNum);
- updateInstruction(NewMI, false, MaxStageCount, StageNum, Schedule, VRMap);
- KernelBB->push_back(NewMI);
- InstrMap[NewMI] = CI->getInstr();
- }
- }
-
- // Copy any terminator instructions to the new kernel, and update
- // names as needed.
- for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
- E = BB->instr_end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
- updateInstruction(NewMI, false, MaxStageCount, 0, Schedule, VRMap);
- KernelBB->push_back(NewMI);
- InstrMap[NewMI] = &*I;
- }
-
- KernelBB->transferSuccessors(BB);
- KernelBB->replaceSuccessor(BB, KernelBB);
-
- generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule,
- VRMap, InstrMap, MaxStageCount, MaxStageCount, false);
- generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, Schedule, VRMap,
- InstrMap, MaxStageCount, MaxStageCount, false);
-
- LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
-
- SmallVector<MachineBasicBlock *, 4> EpilogBBs;
- // Generate the epilog instructions to complete the pipeline.
- generateEpilog(Schedule, MaxStageCount, KernelBB, VRMap, EpilogBBs,
- PrologBBs);
-
- // We need this step because the register allocation doesn't handle some
- // situations well, so we insert copies to help out.
- splitLifetimes(KernelBB, EpilogBBs, Schedule);
-
- // Remove dead instructions due to loop induction variables.
- removeDeadInstructions(KernelBB, EpilogBBs);
-
- // Add branches between prolog and epilog blocks.
- addBranches(*PreheaderBB, PrologBBs, KernelBB, EpilogBBs, Schedule, VRMap);
-
- // Remove the original loop since it's no longer referenced.
- for (auto &I : *BB)
- LIS.RemoveMachineInstrFromMaps(I);
- BB->clear();
- BB->eraseFromParent();
-
- delete[] VRMap;
-}
-
-/// Generate the pipeline prolog code.
-void SwingSchedulerDAG::generateProlog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &PrologBBs) {
- MachineBasicBlock *PreheaderBB = MLI->getLoopFor(BB)->getLoopPreheader();
- assert(PreheaderBB != nullptr &&
- "Need to add code to handle loops w/o preheader");
- MachineBasicBlock *PredBB = PreheaderBB;
- InstrMapTy InstrMap;
-
- // Generate a basic block for each stage, not including the last stage,
- // which will be generated in the kernel. Each basic block may contain
- // instructions from multiple stages/iterations.
- for (unsigned i = 0; i < LastStage; ++i) {
- // Create and insert the prolog basic block prior to the original loop
- // basic block. The original loop is removed later.
- MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
- PrologBBs.push_back(NewBB);
- MF.insert(BB->getIterator(), NewBB);
- NewBB->transferSuccessors(PredBB);
- PredBB->addSuccessor(NewBB);
- PredBB = NewBB;
-
- // Generate instructions for each appropriate stage. Process instructions
- // in original program order.
- for (int StageNum = i; StageNum >= 0; --StageNum) {
- for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
- BBE = BB->getFirstTerminator();
- BBI != BBE; ++BBI) {
- if (Schedule.isScheduledAtStage(getSUnit(&*BBI), (unsigned)StageNum)) {
- if (BBI->isPHI())
- continue;
- MachineInstr *NewMI =
- cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum, Schedule);
- updateInstruction(NewMI, false, i, (unsigned)StageNum, Schedule,
- VRMap);
- NewBB->push_back(NewMI);
- InstrMap[NewMI] = &*BBI;
- }
- }
- }
- rewritePhiValues(NewBB, i, Schedule, VRMap, InstrMap);
- LLVM_DEBUG({
- dbgs() << "prolog:\n";
- NewBB->dump();
- });
- }
-
- PredBB->replaceSuccessor(BB, KernelBB);
-
- // Check if we need to remove the branch from the preheader to the original
- // loop, and replace it with a branch to the new loop.
- unsigned numBranches = TII->removeBranch(*PreheaderBB);
- if (numBranches) {
- SmallVector<MachineOperand, 0> Cond;
- TII->insertBranch(*PreheaderBB, PrologBBs[0], nullptr, Cond, DebugLoc());
- }
-}
-
-/// Generate the pipeline epilog code. The epilog code finishes the iterations
-/// that were started in either the prolog or the kernel. We create a basic
-/// block for each stage that needs to complete.
-void SwingSchedulerDAG::generateEpilog(SMSchedule &Schedule, unsigned LastStage,
- MachineBasicBlock *KernelBB,
- ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs,
- MBBVectorTy &PrologBBs) {
- // We need to change the branch from the kernel to the first epilog block, so
- // this call to analyze branch uses the kernel rather than the original BB.
- MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
- SmallVector<MachineOperand, 4> Cond;
- bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
- assert(!checkBranch && "generateEpilog must be able to analyze the branch");
- if (checkBranch)
- return;
-
- MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
- if (*LoopExitI == KernelBB)
- ++LoopExitI;
- assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
- MachineBasicBlock *LoopExitBB = *LoopExitI;
-
- MachineBasicBlock *PredBB = KernelBB;
- MachineBasicBlock *EpilogStart = LoopExitBB;
- InstrMapTy InstrMap;
-
- // Generate a basic block for each stage, not including the last stage,
- // which was generated for the kernel. Each basic block may contain
- // instructions from multiple stages/iterations.
- int EpilogStage = LastStage + 1;
- for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
- MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
- EpilogBBs.push_back(NewBB);
- MF.insert(BB->getIterator(), NewBB);
-
- PredBB->replaceSuccessor(LoopExitBB, NewBB);
- NewBB->addSuccessor(LoopExitBB);
-
- if (EpilogStart == LoopExitBB)
- EpilogStart = NewBB;
-
- // Add instructions to the epilog depending on the current block.
- // Process instructions in original program order.
- for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
- for (auto &BBI : *BB) {
- if (BBI.isPHI())
- continue;
- MachineInstr *In = &BBI;
- if (Schedule.isScheduledAtStage(getSUnit(In), StageNum)) {
- // Instructions with memoperands in the epilog are updated with
- // conservative values.
- MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
- updateInstruction(NewMI, i == 1, EpilogStage, 0, Schedule, VRMap);
- NewBB->push_back(NewMI);
- InstrMap[NewMI] = In;
- }
- }
- }
- generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule,
- VRMap, InstrMap, LastStage, EpilogStage, i == 1);
- generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, Schedule, VRMap,
- InstrMap, LastStage, EpilogStage, i == 1);
- PredBB = NewBB;
-
- LLVM_DEBUG({
- dbgs() << "epilog:\n";
- NewBB->dump();
- });
- }
-
- // Fix any Phi nodes in the loop exit block.
- for (MachineInstr &MI : *LoopExitBB) {
- if (!MI.isPHI())
- break;
- for (unsigned i = 2, e = MI.getNumOperands() + 1; i != e; i += 2) {
- MachineOperand &MO = MI.getOperand(i);
- if (MO.getMBB() == BB)
- MO.setMBB(PredBB);
- }
- }
-
- // Create a branch to the new epilog from the kernel.
- // Remove the original branch and add a new branch to the epilog.
- TII->removeBranch(*KernelBB);
- TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
- // Add a branch to the loop exit.
- if (EpilogBBs.size() > 0) {
- MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
- SmallVector<MachineOperand, 4> Cond1;
- TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
- }
-}
-
-/// Replace all uses of FromReg that appear outside the specified
-/// basic block with ToReg.
-static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
- MachineBasicBlock *MBB,
- MachineRegisterInfo &MRI,
- LiveIntervals &LIS) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
- E = MRI.use_end();
- I != E;) {
- MachineOperand &O = *I;
- ++I;
- if (O.getParent()->getParent() != MBB)
- O.setReg(ToReg);
- }
- if (!LIS.hasInterval(ToReg))
- LIS.createEmptyInterval(ToReg);
-}
-
-/// Return true if the register has a use that occurs outside the
-/// specified loop.
-static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
- MachineRegisterInfo &MRI) {
- for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
- E = MRI.use_end();
- I != E; ++I)
- if (I->getParent()->getParent() != BB)
- return true;
- return false;
-}
-
-/// Generate Phis for the specific block in the generated pipelined code.
-/// This function looks at the Phis from the original code to guide the
-/// creation of new Phis.
-void SwingSchedulerDAG::generateExistingPhis(
- MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
- bool IsLast) {
- // Compute the stage number for the initial value of the Phi, which
- // comes from the prolog. The prolog to use depends on to which kernel/
- // epilog that we're adding the Phi.
- unsigned PrologStage = 0;
- unsigned PrevStage = 0;
- bool InKernel = (LastStageNum == CurStageNum);
- if (InKernel) {
- PrologStage = LastStageNum - 1;
- PrevStage = CurStageNum;
- } else {
- PrologStage = LastStageNum - (CurStageNum - LastStageNum);
- PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
- }
-
- for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
- BBE = BB->getFirstNonPHI();
- BBI != BBE; ++BBI) {
- unsigned Def = BBI->getOperand(0).getReg();
-
- unsigned InitVal = 0;
- unsigned LoopVal = 0;
- getPhiRegs(*BBI, BB, InitVal, LoopVal);
-
- unsigned PhiOp1 = 0;
- // The Phi value from the loop body typically is defined in the loop, but
- // not always. So, we need to check if the value is defined in the loop.
- unsigned PhiOp2 = LoopVal;
- if (VRMap[LastStageNum].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum][LoopVal];
-
- int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
- int LoopValStage =
- Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
- unsigned NumStages = Schedule.getStagesForReg(Def, CurStageNum);
- if (NumStages == 0) {
- // We don't need to generate a Phi anymore, but we need to rename any uses
- // of the Phi value.
- unsigned NewReg = VRMap[PrevStage][LoopVal];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, 0, &*BBI,
- Def, InitVal, NewReg);
- if (VRMap[CurStageNum].count(LoopVal))
- VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
- }
- // Adjust the number of Phis needed depending on the number of prologs left,
- // and the distance from where the Phi is first scheduled. The number of
- // Phis cannot exceed the number of prolog stages. Each stage can
- // potentially define two values.
- unsigned MaxPhis = PrologStage + 2;
- if (!InKernel && (int)PrologStage <= LoopValStage)
- MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
- unsigned NumPhis = std::min(NumStages, MaxPhis);
-
- unsigned NewReg = 0;
- unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
- // In the epilog, we may need to look back one stage to get the correct
- // Phi name because the epilog and prolog blocks execute the same stage.
- // The correct name is from the previous block only when the Phi has
- // been completely scheduled prior to the epilog, and the Phi value is not
- // needed in multiple stages.
- int StageDiff = 0;
- if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
- NumPhis == 1)
- StageDiff = 1;
- // Adjust the computations below when the phi and the loop definition
- // are scheduled in different stages.
- if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
- StageDiff = StageScheduled - LoopValStage;
- for (unsigned np = 0; np < NumPhis; ++np) {
- // If the Phi hasn't been scheduled, then use the initial Phi operand
- // value. Otherwise, use the scheduled version of the instruction. This
- // is a little complicated when a Phi references another Phi.
- if (np > PrologStage || StageScheduled >= (int)LastStageNum)
- PhiOp1 = InitVal;
- // Check if the Phi has already been scheduled in a prolog stage.
- else if (PrologStage >= AccessStage + StageDiff + np &&
- VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
- PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
- // Check if the Phi has already been scheduled, but the loop instruction
- // is either another Phi, or doesn't occur in the loop.
- else if (PrologStage >= AccessStage + StageDiff + np) {
- // If the Phi references another Phi, we need to examine the other
- // Phi to get the correct value.
- PhiOp1 = LoopVal;
- MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
- int Indirects = 1;
- while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
- int PhiStage = Schedule.stageScheduled(getSUnit(InstOp1));
- if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
- PhiOp1 = getInitPhiReg(*InstOp1, BB);
- else
- PhiOp1 = getLoopPhiReg(*InstOp1, BB);
- InstOp1 = MRI.getVRegDef(PhiOp1);
- int PhiOpStage = Schedule.stageScheduled(getSUnit(InstOp1));
- int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
- if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
- VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
- PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
- break;
- }
- ++Indirects;
- }
- } else
- PhiOp1 = InitVal;
- // If this references a generated Phi in the kernel, get the Phi operand
- // from the incoming block.
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
-
- MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
- bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
- // In the epilog, a map lookup is needed to get the value from the kernel,
- // or previous epilog block. How to do this depends on whether the
- // instruction is scheduled in the previous block.
- if (!InKernel) {
- int StageDiffAdj = 0;
- if (LoopValStage != -1 && StageScheduled > LoopValStage)
- StageDiffAdj = StageScheduled - LoopValStage;
- // Use the loop value defined in the kernel, unless the kernel
- // contains the last definition of the Phi.
- if (np == 0 && PrevStage == LastStageNum &&
- (StageScheduled != 0 || LoopValStage != 0) &&
- VRMap[PrevStage - StageDiffAdj].count(LoopVal))
- PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
- // Use the value defined by the Phi. We add one because we switch
- // from looking at the loop value to the Phi definition.
- else if (np > 0 && PrevStage == LastStageNum &&
- VRMap[PrevStage - np + 1].count(Def))
- PhiOp2 = VRMap[PrevStage - np + 1][Def];
- // Use the loop value defined in the kernel.
- else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
- VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
- PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
- // Use the value defined by the Phi, unless we're generating the first
- // epilog and the Phi refers to a Phi in a different stage.
- else if (VRMap[PrevStage - np].count(Def) &&
- (!LoopDefIsPhi || (PrevStage != LastStageNum) || (LoopValStage == StageScheduled)))
- PhiOp2 = VRMap[PrevStage - np][Def];
- }
-
- // Check if we can reuse an existing Phi. This occurs when a Phi
- // references another Phi, and the other Phi is scheduled in an
- // earlier stage. We can try to reuse an existing Phi up until the last
- // stage of the current Phi.
- if (LoopDefIsPhi) {
- if (static_cast<int>(PrologStage - np) >= StageScheduled) {
- int LVNumStages = Schedule.getStagesForPhi(LoopVal);
- int StageDiff = (StageScheduled - LoopValStage);
- LVNumStages -= StageDiff;
- // Make sure the loop value Phi has been processed already.
- if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
- NewReg = PhiOp2;
- unsigned ReuseStage = CurStageNum;
- if (Schedule.isLoopCarried(this, *PhiInst))
- ReuseStage -= LVNumStages;
- // Check if the Phi to reuse has been generated yet. If not, then
- // there is nothing to reuse.
- if (VRMap[ReuseStage - np].count(LoopVal)) {
- NewReg = VRMap[ReuseStage - np][LoopVal];
-
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- PhiOp2 = NewReg;
- if (VRMap[LastStageNum - np - 1].count(LoopVal))
- PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
-
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- continue;
- }
- }
- }
- if (InKernel && StageDiff > 0 &&
- VRMap[CurStageNum - StageDiff - np].count(LoopVal))
- PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
- }
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def);
- NewReg = MRI.createVirtualRegister(RC);
-
- MachineInstrBuilder NewPhi =
- BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewReg);
- NewPhi.addReg(PhiOp1).addMBB(BB1);
- NewPhi.addReg(PhiOp2).addMBB(BB2);
- if (np == 0)
- InstrMap[NewPhi] = &*BBI;
-
- // We define the Phis after creating the new pipelined code, so
- // we need to rename the Phi values in scheduled instructions.
-
- unsigned PrevReg = 0;
- if (InKernel && VRMap[PrevStage - np].count(LoopVal))
- PrevReg = VRMap[PrevStage - np][LoopVal];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
- Def, NewReg, PrevReg);
- // If the Phi has been scheduled, use the new name for rewriting.
- if (VRMap[CurStageNum - np].count(Def)) {
- unsigned R = VRMap[CurStageNum - np][Def];
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np, &*BBI,
- R, NewReg);
- }
-
-      // Check if we need to rename any uses that occur after the loop. The
- // register to replace depends on whether the Phi is scheduled in the
- // epilog.
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
-
- // In the kernel, a dependent Phi uses the value from this Phi.
- if (InKernel)
- PhiOp2 = NewReg;
-
- // Update the map with the new Phi name.
- VRMap[CurStageNum - np][Def] = NewReg;
- }
-
- while (NumPhis++ < NumStages) {
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, NumPhis,
- &*BBI, Def, NewReg, 0);
- }
-
- // Check if we need to rename a Phi that has been eliminated due to
- // scheduling.
- if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
- replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
- }
-}
-
-/// Generate Phis for the specified block in the generated pipelined code.
-/// These are new Phis needed because the definition is scheduled after the
-/// use in the pipelined sequence.
-void SwingSchedulerDAG::generatePhis(
- MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, SMSchedule &Schedule, ValueMapTy *VRMap,
- InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
- bool IsLast) {
- // Compute the stage number that contains the initial Phi value, and
- // the Phi from the previous stage.
- unsigned PrologStage = 0;
- unsigned PrevStage = 0;
- unsigned StageDiff = CurStageNum - LastStageNum;
- bool InKernel = (StageDiff == 0);
- if (InKernel) {
- PrologStage = LastStageNum - 1;
- PrevStage = CurStageNum;
- } else {
- PrologStage = LastStageNum - StageDiff;
- PrevStage = LastStageNum + StageDiff - 1;
- }
-
- for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
- BBE = BB->instr_end();
- BBI != BBE; ++BBI) {
- for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = BBI->getOperand(i);
- if (!MO.isReg() || !MO.isDef() ||
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
-
- int StageScheduled = Schedule.stageScheduled(getSUnit(&*BBI));
- assert(StageScheduled != -1 && "Expecting scheduled instruction.");
- unsigned Def = MO.getReg();
- unsigned NumPhis = Schedule.getStagesForReg(Def, CurStageNum);
-      // An instruction scheduled in stage 0 that is used after the loop
-      // requires a Phi in the epilog for the last definition from either
- // the kernel or prolog.
- if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
- hasUseAfterLoop(Def, BB, MRI))
- NumPhis = 1;
- if (!InKernel && (unsigned)StageScheduled > PrologStage)
- continue;
-
- unsigned PhiOp2 = VRMap[PrevStage][Def];
- if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
- if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
- PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
- // The number of Phis can't exceed the number of prolog stages. The
- // prolog stage number is zero based.
- if (NumPhis > PrologStage + 1 - StageScheduled)
- NumPhis = PrologStage + 1 - StageScheduled;
- for (unsigned np = 0; np < NumPhis; ++np) {
- unsigned PhiOp1 = VRMap[PrologStage][Def];
- if (np <= PrologStage)
- PhiOp1 = VRMap[PrologStage - np][Def];
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
- if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
- PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
- }
- if (!InKernel)
- PhiOp2 = VRMap[PrevStage - np][Def];
-
- const TargetRegisterClass *RC = MRI.getRegClass(Def);
- unsigned NewReg = MRI.createVirtualRegister(RC);
-
- MachineInstrBuilder NewPhi =
- BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
- TII->get(TargetOpcode::PHI), NewReg);
- NewPhi.addReg(PhiOp1).addMBB(BB1);
- NewPhi.addReg(PhiOp2).addMBB(BB2);
- if (np == 0)
- InstrMap[NewPhi] = &*BBI;
-
- // Rewrite uses and update the map. The actions depend upon whether
-        // we are generating code for the kernel or epilog blocks.
- if (InKernel) {
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, PhiOp1, NewReg);
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, PhiOp2, NewReg);
-
- PhiOp2 = NewReg;
- VRMap[PrevStage - np - 1][Def] = NewReg;
- } else {
- VRMap[CurStageNum - np][Def] = NewReg;
- if (np == NumPhis - 1)
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, CurStageNum, np,
- &*BBI, Def, NewReg);
- }
- if (IsLast && np == NumPhis - 1)
- replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
- }
- }
- }
-}
-
-/// Remove instructions that generate values with no uses.
-/// Typically, these are induction variable operations that generate values
-/// used in the loop itself. A dead instruction has a definition with
-/// no uses, or uses that occur in the original loop only.
-void SwingSchedulerDAG::removeDeadInstructions(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs) {
- // For each epilog block, check that the value defined by each instruction
- // is used. If not, delete it.
- for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
- MBE = EpilogBBs.rend();
- MBB != MBE; ++MBB)
- for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
- ME = (*MBB)->instr_rend();
- MI != ME;) {
-      // From DeadMachineInstructionElim. Don't delete inline assembly.
- if (MI->isInlineAsm()) {
- ++MI;
- continue;
- }
- bool SawStore = false;
- // Check if it's safe to remove the instruction due to side effects.
- // We can, and want to, remove Phis here.
- if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
- ++MI;
- continue;
- }
- bool used = true;
- for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
- MOE = MI->operands_end();
- MOI != MOE; ++MOI) {
- if (!MOI->isReg() || !MOI->isDef())
- continue;
- unsigned reg = MOI->getReg();
- // Assume physical registers are used, unless they are marked dead.
- if (TargetRegisterInfo::isPhysicalRegister(reg)) {
- used = !MOI->isDead();
- if (used)
- break;
- continue;
- }
- unsigned realUses = 0;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- // Check if there are any uses that occur only in the original
- // loop. If so, that's not a real use.
- if (UI->getParent()->getParent() != BB) {
- realUses++;
- used = true;
- break;
- }
- }
- if (realUses > 0)
- break;
- used = false;
- }
- if (!used) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI++->eraseFromParent();
- continue;
- }
- ++MI;
- }
- // In the kernel block, check if we can remove a Phi that generates a value
- // used in an instruction removed in the epilog block.
- for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
- BBE = KernelBB->getFirstNonPHI();
- BBI != BBE;) {
- MachineInstr *MI = &*BBI;
- ++BBI;
- unsigned reg = MI->getOperand(0).getReg();
- if (MRI.use_begin(reg) == MRI.use_end()) {
- LIS.RemoveMachineInstrFromMaps(*MI);
- MI->eraseFromParent();
- }
- }
-}
-
-/// For loop carried definitions, we split the lifetime of a virtual register
-/// that has uses past the definition in the next iteration. A copy with a new
-/// virtual register is inserted before the definition, which helps with
-/// generating a better register assignment.
-///
-/// v1 = phi(a, v2) v1 = phi(a, v2)
-/// v2 = phi(b, v3) v2 = phi(b, v3)
-/// v3 = .. v4 = copy v1
-///      .. = v1              v3 = ..
-/// .. = v4
-void SwingSchedulerDAG::splitLifetimes(MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule) {
- const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
- for (auto &PHI : KernelBB->phis()) {
- unsigned Def = PHI.getOperand(0).getReg();
-    // Check for any Phi definition that is used as an operand of another Phi
- // in the same block.
- for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
- E = MRI.use_instr_end();
- I != E; ++I) {
- if (I->isPHI() && I->getParent() == KernelBB) {
- // Get the loop carried definition.
- unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
- if (!LCDef)
- continue;
- MachineInstr *MI = MRI.getVRegDef(LCDef);
- if (!MI || MI->getParent() != KernelBB || MI->isPHI())
- continue;
- // Search through the rest of the block looking for uses of the Phi
- // definition. If one occurs, then split the lifetime.
- unsigned SplitReg = 0;
- for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
- KernelBB->instr_end()))
- if (BBJ.readsRegister(Def)) {
- // We split the lifetime when we find the first use.
- if (SplitReg == 0) {
- SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
- BuildMI(*KernelBB, MI, MI->getDebugLoc(),
- TII->get(TargetOpcode::COPY), SplitReg)
- .addReg(Def);
- }
- BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
- }
- if (!SplitReg)
- continue;
- // Search through each of the epilog blocks for any uses to be renamed.
- for (auto &Epilog : EpilogBBs)
- for (auto &I : *Epilog)
- if (I.readsRegister(Def))
- I.substituteRegister(Def, SplitReg, 0, *TRI);
- break;
- }
- }
- }
-}
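// Why the copy helps (an illustration of the doc comment above, not part of
// the patch): inserting "v4 = COPY v1" before v3's definition ends v1's
// loop-carried live range early, so v1 and v3 are less likely to conflict
// during register allocation; all later uses read v4 instead.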
-
-/// Remove the incoming block from the Phis in a basic block.
-static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
- for (MachineInstr &MI : *BB) {
- if (!MI.isPHI())
- break;
- for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
- if (MI.getOperand(i + 1).getMBB() == Incoming) {
- MI.RemoveOperand(i + 1);
- MI.RemoveOperand(i);
- break;
- }
- }
-}
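// Illustrative sketch (hypothetical MIR, for exposition only): given
//   %v = PHI %a, %bb.1, %b, %bb.2
// removePhis(BB, /*Incoming=*/%bb.2) erases the second value/block pair,
// leaving
//   %v = PHI %a, %bb.1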
-
-/// Create branches from each prolog basic block to the appropriate epilog
-/// block. These edges are needed if the loop ends before reaching the
-/// kernel.
-void SwingSchedulerDAG::addBranches(MachineBasicBlock &PreheaderBB,
- MBBVectorTy &PrologBBs,
- MachineBasicBlock *KernelBB,
- MBBVectorTy &EpilogBBs,
- SMSchedule &Schedule, ValueMapTy *VRMap) {
- assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
- MachineInstr *IndVar = Pass.LI.LoopInductionVar;
- MachineInstr *Cmp = Pass.LI.LoopCompare;
- MachineBasicBlock *LastPro = KernelBB;
- MachineBasicBlock *LastEpi = KernelBB;
-
- // Start from the blocks connected to the kernel and work "out"
- // to the first prolog and the last epilog blocks.
- SmallVector<MachineInstr *, 4> PrevInsts;
- unsigned MaxIter = PrologBBs.size() - 1;
- unsigned LC = UINT_MAX;
- unsigned LCMin = UINT_MAX;
- for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
- // Add branches to the prolog that go to the corresponding
- // epilog, and the fall-thru prolog/kernel block.
- MachineBasicBlock *Prolog = PrologBBs[j];
- MachineBasicBlock *Epilog = EpilogBBs[i];
- // We've executed one iteration, so decrement the loop count and check for
- // the loop end.
- SmallVector<MachineOperand, 4> Cond;
- // Check if the LOOP0 has already been removed. If so, then there is no need
- // to reduce the trip count.
- if (LC != 0)
- LC = TII->reduceLoopCount(*Prolog, PreheaderBB, IndVar, *Cmp, Cond,
- PrevInsts, j, MaxIter);
-
- // Record the value of the first trip count, which is used to determine if
- // branches and blocks can be removed for constant trip counts.
- if (LCMin == UINT_MAX)
- LCMin = LC;
-
- unsigned numAdded = 0;
- if (TargetRegisterInfo::isVirtualRegister(LC)) {
- Prolog->addSuccessor(Epilog);
- numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
- } else if (j >= LCMin) {
- Prolog->addSuccessor(Epilog);
- Prolog->removeSuccessor(LastPro);
- LastEpi->removeSuccessor(Epilog);
- numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
- removePhis(Epilog, LastEpi);
- // Remove the blocks that are no longer referenced.
- if (LastPro != LastEpi) {
- LastEpi->clear();
- LastEpi->eraseFromParent();
- }
- LastPro->clear();
- LastPro->eraseFromParent();
- } else {
- numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
- removePhis(Epilog, Prolog);
- }
- LastPro = Prolog;
- LastEpi = Epilog;
- for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
- E = Prolog->instr_rend();
- I != E && numAdded > 0; ++I, --numAdded)
- updateInstruction(&*I, false, j, 0, Schedule, VRMap);
- }
-}
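// Worked example (three prolog/epilog pairs, assumed sizes): MaxIter == 2,
// so the loop visits (i, j) = (0, 2), (1, 1), (2, 0), pairing PrologBBs[2]
// with EpilogBBs[0] first (the pair adjacent to the kernel) and ending at
// the outermost pair PrologBBs[0]/EpilogBBs[2].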
-
/// Return true if we can compute the amount the instruction changes
/// during each iteration. Set Delta to the amount of the change.
bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
@@ -2854,7 +2063,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
if (!BaseOp->isReg())
return false;
- unsigned BaseReg = BaseOp->getReg();
+ Register BaseReg = BaseOp->getReg();
MachineRegisterInfo &MRI = MF.getRegInfo();
// Check if there is a Phi. If so, get the definition in the loop.
@@ -2874,261 +2083,6 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) {
return true;
}
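// Worked example (assumed values, for illustration): if MI's base register
// is updated inside the loop by "%base = ADD %base, 8", computeDelta sets
// Delta = 8, i.e. the accessed address advances by 8 bytes per iteration.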
-/// Update the memory operand with a new offset when the pipeliner
-/// generates a new copy of the instruction that refers to a
-/// different memory location.
-void SwingSchedulerDAG::updateMemOperands(MachineInstr &NewMI,
- MachineInstr &OldMI, unsigned Num) {
- if (Num == 0)
- return;
- // If the instruction has memory operands, then adjust the offset
- // when the instruction appears in different stages.
- if (NewMI.memoperands_empty())
- return;
- SmallVector<MachineMemOperand *, 2> NewMMOs;
- for (MachineMemOperand *MMO : NewMI.memoperands()) {
- // TODO: Figure out whether isAtomic is really necessary (see D57601).
- if (MMO->isVolatile() || MMO->isAtomic() ||
- (MMO->isInvariant() && MMO->isDereferenceable()) ||
- (!MMO->getValue())) {
- NewMMOs.push_back(MMO);
- continue;
- }
- unsigned Delta;
- if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
- int64_t AdjOffset = Delta * Num;
- NewMMOs.push_back(
- MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
- } else {
- NewMMOs.push_back(
- MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
- }
- }
- NewMI.setMemRefs(MF, NewMMOs);
-}
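// Worked example (assumed values): with Delta = 8 from computeDelta and a
// clone placed Num = 2 stages away, each adjustable memory operand's offset
// advances by AdjOffset = 8 * 2 = 16. Operands whose per-iteration change is
// unknown are conservatively widened to an unknown-size location, while
// volatile, atomic, and invariant operands are kept unchanged.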
-
-/// Clone the instruction for the new pipelined loop and update the
-/// memory operands, if needed.
-MachineInstr *SwingSchedulerDAG::cloneInstr(MachineInstr *OldMI,
- unsigned CurStageNum,
- unsigned InstStageNum) {
- MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
- // Check for tied operands in inline asm instructions. This should be handled
- // elsewhere, but I'm not sure of the best solution.
- if (OldMI->isInlineAsm())
- for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
- const auto &MO = OldMI->getOperand(i);
- if (MO.isReg() && MO.isUse())
- break;
- unsigned UseIdx;
- if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
- NewMI->tieOperands(i, UseIdx);
- }
- updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
- return NewMI;
-}
-
-/// Clone the instruction for the new pipelined loop. If needed, this
-/// function updates the instruction using the values saved in the
-/// InstrChanges structure.
-MachineInstr *SwingSchedulerDAG::cloneAndChangeInstr(MachineInstr *OldMI,
- unsigned CurStageNum,
- unsigned InstStageNum,
- SMSchedule &Schedule) {
- MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
- DenseMap<SUnit *, std::pair<unsigned, int64_t>>::iterator It =
- InstrChanges.find(getSUnit(OldMI));
- if (It != InstrChanges.end()) {
- std::pair<unsigned, int64_t> RegAndOffset = It->second;
- unsigned BasePos, OffsetPos;
- if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
- return nullptr;
- int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
- MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
- if (Schedule.stageScheduled(getSUnit(LoopDef)) > (signed)InstStageNum)
- NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
- NewMI->getOperand(OffsetPos).setImm(NewOffset);
- }
- updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
- return NewMI;
-}
-
-/// Update the machine instruction with new virtual registers. This
-/// function may change the definitions and/or uses.
-void SwingSchedulerDAG::updateInstruction(MachineInstr *NewMI, bool LastDef,
- unsigned CurStageNum,
- unsigned InstrStageNum,
- SMSchedule &Schedule,
- ValueMapTy *VRMap) {
- for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
- MachineOperand &MO = NewMI->getOperand(i);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
- continue;
- unsigned reg = MO.getReg();
- if (MO.isDef()) {
- // Create a new virtual register for the definition.
- const TargetRegisterClass *RC = MRI.getRegClass(reg);
- unsigned NewReg = MRI.createVirtualRegister(RC);
- MO.setReg(NewReg);
- VRMap[CurStageNum][reg] = NewReg;
- if (LastDef)
- replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
- } else if (MO.isUse()) {
- MachineInstr *Def = MRI.getVRegDef(reg);
-      // Compute the stage that contains the last definition for the instruction.
- int DefStageNum = Schedule.stageScheduled(getSUnit(Def));
- unsigned StageNum = CurStageNum;
- if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
-        // Compute the difference in stages between the definition and the use.
- unsigned StageDiff = (InstrStageNum - DefStageNum);
- // Make an adjustment to get the last definition.
- StageNum -= StageDiff;
- }
- if (VRMap[StageNum].count(reg))
- MO.setReg(VRMap[StageNum][reg]);
- }
- }
-}
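// Illustrative sketch (hypothetical vregs): when the stage-1 copy of
//   %v1 = ADD %v0, 4
// is emitted, the def is renamed to a fresh register and recorded as
// VRMap[1][%v1]; a use whose definition lives in an earlier stage resolves
// its operand through VRMap at the stage holding the last definition.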
-
-/// Return the instruction in the loop that defines the register.
-/// If the definition is a Phi, then follow the Phi operand to
-/// the instruction in the loop.
-MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
- SmallPtrSet<MachineInstr *, 8> Visited;
- MachineInstr *Def = MRI.getVRegDef(Reg);
- while (Def->isPHI()) {
- if (!Visited.insert(Def).second)
- break;
- for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
- if (Def->getOperand(i + 1).getMBB() == BB) {
- Def = MRI.getVRegDef(Def->getOperand(i).getReg());
- break;
- }
- }
- return Def;
-}
-
-/// Return the new name for the value from the previous stage.
-unsigned SwingSchedulerDAG::getPrevMapVal(unsigned StageNum, unsigned PhiStage,
- unsigned LoopVal, unsigned LoopStage,
- ValueMapTy *VRMap,
- MachineBasicBlock *BB) {
- unsigned PrevVal = 0;
- if (StageNum > PhiStage) {
- MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
- if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
- // The name is defined in the previous stage.
- PrevVal = VRMap[StageNum - 1][LoopVal];
- else if (VRMap[StageNum].count(LoopVal))
- // The previous name is defined in the current stage when the instruction
- // order is swapped.
- PrevVal = VRMap[StageNum][LoopVal];
- else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
- // The loop value hasn't yet been scheduled.
- PrevVal = LoopVal;
- else if (StageNum == PhiStage + 1)
- // The loop value is another phi, which has not been scheduled.
- PrevVal = getInitPhiReg(*LoopInst, BB);
- else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
- // The loop value is another phi, which has been scheduled.
- PrevVal =
- getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
- LoopStage, VRMap, BB);
- }
- return PrevVal;
-}
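// Worked example (assumed stage numbers): with PhiStage == LoopStage == 0 and
// StageNum == 2, the first case applies and the renaming from the previous
// stage, VRMap[1][LoopVal], is returned when present; a loop value that is
// not a Phi in this block simply keeps its original name.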
-
-/// Rewrite the Phi values in the specified block to use the mappings
-/// from the initial operand. Once the Phi is scheduled, we switch
-/// to using the loop value instead of the Phi value, so those names
-/// do not need to be rewritten.
-void SwingSchedulerDAG::rewritePhiValues(MachineBasicBlock *NewBB,
- unsigned StageNum,
- SMSchedule &Schedule,
- ValueMapTy *VRMap,
- InstrMapTy &InstrMap) {
- for (auto &PHI : BB->phis()) {
- unsigned InitVal = 0;
- unsigned LoopVal = 0;
- getPhiRegs(PHI, BB, InitVal, LoopVal);
- unsigned PhiDef = PHI.getOperand(0).getReg();
-
- unsigned PhiStage =
- (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(PhiDef)));
- unsigned LoopStage =
- (unsigned)Schedule.stageScheduled(getSUnit(MRI.getVRegDef(LoopVal)));
- unsigned NumPhis = Schedule.getStagesForPhi(PhiDef);
- if (NumPhis > StageNum)
- NumPhis = StageNum;
- for (unsigned np = 0; np <= NumPhis; ++np) {
- unsigned NewVal =
- getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
- if (!NewVal)
- NewVal = InitVal;
- rewriteScheduledInstr(NewBB, Schedule, InstrMap, StageNum - np, np, &PHI,
- PhiDef, NewVal);
- }
- }
-}
-
-/// Rewrite a previously scheduled instruction to use the register value
-/// from the new instruction. Make sure the instruction occurs in the
-/// basic block, and we don't change the uses in the new instruction.
-void SwingSchedulerDAG::rewriteScheduledInstr(
- MachineBasicBlock *BB, SMSchedule &Schedule, InstrMapTy &InstrMap,
- unsigned CurStageNum, unsigned PhiNum, MachineInstr *Phi, unsigned OldReg,
- unsigned NewReg, unsigned PrevReg) {
- bool InProlog = (CurStageNum < Schedule.getMaxStageCount());
- int StagePhi = Schedule.stageScheduled(getSUnit(Phi)) + PhiNum;
- // Rewrite uses that have been scheduled already to use the new
- // Phi register.
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
- EI = MRI.use_end();
- UI != EI;) {
- MachineOperand &UseOp = *UI;
- MachineInstr *UseMI = UseOp.getParent();
- ++UI;
- if (UseMI->getParent() != BB)
- continue;
- if (UseMI->isPHI()) {
- if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
- continue;
- if (getLoopPhiReg(*UseMI, BB) != OldReg)
- continue;
- }
- InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
- assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
- SUnit *OrigMISU = getSUnit(OrigInstr->second);
- int StageSched = Schedule.stageScheduled(OrigMISU);
- int CycleSched = Schedule.cycleScheduled(OrigMISU);
- unsigned ReplaceReg = 0;
- // This is the stage for the scheduled instruction.
- if (StagePhi == StageSched && Phi->isPHI()) {
- int CyclePhi = Schedule.cycleScheduled(getSUnit(Phi));
- if (PrevReg && InProlog)
- ReplaceReg = PrevReg;
- else if (PrevReg && !Schedule.isLoopCarried(this, *Phi) &&
- (CyclePhi <= CycleSched || OrigMISU->getInstr()->isPHI()))
- ReplaceReg = PrevReg;
- else
- ReplaceReg = NewReg;
- }
- // The scheduled instruction occurs before the scheduled Phi, and the
- // Phi is not loop carried.
- if (!InProlog && StagePhi + 1 == StageSched &&
- !Schedule.isLoopCarried(this, *Phi))
- ReplaceReg = NewReg;
- if (StagePhi > StageSched && Phi->isPHI())
- ReplaceReg = NewReg;
- if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
- ReplaceReg = NewReg;
- if (ReplaceReg) {
- MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
- UseOp.setReg(ReplaceReg);
- }
- }
-}
-
/// Check if we can change the instruction to use an offset value from the
/// previous iteration. If so, return true and set the base and offset values
/// so that we can rewrite the load, if necessary.
@@ -3147,7 +2101,7 @@ bool SwingSchedulerDAG::canUseLastOffsetValue(MachineInstr *MI,
unsigned BasePosLd, OffsetPosLd;
if (!TII->getBaseAndOffsetPosition(*MI, BasePosLd, OffsetPosLd))
return false;
- unsigned BaseReg = MI->getOperand(BasePosLd).getReg();
+ Register BaseReg = MI->getOperand(BasePosLd).getReg();
// Look for the Phi instruction.
MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
@@ -3202,7 +2156,7 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
unsigned BasePos, OffsetPos;
if (!TII->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
return;
- unsigned BaseReg = MI->getOperand(BasePos).getReg();
+ Register BaseReg = MI->getOperand(BasePos).getReg();
MachineInstr *LoopDef = findDefInLoop(BaseReg);
int DefStageNum = Schedule.stageScheduled(getSUnit(LoopDef));
int DefCycleNum = Schedule.cycleScheduled(getSUnit(LoopDef));
@@ -3221,11 +2175,29 @@ void SwingSchedulerDAG::applyInstrChange(MachineInstr *MI,
NewMI->getOperand(OffsetPos).setImm(NewOffset);
SU->setInstr(NewMI);
MISUnitMap[NewMI] = SU;
- NewMIs.insert(NewMI);
+ NewMIs[MI] = NewMI;
}
}
}
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *SwingSchedulerDAG::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
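// Illustrative sketch (hypothetical MIR): for
//   %p = PHI %init0, %bb.preheader, %q, %bb.loop
//   %q = PHI %init1, %bb.preheader, %d, %bb.loop
//   %d = ADD %x, 4
// findDefInLoop(%p) follows the loop-edge operands %q and then %d and
// returns the ADD; the Visited set terminates cycles of mutually referencing
// Phis.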
+
/// Return true for an order or output dependence that is potentially loop
/// carried. A dependence is loop carried if the destination defines a value
/// that may be used or defined by the source in a subsequent iteration.
@@ -3499,10 +2471,10 @@ void SMSchedule::orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
++I, ++Pos) {
for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
unsigned BasePos, OffsetPos;
if (ST.getInstrInfo()->getBaseAndOffsetPosition(*MI, BasePos, OffsetPos))
if (MI->getOperand(BasePos).getReg() == Reg)
@@ -3676,7 +2648,7 @@ bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
assert(StageDef != -1 && "Instruction should have been scheduled.");
for (auto &SI : SU.Succs)
if (SI.isAssignedRegDep())
- if (ST.getRegisterInfo()->isPhysicalRegister(SI.getReg()))
+ if (Register::isPhysicalRegister(SI.getReg()))
if (stageScheduled(SI.getSUnit()) != StageDef)
return false;
}
@@ -3810,7 +2782,7 @@ void SwingSchedulerDAG::fixupRegisterOverlaps(std::deque<SUnit *> &Instrs) {
NewMI->getOperand(OffsetPos).setImm(NewOffset);
SU->setInstr(NewMI);
MISUnitMap[NewMI] = SU;
- NewMIs.insert(NewMI);
+ NewMIs[MI] = NewMI;
}
}
OverlapReg = 0;
@@ -3847,40 +2819,6 @@ void SMSchedule::finalizeSchedule(SwingSchedulerDAG *SSD) {
ScheduledInstrs[cycle].push_front(*I);
}
}
- // Iterate over the definitions in each instruction, and compute the
- // stage difference for each use. Keep the maximum value.
- for (auto &I : InstrToCycle) {
- int DefStage = stageScheduled(I.first);
- MachineInstr *MI = I.first->getInstr();
- for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
- MachineOperand &Op = MI->getOperand(i);
- if (!Op.isReg() || !Op.isDef())
- continue;
-
- unsigned Reg = Op.getReg();
- unsigned MaxDiff = 0;
- bool PhiIsSwapped = false;
- for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
- EI = MRI.use_end();
- UI != EI; ++UI) {
- MachineOperand &UseOp = *UI;
- MachineInstr *UseMI = UseOp.getParent();
- SUnit *SUnitUse = SSD->getSUnit(UseMI);
- int UseStage = stageScheduled(SUnitUse);
- unsigned Diff = 0;
- if (UseStage != -1 && UseStage >= DefStage)
- Diff = UseStage - DefStage;
- if (MI->isPHI()) {
- if (isLoopCarried(SSD, *MI))
- ++Diff;
- else
- PhiIsSwapped = true;
- }
- MaxDiff = std::max(Diff, MaxDiff);
- }
- RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
- }
- }
// Erase all the elements in the later stages. Only one iteration should
// remain in the scheduled list, and it contains all the instructions.
@@ -4085,4 +3023,3 @@ void ResourceManager::clearResources() {
return DFAResources->clearResources();
std::fill(ProcResourceCount.begin(), ProcResourceCount.end(), 0);
}
-
diff --git a/lib/CodeGen/MachinePostDominators.cpp b/lib/CodeGen/MachinePostDominators.cpp
index 7f220ed1fd8f..f4daff667e86 100644
--- a/lib/CodeGen/MachinePostDominators.cpp
+++ b/lib/CodeGen/MachinePostDominators.cpp
@@ -17,7 +17,9 @@ using namespace llvm;
namespace llvm {
template class DominatorTreeBase<MachineBasicBlock, true>; // PostDomTreeBase
-}
+
+extern bool VerifyMachineDomInfo;
+} // namespace llvm
char MachinePostDominatorTree::ID = 0;
@@ -25,33 +27,52 @@ char MachinePostDominatorTree::ID = 0;
INITIALIZE_PASS(MachinePostDominatorTree, "machinepostdomtree",
"MachinePostDominator Tree Construction", true, true)
-MachinePostDominatorTree::MachinePostDominatorTree() : MachineFunctionPass(ID) {
+MachinePostDominatorTree::MachinePostDominatorTree()
+ : MachineFunctionPass(ID), PDT(nullptr) {
initializeMachinePostDominatorTreePass(*PassRegistry::getPassRegistry());
- DT = new PostDomTreeBase<MachineBasicBlock>();
}
-FunctionPass *
-MachinePostDominatorTree::createMachinePostDominatorTreePass() {
+FunctionPass *MachinePostDominatorTree::createMachinePostDominatorTreePass() {
return new MachinePostDominatorTree();
}
-bool
-MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
- DT->recalculate(F);
+bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
+ PDT = std::make_unique<PostDomTreeT>();
+ PDT->recalculate(F);
return false;
}
-MachinePostDominatorTree::~MachinePostDominatorTree() {
- delete DT;
-}
-
-void
-MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
+void MachinePostDominatorTree::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(AU);
}
-void
-MachinePostDominatorTree::print(llvm::raw_ostream &OS, const Module *M) const {
- DT->print(OS);
+MachineBasicBlock *MachinePostDominatorTree::findNearestCommonDominator(
+ ArrayRef<MachineBasicBlock *> Blocks) const {
+ assert(!Blocks.empty());
+
+ MachineBasicBlock *NCD = Blocks.front();
+ for (MachineBasicBlock *BB : Blocks.drop_front()) {
+ NCD = PDT->findNearestCommonDominator(NCD, BB);
+
+ // Stop when the root is reached.
+ if (PDT->isVirtualRoot(PDT->getNode(NCD)))
+ return nullptr;
+ }
+
+ return NCD;
+}
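// Usage sketch (hypothetical block pointers, for illustration only):
//   MachineBasicBlock *Join =
//       PDT.findNearestCommonDominator({BB0, BB1, BB2});
//   if (!Join) {
//     // Control from these blocks reconverges only at the virtual exit root.
//   }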
+
+void MachinePostDominatorTree::verifyAnalysis() const {
+ if (PDT && VerifyMachineDomInfo)
+ if (!PDT->verify(PostDomTreeT::VerificationLevel::Basic)) {
+ errs() << "MachinePostDominatorTree verification failed\n";
+
+ abort();
+ }
+}
+
+void MachinePostDominatorTree::print(llvm::raw_ostream &OS,
+ const Module *M) const {
+ PDT->print(OS);
}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index f0fd0405d69d..b88d4ea462ef 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -144,7 +144,7 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg) {
}
unsigned MachineRegisterInfo::createIncompleteVirtualRegister(StringRef Name) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+ unsigned Reg = Register::index2VirtReg(getNumVirtRegs());
VRegInfo.grow(Reg);
RegAllocHints.grow(Reg);
insertVRegByName(Name, Reg);
@@ -202,7 +202,7 @@ void MachineRegisterInfo::clearVirtRegTypes() { VRegToType.clear(); }
void MachineRegisterInfo::clearVirtRegs() {
#ifndef NDEBUG
for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (!VRegInfo[Reg].second)
continue;
verifyUseList(Reg);
@@ -255,7 +255,7 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const {
void MachineRegisterInfo::verifyUseLists() const {
#ifndef NDEBUG
for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
- verifyUseList(TargetRegisterInfo::index2VirtReg(i));
+ verifyUseList(Register::index2VirtReg(i));
for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i)
verifyUseList(i);
#endif
@@ -386,7 +386,7 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
for (reg_iterator I = reg_begin(FromReg), E = reg_end(); I != E; ) {
MachineOperand &O = *I;
++I;
- if (TargetRegisterInfo::isPhysicalRegister(ToReg)) {
+ if (Register::isPhysicalRegister(ToReg)) {
O.substPhysReg(ToReg, *TRI);
} else {
O.setReg(ToReg);
@@ -498,7 +498,7 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
LaneBitmask MachineRegisterInfo::getMaxLaneMaskForVReg(unsigned Reg) const {
// Lane masks are only defined for vregs.
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const TargetRegisterClass &TRC = *getRegClass(Reg);
return TRC.getLaneMask();
}
@@ -517,7 +517,7 @@ void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) {
}
bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg) const {
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg));
+ assert(Register::isPhysicalRegister(PhysReg));
const TargetRegisterInfo *TRI = getTargetRegisterInfo();
if (TRI->isConstantPhysReg(PhysReg))
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
index e8b42047b49f..258a5f9e0482 100644
--- a/lib/CodeGen/MachineSSAUpdater.cpp
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -95,7 +95,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
while (I != BB->end() && I->isPHI()) {
bool Same = true;
for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
- unsigned SrcReg = I->getOperand(i).getReg();
+ Register SrcReg = I->getOperand(i).getReg();
MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
if (AVals[SrcBB] != SrcReg) {
Same = false;
@@ -118,7 +118,7 @@ MachineInstrBuilder InsertNewDef(unsigned Opcode,
const TargetRegisterClass *RC,
MachineRegisterInfo *MRI,
const TargetInstrInfo *TII) {
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
return BuildMI(*BB, I, DebugLoc(), TII->get(Opcode), NewVR);
}
@@ -292,7 +292,7 @@ public:
MachineSSAUpdater *Updater) {
// Insert an implicit_def to represent an undef value.
MachineInstr *NewDef = InsertNewDef(TargetOpcode::IMPLICIT_DEF,
- BB, BB->getFirstTerminator(),
+ BB, BB->getFirstNonPHI(),
Updater->VRC, Updater->MRI,
Updater->TII);
return NewDef->getOperand(0).getReg();
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp
index ae1170ad1be6..f0721ea3b76d 100644
--- a/lib/CodeGen/MachineScheduler.cpp
+++ b/lib/CodeGen/MachineScheduler.cpp
@@ -82,6 +82,10 @@ cl::opt<bool>
DumpCriticalPathLength("misched-dcpl", cl::Hidden,
cl::desc("Print critical path length to stdout"));
+cl::opt<bool> VerifyScheduling(
+ "verify-misched", cl::Hidden,
+ cl::desc("Verify machine instrs before and after machine scheduling"));
+
} // end namespace llvm
#ifndef NDEBUG
@@ -122,9 +126,6 @@ static cl::opt<bool> EnableMemOpCluster("misched-cluster", cl::Hidden,
cl::desc("Enable memop clustering."),
cl::init(true));
-static cl::opt<bool> VerifyScheduling("verify-misched", cl::Hidden,
- cl::desc("Verify machine instrs before and after machine scheduling"));
-
// DAG subtrees must have at least this many nodes.
static const unsigned MinSubtreeSize = 8;
@@ -198,6 +199,7 @@ char &llvm::MachineSchedulerID = MachineScheduler::ID;
INITIALIZE_PASS_BEGIN(MachineScheduler, DEBUG_TYPE,
"Machine Instruction Scheduler", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
@@ -210,7 +212,7 @@ MachineScheduler::MachineScheduler() : MachineSchedulerBase(ID) {
void MachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<TargetPassConfig>();
@@ -234,7 +236,7 @@ PostMachineScheduler::PostMachineScheduler() : MachineSchedulerBase(ID) {
void PostMachineScheduler::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
- AU.addRequiredID(MachineDominatorsID);
+ AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<TargetPassConfig>();
MachineFunctionPass::getAnalysisUsage(AU);
@@ -933,8 +935,8 @@ void ScheduleDAGMILive::collectVRegUses(SUnit &SU) {
if (TrackLaneMasks && !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
// Ignore re-defs.
@@ -985,7 +987,7 @@ void ScheduleDAGMILive::enterRegion(MachineBasicBlock *bb,
"ShouldTrackLaneMasks requires ShouldTrackPressure");
}
-// Setup the register pressure trackers for the top scheduled top and bottom
+// Setup the register pressure trackers for the top scheduled and bottom
// scheduled regions.
void ScheduleDAGMILive::initRegPressure() {
VRegUses.clear();
@@ -1095,7 +1097,7 @@ void ScheduleDAGMILive::updatePressureDiffs(
for (const RegisterMaskPair &P : LiveUses) {
unsigned Reg = P.RegUnit;
/// FIXME: Currently assuming single-use physregs.
- if (!TRI->isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
continue;
if (ShouldTrackLaneMasks) {
@@ -1319,8 +1321,8 @@ unsigned ScheduleDAGMILive::computeCyclicCriticalPath() {
// Visit each live out vreg def to find def/use pairs that cross iterations.
for (const RegisterMaskPair &P : RPTracker.getPressure().LiveOutRegs) {
unsigned Reg = P.RegUnit;
- if (!TRI->isVirtualRegister(Reg))
- continue;
+ if (!Register::isVirtualRegister(Reg))
+ continue;
const LiveInterval &LI = LIS->getInterval(Reg);
const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB));
if (!DefVNI)
@@ -1538,14 +1540,14 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createLoadClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? llvm::make_unique<LoadClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? std::make_unique<LoadClusterMutation>(TII, TRI)
: nullptr;
}
std::unique_ptr<ScheduleDAGMutation>
createStoreClusterDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return EnableMemOpCluster ? llvm::make_unique<StoreClusterMutation>(TII, TRI)
+ return EnableMemOpCluster ? std::make_unique<StoreClusterMutation>(TII, TRI)
: nullptr;
}
@@ -1657,7 +1659,7 @@ namespace llvm {
std::unique_ptr<ScheduleDAGMutation>
createCopyConstrainDAGMutation(const TargetInstrInfo *TII,
const TargetRegisterInfo *TRI) {
- return llvm::make_unique<CopyConstrain>(TII, TRI);
+ return std::make_unique<CopyConstrain>(TII, TRI);
}
} // end namespace llvm
@@ -1687,13 +1689,13 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMILive *DAG) {
// Check for pure vreg copies.
const MachineOperand &SrcOp = Copy->getOperand(1);
- unsigned SrcReg = SrcOp.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
+ Register SrcReg = SrcOp.getReg();
+ if (!Register::isVirtualRegister(SrcReg) || !SrcOp.readsReg())
return;
const MachineOperand &DstOp = Copy->getOperand(0);
- unsigned DstReg = DstOp.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg) || DstOp.isDead())
+ Register DstReg = DstOp.getReg();
+ if (!Register::isVirtualRegister(DstReg) || DstOp.isDead())
return;
// Check if either the dest or source is local. If it's live across a back
@@ -2914,14 +2916,12 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
unsigned UnscheduledOper = isTop ? 0 : 1;
// If we have already scheduled the physreg produce/consumer, immediately
// schedule the copy.
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(ScheduledOper).getReg()))
+ if (Register::isPhysicalRegister(MI->getOperand(ScheduledOper).getReg()))
return 1;
// If the physreg is at the boundary, defer it. Otherwise schedule it
// immediately to free the dependent. We can hoist the copy later.
bool AtBoundary = isTop ? !SU->NumSuccsLeft : !SU->NumPredsLeft;
- if (TargetRegisterInfo::isPhysicalRegister(
- MI->getOperand(UnscheduledOper).getReg()))
+ if (Register::isPhysicalRegister(MI->getOperand(UnscheduledOper).getReg()))
return AtBoundary ? -1 : 1;
}
@@ -2931,7 +2931,7 @@ int biasPhysReg(const SUnit *SU, bool isTop) {
// physical registers.
bool DoBias = true;
for (const MachineOperand &Op : MI->defs()) {
- if (Op.isReg() && !TargetRegisterInfo::isPhysicalRegister(Op.getReg())) {
+ if (Op.isReg() && !Register::isPhysicalRegister(Op.getReg())) {
DoBias = false;
break;
}
@@ -3259,7 +3259,8 @@ void GenericScheduler::reschedulePhysReg(SUnit *SU, bool isTop) {
// Find already scheduled copies with a single physreg dependence and move
// them just above the scheduled instruction.
for (SDep &Dep : Deps) {
- if (Dep.getKind() != SDep::Data || !TRI->isPhysicalRegister(Dep.getReg()))
+ if (Dep.getKind() != SDep::Data ||
+ !Register::isPhysicalRegister(Dep.getReg()))
continue;
SUnit *DepSU = Dep.getSUnit();
if (isTop ? DepSU->Succs.size() > 1 : DepSU->Preds.size() > 1)
@@ -3298,7 +3299,7 @@ void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
/// default scheduler if the target does not set a default.
ScheduleDAGMILive *llvm::createGenericSchedLive(MachineSchedContext *C) {
ScheduleDAGMILive *DAG =
- new ScheduleDAGMILive(C, llvm::make_unique<GenericScheduler>(C));
+ new ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C));
// Register DAG post-processors.
//
// FIXME: extend the mutation API to allow earlier mutations to instantiate
@@ -3450,7 +3451,7 @@ void PostGenericScheduler::schedNode(SUnit *SU, bool IsTopNode) {
}
ScheduleDAGMI *llvm::createGenericSchedPostRA(MachineSchedContext *C) {
- return new ScheduleDAGMI(C, llvm::make_unique<PostGenericScheduler>(C),
+ return new ScheduleDAGMI(C, std::make_unique<PostGenericScheduler>(C),
/*RemoveKillFlags=*/true);
}
@@ -3561,10 +3562,10 @@ public:
} // end anonymous namespace
static ScheduleDAGInstrs *createILPMaxScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(true));
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(true));
}
static ScheduleDAGInstrs *createILPMinScheduler(MachineSchedContext *C) {
- return new ScheduleDAGMILive(C, llvm::make_unique<ILPScheduler>(false));
+ return new ScheduleDAGMILive(C, std::make_unique<ILPScheduler>(false));
}
static MachineSchedRegistry ILPMaxRegistry(
@@ -3658,7 +3659,7 @@ static ScheduleDAGInstrs *createInstructionShuffler(MachineSchedContext *C) {
assert((TopDown || !ForceTopDown) &&
"-misched-topdown incompatible with -misched-bottomup");
return new ScheduleDAGMILive(
- C, llvm::make_unique<InstructionShuffler>(Alternate, TopDown));
+ C, std::make_unique<InstructionShuffler>(Alternate, TopDown));
}
static MachineSchedRegistry ShufflerRegistry(
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index 41db2c88ce50..27a2e7023f22 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -36,8 +36,9 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CommandLine.h"
@@ -114,15 +115,12 @@ namespace {
bool runOnMachineFunction(MachineFunction &MF) override;
void getAnalysisUsage(AnalysisUsage &AU) const override {
- AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
AU.addRequired<AAResultsWrapperPass>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachinePostDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineBranchProbabilityInfo>();
- AU.addPreserved<MachineDominatorTree>();
- AU.addPreserved<MachinePostDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
if (UseBlockFreqInfo)
AU.addRequired<MachineBlockFrequencyInfo>();
@@ -195,11 +193,10 @@ bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr &MI,
if (!MI.isCopy())
return false;
- unsigned SrcReg = MI.getOperand(1).getReg();
- unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
- !TargetRegisterInfo::isVirtualRegister(DstReg) ||
- !MRI->hasOneNonDBGUse(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(SrcReg) ||
+ !Register::isVirtualRegister(DstReg) || !MRI->hasOneNonDBGUse(SrcReg))
return false;
const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
@@ -233,8 +230,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
MachineBasicBlock *DefMBB,
bool &BreakPHIEdge,
bool &LocalUse) const {
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
- "Only makes sense for vregs");
+ assert(Register::isVirtualRegister(Reg) && "Only makes sense for vregs");
// Ignore debug uses because debug info doesn't affect the code.
if (MRI->use_nodbg_empty(Reg))
@@ -416,13 +412,13 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr &MI,
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
continue;
// We don't move live definitions of physical registers,
// so sinking their uses won't enable any opportunities.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
// If this instruction is the only user of a virtual register,
@@ -615,10 +611,10 @@ MachineSinking::FindSuccToSinkTo(MachineInstr &MI, MachineBasicBlock *MBB,
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0) continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -817,8 +813,9 @@ bool MachineSinking::SinkInstruction(MachineInstr &MI, bool &SawStore,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (Reg == 0 || !Register::isPhysicalRegister(Reg))
+ continue;
if (SuccToSinkTo->isLiveIn(Reg))
return false;
}
@@ -958,8 +955,9 @@ private:
/// Track which register units have been modified and used.
LiveRegUnits ModifiedRegUnits, UsedRegUnits;
- /// Track DBG_VALUEs of (unmodified) register units.
- DenseMap<unsigned, TinyPtrVector<MachineInstr*>> SeenDbgInstrs;
+ /// Track DBG_VALUEs of (unmodified) register units. Each DBG_VALUE has an
+ /// entry in this map for each unit it touches.
+ DenseMap<unsigned, TinyPtrVector<MachineInstr *>> SeenDbgInstrs;
/// Sink Copy instructions unused in the same block close to their uses in
/// successors.
@@ -1030,7 +1028,7 @@ static void clearKillFlags(MachineInstr *MI, MachineBasicBlock &CurBB,
const TargetRegisterInfo *TRI) {
for (auto U : UsedOpsInCopy) {
MachineOperand &MO = MI->getOperand(U);
- unsigned SrcReg = MO.getReg();
+ Register SrcReg = MO.getReg();
if (!UsedRegUnits.available(SrcReg)) {
MachineBasicBlock::iterator NI = std::next(MI->getIterator());
for (MachineInstr &UI : make_range(NI, CurBB.end())) {
@@ -1053,7 +1051,7 @@ static void updateLiveIn(MachineInstr *MI, MachineBasicBlock *SuccBB,
for (MCSubRegIterator S(DefReg, TRI, true); S.isValid(); ++S)
SuccBB->removeLiveIn(*S);
for (auto U : UsedOpsInCopy) {
- unsigned Reg = MI->getOperand(U).getReg();
+ Register Reg = MI->getOperand(U).getReg();
if (!SuccBB->isLiveIn(Reg))
SuccBB->addLiveIn(Reg);
}
@@ -1069,7 +1067,7 @@ static bool hasRegisterDependency(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
if (MO.isDef()) {
@@ -1094,6 +1092,14 @@ static bool hasRegisterDependency(MachineInstr *MI,
return HasRegDependency;
}
+static SmallSet<unsigned, 4> getRegUnits(unsigned Reg,
+ const TargetRegisterInfo *TRI) {
+ SmallSet<unsigned, 4> RegUnits;
+ for (auto RI = MCRegUnitIterator(Reg, TRI); RI.isValid(); ++RI)
+ RegUnits.insert(*RI);
+ return RegUnits;
+}
+
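// Why register units: a DBG_VALUE may read a sub- or super-register of the
// register a sunk copy defines, so overlap is tracked per unit rather than
// per named register. Hedged usage sketch (SomePhysReg is a placeholder;
// unit numbering is target-specific):
//   SmallSet<unsigned, 4> Units = getRegUnits(SomePhysReg, TRI);
//   // Units now contains every register unit that SomePhysReg covers.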
bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
MachineFunction &MF,
const TargetRegisterInfo *TRI,
@@ -1130,15 +1136,17 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
// for DBG_VALUEs later, record them when they're encountered.
if (MI->isDebugValue()) {
auto &MO = MI->getOperand(0);
- if (MO.isReg() && TRI->isPhysicalRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isPhysicalRegister(MO.getReg())) {
// Bail if we can already tell the sink would be rejected, rather
// than needlessly accumulating lots of DBG_VALUEs.
if (hasRegisterDependency(MI, UsedOpsInCopy, DefedRegsInCopy,
ModifiedRegUnits, UsedRegUnits))
continue;
- // Record debug use of this register.
- SeenDbgInstrs[MO.getReg()].push_back(MI);
+ // Record debug use of each reg unit.
+ SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (unsigned Reg : Units)
+ SeenDbgInstrs[Reg].push_back(MI);
}
continue;
}
@@ -1177,15 +1185,22 @@ bool PostRAMachineSinking::tryToSinkCopy(MachineBasicBlock &CurBB,
assert((SuccBB->pred_size() == 1 && *SuccBB->pred_begin() == &CurBB) &&
"Unexpected predecessor");
- // Collect DBG_VALUEs that must sink with this copy.
+ // Collect DBG_VALUEs that must sink with this copy. We've previously
+    // recorded which reg units each DBG_VALUE reads; if this instruction
+    // writes any of those units, then the corresponding DBG_VALUEs must sink.
+ SetVector<MachineInstr *> DbgValsToSinkSet;
SmallVector<MachineInstr *, 4> DbgValsToSink;
for (auto &MO : MI->operands()) {
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned reg = MO.getReg();
- for (auto *MI : SeenDbgInstrs.lookup(reg))
- DbgValsToSink.push_back(MI);
+
+ SmallSet<unsigned, 4> Units = getRegUnits(MO.getReg(), TRI);
+ for (unsigned Reg : Units)
+ for (auto *MI : SeenDbgInstrs.lookup(Reg))
+ DbgValsToSinkSet.insert(MI);
}
+ DbgValsToSink.insert(DbgValsToSink.begin(), DbgValsToSinkSet.begin(),
+ DbgValsToSinkSet.end());
// Clear the kill flag if SrcReg is killed between MI and the end of the
// block.
diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp
index f9505df4e7f4..66a3bc2f8cc4 100644
--- a/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/lib/CodeGen/MachineTraceMetrics.cpp
@@ -634,7 +634,7 @@ struct DataDep {
/// Create a DataDep from an SSA form virtual register.
DataDep(const MachineRegisterInfo *MRI, unsigned VirtReg, unsigned UseOp)
: UseOp(UseOp) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg));
+ assert(Register::isVirtualRegister(VirtReg));
MachineRegisterInfo::def_iterator DefI = MRI->def_begin(VirtReg);
assert(!DefI.atEnd() && "Register has no defs");
DefMI = DefI->getParent();
@@ -660,10 +660,10 @@ static bool getDataDeps(const MachineInstr &UseMI,
const MachineOperand &MO = *I;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
HasPhysRegs = true;
continue;
}
@@ -687,7 +687,7 @@ static void getPHIDeps(const MachineInstr &UseMI,
assert(UseMI.isPHI() && UseMI.getNumOperands() % 2 && "Bad PHI");
for (unsigned i = 1; i != UseMI.getNumOperands(); i += 2) {
if (UseMI.getOperand(i + 1).getMBB() == Pred) {
- unsigned Reg = UseMI.getOperand(i).getReg();
+ Register Reg = UseMI.getOperand(i).getReg();
Deps.push_back(DataDep(MRI, Reg, i));
return;
}
@@ -708,8 +708,8 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI,
const MachineOperand &MO = *MI;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
// Track live defs and kills for updating RegUnits.
if (MO.isDef()) {
@@ -765,7 +765,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) {
assert(TBI.HasValidInstrHeights && "Missing height info");
unsigned MaxLen = 0;
for (const LiveInReg &LIR : TBI.LiveIns) {
- if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg))
+ if (!Register::isVirtualRegister(LIR.Reg))
continue;
const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg);
// Ignore dependencies outside the current trace.
@@ -902,8 +902,8 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
const MachineOperand &MO = *MOI;
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg))
continue;
if (MO.readsReg())
ReadOps.push_back(MI.getOperandNo(MOI));
@@ -930,7 +930,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr &MI, unsigned Height,
// Now we know the height of MI. Update any regunits read.
for (unsigned i = 0, e = ReadOps.size(); i != e; ++i) {
- unsigned Reg = MI.getOperand(ReadOps[i]).getReg();
+ Register Reg = MI.getOperand(ReadOps[i]).getReg();
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
LiveRegUnit &LRU = RegUnits[*Units];
// Set the height to the highest reader of the unit.
@@ -979,7 +979,7 @@ addLiveIns(const MachineInstr *DefMI, unsigned DefOp,
ArrayRef<const MachineBasicBlock*> Trace) {
assert(!Trace.empty() && "Trace should contain at least one block");
unsigned Reg = DefMI->getOperand(DefOp).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
const MachineBasicBlock *DefMBB = DefMI->getParent();
// Reg is live-in to all blocks in Trace that follow DefMBB.
@@ -1026,7 +1026,7 @@ computeInstrHeights(const MachineBasicBlock *MBB) {
if (MBB) {
TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()];
for (LiveInReg &LI : TBI.LiveIns) {
- if (TargetRegisterInfo::isVirtualRegister(LI.Reg)) {
+ if (Register::isVirtualRegister(LI.Reg)) {
// For virtual registers, the def latency is included.
unsigned &Height = Heights[MTM.MRI->getVRegDef(LI.Reg)];
if (Height < LI.Height)
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 0ad792ac62cf..969743edca52 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -22,7 +22,6 @@
// the verifier errors.
//===----------------------------------------------------------------------===//
-#include "LiveRangeCalc.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
@@ -37,6 +36,7 @@
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveStacks.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
@@ -122,7 +122,7 @@ namespace {
// Add Reg and any sub-registers to RV
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
RV.push_back(*SubRegs);
}
@@ -159,7 +159,7 @@ namespace {
// Add register to vregsPassed if it belongs there. Return true if
// anything changed.
bool addPassed(unsigned Reg) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (regsKilled.count(Reg) || regsLiveOut.count(Reg))
return false;
@@ -178,7 +178,7 @@ namespace {
// Add register to vregsRequired if it belongs there. Return true if
// anything changed.
bool addRequired(unsigned Reg) {
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ if (!Register::isVirtualRegister(Reg))
return false;
if (regsLiveOut.count(Reg))
return false;
@@ -552,7 +552,7 @@ void MachineVerifier::report_context_vreg(unsigned VReg) const {
}
void MachineVerifier::report_context_vreg_regunit(unsigned VRegOrUnit) const {
- if (TargetRegisterInfo::isVirtualRegister(VRegOrUnit)) {
+ if (Register::isVirtualRegister(VRegOrUnit)) {
report_context_vreg(VRegOrUnit);
} else {
errs() << "- regunit: " << printRegUnit(VRegOrUnit, TRI) << '\n';
@@ -797,7 +797,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsLive.clear();
if (MRI->tracksLiveness()) {
for (const auto &LI : MBB->liveins()) {
- if (!TargetRegisterInfo::isPhysicalRegister(LI.PhysReg)) {
+ if (!Register::isPhysicalRegister(LI.PhysReg)) {
report("MBB live-in list contains non-physical register", MBB);
continue;
}
@@ -957,7 +957,7 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
// Generic opcodes must not have physical register operands.
for (unsigned I = 0; I < MI->getNumOperands(); ++I) {
const MachineOperand *MO = &MI->getOperand(I);
- if (MO->isReg() && TargetRegisterInfo::isPhysicalRegister(MO->getReg()))
+ if (MO->isReg() && Register::isPhysicalRegister(MO->getReg()))
report("Generic instruction cannot have physical register", MO, I);
}
@@ -1368,7 +1368,108 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
break;
}
}
+ switch (IntrID) {
+ case Intrinsic::memcpy:
+ if (MI->getNumOperands() != 5)
+ report("Expected memcpy intrinsic to have 5 operands", MI);
+ break;
+ case Intrinsic::memmove:
+ if (MI->getNumOperands() != 5)
+ report("Expected memmove intrinsic to have 5 operands", MI);
+ break;
+ case Intrinsic::memset:
+ if (MI->getNumOperands() != 5)
+ report("Expected memset intrinsic to have 5 operands", MI);
+ break;
+ }
+ break;
+ }
+ case TargetOpcode::G_SEXT_INREG: {
+ if (!MI->getOperand(2).isImm()) {
+ report("G_SEXT_INREG expects an immediate operand #2", MI);
+ break;
+ }
+
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT SrcTy = MRI->getType(MI->getOperand(1).getReg());
+ verifyVectorElementMatch(DstTy, SrcTy, MI);
+
+ int64_t Imm = MI->getOperand(2).getImm();
+ if (Imm <= 0)
+ report("G_SEXT_INREG size must be >= 1", MI);
+ if (Imm >= SrcTy.getScalarSizeInBits())
+ report("G_SEXT_INREG size must be less than source bit width", MI);
+ break;
+ }
+ case TargetOpcode::G_SHUFFLE_VECTOR: {
+ const MachineOperand &MaskOp = MI->getOperand(3);
+ if (!MaskOp.isShuffleMask()) {
+ report("Incorrect mask operand type for G_SHUFFLE_VECTOR", MI);
+ break;
+ }
+
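+ // For example, a valid G_SHUFFLE_VECTOR producing <4 x s32> from two
+ // <2 x s32> sources takes a mask of four i32 indices in [0, 3], or -1
+ // for an undef lane.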
+ const Constant *Mask = MaskOp.getShuffleMask();
+ auto *MaskVT = dyn_cast<VectorType>(Mask->getType());
+ if (!MaskVT || !MaskVT->getElementType()->isIntegerTy(32)) {
+ report("Invalid shufflemask constant type", MI);
+ break;
+ }
+
+ if (!Mask->getAggregateElement(0u)) {
+ report("Invalid shufflemask constant type", MI);
+ break;
+ }
+ LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+ LLT Src0Ty = MRI->getType(MI->getOperand(1).getReg());
+ LLT Src1Ty = MRI->getType(MI->getOperand(2).getReg());
+
+ if (Src0Ty != Src1Ty)
+ report("Source operands must be the same type", MI);
+
+ if (Src0Ty.getScalarType() != DstTy.getScalarType())
+ report("G_SHUFFLE_VECTOR cannot change element type", MI);
+
+ // Don't check that all operands are vectors because scalars are used in
+ // place of 1-element vectors.
+ int SrcNumElts = Src0Ty.isVector() ? Src0Ty.getNumElements() : 1;
+ int DstNumElts = DstTy.isVector() ? DstTy.getNumElements() : 1;
+
+ SmallVector<int, 32> MaskIdxes;
+ ShuffleVectorInst::getShuffleMask(Mask, MaskIdxes);
+
+ if (static_cast<int>(MaskIdxes.size()) != DstNumElts)
+ report("Wrong result type for shufflemask", MI);
+
+ for (int Idx : MaskIdxes) {
+ if (Idx < 0)
+ continue;
+
+ if (Idx >= 2 * SrcNumElts)
+ report("Out of bounds shuffle index", MI);
+ }
+
+ break;
+ }
+ case TargetOpcode::G_DYN_STACKALLOC: {
+ const MachineOperand &DstOp = MI->getOperand(0);
+ const MachineOperand &AllocOp = MI->getOperand(1);
+ const MachineOperand &AlignOp = MI->getOperand(2);
+
+ if (!DstOp.isReg() || !MRI->getType(DstOp.getReg()).isPointer()) {
+ report("dst operand 0 must be a pointer type", MI);
+ break;
+ }
+
+ if (!AllocOp.isReg() || !MRI->getType(AllocOp.getReg()).isScalar()) {
+ report("src operand 1 must be a scalar reg type", MI);
+ break;
+ }
+
+ if (!AlignOp.isImm()) {
+ report("src operand 2 must be an immediate type", MI);
+ break;
+ }
break;
}
default:
@@ -1525,11 +1626,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("Operand should be tied", MO, MONum);
else if (unsigned(TiedTo) != MI->findTiedOperandIdx(MONum))
report("Tied def doesn't match MCInstrDesc", MO, MONum);
- else if (TargetRegisterInfo::isPhysicalRegister(MO->getReg())) {
+ else if (Register::isPhysicalRegister(MO->getReg())) {
const MachineOperand &MOTied = MI->getOperand(TiedTo);
if (!MOTied.isReg())
report("Tied counterpart must be a register", &MOTied, TiedTo);
- else if (TargetRegisterInfo::isPhysicalRegister(MOTied.getReg()) &&
+ else if (Register::isPhysicalRegister(MOTied.getReg()) &&
MO->getReg() != MOTied.getReg())
report("Tied physical registers must match.", &MOTied, TiedTo);
}
@@ -1543,7 +1644,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
switch (MO->getType()) {
case MachineOperand::MO_Register: {
- const unsigned Reg = MO->getReg();
+ const Register Reg = MO->getReg();
if (!Reg)
return;
if (MRI->tracksLiveness() && !MI->isDebugValue())
@@ -1581,7 +1682,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Check register classes.
unsigned SubIdx = MO->getSubReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (SubIdx) {
report("Illegal subregister index for physical register", MO, MONum);
return;
@@ -1817,7 +1918,7 @@ void MachineVerifier::checkLivenessAtDef(const MachineOperand *MO,
if (MO->isDead()) {
LiveQueryResult LRQ = LR.Query(DefIdx);
if (!LRQ.isDeadDef()) {
- assert(TargetRegisterInfo::isVirtualRegister(VRegOrUnit) &&
+ assert(Register::isVirtualRegister(VRegOrUnit) &&
"Expecting a virtual register.");
// A dead subreg def only tells us that the specific subreg is dead. There
// could be other non-dead defs of other subregs, or we could have other
@@ -1845,8 +1946,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsKilled, Reg);
// Check that LiveVars knows this kill.
- if (LiveVars && TargetRegisterInfo::isVirtualRegister(Reg) &&
- MO->isKill()) {
+ if (LiveVars && Register::isVirtualRegister(Reg) && MO->isKill()) {
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
if (!is_contained(VI.Kills, MI))
report("Kill missing from LiveVariables", MO, MONum);
@@ -1856,7 +1956,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (LiveInts && !LiveInts->isNotInMIMap(*MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(*MI);
// Check the cached regunit intervals.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) {
+ if (Register::isPhysicalRegister(Reg) && !isReserved(Reg)) {
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) {
if (MRI->isReservedRegUnit(*Units))
continue;
@@ -1865,7 +1965,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
}
}
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (LiveInts->hasInterval(Reg)) {
// This is a virtual register interval.
const LiveInterval &LI = LiveInts->getInterval(Reg);
@@ -1900,7 +2000,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
// Use of a dead register.
if (!regsLive.count(Reg)) {
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// Reserved registers may be used even when 'dead'.
bool Bad = !isReserved(Reg);
// We are fine if just any subregister has a defined value.
@@ -1922,7 +2022,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
if (!MOP.isReg() || !MOP.isImplicit())
continue;
- if (!TargetRegisterInfo::isPhysicalRegister(MOP.getReg()))
+ if (!Register::isPhysicalRegister(MOP.getReg()))
continue;
for (MCSubRegIterator SubRegs(MOP.getReg(), TRI); SubRegs.isValid();
@@ -1960,7 +2060,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
addRegWithSubRegs(regsDefined, Reg);
// Verify SSA form.
- if (MRI->isSSA() && TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (MRI->isSSA() && Register::isVirtualRegister(Reg) &&
std::next(MRI->def_begin(Reg)) != MRI->def_end())
report("Multiple virtual register defs in SSA form", MO, MONum);
@@ -1969,7 +2069,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(*MI);
DefIdx = DefIdx.getRegSlot(MO->isEarlyClobber());
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
checkLivenessAtDef(MO, MONum, DefIdx, LI, Reg);
@@ -2007,7 +2107,7 @@ void MachineVerifier::visitMachineBundleAfter(const MachineInstr *MI) {
while (!regMasks.empty()) {
const uint32_t *Mask = regMasks.pop_back_val();
for (RegSet::iterator I = regsLive.begin(), E = regsLive.end(); I != E; ++I)
- if (TargetRegisterInfo::isPhysicalRegister(*I) &&
+ if (Register::isPhysicalRegister(*I) &&
MachineOperand::clobbersPhysReg(Mask, *I))
regsDead.push_back(*I);
}
@@ -2119,8 +2219,8 @@ void MachineVerifier::checkPHIOps(const MachineBasicBlock &MBB) {
if (MODef.isTied() || MODef.isImplicit() || MODef.isInternalRead() ||
MODef.isEarlyClobber() || MODef.isDebug())
report("Unexpected flag on PHI operand", &MODef, 0);
- unsigned DefReg = MODef.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DefReg))
+ Register DefReg = MODef.getReg();
+ if (!Register::isVirtualRegister(DefReg))
report("Expected first PHI operand to be a virtual register", &MODef, 0);
for (unsigned I = 1, E = Phi.getNumOperands(); I != E; I += 2) {
@@ -2212,7 +2312,7 @@ void MachineVerifier::visitMachineFunctionAfter() {
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (const auto &MBB : *MF) {
BBInfo &MInfo = MBBInfoMap[&MBB];
@@ -2238,7 +2338,7 @@ void MachineVerifier::verifyLiveVariables() {
void MachineVerifier::verifyLiveIntervals() {
assert(LiveInts && "Don't call verifyLiveIntervals without LiveInts");
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
// Spilling and splitting may leave unused registers around. Skip them.
if (MRI->reg_nodbg_empty(Reg))
@@ -2315,11 +2415,11 @@ void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR,
for (ConstMIBundleOperands MOI(*MI); MOI.isValid(); ++MOI) {
if (!MOI->isReg() || !MOI->isDef())
continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
if (MOI->getReg() != Reg)
continue;
} else {
- if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) ||
+ if (!Register::isPhysicalRegister(MOI->getReg()) ||
!TRI->hasRegUnit(MOI->getReg(), Reg))
continue;
}
@@ -2402,7 +2502,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
return;
// RegUnit intervals are allowed dead phis.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() &&
+ if (!Register::isVirtualRegister(Reg) && VNI->isPHIDef() &&
S.start == VNI->def && S.end == VNI->def.getDeadSlot())
return;
@@ -2446,7 +2546,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
// The following checks only apply to virtual registers. Physreg liveness
// is too weird to check.
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
// A live segment can end with either a redefinition, a kill flag on a
// use, or a dead flag on a def.
bool hasRead = false;
@@ -2519,8 +2619,7 @@ void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR,
while (true) {
assert(LiveInts->isLiveInToMBB(LR, &*MFI));
// We don't know how to track physregs into a landing pad.
- if (!TargetRegisterInfo::isVirtualRegister(Reg) &&
- MFI->isEHPad()) {
+ if (!Register::isVirtualRegister(Reg) && MFI->isEHPad()) {
if (&*MFI == EndMBB)
break;
++MFI;
@@ -2580,7 +2679,7 @@ void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg,
void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) {
unsigned Reg = LI.reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
verifyLiveRange(LI, Reg);
LaneBitmask Mask;
diff --git a/lib/CodeGen/MacroFusion.cpp b/lib/CodeGen/MacroFusion.cpp
index 2db1e86905a4..d21eae222af0 100644
--- a/lib/CodeGen/MacroFusion.cpp
+++ b/lib/CodeGen/MacroFusion.cpp
@@ -176,7 +176,7 @@ std::unique_ptr<ScheduleDAGMutation>
llvm::createMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
- return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, true);
return nullptr;
}
@@ -184,6 +184,6 @@ std::unique_ptr<ScheduleDAGMutation>
llvm::createBranchMacroFusionDAGMutation(
ShouldSchedulePredTy shouldScheduleAdjacent) {
if(EnableMacroFusion)
- return llvm::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
+ return std::make_unique<MacroFusion>(shouldScheduleAdjacent, false);
return nullptr;
}
diff --git a/lib/CodeGen/ModuloSchedule.cpp b/lib/CodeGen/ModuloSchedule.cpp
new file mode 100644
index 000000000000..7ce3c5861801
--- /dev/null
+++ b/lib/CodeGen/ModuloSchedule.cpp
@@ -0,0 +1,2022 @@
+//===- ModuloSchedule.cpp - Software pipeline schedule expansion ----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/ModuloSchedule.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineLoopUtils.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+
+#define DEBUG_TYPE "pipeliner"
+using namespace llvm;
+
+void ModuloSchedule::print(raw_ostream &OS) {
+ for (MachineInstr *MI : ScheduledInstrs)
+ OS << "[stage " << getStage(MI) << " @" << getCycle(MI) << "c] " << *MI;
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+
+/// Return the register values for the operands of a Phi instruction.
+/// This function assumes the instruction is a Phi.
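+/// For example, given v1 = phi(a [preheader], v2 [loop]), InitVal is set
+/// to a and LoopVal to v2.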
+static void getPhiRegs(MachineInstr &Phi, MachineBasicBlock *Loop,
+ unsigned &InitVal, unsigned &LoopVal) {
+ assert(Phi.isPHI() && "Expecting a Phi.");
+
+ InitVal = 0;
+ LoopVal = 0;
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != Loop)
+ InitVal = Phi.getOperand(i).getReg();
+ else
+ LoopVal = Phi.getOperand(i).getReg();
+
+ assert(InitVal != 0 && LoopVal != 0 && "Unexpected Phi structure.");
+}
+
+/// Return the Phi register value that comes from the incoming block.
+static unsigned getInitPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() != LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+/// Return the Phi register value that comes from the loop block.
+static unsigned getLoopPhiReg(MachineInstr &Phi, MachineBasicBlock *LoopBB) {
+ for (unsigned i = 1, e = Phi.getNumOperands(); i != e; i += 2)
+ if (Phi.getOperand(i + 1).getMBB() == LoopBB)
+ return Phi.getOperand(i).getReg();
+ return 0;
+}
+
+void ModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = *BB->pred_begin();
+ if (Preheader == BB)
+ Preheader = *std::next(BB->pred_begin());
+
+ // Iterate over the definitions in each instruction, and compute the
+ // stage difference for each use. Keep the maximum value.
+ for (MachineInstr *MI : Schedule.getInstructions()) {
+ int DefStage = Schedule.getStage(MI);
+ for (unsigned i = 0, e = MI->getNumOperands(); i < e; ++i) {
+ MachineOperand &Op = MI->getOperand(i);
+ if (!Op.isReg() || !Op.isDef())
+ continue;
+
+ Register Reg = Op.getReg();
+ unsigned MaxDiff = 0;
+ bool PhiIsSwapped = false;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(Reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ int UseStage = Schedule.getStage(UseMI);
+ unsigned Diff = 0;
+ if (UseStage != -1 && UseStage >= DefStage)
+ Diff = UseStage - DefStage;
+ if (MI->isPHI()) {
+ if (isLoopCarried(*MI))
+ ++Diff;
+ else
+ PhiIsSwapped = true;
+ }
+ MaxDiff = std::max(Diff, MaxDiff);
+ }
+ RegToStageDiff[Reg] = std::make_pair(MaxDiff, PhiIsSwapped);
+ }
+ }
+
+ generatePipelinedLoop();
+}
+
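+// Shape of the generated code, for a schedule with S stages: the prolog
+// blocks ramp up the pipeline, with block i issuing the stage-0 work of
+// iteration i and advancing the iterations already in flight by one stage;
+// the kernel block is the steady state, executing one stage of S
+// consecutive iterations per pass; and the epilog blocks drain the
+// pipeline, finishing the iterations still in flight when the kernel
+// exits.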
+void ModuloScheduleExpander::generatePipelinedLoop() {
+ LoopInfo = TII->analyzeLoopForPipelining(BB);
+ assert(LoopInfo && "Must be able to analyze loop!");
+
+ // Create a new basic block for the kernel and add it to the CFG.
+ MachineBasicBlock *KernelBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+
+ unsigned MaxStageCount = Schedule.getNumStages() - 1;
+
+ // Remember the registers that are used in different stages. The index is
+ // the iteration, or stage, that the instruction is scheduled in. This is
+ // a map between register names in the original block and the names created
+ // in each stage of the pipelined loop.
+ ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+ InstrMapTy InstrMap;
+
+ SmallVector<MachineBasicBlock *, 4> PrologBBs;
+
+ // Generate the prolog instructions that set up the pipeline.
+ generateProlog(MaxStageCount, KernelBB, VRMap, PrologBBs);
+ MF.insert(BB->getIterator(), KernelBB);
+
+ // Rearrange the instructions to generate the new, pipelined loop,
+ // and update register names as needed.
+ for (MachineInstr *CI : Schedule.getInstructions()) {
+ if (CI->isPHI())
+ continue;
+ unsigned StageNum = Schedule.getStage(CI);
+ MachineInstr *NewMI = cloneInstr(CI, MaxStageCount, StageNum);
+ updateInstruction(NewMI, false, MaxStageCount, StageNum, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = CI;
+ }
+
+ // Copy any terminator instructions to the new kernel, and update
+ // names as needed.
+ for (MachineBasicBlock::iterator I = BB->getFirstTerminator(),
+ E = BB->instr_end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(&*I);
+ updateInstruction(NewMI, false, MaxStageCount, 0, VRMap);
+ KernelBB->push_back(NewMI);
+ InstrMap[NewMI] = &*I;
+ }
+
+ NewKernel = KernelBB;
+ KernelBB->transferSuccessors(BB);
+ KernelBB->replaceSuccessor(BB, KernelBB);
+
+ generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
+ InstrMap, MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
+ MaxStageCount, MaxStageCount, false);
+
+ LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
+
+ SmallVector<MachineBasicBlock *, 4> EpilogBBs;
+ // Generate the epilog instructions to complete the pipeline.
+ generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
+
+ // We need this step because the register allocation doesn't handle some
+ // situations well, so we insert copies to help out.
+ splitLifetimes(KernelBB, EpilogBBs);
+
+ // Remove dead instructions due to loop induction variables.
+ removeDeadInstructions(KernelBB, EpilogBBs);
+
+ // Add branches between prolog and epilog blocks.
+ addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
+
+ delete[] VRMap;
+}
+
+void ModuloScheduleExpander::cleanup() {
+ // Remove the original loop since it's no longer referenced.
+ for (auto &I : *BB)
+ LIS.RemoveMachineInstrFromMaps(I);
+ BB->clear();
+ BB->eraseFromParent();
+}
+
+/// Generate the pipeline prolog code.
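+/// For example, with three stages the prolog has two blocks: the first
+/// runs stage 0 of iteration 0; the second runs stage 1 of iteration 0
+/// and stage 0 of iteration 1.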
+void ModuloScheduleExpander::generateProlog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &PrologBBs) {
+ MachineBasicBlock *PredBB = Preheader;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which will be generated in the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ for (unsigned i = 0; i < LastStage; ++i) {
+ // Create and insert the prolog basic block prior to the original loop
+ // basic block. The original loop is removed later.
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ PrologBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+ NewBB->transferSuccessors(PredBB);
+ PredBB->addSuccessor(NewBB);
+ PredBB = NewBB;
+
+ // Generate instructions for each appropriate stage. Process instructions
+ // in original program order.
+ for (int StageNum = i; StageNum >= 0; --StageNum) {
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstTerminator();
+ BBI != BBE; ++BBI) {
+ if (Schedule.getStage(&*BBI) == StageNum) {
+ if (BBI->isPHI())
+ continue;
+ MachineInstr *NewMI =
+ cloneAndChangeInstr(&*BBI, i, (unsigned)StageNum);
+ updateInstruction(NewMI, false, i, (unsigned)StageNum, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = &*BBI;
+ }
+ }
+ }
+ rewritePhiValues(NewBB, i, VRMap, InstrMap);
+ LLVM_DEBUG({
+ dbgs() << "prolog:\n";
+ NewBB->dump();
+ });
+ }
+
+ PredBB->replaceSuccessor(BB, KernelBB);
+
+ // Check if we need to remove the branch from the preheader to the original
+ // loop, and replace it with a branch to the new loop.
+ unsigned numBranches = TII->removeBranch(*Preheader);
+ if (numBranches) {
+ SmallVector<MachineOperand, 0> Cond;
+ TII->insertBranch(*Preheader, PrologBBs[0], nullptr, Cond, DebugLoc());
+ }
+}
+
+/// Generate the pipeline epilog code. The epilog code finishes the iterations
+/// that were started in either the prolog or the kernel. We create a basic
+/// block for each stage that needs to complete.
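+/// For example, with three stages the first epilog block runs stage 2 for
+/// the iteration that has completed stages 0 and 1, and the second runs
+/// stages 1 and 2 for the iteration that has only completed stage 0.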
+void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
+ // We need to change the branch from the kernel to the first epilog block, so
+ // this call to analyze branch uses the kernel rather than the original BB.
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool checkBranch = TII->analyzeBranch(*KernelBB, TBB, FBB, Cond);
+ assert(!checkBranch && "generateEpilog must be able to analyze the branch");
+ if (checkBranch)
+ return;
+
+ MachineBasicBlock::succ_iterator LoopExitI = KernelBB->succ_begin();
+ if (*LoopExitI == KernelBB)
+ ++LoopExitI;
+ assert(LoopExitI != KernelBB->succ_end() && "Expecting a successor");
+ MachineBasicBlock *LoopExitBB = *LoopExitI;
+
+ MachineBasicBlock *PredBB = KernelBB;
+ MachineBasicBlock *EpilogStart = LoopExitBB;
+ InstrMapTy InstrMap;
+
+ // Generate a basic block for each stage, not including the last stage,
+ // which was generated for the kernel. Each basic block may contain
+ // instructions from multiple stages/iterations.
+ int EpilogStage = LastStage + 1;
+ for (unsigned i = LastStage; i >= 1; --i, ++EpilogStage) {
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock();
+ EpilogBBs.push_back(NewBB);
+ MF.insert(BB->getIterator(), NewBB);
+
+ PredBB->replaceSuccessor(LoopExitBB, NewBB);
+ NewBB->addSuccessor(LoopExitBB);
+
+ if (EpilogStart == LoopExitBB)
+ EpilogStart = NewBB;
+
+ // Add instructions to the epilog depending on the current block.
+ // Process instructions in original program order.
+ for (unsigned StageNum = i; StageNum <= LastStage; ++StageNum) {
+ for (auto &BBI : *BB) {
+ if (BBI.isPHI())
+ continue;
+ MachineInstr *In = &BBI;
+ if ((unsigned)Schedule.getStage(In) == StageNum) {
+ // Instructions with memoperands in the epilog are updated with
+ // conservative values.
+ MachineInstr *NewMI = cloneInstr(In, UINT_MAX, 0);
+ updateInstruction(NewMI, i == 1, EpilogStage, 0, VRMap);
+ NewBB->push_back(NewMI);
+ InstrMap[NewMI] = In;
+ }
+ }
+ }
+ generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
+ InstrMap, LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
+ LastStage, EpilogStage, i == 1);
+ PredBB = NewBB;
+
+ LLVM_DEBUG({
+ dbgs() << "epilog:\n";
+ NewBB->dump();
+ });
+ }
+
+ // Fix any Phi nodes in the loop exit block.
+ LoopExitBB->replacePhiUsesWith(BB, PredBB);
+
+ // Create a branch to the new epilog from the kernel.
+ // Remove the original branch and add a new branch to the epilog.
+ TII->removeBranch(*KernelBB);
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ // Add a branch to the loop exit.
+ if (EpilogBBs.size() > 0) {
+ MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
+ SmallVector<MachineOperand, 4> Cond1;
+ TII->insertBranch(*LastEpilogBB, LoopExitBB, nullptr, Cond1, DebugLoc());
+ }
+}
+
+/// Replace all uses of FromReg that appear outside the specified
+/// basic block with ToReg.
+static void replaceRegUsesAfterLoop(unsigned FromReg, unsigned ToReg,
+ MachineBasicBlock *MBB,
+ MachineRegisterInfo &MRI,
+ LiveIntervals &LIS) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(FromReg),
+ E = MRI.use_end();
+ I != E;) {
+ MachineOperand &O = *I;
+ ++I;
+ if (O.getParent()->getParent() != MBB)
+ O.setReg(ToReg);
+ }
+ if (!LIS.hasInterval(ToReg))
+ LIS.createEmptyInterval(ToReg);
+}
+
+/// Return true if the register has a use that occurs outside the
+/// specified loop.
+static bool hasUseAfterLoop(unsigned Reg, MachineBasicBlock *BB,
+ MachineRegisterInfo &MRI) {
+ for (MachineRegisterInfo::use_iterator I = MRI.use_begin(Reg),
+ E = MRI.use_end();
+ I != E; ++I)
+ if (I->getParent()->getParent() != BB)
+ return true;
+ return false;
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// This function looks at the Phis from the original code to guide the
+/// creation of new Phis.
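+/// For example, a loop-carried value
+///   v1 = phi(a [prolog], v2 [kernel])
+/// may need one renamed Phi per stage in which v1 is live; the initial
+/// operand of each copy comes from the matching prolog stage (or a), and
+/// the loop operand from the kernel or the previous epilog block.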
+void ModuloScheduleExpander::generateExistingPhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ // Compute the stage number for the initial value of the Phi, which
+ // comes from the prolog. The prolog to use depends on which kernel or
+ // epilog block we are adding the Phi to.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ bool InKernel = (LastStageNum == CurStageNum);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - (CurStageNum - LastStageNum);
+ PrevStage = LastStageNum + (CurStageNum - LastStageNum) - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->instr_begin(),
+ BBE = BB->getFirstNonPHI();
+ BBI != BBE; ++BBI) {
+ Register Def = BBI->getOperand(0).getReg();
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(*BBI, BB, InitVal, LoopVal);
+
+ unsigned PhiOp1 = 0;
+ // The Phi value from the loop body typically is defined in the loop, but
+ // not always, so we need to check whether the value is defined in the loop.
+ unsigned PhiOp2 = LoopVal;
+ if (VRMap[LastStageNum].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum][LoopVal];
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ int LoopValStage = Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumStages = getStagesForReg(Def, CurStageNum);
+ if (NumStages == 0) {
+ // We don't need to generate a Phi anymore, but we need to rename any uses
+ // of the Phi value.
+ unsigned NewReg = VRMap[PrevStage][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, 0, &*BBI, Def,
+ InitVal, NewReg);
+ if (VRMap[CurStageNum].count(LoopVal))
+ VRMap[CurStageNum][Def] = VRMap[CurStageNum][LoopVal];
+ }
+ // Adjust the number of Phis needed depending on the number of prologs left,
+ // and the distance from where the Phi is first scheduled. The number of
+ // Phis cannot exceed the number of prolog stages. Each stage can
+ // potentially define two values.
+ unsigned MaxPhis = PrologStage + 2;
+ if (!InKernel && (int)PrologStage <= LoopValStage)
+ MaxPhis = std::max((int)MaxPhis - (int)LoopValStage, 1);
+ unsigned NumPhis = std::min(NumStages, MaxPhis);
+
+ unsigned NewReg = 0;
+ unsigned AccessStage = (LoopValStage != -1) ? LoopValStage : StageScheduled;
+ // In the epilog, we may need to look back one stage to get the correct
+ // Phi name because the epilog and prolog blocks execute the same stage.
+ // The correct name is from the previous block only when the Phi has
+ // been completely scheduled prior to the epilog, and the Phi value is not
+ // needed in multiple stages.
+ int StageDiff = 0;
+ if (!InKernel && StageScheduled >= LoopValStage && AccessStage == 0 &&
+ NumPhis == 1)
+ StageDiff = 1;
+ // Adjust the computations below when the phi and the loop definition
+ // are scheduled in different stages.
+ if (InKernel && LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiff = StageScheduled - LoopValStage;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ // If the Phi hasn't been scheduled, then use the initial Phi operand
+ // value. Otherwise, use the scheduled version of the instruction. This
+ // is a little complicated when a Phi references another Phi.
+ if (np > PrologStage || StageScheduled >= (int)LastStageNum)
+ PhiOp1 = InitVal;
+ // Check if the Phi has already been scheduled in a prolog stage.
+ else if (PrologStage >= AccessStage + StageDiff + np &&
+ VRMap[PrologStage - StageDiff - np].count(LoopVal) != 0)
+ PhiOp1 = VRMap[PrologStage - StageDiff - np][LoopVal];
+ // Check if the Phi has already been scheduled, but the loop instruction
+ // is either another Phi, or doesn't occur in the loop.
+ else if (PrologStage >= AccessStage + StageDiff + np) {
+ // If the Phi references another Phi, we need to examine the other
+ // Phi to get the correct value.
+ PhiOp1 = LoopVal;
+ MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1);
+ int Indirects = 1;
+ while (InstOp1 && InstOp1->isPHI() && InstOp1->getParent() == BB) {
+ int PhiStage = Schedule.getStage(InstOp1);
+ if ((int)(PrologStage - StageDiff - np) < PhiStage + Indirects)
+ PhiOp1 = getInitPhiReg(*InstOp1, BB);
+ else
+ PhiOp1 = getLoopPhiReg(*InstOp1, BB);
+ InstOp1 = MRI.getVRegDef(PhiOp1);
+ int PhiOpStage = Schedule.getStage(InstOp1);
+ int StageAdj = (PhiOpStage != -1 ? PhiStage - PhiOpStage : 0);
+ if (PhiOpStage != -1 && PrologStage - StageAdj >= Indirects + np &&
+ VRMap[PrologStage - StageAdj - Indirects - np].count(PhiOp1)) {
+ PhiOp1 = VRMap[PrologStage - StageAdj - Indirects - np][PhiOp1];
+ break;
+ }
+ ++Indirects;
+ }
+ } else
+ PhiOp1 = InitVal;
+ // If this references a generated Phi in the kernel, get the Phi operand
+ // from the incoming block.
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1))
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+
+ MachineInstr *PhiInst = MRI.getVRegDef(LoopVal);
+ bool LoopDefIsPhi = PhiInst && PhiInst->isPHI();
+ // In the epilog, a map lookup is needed to get the value from the kernel,
+ // or previous epilog block. How this is done depends on whether the
+ // instruction is scheduled in the previous block.
+ if (!InKernel) {
+ int StageDiffAdj = 0;
+ if (LoopValStage != -1 && StageScheduled > LoopValStage)
+ StageDiffAdj = StageScheduled - LoopValStage;
+ // Use the loop value defined in the kernel, unless the kernel
+ // contains the last definition of the Phi.
+ if (np == 0 && PrevStage == LastStageNum &&
+ (StageScheduled != 0 || LoopValStage != 0) &&
+ VRMap[PrevStage - StageDiffAdj].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj][LoopVal];
+ // Use the value defined by the Phi. We add one because we switch
+ // from looking at the loop value to the Phi definition.
+ else if (np > 0 && PrevStage == LastStageNum &&
+ VRMap[PrevStage - np + 1].count(Def))
+ PhiOp2 = VRMap[PrevStage - np + 1][Def];
+ // Use the loop value defined in the kernel.
+ else if (static_cast<unsigned>(LoopValStage) > PrologStage + 1 &&
+ VRMap[PrevStage - StageDiffAdj - np].count(LoopVal))
+ PhiOp2 = VRMap[PrevStage - StageDiffAdj - np][LoopVal];
+ // Use the value defined by the Phi, unless we're generating the first
+ // epilog and the Phi refers to a Phi in a different stage.
+ else if (VRMap[PrevStage - np].count(Def) &&
+ (!LoopDefIsPhi || (PrevStage != LastStageNum) ||
+ (LoopValStage == StageScheduled)))
+ PhiOp2 = VRMap[PrevStage - np][Def];
+ }
+
+ // Check if we can reuse an existing Phi. This occurs when a Phi
+ // references another Phi, and the other Phi is scheduled in an
+ // earlier stage. We can try to reuse an existing Phi up until the last
+ // stage of the current Phi.
+ if (LoopDefIsPhi) {
+ if (static_cast<int>(PrologStage - np) >= StageScheduled) {
+ int LVNumStages = getStagesForPhi(LoopVal);
+ int StageDiff = (StageScheduled - LoopValStage);
+ LVNumStages -= StageDiff;
+ // Make sure the loop value Phi has been processed already.
+ if (LVNumStages > (int)np && VRMap[CurStageNum].count(LoopVal)) {
+ NewReg = PhiOp2;
+ unsigned ReuseStage = CurStageNum;
+ if (isLoopCarried(*PhiInst))
+ ReuseStage -= LVNumStages;
+ // Check if the Phi to reuse has been generated yet. If not, then
+ // there is nothing to reuse.
+ if (VRMap[ReuseStage - np].count(LoopVal)) {
+ NewReg = VRMap[ReuseStage - np][LoopVal];
+
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI,
+ Def, NewReg);
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ PhiOp2 = NewReg;
+ if (VRMap[LastStageNum - np - 1].count(LoopVal))
+ PhiOp2 = VRMap[LastStageNum - np - 1][LoopVal];
+
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ continue;
+ }
+ }
+ }
+ if (InKernel && StageDiff > 0 &&
+ VRMap[CurStageNum - StageDiff - np].count(LoopVal))
+ PhiOp2 = VRMap[CurStageNum - StageDiff - np][LoopVal];
+ }
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // We define the Phis after creating the new pipelined code, so
+ // we need to rename the Phi values in scheduled instructions.
+
+ unsigned PrevReg = 0;
+ if (InKernel && VRMap[PrevStage - np].count(LoopVal))
+ PrevReg = VRMap[PrevStage - np][LoopVal];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg, PrevReg);
+ // If the Phi has been scheduled, use the new name for rewriting.
+ if (VRMap[CurStageNum - np].count(Def)) {
+ unsigned R = VRMap[CurStageNum - np][Def];
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, R,
+ NewReg);
+ }
+
+ // Check if we need to rename any uses that occurs after the loop. The
+ // register to replace depends on whether the Phi is scheduled in the
+ // epilog.
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+
+ // In the kernel, a dependent Phi uses the value from this Phi.
+ if (InKernel)
+ PhiOp2 = NewReg;
+
+ // Update the map with the new Phi name.
+ VRMap[CurStageNum - np][Def] = NewReg;
+ }
+
+ while (NumPhis++ < NumStages) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, NumPhis, &*BBI, Def,
+ NewReg, 0);
+ }
+
+ // Check if we need to rename a Phi that has been eliminated due to
+ // scheduling.
+ if (NumStages == 0 && IsLast && VRMap[CurStageNum].count(LoopVal))
+ replaceRegUsesAfterLoop(Def, VRMap[CurStageNum][LoopVal], BB, MRI, LIS);
+ }
+}
+
+/// Generate Phis for the specified block in the generated pipelined code.
+/// These are new Phis needed because the definition is scheduled after the
+/// use in the pipelined sequence.
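+/// For example, if a value is defined in stage 1 but one of its uses is
+/// scheduled in stage 0, the use executes one iteration ahead of the
+/// definition, so a new Phi must carry the previous iteration's value
+/// into the kernel and epilog blocks.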
+void ModuloScheduleExpander::generatePhis(
+ MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
+ unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ // Compute the stage number that contains the initial Phi value, and
+ // the Phi from the previous stage.
+ unsigned PrologStage = 0;
+ unsigned PrevStage = 0;
+ unsigned StageDiff = CurStageNum - LastStageNum;
+ bool InKernel = (StageDiff == 0);
+ if (InKernel) {
+ PrologStage = LastStageNum - 1;
+ PrevStage = CurStageNum;
+ } else {
+ PrologStage = LastStageNum - StageDiff;
+ PrevStage = LastStageNum + StageDiff - 1;
+ }
+
+ for (MachineBasicBlock::iterator BBI = BB->getFirstNonPHI(),
+ BBE = BB->instr_end();
+ BBI != BBE; ++BBI) {
+ for (unsigned i = 0, e = BBI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = BBI->getOperand(i);
+ if (!MO.isReg() || !MO.isDef() ||
+ !Register::isVirtualRegister(MO.getReg()))
+ continue;
+
+ int StageScheduled = Schedule.getStage(&*BBI);
+ assert(StageScheduled != -1 && "Expecting scheduled instruction.");
+ Register Def = MO.getReg();
+ unsigned NumPhis = getStagesForReg(Def, CurStageNum);
+ // An instruction scheduled in stage 0 that is used after the loop
+ // requires a phi in the epilog for the last definition from either
+ // the kernel or prolog.
+ if (!InKernel && NumPhis == 0 && StageScheduled == 0 &&
+ hasUseAfterLoop(Def, BB, MRI))
+ NumPhis = 1;
+ if (!InKernel && (unsigned)StageScheduled > PrologStage)
+ continue;
+
+ unsigned PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ // The number of Phis can't exceed the number of prolog stages. The
+ // prolog stage number is zero based.
+ if (NumPhis > PrologStage + 1 - StageScheduled)
+ NumPhis = PrologStage + 1 - StageScheduled;
+ for (unsigned np = 0; np < NumPhis; ++np) {
+ unsigned PhiOp1 = VRMap[PrologStage][Def];
+ if (np <= PrologStage)
+ PhiOp1 = VRMap[PrologStage - np][Def];
+ if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
+ if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
+ if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
+ PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ }
+ if (!InKernel)
+ PhiOp2 = VRMap[PrevStage - np][Def];
+
+ const TargetRegisterClass *RC = MRI.getRegClass(Def);
+ Register NewReg = MRI.createVirtualRegister(RC);
+
+ MachineInstrBuilder NewPhi =
+ BuildMI(*NewBB, NewBB->getFirstNonPHI(), DebugLoc(),
+ TII->get(TargetOpcode::PHI), NewReg);
+ NewPhi.addReg(PhiOp1).addMBB(BB1);
+ NewPhi.addReg(PhiOp2).addMBB(BB2);
+ if (np == 0)
+ InstrMap[NewPhi] = &*BBI;
+
+ // Rewrite uses and update the map. The actions depend upon whether
+ // we are generating code for the kernel or epilog blocks.
+ if (InKernel) {
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp1,
+ NewReg);
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, PhiOp2,
+ NewReg);
+
+ PhiOp2 = NewReg;
+ VRMap[PrevStage - np - 1][Def] = NewReg;
+ } else {
+ VRMap[CurStageNum - np][Def] = NewReg;
+ if (np == NumPhis - 1)
+ rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
+ NewReg);
+ }
+ if (IsLast && np == NumPhis - 1)
+ replaceRegUsesAfterLoop(Def, NewReg, BB, MRI, LIS);
+ }
+ }
+ }
+}
+
+/// Remove instructions that generate values with no uses.
+/// Typically, these are induction variable operations that generate values
+/// used in the loop itself. A dead instruction has a definition with
+/// no uses, or uses that occur in the original loop only.
+void ModuloScheduleExpander::removeDeadInstructions(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ // For each epilog block, check that the value defined by each instruction
+ // is used. If not, delete it.
+ for (MBBVectorTy::reverse_iterator MBB = EpilogBBs.rbegin(),
+ MBE = EpilogBBs.rend();
+ MBB != MBE; ++MBB)
+ for (MachineBasicBlock::reverse_instr_iterator MI = (*MBB)->instr_rbegin(),
+ ME = (*MBB)->instr_rend();
+ MI != ME;) {
+ // From DeadMachineInstructionElim. Don't delete inline assembly.
+ if (MI->isInlineAsm()) {
+ ++MI;
+ continue;
+ }
+ bool SawStore = false;
+ // Check if it's safe to remove the instruction due to side effects.
+ // We can, and want to, remove Phis here.
+ if (!MI->isSafeToMove(nullptr, SawStore) && !MI->isPHI()) {
+ ++MI;
+ continue;
+ }
+ bool used = true;
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end();
+ MOI != MOE; ++MOI) {
+ if (!MOI->isReg() || !MOI->isDef())
+ continue;
+ Register reg = MOI->getReg();
+ // Assume physical registers are used, unless they are marked dead.
+ if (Register::isPhysicalRegister(reg)) {
+ used = !MOI->isDead();
+ if (used)
+ break;
+ continue;
+ }
+ unsigned realUses = 0;
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(reg),
+ EI = MRI.use_end();
+ UI != EI; ++UI) {
+ // A use inside the original loop block is not a real use, since
+ // that block is removed later; only uses outside BB count.
+ if (UI->getParent()->getParent() != BB) {
+ realUses++;
+ used = true;
+ break;
+ }
+ }
+ if (realUses > 0)
+ break;
+ used = false;
+ }
+ if (!used) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI++->eraseFromParent();
+ continue;
+ }
+ ++MI;
+ }
+ // In the kernel block, check if we can remove a Phi that generates a value
+ // used in an instruction removed in the epilog block.
+ for (MachineBasicBlock::iterator BBI = KernelBB->instr_begin(),
+ BBE = KernelBB->getFirstNonPHI();
+ BBI != BBE;) {
+ MachineInstr *MI = &*BBI;
+ ++BBI;
+ Register reg = MI->getOperand(0).getReg();
+ if (MRI.use_begin(reg) == MRI.use_end()) {
+ LIS.RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ }
+ }
+}
+
+/// For loop carried definitions, we split the lifetime of a virtual register
+/// that has uses past the definition in the next iteration. A copy with a new
+/// virtual register is inserted before the definition, which helps with
+/// generating a better register assignment.
+///
+/// v1 = phi(a, v2) v1 = phi(a, v2)
+/// v2 = phi(b, v3) v2 = phi(b, v3)
+/// v3 = .. v4 = copy v1
+/// .. = V1 v3 = ..
+/// .. = v4
+void ModuloScheduleExpander::splitLifetimes(MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ for (auto &PHI : KernelBB->phis()) {
+ Register Def = PHI.getOperand(0).getReg();
+ // Check for any Phi definition that is used as an operand of another Phi
+ // in the same block.
+ for (MachineRegisterInfo::use_instr_iterator I = MRI.use_instr_begin(Def),
+ E = MRI.use_instr_end();
+ I != E; ++I) {
+ if (I->isPHI() && I->getParent() == KernelBB) {
+ // Get the loop carried definition.
+ unsigned LCDef = getLoopPhiReg(PHI, KernelBB);
+ if (!LCDef)
+ continue;
+ MachineInstr *MI = MRI.getVRegDef(LCDef);
+ if (!MI || MI->getParent() != KernelBB || MI->isPHI())
+ continue;
+ // Search through the rest of the block looking for uses of the Phi
+ // definition. If one occurs, then split the lifetime.
+ unsigned SplitReg = 0;
+ for (auto &BBJ : make_range(MachineBasicBlock::instr_iterator(MI),
+ KernelBB->instr_end()))
+ if (BBJ.readsRegister(Def)) {
+ // We split the lifetime when we find the first use.
+ if (SplitReg == 0) {
+ SplitReg = MRI.createVirtualRegister(MRI.getRegClass(Def));
+ BuildMI(*KernelBB, MI, MI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), SplitReg)
+ .addReg(Def);
+ }
+ BBJ.substituteRegister(Def, SplitReg, 0, *TRI);
+ }
+ if (!SplitReg)
+ continue;
+ // Search through each of the epilog blocks for any uses to be renamed.
+ for (auto &Epilog : EpilogBBs)
+ for (auto &I : *Epilog)
+ if (I.readsRegister(Def))
+ I.substituteRegister(Def, SplitReg, 0, *TRI);
+ break;
+ }
+ }
+ }
+}
+
+/// Remove the incoming block from the Phis in a basic block.
+static void removePhis(MachineBasicBlock *BB, MachineBasicBlock *Incoming) {
+ for (MachineInstr &MI : *BB) {
+ if (!MI.isPHI())
+ break;
+ for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2)
+ if (MI.getOperand(i + 1).getMBB() == Incoming) {
+ MI.RemoveOperand(i + 1);
+ MI.RemoveOperand(i);
+ break;
+ }
+ }
+}
+
+/// Create branches from each prolog basic block to the appropriate epilog
+/// block. These edges are needed if the loop ends before reaching the
+/// kernel.
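+/// For example, with two prolog blocks, the prolog nearest the kernel
+/// branches to the epilog nearest the kernel, so the remaining stages are
+/// drained through the epilog chain, while the first prolog branches to
+/// the last epilog, which finishes the only iteration in flight.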
+void ModuloScheduleExpander::addBranches(MachineBasicBlock &PreheaderBB,
+ MBBVectorTy &PrologBBs,
+ MachineBasicBlock *KernelBB,
+ MBBVectorTy &EpilogBBs,
+ ValueMapTy *VRMap) {
+ assert(PrologBBs.size() == EpilogBBs.size() && "Prolog/Epilog mismatch");
+ MachineBasicBlock *LastPro = KernelBB;
+ MachineBasicBlock *LastEpi = KernelBB;
+
+ // Start from the blocks connected to the kernel and work "out"
+ // to the first prolog and the last epilog blocks.
+ SmallVector<MachineInstr *, 4> PrevInsts;
+ unsigned MaxIter = PrologBBs.size() - 1;
+ for (unsigned i = 0, j = MaxIter; i <= MaxIter; ++i, --j) {
+ // Add branches to the prolog that go to the corresponding
+ // epilog, and the fall-thru prolog/kernel block.
+ MachineBasicBlock *Prolog = PrologBBs[j];
+ MachineBasicBlock *Epilog = EpilogBBs[i];
+
+ SmallVector<MachineOperand, 4> Cond;
+ Optional<bool> StaticallyGreater =
+ LoopInfo->createTripCountGreaterCondition(j + 1, *Prolog, Cond);
+ unsigned numAdded = 0;
+ if (!StaticallyGreater.hasValue()) {
+ Prolog->addSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, LastPro, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ Prolog->addSuccessor(Epilog);
+ Prolog->removeSuccessor(LastPro);
+ LastEpi->removeSuccessor(Epilog);
+ numAdded = TII->insertBranch(*Prolog, Epilog, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, LastEpi);
+ // Remove the blocks that are no longer referenced.
+ if (LastPro != LastEpi) {
+ LastEpi->clear();
+ LastEpi->eraseFromParent();
+ }
+ if (LastPro == KernelBB) {
+ LoopInfo->disposed();
+ NewKernel = nullptr;
+ }
+ LastPro->clear();
+ LastPro->eraseFromParent();
+ } else {
+ numAdded = TII->insertBranch(*Prolog, LastPro, nullptr, Cond, DebugLoc());
+ removePhis(Epilog, Prolog);
+ }
+ LastPro = Prolog;
+ LastEpi = Epilog;
+ for (MachineBasicBlock::reverse_instr_iterator I = Prolog->instr_rbegin(),
+ E = Prolog->instr_rend();
+ I != E && numAdded > 0; ++I, --numAdded)
+ updateInstruction(&*I, false, j, 0, VRMap);
+ }
+
+ if (NewKernel) {
+ LoopInfo->setPreheader(PrologBBs[MaxIter]);
+ LoopInfo->adjustTripCount(-(MaxIter + 1));
+ }
+}
+
+/// Return true if we can compute the amount the instruction changes
+/// during each iteration. Set Delta to the amount of the change.
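+/// For example, if the base register of MI's memory operand is updated by
+///   base = ADD base, 8
+/// each iteration, Delta is set to 8.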
+bool ModuloScheduleExpander::computeDelta(MachineInstr &MI, unsigned &Delta) {
+ const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
+ const MachineOperand *BaseOp;
+ int64_t Offset;
+ if (!TII->getMemOperandWithOffset(MI, BaseOp, Offset, TRI))
+ return false;
+
+ if (!BaseOp->isReg())
+ return false;
+
+ Register BaseReg = BaseOp->getReg();
+
+ MachineRegisterInfo &MRI = MF.getRegInfo();
+ // Check if there is a Phi. If so, get the definition in the loop.
+ MachineInstr *BaseDef = MRI.getVRegDef(BaseReg);
+ if (BaseDef && BaseDef->isPHI()) {
+ BaseReg = getLoopPhiReg(*BaseDef, MI.getParent());
+ BaseDef = MRI.getVRegDef(BaseReg);
+ }
+ if (!BaseDef)
+ return false;
+
+ int D = 0;
+ if (!TII->getIncrementValue(*BaseDef, D) && D >= 0)
+ return false;
+
+ Delta = D;
+ return true;
+}
+
+/// Update the memory operand with a new offset when the pipeliner
+/// generates a new copy of the instruction that refers to a
+/// different memory location.
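+/// For example, a load whose base address advances by 8 bytes every
+/// iteration has its memory operand offset adjusted by 8 * Num in the
+/// copy that executes Num iterations later; if the per-iteration change
+/// is unknown, the memory operand is widened to an unknown size instead.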
+void ModuloScheduleExpander::updateMemOperands(MachineInstr &NewMI,
+ MachineInstr &OldMI,
+ unsigned Num) {
+ if (Num == 0)
+ return;
+ // If the instruction has memory operands, then adjust the offset
+ // when the instruction appears in different stages.
+ if (NewMI.memoperands_empty())
+ return;
+ SmallVector<MachineMemOperand *, 2> NewMMOs;
+ for (MachineMemOperand *MMO : NewMI.memoperands()) {
+ // TODO: Figure out whether isAtomic is really necessary (see D57601).
+ if (MMO->isVolatile() || MMO->isAtomic() ||
+ (MMO->isInvariant() && MMO->isDereferenceable()) ||
+ (!MMO->getValue())) {
+ NewMMOs.push_back(MMO);
+ continue;
+ }
+ unsigned Delta;
+ if (Num != UINT_MAX && computeDelta(OldMI, Delta)) {
+ int64_t AdjOffset = Delta * Num;
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, AdjOffset, MMO->getSize()));
+ } else {
+ NewMMOs.push_back(
+ MF.getMachineMemOperand(MMO, 0, MemoryLocation::UnknownSize));
+ }
+ }
+ NewMI.setMemRefs(MF, NewMMOs);
+}
+
+/// Clone the instruction for the new pipelined loop and update the
+/// memory operands, if needed.
+MachineInstr *ModuloScheduleExpander::cloneInstr(MachineInstr *OldMI,
+ unsigned CurStageNum,
+ unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ // Check for tied operands in inline asm instructions. This should be handled
+ // elsewhere, but I'm not sure of the best solution.
+ if (OldMI->isInlineAsm())
+ for (unsigned i = 0, e = OldMI->getNumOperands(); i != e; ++i) {
+ const auto &MO = OldMI->getOperand(i);
+ if (MO.isReg() && MO.isUse())
+ break;
+ unsigned UseIdx;
+ if (OldMI->isRegTiedToUseOperand(i, &UseIdx))
+ NewMI->tieOperands(i, UseIdx);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Clone the instruction for the new pipelined loop. If needed, this
+/// function updates the instruction using the values saved in the
+/// InstrChanges structure.
+MachineInstr *ModuloScheduleExpander::cloneAndChangeInstr(
+ MachineInstr *OldMI, unsigned CurStageNum, unsigned InstStageNum) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(OldMI);
+ auto It = InstrChanges.find(OldMI);
+ if (It != InstrChanges.end()) {
+ std::pair<unsigned, int64_t> RegAndOffset = It->second;
+ unsigned BasePos, OffsetPos;
+ if (!TII->getBaseAndOffsetPosition(*OldMI, BasePos, OffsetPos))
+ return nullptr;
+ int64_t NewOffset = OldMI->getOperand(OffsetPos).getImm();
+ MachineInstr *LoopDef = findDefInLoop(RegAndOffset.first);
+ if (Schedule.getStage(LoopDef) > (signed)InstStageNum)
+ NewOffset += RegAndOffset.second * (CurStageNum - InstStageNum);
+ NewMI->getOperand(OffsetPos).setImm(NewOffset);
+ }
+ updateMemOperands(*NewMI, *OldMI, CurStageNum - InstStageNum);
+ return NewMI;
+}
+
+/// Update the machine instruction with new virtual registers. This
+/// function may change the definitions and/or uses.
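+/// For example, a definition of vreg v cloned into stage n is given a
+/// fresh register v', recorded as VRMap[n][v] = v', so that uses rewritten
+/// for stage n pick up v'.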
+void ModuloScheduleExpander::updateInstruction(MachineInstr *NewMI,
+ bool LastDef,
+ unsigned CurStageNum,
+ unsigned InstrStageNum,
+ ValueMapTy *VRMap) {
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
+ continue;
+ Register reg = MO.getReg();
+ if (MO.isDef()) {
+ // Create a new virtual register for the definition.
+ const TargetRegisterClass *RC = MRI.getRegClass(reg);
+ Register NewReg = MRI.createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ VRMap[CurStageNum][reg] = NewReg;
+ if (LastDef)
+ replaceRegUsesAfterLoop(reg, NewReg, BB, MRI, LIS);
+ } else if (MO.isUse()) {
+ MachineInstr *Def = MRI.getVRegDef(reg);
+ // Compute the stage that contains the last definition for the instruction.
+ int DefStageNum = Schedule.getStage(Def);
+ unsigned StageNum = CurStageNum;
+ if (DefStageNum != -1 && (int)InstrStageNum > DefStageNum) {
+ // Compute the difference in stages between the definition and the use.
+ unsigned StageDiff = (InstrStageNum - DefStageNum);
+ // Make an adjustment to get the last definition.
+ StageNum -= StageDiff;
+ }
+ if (VRMap[StageNum].count(reg))
+ MO.setReg(VRMap[StageNum][reg]);
+ }
+ }
+}
+
+/// Return the instruction in the loop that defines the register.
+/// If the definition is a Phi, then follow the Phi operand to
+/// the instruction in the loop.
+MachineInstr *ModuloScheduleExpander::findDefInLoop(unsigned Reg) {
+ SmallPtrSet<MachineInstr *, 8> Visited;
+ MachineInstr *Def = MRI.getVRegDef(Reg);
+ while (Def->isPHI()) {
+ if (!Visited.insert(Def).second)
+ break;
+ for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2)
+ if (Def->getOperand(i + 1).getMBB() == BB) {
+ Def = MRI.getVRegDef(Def->getOperand(i).getReg());
+ break;
+ }
+ }
+ return Def;
+}
+
+/// Return the new name for the value from the previous stage.
+unsigned ModuloScheduleExpander::getPrevMapVal(
+ unsigned StageNum, unsigned PhiStage, unsigned LoopVal, unsigned LoopStage,
+ ValueMapTy *VRMap, MachineBasicBlock *BB) {
+ unsigned PrevVal = 0;
+ if (StageNum > PhiStage) {
+ MachineInstr *LoopInst = MRI.getVRegDef(LoopVal);
+ if (PhiStage == LoopStage && VRMap[StageNum - 1].count(LoopVal))
+ // The name is defined in the previous stage.
+ PrevVal = VRMap[StageNum - 1][LoopVal];
+ else if (VRMap[StageNum].count(LoopVal))
+ // The previous name is defined in the current stage when the instruction
+ // order is swapped.
+ PrevVal = VRMap[StageNum][LoopVal];
+ else if (!LoopInst->isPHI() || LoopInst->getParent() != BB)
+ // The loop value hasn't yet been scheduled.
+ PrevVal = LoopVal;
+ else if (StageNum == PhiStage + 1)
+ // The loop value is another phi, which has not been scheduled.
+ PrevVal = getInitPhiReg(*LoopInst, BB);
+ else if (StageNum > PhiStage + 1 && LoopInst->getParent() == BB)
+ // The loop value is another phi, which has been scheduled.
+ PrevVal =
+ getPrevMapVal(StageNum - 1, PhiStage, getLoopPhiReg(*LoopInst, BB),
+ LoopStage, VRMap, BB);
+ }
+ return PrevVal;
+}
+
+/// Rewrite the Phi values in the specified block to use the mappings
+/// from the initial operand. Once the Phi is scheduled, we switch
+/// to using the loop value instead of the Phi value, so those names
+/// do not need to be rewritten.
+void ModuloScheduleExpander::rewritePhiValues(MachineBasicBlock *NewBB,
+ unsigned StageNum,
+ ValueMapTy *VRMap,
+ InstrMapTy &InstrMap) {
+ for (auto &PHI : BB->phis()) {
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(PHI, BB, InitVal, LoopVal);
+ Register PhiDef = PHI.getOperand(0).getReg();
+
+ unsigned PhiStage = (unsigned)Schedule.getStage(MRI.getVRegDef(PhiDef));
+ unsigned LoopStage = (unsigned)Schedule.getStage(MRI.getVRegDef(LoopVal));
+ unsigned NumPhis = getStagesForPhi(PhiDef);
+ if (NumPhis > StageNum)
+ NumPhis = StageNum;
+ for (unsigned np = 0; np <= NumPhis; ++np) {
+ unsigned NewVal =
+ getPrevMapVal(StageNum - np, PhiStage, LoopVal, LoopStage, VRMap, BB);
+ if (!NewVal)
+ NewVal = InitVal;
+ rewriteScheduledInstr(NewBB, InstrMap, StageNum - np, np, &PHI, PhiDef,
+ NewVal);
+ }
+ }
+}
+
+/// Rewrite a previously scheduled instruction to use the register value
+/// from the new instruction. Make sure the instruction occurs in the
+ // basic block, and that we don't change the uses in the new instruction.
+void ModuloScheduleExpander::rewriteScheduledInstr(
+ MachineBasicBlock *BB, InstrMapTy &InstrMap, unsigned CurStageNum,
+ unsigned PhiNum, MachineInstr *Phi, unsigned OldReg, unsigned NewReg,
+ unsigned PrevReg) {
+ bool InProlog = (CurStageNum < (unsigned)Schedule.getNumStages() - 1);
+ int StagePhi = Schedule.getStage(Phi) + PhiNum;
+ // Rewrite uses that have been scheduled already to use the new
+ // Phi register.
+ for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(OldReg),
+ EI = MRI.use_end();
+ UI != EI;) {
+ MachineOperand &UseOp = *UI;
+ MachineInstr *UseMI = UseOp.getParent();
+ ++UI;
+ if (UseMI->getParent() != BB)
+ continue;
+ if (UseMI->isPHI()) {
+ if (!Phi->isPHI() && UseMI->getOperand(0).getReg() == NewReg)
+ continue;
+ if (getLoopPhiReg(*UseMI, BB) != OldReg)
+ continue;
+ }
+ InstrMapTy::iterator OrigInstr = InstrMap.find(UseMI);
+ assert(OrigInstr != InstrMap.end() && "Instruction not scheduled.");
+ MachineInstr *OrigMI = OrigInstr->second;
+ int StageSched = Schedule.getStage(OrigMI);
+ int CycleSched = Schedule.getCycle(OrigMI);
+ unsigned ReplaceReg = 0;
+ // This is the stage for the scheduled instruction.
+ if (StagePhi == StageSched && Phi->isPHI()) {
+ int CyclePhi = Schedule.getCycle(Phi);
+ if (PrevReg && InProlog)
+ ReplaceReg = PrevReg;
+ else if (PrevReg && !isLoopCarried(*Phi) &&
+ (CyclePhi <= CycleSched || OrigMI->isPHI()))
+ ReplaceReg = PrevReg;
+ else
+ ReplaceReg = NewReg;
+ }
+ // The scheduled instruction occurs before the scheduled Phi, and the
+ // Phi is not loop carried.
+ if (!InProlog && StagePhi + 1 == StageSched && !isLoopCarried(*Phi))
+ ReplaceReg = NewReg;
+ if (StagePhi > StageSched && Phi->isPHI())
+ ReplaceReg = NewReg;
+ if (!InProlog && !Phi->isPHI() && StagePhi < StageSched)
+ ReplaceReg = NewReg;
+ if (ReplaceReg) {
+ MRI.constrainRegClass(ReplaceReg, MRI.getRegClass(OldReg));
+ UseOp.setReg(ReplaceReg);
+ }
+ }
+}
+
+bool ModuloScheduleExpander::isLoopCarried(MachineInstr &Phi) {
+ if (!Phi.isPHI())
+ return false;
+ unsigned DefCycle = Schedule.getCycle(&Phi);
+ int DefStage = Schedule.getStage(&Phi);
+
+ unsigned InitVal = 0;
+ unsigned LoopVal = 0;
+ getPhiRegs(Phi, Phi.getParent(), InitVal, LoopVal);
+ MachineInstr *Use = MRI.getVRegDef(LoopVal);
+ if (!Use || Use->isPHI())
+ return true;
+ unsigned LoopCycle = Schedule.getCycle(Use);
+ int LoopStage = Schedule.getStage(Use);
+ return (LoopCycle > DefCycle) || (LoopStage <= DefStage);
+}
+
+//===----------------------------------------------------------------------===//
+// PeelingModuloScheduleExpander implementation
+//===----------------------------------------------------------------------===//
+// This is a reimplementation of ModuloScheduleExpander that works by creating
+// a fully correct steady-state kernel and peeling off the prolog and epilogs.
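+//
+// For a hypothetical 3-stage schedule, the resulting block layout is roughly:
+//   P0 [0]  P1 [0, 1]  // peeled prologs (live stages in brackets)
+//   K  [0, 1, 2]       // steady-state kernel
+//   E* [...]           // peeled epilogs draining the remaining stages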
+//===----------------------------------------------------------------------===//
+
+namespace {
+// Remove any dead phis in MBB. Dead phis either have only one block as input
+// (in which case they are the identity) or have no uses.
+void EliminateDeadPhis(MachineBasicBlock *MBB, MachineRegisterInfo &MRI,
+ LiveIntervals *LIS) {
+ bool Changed = true;
+ while (Changed) {
+ Changed = false;
+ for (auto I = MBB->begin(); I != MBB->getFirstNonPHI();) {
+ MachineInstr &MI = *I++;
+ assert(MI.isPHI());
+ if (MRI.use_empty(MI.getOperand(0).getReg())) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ } else if (MI.getNumExplicitOperands() == 3) {
+ MRI.constrainRegClass(MI.getOperand(1).getReg(),
+ MRI.getRegClass(MI.getOperand(0).getReg()));
+ MRI.replaceRegWith(MI.getOperand(0).getReg(),
+ MI.getOperand(1).getReg());
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(MI);
+ MI.eraseFromParent();
+ Changed = true;
+ }
+ }
+ }
+}
+
+/// Rewrites the kernel block in-place to adhere to the given schedule.
+/// KernelRewriter holds all of the state required to perform the rewriting.
+class KernelRewriter {
+ ModuloSchedule &S;
+ MachineBasicBlock *BB;
+ MachineBasicBlock *PreheaderBB, *ExitBB;
+ MachineRegisterInfo &MRI;
+ const TargetInstrInfo *TII;
+ LiveIntervals *LIS;
+
+ // Map from register class to canonical undef register for that class.
+ DenseMap<const TargetRegisterClass *, Register> Undefs;
+ // Map from <LoopReg, InitReg> to phi register for all created phis. Note that
+ // this map is only used when InitReg is non-undef.
+ DenseMap<std::pair<unsigned, unsigned>, Register> Phis;
+ // Map from LoopReg to phi register where the InitReg is undef.
+ DenseMap<Register, Register> UndefPhis;
+
+ // Reg is used by MI. Return the new register MI should use to adhere to the
+ // schedule. Insert phis as necessary.
+ Register remapUse(Register Reg, MachineInstr &MI);
+ // Insert a phi that carries LoopReg from the loop body and InitReg otherwise.
+ // If InitReg is not given it is chosen arbitrarily. It will either be undef
+ // or will be chosen so as to share another phi.
+ Register phi(Register LoopReg, Optional<Register> InitReg = {},
+ const TargetRegisterClass *RC = nullptr);
+ // Create an undef register of the given register class.
+ Register undef(const TargetRegisterClass *RC);
+
+public:
+ KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ LiveIntervals *LIS = nullptr);
+ void rewrite();
+};
+} // namespace
+
+KernelRewriter::KernelRewriter(MachineLoop &L, ModuloSchedule &S,
+ LiveIntervals *LIS)
+ : S(S), BB(L.getTopBlock()), PreheaderBB(L.getLoopPreheader()),
+ ExitBB(L.getExitBlock()), MRI(BB->getParent()->getRegInfo()),
+ TII(BB->getParent()->getSubtarget().getInstrInfo()), LIS(LIS) {
+ PreheaderBB = *BB->pred_begin();
+ if (PreheaderBB == BB)
+ PreheaderBB = *std::next(BB->pred_begin());
+}
+
+void KernelRewriter::rewrite() {
+ // Rearrange the loop to be in schedule order. Note that the schedule may
+ // contain instructions that are not owned by the loop block (InstrChanges and
+ // friends), so we gracefully handle unowned instructions and delete any
+ // instructions that weren't in the schedule.
+ auto InsertPt = BB->getFirstTerminator();
+ MachineInstr *FirstMI = nullptr;
+ for (MachineInstr *MI : S.getInstructions()) {
+ if (MI->isPHI())
+ continue;
+ if (MI->getParent())
+ MI->removeFromParent();
+ BB->insert(InsertPt, MI);
+ if (!FirstMI)
+ FirstMI = MI;
+ }
+ assert(FirstMI && "Failed to find first MI in schedule");
+
+ // At this point all of the scheduled instructions are between FirstMI
+ // and the end of the block. Kill from the first non-phi to FirstMI.
+ for (auto I = BB->getFirstNonPHI(); I != FirstMI->getIterator();) {
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*I);
+ (I++)->eraseFromParent();
+ }
+
+ // Now remap every instruction in the loop.
+ for (MachineInstr &MI : *BB) {
+ if (MI.isPHI() || MI.isTerminator())
+ continue;
+ for (MachineOperand &MO : MI.uses()) {
+ if (!MO.isReg() || MO.getReg().isPhysical() || MO.isImplicit())
+ continue;
+ Register Reg = remapUse(MO.getReg(), MI);
+ MO.setReg(Reg);
+ }
+ }
+ EliminateDeadPhis(BB, MRI, LIS);
+
+ // Ensure a phi exists for all instructions that are either referenced by
+ // an illegal phi or by an instruction outside the loop. This allows us to
+ // treat remaps of these values the same as "normal" values that come from
+ // loop-carried phis.
+ for (auto MI = BB->getFirstNonPHI(); MI != BB->end(); ++MI) {
+ if (MI->isPHI()) {
+ Register R = MI->getOperand(0).getReg();
+ phi(R);
+ continue;
+ }
+
+ for (MachineOperand &Def : MI->defs()) {
+ for (MachineInstr &MI : MRI.use_instructions(Def.getReg())) {
+ if (MI.getParent() != BB) {
+ phi(Def.getReg());
+ break;
+ }
+ }
+ }
+ }
+}
+
+Register KernelRewriter::remapUse(Register Reg, MachineInstr &MI) {
+ MachineInstr *Producer = MRI.getUniqueVRegDef(Reg);
+ if (!Producer)
+ return Reg;
+
+ int ConsumerStage = S.getStage(&MI);
+ if (!Producer->isPHI()) {
+ // Non-phi producers are simple to remap. Insert as many phis as the
+ // difference between the consumer and producer stages.
+ if (Producer->getParent() != BB)
+ // Producer was not inside the loop. Use the register as-is.
+ return Reg;
+ int ProducerStage = S.getStage(Producer);
+ assert(ConsumerStage != -1 &&
+ "In-loop consumer should always be scheduled!");
+ assert(ConsumerStage >= ProducerStage);
+ unsigned StageDiff = ConsumerStage - ProducerStage;
+
+ for (unsigned I = 0; I < StageDiff; ++I)
+ Reg = phi(Reg);
+ return Reg;
+ }
+
+ // First, dive through the phi chain to find the defaults for the generated
+ // phis.
+ SmallVector<Optional<Register>, 4> Defaults;
+ Register LoopReg = Reg;
+ auto LoopProducer = Producer;
+ while (LoopProducer->isPHI() && LoopProducer->getParent() == BB) {
+ LoopReg = getLoopPhiReg(*LoopProducer, BB);
+ Defaults.emplace_back(getInitPhiReg(*LoopProducer, BB));
+ LoopProducer = MRI.getUniqueVRegDef(LoopReg);
+ assert(LoopProducer);
+ }
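+ // For example (register names illustrative): given the in-loop phis
+ //   %a = PHI [%init0, %preheader], [%b, %bb]
+ //   %b = PHI [%init1, %preheader], [%c, %bb]
+ // a use of %a dives to LoopReg == %c with Defaults == {%init0, %init1}.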
+ int LoopProducerStage = S.getStage(LoopProducer);
+
+ Optional<Register> IllegalPhiDefault;
+
+ if (LoopProducerStage == -1) {
+ // Do nothing.
+ } else if (LoopProducerStage > ConsumerStage) {
+ // This schedule is only representable if LoopProducerStage == ConsumerStage+1.
+ // In addition, Consumer's cycle must be scheduled after Producer in the
+ // rescheduled loop. This is enforced by the pipeliner's ASAP and ALAP
+ // functions.
+#ifndef NDEBUG // Silence unused variables in non-asserts mode.
+ int LoopProducerCycle = S.getCycle(LoopProducer);
+ int ConsumerCycle = S.getCycle(&MI);
+#endif
+ assert(LoopProducerCycle <= ConsumerCycle);
+ assert(LoopProducerStage == ConsumerStage + 1);
+ // Peel off the first phi from Defaults and insert a phi between producer
+ // and consumer. This phi will not be at the front of the block so we
+ // consider it illegal. It will only exist during the rewrite process; it
+ // needs to exist while we peel off prologs because these could take the
+ // default value. After that we can replace all uses with the loop producer
+ // value.
+ IllegalPhiDefault = Defaults.front();
+ Defaults.erase(Defaults.begin());
+ } else {
+ assert(ConsumerStage >= LoopProducerStage);
+ int StageDiff = ConsumerStage - LoopProducerStage;
+ if (StageDiff > 0) {
+ LLVM_DEBUG(dbgs() << " -- padding defaults array from " << Defaults.size()
+ << " to " << (Defaults.size() + StageDiff) << "\n");
+ // If we need more phis than we have defaults for, pad out with undefs for
+ // the earliest phis, which are at the end of the defaults chain (the
+ // chain is in reverse order).
+ Defaults.resize(Defaults.size() + StageDiff, Defaults.empty()
+ ? Optional<Register>()
+ : Defaults.back());
+ }
+ }
+
+ // Now we know the number of stages to jump back, insert the phi chain.
+ auto DefaultI = Defaults.rbegin();
+ while (DefaultI != Defaults.rend())
+ LoopReg = phi(LoopReg, *DefaultI++, MRI.getRegClass(Reg));
+
+ if (IllegalPhiDefault.hasValue()) {
+ // The consumer either consumes LoopProducer in the same iteration
+ // (because the producer is scheduled at an earlier cycle than the consumer)
+ // or the initial value. To facilitate this we create an illegal block here
+ // by embedding a phi in the middle of the block. We will fix this up
+ // immediately prior to pruning.
+ auto RC = MRI.getRegClass(Reg);
+ Register R = MRI.createVirtualRegister(RC);
+ BuildMI(*BB, MI, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(IllegalPhiDefault.getValue())
+ .addMBB(PreheaderBB) // Block choice is arbitrary and has no effect.
+ .addReg(LoopReg)
+ .addMBB(BB); // Block choice is arbitrary and has no effect.
+ return R;
+ }
+
+ return LoopReg;
+}
+
+Register KernelRewriter::phi(Register LoopReg, Optional<Register> InitReg,
+ const TargetRegisterClass *RC) {
+ // If the init register is not undef, try and find an existing phi.
+ if (InitReg.hasValue()) {
+ auto I = Phis.find({LoopReg, InitReg.getValue()});
+ if (I != Phis.end())
+ return I->second;
+ } else {
+ for (auto &KV : Phis) {
+ if (KV.first.first == LoopReg)
+ return KV.second;
+ }
+ }
+
+ // InitReg is either undef or no existing phi takes InitReg as input. Try and
+ // find a phi that takes undef as input.
+ auto I = UndefPhis.find(LoopReg);
+ if (I != UndefPhis.end()) {
+ Register R = I->second;
+ if (!InitReg.hasValue())
+ // Found a phi taking undef as input, and this input is undef so return
+ // without any more changes.
+ return R;
+ // Found a phi taking undef as input, so rewrite it to take InitReg.
+ MachineInstr *MI = MRI.getVRegDef(R);
+ MI->getOperand(1).setReg(InitReg.getValue());
+ Phis.insert({{LoopReg, InitReg.getValue()}, R});
+ MRI.constrainRegClass(R, MRI.getRegClass(InitReg.getValue()));
+ UndefPhis.erase(I);
+ return R;
+ }
+
+ // Failed to find any existing phi to reuse, so create a new one.
+ if (!RC)
+ RC = MRI.getRegClass(LoopReg);
+ Register R = MRI.createVirtualRegister(RC);
+ if (InitReg.hasValue())
+ MRI.constrainRegClass(R, MRI.getRegClass(*InitReg));
+ BuildMI(*BB, BB->getFirstNonPHI(), DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(InitReg.hasValue() ? *InitReg : undef(RC))
+ .addMBB(PreheaderBB)
+ .addReg(LoopReg)
+ .addMBB(BB);
+ if (!InitReg.hasValue())
+ UndefPhis[LoopReg] = R;
+ else
+ Phis[{LoopReg, *InitReg}] = R;
+ return R;
+}
+
+Register KernelRewriter::undef(const TargetRegisterClass *RC) {
+ Register &R = Undefs[RC];
+ if (R == 0) {
+ // Create an IMPLICIT_DEF that defines this register if we need it.
+ // All uses of this should be removed by the time we have finished unrolling
+ // prologs and epilogs.
+ R = MRI.createVirtualRegister(RC);
+ auto *InsertBB = &PreheaderBB->getParent()->front();
+ BuildMI(*InsertBB, InsertBB->getFirstTerminator(), DebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), R);
+ }
+ return R;
+}
+
+namespace {
+/// Describes an operand in the kernel of a pipelined loop. Characteristics of
+/// the operand are discovered, such as how many in-loop PHIs it has to jump
+/// through and defaults for these phis.
+class KernelOperandInfo {
+ MachineBasicBlock *BB;
+ MachineRegisterInfo &MRI;
+ SmallVector<Register, 4> PhiDefaults;
+ MachineOperand *Source;
+ MachineOperand *Target;
+
+public:
+ KernelOperandInfo(MachineOperand *MO, MachineRegisterInfo &MRI,
+ const SmallPtrSetImpl<MachineInstr *> &IllegalPhis)
+ : MRI(MRI) {
+ Source = MO;
+ BB = MO->getParent()->getParent();
+ while (isRegInLoop(MO)) {
+ MachineInstr *MI = MRI.getVRegDef(MO->getReg());
+ if (MI->isFullCopy()) {
+ MO = &MI->getOperand(1);
+ continue;
+ }
+ if (!MI->isPHI())
+ break;
+ // If this is an illegal phi, don't count it in distance.
+ if (IllegalPhis.count(MI)) {
+ MO = &MI->getOperand(3);
+ continue;
+ }
+
+ Register Default = getInitPhiReg(*MI, BB);
+ MO = MI->getOperand(2).getMBB() == BB ? &MI->getOperand(1)
+ : &MI->getOperand(3);
+ PhiDefaults.push_back(Default);
+ }
+ Target = MO;
+ }
+
+ bool operator==(const KernelOperandInfo &Other) const {
+ return PhiDefaults.size() == Other.PhiDefaults.size();
+ }
+
+ void print(raw_ostream &OS) const {
+ OS << "use of " << *Source << ": distance(" << PhiDefaults.size() << ") in "
+ << *Source->getParent();
+ }
+
+private:
+ bool isRegInLoop(MachineOperand *MO) {
+ return MO->isReg() && MO->getReg().isVirtual() &&
+ MRI.getVRegDef(MO->getReg())->getParent() == BB;
+ }
+};
+} // namespace
+
+MachineBasicBlock *
+PeelingModuloScheduleExpander::peelKernel(LoopPeelDirection LPD) {
+ MachineBasicBlock *NewBB = PeelSingleBlockLoop(LPD, BB, MRI, TII);
+ if (LPD == LPD_Front)
+ PeeledFront.push_back(NewBB);
+ else
+ PeeledBack.push_front(NewBB);
+ for (auto I = BB->begin(), NI = NewBB->begin(); !I->isTerminator();
+ ++I, ++NI) {
+ CanonicalMIs[&*I] = &*I;
+ CanonicalMIs[&*NI] = &*I;
+ BlockMIs[{NewBB, &*I}] = &*NI;
+ BlockMIs[{BB, &*I}] = &*I;
+ }
+ return NewBB;
+}
+
+void PeelingModuloScheduleExpander::peelPrologAndEpilogs() {
+ BitVector LS(Schedule.getNumStages(), true);
+ BitVector AS(Schedule.getNumStages(), true);
+ LiveStages[BB] = LS;
+ AvailableStages[BB] = AS;
+
+ // Peel out the prologs.
+ LS.reset();
+ for (int I = 0; I < Schedule.getNumStages() - 1; ++I) {
+ LS[I] = 1;
+ Prologs.push_back(peelKernel(LPD_Front));
+ LiveStages[Prologs.back()] = LS;
+ AvailableStages[Prologs.back()] = LS;
+ }
+
+ // Create a block that will end up as the new loop exiting block (dominated by
+ // all prologs and epilogs). It will only contain PHIs, in the same order as
+ // BB's PHIs. This gives us a poor-man's LCSSA with the inductive property
+ // that the exiting block is a (sub) clone of BB. This in turn gives us the
+ // property that any value defined in BB but used outside of BB is used by a
+ // PHI in the exiting block.
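+ // For example (names illustrative): a value %v defined in BB and used
+ // outside the loop is routed through %v.lcssa = PHI [%v, BB] in the
+ // exiting block, so out-of-loop uses can be remapped through that PHI.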
+ MachineBasicBlock *ExitingBB = CreateLCSSAExitingBlock();
+
+ // Push out the epilogs, again in reverse order.
+ // We can't assume anything about the minimum loop trip count at this point,
+ // so emit a fairly complex epilog:
+ // K[0, 1, 2] // Kernel runs stages 0, 1, 2
+ // E0[2] <- P1 // Epilog runs stage 2 only, so the state after is [0].
+ // E1[1, 2] <- P0 // Epilog 1 moves the last item from stage 0 to stage 2.
+ //
+ // This creates a single-successor single-predecessor sequence of blocks for
+ // each epilog, which are kept this way for simplicity at this stage and
+ // cleaned up by the optimizer later.
+ for (int I = 1; I <= Schedule.getNumStages() - 1; ++I) {
+ Epilogs.push_back(nullptr);
+ for (int J = Schedule.getNumStages() - 1; J >= I; --J) {
+ LS.reset();
+ LS[J] = 1;
+ Epilogs.back() = peelKernel(LPD_Back);
+ LiveStages[Epilogs.back()] = LS;
+ AvailableStages[Epilogs.back()] = AS;
+ }
+ }
+
+ // Now we've defined all the prolog and epilog blocks as a fallthrough
+ // sequence, add the edges that will be followed if the loop trip count is
+ // lower than the number of stages (connecting prologs directly with epilogs).
+ auto PI = Prologs.begin();
+ auto EI = Epilogs.begin();
+ assert(Prologs.size() == Epilogs.size());
+ for (; PI != Prologs.end(); ++PI, ++EI) {
+ MachineBasicBlock *Pred = *(*EI)->pred_begin();
+ (*PI)->addSuccessor(*EI);
+ for (MachineInstr &MI : (*EI)->phis()) {
+ Register Reg = MI.getOperand(1).getReg();
+ MachineInstr *Use = MRI.getUniqueVRegDef(Reg);
+ if (Use && Use->getParent() == Pred)
+ Reg = getEquivalentRegisterIn(Reg, *PI);
+ MI.addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/false));
+ MI.addOperand(MachineOperand::CreateMBB(*PI));
+ }
+ }
+
+ // Create a list of all blocks in order.
+ SmallVector<MachineBasicBlock *, 8> Blocks;
+ llvm::copy(PeeledFront, std::back_inserter(Blocks));
+ Blocks.push_back(BB);
+ llvm::copy(PeeledBack, std::back_inserter(Blocks));
+
+ // Iterate in reverse order over all instructions, remapping as we go.
+ for (MachineBasicBlock *B : reverse(Blocks)) {
+ for (auto I = B->getFirstInstrTerminator()->getReverseIterator();
+ I != std::next(B->getFirstNonPHI()->getReverseIterator());) {
+ MachineInstr *MI = &*I++;
+ rewriteUsesOf(MI);
+ }
+ }
+ // Now all remapping has been done, we're free to optimize the generated code.
+ for (MachineBasicBlock *B : reverse(Blocks))
+ EliminateDeadPhis(B, MRI, LIS);
+ EliminateDeadPhis(ExitingBB, MRI, LIS);
+}
+
+MachineBasicBlock *PeelingModuloScheduleExpander::CreateLCSSAExitingBlock() {
+ MachineFunction &MF = *BB->getParent();
+ MachineBasicBlock *Exit = *BB->succ_begin();
+ if (Exit == BB)
+ Exit = *std::next(BB->succ_begin());
+
+ MachineBasicBlock *NewBB = MF.CreateMachineBasicBlock(BB->getBasicBlock());
+ MF.insert(std::next(BB->getIterator()), NewBB);
+
+ // Clone all phis in BB into NewBB and rewrite.
+ for (MachineInstr &MI : BB->phis()) {
+ auto RC = MRI.getRegClass(MI.getOperand(0).getReg());
+ Register OldR = MI.getOperand(3).getReg();
+ Register R = MRI.createVirtualRegister(RC);
+ SmallVector<MachineInstr *, 4> Uses;
+ for (MachineInstr &Use : MRI.use_instructions(OldR))
+ if (Use.getParent() != BB)
+ Uses.push_back(&Use);
+ for (MachineInstr *Use : Uses)
+ Use->substituteRegister(OldR, R, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ MachineInstr *NI = BuildMI(NewBB, DebugLoc(), TII->get(TargetOpcode::PHI), R)
+ .addReg(OldR)
+ .addMBB(BB);
+ BlockMIs[{NewBB, &MI}] = NI;
+ CanonicalMIs[NI] = &MI;
+ }
+ BB->replaceSuccessor(Exit, NewBB);
+ Exit->replacePhiUsesWith(BB, NewBB);
+ NewBB->addSuccessor(Exit);
+
+ MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
+ SmallVector<MachineOperand, 4> Cond;
+ bool CanAnalyzeBr = !TII->analyzeBranch(*BB, TBB, FBB, Cond);
+ (void)CanAnalyzeBr;
+ assert(CanAnalyzeBr && "Must be able to analyze the loop branch!");
+ TII->removeBranch(*BB);
+ TII->insertBranch(*BB, TBB == Exit ? NewBB : TBB, FBB == Exit ? NewBB : FBB,
+ Cond, DebugLoc());
+ TII->insertUnconditionalBranch(*NewBB, Exit, DebugLoc());
+ return NewBB;
+}
+
+Register
+PeelingModuloScheduleExpander::getEquivalentRegisterIn(Register Reg,
+ MachineBasicBlock *BB) {
+ MachineInstr *MI = MRI.getUniqueVRegDef(Reg);
+ unsigned OpIdx = MI->findRegisterDefOperandIdx(Reg);
+ return BlockMIs[{BB, CanonicalMIs[MI]}]->getOperand(OpIdx).getReg();
+}
+
+void PeelingModuloScheduleExpander::rewriteUsesOf(MachineInstr *MI) {
+ if (MI->isPHI()) {
+ // This is an illegal PHI. The loop-carried (desired) value is operand 3,
+ // and it is produced by this block.
+ Register PhiR = MI->getOperand(0).getReg();
+ Register R = MI->getOperand(3).getReg();
+ int RMIStage = getStage(MRI.getUniqueVRegDef(R));
+ if (RMIStage != -1 && !AvailableStages[MI->getParent()].test(RMIStage))
+ R = MI->getOperand(1).getReg();
+ MRI.setRegClass(R, MRI.getRegClass(PhiR));
+ MRI.replaceRegWith(PhiR, R);
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+ return;
+ }
+
+ int Stage = getStage(MI);
+ if (Stage == -1 || LiveStages.count(MI->getParent()) == 0 ||
+ LiveStages[MI->getParent()].test(Stage))
+ // Instruction is live, no rewriting to do.
+ return;
+
+ for (MachineOperand &DefMO : MI->defs()) {
+ SmallVector<std::pair<MachineInstr *, Register>, 4> Subs;
+ for (MachineInstr &UseMI : MRI.use_instructions(DefMO.getReg())) {
+ // Only PHIs can use values from this block by construction.
+ // Match with the equivalent PHI in B.
+ assert(UseMI.isPHI());
+ Register Reg = getEquivalentRegisterIn(UseMI.getOperand(0).getReg(),
+ MI->getParent());
+ Subs.emplace_back(&UseMI, Reg);
+ }
+ for (auto &Sub : Subs)
+ Sub.first->substituteRegister(DefMO.getReg(), Sub.second, /*SubIdx=*/0,
+ *MRI.getTargetRegisterInfo());
+ }
+ if (LIS)
+ LIS->RemoveMachineInstrFromMaps(*MI);
+ MI->eraseFromParent();
+}
+
+void PeelingModuloScheduleExpander::fixupBranches() {
+ std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> Info =
+ TII->analyzeLoopForPipelining(BB);
+ assert(Info);
+
+ // Work outwards from the kernel.
+ bool KernelDisposed = false;
+ int TC = Schedule.getNumStages() - 1;
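+ // E.g. (stage count illustrative): with 3 stages, the prolog adjacent to
+ // the kernel branches on TC > 2 and the outermost prolog on TC > 1.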
+ for (auto PI = Prologs.rbegin(), EI = Epilogs.rbegin(); PI != Prologs.rend();
+ ++PI, ++EI, --TC) {
+ MachineBasicBlock *Prolog = *PI;
+ MachineBasicBlock *Fallthrough = *Prolog->succ_begin();
+ MachineBasicBlock *Epilog = *EI;
+ SmallVector<MachineOperand, 4> Cond;
+ TII->removeBranch(*Prolog);
+ Optional<bool> StaticallyGreater =
+ Info->createTripCountGreaterCondition(TC, *Prolog, Cond);
+ if (!StaticallyGreater.hasValue()) {
+ LLVM_DEBUG(dbgs() << "Dynamic: TC > " << TC << "\n");
+ // Dynamically branch based on Cond.
+ TII->insertBranch(*Prolog, Epilog, Fallthrough, Cond, DebugLoc());
+ } else if (*StaticallyGreater == false) {
+ LLVM_DEBUG(dbgs() << "Static-false: TC > " << TC << "\n");
+ // Prolog never falls through; branch to epilog and orphan interior
+ // blocks. Leave it to unreachable-block-elim to clean up.
+ Prolog->removeSuccessor(Fallthrough);
+ for (MachineInstr &P : Fallthrough->phis()) {
+ P.RemoveOperand(2);
+ P.RemoveOperand(1);
+ }
+ TII->insertUnconditionalBranch(*Prolog, Epilog, DebugLoc());
+ KernelDisposed = true;
+ } else {
+ LLVM_DEBUG(dbgs() << "Static-true: TC > " << TC << "\n");
+ // Prolog always falls through; remove incoming values in epilog.
+ Prolog->removeSuccessor(Epilog);
+ for (MachineInstr &P : Epilog->phis()) {
+ P.RemoveOperand(4);
+ P.RemoveOperand(3);
+ }
+ }
+ }
+
+ if (!KernelDisposed) {
+ Info->adjustTripCount(-(Schedule.getNumStages() - 1));
+ Info->setPreheader(Prologs.back());
+ } else {
+ Info->disposed();
+ }
+}
+
+void PeelingModuloScheduleExpander::rewriteKernel() {
+ KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KR.rewrite();
+}
+
+void PeelingModuloScheduleExpander::expand() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+ LLVM_DEBUG(Schedule.dump());
+
+ rewriteKernel();
+ peelPrologAndEpilogs();
+ fixupBranches();
+}
+
+void PeelingModuloScheduleExpander::validateAgainstModuloScheduleExpander() {
+ BB = Schedule.getLoop()->getTopBlock();
+ Preheader = Schedule.getLoop()->getLoopPreheader();
+
+ // Dump the schedule before we invalidate and remap all its instructions.
+ // Stash it in a string so we can print it if we found an error.
+ std::string ScheduleDump;
+ raw_string_ostream OS(ScheduleDump);
+ Schedule.print(OS);
+ OS.flush();
+
+ // First, run the normal ModuloScheduleExpander. We don't support any
+ // InstrChanges.
+ assert(LIS && "Requires LiveIntervals!");
+ ModuloScheduleExpander MSE(MF, Schedule, *LIS,
+ ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MachineBasicBlock *ExpandedKernel = MSE.getRewrittenKernel();
+ if (!ExpandedKernel) {
+ // The expander optimized away the kernel. We can't do any useful checking.
+ MSE.cleanup();
+ return;
+ }
+ // Before running the KernelRewriter, re-add BB into the CFG.
+ Preheader->addSuccessor(BB);
+
+ // Now run the new expansion algorithm.
+ KernelRewriter KR(*Schedule.getLoop(), Schedule);
+ KR.rewrite();
+ peelPrologAndEpilogs();
+
+ // Collect all illegal phis that the new algorithm created. We'll give these
+ // to KernelOperandInfo.
+ SmallPtrSet<MachineInstr *, 4> IllegalPhis;
+ for (auto NI = BB->getFirstNonPHI(); NI != BB->end(); ++NI) {
+ if (NI->isPHI())
+ IllegalPhis.insert(&*NI);
+ }
+
+ // Co-iterate across both kernels. We expect them to be identical apart from
+ // phis and full COPYs (we look through both).
+ SmallVector<std::pair<KernelOperandInfo, KernelOperandInfo>, 8> KOIs;
+ auto OI = ExpandedKernel->begin();
+ auto NI = BB->begin();
+ for (; !OI->isTerminator() && !NI->isTerminator(); ++OI, ++NI) {
+ while (OI->isPHI() || OI->isFullCopy())
+ ++OI;
+ while (NI->isPHI() || NI->isFullCopy())
+ ++NI;
+ assert(OI->getOpcode() == NI->getOpcode() && "Opcodes don't match?!");
+ // Analyze every operand separately.
+ for (auto OOpI = OI->operands_begin(), NOpI = NI->operands_begin();
+ OOpI != OI->operands_end(); ++OOpI, ++NOpI)
+ KOIs.emplace_back(KernelOperandInfo(&*OOpI, MRI, IllegalPhis),
+ KernelOperandInfo(&*NOpI, MRI, IllegalPhis));
+ }
+
+ bool Failed = false;
+ for (auto &OldAndNew : KOIs) {
+ if (OldAndNew.first == OldAndNew.second)
+ continue;
+ Failed = true;
+ errs() << "Modulo kernel validation error: [\n";
+ errs() << " [golden] ";
+ OldAndNew.first.print(errs());
+ errs() << " ";
+ OldAndNew.second.print(errs());
+ errs() << "]\n";
+ }
+
+ if (Failed) {
+ errs() << "Golden reference kernel:\n";
+ ExpandedKernel->print(errs());
+ errs() << "New kernel:\n";
+ BB->print(errs());
+ errs() << ScheduleDump;
+ report_fatal_error(
+ "Modulo kernel validation (-pipeliner-experimental-cg) failed");
+ }
+
+ // Cleanup by removing BB from the CFG again as the original
+ // ModuloScheduleExpander intended.
+ Preheader->removeSuccessor(BB);
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestPass implementation
+//===----------------------------------------------------------------------===//
+// This pass constructs a ModuloSchedule from the machine function and runs
+// ModuloScheduleExpander.
+//
+// The module is expected to contain a single-block analyzable loop.
+// The total order of instructions is taken from the loop as-is.
+// Instructions are expected to be annotated with a PostInstrSymbol.
+// This PostInstrSymbol must have the following format:
+// "Stage=%d Cycle=%d".
+//===----------------------------------------------------------------------===//
+
+namespace {
+class ModuloScheduleTest : public MachineFunctionPass {
+public:
+ static char ID;
+
+ ModuloScheduleTest() : MachineFunctionPass(ID) {
+ initializeModuloScheduleTestPass(*PassRegistry::getPassRegistry());
+ }
+
+ bool runOnMachineFunction(MachineFunction &MF) override;
+ void runOnLoop(MachineFunction &MF, MachineLoop &L);
+
+ void getAnalysisUsage(AnalysisUsage &AU) const override {
+ AU.addRequired<MachineLoopInfo>();
+ AU.addRequired<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+};
+} // namespace
+
+char ModuloScheduleTest::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(ModuloScheduleTest, "modulo-schedule-test",
+ "Modulo Schedule test pass", false, false)
+
+bool ModuloScheduleTest::runOnMachineFunction(MachineFunction &MF) {
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ for (auto *L : MLI) {
+ if (L->getTopBlock() != L->getBottomBlock())
+ continue;
+ runOnLoop(MF, *L);
+ return false;
+ }
+ return false;
+}
+
+static void parseSymbolString(StringRef S, int &Cycle, int &Stage) {
+ std::pair<StringRef, StringRef> StageAndCycle = getToken(S, "_");
+ std::pair<StringRef, StringRef> StageTokenAndValue =
+ getToken(StageAndCycle.first, "-");
+ std::pair<StringRef, StringRef> CycleTokenAndValue =
+ getToken(StageAndCycle.second, "-");
+ if (StageTokenAndValue.first != "Stage" ||
+ CycleTokenAndValue.first != "_Cycle") {
+ llvm_unreachable(
+ "Bad post-instr symbol syntax: see comment in ModuloScheduleTest");
+ return;
+ }
+
+ StageTokenAndValue.second.drop_front().getAsInteger(10, Stage);
+ CycleTokenAndValue.second.drop_front().getAsInteger(10, Cycle);
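+ // E.g. (values illustrative): "Stage-2_Cycle-7" yields Stage == 2 and
+ // Cycle == 7.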
+
+ dbgs() << " Stage=" << Stage << ", Cycle=" << Cycle << "\n";
+}
+
+void ModuloScheduleTest::runOnLoop(MachineFunction &MF, MachineLoop &L) {
+ LiveIntervals &LIS = getAnalysis<LiveIntervals>();
+ MachineBasicBlock *BB = L.getTopBlock();
+ dbgs() << "--- ModuloScheduleTest running on BB#" << BB->getNumber() << "\n";
+
+ DenseMap<MachineInstr *, int> Cycle, Stage;
+ std::vector<MachineInstr *> Instrs;
+ for (MachineInstr &MI : *BB) {
+ if (MI.isTerminator())
+ continue;
+ Instrs.push_back(&MI);
+ if (MCSymbol *Sym = MI.getPostInstrSymbol()) {
+ dbgs() << "Parsing post-instr symbol for " << MI;
+ parseSymbolString(Sym->getName(), Cycle[&MI], Stage[&MI]);
+ }
+ }
+
+ ModuloSchedule MS(MF, &L, std::move(Instrs), std::move(Cycle),
+ std::move(Stage));
+ ModuloScheduleExpander MSE(
+ MF, MS, LIS, /*InstrChanges=*/ModuloScheduleExpander::InstrChangesTy());
+ MSE.expand();
+ MSE.cleanup();
+}
+
+//===----------------------------------------------------------------------===//
+// ModuloScheduleTestAnnotater implementation
+//===----------------------------------------------------------------------===//
+
+void ModuloScheduleTestAnnotater::annotate() {
+ for (MachineInstr *MI : S.getInstructions()) {
+ SmallVector<char, 16> SV;
+ raw_svector_ostream OS(SV);
+ OS << "Stage-" << S.getStage(MI) << "_Cycle-" << S.getCycle(MI);
+ MCSymbol *Sym = MF.getContext().getOrCreateSymbol(OS.str());
+ MI->setPostInstrSymbol(MF, Sym);
+ }
+}
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index c70b62252139..1a493964e678 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -97,7 +97,7 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
unsigned &SingleValReg,
InstrSet &PHIsInCycle) {
assert(MI->isPHI() && "IsSingleValuePHICycle expects a PHI instruction");
- unsigned DstReg = MI->getOperand(0).getReg();
+ Register DstReg = MI->getOperand(0).getReg();
// See if we already saw this register.
if (!PHIsInCycle.insert(MI).second)
@@ -109,16 +109,15 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
// Scan the PHI operands.
for (unsigned i = 1; i != MI->getNumOperands(); i += 2) {
- unsigned SrcReg = MI->getOperand(i).getReg();
+ Register SrcReg = MI->getOperand(i).getReg();
if (SrcReg == DstReg)
continue;
MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
// Skip over register-to-register moves.
- if (SrcMI && SrcMI->isCopy() &&
- !SrcMI->getOperand(0).getSubReg() &&
+ if (SrcMI && SrcMI->isCopy() && !SrcMI->getOperand(0).getSubReg() &&
!SrcMI->getOperand(1).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
+ Register::isVirtualRegister(SrcMI->getOperand(1).getReg())) {
SrcReg = SrcMI->getOperand(1).getReg();
SrcMI = MRI->getVRegDef(SrcReg);
}
@@ -142,8 +141,8 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI,
/// other PHIs in a cycle.
bool OptimizePHIs::IsDeadPHICycle(MachineInstr *MI, InstrSet &PHIsInCycle) {
assert(MI->isPHI() && "IsDeadPHICycle expects a PHI instruction");
- unsigned DstReg = MI->getOperand(0).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ Register DstReg = MI->getOperand(0).getReg();
+ assert(Register::isVirtualRegister(DstReg) &&
"PHI destination is not a virtual register");
// See if we already saw this register.
@@ -177,7 +176,7 @@ bool OptimizePHIs::OptimizeBB(MachineBasicBlock &MBB) {
InstrSet PHIsInCycle;
if (IsSingleValuePHICycle(MI, SingleValReg, PHIsInCycle) &&
SingleValReg != 0) {
- unsigned OldReg = MI->getOperand(0).getReg();
+ Register OldReg = MI->getOperand(0).getReg();
if (!MRI->constrainRegClass(SingleValReg, MRI->getRegClass(OldReg)))
continue;
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 948a5835438c..4dd4c4b1084e 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -31,7 +31,9 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetOpcodes.h"
+#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Pass.h"
@@ -168,7 +170,7 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
// Remove dead IMPLICIT_DEF instructions.
for (MachineInstr *DefMI : ImpDefs) {
- unsigned DefReg = DefMI->getOperand(0).getReg();
+ Register DefReg = DefMI->getOperand(0).getReg();
if (MRI->use_nodbg_empty(DefReg)) {
if (LIS)
LIS->RemoveMachineInstrFromMaps(*DefMI);
@@ -183,6 +185,11 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MF.DeleteMachineInstr(I.first);
}
+ // TODO: we should use the incremental DomTree updater here.
+ if (Changed)
+ if (auto *MDT = getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->getBase().recalculate(MF);
+
LoweredPHIs.clear();
ImpDefs.clear();
VRegPHIUseCount.clear();
@@ -240,7 +247,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
MachineInstr *MPhi = MBB.remove(&*MBB.begin());
unsigned NumSrcs = (MPhi->getNumOperands() - 1) / 2;
- unsigned DestReg = MPhi->getOperand(0).getReg();
+ Register DestReg = MPhi->getOperand(0).getReg();
assert(MPhi->getOperand(0).getSubReg() == 0 && "Can't handle sub-reg PHIs");
bool isDead = MPhi->getOperand(0).isDead();
@@ -252,11 +259,12 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// Insert a register to register copy at the top of the current block (but
// after any remaining phi nodes) which copies the new incoming register
// into the phi node destination.
+ MachineInstr *PHICopy = nullptr;
const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
if (allPhiOperandsUndefined(*MPhi, *MRI))
// If all sources of a PHI node are implicit_def or undef uses, just emit an
// implicit_def instead of a copy.
- BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ PHICopy = BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), DestReg);
else {
// Can we reuse an earlier PHI node? This only happens for critical edges,
@@ -273,15 +281,13 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
}
- BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), DestReg)
- .addReg(IncomingReg);
+ // Give the target the possibility to handle special cases; fall through otherwise.
+ PHICopy = TII->createPHIDestinationCopy(MBB, AfterPHIsIt, MPhi->getDebugLoc(),
+ IncomingReg, DestReg);
}
// Update live variable information if there is any.
if (LV) {
- MachineInstr &PHICopy = *std::prev(AfterPHIsIt);
-
if (IncomingReg) {
LiveVariables::VarInfo &VI = LV->getVarInfo(IncomingReg);
@@ -302,7 +308,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// killed. Note that because the value is defined in several places (once
// each for each incoming block), the "def" block and instruction fields
// for the VarInfo is not filled in.
- LV->addVirtualRegisterKilled(IncomingReg, PHICopy);
+ LV->addVirtualRegisterKilled(IncomingReg, *PHICopy);
}
// Since we are going to be deleting the PHI node, if it is the last use of
@@ -312,15 +318,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// If the result is dead, update LV.
if (isDead) {
- LV->addVirtualRegisterDead(DestReg, PHICopy);
+ LV->addVirtualRegisterDead(DestReg, *PHICopy);
LV->removeVirtualRegisterDead(DestReg, *MPhi);
}
}
// Update LiveIntervals for the new copy or implicit def.
if (LIS) {
- SlotIndex DestCopyIndex =
- LIS->InsertMachineInstrInMaps(*std::prev(AfterPHIsIt));
+ SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(*PHICopy);
SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB);
if (IncomingReg) {
@@ -368,11 +373,11 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
// IncomingReg register in the corresponding predecessor basic block.
SmallPtrSet<MachineBasicBlock*, 8> MBBsInsertedInto;
for (int i = NumSrcs - 1; i >= 0; --i) {
- unsigned SrcReg = MPhi->getOperand(i*2+1).getReg();
+ Register SrcReg = MPhi->getOperand(i * 2 + 1).getReg();
unsigned SrcSubReg = MPhi->getOperand(i*2+1).getSubReg();
bool SrcUndef = MPhi->getOperand(i*2+1).isUndef() ||
isImplicitlyDefined(SrcReg, *MRI);
- assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ assert(Register::isVirtualRegister(SrcReg) &&
"Machine PHI Operands must all be virtual registers!");
// Get the MachineBasicBlock equivalent of the BasicBlock that is the source
@@ -406,9 +411,9 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
if (DefMI->isImplicitDef())
ImpDefs.insert(DefMI);
} else {
- NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(),
- TII->get(TargetOpcode::COPY), IncomingReg)
- .addReg(SrcReg, 0, SrcSubReg);
+ NewSrcInstr =
+ TII->createPHISourceCopy(opBlock, InsertPos, MPhi->getDebugLoc(),
+ SrcReg, SrcSubReg, IncomingReg);
}
}
@@ -457,7 +462,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB,
}
} else {
// We just inserted this copy.
- KillInst = std::prev(InsertPos);
+ KillInst = NewSrcInstr;
}
}
assert(KillInst->readsRegister(SrcReg) && "Cannot find kill instruction");
@@ -567,7 +572,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
for (MachineBasicBlock::iterator BBI = MBB.begin(), BBE = MBB.end();
BBI != BBE && BBI->isPHI(); ++BBI) {
for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
- unsigned Reg = BBI->getOperand(i).getReg();
+ Register Reg = BBI->getOperand(i).getReg();
MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
// Is there a critical edge from PreMBB to MBB?
if (PreMBB->succ_size() == 1)
diff --git a/lib/CodeGen/PatchableFunction.cpp b/lib/CodeGen/PatchableFunction.cpp
index a3fa1b0ad8ed..529fde84e39a 100644
--- a/lib/CodeGen/PatchableFunction.cpp
+++ b/lib/CodeGen/PatchableFunction.cpp
@@ -78,7 +78,7 @@ bool PatchableFunction::runOnMachineFunction(MachineFunction &MF) {
MIB.add(MO);
FirstActualI->eraseFromParent();
- MF.ensureAlignment(4);
+ MF.ensureAlignment(Align(16));
return true;
}
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index b918396aa8c5..54f1d38ed106 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -418,7 +418,7 @@ namespace {
const MachineRegisterInfo &MRI,
const TargetInstrInfo *TII = nullptr)
: DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
- if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (!Register::isPhysicalRegister(Reg)) {
Def = MRI.getVRegDef(Reg);
DefIdx = MRI.def_begin(Reg).getOperandNo();
}
@@ -460,8 +460,8 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (Register::isPhysicalRegister(DstReg) ||
+ Register::isPhysicalRegister(SrcReg))
return false;
if (MRI->hasOneNonDBGUse(SrcReg))
@@ -581,7 +581,7 @@ optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
MRI->constrainRegClass(DstReg, DstRC);
}
- unsigned NewVR = MRI->createVirtualRegister(RC);
+ Register NewVR = MRI->createVirtualRegister(RC);
MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVR)
.addReg(DstReg, 0, SubIdx);
@@ -609,8 +609,8 @@ bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
unsigned SrcReg, SrcReg2;
int CmpMask, CmpValue;
if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
- (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
+ Register::isPhysicalRegister(SrcReg) ||
+ (SrcReg2 != 0 && Register::isPhysicalRegister(SrcReg2)))
return false;
// Attempt to optimize the comparison instruction.
@@ -663,7 +663,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// Thus, instead of maintaining untested code, we will revisit that if
// that changes at some point.
unsigned Reg = RegSubReg.Reg;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return false;
const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
@@ -675,7 +675,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
do {
CurSrcPair = SrcToLook.pop_back_val();
// As explained above, do not handle physical registers
- if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ if (Register::isPhysicalRegister(CurSrcPair.Reg))
return false;
ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
@@ -723,7 +723,7 @@ bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
// constraints to the register allocator. Moreover, if we want to extend
// the live-range of a physical register, unlike SSA virtual register,
// we will have to check that they aren't redefine before the related use.
- if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
+ if (Register::isPhysicalRegister(CurSrcPair.Reg))
return false;
// Keep following the chain if the value isn't any better yet.
@@ -761,7 +761,7 @@ insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
// NewRC is only correct if no subregisters are involved. findNextSource()
// should have rejected those cases already.
assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
- unsigned NewVR = MRI.createVirtualRegister(NewRC);
+ Register NewVR = MRI.createVirtualRegister(NewRC);
MachineBasicBlock *MBB = OrigPHI.getParent();
MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
TII.get(TargetOpcode::PHI), NewVR);
@@ -1170,7 +1170,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
"Coalescer can understand multiple defs?!");
const MachineOperand &MODef = MI.getOperand(0);
// Do not rewrite physical definitions.
- if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
+ if (Register::isPhysicalRegister(MODef.getReg()))
return false;
bool Changed = false;
@@ -1221,7 +1221,7 @@ bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
MachineInstr &
PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
RegSubRegPair Def, RewriteMapTy &RewriteMap) {
- assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
+ assert(!Register::isPhysicalRegister(Def.Reg) &&
"We do not rewrite physical registers");
// Find the new source to use in the COPY rewrite.
@@ -1229,7 +1229,7 @@ PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
// Insert the COPY.
const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
- unsigned NewVReg = MRI->createVirtualRegister(DefRC);
+ Register NewVReg = MRI->createVirtualRegister(DefRC);
MachineInstr *NewCopy =
BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
@@ -1280,7 +1280,7 @@ bool PeepholeOptimizer::optimizeUncoalescableCopy(
while (CpyRewriter.getNextRewritableSource(Src, Def)) {
// If a physical register is here, this is probably for a good reason.
// Do not rewrite that.
- if (TargetRegisterInfo::isPhysicalRegister(Def.Reg))
+ if (Register::isPhysicalRegister(Def.Reg))
return false;
// If we do not know how to rewrite this definition, there is no point
@@ -1315,12 +1315,11 @@ bool PeepholeOptimizer::isLoadFoldable(
if (MCID.getNumDefs() != 1)
return false;
- unsigned Reg = MI.getOperand(0).getReg();
+ Register Reg = MI.getOperand(0).getReg();
// To reduce compilation time, we check MRI->hasOneNonDBGUser when inserting
// loads. It should be checked when processing uses of the load, since
// uses can be removed during peephole.
- if (!MI.getOperand(0).getSubReg() &&
- TargetRegisterInfo::isVirtualRegister(Reg) &&
+ if (!MI.getOperand(0).getSubReg() && Register::isVirtualRegister(Reg) &&
MRI->hasOneNonDBGUser(Reg)) {
FoldAsLoadDefCandidates.insert(Reg);
return true;
@@ -1336,8 +1335,8 @@ bool PeepholeOptimizer::isMoveImmediate(
return false;
if (MCID.getNumDefs() != 1)
return false;
- unsigned Reg = MI.getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ Register Reg = MI.getOperand(0).getReg();
+ if (Register::isVirtualRegister(Reg)) {
ImmDefMIs.insert(std::make_pair(Reg, &MI));
ImmDefRegs.insert(Reg);
return true;
@@ -1359,8 +1358,8 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
// Ignore dead implicit defs.
if (MO.isImplicit() && MO.isDead())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (ImmDefRegs.count(Reg) == 0)
continue;
@@ -1393,12 +1392,12 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
DenseMap<unsigned, MachineInstr *> &CopyMIs) {
assert(MI.isCopy() && "expected a COPY machine instruction");
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DstReg))
+ Register DstReg = MI.getOperand(0).getReg();
+ if (!Register::isVirtualRegister(DstReg))
return false;
if (CopySrcRegs.insert(SrcReg).second) {
@@ -1416,7 +1415,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
if (SrcSubReg != PrevSrcSubReg)
return false;
- unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
+ Register PrevDstReg = PrevCopy->getOperand(0).getReg();
// Only replace if the copy register class is the same.
//
@@ -1433,8 +1432,7 @@ bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
}
bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
- return TargetRegisterInfo::isPhysicalRegister(Reg) &&
- !MRI->isAllocatable(Reg);
+ return Register::isPhysicalRegister(Reg) && !MRI->isAllocatable(Reg);
}
bool PeepholeOptimizer::foldRedundantNAPhysCopy(
@@ -1444,9 +1442,9 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
if (DisableNAPhysCopyOpt)
return false;
- unsigned DstReg = MI.getOperand(0).getReg();
- unsigned SrcReg = MI.getOperand(1).getReg();
- if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ Register DstReg = MI.getOperand(0).getReg();
+ Register SrcReg = MI.getOperand(1).getReg();
+ if (isNAPhysCopy(SrcReg) && Register::isVirtualRegister(DstReg)) {
// %vreg = COPY %physreg
// Avoid using a datastructure which can track multiple live non-allocatable
// phys->virt copies since LLVM doesn't seem to do this.
@@ -1454,7 +1452,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
- if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
+ if (!(Register::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
return false;
// %physreg = COPY %vreg
@@ -1467,7 +1465,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
return false;
}
- unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
+ Register PrevDstReg = PrevCopy->second->getOperand(0).getReg();
if (PrevDstReg == SrcReg) {
// Remove the virt->phys copy: we saw the virtual register definition, and
// the non-allocatable physical register's state hasn't changed since then.
@@ -1489,7 +1487,7 @@ bool PeepholeOptimizer::foldRedundantNAPhysCopy(
static bool isVirtualRegisterOperand(MachineOperand &MO) {
if (!MO.isReg())
return false;
- return TargetRegisterInfo::isVirtualRegister(MO.getReg());
+ return Register::isVirtualRegister(MO.getReg());
}
bool PeepholeOptimizer::findTargetRecurrence(
@@ -1662,7 +1660,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
for (const MachineOperand &MO : MI->operands()) {
// Visit all operands: definitions can be implicit or explicit.
if (MO.isReg()) {
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isDef() && isNAPhysCopy(Reg)) {
const auto &Def = NAPhysToVirtMIs.find(Reg);
if (Def != NAPhysToVirtMIs.end()) {
@@ -1778,7 +1776,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
LocalMIs.erase(DefMI);
LocalMIs.insert(FoldMI);
if (MI->isCall())
- MI->getMF()->updateCallSiteInfo(MI, FoldMI);
+ MI->getMF()->moveCallSiteInfo(MI, FoldMI);
MI->eraseFromParent();
DefMI->eraseFromParent();
MRI->markUsesInDebugValueAsUndef(FoldedReg);
@@ -1810,7 +1808,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
assert(Def->isCopy() && "Invalid definition");
// Copy instruction are supposed to be: Def = Src.
// If someone breaks this assumption, bad things will happen everywhere.
- assert(Def->getNumOperands() == 2 && "Invalid number of operands");
+ // There may be implicit uses preventing the copy from being moved across
+ // some target-specific register definitions.
+ assert(Def->getNumOperands() - Def->getNumImplicitOperands() == 2 &&
+ "Invalid number of operands");
+ assert(!Def->hasImplicitDef() && "Only implicit uses are allowed");
if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
// If we look for a different subreg, it means we want a subreg of src.
@@ -1855,6 +1857,11 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
SrcIdx = OpIdx;
}
+ // In some rare cases, Def has no input, so SrcIdx is out of bounds and
+ // getOperand(SrcIdx) below would fail.
+ if (SrcIdx >= Def->getNumOperands())
+ return ValueTrackerResult();
+
// Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
// will break the assumed guarantees for the upper bits.
for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
@@ -2087,7 +2094,7 @@ ValueTrackerResult ValueTracker::getNextSource() {
// If we can still move up in the use-def chain, move to the next
// definition.
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
+ if (!Register::isPhysicalRegister(Reg) && OneRegSrc) {
MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
if (DI != MRI.def_end()) {
Def = DI->getParent();
diff --git a/lib/CodeGen/PreISelIntrinsicLowering.cpp b/lib/CodeGen/PreISelIntrinsicLowering.cpp
index 2752e186875c..0d2f6f99ca96 100644
--- a/lib/CodeGen/PreISelIntrinsicLowering.cpp
+++ b/lib/CodeGen/PreISelIntrinsicLowering.cpp
@@ -76,7 +76,7 @@ static bool lowerObjCCall(Function &F, const char *NewFn,
}
for (auto I = F.use_begin(), E = F.use_end(); I != E;) {
- auto *CI = dyn_cast<CallInst>(I->getUser());
+ auto *CI = cast<CallInst>(I->getUser());
assert(CI->getCalledFunction() && "Cannot lower an indirect call!");
++I;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b38987ad1c90..11bff45f9ad5 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -73,9 +73,9 @@ bool ProcessImplicitDefs::canTurnIntoImplicitDef(MachineInstr *MI) {
void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
LLVM_DEBUG(dbgs() << "Processing " << *MI);
- unsigned Reg = MI->getOperand(0).getReg();
+ Register Reg = MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
// For virtual registers, mark all uses as <undef>, and convert users to
// implicit-def when possible.
for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) {
@@ -100,8 +100,8 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) {
for (MachineOperand &MO : UserMI->operands()) {
if (!MO.isReg())
continue;
- unsigned UserReg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(UserReg) ||
+ Register UserReg = MO.getReg();
+ if (!Register::isPhysicalRegister(UserReg) ||
!TRI->regsOverlap(Reg, UserReg))
continue;
// UserMI uses or redefines Reg. Set <undef> flags on all uses.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index d463bee67595..729f06dda62b 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -898,7 +898,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &MF) {
// frame index registers. Functions which don't want/need this optimization
// will continue to use the existing code path.
if (MFI.getUseLocalStackAllocationBlock()) {
- unsigned Align = MFI.getLocalFrameMaxAlign();
+ unsigned Align = MFI.getLocalFrameMaxAlign().value();
// Adjust to alignment boundary.
Offset = alignTo(Offset, Align, Skew);
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index da3ef4b771f3..74e721dbd138 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -129,7 +129,7 @@ const PseudoSourceValue *
PseudoSourceValueManager::getFixedStack(int FI) {
std::unique_ptr<FixedStackPseudoSourceValue> &V = FSValues[FI];
if (!V)
- V = llvm::make_unique<FixedStackPseudoSourceValue>(FI, TII);
+ V = std::make_unique<FixedStackPseudoSourceValue>(FI, TII);
return V.get();
}
@@ -138,7 +138,7 @@ PseudoSourceValueManager::getGlobalValueCallEntry(const GlobalValue *GV) {
std::unique_ptr<const GlobalValuePseudoSourceValue> &E =
GlobalCallEntries[GV];
if (!E)
- E = llvm::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
+ E = std::make_unique<GlobalValuePseudoSourceValue>(GV, TII);
return E.get();
}
@@ -147,6 +147,6 @@ PseudoSourceValueManager::getExternalSymbolCallEntry(const char *ES) {
std::unique_ptr<const ExternalSymbolPseudoSourceValue> &E =
ExternalCallEntries[ES];
if (!E)
- E = llvm::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
+ E = std::make_unique<ExternalSymbolPseudoSourceValue>(ES, TII);
return E.get();
}
diff --git a/lib/CodeGen/ReachingDefAnalysis.cpp b/lib/CodeGen/ReachingDefAnalysis.cpp
index f05c97ad621e..2850033e6419 100644
--- a/lib/CodeGen/ReachingDefAnalysis.cpp
+++ b/lib/CodeGen/ReachingDefAnalysis.cpp
@@ -9,6 +9,7 @@
#include "llvm/CodeGen/ReachingDefAnalysis.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp
index 1cbe75c27d13..156daaa03bb5 100644
--- a/lib/CodeGen/RegAllocBase.cpp
+++ b/lib/CodeGen/RegAllocBase.cpp
@@ -73,7 +73,7 @@ void RegAllocBase::seedLiveRegs() {
NamedRegionTimer T("seed", "Seed Live Regs", TimerGroupName,
TimerGroupDescription, TimePassesIsEnabled);
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (MRI->reg_nodbg_empty(Reg))
continue;
enqueue(&LIS->getInterval(Reg));
@@ -154,7 +154,7 @@ void RegAllocBase::allocatePhysRegs() {
continue;
}
LLVM_DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
- assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ assert(Register::isVirtualRegister(SplitVirtReg->reg) &&
"expect split value in virtual register");
enqueue(SplitVirtReg);
++NumNewQueued;
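
The index2VirtReg/virtReg2Index helpers moved to Register as well. A sketch of the round trip, assuming this era's encoding in which virtual register numbers are dense indices with a tag bit set:

    #include "llvm/CodeGen/Register.h"
    #include <cassert>

    unsigned denseIndex(llvm::Register VReg) {
      assert(llvm::Register::isVirtualRegister(VReg) &&
             "only virtual registers have dense indices");
      unsigned Idx = llvm::Register::virtReg2Index(VReg);  // strip the tag bit
      assert(llvm::Register::index2VirtReg(Idx) == VReg && "lossless round trip");
      return Idx;  // usable as a SparseSet or BitVector key, as in RegAllocFast
    }
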
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index 2ffa5e389f89..44d0233604e7 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -90,7 +90,7 @@ namespace {
explicit LiveReg(unsigned VirtReg) : VirtReg(VirtReg) {}
unsigned getSparseSetIndex() const {
- return TargetRegisterInfo::virtReg2Index(VirtReg);
+ return Register::virtReg2Index(VirtReg);
}
};
@@ -200,11 +200,11 @@ namespace {
void assignVirtToPhysReg(LiveReg &, MCPhysReg PhysReg);
LiveRegMap::iterator findLiveVirtReg(unsigned VirtReg) {
- return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
LiveRegMap::const_iterator findLiveVirtReg(unsigned VirtReg) const {
- return LiveVirtRegs.find(TargetRegisterInfo::virtReg2Index(VirtReg));
+ return LiveVirtRegs.find(Register::virtReg2Index(VirtReg));
}
void allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint);
@@ -264,7 +264,7 @@ int RegAllocFast::getStackSpaceFor(unsigned VirtReg) {
/// Returns false if \p VirtReg is known to not live out of the current block.
bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
- if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg))) {
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg))) {
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
@@ -272,7 +272,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
// If this block loops back to itself, it would be necessary to check whether
// the use comes after the def.
if (MBB->isSuccessor(MBB)) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return true;
}
@@ -282,7 +282,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
unsigned C = 0;
for (const MachineInstr &UseInst : MRI->reg_nodbg_instructions(VirtReg)) {
if (UseInst.getParent() != MBB || ++C >= Limit) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
// Cannot be live-out if there are no successors.
return !MBB->succ_empty();
}
@@ -293,7 +293,7 @@ bool RegAllocFast::mayLiveOut(unsigned VirtReg) {
/// Returns false if \p VirtReg is known to not be live into the current block.
bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
- if (MayLiveAcrossBlocks.test(TargetRegisterInfo::virtReg2Index(VirtReg)))
+ if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
return !MBB->pred_empty();
// See if the first \p Limit defs of the register are all in the current block.
@@ -301,7 +301,7 @@ bool RegAllocFast::mayLiveIn(unsigned VirtReg) {
unsigned C = 0;
for (const MachineInstr &DefInst : MRI->def_instructions(VirtReg)) {
if (DefInst.getParent() != MBB || ++C >= Limit) {
- MayLiveAcrossBlocks.set(TargetRegisterInfo::virtReg2Index(VirtReg));
+ MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
return !MBB->pred_empty();
}
}
@@ -394,7 +394,7 @@ void RegAllocFast::killVirtReg(LiveReg &LR) {
/// Mark virtreg as no longer available.
void RegAllocFast::killVirtReg(unsigned VirtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"killVirtReg needs a virtual register");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
if (LRI != LiveVirtRegs.end() && LRI->PhysReg)
@@ -405,7 +405,7 @@ void RegAllocFast::killVirtReg(unsigned VirtReg) {
/// stack slot if needed.
void RegAllocFast::spillVirtReg(MachineBasicBlock::iterator MI,
unsigned VirtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"Spilling a physical register is illegal!");
LiveRegMap::iterator LRI = findLiveVirtReg(VirtReg);
assert(LRI != LiveVirtRegs.end() && LRI->PhysReg &&
@@ -455,9 +455,8 @@ void RegAllocFast::usePhysReg(MachineOperand &MO) {
if (MO.isUndef())
return;
- unsigned PhysReg = MO.getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- "Bad usePhysReg operand");
+ Register PhysReg = MO.getReg();
+ assert(Register::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand");
markRegUsedInInstr(PhysReg);
switch (PhysRegState[PhysReg]) {
@@ -626,9 +625,9 @@ unsigned RegAllocFast::traceCopyChain(unsigned Reg) const {
static const unsigned ChainLengthLimit = 3;
unsigned C = 0;
do {
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg));
+ assert(Register::isVirtualRegister(Reg));
MachineInstr *VRegDef = MRI->getUniqueVRegDef(Reg);
if (!VRegDef || !isCoalescable(*VRegDef))
@@ -646,7 +645,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
unsigned C = 0;
for (const MachineInstr &MI : MRI->def_instructions(VirtReg)) {
if (isCoalescable(MI)) {
- unsigned Reg = MI.getOperand(1).getReg();
+ Register Reg = MI.getOperand(1).getReg();
Reg = traceCopyChain(Reg);
if (Reg != 0)
return Reg;
@@ -662,7 +661,7 @@ unsigned RegAllocFast::traceCopies(unsigned VirtReg) const {
void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
const unsigned VirtReg = LR.VirtReg;
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ assert(Register::isVirtualRegister(VirtReg) &&
"Can only allocate virtual registers");
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
@@ -671,8 +670,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
<< " with hint " << printReg(Hint0, TRI) << '\n');
// Take hint when possible.
- if (TargetRegisterInfo::isPhysicalRegister(Hint0) &&
- MRI->isAllocatable(Hint0) && RC.contains(Hint0)) {
+ if (Register::isPhysicalRegister(Hint0) && MRI->isAllocatable(Hint0) &&
+ RC.contains(Hint0)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint0);
if (Cost < spillDirty) {
@@ -692,9 +691,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
// Try other hint.
unsigned Hint1 = traceCopies(VirtReg);
- if (TargetRegisterInfo::isPhysicalRegister(Hint1) &&
- MRI->isAllocatable(Hint1) && RC.contains(Hint1) &&
- !isRegUsedInInstr(Hint1)) {
+ if (Register::isPhysicalRegister(Hint1) && MRI->isAllocatable(Hint1) &&
+ RC.contains(Hint1) && !isRegUsedInInstr(Hint1)) {
// Ignore the hint if we would have to spill a dirty register.
unsigned Cost = calcSpillCost(Hint1);
if (Cost < spillDirty) {
@@ -752,8 +750,8 @@ void RegAllocFast::allocVirtReg(MachineInstr &MI, LiveReg &LR, unsigned Hint0) {
void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
assert(MO.isUndef() && "expected undef use");
- unsigned VirtReg = MO.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Expected virtreg");
+ Register VirtReg = MO.getReg();
+ assert(Register::isVirtualRegister(VirtReg) && "Expected virtreg");
LiveRegMap::const_iterator LRI = findLiveVirtReg(VirtReg);
MCPhysReg PhysReg;
@@ -778,14 +776,13 @@ void RegAllocFast::allocVirtRegUndef(MachineOperand &MO) {
/// Allocates a register for VirtReg and mark it as dirty.
MCPhysReg RegAllocFast::defineVirtReg(MachineInstr &MI, unsigned OpNum,
unsigned VirtReg, unsigned Hint) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register");
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
if (!LRI->PhysReg) {
// If there is no hint, peek at the only use of this register.
- if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) &&
+ if ((!Hint || !Register::isPhysicalRegister(Hint)) &&
MRI->hasOneNonDBGUse(VirtReg)) {
const MachineInstr &UseMI = *MRI->use_instr_nodbg_begin(VirtReg);
// It's a copy, use the destination register as a hint.
@@ -812,8 +809,7 @@ RegAllocFast::LiveReg &RegAllocFast::reloadVirtReg(MachineInstr &MI,
unsigned OpNum,
unsigned VirtReg,
unsigned Hint) {
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register");
+ assert(Register::isVirtualRegister(VirtReg) && "Not a virtual register");
LiveRegMap::iterator LRI;
bool New;
std::tie(LRI, New) = LiveVirtRegs.insert(LiveReg(VirtReg));
@@ -866,7 +862,7 @@ bool RegAllocFast::setPhysReg(MachineInstr &MI, MachineOperand &MO,
}
// Handle subregister index.
- MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : 0);
+ MO.setReg(PhysReg ? TRI->getSubReg(PhysReg, MO.getSubReg()) : Register());
MO.setIsRenamable(true);
MO.setSubReg(0);
@@ -893,8 +889,8 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
SmallSet<unsigned, 8> ThroughRegs;
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isEarlyClobber() || (MO.isUse() && MO.isTied()) ||
(MO.getSubReg() && MI.readsVirtualRegister(Reg))) {
@@ -908,8 +904,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
LLVM_DEBUG(dbgs() << "\nChecking for physdef collisions.\n");
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
markRegUsedInInstr(Reg);
for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) {
if (ThroughRegs.count(PhysRegState[*AI]))
@@ -922,8 +919,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (MO.isUse()) {
if (!MO.isTied()) continue;
LLVM_DEBUG(dbgs() << "Operand " << I << "(" << MO
@@ -947,8 +945,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (!MO.isEarlyClobber())
continue;
// Note: defineVirtReg may invalidate MO.
@@ -961,8 +960,9 @@ void RegAllocFast::handleThroughOperands(MachineInstr &MI,
UsedInInstr.clear();
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
LLVM_DEBUG(dbgs() << "\tSetting " << printReg(Reg, TRI)
<< " as used in instr\n");
markRegUsedInInstr(Reg);
@@ -1002,10 +1002,8 @@ void RegAllocFast::dumpState() {
e = LiveVirtRegs.end(); i != e; ++i) {
if (!i->PhysReg)
continue;
- assert(TargetRegisterInfo::isVirtualRegister(i->VirtReg) &&
- "Bad map key");
- assert(TargetRegisterInfo::isPhysicalRegister(i->PhysReg) &&
- "Bad map value");
+ assert(Register::isVirtualRegister(i->VirtReg) && "Bad map key");
+ assert(Register::isPhysicalRegister(i->PhysReg) && "Bad map value");
assert(PhysRegState[i->PhysReg] == i->VirtReg && "Bad inverse map");
}
}
@@ -1045,9 +1043,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
continue;
}
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg) continue;
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
VirtOpEnd = i+1;
if (MO.isUse()) {
hasTiedOps = hasTiedOps ||
@@ -1096,8 +1094,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (unsigned I = 0; I != VirtOpEnd; ++I) {
MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
+ continue;
if (MO.isUse()) {
if (MO.isUndef()) {
HasUndefUse = true;
@@ -1124,8 +1123,8 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
for (MachineOperand &MO : MI.uses()) {
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
assert(MO.isUndef() && "Should only have undef virtreg uses left");
@@ -1139,8 +1138,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
if (hasEarlyClobbers) {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ Register Reg = MO.getReg();
+ if (!Reg || !Register::isPhysicalRegister(Reg))
+ continue;
// Look for physreg defs and tied uses.
if (!MO.isDef() && !MO.isTied()) continue;
markRegUsedInInstr(Reg);
@@ -1166,10 +1166,9 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
- if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg) ||
- !MRI->isAllocatable(Reg))
+ if (!Reg || !Register::isPhysicalRegister(Reg) || !MRI->isAllocatable(Reg))
continue;
definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved);
}
@@ -1180,10 +1179,10 @@ void RegAllocFast::allocateInstruction(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(I);
if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We have already dealt with phys regs in the previous scan.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
MCPhysReg PhysReg = defineVirtReg(MI, I, Reg, CopySrcReg);
if (setPhysReg(MI, MI.getOperand(I), PhysReg)) {
@@ -1215,8 +1214,8 @@ void RegAllocFast::handleDebugValue(MachineInstr &MI) {
// mostly constants and frame indices.
if (!MO.isReg())
return;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
return;
// See if this virtual register has already been allocated to a physical
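
mayLiveOut/mayLiveIn above embody a small bounded-scan heuristic worth spelling out: inspect at most a fixed number of users, and if any lies outside the current block (or the limit is hit before you can tell), conservatively treat the vreg as possibly live across blocks. A generic sketch under illustrative names (Use, mayEscapeBlock), not the pass's real interface:

    #include <vector>

    struct Use { const void *ParentBlock; };

    // Look at no more than Limit users. If any is in another block, or the
    // limit is exhausted first, conservatively report "escapes".
    bool mayEscapeBlock(const std::vector<Use> &Users, const void *CurBB,
                        unsigned Limit = 8) {
      unsigned C = 0;
      for (const Use &U : Users)
        if (U.ParentBlock != CurBB || ++C >= Limit)
          return true;   // unknown => assume live across blocks
      return false;      // all users proven local to CurBB
    }
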
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
index 771fc46415db..d27db678f02a 100644
--- a/lib/CodeGen/RegAllocGreedy.cpp
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -685,7 +685,7 @@ void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) {
// The queue holds (size, reg) pairs.
const unsigned Size = LI->getSize();
const unsigned Reg = LI->reg;
- assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
+ assert(Register::isVirtualRegister(Reg) &&
"Can only enqueue virtual registers");
unsigned Prio;
@@ -899,7 +899,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg,
// Check if any interfering live range is heavier than MaxWeight.
for (unsigned i = Q.interferingVRegs().size(); i; --i) {
LiveInterval *Intf = Q.interferingVRegs()[i - 1];
- assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) &&
+ assert(Register::isVirtualRegister(Intf->reg) &&
"Only expecting virtual register interference from query");
// Do not allow eviction of a virtual register if we are in the middle
@@ -984,7 +984,7 @@ bool RAGreedy::canEvictInterferenceInRange(LiveInterval &VirtReg,
continue;
// Cannot evict non-virtual reg interference.
- if (!TargetRegisterInfo::isVirtualRegister(Intf->reg))
+ if (!Register::isVirtualRegister(Intf->reg))
return false;
// Never evict spill products. They cannot split or spill.
if (getStage(*Intf) == RS_Done)
@@ -2881,7 +2881,7 @@ void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) {
continue;
}
// Get the current assignment.
- Register OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg)
+ Register OtherPhysReg = Register::isPhysicalRegister(OtherReg)
? OtherReg
: VRM->getPhys(OtherReg);
// Push the collected information.
@@ -2919,7 +2919,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
SmallVector<unsigned, 2> RecoloringCandidates;
HintsInfo Info;
unsigned Reg = VirtReg.reg;
- unsigned PhysReg = VRM->getPhys(Reg);
+ Register PhysReg = VRM->getPhys(Reg);
// Start the recoloring algorithm from the input live-interval, then
// it will propagate to the ones that are copy-related with it.
Visited.insert(Reg);
@@ -2932,7 +2932,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
Reg = RecoloringCandidates.pop_back_val();
// We cannot recolor physical registers.
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
continue;
assert(VRM->hasPhys(Reg) && "We have an unallocated variable!");
@@ -2940,7 +2940,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
// Get the live interval mapped with this virtual register to be able
// to check for the interference with the new color.
LiveInterval &LI = LIS->getInterval(Reg);
- unsigned CurrPhys = VRM->getPhys(Reg);
+ Register CurrPhys = VRM->getPhys(Reg);
// Check that the new color matches the register class constraints and
// that it is free for this live range.
if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) ||
@@ -3021,7 +3021,7 @@ void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) {
/// getting rid of 2 copies.
void RAGreedy::tryHintsRecoloring() {
for (LiveInterval *LI : SetOfBrokenHints) {
- assert(TargetRegisterInfo::isVirtualRegister(LI->reg) &&
+ assert(Register::isVirtualRegister(LI->reg) &&
"Recoloring is possible only for virtual registers");
// Some dead defs may be around (e.g., because of debug uses).
// Ignore those.
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 7a5a6c148ed4..3c4a46b12f99 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -558,7 +558,7 @@ void RegAllocPBQP::findVRegIntervalsToAlloc(const MachineFunction &MF,
// Iterate over all live ranges.
for (unsigned I = 0, E = MRI.getNumVirtRegs(); I != E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (MRI.reg_nodbg_empty(Reg))
continue;
VRegsToAlloc.insert(Reg);
@@ -824,11 +824,11 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
if (!VRegsToAlloc.empty()) {
const TargetSubtargetInfo &Subtarget = MF.getSubtarget();
std::unique_ptr<PBQPRAConstraintList> ConstraintsRoot =
- llvm::make_unique<PBQPRAConstraintList>();
- ConstraintsRoot->addConstraint(llvm::make_unique<SpillCosts>());
- ConstraintsRoot->addConstraint(llvm::make_unique<Interference>());
+ std::make_unique<PBQPRAConstraintList>();
+ ConstraintsRoot->addConstraint(std::make_unique<SpillCosts>());
+ ConstraintsRoot->addConstraint(std::make_unique<Interference>());
if (PBQPCoalescing)
- ConstraintsRoot->addConstraint(llvm::make_unique<Coalescing>());
+ ConstraintsRoot->addConstraint(std::make_unique<Coalescing>());
ConstraintsRoot->addConstraint(Subtarget.getCustomPBQPConstraints());
bool PBQPAllocComplete = false;
@@ -848,7 +848,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
std::string GraphFileName = FullyQualifiedName + "." + RS.str() +
".pbqpgraph";
std::error_code EC;
- raw_fd_ostream OS(GraphFileName, EC, sys::fs::F_Text);
+ raw_fd_ostream OS(GraphFileName, EC, sys::fs::OF_Text);
LLVM_DEBUG(dbgs() << "Dumping graph for round " << Round << " to \""
<< GraphFileName << "\"\n");
G.dump(OS);
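
sys::fs::F_Text became sys::fs::OF_Text when the open-flag enumerators gained an OF_ prefix; the behavior is unchanged. A minimal sketch of the debug-graph dump pattern, with dumpGraphToFile as an illustrative wrapper:

    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <string>

    bool dumpGraphToFile(const std::string &Path) {
      std::error_code EC;
      llvm::raw_fd_ostream OS(Path, EC, llvm::sys::fs::OF_Text);  // was F_Text
      if (EC)
        return false;        // e.g. permission denied
      OS << "graph G {}\n";  // stand-in for G.dump(OS)
      return true;
    }
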
diff --git a/lib/CodeGen/RegUsageInfoCollector.cpp b/lib/CodeGen/RegUsageInfoCollector.cpp
index b37dfada7101..757ff0e44953 100644
--- a/lib/CodeGen/RegUsageInfoCollector.cpp
+++ b/lib/CodeGen/RegUsageInfoCollector.cpp
@@ -142,6 +142,13 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
auto SetRegAsDefined = [&RegMask] (unsigned Reg) {
RegMask[Reg / 32] &= ~(1u << Reg % 32);
};
+
+ // Some targets can clobber registers "inside" a call, typically in
+ // linker-generated code.
+ for (const MCPhysReg Reg : TRI->getIntraCallClobberedRegs(&MF))
+ for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
+ SetRegAsDefined(*AI);
+
// Scan all the physical registers. When a register is defined in the current
// function set it and all the aliasing registers as defined in the regmask.
// FIXME: Rewrite to use regunits.
@@ -164,7 +171,8 @@ bool RegUsageInfoCollector::runOnMachineFunction(MachineFunction &MF) {
SetRegAsDefined(PReg);
}
- if (TargetFrameLowering::isSafeForNoCSROpt(F)) {
+ if (TargetFrameLowering::isSafeForNoCSROpt(F) &&
+ MF.getSubtarget().getFrameLowering()->isProfitableForNoCSROpt(F)) {
++NumCSROpt;
LLVM_DEBUG(dbgs() << MF.getName()
<< " function optimized for not having CSR.\n");
diff --git a/lib/CodeGen/RegUsageInfoPropagate.cpp b/lib/CodeGen/RegUsageInfoPropagate.cpp
index fc4be82d215e..0205e6193741 100644
--- a/lib/CodeGen/RegUsageInfoPropagate.cpp
+++ b/lib/CodeGen/RegUsageInfoPropagate.cpp
@@ -130,7 +130,11 @@ bool RegUsageInfoPropagation::runOnMachineFunction(MachineFunction &MF) {
};
if (const Function *F = findCalledFunction(M, MI)) {
- UpdateRegMask(*F);
+ if (F->isDefinitionExact()) {
+ UpdateRegMask(*F);
+ } else {
+ LLVM_DEBUG(dbgs() << "Function definition is not exact\n");
+ }
} else {
LLVM_DEBUG(dbgs() << "Failed to find call target function\n");
}
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 2db6ab454cea..6ff5ddbc023d 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -406,8 +406,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
Partial = SrcSub || DstSub;
// If one register is a physreg, it must be Dst.
- if (TargetRegisterInfo::isPhysicalRegister(Src)) {
- if (TargetRegisterInfo::isPhysicalRegister(Dst))
+ if (Register::isPhysicalRegister(Src)) {
+ if (Register::isPhysicalRegister(Dst))
return false;
std::swap(Src, Dst);
std::swap(SrcSub, DstSub);
@@ -416,7 +416,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();
- if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
+ if (Register::isPhysicalRegister(Dst)) {
// Eliminate DstSub on a physreg.
if (DstSub) {
Dst = TRI.getSubReg(Dst, DstSub);
@@ -474,8 +474,8 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
CrossClass = NewRC != DstRC || NewRC != SrcRC;
}
// Check our invariants
- assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
- assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
+ assert(Register::isVirtualRegister(Src) && "Src must be virtual");
+ assert(!(Register::isPhysicalRegister(Dst) && DstSub) &&
"Cannot have a physical SubIdx");
SrcReg = Src;
DstReg = Dst;
@@ -483,7 +483,7 @@ bool CoalescerPair::setRegisters(const MachineInstr *MI) {
}
bool CoalescerPair::flip() {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
return false;
std::swap(SrcReg, DstReg);
std::swap(SrcIdx, DstIdx);
@@ -507,8 +507,8 @@ bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
}
// Now check that Dst matches DstReg.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
- if (!TargetRegisterInfo::isPhysicalRegister(Dst))
+ if (Register::isPhysicalRegister(DstReg)) {
+ if (!Register::isPhysicalRegister(Dst))
return false;
assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
// DstSub could be set for a physreg from INSERT_SUBREG.
@@ -802,7 +802,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
return { false, false };
MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
- unsigned NewReg = NewDstMO.getReg();
+ Register NewReg = NewDstMO.getReg();
if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
return { false, false };
@@ -835,8 +835,8 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
if (!NewMI)
return { false, false };
- if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
- TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
+ if (Register::isVirtualRegister(IntA.reg) &&
+ Register::isVirtualRegister(IntB.reg) &&
!MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
return { false, false };
if (NewMI != DefMI) {
@@ -877,7 +877,7 @@ RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
continue;
// Kill flags are no longer accurate. They are recomputed after RA.
UseMO.setIsKill(false);
- if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ if (Register::isPhysicalRegister(NewReg))
UseMO.substPhysReg(NewReg, *TRI);
else
UseMO.setReg(NewReg);
@@ -1188,7 +1188,7 @@ bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
- assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ assert(!Register::isPhysicalRegister(Reg) &&
"This code cannot handle physreg aliasing");
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
@@ -1209,7 +1209,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (Register::isPhysicalRegister(SrcReg))
return false;
LiveInterval &SrcInt = LIS->getInterval(SrcReg);
@@ -1240,7 +1240,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
return false;
// Only support subregister destinations when the def is read-undef.
MachineOperand &DstOperand = CopyMI->getOperand(0);
- unsigned CopyDstReg = DstOperand.getReg();
+ Register CopyDstReg = DstOperand.getReg();
if (DstOperand.getSubReg() && !DstOperand.isUndef())
return false;
@@ -1254,7 +1254,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
if (!DefMI->isImplicitDef()) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+ if (Register::isPhysicalRegister(DstReg)) {
unsigned NewDstReg = DstReg;
unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
@@ -1269,7 +1269,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
// Theoretically, some stack frame reference could exist. Just make sure
// it hasn't actually happened.
- assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
+ assert(Register::isVirtualRegister(DstReg) &&
"Only expect to deal with virtual or physical registers");
}
}
@@ -1317,7 +1317,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
if (MO.isReg()) {
assert(MO.isImplicit() && "No explicit operands after implicit operands.");
// Discard VReg implicit defs.
- if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
+ if (Register::isPhysicalRegister(MO.getReg()))
ImplicitOps.push_back(MO);
}
}
@@ -1336,12 +1336,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MachineOperand &MO = NewMI.getOperand(i);
if (MO.isReg() && MO.isDef()) {
assert(MO.isImplicit() && MO.isDead() &&
- TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
+ Register::isPhysicalRegister(MO.getReg()));
NewMIImplDefs.push_back(MO.getReg());
}
}
- if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
+ if (Register::isVirtualRegister(DstReg)) {
unsigned NewIdx = NewMI.getOperand(0).getSubReg();
if (DefRC != nullptr) {
@@ -1428,7 +1428,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
// The New instruction may be defining a sub-register of what's actually
// been asked for. If so it must implicitly define the whole thing.
- assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
+ assert(Register::isPhysicalRegister(DstReg) &&
"Only expect virtual or physical registers in remat");
NewMI.getOperand(0).setIsDead(true);
NewMI.addOperand(MachineOperand::CreateReg(
@@ -1480,7 +1480,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
MachineInstr *UseMI = UseMO.getParent();
if (UseMI->isDebugValue()) {
- if (TargetRegisterInfo::isPhysicalRegister(DstReg))
+ if (Register::isPhysicalRegister(DstReg))
UseMO.substPhysReg(DstReg, *TRI);
else
UseMO.setReg(DstReg);
@@ -1651,7 +1651,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
unsigned DstReg,
unsigned SubIdx) {
- bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ bool DstIsPhys = Register::isPhysicalRegister(DstReg);
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
@@ -2411,8 +2411,8 @@ std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
assert(MI && "No defining instruction");
if (!MI->isFullCopy())
return std::make_pair(VNI, TrackReg);
- unsigned SrcReg = MI->getOperand(1).getReg();
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
+ Register SrcReg = MI->getOperand(1).getReg();
+ if (!Register::isVirtualRegister(SrcReg))
return std::make_pair(VNI, TrackReg);
const LiveInterval &LI = LIS->getInterval(SrcReg);
@@ -3189,9 +3189,9 @@ void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
assert(MI && "No instruction to erase");
if (MI->isCopy()) {
- unsigned Reg = MI->getOperand(1).getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg) &&
- Reg != CP.getSrcReg() && Reg != CP.getDstReg())
+ Register Reg = MI->getOperand(1).getReg();
+ if (Register::isVirtualRegister(Reg) && Reg != CP.getSrcReg() &&
+ Reg != CP.getDstReg())
ShrinkRegs.push_back(Reg);
}
ErasedInstrs.insert(MI);
@@ -3463,10 +3463,10 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
if (Copy->getOperand(1).isUndef())
return false;
- unsigned SrcReg = Copy->getOperand(1).getReg();
- unsigned DstReg = Copy->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
- || TargetRegisterInfo::isPhysicalRegister(DstReg))
+ Register SrcReg = Copy->getOperand(1).getReg();
+ Register DstReg = Copy->getOperand(0).getReg();
+ if (Register::isPhysicalRegister(SrcReg) ||
+ Register::isPhysicalRegister(DstReg))
return false;
return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
@@ -3526,12 +3526,11 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (!isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
// Check if the destination of this copy has any other affinity.
- if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ if (Register::isPhysicalRegister(DstReg) ||
// If SrcReg is a physical register, the copy won't be coalesced.
// Ignoring it may have other side effect (like missing
// rematerialization). So keep it.
- TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
- !isTerminalReg(DstReg, Copy, MRI))
+ Register::isPhysicalRegister(SrcReg) || !isTerminalReg(DstReg, Copy, MRI))
return false;
// DstReg is a terminal node. Check if it interferes with any other
@@ -3554,7 +3553,7 @@ bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
if (OtherReg == SrcReg)
OtherReg = OtherSrcReg;
// Check if OtherReg is a non-terminal.
- if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
+ if (Register::isPhysicalRegister(OtherReg) ||
isTerminalReg(OtherReg, MI, MRI))
continue;
// Check that OtherReg interferes with DstReg.
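
One invariant in CoalescerPair::setRegisters above deserves a callout: the copy pair is normalized so any physical register ends up on the Dst side, and phys-phys copies are rejected outright. A sketch of that normalization under the migrated Register API:

    #include "llvm/CodeGen/Register.h"
    #include <utility>

    // Canonicalize a copy pair for coalescing: Src must end up virtual.
    bool normalizePair(llvm::Register &Src, llvm::Register &Dst) {
      if (llvm::Register::isPhysicalRegister(Src)) {
        if (llvm::Register::isPhysicalRegister(Dst))
          return false;        // cannot coalesce two physical registers
        std::swap(Src, Dst);   // keep the physreg in Dst
      }
      return true;
    }
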
diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp
index 7d9b3aa9b2d7..bf192d1c530d 100644
--- a/lib/CodeGen/RegisterPressure.cpp
+++ b/lib/CodeGen/RegisterPressure.cpp
@@ -134,6 +134,22 @@ void PressureDiff::dump(const TargetRegisterInfo &TRI) const {
}
dbgs() << '\n';
}
+
+LLVM_DUMP_METHOD
+void PressureChange::dump() const {
+ dbgs() << "[" << getPSetOrMax() << ", " << getUnitInc() << "]\n";
+}
+
+void RegPressureDelta::dump() const {
+ dbgs() << "[Excess=";
+ Excess.dump();
+ dbgs() << ", CriticalMax=";
+ CriticalMax.dump();
+ dbgs() << ", CurrentMax=";
+ CurrentMax.dump();
+ dbgs() << "]\n";
+}
+
#endif
void RegPressureTracker::increaseRegPressure(unsigned RegUnit,
@@ -219,7 +235,7 @@ void LiveRegSet::clear() {
}
static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) {
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return &LIS.getInterval(Reg);
return LIS.getCachedRegUnit(Reg);
}
@@ -345,7 +361,7 @@ void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) {
assert(isBottomClosed() && "need bottom-up tracking to initialize.");
for (const RegisterMaskPair &Pair : P.LiveOutRegs) {
unsigned RegUnit = Pair.RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)
+ if (Register::isVirtualRegister(RegUnit)
&& !RPTracker.hasUntiedDef(RegUnit))
increaseSetPressure(LiveThruPressure, *MRI, RegUnit,
LaneBitmask::getNone(), Pair.LaneMask);
@@ -406,7 +422,7 @@ static LaneBitmask getLanesWithProperty(const LiveIntervals &LIS,
const MachineRegisterInfo &MRI, bool TrackLaneMasks, unsigned RegUnit,
SlotIndex Pos, LaneBitmask SafeDefault,
bool(*Property)(const LiveRange &LR, SlotIndex Pos)) {
- if (TargetRegisterInfo::isVirtualRegister(RegUnit)) {
+ if (Register::isVirtualRegister(RegUnit)) {
const LiveInterval &LI = LIS.getInterval(RegUnit);
LaneBitmask Result;
if (TrackLaneMasks && LI.hasSubRanges()) {
@@ -483,7 +499,7 @@ class RegisterOperandsCollector {
void collectOperand(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (MO.isUse()) {
if (!MO.isUndef() && !MO.isInternalRead())
pushReg(Reg, RegOpers.Uses);
@@ -503,7 +519,7 @@ class RegisterOperandsCollector {
void pushReg(unsigned Reg,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
addRegLanes(RegUnits, RegisterMaskPair(Reg, LaneBitmask::getAll()));
} else if (MRI.isAllocatable(Reg)) {
for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units)
@@ -514,7 +530,7 @@ class RegisterOperandsCollector {
void collectOperandLanes(const MachineOperand &MO) const {
if (!MO.isReg() || !MO.getReg())
return;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
unsigned SubRegIdx = MO.getSubReg();
if (MO.isUse()) {
if (!MO.isUndef() && !MO.isInternalRead())
@@ -535,7 +551,7 @@ class RegisterOperandsCollector {
void pushRegLanes(unsigned Reg, unsigned SubRegIdx,
SmallVectorImpl<RegisterMaskPair> &RegUnits) const {
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
LaneBitmask LaneMask = SubRegIdx != 0
? TRI.getSubRegIndexLaneMask(SubRegIdx)
: MRI.getMaxLaneMaskForVReg(Reg);
@@ -590,7 +606,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
// If the def is all that is live after the instruction, then in case
// of a subregister def we need a read-undef flag.
unsigned RegUnit = I->RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ if (Register::isVirtualRegister(RegUnit) &&
AddFlagsMI != nullptr && (LiveAfter & ~I->LaneMask).none())
AddFlagsMI->setRegisterDefReadUndef(RegUnit);
@@ -616,7 +632,7 @@ void RegisterOperands::adjustLaneLiveness(const LiveIntervals &LIS,
if (AddFlagsMI != nullptr) {
for (const RegisterMaskPair &P : DeadDefs) {
unsigned RegUnit = P.RegUnit;
- if (!TargetRegisterInfo::isVirtualRegister(RegUnit))
+ if (!Register::isVirtualRegister(RegUnit))
continue;
LaneBitmask LiveAfter = getLiveLanesAt(LIS, MRI, true, RegUnit,
Pos.getDeadSlot());
@@ -825,7 +841,7 @@ void RegPressureTracker::recede(const RegisterOperands &RegOpers,
if (TrackUntiedDefs) {
for (const RegisterMaskPair &Def : RegOpers.Defs) {
unsigned RegUnit = Def.RegUnit;
- if (TargetRegisterInfo::isVirtualRegister(RegUnit) &&
+ if (Register::isVirtualRegister(RegUnit) &&
(LiveRegs.contains(RegUnit) & Def.LaneMask).none())
UntiedDefs.insert(RegUnit);
}
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index bb19110e6d70..ec0868acab38 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -49,7 +49,7 @@ using namespace llvm;
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
-void RegScavenger::setRegUsed(unsigned Reg, LaneBitmask LaneMask) {
+void RegScavenger::setRegUsed(Register Reg, LaneBitmask LaneMask) {
LiveUnits.addRegMasked(Reg, LaneMask);
}
@@ -96,12 +96,12 @@ void RegScavenger::enterBasicBlockEnd(MachineBasicBlock &MBB) {
}
}
-void RegScavenger::addRegUnits(BitVector &BV, unsigned Reg) {
+void RegScavenger::addRegUnits(BitVector &BV, Register Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.set(*RUI);
}
-void RegScavenger::removeRegUnits(BitVector &BV, unsigned Reg) {
+void RegScavenger::removeRegUnits(BitVector &BV, Register Reg) {
for (MCRegUnitIterator RUI(Reg, TRI); RUI.isValid(); ++RUI)
BV.reset(*RUI);
}
@@ -133,8 +133,8 @@ void RegScavenger::determineKillsAndDefs() {
}
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
@@ -204,8 +204,8 @@ void RegScavenger::forward() {
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isPhysicalRegister(Reg) || isReserved(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isPhysicalRegister(Reg) || isReserved(Reg))
continue;
if (MO.isUse()) {
if (MO.isUndef())
@@ -278,14 +278,14 @@ void RegScavenger::backward() {
--MBBI;
}
-bool RegScavenger::isRegUsed(unsigned Reg, bool includeReserved) const {
+bool RegScavenger::isRegUsed(Register Reg, bool includeReserved) const {
if (isReserved(Reg))
return includeReserved;
return !LiveUnits.available(Reg);
}
-unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
- for (unsigned Reg : *RC) {
+Register RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
+ for (Register Reg : *RC) {
if (!isRegUsed(Reg)) {
LLVM_DEBUG(dbgs() << "Scavenger found unused reg: " << printReg(Reg, TRI)
<< "\n");
@@ -297,13 +297,13 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const {
BitVector RegScavenger::getRegsAvailable(const TargetRegisterClass *RC) {
BitVector Mask(TRI->getNumRegs());
- for (unsigned Reg : *RC)
+ for (Register Reg : *RC)
if (!isRegUsed(Reg))
Mask.set(Reg);
return Mask;
}
-unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
+Register RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
BitVector &Candidates,
unsigned InstrLimit,
MachineBasicBlock::iterator &UseMI) {
@@ -329,7 +329,7 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI,
Candidates.clearBitsNotInMask(MO.getRegMask());
if (!MO.isReg() || MO.isUndef() || !MO.getReg())
continue;
- if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (Register::isVirtualRegister(MO.getReg())) {
if (MO.isDef())
isVirtDefInsn = true;
else if (MO.isKill())
@@ -430,7 +430,7 @@ findSurvivorBackwards(const MachineRegisterInfo &MRI,
// be useful for this other vreg as well later.
bool FoundVReg = false;
for (const MachineOperand &MO : MI.operands()) {
- if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
FoundVReg = true;
break;
}
@@ -457,7 +457,7 @@ static unsigned getFrameIndexOperandNum(MachineInstr &MI) {
}
RegScavenger::ScavengedInfo &
-RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
+RegScavenger::spill(Register Reg, const TargetRegisterClass &RC, int SPAdj,
MachineBasicBlock::iterator Before,
MachineBasicBlock::iterator &UseMI) {
// Find an available scavenging slot with size and alignment matching
@@ -531,7 +531,7 @@ RegScavenger::spill(unsigned Reg, const TargetRegisterClass &RC, int SPAdj,
return Scavenged[SI];
}
-unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
+Register RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
MachineBasicBlock::iterator I,
int SPAdj, bool AllowSpill) {
MachineInstr &MI = *I;
@@ -542,7 +542,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Exclude all the registers being used by the instruction.
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
- !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ !Register::isVirtualRegister(MO.getReg()))
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI)
Candidates.reset(*AI);
}
@@ -556,7 +556,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
// Find the register whose use is furthest away.
MachineBasicBlock::iterator UseMI;
- unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI);
+ Register SReg = findSurvivorReg(I, Candidates, 25, UseMI);
// If we found an unused register there is no reason to spill it.
if (!isRegUsed(SReg)) {
@@ -576,7 +576,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
return SReg;
}
-unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
+Register RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
MachineBasicBlock::iterator To,
bool RestoreAfter, int SPAdj,
bool AllowSpill) {
@@ -620,8 +620,8 @@ unsigned RegScavenger::scavengeRegisterBackwards(const TargetRegisterClass &RC,
/// \p ReserveAfter controls whether the scavenged register needs to be reserved
/// after the current instruction, otherwise it will only be reserved before the
/// current instruction.
-static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
- unsigned VReg, bool ReserveAfter) {
+static Register scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
+ Register VReg, bool ReserveAfter) {
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
#ifndef NDEBUG
// Verify that all definitions and uses are in the same basic block.
@@ -664,7 +664,7 @@ static unsigned scavengeVReg(MachineRegisterInfo &MRI, RegScavenger &RS,
// spill/reload if necessary.
int SPAdj = 0;
const TargetRegisterClass &RC = *MRI.getRegClass(VReg);
- unsigned SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
+ Register SReg = RS.scavengeRegisterBackwards(RC, DefMI.getIterator(),
ReserveAfter, SPAdj);
MRI.replaceRegWith(VReg, SReg);
++NumScavengedRegs;
@@ -694,17 +694,17 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
for (const MachineOperand &MO : NMI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We only care about virtual registers and ignore virtual registers
// created by the target callbacks in the process (those will be handled
// in a scavenging round).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
- TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ if (!Register::isVirtualRegister(Reg) ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
if (!MO.readsReg())
continue;
- unsigned SReg = scavengeVReg(MRI, RS, Reg, true);
+ Register SReg = scavengeVReg(MRI, RS, Reg, true);
N->addRegisterKilled(SReg, &TRI, false);
RS.setRegUsed(SReg);
}
@@ -716,10 +716,10 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
for (const MachineOperand &MO : MI.operands()) {
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Only vregs, no newly created vregs (see above).
- if (!TargetRegisterInfo::isVirtualRegister(Reg) ||
- TargetRegisterInfo::virtReg2Index(Reg) >= InitialNumVirtRegs)
+ if (!Register::isVirtualRegister(Reg) ||
+ Register::virtReg2Index(Reg) >= InitialNumVirtRegs)
continue;
// We have to look at all operands anyway so we can precalculate here
// whether there is a reading operand. This allows us to skip the use
@@ -730,14 +730,14 @@ static bool scavengeFrameVirtualRegsInBlock(MachineRegisterInfo &MRI,
NextInstructionReadsVReg = true;
}
if (MO.isDef()) {
- unsigned SReg = scavengeVReg(MRI, RS, Reg, false);
+ Register SReg = scavengeVReg(MRI, RS, Reg, false);
I->addRegisterDead(SReg, &TRI, false);
}
}
}
#ifndef NDEBUG
for (const MachineOperand &MO : MBB.front().operands()) {
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !Register::isVirtualRegister(MO.getReg()))
continue;
assert(!MO.isInternalRead() && "Cannot assign inside bundles");
assert((!MO.isUndef() || MO.isDef()) && "Cannot handle undef uses");
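
The RegScavenger signature changes from unsigned to Register are source-compatible because Register converts implicitly in both directions; a sketch, assuming this era's conversion operators:

    #include "llvm/CodeGen/Register.h"

    void takesRegister(llvm::Register R) { (void)R; }

    void caller(unsigned RawReg) {
      takesRegister(RawReg);   // unsigned -> Register, implicit
      llvm::Register R(RawReg);
      unsigned Back = R;       // Register -> unsigned, implicit
      (void)Back;
    }
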
diff --git a/lib/CodeGen/RenameIndependentSubregs.cpp b/lib/CodeGen/RenameIndependentSubregs.cpp
index 22cff48c3051..e3f5abb6301f 100644
--- a/lib/CodeGen/RenameIndependentSubregs.cpp
+++ b/lib/CodeGen/RenameIndependentSubregs.cpp
@@ -138,7 +138,7 @@ bool RenameIndependentSubregs::renameComponents(LiveInterval &LI) const {
LLVM_DEBUG(dbgs() << printReg(Reg) << ": Splitting into newly created:");
for (unsigned I = 1, NumClasses = Classes.getNumClasses(); I < NumClasses;
++I) {
- unsigned NewVReg = MRI->createVirtualRegister(RegClass);
+ Register NewVReg = MRI->createVirtualRegister(RegClass);
LiveInterval &NewLI = LIS->createEmptyInterval(NewVReg);
Intervals.push_back(&NewLI);
LLVM_DEBUG(dbgs() << ' ' << printReg(NewVReg));
@@ -390,7 +390,7 @@ bool RenameIndependentSubregs::runOnMachineFunction(MachineFunction &MF) {
// there can't be any further splitting.
bool Changed = false;
for (size_t I = 0, E = MRI->getNumVirtRegs(); I < E; ++I) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(I);
+ unsigned Reg = Register::index2VirtReg(I);
if (!LIS->hasInterval(Reg))
continue;
LiveInterval &LI = LIS->getInterval(Reg);
diff --git a/lib/CodeGen/SafeStack.cpp b/lib/CodeGen/SafeStack.cpp
index a6bc7330e2cc..ddbbd0f8d6e9 100644
--- a/lib/CodeGen/SafeStack.cpp
+++ b/lib/CodeGen/SafeStack.cpp
@@ -871,7 +871,7 @@ public:
report_fatal_error("TargetLowering instance is required");
auto *DL = &F.getParent()->getDataLayout();
- auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
auto &ACT = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
// Compute DT and LI only for functions that have the attribute.
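
getTLI() gaining a Function parameter reflects TargetLibraryInfo becoming per-function: library-call availability can differ per function via attributes like nobuiltin. A one-line sketch of the updated call, with getFunctionTLI as an illustrative wrapper:

    #include "llvm/Analysis/TargetLibraryInfo.h"
    #include "llvm/IR/Function.h"

    llvm::TargetLibraryInfo &
    getFunctionTLI(llvm::TargetLibraryInfoWrapperPass &P, llvm::Function &F) {
      return P.getTLI(F);   // was: P.getTLI()
    }
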
diff --git a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
index 7776dffb4e9c..b4037499d7d1 100644
--- a/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
+++ b/lib/CodeGen/ScalarizeMaskedMemIntrin.cpp
@@ -173,15 +173,30 @@ static void scalarizeMaskedLoad(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %res.phi.else3 = phi <16 x i32> [ %11, %cond.load1 ], [ %res.phi.else, %else ]
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
// br i1 %cond, label %cond.load, label %else
//
-
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
// Create "cond" block
//
@@ -290,13 +305,29 @@ static void scalarizeMaskedStore(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %mask_1 = extractelement <16 x i1> %mask, i32 Idx
+ // %mask_1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %mask_1, 0
// br i1 %cond, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx);
+ }
// Create "cond" block
//
@@ -392,15 +423,30 @@ static void scalarizeMaskedGather(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %Mask1 = extractelement <16 x i1> %Mask, i32 1
+ // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %Mask1, 0
// br i1 %cond, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -499,14 +545,29 @@ static void scalarizeMaskedScatter(CallInst *CI, bool &ModifiedDT) {
return;
}
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
- // %Mask1 = extractelement <16 x i1> %Mask, i32 Idx
+ // %Mask1 = and i16 %scalar_mask, i32 1 << Idx
+ // %cond = icmp ne i16 %Mask1, 0
// br i1 %cond, label %cond.store, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -555,6 +616,32 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
// The result vector
Value *VResult = PassThru;
+ // Take a shortcut if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ LoadInst *Load =
+ Builder.CreateAlignedLoad(EltTy, NewPtr, 1, "Load" + Twine(Idx));
+ VResult =
+ Builder.CreateInsertElement(VResult, Load, Idx, "Res" + Twine(Idx));
+ ++MemIndex;
+ }
+ CI->replaceAllUsesWith(VResult);
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
@@ -563,8 +650,14 @@ static void scalarizeMaskedExpandLoad(CallInst *CI, bool &ModifiedDT) {
// br i1 %mask_1, label %cond.load, label %else
//
- Value *Predicate =
- Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -633,13 +726,44 @@ static void scalarizeMaskedCompressStore(CallInst *CI, bool &ModifiedDT) {
unsigned VectorWidth = VecType->getNumElements();
+ // Take a shortcut if the mask is a vector of constants.
+ if (isConstantIntVector(Mask)) {
+ unsigned MemIndex = 0;
+ for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
+ if (cast<Constant>(Mask)->getAggregateElement(Idx)->isNullValue())
+ continue;
+ Value *OneElt =
+ Builder.CreateExtractElement(Src, Idx, "Elt" + Twine(Idx));
+ Value *NewPtr = Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, MemIndex);
+ Builder.CreateAlignedStore(OneElt, NewPtr, 1);
+ ++MemIndex;
+ }
+ CI->eraseFromParent();
+ return;
+ }
+
+ // If the mask is not v1i1, use scalar bit test operations. This generates
+ // better results on X86 at least.
+ Value *SclrMask;
+ if (VectorWidth != 1) {
+ Type *SclrMaskTy = Builder.getIntNTy(VectorWidth);
+ SclrMask = Builder.CreateBitCast(Mask, SclrMaskTy, "scalar_mask");
+ }
+
for (unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
// Fill the "else" block, created in the previous iteration
//
// %mask_1 = extractelement <16 x i1> %mask, i32 Idx
// br i1 %mask_1, label %cond.store, label %else
//
- Value *Predicate = Builder.CreateExtractElement(Mask, Idx);
+ Value *Predicate;
+ if (VectorWidth != 1) {
+ Value *Mask = Builder.getInt(APInt::getOneBitSet(VectorWidth, Idx));
+ Predicate = Builder.CreateICmpNE(Builder.CreateAnd(SclrMask, Mask),
+ Builder.getIntN(VectorWidth, 0));
+ } else {
+ Predicate = Builder.CreateExtractElement(Mask, Idx, "Mask" + Twine(Idx));
+ }
// Create "cond" block
//
@@ -727,17 +851,24 @@ bool ScalarizeMaskedMemIntrin::optimizeCallInst(CallInst *CI,
switch (II->getIntrinsicID()) {
default:
break;
- case Intrinsic::masked_load:
+ case Intrinsic::masked_load: {
// Scalarize unsupported vector masked load
- if (TTI->isLegalMaskedLoad(CI->getType()))
+ unsigned Alignment =
+ cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
+ if (TTI->isLegalMaskedLoad(CI->getType(), MaybeAlign(Alignment)))
return false;
scalarizeMaskedLoad(CI, ModifiedDT);
return true;
- case Intrinsic::masked_store:
- if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType()))
+ }
+ case Intrinsic::masked_store: {
+ unsigned Alignment =
+ cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
+ if (TTI->isLegalMaskedStore(CI->getArgOperand(0)->getType(),
+ MaybeAlign(Alignment)))
return false;
scalarizeMaskedStore(CI, ModifiedDT);
return true;
+ }
case Intrinsic::masked_gather:
if (TTI->isLegalMaskedGather(CI->getType()))
return false;
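
The repeated pattern in this file replaces a per-lane extractelement on the <N x i1> mask with a single bitcast to iN plus one bit test per lane, which lowers to cheaper scalar TEST/branch sequences on X86. A sketch of the predicate construction, assuming an IRBuilder already positioned at the insertion point (the real code hoists the bitcast out of the lane loop):

    #include "llvm/ADT/APInt.h"
    #include "llvm/IR/IRBuilder.h"

    // Build "icmp ne (and (bitcast mask to iN), 1 << Idx), 0" for lane Idx.
    llvm::Value *lanePredicate(llvm::IRBuilder<> &B, llvm::Value *VecMask,
                               unsigned VectorWidth, unsigned Idx) {
      llvm::Type *IntNTy = B.getIntNTy(VectorWidth);
      llvm::Value *SclrMask = B.CreateBitCast(VecMask, IntNTy, "scalar_mask");
      llvm::Value *Bit = B.getInt(llvm::APInt::getOneBitSet(VectorWidth, Idx));
      return B.CreateICmpNE(B.CreateAnd(SclrMask, Bit),
                            B.getIntN(VectorWidth, 0));
    }
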
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index d5ad7e92299d..96a1f86c3e04 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -18,7 +18,6 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/SparseSet.h"
#include "llvm/ADT/iterator_range.h"
-#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
@@ -205,10 +204,10 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() {
if (ExitMI) {
for (const MachineOperand &MO : ExitMI->operands()) {
if (!MO.isReg() || MO.isDef()) continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
- } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(&ExitSU, ExitMI->getOperandNo(&MO));
}
}
@@ -285,7 +284,7 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// We do not need to track any dependencies for constant registers.
if (MRI.isConstantPhysReg(Reg))
return;
@@ -361,7 +360,7 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
{
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// No point in tracking lanemasks if we don't have interesting subregisters.
const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
if (!RC.HasDisjunctSubRegs)
@@ -373,6 +372,13 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
return TRI->getSubRegIndexLaneMask(SubReg);
}
+bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
+ auto RegUse = CurrentVRegUses.find(MO.getReg());
+ if (RegUse == CurrentVRegUses.end())
+ return true;
+ return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
+}
+
/// Adds register output and data dependencies from this SUnit to instructions
/// that occur later in the same scheduling region if they read from or write to
/// the virtual register defined at OperIdx.
@@ -382,7 +388,7 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
MachineInstr *MI = SU->getInstr();
MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
LaneBitmask DefLaneMask;
LaneBitmask KillLaneMask;
@@ -393,6 +399,18 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
// earlier instruction.
KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
+ if (MO.getSubReg() != 0 && MO.isUndef()) {
+ // Later operands of this instruction may contain other subregister defs of
+ // the same register. The lanes of those defs will be live after this
+ // instruction, so they should not be treated as killed by it even though
+ // they appear to be killed in this one operand.
+ for (int I = OperIdx + 1, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &OtherMO = MI->getOperand(I);
+ if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
+ KillLaneMask &= ~getLaneMaskForMO(OtherMO);
+ }
+ }
+
// Clear undef flag, we'll re-add it later once we know which subregister
// Def is first.
MO.setIsUndef(false);
@@ -402,8 +420,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
}
if (MO.isDead()) {
- assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
- "Dead defs should have no uses");
+ assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
} else {
// Add data dependence to all uses we found so far.
const TargetSubtargetInfo &ST = MF.getSubtarget();
@@ -491,7 +508,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
const MachineInstr *MI = SU->getInstr();
const MachineOperand &MO = MI->getOperand(OperIdx);
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
// Remember the use. Data dependencies will be added when we find the def.
LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
@@ -514,7 +531,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
/// Returns true if MI is an instruction we are unable to reason about
/// (like a call or something with unmodeled side effects).
-static inline bool isGlobalMemoryObject(AliasAnalysis *AA, MachineInstr *MI) {
+static inline bool isGlobalMemoryObject(AAResults *AA, MachineInstr *MI) {
return MI->isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad(AA));
}
@@ -701,7 +718,7 @@ void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
map.reComputeSize();
}
-void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
+void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
RegPressureTracker *RPTracker,
PressureDiffs *PDiffs,
LiveIntervals *LIS,
@@ -821,10 +838,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
const MachineOperand &MO = MI.getOperand(j);
if (!MO.isReg() || !MO.isDef())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ } else if (Register::isVirtualRegister(Reg)) {
HasVRegDef = true;
addVRegDefDeps(SU, j);
}
@@ -838,10 +855,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
// additional use dependencies.
if (!MO.isReg() || !MO.isUse())
continue;
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ Register Reg = MO.getReg();
+ if (Register::isPhysicalRegister(Reg)) {
addPhysRegDeps(SU, j);
- } else if (TargetRegisterInfo::isVirtualRegister(Reg) && MO.readsReg()) {
+ } else if (Register::isVirtualRegister(Reg) && MO.readsReg()) {
addVRegUseDeps(SU, j);
}
}
@@ -1071,7 +1088,7 @@ static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
for (MachineOperand &MO : MI.operands()) {
if (!MO.isReg() || !MO.readsReg())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
@@ -1102,7 +1119,7 @@ void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
if (MO.isReg()) {
if (!MO.isDef())
continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (!Reg)
continue;
LiveRegs.removeReg(Reg);
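
The deadDefHasNoUse change above can be pictured with plain bitmasks. A minimal sketch, assuming a std::map in place of CurrentVRegUses and a uint32_t in place of LaneBitmask; names are illustrative, not LLVM APIs:

#include <cassert>
#include <cstdint>
#include <map>

using LaneMask = uint32_t;

static bool deadDefHasNoUse(const std::map<unsigned, LaneMask> &CurrentUses,
                            unsigned Reg, LaneMask DefLanes) {
  auto It = CurrentUses.find(Reg);
  if (It == CurrentUses.end())
    return true;                       // no use of the vreg recorded at all
  return (It->second & DefLanes) == 0; // recorded uses touch disjoint lanes
}

int main() {
  std::map<unsigned, LaneMask> Uses{{42, 0b0010}}; // one subreg of vreg42 used
  assert(deadDefHasNoUse(Uses, 7, 0b0001));   // untracked vreg
  assert(deadDefHasNoUse(Uses, 42, 0b0100));  // disjoint lanes
  assert(!deadDefHasNoUse(Uses, 42, 0b0011)); // overlapping lanes
  return 0;
}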
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 49c922f560fa..e8950b58d42d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -24,7 +24,6 @@
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
-#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -111,10 +110,20 @@ static cl::opt<bool>
MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
cl::desc("DAG combiner may split indexing from loads"));
+static cl::opt<bool>
+ EnableStoreMerging("combiner-store-merging", cl::Hidden, cl::init(true),
+ cl::desc("DAG combiner enable merging multiple stores "
+ "into a wider store"));
+
static cl::opt<unsigned> TokenFactorInlineLimit(
"combiner-tokenfactor-inline-limit", cl::Hidden, cl::init(2048),
cl::desc("Limit the number of operands to inline for Token Factors"));
+static cl::opt<unsigned> StoreMergeDependenceLimit(
+ "combiner-store-merge-dependence-limit", cl::Hidden, cl::init(10),
+ cl::desc("Limit the number of times for the same StoreNode and RootNode "
+ "to bail out in store merging dependence check"));
+
namespace {
class DAGCombiner {
@@ -152,6 +161,14 @@ namespace {
/// which have not yet been combined to the worklist.
SmallPtrSet<SDNode *, 32> CombinedNodes;
+ /// Map from a candidate StoreNode to a pair of its RootNode and a count.
+ /// The count tracks how many times this StoreNode has bailed out of the
+ /// dependence check with the same RootNode. Once the count for a pair
+ /// exceeds the limit, the StoreNode is no longer considered a store-merging
+ /// candidate with that RootNode.
+ DenseMap<SDNode *, std::pair<SDNode *, unsigned>> StoreRootCountMap;
+
// AA - Used for DAG load/store alias analysis.
AliasAnalysis *AA;
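
A minimal sketch of the bookkeeping described by the StoreRootCountMap comment above, assuming a plain std::map and an illustrative Node type; the limit mirrors the cl::opt default:

#include <cassert>
#include <map>
#include <utility>

struct Node {};
static const unsigned DependenceLimit = 10; // mirrors the cl::opt default

using RootCountMap = std::map<Node *, std::pair<Node *, unsigned>>;

static bool shouldConsider(RootCountMap &M, Node *Store, Node *Root) {
  auto It = M.find(Store);
  return It == M.end() || It->second.first != Root ||
         It->second.second < DependenceLimit;
}

static void noteBailOut(RootCountMap &M, Node *Store, Node *Root) {
  auto &Entry = M[Store];
  if (Entry.first != Root)
    Entry = {Root, 0}; // new root for this store: reset the count
  ++Entry.second;
}

int main() {
  RootCountMap M;
  Node S, R;
  for (unsigned I = 0; I < DependenceLimit; ++I) {
    assert(shouldConsider(M, &S, &R));
    noteBailOut(M, &S, &R);
  }
  assert(!shouldConsider(M, &S, &R)); // limit reached for this pair
  return 0;
}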
@@ -236,6 +253,7 @@ namespace {
void removeFromWorklist(SDNode *N) {
CombinedNodes.erase(N);
PruningList.remove(N);
+ StoreRootCountMap.erase(N);
auto It = WorklistMap.find(N);
if (It == WorklistMap.end())
@@ -361,6 +379,7 @@ namespace {
SDValue visitSUBE(SDNode *N);
SDValue visitSUBCARRY(SDNode *N);
SDValue visitMUL(SDNode *N);
+ SDValue visitMULFIX(SDNode *N);
SDValue useDivRem(SDNode *N);
SDValue visitSDIV(SDNode *N);
SDValue visitSDIVLike(SDValue N0, SDValue N1, SDNode *N);
@@ -421,7 +440,6 @@ namespace {
SDValue visitFP_TO_SINT(SDNode *N);
SDValue visitFP_TO_UINT(SDNode *N);
SDValue visitFP_ROUND(SDNode *N);
- SDValue visitFP_ROUND_INREG(SDNode *N);
SDValue visitFP_EXTEND(SDNode *N);
SDValue visitFNEG(SDNode *N);
SDValue visitFABS(SDNode *N);
@@ -470,7 +488,7 @@ namespace {
SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
SDValue N1, SDNodeFlags Flags);
- SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
+ SDValue visitShiftByConstant(SDNode *N);
SDValue foldSelectOfConstants(SDNode *N);
SDValue foldVSelectOfConstants(SDNode *N);
@@ -497,6 +515,7 @@ namespace {
bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
SDValue &CC) const;
bool isOneUseSetCC(SDValue N) const;
+ bool isCheaperToUseNegatedFPOps(SDValue X, SDValue Y);
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
@@ -510,7 +529,7 @@ namespace {
SDValue BuildSDIVPow2(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDValue BuildLogBase2(SDValue V, const SDLoc &DL);
- SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags);
+ SDValue BuildDivEstimate(SDValue N, SDValue Op, SDNodeFlags Flags);
SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags Flags);
SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags, bool Recip);
@@ -521,11 +540,11 @@ namespace {
SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
bool DemandHighBits = true);
SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
- SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
+ SDValue MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
SDValue InnerPos, SDValue InnerNeg,
unsigned PosOpcode, unsigned NegOpcode,
const SDLoc &DL);
- SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
+ SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
SDValue MatchLoadCombine(SDNode *N);
SDValue MatchStoreCombine(StoreSDNode *N);
SDValue ReduceLoadWidth(SDNode *N);
@@ -742,6 +761,11 @@ CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}
+bool TargetLowering::DAGCombinerInfo::
+recursivelyDeleteUnusedNodes(SDNode *N) {
+ return ((DAGCombiner*)DC)->recursivelyDeleteUnusedNodes(N);
+}
+
void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
@@ -766,195 +790,6 @@ void DAGCombiner::deleteAndRecombine(SDNode *N) {
DAG.DeleteNode(N);
}
-/// Return 1 if we can compute the negated form of the specified expression for
-/// the same cost as the expression itself, or 2 if we can compute the negated
-/// form more cheaply than the expression itself.
-static char isNegatibleForFree(SDValue Op, bool LegalOperations,
- const TargetLowering &TLI,
- const TargetOptions *Options,
- bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return 2;
-
- // Don't allow anything with multiple uses unless we know it is free.
- EVT VT = Op.getValueType();
- const SDNodeFlags Flags = Op->getFlags();
- if (!Op.hasOneUse() &&
- !(Op.getOpcode() == ISD::FP_EXTEND &&
- TLI.isFPExtFree(VT, Op.getOperand(0).getValueType())))
- return 0;
-
- // Don't recurse exponentially.
- if (Depth > 6)
- return 0;
-
- switch (Op.getOpcode()) {
- default: return false;
- case ISD::ConstantFP: {
- if (!LegalOperations)
- return 1;
-
- // Don't invert constant FP values after legalization unless the target says
- // the negated constant is legal.
- return TLI.isOperationLegal(ISD::ConstantFP, VT) ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
- ForCodeSize);
- }
- case ISD::BUILD_VECTOR: {
- // Only permit BUILD_VECTOR of constants.
- if (llvm::any_of(Op->op_values(), [&](SDValue N) {
- return !N.isUndef() && !isa<ConstantFPSDNode>(N);
- }))
- return 0;
- if (!LegalOperations)
- return 1;
- if (TLI.isOperationLegal(ISD::ConstantFP, VT) &&
- TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
- return 1;
- return llvm::all_of(Op->op_values(), [&](SDValue N) {
- return N.isUndef() ||
- TLI.isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
- ForCodeSize);
- });
- }
- case ISD::FADD:
- if (!Options->UnsafeFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // After operation legalization, it might not be legal to create new FSUBs.
- if (LegalOperations && !TLI.isOperationLegalOrCustom(ISD::FSUB, VT))
- return 0;
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- case ISD::FSUB:
- // We can't turn -(A-B) into B-A when we honor signed zeros.
- if (!Options->NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
- return 0;
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return 1;
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
- if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
- Options, ForCodeSize, Depth + 1))
- return V;
-
- return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
-
- case ISD::FP_EXTEND:
- case ISD::FP_ROUND:
- case ISD::FSIN:
- return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
- ForCodeSize, Depth + 1);
- }
-}
-
-/// If isNegatibleForFree returns true, return the newly negated expression.
-static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
- bool LegalOperations, bool ForCodeSize,
- unsigned Depth = 0) {
- // fneg is removable even if it has multiple uses.
- if (Op.getOpcode() == ISD::FNEG)
- return Op.getOperand(0);
-
- assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
- const TargetOptions &Options = DAG.getTarget().Options;
- const SDNodeFlags Flags = Op->getFlags();
-
- switch (Op.getOpcode()) {
- default: llvm_unreachable("Unknown code");
- case ISD::ConstantFP: {
- APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
- V.changeSign();
- return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
- }
- case ISD::BUILD_VECTOR: {
- SmallVector<SDValue, 4> Ops;
- for (SDValue C : Op->op_values()) {
- if (C.isUndef()) {
- Ops.push_back(C);
- continue;
- }
- APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
- V.changeSign();
- Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
- }
- return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
- }
- case ISD::FADD:
- assert(Options.UnsafeFPMath || Flags.hasNoSignedZeros());
-
- // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
- // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(0), Flags);
- case ISD::FSUB:
- // fold (fneg (fsub 0, B)) -> B
- if (ConstantFPSDNode *N0CFP =
- isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
- if (N0CFP->isZero())
- return Op.getOperand(1);
-
- // fold (fneg (fsub A, B)) -> (fsub B, A)
- return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
- Op.getOperand(1), Op.getOperand(0), Flags);
-
- case ISD::FMUL:
- case ISD::FDIV:
- // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
- if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
- DAG.getTargetLoweringInfo(), &Options, ForCodeSize,
- Depth + 1))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1), Flags);
-
- // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- Op.getOperand(0),
- GetNegatedExpression(Op.getOperand(1), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1), Flags);
-
- case ISD::FP_EXTEND:
- case ISD::FSIN:
- return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1));
- case ISD::FP_ROUND:
- return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
- GetNegatedExpression(Op.getOperand(0), DAG,
- LegalOperations, ForCodeSize,
- Depth + 1),
- Op.getOperand(1));
- }
-}
-
// APInts must be the same size for most operations, this helper
// function zero extends the shorter of the pair so that they match.
// We provide an Offset so that we can create bitwidths that won't overflow.
@@ -1124,7 +959,6 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL,
SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
if (!OpNode.getNode())
return SDValue();
- AddToWorklist(OpNode.getNode());
return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
}
}
@@ -1438,7 +1272,6 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
SDValue RV =
DAG.getNode(ISD::TRUNCATE, DL, VT, DAG.getNode(Opc, DL, PVT, N0, N1));
- AddToWorklist(N0.getNode());
if (Replace)
ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
@@ -1591,8 +1424,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
for (SDNode *LN : UpdatedNodes) {
- AddToWorklist(LN);
AddUsersToWorklist(LN);
+ AddToWorklist(LN);
}
if (!NIsValid)
continue;
@@ -1673,6 +1506,10 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ADDCARRY: return visitADDCARRY(N);
case ISD::SUBE: return visitSUBE(N);
case ISD::SUBCARRY: return visitSUBCARRY(N);
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: return visitMULFIX(N);
case ISD::MUL: return visitMUL(N);
case ISD::SDIV: return visitSDIV(N);
case ISD::UDIV: return visitUDIV(N);
@@ -1736,7 +1573,6 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::FP_TO_SINT: return visitFP_TO_SINT(N);
case ISD::FP_TO_UINT: return visitFP_TO_UINT(N);
case ISD::FP_ROUND: return visitFP_ROUND(N);
- case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N);
case ISD::FP_EXTEND: return visitFP_EXTEND(N);
case ISD::FNEG: return visitFNEG(N);
case ISD::FABS: return visitFABS(N);
@@ -3308,6 +3144,18 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
}
+ if (TLI.isOperationLegalOrCustom(ISD::ADDCARRY, VT)) {
+ // (sub Carry, X) -> (addcarry (sub 0, X), 0, Carry)
+ if (SDValue Carry = getAsCarry(TLI, N0)) {
+ SDValue X = N1;
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue NegX = DAG.getNode(ISD::SUB, DL, VT, Zero, X);
+ return DAG.getNode(ISD::ADDCARRY, DL,
+ DAG.getVTList(VT, Carry.getValueType()), NegX, Zero,
+ Carry);
+ }
+ }
+
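
The new (sub Carry, X) fold relies on a simple modular identity. A standalone check, assuming 32-bit wrapping arithmetic and Carry in {0, 1}:

#include <cassert>
#include <cstdint>

int main() {
  // (sub Carry, X) == (addcarry (sub 0, X), 0, Carry)
  for (uint32_t Carry : {0u, 1u})
    for (uint32_t X : {0u, 1u, 42u, 0x80000000u, ~0u})
      assert(Carry - X == (0u - X) + 0u + Carry);
  return 0;
}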
return SDValue();
}
@@ -3442,6 +3290,30 @@ SDValue DAGCombiner::visitSUBCARRY(SDNode *N) {
return SDValue();
}
+// Notice that "mulfix" can be any of SMULFIX, SMULFIXSAT, UMULFIX and
+// UMULFIXSAT here.
+SDValue DAGCombiner::visitMULFIX(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ SDValue Scale = N->getOperand(2);
+ EVT VT = N0.getValueType();
+
+ // fold (mulfix x, undef, scale) -> 0
+ if (N0.isUndef() || N1.isUndef())
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ // Canonicalize constant to RHS (vector doesn't have to splat)
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0, Scale);
+
+ // fold (mulfix x, 0, scale) -> 0
+ if (isNullConstant(N1))
+ return DAG.getConstant(0, SDLoc(N), VT);
+
+ return SDValue();
+}
+
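
The (mulfix x, 0, scale) -> 0 fold holds for any scale because a fixed-point multiply is an ordinary widening multiply followed by a shift. A standalone sketch of the unsigned, non-saturating case; umulfix here is an illustrative helper, not an LLVM API:

#include <cassert>
#include <cstdint>

static uint32_t umulfix(uint32_t A, uint32_t B, unsigned Scale) {
  return uint32_t((uint64_t(A) * uint64_t(B)) >> Scale);
}

int main() {
  for (unsigned Scale = 0; Scale < 32; ++Scale)
    assert(umulfix(0x12345678u, 0u, Scale) == 0u); // zero operand -> zero
  // Sanity check: 1.5 * 2.0 in Q16.16 is 3.0.
  assert(umulfix(0x18000u, 0x20000u, 16) == 0x30000u);
  return 0;
}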
SDValue DAGCombiner::visitMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -3537,7 +3409,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
- if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
@@ -4083,10 +3955,10 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (VT.isVector()) {
// fold (mulhs x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because an undef element may exist.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhs x, 0) -> 0
@@ -4095,7 +3967,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
// fold (mulhs x, 1) -> (sra x, size(x)-1)
if (isOneConstant(N1))
return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
- DAG.getConstant(N0.getValueSizeInBits() - 1, DL,
+ DAG.getConstant(N0.getScalarValueSizeInBits() - 1, DL,
getShiftAmountTy(N0.getValueType())));
// fold (mulhs x, undef) -> 0
@@ -4130,10 +4002,10 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (VT.isVector()) {
// fold (mulhu x, 0) -> 0
- if (ISD::isBuildVectorAllZeros(N1.getNode()))
- return N1;
- if (ISD::isBuildVectorAllZeros(N0.getNode()))
- return N0;
+ // Do not return N0/N1, because an undef element may exist.
+ if (ISD::isBuildVectorAllZeros(N0.getNode()) ||
+ ISD::isBuildVectorAllZeros(N1.getNode()))
+ return DAG.getConstant(0, DL, VT);
}
// fold (mulhu x, 0) -> 0
@@ -4265,6 +4137,18 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
EVT VT = N->getValueType(0);
SDLoc DL(N);
+ // (umul_lohi N0, 0) -> (0, 0)
+ if (isNullConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, Zero, Zero);
+ }
+
+ // (umul_lohi N0, 1) -> (N0, 0)
+ if (isOneConstant(N->getOperand(1))) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return CombineTo(N, N->getOperand(0), Zero);
+ }
+
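
A standalone check of the two umul_lohi identities introduced above, using a 32x32->64 multiply split into (lo, hi); umul_lohi here is an illustrative helper:

#include <cassert>
#include <cstdint>
#include <utility>

static std::pair<uint32_t, uint32_t> umul_lohi(uint32_t A, uint32_t B) {
  uint64_t P = uint64_t(A) * uint64_t(B);
  return {uint32_t(P), uint32_t(P >> 32)};
}

int main() {
  uint32_t X = 0xDEADBEEFu;
  auto R0 = umul_lohi(X, 0u);               // (umul_lohi x, 0) -> (0, 0)
  assert(R0.first == 0u && R0.second == 0u);
  auto R1 = umul_lohi(X, 1u);               // (umul_lohi x, 1) -> (x, 0)
  assert(R1.first == X && R1.second == 0u);
  return 0;
}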
// If a type twice as wide is legal, transform the mulhu to a wider
// multiply plus a shift.
if (VT.isSimple() && !VT.isVector()) {
@@ -4290,13 +4174,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
}
SDValue DAGCombiner::visitMULO(SDNode *N) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ EVT VT = N0.getValueType();
bool IsSigned = (ISD::SMULO == N->getOpcode());
+ EVT CarryVT = N->getValueType(1);
+ SDLoc DL(N);
+
+ // canonicalize constant to RHS.
+ if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
+ !DAG.isConstantIntBuildVectorOrConstantInt(N1))
+ return DAG.getNode(N->getOpcode(), DL, N->getVTList(), N1, N0);
+
+ // fold (mulo x, 0) -> 0 + no carry out
+ if (isNullOrNullSplat(N1))
+ return CombineTo(N, DAG.getConstant(0, DL, VT),
+ DAG.getConstant(0, DL, CarryVT));
+
// (mulo x, 2) -> (addo x, x)
- if (ConstantSDNode *C2 = isConstOrConstSplat(N->getOperand(1)))
+ if (ConstantSDNode *C2 = isConstOrConstSplat(N1))
if (C2->getAPIntValue() == 2)
- return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, SDLoc(N),
- N->getVTList(), N->getOperand(0), N->getOperand(0));
+ return DAG.getNode(IsSigned ? ISD::SADDO : ISD::UADDO, DL,
+ N->getVTList(), N0, N0);
return SDValue();
}
@@ -4444,7 +4344,9 @@ SDValue DAGCombiner::hoistLogicOpWithSameOpcodeHands(SDNode *N) {
if ((HandOpcode == ISD::BITCAST || HandOpcode == ISD::SCALAR_TO_VECTOR) &&
Level <= AfterLegalizeTypes) {
// Input types must be integer and the same.
- if (XVT.isInteger() && XVT == Y.getValueType()) {
+ if (XVT.isInteger() && XVT == Y.getValueType() &&
+ !(VT.isVector() && TLI.isTypeLegal(VT) &&
+ !XVT.isVector() && !TLI.isTypeLegal(XVT))) {
SDValue Logic = DAG.getNode(LogicOpcode, DL, XVT, X, Y);
return DAG.getNode(HandOpcode, DL, VT, Logic);
}
@@ -4770,8 +4672,8 @@ bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
return true;
}
- // Do not change the width of a volatile load.
- if (LoadN->isVolatile())
+ // Do not change the width of volatile or atomic loads.
+ if (!LoadN->isSimple())
return false;
// Do not generate loads of non-round integer types since these can
@@ -4803,15 +4705,15 @@ bool DAGCombiner::isLegalNarrowLdSt(LSBaseSDNode *LDST,
if (!MemVT.isRound())
return false;
- // Don't change the width of a volatile load.
- if (LDST->isVolatile())
+ // Don't change the width of volatile or atomic loads.
+ if (!LDST->isSimple())
return false;
// Verify that we are actually reducing a load width here.
if (LDST->getMemoryVT().getSizeInBits() < MemVT.getSizeInBits())
return false;
- // Ensure that this isn't going to produce an unsupported unaligned access.
+ // Ensure that this isn't going to produce an unsupported memory access.
if (ShAmt &&
!TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), MemVT,
LDST->getAddressSpace(), ShAmt / 8,
@@ -5076,6 +4978,59 @@ SDValue DAGCombiner::unfoldExtremeBitClearingToShifts(SDNode *N) {
return T1;
}
+/// Try to replace shift/logic that tests if a bit is clear with mask + setcc.
+/// For a target with a bit test, this is expected to become test + set and save
+/// at least 1 instruction.
+static SDValue combineShiftAnd1ToBitTest(SDNode *And, SelectionDAG &DAG) {
+ assert(And->getOpcode() == ISD::AND && "Expected an 'and' op");
+
+ // This is probably not worthwhile without a supported type.
+ EVT VT = And->getValueType(0);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
+
+ // Look through an optional extension and find a 'not'.
+ // TODO: Should we favor test+set even without the 'not' op?
+ SDValue Not = And->getOperand(0), And1 = And->getOperand(1);
+ if (Not.getOpcode() == ISD::ANY_EXTEND)
+ Not = Not.getOperand(0);
+ if (!isBitwiseNot(Not) || !Not.hasOneUse() || !isOneConstant(And1))
+ return SDValue();
+
+ // Look through an optional truncation. The source operand may not be the same
+ // type as the original 'and', but that is ok because we are masking off
+ // everything but the low bit.
+ SDValue Srl = Not.getOperand(0);
+ if (Srl.getOpcode() == ISD::TRUNCATE)
+ Srl = Srl.getOperand(0);
+
+ // Match a shift-right by constant.
+ if (Srl.getOpcode() != ISD::SRL || !Srl.hasOneUse() ||
+ !isa<ConstantSDNode>(Srl.getOperand(1)))
+ return SDValue();
+
+ // We might have looked through casts that make this transform invalid.
+ // TODO: If the source type is wider than the result type, do the mask and
+ // compare in the source type.
+ const APInt &ShiftAmt = Srl.getConstantOperandAPInt(1);
+ unsigned VTBitWidth = VT.getSizeInBits();
+ if (ShiftAmt.uge(VTBitWidth))
+ return SDValue();
+
+ // Turn this into a bit-test pattern using mask op + setcc:
+ // and (not (srl X, C)), 1 --> (and X, 1<<C) == 0
+ SDLoc DL(And);
+ SDValue X = DAG.getZExtOrTrunc(Srl.getOperand(0), DL, VT);
+ EVT CCVT = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
+ SDValue Mask = DAG.getConstant(
+ APInt::getOneBitSet(VTBitWidth, ShiftAmt.getZExtValue()), DL, VT);
+ SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, Mask);
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ SDValue Setcc = DAG.getSetCC(DL, CCVT, NewAnd, Zero, ISD::SETEQ);
+ return DAG.getZExtOrTrunc(Setcc, DL, VT);
+}
+
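
A standalone check of the equivalence combineShiftAnd1ToBitTest relies on: and (not (srl X, C)), 1 yields 1 exactly when bit C of X is clear, i.e. (X & (1 << C)) == 0:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X : {0u, 1u, 0x80u, 0xDEADBEEFu, ~0u})
    for (unsigned C = 0; C < 32; ++C) {
      uint32_t ShiftForm = (~(X >> C)) & 1u;            // and (not (srl X, C)), 1
      uint32_t BitTest = (X & (1u << C)) == 0 ? 1u : 0u; // (X & (1<<C)) == 0
      assert(ShiftForm == BitTest);
    }
  return 0;
}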
SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -5163,6 +5118,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
+
// similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
// (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
// already be zero by virtue of the width of the base type of the load.
@@ -5337,7 +5293,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
unsigned MemBitSize = MemVT.getScalarSizeInBits();
APInt ExtBits = APInt::getHighBitsSet(ExtBitSize, ExtBitSize - MemBitSize);
if (DAG.MaskedValueIsZero(N1, ExtBits) &&
- ((!LegalOperations && !LN0->isVolatile()) ||
+ ((!LegalOperations && LN0->isSimple()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
SDValue ExtLoad =
DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(),
@@ -5358,6 +5314,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (SDValue Shifts = unfoldExtremeBitClearingToShifts(N))
return Shifts;
+ if (TLI.hasBitTest(N0, N1))
+ if (SDValue V = combineShiftAnd1ToBitTest(N, DAG))
+ return V;
+
return SDValue();
}
@@ -5564,6 +5524,23 @@ static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
return true;
}
+// Match 2 elements of a packed halfword bswap.
+static bool isBSwapHWordPair(SDValue N, MutableArrayRef<SDNode *> Parts) {
+ if (N.getOpcode() == ISD::OR)
+ return isBSwapHWordElement(N.getOperand(0), Parts) &&
+ isBSwapHWordElement(N.getOperand(1), Parts);
+
+ if (N.getOpcode() == ISD::SRL && N.getOperand(0).getOpcode() == ISD::BSWAP) {
+ ConstantSDNode *C = isConstOrConstSplat(N.getOperand(1));
+ if (!C || C->getAPIntValue() != 16)
+ return false;
+ Parts[0] = Parts[1] = N.getOperand(0).getOperand(0).getNode();
+ return true;
+ }
+
+ return false;
+}
+
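
Why (srl (bswap x), 16) can stand in for two halfword-bswap elements: the packed halfword bswap of x equals (bswap(x) >> 16) | (bswap(x) << 16), so each half of that OR supplies two of the four byte parts. A standalone check; bswap32 and bswapHWord are illustrative helpers:

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t X) {
  return (X << 24) | ((X & 0xFF00u) << 8) | ((X >> 8) & 0xFF00u) | (X >> 24);
}

static uint32_t bswapHWord(uint32_t X) {
  // Swap the bytes within each 16-bit half.
  return ((X & 0x00FF00FFu) << 8) | ((X >> 8) & 0x00FF00FFu);
}

int main() {
  for (uint32_t X : {0x01020304u, 0xDEADBEEFu, 0u, ~0u}) {
    uint32_t B = bswap32(X);
    assert(bswapHWord(X) == ((B >> 16) | (B << 16)));
  }
  return 0;
}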
/// Match a 32-bit packed halfword bswap. That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
@@ -5581,43 +5558,26 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
return SDValue();
// Look for either
- // (or (or (and), (and)), (or (and), (and)))
- // (or (or (or (and), (and)), (and)), (and))
- if (N0.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N00 = N0.getOperand(0);
- SDValue N01 = N0.getOperand(1);
+ // (or (bswaphpair), (bswaphpair))
+ // (or (or (bswaphpair), (and)), (and))
+ // (or (or (and), (bswaphpair)), (and))
SDNode *Parts[4] = {};
- if (N1.getOpcode() == ISD::OR &&
- N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
+ if (isBSwapHWordPair(N0, Parts)) {
// (or (or (and), (and)), (or (and), (and)))
- if (!isBSwapHWordElement(N00, Parts))
+ if (!isBSwapHWordPair(N1, Parts))
return SDValue();
-
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- SDValue N10 = N1.getOperand(0);
- if (!isBSwapHWordElement(N10, Parts))
- return SDValue();
- SDValue N11 = N1.getOperand(1);
- if (!isBSwapHWordElement(N11, Parts))
- return SDValue();
- } else {
+ } else if (N0.getOpcode() == ISD::OR) {
// (or (or (or (and), (and)), (and)), (and))
if (!isBSwapHWordElement(N1, Parts))
return SDValue();
- if (!isBSwapHWordElement(N01, Parts))
- return SDValue();
- if (N00.getOpcode() != ISD::OR)
- return SDValue();
- SDValue N000 = N00.getOperand(0);
- if (!isBSwapHWordElement(N000, Parts))
- return SDValue();
- SDValue N001 = N00.getOperand(1);
- if (!isBSwapHWordElement(N001, Parts))
+ SDValue N00 = N0.getOperand(0);
+ SDValue N01 = N0.getOperand(1);
+ if (!(isBSwapHWordElement(N01, Parts) && isBSwapHWordPair(N00, Parts)) &&
+ !(isBSwapHWordElement(N00, Parts) && isBSwapHWordPair(N01, Parts)))
return SDValue();
- }
+ } else
+ return SDValue();
// Make sure the parts are all coming from the same node.
if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
@@ -5791,15 +5751,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);
- bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- if (!LegalMask) {
- std::swap(NewLHS, NewRHS);
- ShuffleVectorSDNode::commuteMask(Mask);
- LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS,
+ Mask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
}
}
@@ -5867,8 +5823,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
return V;
// See if this is some rotate idiom.
- if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
- return SDValue(Rot, 0);
+ if (SDValue Rot = MatchRotate(N0, N1, SDLoc(N)))
+ return Rot;
if (SDValue Load = MatchLoadCombine(N))
return Load;
@@ -5914,6 +5870,9 @@ static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
/// Otherwise, returns an expansion of \p ExtractFrom based on the following
/// patterns:
///
+/// (or (add v v) (shrl v bitwidth-1)):
+/// expands (add v v) -> (shl v 1)
+///
/// (or (mul v c0) (shrl (mul v c1) c2)):
/// expands (mul v c0) -> (shl (mul v c1) c3)
///
@@ -5936,6 +5895,23 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
"Existing shift must be valid as a rotate half");
ExtractFrom = stripConstantMask(DAG, ExtractFrom, Mask);
+
+ // Value and Type of the shift.
+ SDValue OppShiftLHS = OppShift.getOperand(0);
+ EVT ShiftedVT = OppShiftLHS.getValueType();
+
+ // Amount of the existing shift.
+ ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
+
+ // (add v v) -> (shl v 1)
+ if (OppShift.getOpcode() == ISD::SRL && OppShiftCst &&
+ ExtractFrom.getOpcode() == ISD::ADD &&
+ ExtractFrom.getOperand(0) == ExtractFrom.getOperand(1) &&
+ ExtractFrom.getOperand(0) == OppShiftLHS &&
+ OppShiftCst->getAPIntValue() == ShiftedVT.getScalarSizeInBits() - 1)
+ return DAG.getNode(ISD::SHL, DL, ShiftedVT, OppShiftLHS,
+ DAG.getShiftAmountConstant(1, ShiftedVT, DL));
+
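
The (add v v) expansion above is justified because v + v == v << 1 in modular arithmetic, and together with (srl v, bitwidth-1) it completes a rotate-left-by-one. A standalone 32-bit check:

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t V : {1u, 0x80000000u, 0xDEADBEEFu, ~0u}) {
    uint32_t AddForm = (V + V) | (V >> 31); // (or (add v v) (srl v 31))
    uint32_t Rotl1 = (V << 1) | (V >> 31);  // rotl(v, 1)
    assert(AddForm == Rotl1);
  }
  return 0;
}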
// Preconditions:
// (or (op0 v c0) (shiftl/r (op0 v c1) c2))
//
@@ -5959,15 +5935,11 @@ static SDValue extractShiftForRotate(SelectionDAG &DAG, SDValue OppShift,
// op0 must be the same opcode on both sides, have the same LHS argument,
// and produce the same value type.
- SDValue OppShiftLHS = OppShift.getOperand(0);
- EVT ShiftedVT = OppShiftLHS.getValueType();
if (OppShiftLHS.getOpcode() != ExtractFrom.getOpcode() ||
OppShiftLHS.getOperand(0) != ExtractFrom.getOperand(0) ||
ShiftedVT != ExtractFrom.getValueType())
return SDValue();
- // Amount of the existing shift.
- ConstantSDNode *OppShiftCst = isConstOrConstSplat(OppShift.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the shift's LHS op.
ConstantSDNode *OppLHSCst = isConstOrConstSplat(OppShiftLHS.getOperand(1));
// Constant mul/udiv/shift amount from the RHS of the ExtractFrom op.
@@ -6137,7 +6109,7 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
-SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
+SDValue DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
SDValue Neg, SDValue InnerPos,
SDValue InnerNeg, unsigned PosOpcode,
unsigned NegOpcode, const SDLoc &DL) {
@@ -6152,32 +6124,33 @@ SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
- HasPos ? Pos : Neg).getNode();
+ HasPos ? Pos : Neg);
}
- return nullptr;
+ return SDValue();
}
// MatchRotate - Handle an 'or' of two operands. If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
-SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
+SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// Must be a legal type. Expanded and promoted types won't work with rotates.
EVT VT = LHS.getValueType();
- if (!TLI.isTypeLegal(VT)) return nullptr;
+ if (!TLI.isTypeLegal(VT))
+ return SDValue();
// The target must have at least one rotate flavor.
bool HasROTL = hasOperation(ISD::ROTL, VT);
bool HasROTR = hasOperation(ISD::ROTR, VT);
- if (!HasROTL && !HasROTR) return nullptr;
+ if (!HasROTL && !HasROTR)
+ return SDValue();
// Check for truncated rotate.
if (LHS.getOpcode() == ISD::TRUNCATE && RHS.getOpcode() == ISD::TRUNCATE &&
LHS.getOperand(0).getValueType() == RHS.getOperand(0).getValueType()) {
assert(LHS.getValueType() == RHS.getValueType());
- if (SDNode *Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
- return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(),
- SDValue(Rot, 0)).getNode();
+ if (SDValue Rot = MatchRotate(LHS.getOperand(0), RHS.getOperand(0), DL)) {
+ return DAG.getNode(ISD::TRUNCATE, SDLoc(LHS), LHS.getValueType(), Rot);
}
}
@@ -6192,7 +6165,7 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If neither side matched a rotate half, bail
if (!LHSShift && !RHSShift)
- return nullptr;
+ return SDValue();
// InstCombine may have combined a constant shl, srl, mul, or udiv with one
// side of the rotate, so try to handle that here. In all cases we need to
@@ -6215,15 +6188,15 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
// If a side is still missing, nothing else we can do.
if (!RHSShift || !LHSShift)
- return nullptr;
+ return SDValue();
// At this point we've matched or extracted a shift op on each side.
if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
- return nullptr; // Not shifting the same value.
+ return SDValue(); // Not shifting the same value.
if (LHSShift.getOpcode() == RHSShift.getOpcode())
- return nullptr; // Shifts must disagree.
+ return SDValue(); // Shifts must disagree.
// Canonicalize shl to left side in a shl/srl pair.
if (RHSShift.getOpcode() == ISD::SHL) {
@@ -6267,13 +6240,13 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
}
- return Rot.getNode();
+ return Rot;
}
// If there is a mask here, and we have a variable shift, we can't be sure
// that we're masking out the right stuff.
if (LHSMask.getNode() || RHSMask.getNode())
- return nullptr;
+ return SDValue();
// If the shift amount is sign/zext/any-extended just peel it off.
SDValue LExtOp0 = LHSShiftAmt;
@@ -6290,17 +6263,17 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
RExtOp0 = RHSShiftAmt.getOperand(0);
}
- SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
+ SDValue TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
if (TryL)
return TryL;
- SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
+ SDValue TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
if (TryR)
return TryR;
- return nullptr;
+ return SDValue();
}
namespace {
@@ -6415,7 +6388,7 @@ calculateByteProvider(SDValue Op, unsigned Index, unsigned Depth,
Depth + 1);
case ISD::LOAD: {
auto L = cast<LoadSDNode>(Op.getNode());
- if (L->isVolatile() || L->isIndexed())
+ if (!L->isSimple() || L->isIndexed())
return None;
unsigned NarrowBitWidth = L->getMemoryVT().getSizeInBits();
@@ -6504,8 +6477,9 @@ SDValue DAGCombiner::MatchStoreCombine(StoreSDNode *N) {
SDValue Chain;
SmallVector<StoreSDNode *, 8> Stores;
for (StoreSDNode *Store = N; Store; Store = dyn_cast<StoreSDNode>(Chain)) {
+ // TODO: Allow unordered atomics when wider type is legal (see D66309)
if (Store->getMemoryVT() != MVT::i8 ||
- Store->isVolatile() || Store->isIndexed())
+ !Store->isSimple() || Store->isIndexed())
return SDValue();
Stores.push_back(Store);
Chain = Store->getChain();
@@ -6716,7 +6690,8 @@ SDValue DAGCombiner::MatchLoadCombine(SDNode *N) {
return SDValue();
LoadSDNode *L = P->Load;
- assert(L->hasNUsesOfValue(1, 0) && !L->isVolatile() && !L->isIndexed() &&
+ assert(L->hasNUsesOfValue(1, 0) && L->isSimple() &&
+ !L->isIndexed() &&
"Must be enforced by calculateByteProvider");
assert(L->getOffset().isUndef() && "Unindexed load must have undef offset");
@@ -6958,25 +6933,25 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
if (isOneConstant(N1) && VT == MVT::i1 && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isOneUseSetCC(N01) || isOneUseSetCC(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
// fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
if (isAllOnesConstant(N1) && N0.hasOneUse() &&
(N0Opcode == ISD::OR || N0Opcode == ISD::AND)) {
- SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
- if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
+ SDValue N00 = N0.getOperand(0), N01 = N0.getOperand(1);
+ if (isa<ConstantSDNode>(N01) || isa<ConstantSDNode>(N00)) {
unsigned NewOpcode = N0Opcode == ISD::AND ? ISD::OR : ISD::AND;
- LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
- RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
- AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
- return DAG.getNode(NewOpcode, DL, VT, LHS, RHS);
+ N00 = DAG.getNode(ISD::XOR, SDLoc(N00), VT, N00, N1); // N00 = ~N00
+ N01 = DAG.getNode(ISD::XOR, SDLoc(N01), VT, N01, N1); // N01 = ~N01
+ AddToWorklist(N00.getNode()); AddToWorklist(N01.getNode());
+ return DAG.getNode(NewOpcode, DL, VT, N00, N01);
}
}
@@ -7079,26 +7054,103 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
return SDValue();
}
+/// If we have a shift-by-constant of a bitwise logic op that itself has a
+/// shift-by-constant operand with identical opcode, we may be able to convert
+/// that into 2 independent shifts followed by the logic op. This is a
+/// throughput improvement.
+static SDValue combineShiftOfShiftedLogic(SDNode *Shift, SelectionDAG &DAG) {
+ // Match a one-use bitwise logic op.
+ SDValue LogicOp = Shift->getOperand(0);
+ if (!LogicOp.hasOneUse())
+ return SDValue();
+
+ unsigned LogicOpcode = LogicOp.getOpcode();
+ if (LogicOpcode != ISD::AND && LogicOpcode != ISD::OR &&
+ LogicOpcode != ISD::XOR)
+ return SDValue();
+
+ // Find a matching one-use shift by constant.
+ unsigned ShiftOpcode = Shift->getOpcode();
+ SDValue C1 = Shift->getOperand(1);
+ ConstantSDNode *C1Node = isConstOrConstSplat(C1);
+ assert(C1Node && "Expected a shift with constant operand");
+ const APInt &C1Val = C1Node->getAPIntValue();
+ auto matchFirstShift = [&](SDValue V, SDValue &ShiftOp,
+ const APInt *&ShiftAmtVal) {
+ if (V.getOpcode() != ShiftOpcode || !V.hasOneUse())
+ return false;
+
+ ConstantSDNode *ShiftCNode = isConstOrConstSplat(V.getOperand(1));
+ if (!ShiftCNode)
+ return false;
+
+ // Capture the shifted operand and shift amount value.
+ ShiftOp = V.getOperand(0);
+ ShiftAmtVal = &ShiftCNode->getAPIntValue();
+
+ // Shift amount types do not have to match their operand type, so check that
+ // the constants are the same width.
+ if (ShiftAmtVal->getBitWidth() != C1Val.getBitWidth())
+ return false;
+
+ // The fold is not valid if the sum of the shift values exceeds bitwidth.
+ if ((*ShiftAmtVal + C1Val).uge(V.getScalarValueSizeInBits()))
+ return false;
+
+ return true;
+ };
+
+ // Logic ops are commutative, so check each operand for a match.
+ SDValue X, Y;
+ const APInt *C0Val;
+ if (matchFirstShift(LogicOp.getOperand(0), X, C0Val))
+ Y = LogicOp.getOperand(1);
+ else if (matchFirstShift(LogicOp.getOperand(1), X, C0Val))
+ Y = LogicOp.getOperand(0);
+ else
+ return SDValue();
+
+ // shift (logic (shift X, C0), Y), C1 -> logic (shift X, C0+C1), (shift Y, C1)
+ SDLoc DL(Shift);
+ EVT VT = Shift->getValueType(0);
+ EVT ShiftAmtVT = Shift->getOperand(1).getValueType();
+ SDValue ShiftSumC = DAG.getConstant(*C0Val + C1Val, DL, ShiftAmtVT);
+ SDValue NewShift1 = DAG.getNode(ShiftOpcode, DL, VT, X, ShiftSumC);
+ SDValue NewShift2 = DAG.getNode(ShiftOpcode, DL, VT, Y, C1);
+ return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
+}
+
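
combineShiftOfShiftedLogic depends on shifts distributing over bitwise logic: shift (logic (shift X, C0), Y), C1 equals logic (shift X, C0+C1), (shift Y, C1) whenever C0 + C1 stays below the bit width. A standalone check for the shl/xor case (the same algebra holds for srl and for and/or):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t X = 0x12345678u, Y = 0x0F0F0F0Fu;
  for (unsigned C0 = 0; C0 < 32; ++C0)
    for (unsigned C1 = 0; C0 + C1 < 32; ++C1) {
      uint32_t Nested = ((X << C0) ^ Y) << C1;          // shl (xor (shl X, C0), Y), C1
      uint32_t Split = (X << (C0 + C1)) ^ (Y << C1);    // xor (shl X, C0+C1), (shl Y, C1)
      assert(Nested == Split);
    }
  return 0;
}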
/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// We are looking for: (shift being one of shl/sra/srl)
/// shift (binop X, C0), C1
/// And want to transform into:
/// binop (shift X, C1), (shift C0, C1)
-SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
+SDValue DAGCombiner::visitShiftByConstant(SDNode *N) {
+ assert(isConstOrConstSplat(N->getOperand(1)) && "Expected constant operand");
+
// Do not turn a 'not' into a regular xor.
if (isBitwiseNot(N->getOperand(0)))
return SDValue();
// The inner binop must be one-use, since we want to replace it.
- SDNode *LHS = N->getOperand(0).getNode();
- if (!LHS->hasOneUse()) return SDValue();
+ SDValue LHS = N->getOperand(0);
+ if (!LHS.hasOneUse() || !TLI.isDesirableToCommuteWithShift(N, Level))
+ return SDValue();
+
+ // TODO: This is limited to early combining because it may reveal regressions
+ // otherwise. But since we just checked a target hook to see if this is
+ // desirable, that should have filtered out cases where this interferes
+ // with some other pattern matching.
+ if (!LegalTypes)
+ if (SDValue R = combineShiftOfShiftedLogic(N, DAG))
+ return R;
// We want to pull some binops through shifts, so that we have (and (shift))
// instead of (shift (and)), likewise for add, or, xor, etc. This sort of
// thing happens with address calculations, so it's important to canonicalize
// it.
- switch (LHS->getOpcode()) {
+ switch (LHS.getOpcode()) {
default:
return SDValue();
case ISD::OR:
@@ -7112,14 +7164,14 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
}
// We require the RHS of the binop to be a constant and not opaque as well.
- ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
+ ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS.getOperand(1));
if (!BinOpCst)
return SDValue();
// FIXME: disable this unless the input to the binop is a shift by a constant
// or is copy/select. Enable this in other cases once we figure out when it
// is exactly profitable.
- SDValue BinOpLHSVal = LHS->getOperand(0);
+ SDValue BinOpLHSVal = LHS.getOperand(0);
bool IsShiftByConstant = (BinOpLHSVal.getOpcode() == ISD::SHL ||
BinOpLHSVal.getOpcode() == ISD::SRA ||
BinOpLHSVal.getOpcode() == ISD::SRL) &&
@@ -7133,24 +7185,16 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
if (IsCopyOrSelect && N->hasOneUse())
return SDValue();
- EVT VT = N->getValueType(0);
-
- if (!TLI.isDesirableToCommuteWithShift(N, Level))
- return SDValue();
-
// Fold the constants, shifting the binop RHS by the shift amount.
- SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
- N->getValueType(0),
- LHS->getOperand(1), N->getOperand(1));
+ SDLoc DL(N);
+ EVT VT = N->getValueType(0);
+ SDValue NewRHS = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(1),
+ N->getOperand(1));
assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
- // Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(),
- SDLoc(LHS->getOperand(0)),
- VT, LHS->getOperand(0), N->getOperand(1));
-
- // Create the new binop.
- return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
+ SDValue NewShift = DAG.getNode(N->getOpcode(), DL, VT, LHS.getOperand(0),
+ N->getOperand(1));
+ return DAG.getNode(LHS.getOpcode(), DL, VT, NewShift, NewRHS);
}
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
@@ -7478,7 +7522,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
}
if (N1C && !N1C->isOpaque())
- if (SDValue NewSHL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSHL = visitShiftByConstant(N))
return NewSHL;
return SDValue();
@@ -7597,6 +7641,37 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // We convert trunc/ext to opposing shifts in IR, but casts may be cheaper.
+ // sra (add (shl X, N1C), AddC), N1C -->
+ // sext (add (trunc X to (width - N1C)), AddC')
+ if (!LegalTypes && N0.getOpcode() == ISD::ADD && N0.hasOneUse() && N1C &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ N0.getOperand(0).getOperand(1) == N1 && N0.getOperand(0).hasOneUse()) {
+ if (ConstantSDNode *AddC = isConstOrConstSplat(N0.getOperand(1))) {
+ SDValue Shl = N0.getOperand(0);
+ // Determine what the truncate's type would be and ask the target if that
+ // is a free operation.
+ LLVMContext &Ctx = *DAG.getContext();
+ unsigned ShiftAmt = N1C->getZExtValue();
+ EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - ShiftAmt);
+ if (VT.isVector())
+ TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
+
+ // TODO: The simple type check probably belongs in the default hook
+ // implementation and/or target-specific overrides (because
+ // non-simple types likely require masking when legalized), but that
+ // restriction may conflict with other transforms.
+ if (TruncVT.isSimple() && TLI.isTruncateFree(VT, TruncVT)) {
+ SDLoc DL(N);
+ SDValue Trunc = DAG.getZExtOrTrunc(Shl.getOperand(0), DL, TruncVT);
+ SDValue ShiftC = DAG.getConstant(AddC->getAPIntValue().lshr(ShiftAmt).
+ trunc(TruncVT.getScalarSizeInBits()), DL, TruncVT);
+ SDValue Add = DAG.getNode(ISD::ADD, DL, TruncVT, Trunc, ShiftC);
+ return DAG.getSExtOrTrunc(Add, DL, VT);
+ }
+ }
+ }
+
// fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
N1.getOperand(0).getOpcode() == ISD::AND) {
@@ -7638,7 +7713,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRA = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRA = visitShiftByConstant(N))
return NewSRA;
return SDValue();
@@ -7819,7 +7894,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return SDValue(N, 0);
if (N1C && !N1C->isOpaque())
- if (SDValue NewSRL = visitShiftByConstant(N, N1C))
+ if (SDValue NewSRL = visitShiftByConstant(N))
return NewSRL;
// Attempt to convert a srl of a load into a narrower zero-extending load.
@@ -8100,6 +8175,43 @@ static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
}
}
+/// If a (v)select has a condition value that is a sign-bit test, try to smear
+/// the condition operand sign-bit across the value width and use it as a mask.
+static SDValue foldSelectOfConstantsUsingSra(SDNode *N, SelectionDAG &DAG) {
+ SDValue Cond = N->getOperand(0);
+ SDValue C1 = N->getOperand(1);
+ SDValue C2 = N->getOperand(2);
+ assert(isConstantOrConstantVector(C1) && isConstantOrConstantVector(C2) &&
+ "Expected select-of-constants");
+
+ EVT VT = N->getValueType(0);
+ if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse() ||
+ VT != Cond.getOperand(0).getValueType())
+ return SDValue();
+
+ // The inverted-condition + commuted-select variants of these patterns are
+ // canonicalized to these forms in IR.
+ SDValue X = Cond.getOperand(0);
+ SDValue CondC = Cond.getOperand(1);
+ ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
+ if (CC == ISD::SETGT && isAllOnesOrAllOnesSplat(CondC) &&
+ isAllOnesOrAllOnesSplat(C2)) {
+ // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::OR, DL, VT, Sra, C1);
+ }
+ if (CC == ISD::SETLT && isNullOrNullSplat(CondC) && isNullOrNullSplat(C2)) {
+ // i8 X < 0 ? C1 : 0 --> (X >>s 7) & C1
+ SDLoc DL(N);
+ SDValue ShAmtC = DAG.getConstant(X.getScalarValueSizeInBits() - 1, DL, VT);
+ SDValue Sra = DAG.getNode(ISD::SRA, DL, VT, X, ShAmtC);
+ return DAG.getNode(ISD::AND, DL, VT, Sra, C1);
+ }
+ return SDValue();
+}
+
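
A standalone check of the sign-bit smears used by foldSelectOfConstantsUsingSra, assuming the usual arithmetic right shift of negative values (implementation-defined before C++20, but universal in practice):

#include <cassert>
#include <cstdint>

int main() {
  uint32_t C1 = 0x55u;
  for (int32_t X : {0, 1, -1, 42, -42, INT32_MIN, INT32_MAX}) {
    uint32_t Smear = uint32_t(X >> 31); // all-ones if X < 0, else all-zeros
    // i32 X > -1 ? C1 : -1 --> (X >>s 31) | C1
    assert(uint32_t(X > -1 ? C1 : -1) == (Smear | C1));
    // i32 X < 0 ? C1 : 0 --> (X >>s 31) & C1
    assert(uint32_t(X < 0 ? C1 : 0) == (Smear & C1));
  }
  return 0;
}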
SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
SDValue Cond = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -8148,22 +8260,36 @@ SDValue DAGCombiner::foldSelectOfConstants(SDNode *N) {
return Cond;
}
- // For any constants that differ by 1, we can transform the select into an
- // extend and add. Use a target hook because some targets may prefer to
- // transform in the other direction.
+ // Use a target hook because some targets may prefer to transform in the
+ // other direction.
if (TLI.convertSelectOfConstantsToMath(VT)) {
- if (C1->getAPIntValue() - 1 == C2->getAPIntValue()) {
+ // For any constants that differ by 1, we can transform the select into an
+ // extend and add.
+ const APInt &C1Val = C1->getAPIntValue();
+ const APInt &C2Val = C2->getAPIntValue();
+ if (C1Val - 1 == C2Val) {
// select Cond, C1, C1-1 --> add (zext Cond), C1-1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
- if (C1->getAPIntValue() + 1 == C2->getAPIntValue()) {
+ if (C1Val + 1 == C2Val) {
// select Cond, C1, C1+1 --> add (sext Cond), C1+1
if (VT != MVT::i1)
Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
return DAG.getNode(ISD::ADD, DL, VT, Cond, N2);
}
+
+ // select Cond, Pow2, 0 --> (zext Cond) << log2(Pow2)
+ if (C1Val.isPowerOf2() && C2Val.isNullValue()) {
+ if (VT != MVT::i1)
+ Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);
+ SDValue ShAmtC = DAG.getConstant(C1Val.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, Cond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
}
return SDValue();
@@ -8381,23 +8507,6 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) {
return SDValue();
}
-static
-std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
// This function assumes all the vselect's arguments are CONCAT_VECTOR
// nodes and that the condition is a BV of ConstantSDNodes (or undefs).
static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
@@ -8456,7 +8565,6 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
SDValue Mask = MSC->getMask();
- SDValue Data = MSC->getValue();
SDValue Chain = MSC->getChain();
SDLoc DL(N);
@@ -8464,123 +8572,19 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSCATTER data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
- SDValue MaskLo, MaskHi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
-
- EVT MemoryVT = MSC->getMemoryVT();
- unsigned Alignment = MSC->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- SDValue Scale = MSC->getScale();
- SDValue BasePtr = MSC->getBasePtr();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MSC->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MSC->getAAInfo(), MSC->getRanges());
-
- SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo, Scale };
- SDValue Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
- DataLo.getValueType(), DL, OpsLo, MMO);
-
- // The order of the Scatter operation after split is well defined. The "Hi"
- // part comes after the "Lo". So these two operations should be chained one
- // after another.
- SDValue OpsHi[] = { Lo, DataHi, MaskHi, BasePtr, IndexHi, Scale };
- return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ return SDValue();
}
SDValue DAGCombiner::visitMSTORE(SDNode *N) {
MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
SDValue Mask = MST->getMask();
- SDValue Data = MST->getValue();
SDValue Chain = MST->getChain();
- EVT VT = Data.getValueType();
SDLoc DL(N);
// Zap masked stores with a zero mask.
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return Chain;
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MSTORE data type requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue Ptr = MST->getBasePtr();
-
- EVT MemoryVT = MST->getMemoryVT();
- unsigned Alignment = MST->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue DataLo, DataHi;
- std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MST->getPointerInfo(),
- MachineMemOperand::MOStore, LoMemVT.getStoreSize(),
- Alignment, MST->getAAInfo(), MST->getRanges());
-
- Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MST->isCompressingStore());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MST->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOStore, HiMemVT.getStoreSize(), Alignment,
- MST->getAAInfo(), MST->getRanges());
-
- Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
- MST->isTruncatingStore(),
- MST->isCompressingStore());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
- }
return SDValue();
}
@@ -8593,76 +8597,7 @@ SDValue DAGCombiner::visitMGATHER(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MGT->getPassThru(), MGT->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MGATHER result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
-
- if (Mask.getOpcode() != ISD::SETCC)
- return SDValue();
-
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MGT->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
-
- SDValue Chain = MGT->getChain();
- EVT MemoryVT = MGT->getMemoryVT();
- unsigned Alignment = MGT->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- SDValue Scale = MGT->getScale();
- SDValue BasePtr = MGT->getBasePtr();
- SDValue Index = MGT->getIndex();
- SDValue IndexLo, IndexHi;
- std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MGT->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MGT->getAAInfo(), MGT->getRanges());
-
- SDValue OpsLo[] = { Chain, PassThruLo, MaskLo, BasePtr, IndexLo, Scale };
- Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
- MMO);
-
- SDValue OpsHi[] = { Chain, PassThruHi, MaskHi, BasePtr, IndexHi, Scale };
- Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
- MMO);
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
-
- SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { GatherRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
+ return SDValue();
}
SDValue DAGCombiner::visitMLOAD(SDNode *N) {
@@ -8674,76 +8609,6 @@ SDValue DAGCombiner::visitMLOAD(SDNode *N) {
if (ISD::isBuildVectorAllZeros(Mask.getNode()))
return CombineTo(N, MLD->getPassThru(), MLD->getChain());
- if (Level >= AfterLegalizeTypes)
- return SDValue();
-
- // If the MLOAD result requires splitting and the mask is provided by a
- // SETCC, then split both nodes and its operands before legalization. This
- // prevents the type legalizer from unrolling SETCC into scalar comparisons
- // and enables future optimizations (e.g. min/max pattern matching on X86).
- if (Mask.getOpcode() == ISD::SETCC) {
- EVT VT = N->getValueType(0);
-
- // Check if any splitting is required.
- if (TLI.getTypeAction(*DAG.getContext(), VT) !=
- TargetLowering::TypeSplitVector)
- return SDValue();
-
- SDValue MaskLo, MaskHi, Lo, Hi;
- std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
-
- SDValue PassThru = MLD->getPassThru();
- SDValue PassThruLo, PassThruHi;
- std::tie(PassThruLo, PassThruHi) = DAG.SplitVector(PassThru, DL);
-
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
-
- SDValue Chain = MLD->getChain();
- SDValue Ptr = MLD->getBasePtr();
- EVT MemoryVT = MLD->getMemoryVT();
- unsigned Alignment = MLD->getOriginalAlignment();
-
- EVT LoMemVT, HiMemVT;
- std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
-
- MachineMemOperand *MMO = DAG.getMachineFunction().
- getMachineMemOperand(MLD->getPointerInfo(),
- MachineMemOperand::MOLoad, LoMemVT.getStoreSize(),
- Alignment, MLD->getAAInfo(), MLD->getRanges());
-
- Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, PassThruLo, LoMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- Ptr = TLI.IncrementMemoryAddress(Ptr, MaskLo, DL, LoMemVT, DAG,
- MLD->isExpandingLoad());
- unsigned HiOffset = LoMemVT.getStoreSize();
-
- MMO = DAG.getMachineFunction().getMachineMemOperand(
- MLD->getPointerInfo().getWithOffset(HiOffset),
- MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), Alignment,
- MLD->getAAInfo(), MLD->getRanges());
-
- Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, PassThruHi, HiMemVT,
- MMO, ISD::NON_EXTLOAD, MLD->isExpandingLoad());
-
- AddToWorklist(Lo.getNode());
- AddToWorklist(Hi.getNode());
-
- // Build a factor node to remember that this load is independent of the
- // other one.
- Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Legalized the chain result - switch anything that used the old chain to
- // use the new one.
- DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
-
- SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
-
- SDValue RetOps[] = { LoadRes, Chain };
- return DAG.getMergeValues(RetOps, DL);
- }
return SDValue();
}
@@ -8791,6 +8656,18 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) {
return DAG.getNode(ISD::ADD, DL, VT, ExtendedCond, N2);
}
+ // select Cond, Pow2C, 0 --> (zext Cond) << log2(Pow2C)
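+ // e.g. vselect %c, <8,8,8,8>, <0,0,0,0> --> shl (zext %c), <3,3,3,3>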
+ APInt Pow2C;
+ if (ISD::isConstantSplatVector(N1.getNode(), Pow2C) && Pow2C.isPowerOf2() &&
+ isNullOrNullSplat(N2)) {
+ SDValue ZextCond = DAG.getZExtOrTrunc(Cond, DL, VT);
+ SDValue ShAmtC = DAG.getConstant(Pow2C.exactLogBase2(), DL, VT);
+ return DAG.getNode(ISD::SHL, DL, VT, ZextCond, ShAmtC);
+ }
+
+ if (SDValue V = foldSelectOfConstantsUsingSra(N, DAG))
+ return V;
+
// The general case for select-of-constants:
// vselect <N x i1> Cond, C1, C2 --> xor (and (sext Cond), (C1^C2)), C2
// ...but that only makes sense if a vselect is slower than 2 logic ops, so
@@ -8832,13 +8709,12 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
if (isAbs) {
- EVT VT = LHS.getValueType();
if (TLI.isOperationLegalOrCustom(ISD::ABS, VT))
return DAG.getNode(ISD::ABS, DL, VT, LHS);
- SDValue Shift = DAG.getNode(
- ISD::SRA, DL, VT, LHS,
- DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
+ SDValue Shift = DAG.getNode(ISD::SRA, DL, VT, LHS,
+ DAG.getConstant(VT.getScalarSizeInBits() - 1,
+ DL, getShiftAmountTy(VT)));
SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
AddToWorklist(Shift.getNode());
AddToWorklist(Add.getNode());
@@ -8851,10 +8727,9 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
// This is OK if we don't care about what happens if either operand is a
// NaN.
//
- if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, N0.getOperand(0),
- N0.getOperand(1), TLI)) {
- if (SDValue FMinMax = combineMinNumMaxNum(
- DL, VT, N0.getOperand(0), N0.getOperand(1), N1, N2, CC, TLI, DAG))
+ if (N0.hasOneUse() && isLegalToCombineMinNumMaxNum(DAG, LHS, RHS, TLI)) {
+ if (SDValue FMinMax =
+ combineMinNumMaxNum(DL, VT, LHS, RHS, N1, N2, CC, TLI, DAG))
return FMinMax;
}
@@ -9209,8 +9084,9 @@ SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
- !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
- !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ !N0.hasOneUse() || !LN0->isSimple() ||
+ !DstVT.isVector() || !DstVT.isPow2VectorType() ||
+ !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
return SDValue();
SmallVector<SDNode *, 4> SetCCs;
@@ -9411,7 +9287,8 @@ static SDValue tryToFoldExtOfExtload(SelectionDAG &DAG, DAGCombiner &Combiner,
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- if ((LegalOperations || LN0->isVolatile() || VT.isVector()) &&
+ if ((LegalOperations || !LN0->isSimple() || VT.isVector()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, MemVT))
return SDValue();
@@ -9436,7 +9313,7 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
if (!ISD::isNON_EXTLoad(N0.getNode()) ||
!ISD::isUNINDEXEDLoad(N0.getNode()) ||
((LegalOperations || VT.isVector() ||
- cast<LoadSDNode>(N0)->isVolatile()) &&
+ !cast<LoadSDNode>(N0)->isSimple()) &&
!TLI.isLoadExtLegal(ExtLoadType, VT, N0.getValueType())))
return {};
@@ -9468,6 +9345,35 @@ static SDValue tryToFoldExtOfLoad(SelectionDAG &DAG, DAGCombiner &Combiner,
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
+static SDValue tryToFoldExtOfMaskedLoad(SelectionDAG &DAG,
+ const TargetLowering &TLI, EVT VT,
+ SDNode *N, SDValue N0,
+ ISD::LoadExtType ExtLoadType,
+ ISD::NodeType ExtOpc) {
+ if (!N0.hasOneUse())
+ return SDValue();
+
+ MaskedLoadSDNode *Ld = dyn_cast<MaskedLoadSDNode>(N0);
+ if (!Ld || Ld->getExtensionType() != ISD::NON_EXTLOAD)
+ return SDValue();
+
+ if (!TLI.isLoadExtLegal(ExtLoadType, VT, Ld->getValueType(0)))
+ return SDValue();
+
+ if (!TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
+ return SDValue();
+
+ SDLoc dl(Ld);
+ SDValue PassThru = DAG.getNode(ExtOpc, dl, VT, Ld->getPassThru());
+ SDValue NewLoad = DAG.getMaskedLoad(VT, dl, Ld->getChain(),
+ Ld->getBasePtr(), Ld->getMask(),
+ PassThru, Ld->getMemoryVT(),
+ Ld->getMemOperand(), ExtLoadType,
+ Ld->isExpandingLoad());
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), SDValue(NewLoad.getNode(), 1));
+ return NewLoad;
+}
+
static SDValue foldExtendedSignBitTest(SDNode *N, SelectionDAG &DAG,
bool LegalOperations) {
assert((N->getOpcode() == ISD::SIGN_EXTEND ||
@@ -9568,6 +9474,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
ISD::SEXTLOAD, ISD::SIGN_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::SEXTLOAD,
+ ISD::SIGN_EXTEND))
+ return foldedExt;
+
// fold (sext (load x)) to multiple smaller sextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -9856,6 +9767,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
ISD::ZEXTLOAD, ISD::ZERO_EXTEND))
return foldedExt;
+ if (SDValue foldedExt =
+ tryToFoldExtOfMaskedLoad(DAG, TLI, VT, N, N0, ISD::ZEXTLOAD,
+ ISD::ZERO_EXTEND))
+ return foldedExt;
+
// fold (zext (load x)) to multiple smaller zextloads.
// Only on illegal but splittable vectors.
if (SDValue ExtLoad = CombineExtLoad(N))
@@ -10340,7 +10256,10 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
return SDValue();
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
+ // Reducing the width of a volatile load is illegal. For atomics, we may be
+ // able to reduce the width provided we never widen again. (see D66309)
+ if (!LN0->isSimple() || !isLegalNarrowLdSt(LN0, ExtType, ExtVT, ShAmt))
return SDValue();
auto AdjustBigEndianShift = [&](unsigned ShAmt) {
@@ -10369,11 +10288,11 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
SDValue Load;
if (ExtType == ISD::NON_EXTLOAD)
- Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
+ Load = DAG.getLoad(VT, DL, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), NewAlign,
LN0->getMemOperand()->getFlags(), LN0->getAAInfo());
else
- Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
+ Load = DAG.getExtLoad(ExtType, DL, VT, LN0->getChain(), NewPtr,
LN0->getPointerInfo().getWithOffset(PtrOff), ExtVT,
NewAlign, LN0->getMemOperand()->getFlags(),
LN0->getAAInfo());
@@ -10392,7 +10311,6 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
// no larger than the source) then the useful bits of the result are
// zero; we can't simply return the shortened shift, because the result
// of that operation is undefined.
- SDLoc DL(N0);
if (ShLeftAmt >= VT.getSizeInBits())
Result = DAG.getConstant(0, DL, VT);
else
@@ -10513,7 +10431,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isEXTLoad(N0.getNode()) &&
ISD::isUNINDEXEDLoad(N0.getNode()) &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile() &&
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple() &&
N0.hasOneUse()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -10530,7 +10448,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
N0.hasOneUse() &&
EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
@@ -10757,7 +10675,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// after truncation.
if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- if (!LN0->isVolatile() &&
+ if (LN0->isSimple() &&
LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
VT, LN0->getChain(), LN0->getBasePtr(),
@@ -11051,7 +10969,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
+ ((!LegalOperations && cast<LoadSDNode>(N0)->isSimple()) ||
TLI.isOperationLegal(ISD::LOAD, VT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
@@ -11237,15 +11155,10 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) {
for (int i = 0; i != MaskScale; ++i)
NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
- bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- if (!LegalMask) {
- std::swap(SV0, SV1);
- ShuffleVectorSDNode::commuteMask(NewMask);
- LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
- }
-
- if (LegalMask)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
}
return SDValue();
@@ -11998,7 +11911,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// N0 + -0.0 --> N0 (also allowed with +0.0 and fast-math)
ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1, true);
if (N1C && N1C->isZero())
- if (N1C->isNegative() || Options.UnsafeFPMath || Flags.hasNoSignedZeros())
+ if (N1C->isNegative() || Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros())
return N0;
if (SDValue NewSel = foldBinOpIntoSelect(N))
@@ -12006,17 +11919,17 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// fold (fadd A, (fneg B)) -> (fsub A, B)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// fold (fadd (fneg A), B) -> (fsub B, A)
if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
- isNegatibleForFree(N0, LegalOperations, TLI, &Options, ForCodeSize) == 2)
- return DAG.getNode(ISD::FSUB, DL, VT, N1,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize), Flags);
+ TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize) == 2)
+ return DAG.getNode(
+ ISD::FSUB, DL, VT, N1,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize), Flags);
auto isFMulNegTwo = [](SDValue FMul) {
if (!FMul.hasOneUse() || FMul.getOpcode() != ISD::FMUL)
@@ -12056,7 +11969,7 @@ SDValue DAGCombiner::visitFADD(SDNode *N) {
// If 'unsafe math' or reassoc and nsz, fold lots of things.
// TODO: break out portions of the transformations below for which Unsafe is
// considered and which do not require both nsz and reassoc
- if ((Options.UnsafeFPMath ||
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
(Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
AllowNewConst) {
// fadd (fadd x, c1), c2 -> fadd x, c1 + c2
@@ -12175,7 +12088,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
// (fsub A, 0) -> A
if (N1CFP && N1CFP->isZero()) {
- if (!N1CFP->isNegative() || Options.UnsafeFPMath ||
+ if (!N1CFP->isNegative() || Options.NoSignedZerosFPMath ||
Flags.hasNoSignedZeros()) {
return N0;
}
@@ -12195,16 +12108,16 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
if (N0CFP && N0CFP->isZero()) {
if (N0CFP->isNegative() ||
(Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros())) {
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return GetNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N1, Flags);
}
}
- if ((Options.UnsafeFPMath ||
- (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros()))
- && N1.getOpcode() == ISD::FADD) {
+ if (((Options.UnsafeFPMath && Options.NoSignedZerosFPMath) ||
+ (Flags.hasAllowReassociation() && Flags.hasNoSignedZeros())) &&
+ N1.getOpcode() == ISD::FADD) {
// X - (X + Y) -> -Y
if (N0 == N1->getOperand(0))
return DAG.getNode(ISD::FNEG, DL, VT, N1->getOperand(1), Flags);
@@ -12214,10 +12127,10 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
}
// fold (fsub A, (fneg B)) -> (fadd A, B)
- if (isNegatibleForFree(N1, LegalOperations, TLI, &Options, ForCodeSize))
- return DAG.getNode(ISD::FADD, DL, VT, N0,
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize), Flags);
+ if (TLI.isNegatibleForFree(N1, DAG, LegalOperations, ForCodeSize))
+ return DAG.getNode(
+ ISD::FADD, DL, VT, N0,
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
// FSUB -> FMA combines:
if (SDValue Fused = visitFSUBForFMACombine(N)) {
@@ -12228,6 +12141,21 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) {
return SDValue();
}
+/// Return true if both inputs are at least as cheap in negated form and at
+/// least one input is strictly cheaper in negated form.
+bool DAGCombiner::isCheaperToUseNegatedFPOps(SDValue X, SDValue Y) {
+ if (char LHSNeg =
+ TLI.isNegatibleForFree(X, DAG, LegalOperations, ForCodeSize))
+ if (char RHSNeg =
+ TLI.isNegatibleForFree(Y, DAG, LegalOperations, ForCodeSize))
+ // Both negated operands are at least as cheap as their counterparts.
+ // Check to see if at least one is cheaper negated.
+ if (LHSNeg == 2 || RHSNeg == 2)
+ return true;
+
+ return false;
+}
+
SDValue DAGCombiner::visitFMUL(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -12254,10 +12182,6 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
!isConstantFPBuildVectorOrConstantFP(N1))
return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
- // fold (fmul A, 1.0) -> A
- if (N1CFP && N1CFP->isExactlyValue(1.0))
- return N0;
-
if (SDValue NewSel = foldBinOpIntoSelect(N))
return NewSel;
@@ -12302,21 +12226,13 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) {
if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
return DAG.getNode(ISD::FNEG, DL, VT, N0);
- // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FMUL, DL, VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
+ // -N0 * -N1 --> N0 * N1
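+ // e.g. fmul (fneg %x), (fneg %y) --> fmul %x, %y; the operands need not
+ // be literal fneg nodes, only free (or cheaper) to negate.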
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMUL, DL, VT, NegN0, NegN1, Flags);
}
// fold (fmul X, (select (fcmp X > 0.0), -1.0, 1.0)) -> (fneg (fabs X))
@@ -12395,6 +12311,15 @@ SDValue DAGCombiner::visitFMA(SDNode *N) {
return DAG.getNode(ISD::FMA, DL, VT, N0, N1, N2);
}
+ // (-N0 * -N1) + N2 --> (N0 * N1) + N2
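+ // e.g. fma (fneg %x), (fneg %y), %z --> fma %x, %y, %z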
+ if (isCheaperToUseNegatedFPOps(N0, N1)) {
+ SDValue NegN0 =
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ SDValue NegN1 =
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize);
+ return DAG.getNode(ISD::FMA, DL, VT, NegN0, NegN1, N2, Flags);
+ }
+
if (UnsafeFPMath) {
if (N0CFP && N0CFP->isZero())
return N2;
@@ -12602,9 +12527,8 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
// If this FDIV is part of a reciprocal square root, it may be folded
// into a target-specific square root estimate instruction.
if (N1.getOpcode() == ISD::FSQRT) {
- if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
+ if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags))
return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
} else if (N1.getOpcode() == ISD::FP_EXTEND &&
N1.getOperand(0).getOpcode() == ISD::FSQRT) {
if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
@@ -12645,28 +12569,16 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) {
}
// Fold into a reciprocal estimate and multiply instead of a real divide.
- if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
- AddToWorklist(RV.getNode());
- return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
- }
+ if (SDValue RV = BuildDivEstimate(N0, N1, Flags))
+ return RV;
}
// (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
- if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options,
- ForCodeSize)) {
- // Both can be negated for free, check to see if at least one is cheaper
- // negated.
- if (LHSNeg == 2 || RHSNeg == 2)
- return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
- GetNegatedExpression(N0, DAG, LegalOperations,
- ForCodeSize),
- GetNegatedExpression(N1, DAG, LegalOperations,
- ForCodeSize),
- Flags);
- }
- }
+ if (isCheaperToUseNegatedFPOps(N0, N1))
+ return DAG.getNode(
+ ISD::FDIV, SDLoc(N), VT,
+ TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize),
+ TLI.getNegatedExpression(N1, DAG, LegalOperations, ForCodeSize), Flags);
return SDValue();
}
@@ -13112,22 +13024,6 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
return SDValue();
}
-SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
- SDValue N0 = N->getOperand(0);
- EVT VT = N->getValueType(0);
- EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
-
- // fold (fp_round_inreg c1fp) -> c1fp
- if (N0CFP && isTypeLegal(EVT)) {
- SDLoc DL(N);
- SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
- return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
- }
-
- return SDValue();
-}
-
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -13236,9 +13132,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
if (isConstantFPBuildVectorOrConstantFP(N0))
return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
- if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
- &DAG.getTarget().Options, ForCodeSize))
- return GetNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
+ if (TLI.isNegatibleForFree(N0, DAG, LegalOperations, ForCodeSize))
+ return TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize);
// Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
// constant pool values.
@@ -14004,11 +13899,12 @@ bool DAGCombiner::extendLoadedValueToExtension(LoadSDNode *LD, SDValue &Val) {
}
SDValue DAGCombiner::ForwardStoreValueToDirectLoad(LoadSDNode *LD) {
- if (OptLevel == CodeGenOpt::None || LD->isVolatile())
+ if (OptLevel == CodeGenOpt::None || !LD->isSimple())
return SDValue();
SDValue Chain = LD->getOperand(0);
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain.getNode());
- if (!ST || ST->isVolatile())
+ // TODO: Relax this restriction for unordered atomics (see D66309)
+ if (!ST || !ST->isSimple())
return SDValue();
EVT LDType = LD->getValueType(0);
@@ -14107,7 +14003,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// If load is not volatile and there are no uses of the loaded value (and
// the updated indexed value in case of indexed loads), change uses of the
// chain value into uses of the chain input (i.e. delete the dead load).
- if (!LD->isVolatile()) {
+ // TODO: Allow this for unordered atomics (see D66309)
+ if (LD->isSimple()) {
if (N->getValueType(1) == MVT::Other) {
// Unindexed loads.
if (!N->hasAnyUseOfValue(0)) {
@@ -14241,7 +14138,7 @@ struct LoadedSlice {
/// Helper structure used to compute the cost of a slice.
struct Cost {
/// Are we optimizing for code size.
- bool ForCodeSize;
+ bool ForCodeSize = false;
/// Various costs.
unsigned Loads = 0;
@@ -14250,10 +14147,10 @@ struct LoadedSlice {
unsigned ZExts = 0;
unsigned Shift = 0;
- Cost(bool ForCodeSize = false) : ForCodeSize(ForCodeSize) {}
+ explicit Cost(bool ForCodeSize) : ForCodeSize(ForCodeSize) {}
/// Get the cost of one isolated slice.
- Cost(const LoadedSlice &LS, bool ForCodeSize = false)
+ Cost(const LoadedSlice &LS, bool ForCodeSize)
: ForCodeSize(ForCodeSize), Loads(1) {
EVT TruncType = LS.Inst->getValueType(0);
EVT LoadedType = LS.getLoadedType();
@@ -14678,7 +14575,7 @@ bool DAGCombiner::SliceUpLoad(SDNode *N) {
return false;
LoadSDNode *LD = cast<LoadSDNode>(N);
- if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
+ if (!LD->isSimple() || !ISD::isNormalLoad(LD) ||
!LD->getValueType(0).isInteger())
return false;
@@ -14829,13 +14726,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
else if (Chain->getOpcode() == ISD::TokenFactor &&
SDValue(LD, 1).hasOneUse()) {
// LD has only 1 chain use, so there are no indirect dependencies.
- bool isOk = false;
- for (const SDValue &ChainOp : Chain->op_values())
- if (ChainOp.getNode() == LD) {
- isOk = true;
- break;
- }
- if (!isOk)
+ if (!LD->isOperandOf(Chain.getNode()))
return Result;
} else
return Result; // Fail.
@@ -14848,7 +14739,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
-static SDNode *
+static SDValue
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
SDValue IVal, StoreSDNode *St,
DAGCombiner *DC) {
@@ -14860,14 +14751,19 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
- if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
+ if (!DAG.MaskedValueIsZero(IVal, Mask)) return SDValue();
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
- // legalization.
- MVT VT = MVT::getIntegerVT(NumBytes*8);
+ // legalization (and the target doesn't explicitly think this is a bad idea).
+ MVT VT = MVT::getIntegerVT(NumBytes * 8);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (!DC->isTypeLegal(VT))
- return nullptr;
+ return SDValue();
+ if (St->getMemOperand() &&
+ !TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
+ *St->getMemOperand()))
+ return SDValue();
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
@@ -14901,8 +14797,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
++OpsNarrowed;
return DAG
.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
- St->getPointerInfo().getWithOffset(StOffset), NewAlign)
- .getNode();
+ St->getPointerInfo().getWithOffset(StOffset), NewAlign);
}
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
@@ -14911,7 +14806,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
- if (ST->isVolatile())
+ if (!ST->isSimple())
return SDValue();
SDValue Chain = ST->getChain();
@@ -14933,16 +14828,16 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
std::pair<unsigned, unsigned> MaskedLoad;
MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
- return SDValue(NewST, 0);
+ return NewST;
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
- if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
+ if (SDValue NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(0), ST,this))
- return SDValue(NewST, 0);
+ return NewST;
}
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
@@ -15367,14 +15262,16 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!Ld->hasNUsesOfValue(1, 0))
return;
- // The memory operands must not be volatile/indexed.
- if (Ld->isVolatile() || Ld->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Ld->isSimple() || Ld->isIndexed())
return;
}
auto CandidateMatch = [&](StoreSDNode *Other, BaseIndexOffset &Ptr,
int64_t &Offset) -> bool {
- // The memory operands must not be volatile/indexed.
- if (Other->isVolatile() || Other->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!Other->isSimple() || Other->isIndexed())
return false;
// Don't mix temporal stores with non-temporal stores.
if (St->isNonTemporal() != Other->isNonTemporal())
@@ -15394,8 +15291,10 @@ void DAGCombiner::getStoreMergeCandidates(
// Loads must only have one use.
if (!OtherLd->hasNUsesOfValue(1, 0))
return false;
- // The memory operands must not be volatile/indexed.
- if (OtherLd->isVolatile() || OtherLd->isIndexed())
+ // The memory operands must not be volatile/indexed/atomic.
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (!OtherLd->isSimple() || OtherLd->isIndexed())
return false;
// Don't mix temporal loads with non-temporal loads.
if (cast<LoadSDNode>(Val)->isNonTemporal() != OtherLd->isNonTemporal())
@@ -15425,6 +15324,18 @@ void DAGCombiner::getStoreMergeCandidates(
return (BasePtr.equalBaseIndex(Ptr, DAG, Offset));
};
+ // Check whether this StoreNode/RootNode pair has already bailed out of the
+ // dependence check more times than the limit allows.
+ auto OverLimitInDependenceCheck = [&](SDNode *StoreNode,
+ SDNode *RootNode) -> bool {
+ auto RootCount = StoreRootCountMap.find(StoreNode);
+ if (RootCount != StoreRootCountMap.end() &&
+ RootCount->second.first == RootNode &&
+ RootCount->second.second > StoreMergeDependenceLimit)
+ return true;
+ return false;
+ };
+
// We are looking for a root node which is an ancestor to all mergeable
// stores. We search up through a load, to our root and then down
// through all children. For instance we will find Store{1,2,3} if
@@ -15454,7 +15365,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I2)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
} else
@@ -15464,7 +15376,8 @@ void DAGCombiner::getStoreMergeCandidates(
if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
BaseIndexOffset Ptr;
int64_t PtrDiff;
- if (CandidateMatch(OtherST, Ptr, PtrDiff))
+ if (CandidateMatch(OtherST, Ptr, PtrDiff) &&
+ !OverLimitInDependenceCheck(OtherST, RootNode))
StoreNodes.push_back(MemOpLink(OtherST, PtrDiff));
}
}
@@ -15522,13 +15435,24 @@ bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
// Search through DAG. We can stop early if we find a store node.
for (unsigned i = 0; i < NumStores; ++i)
if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist,
- Max))
+ Max)) {
+ // If the search bails out, record the StoreNode and RootNode in the
+ // StoreRootCountMap. Once the pair has been seen more times than the
+ // limit, the StoreNode is no longer added to the StoreNodes set.
+ if (Visited.size() >= Max) {
+ auto &RootCount = StoreRootCountMap[StoreNodes[i].MemNode];
+ if (RootCount.first == RootNode)
+ RootCount.second++;
+ else
+ RootCount = {RootNode, 1};
+ }
return false;
+ }
return true;
}
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
- if (OptLevel == CodeGenOpt::None)
+ if (OptLevel == CodeGenOpt::None || !EnableStoreMerging)
return false;
EVT MemVT = St->getMemoryVT();
@@ -15588,7 +15512,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode *St) {
bool RV = false;
while (StoreNodes.size() > 1) {
- unsigned StartIdx = 0;
+ size_t StartIdx = 0;
while ((StartIdx + 1 < StoreNodes.size()) &&
StoreNodes[StartIdx].OffsetFromBase + ElementSizeBytes !=
StoreNodes[StartIdx + 1].OffsetFromBase)
@@ -16113,7 +16037,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
case MVT::ppcf128:
return SDValue();
case MVT::f32:
- if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
+ if ((isTypeLegal(MVT::i32) && !LegalOperations && ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
;
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
@@ -16125,7 +16049,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
return SDValue();
case MVT::f64:
if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
- !ST->isVolatile()) ||
+ ST->isSimple()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
;
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
@@ -16134,7 +16058,7 @@ SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
Ptr, ST->getMemOperand());
}
- if (!ST->isVolatile() &&
+ if (ST->isSimple() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
// Many FP stores are not made apparent until after legalize, e.g. for
// argument passing. Since this is so common, custom legalize the
@@ -16181,7 +16105,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// memory accesses. We don't care if the original type was legal or not
// as we assume software couldn't rely on the number of accesses of an
// illegal type.
- if (((!LegalOperations && !ST->isVolatile()) ||
+ // TODO: May be able to relax for unordered atomics (see D66309)
+ if (((!LegalOperations && ST->isSimple()) ||
TLI.isOperationLegal(ISD::STORE, SVT)) &&
TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT,
DAG, *ST->getMemOperand())) {
@@ -16242,9 +16167,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// See if we can simplify the input to this truncstore with knowledge that
// only the low bits are being used. For example:
// "truncstore (or (shl x, 8), y), i8" -> "truncstore y, i8"
- SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits);
AddToWorklist(Value.getNode());
- if (Shorter)
+ if (SDValue Shorter = DAG.GetDemandedBits(Value, TruncDemandedBits))
return DAG.getTruncStore(Chain, SDLoc(N), Shorter, Ptr, ST->getMemoryVT(),
ST->getMemOperand());
@@ -16263,9 +16187,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a load followed by a store to the same location, then the store
// is dead/noop.
+ // TODO: Can relax for unordered atomics (see D66309)
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
- ST->isUnindexed() && !ST->isVolatile() &&
+ ST->isUnindexed() && ST->isSimple() &&
// There can't be any side effects between the load and store, such as
// a call or store.
Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
@@ -16274,9 +16199,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
}
}
+ // TODO: Can relax for unordered atomics (see D66309)
if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
- if (ST->isUnindexed() && !ST->isVolatile() && ST1->isUnindexed() &&
- !ST1->isVolatile()) {
+ if (ST->isUnindexed() && ST->isSimple() &&
+ ST1->isUnindexed() && ST1->isSimple()) {
if (ST1->getBasePtr() == Ptr && ST1->getValue() == Value &&
ST->getMemoryVT() == ST1->getMemoryVT()) {
// If this is a store followed by a store with the same value to the
@@ -16405,7 +16331,8 @@ SDValue DAGCombiner::visitLIFETIME_END(SDNode *N) {
break;
case ISD::STORE: {
StoreSDNode *ST = dyn_cast<StoreSDNode>(Chain);
- if (ST->isVolatile() || ST->isIndexed())
+ // TODO: Can relax for unordered atomics (see D66309)
+ if (!ST->isSimple() || ST->isIndexed())
continue;
const BaseIndexOffset StoreBase = BaseIndexOffset::match(ST, DAG);
// If we store purely within object bounds just before its lifetime ends,
@@ -16456,6 +16383,11 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
if (OptLevel == CodeGenOpt::None)
return SDValue();
+ // Can't change the number of memory accesses for a volatile store or break
+ // atomicity for an atomic one.
+ if (!ST->isSimple())
+ return SDValue();
+
SDValue Val = ST->getValue();
SDLoc DL(ST);
@@ -16531,12 +16463,52 @@ SDValue DAGCombiner::splitMergedValStore(StoreSDNode *ST) {
}
/// Convert a disguised subvector insertion into a shuffle:
-/// insert_vector_elt V, (bitcast X from vector type), IdxC -->
-/// bitcast(shuffle (bitcast V), (extended X), Mask)
-/// Note: We do not use an insert_subvector node because that requires a legal
-/// subvector type.
SDValue DAGCombiner::combineInsertEltToShuffle(SDNode *N, unsigned InsIndex) {
SDValue InsertVal = N->getOperand(1);
+ SDValue Vec = N->getOperand(0);
+
+ // (insert_vector_elt (vector_shuffle X, Y), (extract_vector_elt X, N), InsIndex)
+ // --> (vector_shuffle X, Y)
+ if (Vec.getOpcode() == ISD::VECTOR_SHUFFLE && Vec.hasOneUse() &&
+ InsertVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
+ isa<ConstantSDNode>(InsertVal.getOperand(1))) {
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Vec.getNode());
+ ArrayRef<int> Mask = SVN->getMask();
+
+ SDValue X = Vec.getOperand(0);
+ SDValue Y = Vec.getOperand(1);
+
+ // Vec's operand 0 is using indices from 0 to N-1 and
+ // operand 1 from N to 2N - 1, where N is the number of
+ // elements in the vectors.
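+ // e.g. with 4-element vectors, inserting element 2 extracted from Y
+ // rewrites the mask entry at InsIndex to 4 + 2 = 6.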
+ int XOffset = -1;
+ if (InsertVal.getOperand(0) == X) {
+ XOffset = 0;
+ } else if (InsertVal.getOperand(0) == Y) {
+ XOffset = X.getValueType().getVectorNumElements();
+ }
+
+ if (XOffset != -1) {
+ SmallVector<int, 16> NewMask(Mask.begin(), Mask.end());
+
+ auto *ExtrIndex = cast<ConstantSDNode>(InsertVal.getOperand(1));
+ NewMask[InsIndex] = XOffset + ExtrIndex->getZExtValue();
+ assert(NewMask[InsIndex] <
+ (int)(2 * Vec.getValueType().getVectorNumElements()) &&
+ NewMask[InsIndex] >= 0 && "NewMask[InsIndex] is out of bounds");
+
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(Vec.getValueType(), SDLoc(N), X,
+ Y, NewMask, DAG);
+ if (LegalShuffle)
+ return LegalShuffle;
+ }
+ }
+
+ // insert_vector_elt V, (bitcast X from vector type), IdxC -->
+ // bitcast(shuffle (bitcast V), (extended X), Mask)
+ // Note: We do not use an insert_subvector node because that requires a
+ // legal subvector type.
if (InsertVal.getOpcode() != ISD::BITCAST || !InsertVal.hasOneUse() ||
!InsertVal.getOperand(0).getValueType().isVector())
return SDValue();
@@ -16674,7 +16646,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue EltNo,
LoadSDNode *OriginalLoad) {
- assert(!OriginalLoad->isVolatile());
+ assert(OriginalLoad->isSimple());
EVT ResultVT = EVE->getValueType(0);
EVT VecEltVT = InVecVT.getVectorElementType();
@@ -16747,12 +16719,12 @@ SDValue DAGCombiner::scalarizeExtractedVectorLoad(SDNode *EVE, EVT InVecVT,
SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
SDValue To[] = { Load, Chain };
DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
+ // Make sure to revisit this node to clean it up; it will usually be dead.
+ AddToWorklist(EVE);
// Since we're explicitly calling ReplaceAllUses, add the new node to the
// worklist explicitly as well.
- AddToWorklist(Load.getNode());
AddUsersToWorklist(Load.getNode()); // Add users too
- // Make sure to revisit this node to clean it up; it will usually be dead.
- AddToWorklist(EVE);
+ AddToWorklist(Load.getNode());
++OpsNarrowed;
return SDValue(EVE, 0);
}
@@ -16982,7 +16954,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
ISD::isNormalLoad(VecOp.getNode()) &&
!Index->hasPredecessor(VecOp.getNode())) {
auto *VecLoad = dyn_cast<LoadSDNode>(VecOp);
- if (VecLoad && !VecLoad->isVolatile())
+ if (VecLoad && VecLoad->isSimple())
return scalarizeExtractedVectorLoad(N, VecVT, Index, VecLoad);
}
@@ -17041,7 +17013,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Make sure we found a simple (not volatile or atomic) load and that the
// extractelement is the only use.
- if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
+ if (!LN0 || !LN0->hasNUsesOfValue(1,0) || !LN0->isSimple())
return SDValue();
// If Idx was -1 above, Elt is going to be -1, so just return undef.
@@ -17344,17 +17316,16 @@ static SDValue reduceBuildVecToShuffleWithZero(SDNode *BV, SelectionDAG &DAG) {
// the shuffle mask with -1.
}
- // Turn this into a shuffle with zero if that's legal.
- EVT VecVT = Extract.getOperand(0).getValueType();
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(ShufMask, VecVT))
- return SDValue();
-
// buildvec undef, ..., (zext (extractelt V, IndexC)), undef... -->
// bitcast (shuffle V, ZeroVec, VectorMask)
SDLoc DL(BV);
+ EVT VecVT = Extract.getOperand(0).getValueType();
SDValue ZeroVec = DAG.getConstant(0, DL, VecVT);
- SDValue Shuf = DAG.getVectorShuffle(VecVT, DL, Extract.getOperand(0), ZeroVec,
- ShufMask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ SDValue Shuf = TLI.buildLegalVectorShuffle(VecVT, DL, Extract.getOperand(0),
+ ZeroVec, ShufMask, DAG);
+ if (!Shuf)
+ return SDValue();
return DAG.getBitcast(VT, Shuf);
}
@@ -17656,6 +17627,13 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
}
}
+ // A splat of a single element is a SPLAT_VECTOR if supported on the target.
+ if (TLI.getOperationAction(ISD::SPLAT_VECTOR, VT) != TargetLowering::Expand)
+ if (SDValue V = cast<BuildVectorSDNode>(N)->getSplatValue()) {
+ assert(!V.isUndef() && "Splat of undef should have been handled earlier");
+ return DAG.getNode(ISD::SPLAT_VECTOR, SDLoc(N), VT, V);
+ }
+
// Check if we can express BUILD VECTOR via subvector extract.
if (!LegalTypes && (N->getNumOperands() > 1)) {
SDValue Op0 = N->getOperand(0);
@@ -17829,11 +17807,9 @@ static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
}
}
- if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
- DAG.getBitcast(VT, SV1), Mask);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
+ DAG.getBitcast(VT, SV1), Mask, DAG);
}
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
@@ -17853,6 +17829,15 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
SDValue In = N->getOperand(0);
assert(In.getValueType().isVector() && "Must concat vectors");
+ // If the input is a concat_vectors, just make a larger concat by padding
+ // with smaller undefs.
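+ // e.g. concat (concat X, Y), undef --> concat X, Y, undef, undef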
+ if (In.getOpcode() == ISD::CONCAT_VECTORS && In.hasOneUse()) {
+ unsigned NumOps = N->getNumOperands() * In.getNumOperands();
+ SmallVector<SDValue, 4> Ops(In->op_begin(), In->op_end());
+ Ops.resize(NumOps, DAG.getUNDEF(Ops[0].getValueType()));
+ return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
+ }
+
SDValue Scalar = peekThroughOneUseBitcasts(In);
// concat_vectors(scalar_to_vector(scalar), undef) ->
@@ -18002,6 +17987,23 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
return SDValue();
}
+// Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
+// if the subvector can be sourced for free.
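+// e.g. for V = (insert_subvector W, X, Index), operand X is returned when
+// its type and insertion index match the requested subvector extraction.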
+static SDValue getSubVectorSrc(SDValue V, SDValue Index, EVT SubVT) {
+ if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
+ V.getOperand(1).getValueType() == SubVT && V.getOperand(2) == Index) {
+ return V.getOperand(1);
+ }
+ auto *IndexC = dyn_cast<ConstantSDNode>(Index);
+ if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
+ V.getOperand(0).getValueType() == SubVT &&
+ (IndexC->getZExtValue() % SubVT.getVectorNumElements()) == 0) {
+ uint64_t SubIdx = IndexC->getZExtValue() / SubVT.getVectorNumElements();
+ return V.getOperand(SubIdx);
+ }
+ return SDValue();
+}
+
static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
SelectionDAG &DAG) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
@@ -18010,39 +18012,29 @@ static SDValue narrowInsertExtractVectorBinOp(SDNode *Extract,
if (!TLI.isBinOp(BinOpcode) || BinOp.getNode()->getNumValues() != 1)
return SDValue();
+ EVT VecVT = BinOp.getValueType();
SDValue Bop0 = BinOp.getOperand(0), Bop1 = BinOp.getOperand(1);
- SDValue Index = Extract->getOperand(1);
- EVT VT = Extract->getValueType(0);
+ if (VecVT != Bop0.getValueType() || VecVT != Bop1.getValueType())
+ return SDValue();
- // Helper that peeks through INSERT_SUBVECTOR/CONCAT_VECTORS to find
- // if the source subvector is the same type as the one being extracted.
- auto GetSubVector = [VT, Index](SDValue V) -> SDValue {
- if (V.getOpcode() == ISD::INSERT_SUBVECTOR &&
- V.getOperand(1).getValueType() == VT && V.getOperand(2) == Index) {
- return V.getOperand(1);
- }
- auto *IndexC = dyn_cast<ConstantSDNode>(Index);
- if (IndexC && V.getOpcode() == ISD::CONCAT_VECTORS &&
- V.getOperand(0).getValueType() == VT &&
- (IndexC->getZExtValue() % VT.getVectorNumElements()) == 0) {
- uint64_t SubIdx = IndexC->getZExtValue() / VT.getVectorNumElements();
- return V.getOperand(SubIdx);
- }
+ SDValue Index = Extract->getOperand(1);
+ EVT SubVT = Extract->getValueType(0);
+ if (!TLI.isOperationLegalOrCustom(BinOpcode, SubVT))
return SDValue();
- };
- SDValue Sub0 = GetSubVector(Bop0);
- SDValue Sub1 = GetSubVector(Bop1);
+
+ SDValue Sub0 = getSubVectorSrc(Bop0, Index, SubVT);
+ SDValue Sub1 = getSubVectorSrc(Bop1, Index, SubVT);
// TODO: We could handle the case where only 1 operand is being inserted by
// creating an extract of the other operand, but that requires checking
// number of uses and/or costs.
- if (!Sub0 || !Sub1 || !TLI.isOperationLegalOrCustom(BinOpcode, VT))
+ if (!Sub0 || !Sub1)
return SDValue();
// We are inserting both operands of the wide binop only to extract back
// to the narrow vector size. Eliminate all of the insert/extract:
// ext (binop (ins ?, X, Index), (ins ?, Y, Index)), Index --> binop X, Y
- return DAG.getNode(BinOpcode, SDLoc(Extract), VT, Sub0, Sub1,
+ return DAG.getNode(BinOpcode, SDLoc(Extract), SubVT, Sub0, Sub1,
BinOp->getFlags());
}
@@ -18174,7 +18166,8 @@ static SDValue narrowExtractedVectorLoad(SDNode *Extract, SelectionDAG &DAG) {
auto *Ld = dyn_cast<LoadSDNode>(Extract->getOperand(0));
auto *ExtIdx = dyn_cast<ConstantSDNode>(Extract->getOperand(1));
- if (!Ld || Ld->getExtensionType() || Ld->isVolatile() || !ExtIdx)
+ if (!Ld || Ld->getExtensionType() || !Ld->isSimple() || !ExtIdx)
return SDValue();
// Allow targets to opt-out.
@@ -18878,7 +18871,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// build_vector.
if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
int SplatIndex = SVN->getSplatIndex();
- if (TLI.isExtractVecEltCheap(VT, SplatIndex) &&
+ if (N0.hasOneUse() && TLI.isExtractVecEltCheap(VT, SplatIndex) &&
TLI.isBinOp(N0.getOpcode()) && N0.getNode()->getNumValues() == 1) {
// splat (vector_bo L, R), Index -->
// splat (scalar_bo (extelt L, Index), (extelt R, Index))
@@ -19153,22 +19146,13 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
SV1 = DAG.getUNDEF(VT);
// Avoid introducing shuffles with illegal mask.
- if (!TLI.isShuffleMaskLegal(Mask, VT)) {
- ShuffleVectorSDNode::commuteMask(Mask);
-
- if (!TLI.isShuffleMaskLegal(Mask, VT))
- return SDValue();
-
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
- // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
- std::swap(SV0, SV1);
- }
-
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
// shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
- return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
+ // shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
+ return TLI.buildLegalVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask, DAG);
}
if (SDValue V = foldShuffleOfConcatUndefs(SVN, DAG))
@@ -19191,35 +19175,35 @@ SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
SmallVector<int, 8> NewMask(InVecT.getVectorNumElements(), -1);
int Elt = C0->getZExtValue();
NewMask[0] = Elt;
- SDValue Val;
// If we have an implicit truncate, do the truncate here as long as it's
// legal; if it's not legal, no fold is performed.
if (VT.getScalarType() != InVal.getValueType() &&
InVal.getValueType().isScalarInteger() &&
isTypeLegal(VT.getScalarType())) {
- Val =
+ SDValue Val =
DAG.getNode(ISD::TRUNCATE, SDLoc(InVal), VT.getScalarType(), InVal);
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Val);
}
if (VT.getScalarType() == InVecT.getScalarType() &&
- VT.getVectorNumElements() <= InVecT.getVectorNumElements() &&
- TLI.isShuffleMaskLegal(NewMask, VT)) {
- Val = DAG.getVectorShuffle(InVecT, SDLoc(N), InVec,
- DAG.getUNDEF(InVecT), NewMask);
- // If the initial vector is the correct size this shuffle is a
- // valid result.
- if (VT == InVecT)
- return Val;
- // If not we must truncate the vector.
- if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
- MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
- SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
- EVT SubVT =
- EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
- VT.getVectorNumElements());
- Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT, Val,
- ZeroIdx);
- return Val;
+ VT.getVectorNumElements() <= InVecT.getVectorNumElements()) {
+ SDValue LegalShuffle =
+ TLI.buildLegalVectorShuffle(InVecT, SDLoc(N), InVec,
+ DAG.getUNDEF(InVecT), NewMask, DAG);
+ if (LegalShuffle) {
+ // If the initial vector is the correct size this shuffle is a
+ // valid result.
+ if (VT == InVecT)
+ return LegalShuffle;
+ // If not we must truncate the vector.
+ if (VT.getVectorNumElements() != InVecT.getVectorNumElements()) {
+ MVT IdxTy = TLI.getVectorIdxTy(DAG.getDataLayout());
+ SDValue ZeroIdx = DAG.getConstant(0, SDLoc(N), IdxTy);
+ EVT SubVT =
+ EVT::getVectorVT(*DAG.getContext(), InVecT.getVectorElementType(),
+ VT.getVectorNumElements());
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), SubVT,
+ LegalShuffle, ZeroIdx);
+ }
}
}
}
@@ -19627,6 +19611,39 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
}
}
+ // Make sure all but the first op are undef or constant.
+ auto ConcatWithConstantOrUndef = [](SDValue Concat) {
+ return Concat.getOpcode() == ISD::CONCAT_VECTORS &&
+ std::all_of(std::next(Concat->op_begin()), Concat->op_end(),
+ [](const SDValue &Op) {
+ return Op.isUndef() ||
+ ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
+ });
+ };
+
+ // The following pattern is likely to emerge with vector reduction ops. Moving
+ // the binary operation ahead of the concat may allow using a narrower vector
+ // instruction that has better performance than the wide version of the op:
+ // VBinOp (concat X, undef/constant), (concat Y, undef/constant) -->
+ // concat (VBinOp X, Y), VecC
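+ // e.g. add (concat X, C1), (concat Y, C2) --> concat (add X, Y), C1+C2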
+ if (ConcatWithConstantOrUndef(LHS) && ConcatWithConstantOrUndef(RHS) &&
+ (LHS.hasOneUse() || RHS.hasOneUse())) {
+ EVT NarrowVT = LHS.getOperand(0).getValueType();
+ if (NarrowVT == RHS.getOperand(0).getValueType() &&
+ TLI.isOperationLegalOrCustomOrPromote(Opcode, NarrowVT)) {
+ SDLoc DL(N);
+ unsigned NumOperands = LHS.getNumOperands();
+ SmallVector<SDValue, 4> ConcatOps;
+ for (unsigned i = 0; i != NumOperands; ++i) {
+ // Operands 1 and up are undef or constant, so this constant-folds for them.
+ ConcatOps.push_back(DAG.getNode(Opcode, DL, NarrowVT, LHS.getOperand(i),
+ RHS.getOperand(i)));
+ }
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps);
+ }
+ }
+
if (SDValue V = scalarizeBinOpOfSplats(N, DAG))
return V;
@@ -19723,7 +19740,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
// Token chains must be identical.
if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- LLD->isVolatile() || RLD->isVolatile() ||
+ // Be conservative for atomics for the moment.
+ // TODO: This does appear to be legal for unordered atomics (see D66309)
+ !LLD->isSimple() || !RLD->isSimple() ||
// FIXME: If either is a pre/post inc/dec load,
// we'd need to split out the address adjustment.
LLD->isIndexed() || RLD->isIndexed() ||
@@ -19928,7 +19947,7 @@ SDValue DAGCombiner::foldSelectCCToShiftAnd(const SDLoc &DL, SDValue N0,
SDValue DAGCombiner::convertSelectOfFPConstantsToLoadOffset(
const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3,
ISD::CondCode CC) {
- if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType().isFloatingPoint()))
+ if (!TLI.reduceSelectOfFPConstantLoads(N0.getValueType()))
return SDValue();
// If we are before legalize types, we want the other legalization to happen
@@ -20016,8 +20035,13 @@ SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
// when the condition can be materialized as an all-ones register. Any
// single bit-test can be materialized as an all-ones register with
// shift-left and shift-right-arith.
+ // TODO: The operation legality checks could be loosened to include "custom",
+ // but that may cause regressions for targets that do not have shift
+ // instructions.
if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
- N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
+ N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2) &&
+ TLI.isOperationLegal(ISD::SHL, VT) &&
+ TLI.isOperationLegal(ISD::SRA, VT)) {
SDValue AndLHS = N0->getOperand(0);
auto *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
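[Editor's note] The fold guarded above turns a single-bit-test select into shl+sra+and, which is why SHL/SRA legality is now checked. A scalar sketch of the rewrite (assumes 32-bit two's-complement ints and an arithmetic right shift of negatives, as the DAG's SRA provides):

    #include <cassert>
    #include <cstdint>

    // select((x & (1 << k)) == 0, 0, y) rewritten branchlessly:
    // shift bit k into the sign position, arithmetic-shift it across the
    // word to get 0 or all-ones, then AND with y.
    int32_t bitTestSelect(int32_t x, unsigned k, int32_t y) {
      int32_t AllOnes = (int32_t)((uint32_t)x << (31 - k)) >> 31;
      return AllOnes & y;
    }

    int main() {
      for (unsigned k = 0; k != 32; ++k)
        for (int32_t x : {0, 1, 5, -7, 1 << 20})
          for (int32_t y : {0, 42, -3})
            assert(bitTestSelect(x, k, y) == (((x >> k) & 1) ? y : 0));
    }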
@@ -20209,7 +20233,10 @@ SDValue DAGCombiner::BuildLogBase2(SDValue V, const SDLoc &DL) {
/// =>
/// X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
/// does not require additional intermediate precision]
-SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
+/// For the last iteration, fold the numerator N in to gain more precision:
+/// Result = N X_i + X_i (N - N A X_i)
+SDValue DAGCombiner::BuildDivEstimate(SDValue N, SDValue Op,
+ SDNodeFlags Flags) {
if (Level >= AfterLegalizeDAG)
return SDValue();
@@ -20230,25 +20257,39 @@ SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags Flags) {
if (SDValue Est = TLI.getRecipEstimate(Op, DAG, Enabled, Iterations)) {
AddToWorklist(Est.getNode());
+ SDLoc DL(Op);
if (Iterations) {
- SDLoc DL(Op);
SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
- // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ // Newton iterations: Est = Est + Est (1 - Arg * Est)
+ // On the last iteration, fold in the numerator N as well:
+ //   Est = N * Est + Est * (N - Arg * (N * Est))
for (int i = 0; i < Iterations; ++i) {
- SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
+ SDValue MulEst = Est;
+
+ if (i == Iterations - 1) {
+ MulEst = DAG.getNode(ISD::FMUL, DL, VT, N, Est, Flags);
+ AddToWorklist(MulEst.getNode());
+ }
+
+ SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, MulEst, Flags);
AddToWorklist(NewEst.getNode());
- NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
+ NewEst = DAG.getNode(ISD::FSUB, DL, VT,
+ (i == Iterations - 1 ? N : FPOne), NewEst, Flags);
AddToWorklist(NewEst.getNode());
NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
AddToWorklist(NewEst.getNode());
- Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
+ Est = DAG.getNode(ISD::FADD, DL, VT, MulEst, NewEst, Flags);
AddToWorklist(Est.getNode());
}
+ } else {
+ // If no iterations are available, multiply by the numerator N.
+ Est = DAG.getNode(ISD::FMUL, DL, VT, Est, N, Flags);
+ AddToWorklist(Est.getNode());
}
+
return Est;
}
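[Editor's note] The loop above mirrors the scalar recurrence X_{i+1} = X_i + X_i*(1 - A*X_i), with the numerator folded into the final step as N*X + X*(N - A*(N*X)). A stand-alone numeric sketch of exactly that schedule:

    #include <cassert>
    #include <cmath>

    // Scalar model of BuildDivEstimate's iteration order: A is the divisor,
    // Est the reciprocal estimate, and N the numerator folded into the last
    // iteration (or multiplied in directly when Iterations == 0).
    double divEstimate(double N, double A, double Est, int Iterations) {
      for (int i = 0; i < Iterations; ++i) {
        double MulEst = (i == Iterations - 1) ? N * Est : Est;
        double NewEst = A * MulEst;
        NewEst = (i == Iterations - 1 ? N : 1.0) - NewEst;
        NewEst = Est * NewEst;
        Est = MulEst + NewEst;
      }
      if (Iterations == 0)
        Est = Est * N;
      return Est;
    }

    int main() {
      // Three refinements of a crude 1/3 seed converge on 10/3.
      assert(std::fabs(divEstimate(10.0, 3.0, 0.3, 3) - 10.0 / 3.0) < 1e-6);
    }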
@@ -20271,31 +20312,19 @@ SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
// We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
// this entire sequence requires only one FP constant.
SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
-
HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
- AddToWorklist(HalfArg.getNode());
// Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
- AddToWorklist(NewEst.getNode());
-
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
- AddToWorklist(Est.getNode());
}
// If non-reciprocal square root is requested, multiply the result by Arg.
- if (!Reciprocal) {
+ if (!Reciprocal)
Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
- AddToWorklist(Est.getNode());
- }
return Est;
}
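[Editor's note] The cleanup above keeps the one-constant Newton sequence intact: HalfArg = 1.5*A - A stands in for 0.5*A so that only the 1.5 constant is materialized. A scalar model of the iteration:

    #include <cassert>
    #include <cmath>

    // One-constant Newton refinement for 1/sqrt(A):
    //   HalfArg = 1.5 * A - A            // == 0.5 * A, one FP constant total
    //   Est     = Est * (1.5 - HalfArg * Est * Est)
    double rsqrtEstimate(double A, double Est, unsigned Iterations) {
      double HalfArg = 1.5 * A - A;
      for (unsigned i = 0; i != Iterations; ++i)
        Est = Est * (1.5 - HalfArg * Est * Est);
      return Est;
    }

    int main() {
      // Four refinements of a rough seed converge on 1/sqrt(4) == 0.5.
      assert(std::fabs(rsqrtEstimate(4.0, 0.4, 4) - 0.5) < 1e-8);
    }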
@@ -20321,13 +20350,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// E = (E * -0.5) * ((A * E) * E + -3.0)
for (unsigned i = 0; i < Iterations; ++i) {
SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
- AddToWorklist(AE.getNode());
-
SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
- AddToWorklist(AEE.getNode());
-
SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
- AddToWorklist(RHS.getNode());
// When calculating a square root at the last iteration build:
// S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
@@ -20340,10 +20364,8 @@ SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
// SQRT: LHS = (A * E) * -0.5
LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
}
- AddToWorklist(LHS.getNode());
Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
- AddToWorklist(Est.getNode());
}
return Est;
@@ -20400,16 +20422,11 @@ SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags Flags,
SDValue Fabs = DAG.getNode(ISD::FABS, DL, VT, Op);
SDValue IsDenorm = DAG.getSetCC(DL, CCVT, Fabs, NormC, ISD::SETLT);
Est = DAG.getNode(SelOpcode, DL, VT, IsDenorm, FPZero, Est);
- AddToWorklist(Fabs.getNode());
- AddToWorklist(IsDenorm.getNode());
- AddToWorklist(Est.getNode());
} else {
// X == 0.0 ? 0.0 : Est
SDValue FPZero = DAG.getConstantFP(0.0, DL, VT);
SDValue IsZero = DAG.getSetCC(DL, CCVT, Op, FPZero, ISD::SETEQ);
Est = DAG.getNode(SelOpcode, DL, VT, IsZero, FPZero, Est);
- AddToWorklist(IsZero.getNode());
- AddToWorklist(Est.getNode());
}
}
}
@@ -20432,6 +20449,7 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
struct MemUseCharacteristics {
bool IsVolatile;
+ bool IsAtomic;
SDValue BasePtr;
int64_t Offset;
Optional<int64_t> NumBytes;
@@ -20447,18 +20465,20 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
: (LSN->getAddressingMode() == ISD::PRE_DEC)
? -1 * C->getSExtValue()
: 0;
- return {LSN->isVolatile(), LSN->getBasePtr(), Offset /*base offset*/,
+ return {LSN->isVolatile(), LSN->isAtomic(), LSN->getBasePtr(),
+ Offset /*base offset*/,
Optional<int64_t>(LSN->getMemoryVT().getStoreSize()),
LSN->getMemOperand()};
}
if (const auto *LN = cast<LifetimeSDNode>(N))
- return {false /*isVolatile*/, LN->getOperand(1),
+ return {false /*isVolatile*/, /*isAtomic*/ false, LN->getOperand(1),
(LN->hasOffset()) ? LN->getOffset() : 0,
(LN->hasOffset()) ? Optional<int64_t>(LN->getSize())
: Optional<int64_t>(),
(MachineMemOperand *)nullptr};
// Default.
- return {false /*isvolatile*/, SDValue(), (int64_t)0 /*offset*/,
+ return {false /*isvolatile*/, /*isAtomic*/ false, SDValue(),
+ (int64_t)0 /*offset*/,
Optional<int64_t>() /*size*/, (MachineMemOperand *)nullptr};
};
@@ -20474,6 +20494,11 @@ bool DAGCombiner::isAlias(SDNode *Op0, SDNode *Op1) const {
if (MUC0.IsVolatile && MUC1.IsVolatile)
return true;
+ // Be conservative about atomics for the moment
+ // TODO: This is way overconservative for unordered atomics (see D66309)
+ if (MUC0.IsAtomic && MUC1.IsAtomic)
+ return true;
+
if (MUC0.MMO && MUC1.MMO) {
if ((MUC0.MMO->isInvariant() && MUC1.MMO->isStore()) ||
(MUC1.MMO->isInvariant() && MUC0.MMO->isStore()))
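[Editor's note] The new IsAtomic field and the pairwise check above encode a deliberately blunt rule for now. A compact model of the added logic (fields mirror the patch's MemUseCharacteristics, trimmed to the two flags):

    #include <cassert>

    struct MemUseFlags {
      bool IsVolatile = false;
      bool IsAtomic = false;
    };

    // Conservative early-outs from isAlias: two volatile accesses, or two
    // atomic accesses, are assumed to alias (the TODO notes this could be
    // relaxed for unordered atomics).
    bool mustAssumeAlias(const MemUseFlags &A, const MemUseFlags &B) {
      return (A.IsVolatile && B.IsVolatile) || (A.IsAtomic && B.IsAtomic);
    }

    int main() {
      assert(mustAssumeAlias({false, true}, {false, true}));   // atomic vs atomic
      assert(!mustAssumeAlias({false, true}, {false, false})); // atomic vs plain
    }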
@@ -20555,7 +20580,8 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
SmallPtrSet<SDNode *, 16> Visited; // Visited node set.
// Get alias information for node.
- const bool IsLoad = isa<LoadSDNode>(N) && !cast<LoadSDNode>(N)->isVolatile();
+ // TODO: Relax aliasing for unordered atomics (see D66309)
+ const bool IsLoad = isa<LoadSDNode>(N) && cast<LoadSDNode>(N)->isSimple();
// Starting off.
Chains.push_back(OriginalChain);
@@ -20571,8 +20597,9 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
case ISD::LOAD:
case ISD::STORE: {
// Get alias information for C.
+ // TODO: Relax aliasing for unordered atomics (see D66309)
bool IsOpLoad = isa<LoadSDNode>(C.getNode()) &&
- !cast<LSBaseSDNode>(C.getNode())->isVolatile();
+ cast<LSBaseSDNode>(C.getNode())->isSimple();
if ((IsLoad && IsOpLoad) || !isAlias(N, C.getNode())) {
// Look further up the chain.
C = C.getOperand(0);
@@ -20727,7 +20754,8 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
// If the chain has more than one use, then we can't reorder the mem ops.
if (!SDValue(Chain, 0)->hasOneUse())
break;
- if (Chain->isVolatile() || Chain->isIndexed())
+ // TODO: Relax for unordered atomics (see D66309)
+ if (!Chain->isSimple() || Chain->isIndexed())
break;
// Find the base pointer and offset for this memory node.
@@ -20795,11 +20823,11 @@ bool DAGCombiner::parallelizeChainedStores(StoreSDNode *St) {
SDValue TF = DAG.getTokenFactor(SDLoc(STChain), TFOps);
CombineTo(St, TF);
- AddToWorklist(STChain);
- // Add TF operands worklist in reverse order.
- for (auto I = TF->getNumOperands(); I;)
- AddToWorklist(TF->getOperand(--I).getNode());
+ // Add TF and its operands to the worklist.
AddToWorklist(TF.getNode());
+ for (const SDValue &Op : TF->ops())
+ AddToWorklist(Op.getNode());
+ AddToWorklist(STChain);
return true;
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 22c23ba877e8..6d7260d7aee5 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -174,7 +174,7 @@ static unsigned findSinkableLocalRegDef(MachineInstr &MI) {
if (RegDef)
return 0;
RegDef = MO.getReg();
- } else if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ } else if (Register::isVirtualRegister(MO.getReg())) {
// This is another use of a vreg. Don't try to sink it.
return 0;
}
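[Editor's note] This is the first of many mechanical TargetRegisterInfo:: -> Register:: replacements in this patch. The predicate itself just tests the tag bit separating the two register namespaces; a sketch of the encoding (mirrors llvm::Register's scheme of keeping virtual registers in the upper half of the 32-bit index space, simplified here):

    #include <cassert>
    #include <cstdint>

    // Virtual registers carry the top bit of the 32-bit index; physical
    // registers (and 0, meaning "no register") live in the lower half.
    bool isVirtualRegister(uint32_t Reg) { return (Reg & 0x80000000u) != 0; }
    bool isPhysicalRegister(uint32_t Reg) {
      return Reg != 0 && !isVirtualRegister(Reg);
    }

    int main() {
      assert(!isVirtualRegister(0));          // no register
      assert(isPhysicalRegister(42));         // a physreg index
      assert(isVirtualRegister(0x80000000u)); // the first virtual register
    }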
@@ -1213,14 +1213,13 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
if (!FrameAlign)
FrameAlign = TLI.getByValTypeAlignment(ElementTy, DL);
Flags.setByValSize(FrameSize);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.IsNest)
Flags.setNest();
if (NeedsRegBlock)
Flags.setInConsecutiveRegs();
- unsigned OriginalAlignment = DL.getABITypeAlignment(Arg.Ty);
- Flags.setOrigAlign(OriginalAlignment);
+ Flags.setOrigAlign(Align(DL.getABITypeAlignment(Arg.Ty)));
CLI.OutVals.push_back(Arg.Val);
CLI.OutFlags.push_back(Flags);
@@ -1237,8 +1236,8 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
updateValueMap(CLI.CS->getInstruction(), CLI.ResultReg, CLI.NumResultRegs);
// Set labels for heapallocsite call.
- if (CLI.CS && CLI.CS->getInstruction()->getMetadata("heapallocsite")) {
- MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
+ if (CLI.CS && CLI.CS->getInstruction()->hasMetadata("heapallocsite")) {
+ const MDNode *MD = CLI.CS->getInstruction()->getMetadata("heapallocsite");
MF->addCodeViewHeapAllocSite(CLI.Call, MD);
}
@@ -1303,6 +1302,7 @@ bool FastISel::selectCall(const User *I) {
ExtraInfo |= InlineAsm::Extra_HasSideEffects;
if (IA->isAlignStack())
ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ ExtraInfo |= IA->getDialect() * InlineAsm::Extra_AsmDialect;
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
TII.get(TargetOpcode::INLINEASM))
@@ -1388,9 +1388,11 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
"Expected inlined-at fields to agree");
// A dbg.declare describes the address of a source variable, so lower it
// into an indirect DBG_VALUE.
+ auto *Expr = DI->getExpression();
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc,
- TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ true,
- *Op, DI->getVariable(), DI->getExpression());
+ TII.get(TargetOpcode::DBG_VALUE), /*IsIndirect*/ false,
+ *Op, DI->getVariable(), Expr);
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
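[Editor's note] The dbg.declare lowering above swaps the IsIndirect operand for an explicit DW_OP_deref appended to the expression (the same move happens in InstrEmitter::EmitDbgValue further down). A toy model of that append, with the expression as a plain opcode vector:

    #include <cassert>
    #include <vector>

    // 0x06 is the DWARF opcode DW_OP_deref; appending it to the location
    // expression replaces the old IsIndirect flag on the DBG_VALUE.
    std::vector<unsigned> appendDeref(std::vector<unsigned> Expr) {
      Expr.push_back(0x06);
      return Expr;
    }

    int main() {
      std::vector<unsigned> Expr; // an empty DIExpression
      Expr = appendDeref(Expr);
      assert(Expr.size() == 1 && Expr[0] == 0x06);
    }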
@@ -1414,19 +1416,19 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
if (CI->getBitWidth() > 64)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addCImm(CI)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
else
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addImm(CI->getZExtValue())
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (const auto *CF = dyn_cast<ConstantFP>(V)) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II)
.addFPImm(CF)
- .addImm(0U)
+ .addReg(0U)
.addMetadata(DI->getVariable())
.addMetadata(DI->getExpression());
} else if (unsigned Reg = lookUpRegForValue(V)) {
@@ -1453,24 +1455,12 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) {
TII.get(TargetOpcode::DBG_LABEL)).addMetadata(DI->getLabel());
return true;
}
- case Intrinsic::objectsize: {
- ConstantInt *CI = cast<ConstantInt>(II->getArgOperand(1));
- unsigned long long Res = CI->isZero() ? -1ULL : 0;
- Constant *ResCI = ConstantInt::get(II->getType(), Res);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
- case Intrinsic::is_constant: {
- Constant *ResCI = ConstantInt::get(II->getType(), 0);
- unsigned ResultReg = getRegForValue(ResCI);
- if (!ResultReg)
- return false;
- updateValueMap(II, ResultReg);
- return true;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
+
+ case Intrinsic::is_constant:
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
+
case Intrinsic::launder_invariant_group:
case Intrinsic::strip_invariant_group:
case Intrinsic::expect: {
@@ -1677,11 +1667,11 @@ bool FastISel::selectInstruction(const Instruction *I) {
/// (fall-through) successor, and update the CFG.
void FastISel::fastEmitBranch(MachineBasicBlock *MSucc,
const DebugLoc &DbgLoc) {
- if (FuncInfo.MBB->getBasicBlock()->size() > 1 &&
+ if (FuncInfo.MBB->getBasicBlock()->sizeWithoutDebug() > 1 &&
FuncInfo.MBB->isLayoutSuccessor(MSucc)) {
- // For more accurate line information if this is the only instruction
- // in the block then emit it, otherwise we have the unconditional
- // fall-through case, which needs no instructions.
+ // For more accurate line information, if this is the only non-debug
+ // instruction in the block then emit it; otherwise we have the
+ // unconditional fall-through case, which needs no instructions.
} else {
// The unconditional branch case.
TII.insertBranch(*FuncInfo.MBB, MSucc, nullptr,
@@ -2028,7 +2018,7 @@ unsigned FastISel::createResultReg(const TargetRegisterClass *RC) {
unsigned FastISel::constrainOperandRegClass(const MCInstrDesc &II, unsigned Op,
unsigned OpNum) {
- if (TargetRegisterInfo::isVirtualRegister(Op)) {
+ if (Register::isVirtualRegister(Op)) {
const TargetRegisterClass *RegClass =
TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF);
if (!MRI.constrainRegClass(Op, RegClass)) {
@@ -2236,7 +2226,7 @@ unsigned FastISel::fastEmitInst_i(unsigned MachineInstOpcode,
unsigned FastISel::fastEmitInst_extractsubreg(MVT RetVT, unsigned Op0,
bool Op0IsKill, uint32_t Idx) {
unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT));
- assert(TargetRegisterInfo::isVirtualRegister(Op0) &&
+ assert(Register::isVirtualRegister(Op0) &&
"Cannot yet extract from physregs");
const TargetRegisterClass *RC = MRI.getRegClass(Op0);
MRI.constrainRegClass(Op0, TRI.getSubClassWithSubReg(RC, Idx));
@@ -2417,10 +2407,9 @@ FastISel::createMachineMemOperandFor(const Instruction *I) const {
} else
return nullptr;
- bool IsNonTemporal = I->getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool IsInvariant = I->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
- bool IsDereferenceable =
- I->getMetadata(LLVMContext::MD_dereferenceable) != nullptr;
+ bool IsNonTemporal = I->hasMetadata(LLVMContext::MD_nontemporal);
+ bool IsInvariant = I->hasMetadata(LLVMContext::MD_invariant_load);
+ bool IsDereferenceable = I->hasMetadata(LLVMContext::MD_dereferenceable);
const MDNode *Ranges = I->getMetadata(LLVMContext::MD_range);
AAMDNodes AAInfo;
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 8b1759246b76..cf6711adad48 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/FunctionLoweringInfo.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -424,7 +425,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
unsigned BitWidth = IntVT.getSizeInBits();
unsigned DestReg = ValueMap[PN];
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
return;
LiveOutRegInfo.grow(DestReg);
LiveOutInfo &DestLOI = LiveOutRegInfo[DestReg];
@@ -445,7 +446,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when its"
"CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
@@ -480,7 +481,7 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
assert(ValueMap.count(V) && "V should have been placed in ValueMap when "
"its CopyToReg node was created.");
unsigned SrcReg = ValueMap[V];
- if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (!Register::isVirtualRegister(SrcReg)) {
DestLOI.IsValid = false;
return;
}
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 9bc07d35dfc5..c5095995ec2e 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -71,7 +71,7 @@ static unsigned countOperands(SDNode *Node, unsigned NumExpUses,
if (isa<RegisterMaskSDNode>(Node->getOperand(I - 1)))
continue;
if (RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Node->getOperand(I - 1)))
- if (TargetRegisterInfo::isPhysicalRegister(RN->getReg()))
+ if (Register::isPhysicalRegister(RN->getReg()))
continue;
NumImpUses = N - I;
break;
@@ -86,7 +86,7 @@ void InstrEmitter::
EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
unsigned SrcReg, DenseMap<SDValue, unsigned> &VRBaseMap) {
unsigned VRBase = 0;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
+ if (Register::isVirtualRegister(SrcReg)) {
// Just use the input register directly!
SDValue Op(Node, ResNo);
if (IsClone)
@@ -114,7 +114,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == ResNo) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
Match = false;
} else if (DestReg != SrcReg)
@@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
UseRC = RC;
else if (RC) {
const TargetRegisterClass *ComRC =
- TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy);
+ TRI->getCommonSubClass(UseRC, RC);
// If multiple uses expect disjoint register classes, we emit
// copies in AddRegisterOperand.
if (ComRC)
@@ -219,7 +219,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
if (II.OpInfo[i].isOptionalDef()) {
// Optional def must be a physical register.
VRBase = cast<RegisterSDNode>(Node->getOperand(i-NumResults))->getReg();
- assert(TargetRegisterInfo::isPhysicalRegister(VRBase));
+ assert(Register::isPhysicalRegister(VRBase));
MIB.addReg(VRBase, RegState::Define);
}
@@ -229,7 +229,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
User->getOperand(2).getNode() == Node &&
User->getOperand(2).getResNo() == i) {
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
const TargetRegisterClass *RegRC = MRI->getRegClass(Reg);
if (RegRC == RC) {
VRBase = Reg;
@@ -272,7 +272,7 @@ unsigned InstrEmitter::getVR(SDValue Op,
// does not include operand register class info.
const TargetRegisterClass *RC = TLI->getRegClassFor(
Op.getSimpleValueType(), Op.getNode()->isDivergent());
- unsigned VReg = MRI->createVirtualRegister(RC);
+ Register VReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, Op.getDebugLoc(),
TII->get(TargetOpcode::IMPLICIT_DEF), VReg);
return VReg;
@@ -319,7 +319,7 @@ InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB,
if (!ConstrainedRC) {
OpRC = TRI->getAllocatableClass(OpRC);
assert(OpRC && "Constraints cannot be fulfilled for allocation");
- unsigned NewVReg = MRI->createVirtualRegister(OpRC);
+ Register NewVReg = MRI->createVirtualRegister(OpRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -385,9 +385,8 @@ void InstrEmitter::AddOperand(MachineInstrBuilder &MIB,
(IIRC && TRI->isDivergentRegClass(IIRC)))
: nullptr;
- if (OpRC && IIRC && OpRC != IIRC &&
- TargetRegisterInfo::isVirtualRegister(VReg)) {
- unsigned NewVReg = MRI->createVirtualRegister(IIRC);
+ if (OpRC && IIRC && OpRC != IIRC && Register::isVirtualRegister(VReg)) {
+ Register NewVReg = MRI->createVirtualRegister(IIRC);
BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(),
TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg);
VReg = NewVReg;
@@ -465,7 +464,7 @@ unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx,
// register instead.
RC = TRI->getSubClassWithSubReg(TLI->getRegClassFor(VT, isDivergent), SubIdx);
assert(RC && "No legal register class for VT supports that SubIdx");
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
BuildMI(*MBB, InsertPos, DL, TII->get(TargetOpcode::COPY), NewReg)
.addReg(VReg);
return NewReg;
@@ -485,7 +484,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
if (User->getOpcode() == ISD::CopyToReg &&
User->getOperand(2).getNode() == Node) {
unsigned DestReg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ if (Register::isVirtualRegister(DestReg)) {
VRBase = DestReg;
break;
}
@@ -503,7 +502,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
unsigned Reg;
MachineInstr *DefMI;
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(0));
- if (R && TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (R && Register::isPhysicalRegister(R->getReg())) {
Reg = R->getReg();
DefMI = nullptr;
} else {
@@ -529,7 +528,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Reg may not support a SubIdx sub-register, and we may need to
// constrain its register class or issue a COPY to a compatible register
// class.
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
Reg = ConstrainForSubReg(Reg, SubIdx,
Node->getOperand(0).getSimpleValueType(),
Node->isDivergent(), Node->getDebugLoc());
@@ -541,7 +540,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
MachineInstrBuilder CopyMI =
BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
TII->get(TargetOpcode::COPY), VRBase);
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
CopyMI.addReg(Reg, 0, SubIdx);
else
CopyMI.addReg(TRI->getSubReg(Reg, SubIdx));
@@ -614,7 +613,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
const TargetRegisterClass *DstRC =
TRI->getAllocatableClass(TRI->getRegClass(DstRCIdx));
- unsigned NewVReg = MRI->createVirtualRegister(DstRC);
+ Register NewVReg = MRI->createVirtualRegister(DstRC);
BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY),
NewVReg).addReg(VReg);
@@ -631,7 +630,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
bool IsClone, bool IsCloned) {
unsigned DstRCIdx = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx);
- unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
+ Register NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC));
const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE);
MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg);
unsigned NumOps = Node->getNumOperands();
@@ -649,7 +648,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
RegisterSDNode *R = dyn_cast<RegisterSDNode>(Node->getOperand(i-1));
// Skip physical registers as they don't have a vreg to get and we'll
// insert copies for them in TwoAddressInstructionPass anyway.
- if (!R || !TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
+ if (!R || !Register::isPhysicalRegister(R->getReg())) {
unsigned SubIdx = cast<ConstantSDNode>(Op)->getZExtValue();
unsigned SubReg = getVR(Node->getOperand(i-1), VRBaseMap);
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
@@ -678,7 +677,7 @@ MachineInstr *
InstrEmitter::EmitDbgValue(SDDbgValue *SD,
DenseMap<SDValue, unsigned> &VRBaseMap) {
MDNode *Var = SD->getVariable();
- MDNode *Expr = SD->getExpression();
+ const DIExpression *Expr = SD->getExpression();
DebugLoc DL = SD->getDebugLoc();
assert(cast<DILocalVariable>(Var)->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
@@ -702,12 +701,11 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// EmitTargetCodeForFrameDebugValue is responsible for allocation.
auto FrameMI = BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE))
.addFrameIndex(SD->getFrameIx());
+
if (SD->isIndirect())
- // Push [fi + 0] onto the DIExpression stack.
- FrameMI.addImm(0);
- else
- // Push fi onto the DIExpression stack.
- FrameMI.addReg(0);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ FrameMI.addReg(0);
return FrameMI.addMetadata(Var).addMetadata(Expr);
}
// Otherwise, we're going to create an instruction here.
@@ -753,9 +751,9 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD,
// Indirect addressing is indicated by an Imm as the second parameter.
if (SD->isIndirect())
- MIB.addImm(0U);
- else
- MIB.addReg(0U, RegState::Debug);
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
+
+ MIB.addReg(0U, RegState::Debug);
MIB.addMetadata(Var);
MIB.addMetadata(Expr);
@@ -928,12 +926,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
//
// Collect all the used physreg defs, and make sure that any unused physreg
// defs are marked as dead.
- SmallVector<unsigned, 8> UsedRegs;
+ SmallVector<Register, 8> UsedRegs;
// Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = NumDefs; i < NumResults; ++i) {
- unsigned Reg = II.getImplicitDefs()[i - NumDefs];
+ Register Reg = II.getImplicitDefs()[i - NumDefs];
if (!Node->hasAnyUseOfValue(i))
continue;
// This implicitly defined physreg has a use.
@@ -960,8 +958,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// direct RegisterSDNode operands.
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
- unsigned Reg = R->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ Register Reg = R->getReg();
+ if (Reg.isPhysical())
UsedRegs.push_back(Reg);
}
}
@@ -995,8 +993,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::CopyToReg: {
unsigned DestReg = cast<RegisterSDNode>(Node->getOperand(1))->getReg();
SDValue SrcVal = Node->getOperand(2);
- if (TargetRegisterInfo::isVirtualRegister(DestReg) &&
- SrcVal.isMachineOpcode() &&
+ if (Register::isVirtualRegister(DestReg) && SrcVal.isMachineOpcode() &&
SrcVal.getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) {
// Instead building a COPY to that vreg destination, build an
// IMPLICIT_DEF instruction instead.
@@ -1093,16 +1090,18 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// FIXME: Add dead flags for physical and virtual registers defined.
// For now, mark physical register defs as implicit to help fast
// regalloc. This makes inline asm look a lot like calls.
- MIB.addReg(Reg, RegState::Define |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
}
break;
case InlineAsm::Kind_RegDefEarlyClobber:
case InlineAsm::Kind_Clobber:
for (unsigned j = 0; j != NumVals; ++j, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber |
- getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg)));
+ MIB.addReg(Reg,
+ RegState::Define | RegState::EarlyClobber |
+ getImplRegState(Register::isPhysicalRegister(Reg)));
ECRegs.push_back(Reg);
}
break;
@@ -1136,7 +1135,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
// then remove the early-clobber flag.
for (unsigned Reg : ECRegs) {
if (MIB->readsRegister(Reg, TRI)) {
- MachineOperand *MO =
+ MachineOperand *MO =
MIB->findRegisterDefOperand(Reg, false, false, TRI);
assert(MO && "No def operand for clobbered register?");
MO->setIsEarlyClobber(false);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index bf817f00f83d..f9fdf525240f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -161,6 +162,7 @@ private:
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT,
const SDLoc &dl, SDValue ChainIn);
SDValue ExpandBUILD_VECTOR(SDNode *Node);
+ SDValue ExpandSPLAT_VECTOR(SDNode *Node);
SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
SmallVectorImpl<SDValue> &Results);
@@ -236,6 +238,16 @@ public:
}
ReplacedNode(Old);
}
+
+ void ReplaceNodeWithValue(SDValue Old, SDValue New) {
+ LLVM_DEBUG(dbgs() << " ... replacing: "; Old->dump(&DAG);
+ dbgs() << " with: "; New->dump(&DAG));
+
+ DAG.ReplaceAllUsesOfValueWith(Old, New);
+ if (UpdatedNodes)
+ UpdatedNodes->insert(New.getNode());
+ ReplacedNode(Old.getNode());
+ }
};
} // end anonymous namespace
@@ -493,8 +505,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
// expand it.
EVT MemVT = ST->getMemoryVT();
const DataLayout &DL = DAG.getDataLayout();
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
LLVM_DEBUG(dbgs() << "Expanding unsupported unaligned store\n");
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
@@ -608,8 +620,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
EVT MemVT = ST->getMemoryVT();
// If this is an unaligned store and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *ST->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *ST->getMemOperand())) {
SDValue Result = TLI.expandUnalignedStore(ST, DAG);
ReplaceNode(SDValue(ST, 0), Result);
}
@@ -669,8 +681,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
const DataLayout &DL = DAG.getDataLayout();
// If this is an unaligned load and the target doesn't support it,
// expand it.
- if (!TLI.allowsMemoryAccess(*DAG.getContext(), DL, MemVT,
- *LD->getMemOperand())) {
+ if (!TLI.allowsMemoryAccessForAlignment(*DAG.getContext(), DL, MemVT,
+ *LD->getMemOperand())) {
std::tie(RVal, RChain) = TLI.expandUnalignedLoad(LD, DAG);
}
break;
@@ -894,11 +906,10 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
if (SrcVT.getScalarType() == MVT::f16) {
EVT ISrcVT = SrcVT.changeTypeToInteger();
EVT IDestVT = DestVT.changeTypeToInteger();
- EVT LoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
+ EVT ILoadVT = TLI.getRegisterType(IDestVT.getSimpleVT());
- SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, LoadVT,
- Chain, Ptr, ISrcVT,
- LD->getMemOperand());
+ SDValue Result = DAG.getExtLoad(ISD::ZEXTLOAD, dl, ILoadVT, Chain,
+ Ptr, ISrcVT, LD->getMemOperand());
Value = DAG.getNode(ISD::FP16_TO_FP, dl, DestVT, Result);
Chain = Result.getValue(1);
break;
@@ -959,15 +970,13 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
#ifndef NDEBUG
for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i)
- assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
- TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Node->getValueType(i))) &&
+ assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) ==
+ TargetLowering::TypeLegal &&
"Unexpected illegal type!");
for (const SDValue &Op : Node->op_values())
assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) ==
TargetLowering::TypeLegal ||
- TLI.isTypeLegal(Op.getValueType()) ||
Op.getOpcode() == ISD::TargetConstant ||
Op.getOpcode() == ISD::Register) &&
"Unexpected illegal type!");
@@ -1004,7 +1013,6 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
Action = TLI.getOperationAction(Node->getOpcode(),
Node->getOperand(0).getValueType());
break;
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG: {
EVT InnerType = cast<VTSDNode>(Node->getOperand(1))->getVT();
Action = TLI.getOperationAction(Node->getOpcode(), InnerType);
@@ -1097,38 +1105,15 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
return;
}
break;
- case ISD::STRICT_FADD:
- case ISD::STRICT_FSUB:
- case ISD::STRICT_FMUL:
- case ISD::STRICT_FDIV:
- case ISD::STRICT_FREM:
- case ISD::STRICT_FSQRT:
- case ISD::STRICT_FMA:
- case ISD::STRICT_FPOW:
- case ISD::STRICT_FPOWI:
- case ISD::STRICT_FSIN:
- case ISD::STRICT_FCOS:
- case ISD::STRICT_FEXP:
- case ISD::STRICT_FEXP2:
- case ISD::STRICT_FLOG:
- case ISD::STRICT_FLOG10:
- case ISD::STRICT_FLOG2:
- case ISD::STRICT_FRINT:
- case ISD::STRICT_FNEARBYINT:
- case ISD::STRICT_FMAXNUM:
- case ISD::STRICT_FMINNUM:
- case ISD::STRICT_FCEIL:
- case ISD::STRICT_FFLOOR:
- case ISD::STRICT_FROUND:
- case ISD::STRICT_FTRUNC:
- case ISD::STRICT_FP_ROUND:
- case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These pseudo-ops are the same as the other STRICT_ ops except
+ // they are registered with setOperationAction() using the input type
+ // instead of the output type.
Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Node->getOperand(1).getValueType());
break;
case ISD::SADDSAT:
case ISD::UADDSAT:
@@ -1139,7 +1124,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
}
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
@@ -1650,7 +1636,6 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
MVT OpVT = LHS.getSimpleValueType();
ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
NeedInvert = false;
- bool NeedSwap = false;
switch (TLI.getCondCodeAction(CCCode, OpVT)) {
default: llvm_unreachable("Unknown condition code action!");
case TargetLowering::Legal:
@@ -1664,6 +1649,7 @@ bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS,
return true;
}
// Swapping operands didn't work. Try inverting the condition.
+ bool NeedSwap = false;
InvCC = getSetCCInverse(CCCode, OpVT.isInteger());
if (!TLI.isCondCodeLegalOrCustom(InvCC, OpVT)) {
// If inverting the condition is not enough, try swapping operands
@@ -2021,6 +2007,14 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
return ExpandVectorBuildThroughStack(Node);
}
+SDValue SelectionDAGLegalize::ExpandSPLAT_VECTOR(SDNode *Node) {
+ SDLoc DL(Node);
+ EVT VT = Node->getValueType(0);
+ SDValue SplatVal = Node->getOperand(0);
+
+ return DAG.getSplatBuildVector(VT, DL, SplatVal);
+}
+
// Expand a node into a call to a libcall. If the result value
// does not fit into a register, return the lo part and set the hi part to the
// by-reg argument. If it does fit into a single register, return the result
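[Editor's note] The new ExpandSPLAT_VECTOR above lowers a splat to a BUILD_VECTOR of the repeated scalar via getSplatBuildVector; for a fixed-length vector that is simply N copies of the operand. A minimal sketch:

    #include <cassert>
    #include <vector>

    // SPLAT_VECTOR(SplatVal) expanded for a fixed-length result type:
    // a BUILD_VECTOR whose every lane is the splatted scalar.
    std::vector<int> expandSplat(int SplatVal, unsigned NumElts) {
      return std::vector<int>(NumElts, SplatVal);
    }

    int main() {
      assert((expandSplat(7, 4) == std::vector<int>{7, 7, 7, 7}));
    }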
@@ -2074,12 +2068,12 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);
if (!CallInfo.second.getNode()) {
- LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump());
+ LLVM_DEBUG(dbgs() << "Created tailcall: "; DAG.getRoot().dump(&DAG));
// It's a tailcall, return the chain (which is the DAG root).
return DAG.getRoot();
}
- LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump());
+ LLVM_DEBUG(dbgs() << "Created libcall: "; CallInfo.first.dump(&DAG));
return CallInfo.first;
}
@@ -2167,6 +2161,9 @@ SDValue SelectionDAGLegalize::ExpandArgFPLibCall(SDNode* Node,
RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_F128,
RTLIB::Libcall Call_PPCF128) {
+ if (Node->isStrictFPOpcode())
+ Node = DAG.mutateStrictFPToFP(Node);
+
RTLIB::Libcall LC;
switch (Node->getOperand(0).getValueType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
@@ -2815,6 +2812,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
case ISD::STRICT_FP_ROUND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getValueType(0),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2829,6 +2832,12 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
case ISD::STRICT_FP_EXTEND:
+ // This expansion does not honor the "strict" properties anyway,
+ // so prefer falling back to the non-strict operation if legal.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ break;
Tmp1 = EmitStackConvert(Node->getOperand(1),
Node->getOperand(1).getValueType(),
Node->getValueType(0), dl, Node->getOperand(0));
@@ -2873,19 +2882,6 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
Results.push_back(Tmp1);
break;
}
- case ISD::FP_ROUND_INREG: {
- // The only way we can lower this is to turn it into a TRUNCSTORE,
- // EXTLOAD pair, targeting a temporary location (a stack slot).
-
- // NOTE: there is a choice here between constantly creating new stack
- // slots and always reusing the same one. We currently always create
- // new ones, as reuse may inhibit scheduling.
- EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- Tmp1 = EmitStackConvert(Node->getOperand(0), ExtraVT,
- Node->getValueType(0), dl);
- Results.push_back(Tmp1);
- break;
- }
case ISD::UINT_TO_FP:
if (TLI.expandUINT_TO_FP(Node, Tmp1, DAG)) {
Results.push_back(Tmp1);
@@ -2901,33 +2897,26 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG))
Results.push_back(Tmp1);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ if (TLI.expandFP_TO_SINT(Node, Tmp1, DAG)) {
+ ReplaceNode(Node, Tmp1.getNode());
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_SINT node\n");
+ return true;
+ }
+ break;
case ISD::FP_TO_UINT:
- if (TLI.expandFP_TO_UINT(Node, Tmp1, DAG))
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG))
Results.push_back(Tmp1);
break;
- case ISD::LROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
- RTLIB::LROUND_F64, RTLIB::LROUND_F80,
- RTLIB::LROUND_F128,
- RTLIB::LROUND_PPCF128));
- break;
- case ISD::LLROUND:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
- RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
- RTLIB::LLROUND_F128,
- RTLIB::LLROUND_PPCF128));
- break;
- case ISD::LRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
- RTLIB::LRINT_F64, RTLIB::LRINT_F80,
- RTLIB::LRINT_F128,
- RTLIB::LRINT_PPCF128));
- break;
- case ISD::LLRINT:
- Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
- RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
- RTLIB::LLRINT_F128,
- RTLIB::LLRINT_PPCF128));
+ case ISD::STRICT_FP_TO_UINT:
+ if (TLI.expandFP_TO_UINT(Node, Tmp1, Tmp2, DAG)) {
+ // Relink the chain.
+ DAG.ReplaceAllUsesOfValueWith(SDValue(Node, 1), Tmp2);
+ // Replace the FP_TO_UINT result with the newly expanded value.
+ ReplaceNodeWithValue(SDValue(Node, 0), Tmp1);
+ LLVM_DEBUG(dbgs() << "Successfully expanded STRICT_FP_TO_UINT node\n");
+ return true;
+ }
break;
case ISD::VAARG:
Results.push_back(DAG.expandVAArg(Node));
@@ -3348,6 +3337,7 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
Results.push_back(TLI.expandFixedPointMul(Node, DAG));
break;
case ISD::ADDCARRY:
@@ -3662,6 +3652,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
case ISD::BUILD_VECTOR:
Results.push_back(ExpandBUILD_VECTOR(Node));
break;
+ case ISD::SPLAT_VECTOR:
+ Results.push_back(ExpandSPLAT_VECTOR(Node));
+ break;
case ISD::SRA:
case ISD::SRL:
case ISD::SHL: {
@@ -3715,6 +3708,33 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
break;
}
+ if (Results.empty() && Node->isStrictFPOpcode()) {
+ // FIXME: We were asked to expand a strict floating-point operation,
+ // but there is currently no expansion implemented that would preserve
+ // the "strict" properties. For now, we just fall back to the non-strict
+ // version if that is legal on the target. The actual mutation of the
+ // operation will happen in SelectionDAGISel::DoInstructionSelection.
+ switch (Node->getOpcode()) {
+ default:
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal)
+ return true;
+ break;
+ case ISD::STRICT_LRINT:
+ case ISD::STRICT_LLRINT:
+ case ISD::STRICT_LROUND:
+ case ISD::STRICT_LLROUND:
+ // These are registered by the operand type instead of the value
+ // type. Reflect that here.
+ if (TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getOperand(1).getValueType())
+ == TargetLowering::Legal)
+ return true;
+ break;
+ }
+ }
+
// Replace the original node with the legalized result.
if (Results.empty()) {
LLVM_DEBUG(dbgs() << "Cannot expand node\n");
@@ -3956,6 +3976,34 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
RTLIB::POW_F80, RTLIB::POW_F128,
RTLIB::POW_PPCF128));
break;
+ case ISD::LROUND:
+ case ISD::STRICT_LROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LROUND_F32,
+ RTLIB::LROUND_F64, RTLIB::LROUND_F80,
+ RTLIB::LROUND_F128,
+ RTLIB::LROUND_PPCF128));
+ break;
+ case ISD::LLROUND:
+ case ISD::STRICT_LLROUND:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLROUND_F32,
+ RTLIB::LLROUND_F64, RTLIB::LLROUND_F80,
+ RTLIB::LLROUND_F128,
+ RTLIB::LLROUND_PPCF128));
+ break;
+ case ISD::LRINT:
+ case ISD::STRICT_LRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LRINT_F32,
+ RTLIB::LRINT_F64, RTLIB::LRINT_F80,
+ RTLIB::LRINT_F128,
+ RTLIB::LRINT_PPCF128));
+ break;
+ case ISD::LLRINT:
+ case ISD::STRICT_LLRINT:
+ Results.push_back(ExpandArgFPLibCall(Node, RTLIB::LLRINT_F32,
+ RTLIB::LLRINT_F64, RTLIB::LLRINT_F80,
+ RTLIB::LLRINT_F128,
+ RTLIB::LLRINT_PPCF128));
+ break;
case ISD::FDIV:
Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64,
RTLIB::DIV_F80, RTLIB::DIV_F128,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index b4849b2881e6..72d052473f11 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -42,10 +42,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT,
}
//===----------------------------------------------------------------------===//
-// Convert Float Results to Integer for Non-HW-supported Operations.
+// Convert Float Results to Integer
//===----------------------------------------------------------------------===//
-bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
+void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
LLVM_DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG);
dbgs() << "\n");
SDValue R = SDValue();
@@ -58,26 +58,18 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::Register:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- assert(isLegalInHWReg(N->getValueType(ResNo)) &&
- "Unsupported SoftenFloatRes opcode!");
- // Only when isLegalInHWReg, we can skip check of the operands.
- R = SDValue(N, ResNo);
- break;
case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break;
- case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
- case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break;
+ case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N); break;
case ISD::EXTRACT_VECTOR_ELT:
R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N, ResNo); break;
- case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break;
+ case ISD::FABS: R = SoftenFloatRes_FABS(N); break;
case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break;
case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break;
case ISD::FADD: R = SoftenFloatRes_FADD(N); break;
case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break;
- case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break;
+ case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break;
case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break;
case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break;
case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break;
@@ -89,7 +81,7 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FMA: R = SoftenFloatRes_FMA(N); break;
case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break;
case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break;
- case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break;
+ case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break;
case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break;
case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break;
case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break;
@@ -102,30 +94,24 @@ bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break;
case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break;
case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break;
- case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break;
+ case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break;
case ISD::ATOMIC_SWAP: R = BitcastToInt_ATOMIC_SWAP(N); break;
- case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break;
- case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break;
+ case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break;
+ case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break;
case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break;
case ISD::VAARG: R = SoftenFloatRes_VAARG(N); break;
}
- if (R.getNode() && R.getNode() != N) {
+ // If R is null, the sub-method took care of registering the result.
+ if (R.getNode()) {
+ assert(R.getNode() != N);
SetSoftenedFloat(SDValue(N, ResNo), R);
- // Return true only if the node is changed, assuming that the operands
- // are also converted when necessary.
- return true;
}
-
- // Otherwise, return false to tell caller to scan operands.
- return false;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -144,10 +130,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) {
BitConvertToInteger(N->getOperand(1)));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, we can load better from the constant pool.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N) {
ConstantFPSDNode *CN = cast<ConstantFPSDNode>(N);
// In ppcf128, the high 64 bits are always first in memory regardless
// of Endianness. LLVM's APFloat representation is not Endian sensitive,
@@ -172,19 +155,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) {
}
SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, keep the extracted value in register.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0));
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N),
NewOp.getValueType().getVectorElementType(),
NewOp, N->getOperand(1));
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FABS can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned Size = NVT.getSizeInBits();
@@ -200,57 +177,69 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMINNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMIN_F32,
RTLIB::FMIN_F64,
RTLIB::FMIN_F80,
RTLIB::FMIN_F128,
RTLIB::FMIN_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMAXNUM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMAX_F32,
RTLIB::FMAX_F64,
RTLIB::FMAX_F80,
RTLIB::FMAX_F128,
RTLIB::FMAX_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ADD_F32,
RTLIB::ADD_F64,
RTLIB::ADD_F80,
RTLIB::ADD_F128,
RTLIB::ADD_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::CEIL_F32,
RTLIB::CEIL_F64,
RTLIB::CEIL_F80,
RTLIB::CEIL_F128,
RTLIB::CEIL_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(0));
SDValue RHS = BitConvertToInteger(N->getOperand(1));
SDLoc dl(N);
@@ -301,98 +290,123 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::COS_F32,
RTLIB::COS_F64,
RTLIB::COS_F80,
RTLIB::COS_F128,
RTLIB::COS_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP_F32,
RTLIB::EXP_F64,
RTLIB::EXP_F80,
RTLIB::EXP_F128,
RTLIB::EXP_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::EXP2_F32,
RTLIB::EXP2_F64,
RTLIB::EXP2_F80,
RTLIB::EXP2_F128,
RTLIB::EXP2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FLOOR_F32,
RTLIB::FLOOR_F64,
RTLIB::FLOOR_F80,
RTLIB::FLOOR_F128,
RTLIB::FLOOR_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG_F32,
RTLIB::LOG_F64,
RTLIB::LOG_F80,
RTLIB::LOG_F128,
RTLIB::LOG_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG2_F32,
RTLIB::LOG2_F64,
RTLIB::LOG2_F80,
RTLIB::LOG2_F128,
RTLIB::LOG2_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::LOG10_F32,
RTLIB::LOG10_F64,
RTLIB::LOG10_F80,
RTLIB::LOG10_F128,
RTLIB::LOG10_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
@@ -400,48 +414,57 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) {
SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)),
GetSoftenedFloat(N->getOperand(2)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[3] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType(),
+ N->getOperand(2).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::NEARBYINT_F32,
RTLIB::NEARBYINT_F64,
RTLIB::NEARBYINT_F80,
RTLIB::NEARBYINT_F128,
RTLIB::NEARBYINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
- // When LegalInHWReg, FNEG can be implemented as native bitwise operations.
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDLoc dl(N);
- EVT FloatVT = N->getValueType(ResNo);
+ EVT FloatVT = N->getValueType(0);
if (FloatVT == MVT::f32 || FloatVT == MVT::f64 || FloatVT == MVT::f128) {
// Expand Y = FNEG(X) -> Y = X ^ sign mask
APInt SignMask = APInt::getSignMask(NVT.getSizeInBits());
@@ -452,13 +475,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) {
// Expand Y = FNEG(X) -> Y = SUB -0.0, X
SDValue Ops[2] = { DAG.getConstantFP(-0.0, dl, N->getValueType(0)),
GetSoftenedFloat(N->getOperand(0)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, dl).first;
+ NVT, Ops, CallOptions, dl).first;
}
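
A standalone illustration of the sign-mask expansion above (a sketch, not part of the patch): once an f32 lives in an i32 register, FNEG is a single integer XOR.

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Y = FNEG(X) -> Y = X ^ sign mask, on a "softened" f32 held as i32.
    int main() {
      float x = 1.5f;
      uint32_t bits;
      std::memcpy(&bits, &x, sizeof bits);  // bit-convert to integer
      bits ^= 0x80000000u;                  // APInt::getSignMask(32) equivalent
      float y;
      std::memcpy(&y, &bits, sizeof y);
      std::printf("%f\n", y);               // prints -1.500000
    }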
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
@@ -485,7 +509,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
// FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special
@@ -493,15 +520,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) {
SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP(SDNode *N) {
EVT MidVT = TLI.getTypeToTransformTo(*DAG.getContext(), MVT::f32);
SDValue Op = N->getOperand(0);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
SDValue Res32 = TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, MidVT, Op,
- false, SDLoc(N)).first;
+ CallOptions, SDLoc(N)).first;
if (N->getValueType(0) == MVT::f32)
return Res32;
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
RTLIB::Libcall LC = RTLIB::getFPEXT(MVT::f32, N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!");
- return TLI.makeLibCall(DAG, LC, NVT, Res32, false, SDLoc(N)).first;
+ return TLI.makeLibCall(DAG, LC, NVT, Res32, CallOptions, SDLoc(N)).first;
}
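
The two-step extension above exists because there is no direct f16-to-f64 libcall: the value always goes through f32 first. A standalone sketch of the same shape, with a minimal normals-only stand-in for the RTLIB::FPEXT_F16_F32 routine (real runtimes use a target-specific symbol such as __extendhfsf2; that name and this helper are illustrative assumptions):

    #include <cstdint>
    #include <cstdio>
    #include <cstring>

    // Stand-in for the f16 -> f32 libcall; handles normal numbers only.
    static float ext_h2f(uint16_t h) {
      uint32_t sign = (uint32_t)(h & 0x8000u) << 16;
      uint32_t exp  = ((h >> 10) & 0x1Fu) - 15 + 127;  // rebias the exponent
      uint32_t man  = (uint32_t)(h & 0x3FFu) << 13;    // widen the mantissa
      uint32_t bits = sign | (exp << 23) | man;
      float f;
      std::memcpy(&f, &bits, sizeof f);
      return f;
    }

    int main() {
      // f16 1.0 (0x3C00) -> f32, then the ordinary FPEXT step up to f64.
      double d = (double)ext_h2f(0x3C00);
      std::printf("%f\n", d);  // prints 1.000000
    }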
SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
@@ -515,20 +545,27 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0));
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!");
- return TLI.makeLibCall(DAG, LC, NVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POW_F32,
RTLIB::POW_F64,
RTLIB::POW_F80,
RTLIB::POW_F128,
RTLIB::POW_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
@@ -536,87 +573,111 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) {
"Unsupported power type!");
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::POWI_F32,
RTLIB::POWI_F64,
RTLIB::POWI_F80,
RTLIB::POWI_F128,
RTLIB::POWI_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::REM_F32,
RTLIB::REM_F64,
RTLIB::REM_F80,
RTLIB::REM_F128,
RTLIB::REM_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::RINT_F32,
RTLIB::RINT_F64,
RTLIB::RINT_F80,
RTLIB::RINT_F128,
RTLIB::RINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::ROUND_F32,
RTLIB::ROUND_F64,
RTLIB::ROUND_F80,
RTLIB::ROUND_F128,
RTLIB::ROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SIN_F32,
RTLIB::SIN_F64,
RTLIB::SIN_F80,
RTLIB::SIN_F128,
RTLIB::SIN_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SQRT_F32,
RTLIB::SQRT_F64,
RTLIB::SQRT_F80,
RTLIB::SQRT_F128,
RTLIB::SQRT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)),
GetSoftenedFloat(N->getOperand(1)) };
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { N->getOperand(0).getValueType(),
+ N->getOperand(1).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- NVT, Ops, false, SDLoc(N)).first;
+ NVT, Ops, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
@@ -625,17 +686,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) {
return DAG.getNode(ISD::FP_TO_FP16, SDLoc(N), NVT, N->getOperand(0));
SDValue Op = GetSoftenedFloat(N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::TRUNC_F32,
RTLIB::TRUNC_F64,
RTLIB::TRUNC_F80,
RTLIB::TRUNC_F128,
RTLIB::TRUNC_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
- bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo));
+SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
LoadSDNode *L = cast<LoadSDNode>(N);
EVT VT = N->getValueType(0);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
@@ -666,23 +729,17 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) {
// use the new one.
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1));
auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL);
- if (LegalInHWReg)
- return ExtendNode;
return BitConvertToInteger(ExtendNode);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(1));
SDValue RHS = GetSoftenedFloat(N->getOperand(2));
return DAG.getSelect(SDLoc(N),
LHS.getValueType(), N->getOperand(0), LHS, RHS);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) {
- if (isLegalInHWReg(N->getValueType(ResNo)))
- return SDValue(N, ResNo);
+SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) {
SDValue LHS = GetSoftenedFloat(N->getOperand(2));
SDValue RHS = GetSoftenedFloat(N->getOperand(3));
return DAG.getNode(ISD::SELECT_CC, SDLoc(N),
@@ -736,14 +793,18 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) {
// Sign/zero extend the argument if the libcall takes a larger type.
SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl,
NVT, N->getOperand(0));
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(Signed);
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, LC,
TLI.getTypeToTransformTo(*DAG.getContext(), RVT),
- Op, Signed, dl).first;
+ Op, CallOptions, dl).first;
}
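
The extend-then-call step above, shown standalone (a sketch, not part of the patch): when i16-to-float is lowered via the i32 libcall, the argument must first be widened, and whether that widening is a sign or a zero extension is exactly what CallOptions.setSExt(Signed) communicates.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t  s = -5;
      uint16_t u = 0xFFFBu;            // same bit pattern as s
      int32_t  sarg = s;               // SIGN_EXTEND for SINT_TO_FP
      uint32_t uarg = u;               // ZERO_EXTEND for UINT_TO_FP
      std::printf("%f %f\n", (double)(float)sarg, (double)(float)uarg);
      // prints -5.000000 65531.000000 -- same bits, different values
    }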
//===----------------------------------------------------------------------===//
-// Convert Float Operand to Integer for Non-HW-supported Operations.
+// Convert Float Operand to Integer
//===----------------------------------------------------------------------===//
bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
@@ -753,8 +814,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
switch (N->getOpcode()) {
default:
- if (CanSkipSoftenFloatOperand(N, OpNo))
- return false;
#ifndef NDEBUG
dbgs() << "SoftenFloatOperand Op #" << OpNo << ": ";
N->dump(&DAG); dbgs() << "\n";
@@ -762,11 +821,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to soften this operator's operand!");
case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
- case ISD::CopyToReg: Res = SoftenFloatOp_COPY_TO_REG(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
- case ISD::FABS: Res = SoftenFloatOp_FABS(N); break;
- case ISD::FCOPYSIGN: Res = SoftenFloatOp_FCOPYSIGN(N); break;
- case ISD::FNEG: Res = SoftenFloatOp_FNEG(N); break;
case ISD::FP_EXTEND: Res = SoftenFloatOp_FP_EXTEND(N); break;
case ISD::FP_TO_FP16: // Same as FP_ROUND for softening purposes
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
@@ -776,19 +831,9 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
case ISD::LLROUND: Res = SoftenFloatOp_LLROUND(N); break;
case ISD::LRINT: Res = SoftenFloatOp_LRINT(N); break;
case ISD::LLRINT: Res = SoftenFloatOp_LLRINT(N); break;
- case ISD::SELECT: Res = SoftenFloatOp_SELECT(N); break;
case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break;
case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break;
- case ISD::STORE:
- Res = SoftenFloatOp_STORE(N, OpNo);
- // Do not try to analyze or soften this node again if the value is
- // or can be held in a register. In that case, Res.getNode() should
- // be equal to N.
- if (Res.getNode() == N &&
- isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
- // Otherwise, we need to reanalyze and lower the new Res nodes.
- break;
+ case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -800,60 +845,16 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
return true;
assert(Res.getValueType() == N->getValueType(0) && N->getNumValues() == 1 &&
- "Invalid operand expansion");
+ "Invalid operand promotion");
ReplaceValueWith(SDValue(N, 0), Res);
return false;
}
-bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) {
- if (!isLegalInHWReg(N->getOperand(OpNo).getValueType()))
- return false;
-
- // When the operand type can be kept in registers there is nothing to do for
- // the following opcodes.
- switch (N->getOperand(OpNo).getOpcode()) {
- case ISD::BITCAST:
- case ISD::ConstantFP:
- case ISD::CopyFromReg:
- case ISD::CopyToReg:
- case ISD::FABS:
- case ISD::FCOPYSIGN:
- case ISD::FNEG:
- case ISD::Register:
- case ISD::SELECT:
- case ISD::SELECT_CC:
- return true;
- }
-
- switch (N->getOpcode()) {
- case ISD::ConstantFP: // Leaf node.
- case ISD::CopyFromReg: // Operand is a register that we know to be left
- // unchanged by SoftenFloatResult().
- case ISD::Register: // Leaf node.
- return true;
- }
- return false;
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
- return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0),
- GetSoftenedFloat(N->getOperand(0)));
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_COPY_TO_REG(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- if (N->getNumOperands() == 3)
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2), 0);
+ SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2,
- N->getOperand(3)),
- 0);
+ return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0);
}
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
@@ -868,7 +869,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_EXTEND(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPEXT(SVT, RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND libcall");
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
@@ -885,7 +889,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- return TLI.makeLibCall(DAG, LC, RVT, Op, false, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ return TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
@@ -895,7 +902,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(2), N->getOperand(3));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -911,34 +919,6 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) {
0);
}
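
For context on the "compare the result against zero" comment above: soft-float comparisons become integer libcalls whose return value encodes the ordering. A sketch assuming a libgcc/compiler-rt style runtime that provides __ltdf2, which is documented to return a value less than zero when the first argument is strictly less than the second (not standalone; it links only against such a runtime):

    extern "C" int __ltdf2(double a, double b);  // soft-float comparison routine

    bool fp_less(double a, double b) {
      // NewLHS = libcall result, NewRHS = 0, CCCode = SETLT
      return __ltdf2(a, b) < 0;
    }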
-SDValue DAGTypeLegalizer::SoftenFloatOp_FABS(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FCOPYSIGN(SDNode *N) {
- SDValue Op0 = GetSoftenedFloat(N->getOperand(0));
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
-
- if (Op0 == N->getOperand(0) && Op1 == N->getOperand(1))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op0, Op1), 0);
-}
-
-SDValue DAGTypeLegalizer::SoftenFloatOp_FNEG(SDNode *N) {
- SDValue Op = GetSoftenedFloat(N->getOperand(0));
-
- if (Op == N->getOperand(0))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, Op), 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
bool Signed = N->getOpcode() == ISD::FP_TO_SINT;
EVT SVT = N->getOperand(0).getValueType();
@@ -962,23 +942,15 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) {
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!");
SDValue Op = GetSoftenedFloat(N->getOperand(0));
- SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
+ SDValue Res = TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl).first;
// Truncate the result if the libcall returns a larger type.
return DAG.getNode(ISD::TRUNCATE, dl, RVT, Res);
}
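
A standalone illustration of the widen-then-truncate step above (not part of the patch): converting f32 to i16 in soft float goes through the i32 conversion libcall (e.g. __fixsfsi on many targets; the exact symbol is target-dependent) and then truncates, since there is no i16 variant.

    #include <cstdint>
    #include <cstdio>

    int main() {
      float f = 1234.5f;
      int32_t wide = (int32_t)f;     // what the i32 libcall would return
      int16_t res  = (int16_t)wide;  // ISD::TRUNCATE back to the i16 result
      std::printf("%d\n", res);      // prints 1234
    }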
-SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT(SDNode *N) {
- SDValue Op1 = GetSoftenedFloat(N->getOperand(1));
- SDValue Op2 = GetSoftenedFloat(N->getOperand(2));
-
- if (Op1 == N->getOperand(1) && Op2 == N->getOperand(2))
- return SDValue();
-
- return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Op1, Op2),
- 0);
-}
-
SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1);
ISD::CondCode CCCode = cast<CondCodeSDNode>(N->getOperand(4))->get();
@@ -986,7 +958,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, we need to compare the result
// against zero to select between true and false values.
@@ -1009,7 +982,8 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) {
EVT VT = NewLHS.getValueType();
NewLHS = GetSoftenedFloat(NewLHS);
NewRHS = GetSoftenedFloat(NewRHS);
- TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N));
+ TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N),
+ N->getOperand(0), N->getOperand(1));
// If softenSetCCOperands returned a scalar, use it.
if (!NewRHS.getNode()) {
@@ -1047,13 +1021,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
@@ -1061,13 +1038,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLROUND(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
@@ -1075,13 +1055,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
@@ -1089,13 +1072,16 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_LLRINT(SDNode *N) {
SDValue Op = GetSoftenedFloat(N->getOperand(0));
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[1] = { N->getOperand(0).getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, N->getValueType(0), true);
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- NVT, Op, false, SDLoc(N)).first;
+ NVT, Op, CallOptions, SDLoc(N)).first;
}
//===----------------------------------------------------------------------===//
@@ -1267,13 +1253,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::DIV_F32,
RTLIB::DIV_F64,
RTLIB::DIV_F80,
RTLIB::DIV_F128,
RTLIB::DIV_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1341,13 +1328,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::FMA_F32,
RTLIB::FMA_F64,
RTLIB::FMA_F80,
RTLIB::FMA_F128,
RTLIB::FMA_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1355,13 +1343,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo,
void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::MUL_F32,
RTLIB::MUL_F64,
RTLIB::MUL_F80,
RTLIB::MUL_F128,
RTLIB::MUL_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1470,13 +1459,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N,
void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
+ TargetLowering::MakeLibCallOptions CallOptions;
SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0),
RTLIB::SUB_F32,
RTLIB::SUB_F64,
RTLIB::SUB_F80,
RTLIB::SUB_F128,
RTLIB::SUB_PPCF128),
- N->getValueType(0), Ops, false,
+ N->getValueType(0), Ops, CallOptions,
SDLoc(N)).first;
GetPairElements(Call, Lo, Hi);
}
@@ -1555,7 +1545,9 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo,
}
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!");
- Hi = TLI.makeLibCall(DAG, LC, VT, Src, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ Hi = TLI.makeLibCall(DAG, LC, VT, Src, CallOptions, dl).first;
GetPairElements(Hi, Lo, Hi);
}
@@ -1732,7 +1724,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!");
- return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), false, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ return TLI.makeLibCall(DAG, LC, RVT, N->getOperand(0), CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
@@ -1741,8 +1734,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!");
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, LC, N->getValueType(0), N->getOperand(0),
- false, dl).first;
+ CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) {
@@ -1807,49 +1801,53 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::ExpandFloatOp_LROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LROUND_F32,
RTLIB::LROUND_F64,
RTLIB::LROUND_F80,
RTLIB::LROUND_F128,
RTLIB::LROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLROUND(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLROUND_F32,
RTLIB::LLROUND_F64,
RTLIB::LLROUND_F80,
RTLIB::LLROUND_F128,
RTLIB::LLROUND_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LRINT_F32,
RTLIB::LRINT_F64,
RTLIB::LRINT_F80,
RTLIB::LRINT_F128,
RTLIB::LRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandFloatOp_LLRINT(SDNode *N) {
EVT RVT = N->getValueType(0);
EVT RetVT = N->getOperand(0).getValueType().getSimpleVT().SimpleTy;
+ TargetLowering::MakeLibCallOptions CallOptions;
return TLI.makeLibCall(DAG, GetFPLibCall(RetVT,
RTLIB::LLRINT_F32,
RTLIB::LLRINT_F64,
RTLIB::LLRINT_F80,
RTLIB::LLRINT_F128,
RTLIB::LLRINT_PPCF128),
- RVT, N->getOperand(0), false, SDLoc(N)).first;
+ RVT, N->getOperand(0), CallOptions, SDLoc(N)).first;
}
//===----------------------------------------------------------------------===//
@@ -2002,6 +2000,12 @@ void DAGTypeLegalizer::PromoteFloatResult(SDNode *N, unsigned ResNo) {
dbgs() << "\n");
SDValue R = SDValue();
+ // See if the target wants to custom expand this node.
+ if (CustomLowerNode(N, N->getValueType(ResNo), true)) {
+ LLVM_DEBUG(dbgs() << "Node has been custom expanded, done\n");
+ return;
+ }
+
switch (N->getOpcode()) {
// These opcodes cannot appear if promotion of FP16 is done in the backend
// instead of Clang
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 15ac45c37c66..d5c1b539adbd 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -100,6 +100,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_BUILD_VECTOR(N); break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntRes_SPLAT_VECTOR(N); break;
case ISD::CONCAT_VECTORS:
Res = PromoteIntRes_CONCAT_VECTORS(N); break;
@@ -112,6 +114,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND: Res = PromoteIntRes_INT_EXTEND(N); break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT: Res = PromoteIntRes_FP_TO_XINT(N); break;
@@ -148,9 +152,12 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UADDSAT:
case ISD::SSUBSAT:
case ISD::USUBSAT: Res = PromoteIntRes_ADDSUBSAT(N); break;
+
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntRes_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntRes_MULFIX(N); break;
+
case ISD::ABS: Res = PromoteIntRes_ABS(N); break;
case ISD::ATOMIC_LOAD:
@@ -494,7 +501,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
TLI.isOperationLegalOrCustom(ISD::FP_TO_SINT, NVT))
NewOpc = ISD::FP_TO_SINT;
- SDValue Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
+ if (N->getOpcode() == ISD::STRICT_FP_TO_UINT &&
+ !TLI.isOperationLegal(ISD::STRICT_FP_TO_UINT, NVT) &&
+ TLI.isOperationLegalOrCustom(ISD::STRICT_FP_TO_SINT, NVT))
+ NewOpc = ISD::STRICT_FP_TO_SINT;
+
+ SDValue Res;
+ if (N->isStrictFPOpcode()) {
+ Res = DAG.getNode(NewOpc, dl, { NVT, MVT::Other },
+ { N->getOperand(0), N->getOperand(1) });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ } else
+ Res = DAG.getNode(NewOpc, dl, NVT, N->getOperand(0));
// Assert that the converted value fits in the original type. If it doesn't
// (eg: because the value being converted is too big), then the result of the
@@ -503,7 +523,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) {
// NOTE: fp-to-uint to fp-to-sint promotion guarantees zero extend. For example:
// before legalization: fp-to-uint16, 65534. -> 0xfffe
// after legalization: fp-to-sint32, 65534. -> 0x0000fffe
- return DAG.getNode(N->getOpcode() == ISD::FP_TO_UINT ?
+ return DAG.getNode((N->getOpcode() == ISD::FP_TO_UINT ||
+ N->getOpcode() == ISD::STRICT_FP_TO_UINT) ?
ISD::AssertZext : ISD::AssertSext, dl, NVT, Res,
DAG.getValueType(N->getValueType(0).getScalarType()));
}
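
A standalone check of the NOTE above (not part of the patch): 65534.0 is out of range for a direct fp-to-sint16, but the promoted fp-to-sint32 result is exactly the zero extension of the i16 value, which is why AssertZext is sound for the unsigned opcodes.

    #include <cstdint>
    #include <cstdio>

    int main() {
      float f = 65534.0f;
      int32_t promoted = (int32_t)f;          // FP_TO_SINT on the promoted i32
      uint16_t narrow  = (uint16_t)promoted;  // the original fp-to-uint16 value
      std::printf("0x%08x %u\n", promoted, narrow);  // prints 0x0000fffe 65534
    }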
@@ -590,7 +611,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MGATHER(MaskedGatherSDNode *N) {
N->getIndex(), N->getScale() };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(NVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -623,48 +644,84 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) {
}
SDValue DAGTypeLegalizer::PromoteIntRes_ADDSUBSAT(SDNode *N) {
- // For promoting iN -> iM, this can be expanded by
- // 1. ANY_EXTEND iN to iM
- // 2. SHL by M-N
- // 3. [US][ADD|SUB]SAT
- // 4. L/ASHR by M-N
+ // If the saturating operation is legal or custom on the promoted type, this
+ // can be expanded by:
+ // 1. ANY_EXTEND iN to iM
+ // 2. SHL by M-N
+ // 3. [US][ADD|SUB]SAT
+ // 4. L/ASHR by M-N
+ // Otherwise it is more efficient to convert this to a min and a max
+ // operation in the higher precision arithmetic.
SDLoc dl(N);
SDValue Op1 = N->getOperand(0);
SDValue Op2 = N->getOperand(1);
unsigned OldBits = Op1.getScalarValueSizeInBits();
unsigned Opcode = N->getOpcode();
- unsigned ShiftOp;
- switch (Opcode) {
- case ISD::SADDSAT:
- case ISD::SSUBSAT:
- ShiftOp = ISD::SRA;
- break;
- case ISD::UADDSAT:
- case ISD::USUBSAT:
- ShiftOp = ISD::SRL;
- break;
- default:
- llvm_unreachable("Expected opcode to be signed or unsigned saturation "
- "addition or subtraction");
- }
-
- SDValue Op1Promoted = GetPromotedInteger(Op1);
- SDValue Op2Promoted = GetPromotedInteger(Op2);
+ SDValue Op1Promoted, Op2Promoted;
+ if (Opcode == ISD::UADDSAT || Opcode == ISD::USUBSAT) {
+ Op1Promoted = ZExtPromotedInteger(Op1);
+ Op2Promoted = ZExtPromotedInteger(Op2);
+ } else {
+ Op1Promoted = SExtPromotedInteger(Op1);
+ Op2Promoted = SExtPromotedInteger(Op2);
+ }
EVT PromotedType = Op1Promoted.getValueType();
unsigned NewBits = PromotedType.getScalarSizeInBits();
- unsigned SHLAmount = NewBits - OldBits;
- EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
- SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
- Op1Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
- Op2Promoted =
- DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
- SDValue Result =
- DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
- return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ if (TLI.isOperationLegalOrCustom(Opcode, PromotedType)) {
+ unsigned ShiftOp;
+ switch (Opcode) {
+ case ISD::SADDSAT:
+ case ISD::SSUBSAT:
+ ShiftOp = ISD::SRA;
+ break;
+ case ISD::UADDSAT:
+ case ISD::USUBSAT:
+ ShiftOp = ISD::SRL;
+ break;
+ default:
+ llvm_unreachable("Expected opcode to be signed or unsigned saturation "
+ "addition or subtraction");
+ }
+
+ unsigned SHLAmount = NewBits - OldBits;
+ EVT SHVT = TLI.getShiftAmountTy(PromotedType, DAG.getDataLayout());
+ SDValue ShiftAmount = DAG.getConstant(SHLAmount, dl, SHVT);
+ Op1Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op1Promoted, ShiftAmount);
+ Op2Promoted =
+ DAG.getNode(ISD::SHL, dl, PromotedType, Op2Promoted, ShiftAmount);
+
+ SDValue Result =
+ DAG.getNode(Opcode, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ShiftOp, dl, PromotedType, Result, ShiftAmount);
+ } else {
+ if (Opcode == ISD::USUBSAT) {
+ SDValue Max =
+ DAG.getNode(ISD::UMAX, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::SUB, dl, PromotedType, Max, Op2Promoted);
+ }
+
+ if (Opcode == ISD::UADDSAT) {
+ APInt MaxVal = APInt::getAllOnesValue(OldBits).zext(NewBits);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Add =
+ DAG.getNode(ISD::ADD, dl, PromotedType, Op1Promoted, Op2Promoted);
+ return DAG.getNode(ISD::UMIN, dl, PromotedType, Add, SatMax);
+ }
+
+ unsigned AddOp = Opcode == ISD::SADDSAT ? ISD::ADD : ISD::SUB;
+ APInt MinVal = APInt::getSignedMinValue(OldBits).sext(NewBits);
+ APInt MaxVal = APInt::getSignedMaxValue(OldBits).sext(NewBits);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, PromotedType);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, PromotedType);
+ SDValue Result =
+ DAG.getNode(AddOp, dl, PromotedType, Op1Promoted, Op2Promoted);
+ Result = DAG.getNode(ISD::SMIN, dl, PromotedType, Result, SatMax);
+ Result = DAG.getNode(ISD::SMAX, dl, PromotedType, Result, SatMin);
+ return Result;
+ }
}
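
A standalone model of the new min/max path above (not part of the patch), for an i8 saddsat promoted to i32: extend, add exactly, then clamp to the original type's signed range.

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>

    static int8_t saddsat_i8(int8_t a, int8_t b) {
      int32_t wide = (int32_t)a + (int32_t)b;    // SExtPromotedInteger + ADD
      wide = std::min(wide, (int32_t)INT8_MAX);  // SMIN against SatMax
      wide = std::max(wide, (int32_t)INT8_MIN);  // SMAX against SatMin
      return (int8_t)wide;
    }

    int main() {
      std::printf("%d\n", saddsat_i8(100, 100)); // prints 127, not -56
    }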
SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
@@ -673,6 +730,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
SDValue Op1Promoted, Op2Promoted;
bool Signed =
N->getOpcode() == ISD::SMULFIX || N->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating =
+ N->getOpcode() == ISD::SMULFIXSAT || N->getOpcode() == ISD::UMULFIXSAT;
if (Signed) {
Op1Promoted = SExtPromotedInteger(N->getOperand(0));
Op2Promoted = SExtPromotedInteger(N->getOperand(1));
@@ -685,7 +744,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MULFIX(SDNode *N) {
unsigned DiffSize =
PromotedType.getScalarSizeInBits() - OldType.getScalarSizeInBits();
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
if (Saturating) {
// Promoting the operand and result values changes the saturation width,
// which extends the values that we clamp to on saturation. This could be
@@ -1110,6 +1168,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
Res = PromoteIntOp_INSERT_VECTOR_ELT(N, OpNo);break;
case ISD::SCALAR_TO_VECTOR:
Res = PromoteIntOp_SCALAR_TO_VECTOR(N); break;
+ case ISD::SPLAT_VECTOR:
+ Res = PromoteIntOp_SPLAT_VECTOR(N); break;
case ISD::VSELECT:
case ISD::SELECT: Res = PromoteIntOp_SELECT(N, OpNo); break;
case ISD::SELECT_CC: Res = PromoteIntOp_SELECT_CC(N, OpNo); break;
@@ -1148,7 +1208,8 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: Res = PromoteIntOp_MULFIX(N); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: Res = PromoteIntOp_MULFIX(N); break;
case ISD::FPOWI: Res = PromoteIntOp_FPOWI(N); break;
@@ -1339,6 +1400,13 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) {
GetPromotedInteger(N->getOperand(0))), 0);
}
+SDValue DAGTypeLegalizer::PromoteIntOp_SPLAT_VECTOR(SDNode *N) {
+ // Integer SPLAT_VECTOR operands are implicitly truncated, so just promote the
+ // operand in place.
+ return SDValue(
+ DAG.UpdateNodeOperands(N, GetPromotedInteger(N->getOperand(0))), 0);
+}
+
SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) {
assert(OpNo == 0 && "Only know how to promote the condition!");
SDValue Cond = N->getOperand(0);
@@ -1454,8 +1522,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MGATHER(MaskedGatherSDNode *N,
EVT DataVT = N->getValueType(0);
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
@@ -1470,8 +1542,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MSCATTER(MaskedScatterSDNode *N,
EVT DataVT = N->getValue().getValueType();
NewOps[OpNo] = PromoteTargetBoolean(N->getOperand(OpNo), DataVT);
} else if (OpNo == 4) {
- // Need to sign extend the index since the bits will likely be used.
- NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ // The Index
+ if (N->isIndexSigned())
+ // Need to sign extend the index since the bits will likely be used.
+ NewOps[OpNo] = SExtPromotedInteger(N->getOperand(OpNo));
+ else
+ NewOps[OpNo] = ZExtPromotedInteger(N->getOperand(OpNo));
} else
NewOps[OpNo] = GetPromotedInteger(N->getOperand(OpNo));
return SDValue(DAG.UpdateNodeOperands(N, NewOps), 0);
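
A standalone illustration of why the new isIndexSigned() check matters (not part of the patch): gathers and scatters compute base + extend(index) * scale, and a narrow index with its top bit set addresses different memory depending on the extension kind.

    #include <cstdint>
    #include <cstdio>

    int main() {
      int16_t  idx   = -4;      // index bits 0xFFFC
      uint64_t base  = 0x1000;
      uint64_t scale = 4;
      uint64_t sextAddr = base + (uint64_t)((int64_t)idx * (int64_t)scale);
      uint64_t zextAddr = base + (uint64_t)(uint16_t)idx * scale;
      std::printf("0x%llx 0x%llx\n",
                  (unsigned long long)sextAddr,   // prints 0xff0
                  (unsigned long long)zextAddr);  // and 0x40ff0
    }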
@@ -1715,7 +1791,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: ExpandIntRes_MULFIX(N, Lo, Hi); break;
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: ExpandIntRes_MULFIX(N, Lo, Hi); break;
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -2473,7 +2550,9 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2488,7 +2567,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo,
RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT);
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, false/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2514,7 +2594,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLROUND(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2540,7 +2622,9 @@ void DAGTypeLegalizer::ExpandIntRes_LLRINT(SDNode *N, SDValue &Lo,
SDLoc dl(N);
EVT RetVT = N->getValueType(0);
- SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, RetVT, Op, CallOptions, dl).first,
Lo, Hi);
}
@@ -2743,7 +2827,9 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N,
}
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true/*irrelevant*/, dl).first,
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first,
Lo, Hi);
}
@@ -2777,38 +2863,53 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
uint64_t Scale = N->getConstantOperandVal(2);
- bool Saturating = N->getOpcode() == ISD::SMULFIXSAT;
- EVT BoolVT = getSetCCResultType(VT);
- SDValue Zero = DAG.getConstant(0, dl, VT);
+ bool Saturating = (N->getOpcode() == ISD::SMULFIXSAT ||
+ N->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (N->getOpcode() == ISD::SMULFIX ||
+ N->getOpcode() == ISD::SMULFIXSAT);
+
+ // Handle special case when scale is equal to zero.
if (!Scale) {
SDValue Result;
if (!Saturating) {
Result = DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
} else {
- Result = DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ EVT BoolVT = getSetCCResultType(VT);
+ unsigned MulOp = Signed ? ISD::SMULO : ISD::UMULO;
+ Result = DAG.getNode(MulOp, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
SDValue Overflow = Result.getValue(1);
-
- APInt MinVal = APInt::getSignedMinValue(VTSize);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
- SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
- SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
- Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
- Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ if (Signed) {
+ APInt MinVal = APInt::getSignedMinValue(VTSize);
+ APInt MaxVal = APInt::getSignedMaxValue(VTSize);
+ SDValue SatMin = DAG.getConstant(MinVal, dl, VT);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
+ Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
+ Result = DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else {
+ // For unsigned multiplication, we only need to check the max since we
+ // can't really overflow towards zero.
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ Result = DAG.getSelect(dl, VT, Overflow, SatMax, Product);
+ }
}
SplitInteger(Result, Lo, Hi);
return;
}
+ // For SMULFIX[SAT] we only expect to find Scale<VTSize, but this assert will
+ // cover unhandled cases below, while still being valid for UMULFIX[SAT].
+ assert(Scale <= VTSize && "Scale can't be larger than the value type size.");
+
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue LL, LH, RL, RH;
GetExpandedInteger(LHS, LL, LH);
GetExpandedInteger(RHS, RL, RH);
SmallVector<SDValue, 4> Result;
- bool Signed = (N->getOpcode() == ISD::SMULFIX ||
- N->getOpcode() == ISD::SMULFIXSAT);
unsigned LoHiOp = Signed ? ISD::SMUL_LOHI : ISD::UMUL_LOHI;
if (!TLI.expandMUL_LOHI(LoHiOp, VT, dl, LHS, RHS, Result, NVT, DAG,
TargetLowering::MulExpansionKind::OnlyLegalOrCustom,
@@ -2822,19 +2923,9 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
"the size of the current value type");
EVT ShiftTy = TLI.getShiftAmountTy(NVT, DAG.getDataLayout());
- // Shift whole amount by scale.
- SDValue ResultLL = Result[0];
- SDValue ResultLH = Result[1];
- SDValue ResultHL = Result[2];
- SDValue ResultHH = Result[3];
-
- SDValue SatMax, SatMin;
- SDValue NVTZero = DAG.getConstant(0, dl, NVT);
- SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
- EVT BoolNVT = getSetCCResultType(NVT);
-
- // After getting the multplication result in 4 parts, we need to perform a
+ // After getting the multiplication result in 4 parts, we need to perform a
// shift right by the amount of the scale to get the result in that scale.
+ //
// Let's say we multiply 2 64 bit numbers. The resulting value can be held in
// 128 bits that are cut into 4 32-bit parts:
//
@@ -2846,123 +2937,135 @@ void DAGTypeLegalizer::ExpandIntRes_MULFIX(SDNode *N, SDValue &Lo,
//
// |NVTSize-|
//
- // The resulting Lo and Hi will only need to be one of these 32-bit parts
- // after shifting.
+ // The resulting Lo and Hi would normally be in LL and LH after the shift. But
+ // to avoid unnecessary shifting of all 4 parts, we can adjust the shift
+ // amount and get Lo and Hi using two funnel shifts. Or for the special case
+ // when Scale is a multiple of NVTSize we can just pick the result without
+ // shifting.
+ uint64_t Part0 = Scale / NVTSize; // Part holding lowest bit needed.
+ if (Scale % NVTSize) {
+ SDValue ShiftAmount = DAG.getConstant(Scale % NVTSize, dl, ShiftTy);
+ Lo = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 1], Result[Part0],
+ ShiftAmount);
+ Hi = DAG.getNode(ISD::FSHR, dl, NVT, Result[Part0 + 2], Result[Part0 + 1],
+ ShiftAmount);
+ } else {
+ Lo = Result[Part0];
+ Hi = Result[Part0 + 1];
+ }
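
A standalone model of the funnel-shift selection above (not part of the patch), assuming a 64-bit VT expanded into 32-bit parts and Scale = 8: FSHR(hi, lo, amt) yields the low NVTSize bits of the concatenation hi:lo shifted right by amt, so adjacent parts of the product combine without shifting all four.

    #include <cstdint>
    #include <cstdio>

    // ISD::FSHR on i32: low 32 bits of (hi:lo) >> amt, for 0 < amt < 32.
    static uint32_t fshr32(uint32_t hi, uint32_t lo, unsigned amt) {
      return (uint32_t)((((uint64_t)hi << 32) | lo) >> amt);
    }

    int main() {
      uint32_t part0 = 0xDDCCBBAAu, part1 = 0x44332211u;  // Result[0], Result[1]
      // Scale = 8, NVTSize = 32 -> Part0 = 0, shift amount = Scale % NVTSize = 8.
      std::printf("0x%08x\n", fshr32(part1, part0, 8));   // prints 0x11ddccbb
    }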
+
+ // Unless saturation is requested we are done. The result is in <Hi,Lo>.
+ if (!Saturating)
+ return;
+
+ // Cannot overflow when there is no integer part.
+ if (Scale == VTSize)
+ return;
+
+ // To handle saturation we must check for overflow in the multiplication.
+ //
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of Result)
+ // aren't all zeroes.
+ //
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of Result)
+ // aren't all ones or all zeroes.
+ //
+ // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
+ // highest bit of HH determines saturation direction in the event of signed
+ // saturation.
+
+ SDValue ResultHL = Result[2];
+ SDValue ResultHH = Result[3];
+
+ SDValue SatMax, SatMin;
+ SDValue NVTZero = DAG.getConstant(0, dl, NVT);
+ SDValue NVTNeg1 = DAG.getConstant(-1, dl, NVT);
+ EVT BoolNVT = getSetCCResultType(NVT);
+
+ if (!Signed) {
+ if (Scale < NVTSize) {
+ // Overflow happened if ((HH | (HL >> Scale)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale, dl, ShiftTy));
+ SDValue Tmp = DAG.getNode(ISD::OR, dl, NVT, HLAdjusted, ResultHH);
+ SatMax = DAG.getSetCC(dl, BoolNVT, Tmp, NVTZero, ISD::SETNE);
+ } else if (Scale == NVTSize) {
+ // Overflow happened if (HH != 0).
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETNE);
+ } else if (Scale < VTSize) {
+ // Overflow happened if ((HH >> (Scale - NVTSize)) != 0).
+ SDValue HLAdjusted = DAG.getNode(ISD::SRL, dl, NVT, ResultHL,
+ DAG.getConstant(Scale - NVTSize, dl,
+ ShiftTy));
+ SatMax = DAG.getSetCC(dl, BoolNVT, HLAdjusted, NVTZero, ISD::SETNE);
+ } else
+ llvm_unreachable("Scale must be less or equal to VTSize for UMULFIXSAT"
+ "(and saturation can't happen with Scale==VTSize).");
+
+ Hi = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, NVTNeg1, Lo);
+ return;
+ }
+
if (Scale < NVTSize) {
- // If the scale is less than the size of the VT we expand to, the Hi and
- // Lo of the result will be in the first 2 parts of the result after
- // shifting right. This only requires shifting by the scale as far as the
- // third part in the result (ResultHL).
- SDValue SRLAmnt = DAG.getConstant(Scale, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(NVTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLL, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultLH, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
-
- // We cannot overflow past HH when multiplying 2 ints of size VTSize, so the
- // highest bit of HH determines saturation direction in the event of
- // saturation.
// The number of overflow bits we can check are VTSize - Scale + 1 (we
// include the sign bit). If these top bits are > 0, then we overflowed past
// the max value. If these top bits are < -1, then we overflowed past the
// min value. Otherwise, we did not overflow.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
- "Extent of overflow bits must start within HL");
- SDValue HLHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
- SDValue HLLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
-
- // HH > 0 or HH == 0 && HL > HLLoMask
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLPos =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLPos));
-
- // HH < -1 or HH == -1 && HL < HLHiMask
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLNeg =
- DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLNeg));
- }
+ unsigned OverflowBits = VTSize - Scale + 1;
+ assert(OverflowBits <= VTSize && OverflowBits > NVTSize &&
+ "Extent of overflow bits must start within HL");
+ SDValue HLHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits - NVTSize), dl, NVT);
+ SDValue HLLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, VTSize - OverflowBits), dl, NVT);
+ // We overflow max if HH > 0 or (HH == 0 && HL > HLLoMask).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLUGT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLLoMask, ISD::SETUGT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLUGT));
+ // We overflow min if HH < -1 or (HH == -1 && HL < HLHiMask).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLULT = DAG.getSetCC(dl, BoolNVT, ResultHL, HLHiMask, ISD::SETULT);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLULT));
} else if (Scale == NVTSize) {
- // If the scales are equal, Lo and Hi are ResultLH and Result HL,
- // respectively. Avoid shifting to prevent undefined behavior.
- Lo = ResultLH;
- Hi = ResultHL;
-
- // We overflow max if HH > 0 or HH == 0 && HL sign bit is 1.
- // We overflow min if HH < -1 or HH == -1 && HL sign bit is 0.
- if (Saturating) {
- SDValue HHPos = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
- SDValue HHZero = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
- SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
- SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHPos,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHZero, HLNeg));
-
- SDValue HHNeg = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
- SDValue HHNeg1 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
- SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
- SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHNeg,
- DAG.getNode(ISD::AND, dl, BoolNVT, HHNeg1, HLPos));
- }
+ // We overflow max if HH > 0 or (HH == 0 && HL sign bit is 1).
+ SDValue HHGT0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETGT);
+ SDValue HHEQ0 = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTZero, ISD::SETEQ);
+ SDValue HLNeg = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETLT);
+ SatMax = DAG.getNode(ISD::OR, dl, BoolNVT, HHGT0,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ0, HLNeg));
+ // We overflow min if HH < -1 or (HH == -1 && HL sign bit is 0).
+ SDValue HHLT = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETLT);
+ SDValue HHEQ = DAG.getSetCC(dl, BoolNVT, ResultHH, NVTNeg1, ISD::SETEQ);
+ SDValue HLPos = DAG.getSetCC(dl, BoolNVT, ResultHL, NVTZero, ISD::SETGE);
+ SatMin = DAG.getNode(ISD::OR, dl, BoolNVT, HHLT,
+ DAG.getNode(ISD::AND, dl, BoolNVT, HHEQ, HLPos));
} else if (Scale < VTSize) {
- // If the scale is instead less than the old VT size, but greater than or
- // equal to the expanded VT size, the first part of the result (ResultLL) is
- // no longer a part of Lo because it would be scaled out anyway. Instead we
- // can start shifting right from the fourth part (ResultHH) to the second
- // part (ResultLH), and Result LH will be the new Lo.
- SDValue SRLAmnt = DAG.getConstant(Scale - NVTSize, dl, ShiftTy);
- SDValue SHLAmnt = DAG.getConstant(VTSize - Scale, dl, ShiftTy);
- Lo = DAG.getNode(ISD::SRL, dl, NVT, ResultLH, SRLAmnt);
- Lo = DAG.getNode(ISD::OR, dl, NVT, Lo,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHL, SHLAmnt));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, ResultHL, SRLAmnt);
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SHL, dl, NVT, ResultHH, SHLAmnt));
-
// This is similar to the case when we saturate if Scale < NVTSize, but we
- // only need to chech HH.
- if (Saturating) {
- unsigned OverflowBits = VTSize - Scale + 1;
- SDValue HHHiMask = DAG.getConstant(
- APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
- SDValue HHLoMask = DAG.getConstant(
- APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
-
- SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
- SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
- }
- } else if (Scale == VTSize) {
- assert(
- !Signed &&
- "Only unsigned types can have a scale equal to the operand bit width");
-
- Lo = ResultHL;
- Hi = ResultHH;
- } else {
- llvm_unreachable("Expected the scale to be less than or equal to the width "
- "of the operands");
- }
+ // only need to check HH.
+ unsigned OverflowBits = VTSize - Scale + 1;
+ SDValue HHHiMask = DAG.getConstant(
+ APInt::getHighBitsSet(NVTSize, OverflowBits), dl, NVT);
+ SDValue HHLoMask = DAG.getConstant(
+ APInt::getLowBitsSet(NVTSize, NVTSize - OverflowBits), dl, NVT);
+ SatMax = DAG.getSetCC(dl, BoolNVT, ResultHH, HHLoMask, ISD::SETGT);
+ SatMin = DAG.getSetCC(dl, BoolNVT, ResultHH, HHHiMask, ISD::SETLT);
+ } else
+ llvm_unreachable("Illegal scale for signed fixed point mul.");
- if (Saturating) {
- APInt LHMax = APInt::getSignedMaxValue(NVTSize);
- APInt LLMax = APInt::getAllOnesValue(NVTSize);
- APInt LHMin = APInt::getSignedMinValue(NVTSize);
- Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LHMax, dl, NVT), Hi);
- Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(LHMin, dl, NVT), Hi);
- Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(LLMax, dl, NVT), Lo);
- Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
- }
+ // Saturate to signed maximum.
+ APInt MaxHi = APInt::getSignedMaxValue(NVTSize);
+ APInt MaxLo = APInt::getAllOnesValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMax, DAG.getConstant(MaxLo, dl, NVT), Lo);
+ // Saturate to signed minimum.
+ APInt MinHi = APInt::getSignedMinValue(NVTSize);
+ Hi = DAG.getSelect(dl, NVT, SatMin, DAG.getConstant(MinHi, dl, NVT), Hi);
+ Lo = DAG.getSelect(dl, NVT, SatMin, NVTZero, Lo);
}
void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node,
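Note on the saturation logic above: with Scale == NVTSize the expanded result is (ResultHL:ResultLH), so the product overflows the signed range exactly when the parts above it are not a plain sign-extension of the result. A minimal standalone sketch of that test on plain integers (satDirection is an illustrative name, not LLVM API):

    // HH and HL model ResultHH/ResultHL for the Scale == NVTSize case.
    #include <cassert>
    #include <cstdint>

    // Returns +1 to saturate at max, -1 to saturate at min, 0 otherwise.
    static int satDirection(int8_t HH, int8_t HL) {
      bool SatMax = HH > 0 || (HH == 0 && HL < 0);    // HL sign bit is 1
      bool SatMin = HH < -1 || (HH == -1 && HL >= 0); // HL sign bit is 0
      return SatMax ? 1 : SatMin ? -1 : 0;
    }

    int main() {
      assert(satDirection(0, -1) == 1);  // too large: clamp to signed max
      assert(satDirection(-1, 1) == -1); // too small: clamp to signed min
      assert(satDirection(0, 1) == 0);   // in range: no saturation
      return 0;
    }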
@@ -3030,7 +3133,9 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N,
LC = RTLIB::SDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
@@ -3129,7 +3234,9 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N,
if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, isSigned, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
return;
}
@@ -3217,7 +3324,9 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N,
LC = RTLIB::SREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, true, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N,
@@ -3373,7 +3482,8 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
LC = RTLIB::UDIV_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
@@ -3399,7 +3509,8 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N,
LC = RTLIB::UREM_I128;
assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!");
- SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, false, dl).first, Lo, Hi);
+ TargetLowering::MakeLibCallOptions CallOptions;
+ SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl).first, Lo, Hi);
}
void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N,
@@ -3759,7 +3870,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this SINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, SDLoc(N)).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, SDLoc(N)).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -3924,7 +4037,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT);
assert(LC != RTLIB::UNKNOWN_LIBCALL &&
"Don't know how to expand this UINT_TO_FP!");
- return TLI.makeLibCall(DAG, LC, DstVT, Op, true, dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(true);
+ return TLI.makeLibCall(DAG, LC, DstVT, Op, CallOptions, dl).first;
}
SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) {
@@ -4033,6 +4148,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NOutVT, Op);
}
+SDValue DAGTypeLegalizer::PromoteIntRes_SPLAT_VECTOR(SDNode *N) {
+ SDLoc dl(N);
+
+ SDValue SplatVal = N->getOperand(0);
+
+ assert(!SplatVal.getValueType().isVector() && "Input must be a scalar");
+
+ EVT OutVT = N->getValueType(0);
+ EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
+ assert(NOutVT.isVector() && "Type must be promoted to a vector type");
+ EVT NOutElemVT = NOutVT.getVectorElementType();
+
+ SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutElemVT, SplatVal);
+
+ return DAG.getNode(ISD::SPLAT_VECTOR, dl, NOutVT, Op);
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) {
SDLoc dl(N);
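Note on PromoteIntRes_SPLAT_VECTOR above: ANY_EXTEND is sufficient because a splat only replicates the scalar's bits, and promotion leaves the high bits unspecified. A plain-data model of the transformation, assuming an i8 element promoted to i32 (stand-in code, not SelectionDAG):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Splat an 8-bit value through a vector whose legal element type is
    // 32 bits: extend first, then replicate. High bits are unconstrained.
    static std::array<uint32_t, 4> splatPromoted(uint8_t V) {
      uint32_t Ext = V; // models ISD::ANY_EXTEND of the splat operand
      return {Ext, Ext, Ext, Ext};
    }

    int main() {
      for (uint32_t Lane : splatPromoted(0xAB))
        assert((Lane & 0xFF) == 0xAB); // low 8 bits of every lane preserved
      return 0;
    }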
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 14fd5be23ccb..b596c174a287 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -81,7 +81,6 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
for (unsigned i = 0, e = Node.getNumValues(); i != e; ++i) {
SDValue Res(&Node, i);
- EVT VT = Res.getValueType();
bool Failed = false;
// Don't create a value in map.
auto ResId = (ValueToIdMap.count(Res)) ? ValueToIdMap[Res] : 0;
@@ -135,17 +134,13 @@ void DAGTypeLegalizer::PerformExpensiveChecks() {
dbgs() << "Unprocessed value in a map!";
Failed = true;
}
- } else if (isTypeLegal(VT) || IgnoreNodeResults(&Node)) {
+ } else if (isTypeLegal(Res.getValueType()) || IgnoreNodeResults(&Node)) {
if (Mapped > 1) {
dbgs() << "Value with legal type was transformed!";
Failed = true;
}
} else {
- // If the value can be kept in HW registers, softening machinery can
- // leave it unchanged and don't put it to any map.
- if (Mapped == 0 &&
- !(getTypeAction(VT) == TargetLowering::TypeSoftenFloat &&
- isLegalInHWReg(VT))) {
+ if (Mapped == 0) {
dbgs() << "Processed value not in any map!";
Failed = true;
} else if (Mapped & (Mapped - 1)) {
@@ -257,13 +252,9 @@ bool DAGTypeLegalizer::run() {
Changed = true;
goto NodeDone;
case TargetLowering::TypeSoftenFloat:
- Changed = SoftenFloatResult(N, i);
- if (Changed)
- goto NodeDone;
- // If not changed, the result type should be legally in register.
- assert(isLegalInHWReg(ResultVT) &&
- "Unchanged SoftenFloatResult should be legal in register!");
- goto ScanOperands;
+ SoftenFloatResult(N, i);
+ Changed = true;
+ goto NodeDone;
case TargetLowering::TypeExpandFloat:
ExpandFloatResult(N, i);
Changed = true;
@@ -439,15 +430,9 @@ NodeDone:
bool Failed = false;
// Check that all result types are legal.
- // A value type is illegal if its TypeAction is not TypeLegal,
- // and TLI.RegClassForVT does not have a register class for this type.
- // For example, the x86_64 target has f128 that is not TypeLegal,
- // to have softened operators, but it also has FR128 register class to
- // pass and return f128 values. Hence a legalized node can have f128 type.
if (!IgnoreNodeResults(&Node))
for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i)
- if (!isTypeLegal(Node.getValueType(i)) &&
- !TLI.isTypeLegal(Node.getValueType(i))) {
+ if (!isTypeLegal(Node.getValueType(i))) {
dbgs() << "Result type " << i << " illegal: ";
Node.dump(&DAG);
Failed = true;
@@ -456,8 +441,7 @@ NodeDone:
// Check that all operand types are legal.
for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i)
if (!IgnoreNodeResults(Node.getOperand(i).getNode()) &&
- !isTypeLegal(Node.getOperand(i).getValueType()) &&
- !TLI.isTypeLegal(Node.getOperand(i).getValueType())) {
+ !isTypeLegal(Node.getOperand(i).getValueType())) {
dbgs() << "Operand type " << i << " illegal: ";
Node.getOperand(i).dump(&DAG);
Failed = true;
@@ -713,23 +697,13 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) {
}
void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) {
- // f128 of x86_64 could be kept in SSE registers,
- // but sometimes softened to i128.
- assert((Result.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
+ assert(Result.getValueType() ==
+ TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) &&
"Invalid type for softened float");
AnalyzeNewValue(Result);
auto &OpIdEntry = SoftenedFloats[getTableId(Op)];
- // Allow repeated calls to save f128 type nodes
- // or any node with type that transforms to itself.
- // Many operations on these types are not softened.
- assert(((OpIdEntry == 0) ||
- Op.getValueType() ==
- TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) &&
- "Node is already converted to integer!");
+ assert((OpIdEntry == 0) && "Node is already converted to integer!");
OpIdEntry = getTableId(Result);
}
@@ -1003,25 +977,27 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) {
/// Convert the node into a libcall with the same prototype.
SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N,
bool isSigned) {
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
unsigned NumOps = N->getNumOperands();
SDLoc dl(N);
if (NumOps == 0) {
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), None, CallOptions,
dl).first;
} else if (NumOps == 1) {
SDValue Op = N->getOperand(0);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Op, CallOptions,
dl).first;
} else if (NumOps == 2) {
SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) };
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned,
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions,
dl).first;
}
SmallVector<SDValue, 8> Ops(NumOps);
for (unsigned i = 0; i < NumOps; ++i)
Ops[i] = N->getOperand(i);
- return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, isSigned, dl).first;
+ return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, CallOptions, dl).first;
}
/// Expand a node into a call to a libcall. Similar to ExpandLibCall except that
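Note on the makeLibCall changes throughout this patch: the boolean isSigned parameter is replaced by a TargetLowering::MakeLibCallOptions builder, so call sites name the option they set and new options can be added without touching every caller. A reduced sketch of the pattern (MakeLibCallOptions below is a stand-in, not the full TargetLowering definition):

    // Chainable option struct replacing a bare bool parameter.
    struct MakeLibCallOptions {
      bool IsSExt = false;
      MakeLibCallOptions &setSExt(bool V) {
        IsSExt = V;
        return *this;
      }
    };

    static int makeLibCall(const MakeLibCallOptions &Opts) {
      return Opts.IsSExt ? 1 : 0; // stand-in for the real lowering
    }

    int main() {
      MakeLibCallOptions CallOptions;
      CallOptions.setSExt(true); // was: makeLibCall(..., /*isSigned=*/true, ...)
      return makeLibCall(CallOptions) == 1 ? 0 : 1;
    }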
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 1d489b1b3a33..4afbae69128a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -73,15 +73,6 @@ private:
return VT.isSimple() && TLI.isTypeLegal(VT);
}
- /// Return true if this type can be passed in registers.
- /// For example, x86_64's f128, should to be legally in registers
- /// and only some operations converted to library calls or integer
- /// bitwise operations.
- bool isLegalInHWReg(EVT VT) const {
- EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
- return VT == NVT && isSimpleLegalType(VT);
- }
-
EVT getSetCCResultType(EVT VT) const {
return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
}
@@ -306,6 +297,7 @@ private:
SDValue PromoteIntRes_VECTOR_SHUFFLE(SDNode *N);
SDValue PromoteIntRes_BUILD_VECTOR(SDNode *N);
SDValue PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntRes_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntRes_EXTEND_VECTOR_INREG(SDNode *N);
SDValue PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue PromoteIntRes_CONCAT_VECTORS(SDNode *N);
@@ -363,6 +355,7 @@ private:
SDValue PromoteIntOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue PromoteIntOp_CONCAT_VECTORS(SDNode *N);
SDValue PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue PromoteIntOp_SPLAT_VECTOR(SDNode *N);
SDValue PromoteIntOp_SELECT(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_SETCC(SDNode *N, unsigned OpNo);
@@ -472,14 +465,11 @@ private:
// Float to Integer Conversion Support: LegalizeFloatTypes.cpp
//===--------------------------------------------------------------------===//
- /// Given an operand Op of Float type, returns the integer if the Op is not
- /// supported in target HW and converted to the integer.
- /// The integer contains exactly the same bits as Op - only the type changed.
- /// For example, if Op is an f32 which was softened to an i32, then this
- /// method returns an i32, the bits of which coincide with those of Op.
- /// If the Op can be efficiently supported in target HW or the operand must
- /// stay in a register, the Op is not converted to an integer.
- /// In that case, the given op is returned.
+ /// GetSoftenedFloat - Given a processed operand Op which was converted to an
+ /// integer of the same size, this returns the integer. The integer contains
+ /// exactly the same bits as Op - only the type changed. For example, if Op
+ /// is an f32 which was softened to an i32, then this method returns an i32,
+ /// the bits of which coincide with those of Op.
SDValue GetSoftenedFloat(SDValue Op) {
TableId Id = getTableId(Op);
auto Iter = SoftenedFloats.find(Id);
@@ -494,19 +484,19 @@ private:
}
void SetSoftenedFloat(SDValue Op, SDValue Result);
- // Convert Float Results to Integer for Non-HW-supported Operations.
- bool SoftenFloatResult(SDNode *N, unsigned ResNo);
+ // Convert Float Results to Integer.
+ void SoftenFloatResult(SDNode *N, unsigned ResNo);
SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
- SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_ConstantFP(SDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FABS(SDNode *N);
SDValue SoftenFloatRes_FMINNUM(SDNode *N);
SDValue SoftenFloatRes_FMAXNUM(SDNode *N);
SDValue SoftenFloatRes_FADD(SDNode *N);
SDValue SoftenFloatRes_FCEIL(SDNode *N);
- SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N);
SDValue SoftenFloatRes_FCOS(SDNode *N);
SDValue SoftenFloatRes_FDIV(SDNode *N);
SDValue SoftenFloatRes_FEXP(SDNode *N);
@@ -518,7 +508,7 @@ private:
SDValue SoftenFloatRes_FMA(SDNode *N);
SDValue SoftenFloatRes_FMUL(SDNode *N);
SDValue SoftenFloatRes_FNEARBYINT(SDNode *N);
- SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_FNEG(SDNode *N);
SDValue SoftenFloatRes_FP_EXTEND(SDNode *N);
SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N);
SDValue SoftenFloatRes_FP_ROUND(SDNode *N);
@@ -531,27 +521,17 @@ private:
SDValue SoftenFloatRes_FSQRT(SDNode *N);
SDValue SoftenFloatRes_FSUB(SDNode *N);
SDValue SoftenFloatRes_FTRUNC(SDNode *N);
- SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo);
- SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo);
+ SDValue SoftenFloatRes_LOAD(SDNode *N);
+ SDValue SoftenFloatRes_SELECT(SDNode *N);
+ SDValue SoftenFloatRes_SELECT_CC(SDNode *N);
SDValue SoftenFloatRes_UNDEF(SDNode *N);
SDValue SoftenFloatRes_VAARG(SDNode *N);
SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N);
- // Return true if we can skip softening the given operand or SDNode because
- // either it was soften before by SoftenFloatResult and references to the
- // operand were replaced by ReplaceValueWith or it's value type is legal in HW
- // registers and the operand can be left unchanged.
- bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo);
-
- // Convert Float Operand to Integer for Non-HW-supported Operations.
+ // Convert Float Operand to Integer.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
SDValue SoftenFloatOp_BITCAST(SDNode *N);
- SDValue SoftenFloatOp_COPY_TO_REG(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
- SDValue SoftenFloatOp_FABS(SDNode *N);
- SDValue SoftenFloatOp_FCOPYSIGN(SDNode *N);
- SDValue SoftenFloatOp_FNEG(SDNode *N);
SDValue SoftenFloatOp_FP_EXTEND(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_XINT(SDNode *N);
@@ -559,7 +539,6 @@ private:
SDValue SoftenFloatOp_LLROUND(SDNode *N);
SDValue SoftenFloatOp_LRINT(SDNode *N);
SDValue SoftenFloatOp_LLRINT(SDNode *N);
- SDValue SoftenFloatOp_SELECT(SDNode *N);
SDValue SoftenFloatOp_SELECT_CC(SDNode *N);
SDValue SoftenFloatOp_SETCC(SDNode *N);
SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo);
@@ -715,6 +694,7 @@ private:
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_UnaryOp(SDNode *N);
+ SDValue ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_VSELECT(SDNode *N);
@@ -830,6 +810,7 @@ private:
SDValue WidenVecRes_Ternary(SDNode *N);
SDValue WidenVecRes_Binary(SDNode *N);
SDValue WidenVecRes_BinaryCanTrap(SDNode *N);
+ SDValue WidenVecRes_BinaryWithExtraScalarOp(SDNode *N);
SDValue WidenVecRes_StrictFP(SDNode *N);
SDValue WidenVecRes_OverflowOp(SDNode *N, unsigned ResNo);
SDValue WidenVecRes_Convert(SDNode *N);
@@ -933,6 +914,8 @@ private:
void SplitRes_SELECT_CC (SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitRes_UNDEF (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVSETCC(const SDNode *N);
+
//===--------------------------------------------------------------------===//
// Generic Expansion: LegalizeTypesGeneric.cpp
//===--------------------------------------------------------------------===//
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 943f63f46c47..5562f400b6e1 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -52,17 +52,11 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
case TargetLowering::TypePromoteFloat:
llvm_unreachable("Bitcast of a promotion-needing float should never need"
"expansion");
- case TargetLowering::TypeSoftenFloat: {
- // Expand the floating point operand only if it was converted to integers.
- // Otherwise, it is a legal type like f128 that can be saved in a register.
- auto SoftenedOp = GetSoftenedFloat(InOp);
- if (isLegalInHWReg(SoftenedOp.getValueType()))
- break;
- SplitInteger(SoftenedOp, Lo, Hi);
+ case TargetLowering::TypeSoftenFloat:
+ SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
- }
case TargetLowering::TypeExpandInteger:
case TargetLowering::TypeExpandFloat: {
auto &DL = DAG.getDataLayout();
@@ -509,23 +503,6 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo,
GetSplitOp(Op, Lo, Hi);
}
-static std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N,
- SelectionDAG &DAG) {
- SDLoc DL(N);
- EVT LoVT, HiVT;
- std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
-
- // Split the inputs.
- SDValue Lo, Hi, LL, LH, RL, RH;
- std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
- std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
-
- Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
- Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
-
- return std::make_pair(Lo, Hi);
-}
-
void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue LL, LH, RL, RH, CL, CH;
SDLoc dl(N);
@@ -537,16 +514,25 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) {
if (Cond.getValueType().isVector()) {
if (SDValue Res = WidenVSELECTAndMask(N))
std::tie(CL, CH) = DAG.SplitVector(Res->getOperand(0), dl);
- // It seems to improve code to generate two narrow SETCCs as opposed to
- // splitting a wide result vector.
- else if (Cond.getOpcode() == ISD::SETCC)
- std::tie(CL, CH) = SplitVSETCC(Cond.getNode(), DAG);
    // Check if there are already split versions of the vector available and
// use those instead of splitting the mask operand again.
else if (getTypeAction(Cond.getValueType()) ==
TargetLowering::TypeSplitVector)
GetSplitVector(Cond, CL, CH);
- else
+ // It seems to improve code to generate two narrow SETCCs as opposed to
+ // splitting a wide result vector.
+ else if (Cond.getOpcode() == ISD::SETCC) {
+ // If the condition is a vXi1 vector, and the LHS of the setcc is a legal
+ // type and the setcc result type is the same vXi1, then leave the setcc
+ // alone.
+ EVT CondLHSVT = Cond.getOperand(0).getValueType();
+ if (Cond.getValueType().getVectorElementType() == MVT::i1 &&
+ isTypeLegal(CondLHSVT) &&
+ getSetCCResultType(CondLHSVT) == Cond.getValueType())
+ std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
+ else
+ SplitVecRes_SETCC(Cond.getNode(), CL, CH);
+ } else
std::tie(CL, CH) = DAG.SplitVector(Cond, dl);
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 10b8b705869e..15c3a0b6cfad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -38,6 +38,7 @@
#include "llvm/IR/DataLayout.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
@@ -333,14 +334,27 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_ROUND:
case ISD::STRICT_FP_EXTEND:
- // These pseudo-ops get legalized as if they were their non-strict
- // equivalent. For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
- // is also legal, but if ISD::FSQRT requires expansion then so does
- // ISD::STRICT_FSQRT.
- Action = TLI.getStrictFPOperationAction(Node->getOpcode(),
- Node->getValueType(0));
+ Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
+ // If we're asked to expand a strict vector floating-point operation,
+ // by default we're going to simply unroll it. That is usually the
+ // best approach, except in the case where the resulting strict (scalar)
+ // operations would themselves use the fallback mutation to non-strict.
+ // In that specific case, just do the fallback on the vector op.
+ if (Action == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(),
+ Node->getValueType(0))
+ == TargetLowering::Legal) {
+ EVT EltVT = Node->getValueType(0).getVectorElementType();
+ if (TLI.getOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Expand &&
+ TLI.getStrictFPOperationAction(Node->getOpcode(), EltVT)
+ == TargetLowering::Legal)
+ Action = TargetLowering::Legal;
+ }
break;
case ISD::ADD:
case ISD::SUB:
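Note on the action selection above: the new code keeps a strict vector op legal when expanding (unrolling) it would only produce scalar ops that fall back to non-strict anyway. A reduced model of that decision (stand-in enum and function, not the TargetLowering API):

    enum Action { Legal, Expand };

    // VecNonStrict/VecStrict: actions for the vector op and its strict form;
    // EltNonStrict/EltStrict: the same queries on the element type.
    static Action strictVectorFPAction(Action VecNonStrict, Action VecStrict,
                                       Action EltNonStrict, Action EltStrict) {
      Action A = VecNonStrict;
      // Expanding a strict vector op unrolls it; if the scalar ops would
      // themselves mutate to non-strict while the strict vector form is
      // directly legal, keep the vector op legal instead.
      if (A == Expand && VecStrict == Legal && EltNonStrict == Expand &&
          EltStrict == Legal)
        A = Legal;
      return A;
    }

    int main() {
      return strictVectorFPAction(Expand, Legal, Expand, Legal) == Legal ? 0 : 1;
    }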
@@ -439,16 +453,13 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
break;
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
- case ISD::UMULFIX: {
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT: {
unsigned Scale = Node->getConstantOperandVal(2);
Action = TLI.getFixedPointOperationAction(Node->getOpcode(),
Node->getValueType(0), Scale);
break;
}
- case ISD::FP_ROUND_INREG:
- Action = TLI.getOperationAction(Node->getOpcode(),
- cast<VTSDNode>(Node->getOperand(1))->getVT());
- break;
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::VECREDUCE_ADD:
@@ -820,6 +831,13 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::SMULFIX:
case ISD::UMULFIX:
return ExpandFixedPointMul(Op);
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIXSAT:
+ // FIXME: We do not expand SMULFIXSAT/UMULFIXSAT here yet; it is not clear
+ // exactly why. Perhaps it results in worse codegen than the unroll for some
+ // targets? This should be investigated, and if we keep preferring the
+ // unroll, the reason deserves a comment here.
+ return DAG.UnrollVectorOp(Op.getNode());
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -844,6 +862,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
return ExpandStrictFPOp(Op);
case ISD::VECREDUCE_ADD:
case ISD::VECREDUCE_MUL:
@@ -1168,9 +1188,13 @@ SDValue VectorLegalizer::ExpandABS(SDValue Op) {
SDValue VectorLegalizer::ExpandFP_TO_UINT(SDValue Op) {
// Attempt to expand using TargetLowering.
- SDValue Result;
- if (TLI.expandFP_TO_UINT(Op.getNode(), Result, DAG))
+ SDValue Result, Chain;
+ if (TLI.expandFP_TO_UINT(Op.getNode(), Result, Chain, DAG)) {
+ if (Op.getNode()->isStrictFPOpcode())
+ // Relink the chain
+ DAG.ReplaceAllUsesOfValueWith(Op.getValue(1), Chain);
return Result;
+ }
// Otherwise go ahead and unroll.
return DAG.UnrollVectorOp(Op.getNode());
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 7e4d52617977..3763e886cef2 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -52,7 +52,6 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
case ISD::STRICT_FP_ROUND: R = ScalarizeVecRes_STRICT_FP_ROUND(N); break;
case ISD::FP_ROUND: R = ScalarizeVecRes_FP_ROUND(N); break;
- case ISD::FP_ROUND_INREG: R = ScalarizeVecRes_InregOp(N); break;
case ISD::FPOWI: R = ScalarizeVecRes_FPOWI(N); break;
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
@@ -171,6 +170,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FFLOOR:
case ISD::STRICT_FROUND:
case ISD::STRICT_FTRUNC:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::STRICT_FP_EXTEND:
R = ScalarizeVecRes_StrictFPOp(N);
break;
@@ -185,6 +186,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
R = ScalarizeVecRes_MULFIX(N);
break;
}
@@ -604,6 +606,10 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::UINT_TO_FP:
Res = ScalarizeVecOp_UnaryOp(N);
break;
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
+ Res = ScalarizeVecOp_UnaryOp_StrictFP(N);
+ break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
break;
@@ -679,6 +685,23 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Op);
}
+/// If the input is a vector that needs to be scalarized, it must be <1 x ty>.
+/// Do the strict FP operation on the element instead.
+SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp_StrictFP(SDNode *N) {
+ assert(N->getValueType(0).getVectorNumElements() == 1 &&
+ "Unexpected vector type!");
+ SDValue Elt = GetScalarizedVector(N->getOperand(1));
+ SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N),
+ { N->getValueType(0).getScalarType(), MVT::Other },
+ { N->getOperand(0), Elt });
+ // Legalize the chain result - switch anything that used the old chain to
+ // use the new one.
+ ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
+ // Revectorize the result so the types line up with what the uses of this
+ // expression expect.
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), N->getValueType(0), Res);
+}
+
/// The vectors to concatenate have length one - use a BUILD_VECTOR instead.
SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) {
SmallVector<SDValue, 8> Ops(N->getNumOperands());
@@ -828,7 +851,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::EXTRACT_SUBVECTOR: SplitVecRes_EXTRACT_SUBVECTOR(N, Lo, Hi); break;
case ISD::INSERT_SUBVECTOR: SplitVecRes_INSERT_SUBVECTOR(N, Lo, Hi); break;
- case ISD::FP_ROUND_INREG: SplitVecRes_InregOp(N, Lo, Hi); break;
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::FCOPYSIGN: SplitVecRes_FCOPYSIGN(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
@@ -883,7 +905,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FP_ROUND:
case ISD::STRICT_FP_ROUND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::FRINT:
case ISD::FROUND:
case ISD::FSIN:
@@ -977,6 +1001,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMULFIX:
case ISD::SMULFIXSAT:
case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
SplitVecRes_MULFIX(N, Lo, Hi);
break;
}
@@ -1560,10 +1585,14 @@ void DAGTypeLegalizer::SplitVecRes_MLOAD(MaskedLoadSDNode *MLD,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
EVT MemoryVT = MLD->getMemoryVT();
EVT LoMemVT, HiMemVT;
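Note on the mask handling above (the same pattern recurs below for MGATHER, MSTORE and MSCATTER): when the mask is produced directly by a SETCC, re-emitting two half-width compares avoids materializing and then splitting a wide i1 vector. A plain-data model of that split:

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 4> A = {1, 5, 3, 7}, B = {2, 2, 2, 2};
      std::array<bool, 2> MaskLo, MaskHi;
      for (int i = 0; i != 2; ++i)
        MaskLo[i] = A[i] > B[i]; // low-half compare
      for (int i = 0; i != 2; ++i)
        MaskHi[i] = A[i + 2] > B[i + 2]; // high-half compare
      assert(!MaskLo[0] && MaskLo[1] && MaskHi[0] && MaskHi[1]);
      return 0;
    }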
@@ -1622,10 +1651,14 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
// Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ if (Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl);
+ }
EVT MemoryVT = MGT->getMemoryVT();
EVT LoMemVT, HiMemVT;
@@ -1651,11 +1684,11 @@ void DAGTypeLegalizer::SplitVecRes_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl, OpsLo,
- MMO);
+ MMO, MGT->getIndexType());
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl, OpsHi,
- MMO);
+ MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -1979,6 +2012,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
break;
case ISD::FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::CTTZ:
case ISD::CTLZ:
case ISD::CTPOP:
@@ -2293,7 +2328,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsLo[] = {Ch, PassThruLo, MaskLo, Ptr, IndexLo, Scale};
SDValue Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, dl,
- OpsLo, MMO);
+ OpsLo, MMO, MGT->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(MGT->getPointerInfo(),
@@ -2303,7 +2338,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MGATHER(MaskedGatherSDNode *MGT,
SDValue OpsHi[] = {Ch, PassThruHi, MaskHi, Ptr, IndexHi, Scale};
SDValue Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, dl,
- OpsHi, MMO);
+ OpsHi, MMO, MGT->getIndexType());
// Build a factor node to remember that this load is independent of the
// other one.
@@ -2340,12 +2375,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSTORE(MaskedStoreSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue Lo, Hi;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -2397,12 +2436,16 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
else
std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
+ // Split Mask operand
SDValue MaskLo, MaskHi;
- if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
- // Split Mask operand
- GetSplitVector(Mask, MaskLo, MaskHi);
- else
- std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ if (OpNo == 1 && Mask.getOpcode() == ISD::SETCC) {
+ SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi);
+ } else {
+ if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector)
+ GetSplitVector(Mask, MaskLo, MaskHi);
+ else
+ std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, DL);
+ }
SDValue IndexHi, IndexLo;
if (getTypeAction(Index.getValueType()) == TargetLowering::TypeSplitVector)
@@ -2418,7 +2461,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
SDValue OpsLo[] = {Ch, DataLo, MaskLo, Ptr, IndexLo, Scale};
Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
- DL, OpsLo, MMO);
+ DL, OpsLo, MMO, N->getIndexType());
MMO = DAG.getMachineFunction().
getMachineMemOperand(N->getPointerInfo(),
@@ -2430,7 +2473,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_MSCATTER(MaskedScatterSDNode *N,
// after another.
SDValue OpsHi[] = {Lo, DataHi, MaskHi, Ptr, IndexHi, Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
- DL, OpsHi, MMO);
+ DL, OpsHi, MMO, N->getIndexType());
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -2596,7 +2639,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) {
LoRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Lo0, Lo1, N->getOperand(2));
HiRes = DAG.getNode(ISD::SETCC, DL, PartResVT, Hi0, Hi1, N->getOperand(2));
SDValue Con = DAG.getNode(ISD::CONCAT_VECTORS, DL, WideResVT, LoRes, HiRes);
- return PromoteTargetBoolean(Con, N->getValueType(0));
+
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, DL, N->getValueType(0), Con);
}
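Note on the change above: PromoteTargetBoolean is replaced by querying the target's boolean contents for the operand type directly; the extension opcode depends on how the target encodes a true lane. A reduced model (stand-in enum, not the TargetLowering API):

    enum BooleanContent { ZeroOrOne, ZeroOrNegativeOne };

    // Models getExtendForContent: 0/1 booleans zero-extend, 0/-1 booleans
    // sign-extend so that a true lane widens to all-ones.
    static int extendBool(bool B, BooleanContent C) {
      return C == ZeroOrOne ? (B ? 1 : 0) : (B ? -1 : 0);
    }

    int main() {
      return (extendBool(true, ZeroOrNegativeOne) == -1 &&
              extendBool(true, ZeroOrOne) == 1)
                 ? 0
                 : 1;
    }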
@@ -2663,7 +2710,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecRes_EXTRACT_SUBVECTOR(N); break;
- case ISD::FP_ROUND_INREG: Res = WidenVecRes_InregOp(N); break;
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
@@ -2719,6 +2765,15 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
Res = WidenVecRes_BinaryCanTrap(N);
break;
+ case ISD::SMULFIX:
+ case ISD::SMULFIXSAT:
+ case ISD::UMULFIX:
+ case ISD::UMULFIXSAT:
+ // These are binary operations, but with an extra operand that shouldn't
+ // be widened (the scale).
+ Res = WidenVecRes_BinaryWithExtraScalarOp(N);
+ break;
+
case ISD::STRICT_FADD:
case ISD::STRICT_FSUB:
case ISD::STRICT_FMUL:
@@ -2790,6 +2845,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::STRICT_FP_EXTEND:
case ISD::STRICT_FP_ROUND:
+ case ISD::STRICT_FP_TO_SINT:
+ case ISD::STRICT_FP_TO_UINT:
Res = WidenVecRes_Convert_StrictFP(N);
break;
@@ -2866,6 +2923,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, N->getFlags());
}
+SDValue DAGTypeLegalizer::WidenVecRes_BinaryWithExtraScalarOp(SDNode *N) {
+ // Binary op widening, but with an extra operand that shouldn't be widened.
+ SDLoc dl(N);
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue InOp1 = GetWidenedVector(N->getOperand(0));
+ SDValue InOp2 = GetWidenedVector(N->getOperand(1));
+ SDValue InOp3 = N->getOperand(2);
+ return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2, InOp3,
+ N->getFlags());
+}
+
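Note on the SMULFIX family handled here: an N-bit fixed-point multiply computes the 2N-bit product and drops Scale fractional bits, which is why the scale operand is an immediate and is never widened along with the vector inputs. A plain-integer model of the scalar semantics (not the DAG expansion):

    #include <cassert>
    #include <cstdint>

    static int32_t smulfix32(int32_t A, int32_t B, unsigned Scale) {
      return (int32_t)(((int64_t)A * B) >> Scale);
    }

    int main() {
      // In Q16.16 fixed point (Scale == 16): 1.5 * 2.0 == 3.0.
      assert(smulfix32(3 << 15, 2 << 16, 16) == 3 << 16);
      return 0;
    }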
// Given a vector of operations that have been broken up to widen, see
// if we can collect them together into the next widest legal VT. This
// implementation is trap-safe.
@@ -3716,7 +3784,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_MGATHER(MaskedGatherSDNode *N) {
Scale };
SDValue Res = DAG.getMaskedGather(DAG.getVTList(WideVT, MVT::Other),
N->getMemoryVT(), dl, Ops,
- N->getMemOperand());
+ N->getMemOperand(), N->getIndexType());
// Legalize the chain result - switch anything that used the old chain to
// use the new one.
@@ -4094,7 +4162,9 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_EXTEND:
case ISD::STRICT_FP_EXTEND:
case ISD::FP_TO_SINT:
+ case ISD::STRICT_FP_TO_SINT:
case ISD::FP_TO_UINT:
+ case ISD::STRICT_FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
case ISD::TRUNCATE:
@@ -4434,7 +4504,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MGATHER(SDNode *N, unsigned OpNo) {
SDValue Ops[] = {MG->getChain(), DataOp, Mask, MG->getBasePtr(), Index,
Scale};
SDValue Res = DAG.getMaskedGather(MG->getVTList(), MG->getMemoryVT(), dl, Ops,
- MG->getMemOperand());
+ MG->getMemOperand(), MG->getIndexType());
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
ReplaceValueWith(SDValue(N, 0), Res.getValue(0));
return SDValue();
@@ -4472,7 +4542,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_MSCATTER(SDNode *N, unsigned OpNo) {
Scale};
return DAG.getMaskedScatter(DAG.getVTList(MVT::Other),
MSC->getMemoryVT(), SDLoc(N), Ops,
- MSC->getMemOperand());
+ MSC->getMemOperand(), MSC->getIndexType());
}
SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
@@ -4504,7 +4574,10 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) {
ISD::EXTRACT_SUBVECTOR, dl, ResVT, WideSETCC,
DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
- return PromoteTargetBoolean(CC, VT);
+ EVT OpVT = N->getOperand(0).getValueType();
+ ISD::NodeType ExtendCode =
+ TargetLowering::getExtendForContent(TLI.getBooleanContents(OpVT));
+ return DAG.getNode(ExtendCode, dl, VT, CC);
}
SDValue DAGTypeLegalizer::WidenVecOp_VECREDUCE(SDNode *N) {
@@ -4706,7 +4779,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl<SDValue> &LdChain,
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth;
- unsigned LdAlign = LD->isVolatile() ? 0 : Align; // Allow wider loads.
+ unsigned LdAlign = (!LD->isSimple()) ? 0 : Align; // Allow wider loads.
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index 2cb850fa1a3d..7ee44c808fcb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -498,7 +498,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
} else
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 34b4c8502353..ff806bdb822c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -1188,6 +1188,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
if (!Pred.isArtificial())
AddPredQueued(NewSU, Pred);
+ // Make sure the clone comes after the original. (InstrEmitter assumes
+ // this ordering.)
+ AddPredQueued(NewSU, SDep(SU, SDep::Artificial));
+
// Only copy scheduled successors. Cut them from old node's successor
// list and move them over.
SmallVector<std::pair<SUnit *, SDep>, 4> DelDeps;
@@ -1374,7 +1378,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl<unsigned> &LRegs) {
// Check for def of register or earlyclobber register.
for (; NumVals; --NumVals, ++i) {
unsigned Reg = cast<RegisterSDNode>(Node->getOperand(i))->getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
CheckForLiveRegDef(SU, Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI);
}
} else
@@ -2358,7 +2362,7 @@ static bool hasOnlyLiveInOpers(const SUnit *SU) {
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
unsigned Reg =
cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2379,7 +2383,7 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
unsigned Reg =
cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
RetVal = true;
continue;
}
@@ -2948,8 +2952,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyToReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
SDNode *PredFrameSetup = nullptr;
@@ -2995,8 +2999,8 @@ void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU.getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
- TargetRegisterInfo::isVirtualRegister
- (cast<RegisterSDNode>(N->getOperand(1))->getReg()))
+ Register::isVirtualRegister(
+ cast<RegisterSDNode>(N->getOperand(1))->getReg()))
continue;
// Perform checks on the successors of PredSU.
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index 568c6191e512..d4c1fb36475e 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -115,7 +115,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
return;
unsigned Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
return;
unsigned ResNo = User->getOperand(2).getResNo();
@@ -528,7 +528,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// glued together nodes with a single SUnit.
-void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
+void ScheduleDAGSDNodes::BuildSchedGraph(AAResults *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
// Populate the SUnits array.
@@ -656,7 +656,7 @@ void ScheduleDAGSDNodes::computeOperandLatency(SDNode *Def, SDNode *Use,
if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
!BB->succ_empty()) {
unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
// This copy is a liveout value. It is likely coalesced, so reduce the
// latency so not to penalize the def.
// FIXME: need target specific adjustment here?
@@ -808,7 +808,7 @@ EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap,
} else {
// Copy from physical register.
assert(I->getReg() && "Unknown physical register!");
- unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
+ Register VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
(void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
@@ -909,6 +909,12 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen, NewInsn);
+
+ if (MDNode *MD = DAG->getHeapAllocSite(N)) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
+
GluedNodes.pop_back();
}
auto NewInsn =
@@ -917,6 +923,10 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) {
if (HasDbg)
ProcessSourceNode(SU->getNode(), DAG, Emitter, VRBaseMap, Orders, Seen,
NewInsn);
+ if (MDNode *MD = DAG->getHeapAllocSite(SU->getNode())) {
+ if (NewInsn && NewInsn->isCall())
+ MF.addCodeViewHeapAllocSite(NewInsn, MD);
+ }
}
// Insert all the dbg_values which have not already been inserted in source
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 5163b4fa4fd3..183ce4b0652d 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -26,6 +26,7 @@
namespace llvm {
+class AAResults;
class InstrItineraryData;
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
@@ -93,7 +94,7 @@ class InstrItineraryData;
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
- void BuildSchedGraph(AliasAnalysis *AA);
+ void BuildSchedGraph(AAResults *AA);
/// InitNumRegDefsLeft - Determine the # of regs defined by this node.
///
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
index ab06b55b49fd..e7bac73678a7 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp
@@ -63,14 +63,13 @@ private:
/// HazardRec - The hazard recognizer to use.
ScheduleHazardRecognizer *HazardRec;
- /// AA - AliasAnalysis for making memory reference queries.
- AliasAnalysis *AA;
+ /// AA - AAResults for making memory reference queries.
+ AAResults *AA;
public:
- ScheduleDAGVLIW(MachineFunction &mf,
- AliasAnalysis *aa,
+ ScheduleDAGVLIW(MachineFunction &mf, AAResults *aa,
SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue), AA(aa) {
const TargetSubtargetInfo &STI = mf.getSubtarget();
HazardRec = STI.getInstrInfo()->CreateTargetHazardRecognizer(&STI, this);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 5852e693fa9f..52a71b91d93f 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -859,9 +859,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
break;
case ISD::TargetExternalSymbol: {
ExternalSymbolSDNode *ESN = cast<ExternalSymbolSDNode>(N);
- Erased = TargetExternalSymbols.erase(
- std::pair<std::string,unsigned char>(ESN->getSymbol(),
- ESN->getTargetFlags()));
+ Erased = TargetExternalSymbols.erase(std::pair<std::string, unsigned>(
+ ESN->getSymbol(), ESN->getTargetFlags()));
break;
}
case ISD::MCSymbol: {
@@ -1084,6 +1083,7 @@ void SelectionDAG::clear() {
ExternalSymbols.clear();
TargetExternalSymbols.clear();
MCSymbols.clear();
+ SDCallSiteDbgInfo.clear();
std::fill(CondCodeNodes.begin(), CondCodeNodes.end(),
static_cast<CondCodeSDNode*>(nullptr));
std::fill(ValueTypeNodes.begin(), ValueTypeNodes.end(),
@@ -1353,7 +1353,7 @@ SDValue SelectionDAG::getConstantFP(double Val, const SDLoc &DL, EVT VT,
SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, const SDLoc &DL,
EVT VT, int64_t Offset, bool isTargetGA,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTargetGA) &&
"Cannot set target flags on target-independent globals");
@@ -1400,7 +1400,7 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
}
SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent jump tables");
unsigned Opc = isTarget ? ISD::TargetJumpTable : ISD::JumpTable;
@@ -1421,7 +1421,7 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1449,7 +1449,7 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT,
SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
unsigned Alignment, int Offset,
bool isTarget,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
assert((TargetFlags == 0 || isTarget) &&
"Cannot set target flags on target-independent globals");
if (Alignment == 0)
@@ -1473,7 +1473,7 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
}
SDValue SelectionDAG::getTargetIndex(int Index, EVT VT, int64_t Offset,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::TargetIndex, getVTList(VT), None);
ID.AddInteger(Index);
@@ -1535,10 +1535,9 @@ SDValue SelectionDAG::getMCSymbol(MCSymbol *Sym, EVT VT) {
}
SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
- unsigned char TargetFlags) {
+ unsigned TargetFlags) {
SDNode *&N =
- TargetExternalSymbols[std::pair<std::string,unsigned char>(Sym,
- TargetFlags)];
+ TargetExternalSymbols[std::pair<std::string, unsigned>(Sym, TargetFlags)];
if (N) return SDValue(N, 0);
N = newSDNode<ExternalSymbolSDNode>(true, Sym, TargetFlags, VT);
InsertNode(N);
@@ -1802,9 +1801,8 @@ SDValue SelectionDAG::getLabelNode(unsigned Opcode, const SDLoc &dl,
}
SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
- int64_t Offset,
- bool isTarget,
- unsigned char TargetFlags) {
+ int64_t Offset, bool isTarget,
+ unsigned TargetFlags) {
unsigned Opc = isTarget ? ISD::TargetBlockAddress : ISD::BlockAddress;
FoldingSetNodeID ID;
@@ -1900,20 +1898,19 @@ SDValue SelectionDAG::expandVAArg(SDNode *Node) {
EVT VT = Node->getValueType(0);
SDValue Tmp1 = Node->getOperand(0);
SDValue Tmp2 = Node->getOperand(1);
- unsigned Align = Node->getConstantOperandVal(3);
+ const MaybeAlign MA(Node->getConstantOperandVal(3));
SDValue VAListLoad = getLoad(TLI.getPointerTy(getDataLayout()), dl, Tmp1,
Tmp2, MachinePointerInfo(V));
SDValue VAList = VAListLoad;
- if (Align > TLI.getMinStackArgumentAlignment()) {
- assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
-
+ if (MA && *MA > TLI.getMinStackArgumentAlignment()) {
VAList = getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
- getConstant(Align - 1, dl, VAList.getValueType()));
+ getConstant(MA->value() - 1, dl, VAList.getValueType()));
- VAList = getNode(ISD::AND, dl, VAList.getValueType(), VAList,
- getConstant(-(int64_t)Align, dl, VAList.getValueType()));
+ VAList =
+ getNode(ISD::AND, dl, VAList.getValueType(), VAList,
+ getConstant(-(int64_t)MA->value(), dl, VAList.getValueType()));
}
// Increment the pointer, VAList, to the next vaarg
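Note on the alignment handling above: adding Align - 1 and masking with -Align rounds the va_list pointer up to the next multiple of Align (a power of two), which is exactly what the ADD/AND pair emitted here computes. A plain-integer check:

    #include <cassert>
    #include <cstdint>

    static uint64_t roundUp(uint64_t P, uint64_t Align) {
      return (P + Align - 1) & -Align;
    }

    int main() {
      assert(roundUp(0x1001, 16) == 0x1010); // bumped to next 16-byte slot
      assert(roundUp(0x1010, 16) == 0x1010); // already aligned: unchanged
      return 0;
    }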
@@ -2154,12 +2151,9 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
}
case ISD::OR:
case ISD::XOR:
- // If the LHS or RHS don't contribute bits to the or, drop them.
- if (MaskedValueIsZero(V.getOperand(0), DemandedBits))
- return V.getOperand(1);
- if (MaskedValueIsZero(V.getOperand(1), DemandedBits))
- return V.getOperand(0);
- break;
+ case ISD::SIGN_EXTEND_INREG:
+ return TLI->SimplifyMultipleUseDemandedBits(V, DemandedBits, DemandedElts,
+ *this, 0);
case ISD::SRL:
// Only look at single-use SRLs.
if (!V.getNode()->hasOneUse())
@@ -2203,15 +2197,6 @@ SDValue SelectionDAG::GetDemandedBits(SDValue V, const APInt &DemandedBits,
return getNode(ISD::ANY_EXTEND, SDLoc(V), V.getValueType(), DemandedSrc);
break;
}
- case ISD::SIGN_EXTEND_INREG:
- EVT ExVT = cast<VTSDNode>(V.getOperand(1))->getVT();
- unsigned ExVTBits = ExVT.getScalarSizeInBits();
-
- // If none of the extended bits are demanded, eliminate the sextinreg.
- if (DemandedBits.getActiveBits() <= ExVTBits)
- return V.getOperand(0);
-
- break;
}
return SDValue();
}
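Note on the change above: the local OR/XOR/SIGN_EXTEND_INREG rules are folded into SimplifyMultipleUseDemandedBits. A worked instance of the OR rule that used to live here: an operand whose demanded bits are all known zero contributes nothing and can be dropped.

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0x12340000, Y = 0x000000FF, Demanded = 0xFFFF0000;
      // Y is zero in every demanded bit, so (X | Y) equals X under the mask.
      assert((((X | Y) ^ X) & Demanded) == 0);
      return 0;
    }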
@@ -2395,15 +2380,39 @@ SDValue SelectionDAG::getSplatValue(SDValue V) {
/// If a SHL/SRA/SRL node has a constant or splat constant shift amount that
/// is less than the element bit-width of the shift node, return it.
static const APInt *getValidShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
if (ConstantSDNode *SA = isConstOrConstSplat(V.getOperand(1))) {
// Shifting more than the bitwidth is not valid.
const APInt &ShAmt = SA->getAPIntValue();
- if (ShAmt.ult(V.getScalarValueSizeInBits()))
+ if (ShAmt.ult(BitWidth))
return &ShAmt;
}
return nullptr;
}
+/// If a SHL/SRA/SRL node has constant vector shift amounts that are all less
+/// than the element bit-width of the shift node, return the minimum value.
+static const APInt *getValidMinimumShiftAmountConstant(SDValue V) {
+ unsigned BitWidth = V.getScalarValueSizeInBits();
+ auto *BV = dyn_cast<BuildVectorSDNode>(V.getOperand(1));
+ if (!BV)
+ return nullptr;
+ const APInt *MinShAmt = nullptr;
+ for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
+ auto *SA = dyn_cast<ConstantSDNode>(BV->getOperand(i));
+ if (!SA)
+ return nullptr;
+ // Shifting more than the bitwidth is not valid.
+ const APInt &ShAmt = SA->getAPIntValue();
+ if (ShAmt.uge(BitWidth))
+ return nullptr;
+ if (MinShAmt && MinShAmt->ule(ShAmt))
+ continue;
+ MinShAmt = &ShAmt;
+ }
+ return MinShAmt;
+}
+
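Note on the new helper above: computeKnownBits uses it for vector SRL with non-uniform constant shift amounts; if every lane shifts right by at least MinShAmt, the top MinShAmt bits of every lane are known zero. A plain-integer check of that fact:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint8_t Lane[4] = {0xFF, 0x80, 0x7F, 0x01};
      unsigned Sh[4] = {3, 5, 4, 3}; // minimum shift amount is 3
      for (int i = 0; i != 4; ++i)
        assert(((Lane[i] >> Sh[i]) & 0xE0) == 0); // top 3 bits are zero
      return 0;
    }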
/// Determine which bits of Op are known to be either zero or one and return
/// them in Known. For vectors, the known bits are those that are shared by
/// every vector element.
@@ -2437,7 +2446,7 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
return Known;
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return Known; // Limit search depth.
KnownBits Known2;
@@ -2582,14 +2591,13 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
- } else {
- Known = computeKnownBits(Src, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
+ Known = computeKnownBits(Src, DemandedSrc, Depth + 1);
break;
}
case ISD::SCALAR_TO_VECTOR: {
@@ -2800,25 +2808,9 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
Known.One.lshrInPlace(Shift);
// High bits are known zero.
Known.Zero.setHighBits(Shift);
- } else if (auto *BV = dyn_cast<BuildVectorSDNode>(Op.getOperand(1))) {
- // If the shift amount is a vector of constants see if we can bound
- // the number of upper zero bits.
- unsigned ShiftAmountMin = BitWidth;
- for (unsigned i = 0; i != BV->getNumOperands(); ++i) {
- if (auto *C = dyn_cast<ConstantSDNode>(BV->getOperand(i))) {
- const APInt &ShAmt = C->getAPIntValue();
- if (ShAmt.ult(BitWidth)) {
- ShiftAmountMin = std::min<unsigned>(ShiftAmountMin,
- ShAmt.getZExtValue());
- continue;
- }
- }
- // Don't know anything.
- ShiftAmountMin = 0;
- break;
- }
-
- Known.Zero.setHighBits(ShiftAmountMin);
+ } else if (const APInt *ShMinAmt = getValidMinimumShiftAmountConstant(Op)) {
+ // At least the minimum shift amount's worth of high bits are known zero.
+ Known.Zero.setHighBits(ShMinAmt->getZExtValue());
}
break;
case ISD::SRA:
@@ -3105,12 +3097,12 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
// If the first operand is non-negative or has all low bits zero, then
// the upper bits are all zero.
- if (Known2.Zero[BitWidth-1] || ((Known2.Zero & LowBits) == LowBits))
+ if (Known2.isNonNegative() || LowBits.isSubsetOf(Known2.Zero))
Known.Zero |= ~LowBits;
// If the first operand is negative and not all low bits are zero, then
// the upper bits are all one.
- if (Known2.One[BitWidth-1] && ((Known2.One & LowBits) != 0))
+ if (Known2.isNegative() && LowBits.intersects(Known2.One))
Known.One |= ~LowBits;
assert((Known.Zero & Known.One) == 0 && "Bits known to be one AND zero?");
}
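
The rewritten conditions above are behavior-preserving: isNonNegative() is the old sign-bit-zero test and LowBits.isSubsetOf(Known2.Zero) the old mask equality. A toy check of the SREM-by-power-of-two fact they encode:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t A = 1234, B = -1235, K = 8;   // remainder by 2^3
      // Non-negative dividend: bits above the low 3 are zero.
      assert((A % K) >= 0 && ((A % K) & ~7) == 0);
      // Negative dividend with nonzero low part: those bits are all one.
      assert((B % K) < 0 && (uint32_t(B % K) | 7u) == 0xFFFFFFFFu);
    }
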
@@ -3427,7 +3419,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
return Val.getNumSignBits();
}
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return 1; // Limit search depth.
if (!DemandedElts)
@@ -3729,6 +3721,18 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
Tmp = ComputeNumSignBits(Op.getOperand(0), Depth+1);
if (Tmp == 1) return 1; // Early out.
return std::min(Tmp, Tmp2)-1;
+ case ISD::MUL: {
+ // The output of the Mul can be at most twice the valid bits in the inputs.
+ unsigned SignBitsOp0 = ComputeNumSignBits(Op.getOperand(0), Depth + 1);
+ if (SignBitsOp0 == 1)
+ break;
+ unsigned SignBitsOp1 = ComputeNumSignBits(Op.getOperand(1), Depth + 1);
+ if (SignBitsOp1 == 1)
+ break;
+ unsigned OutValidBits =
+ (VTBits - SignBitsOp0 + 1) + (VTBits - SignBitsOp1 + 1);
+ return OutValidBits > VTBits ? 1 : VTBits - OutValidBits + 1;
+ }
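
A worked instance of the new ISD::MUL rule: with VTBits = 32, 25 sign bits on one operand (8 valid bits) and 29 on the other (4 valid bits) give OutValidBits = 12, so the product keeps at least 32 - 12 + 1 = 21 sign bits. Standalone check (numSignBits is a hand-rolled stand-in, not the LLVM helper):

    #include <cassert>
    #include <cstdint>

    unsigned numSignBits(int32_t V) {
      int Sign = V < 0 ? 1 : 0;
      unsigned N = 1;                       // the sign bit itself
      for (int Bit = 30; Bit >= 0 && ((V >> Bit) & 1) == Sign; --Bit)
        ++N;
      return N;
    }

    int main() {
      int32_t A = -100, B = -7;             // 25 and 29 sign bits
      assert(numSignBits(A) == 25 && numSignBits(B) == 29);
      assert(numSignBits(A * B) >= 21);     // A * B = 700
    }
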
case ISD::TRUNCATE: {
// Check if the sign bits of source go down as far as the truncated value.
unsigned NumSrcBits = Op.getOperand(0).getScalarValueSizeInBits();
@@ -3817,13 +3821,13 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, const APInt &DemandedElts,
SDValue Src = Op.getOperand(0);
ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt DemandedSrc = APInt::getAllOnesValue(NumSrcElts);
if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
// Offset the demanded elts by the subvector index.
uint64_t Idx = SubIdx->getZExtValue();
- APInt DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
- return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
+ DemandedSrc = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
}
- return ComputeNumSignBits(Src, Depth + 1);
+ return ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
}
case ISD::CONCAT_VECTORS: {
// Determine the minimum number of sign bits across all demanded
@@ -3976,7 +3980,7 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op, bool SNaN, unsigned Depth) const
if (getTarget().Options.NoNaNsFPMath || Op->getFlags().hasNoNaNs())
return true;
- if (Depth == 6)
+ if (Depth >= MaxRecursionDepth)
return false; // Limit search depth.
// TODO: Handle vectors.
@@ -4645,7 +4649,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
return getUNDEF(VT);
// -(X-Y) -> (Y-X) is unsafe because when X==Y, -0.0 != +0.0
- if ((getTarget().Options.UnsafeFPMath || Flags.hasNoSignedZeros()) &&
+ if ((getTarget().Options.NoSignedZerosFPMath || Flags.hasNoSignedZeros()) &&
OpOpcode == ISD::FSUB)
return getNode(ISD::FSUB, DL, VT, Operand.getOperand(1),
Operand.getOperand(0), Flags);
@@ -5156,22 +5160,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
if (N2C && N2C->isNullValue())
return N1;
break;
- case ISD::FP_ROUND_INREG: {
- EVT EVT = cast<VTSDNode>(N2)->getVT();
- assert(VT == N1.getValueType() && "Not an inreg round!");
- assert(VT.isFloatingPoint() && EVT.isFloatingPoint() &&
- "Cannot FP_ROUND_INREG integer types");
- assert(EVT.isVector() == VT.isVector() &&
- "FP_ROUND_INREG type should be vector iff the operand "
- "type is vector!");
- assert((!EVT.isVector() ||
- EVT.getVectorNumElements() == VT.getVectorNumElements()) &&
- "Vector element counts must match in FP_ROUND_INREG");
- assert(EVT.bitsLE(VT) && "Not rounding down!");
- (void)EVT;
- if (cast<VTSDNode>(N2)->getVT() == VT) return N1; // Not actually rounding.
- break;
- }
case ISD::FP_ROUND:
assert(VT.isFloatingPoint() &&
N1.getValueType().isFloatingPoint() &&
@@ -5382,7 +5370,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
std::swap(N1, N2);
} else {
switch (Opcode) {
- case ISD::FP_ROUND_INREG:
case ISD::SIGN_EXTEND_INREG:
case ISD::SUB:
return getUNDEF(VT); // fold op(undef, arg2) -> undef
@@ -5770,7 +5757,7 @@ static void chainLoadsAndStoresForMemcpy(SelectionDAG &DAG, const SDLoc &dl,
static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
SDValue Chain, SDValue Dst, SDValue Src,
- uint64_t Size, unsigned Align,
+ uint64_t Size, unsigned Alignment,
bool isVol, bool AlwaysInline,
MachinePointerInfo DstPtrInfo,
MachinePointerInfo SrcPtrInfo) {
@@ -5795,15 +5782,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
if (FI && !MFI.isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
- if (Align > SrcAlign)
- SrcAlign = Align;
+ if (Alignment > SrcAlign)
+ SrcAlign = Alignment;
ConstantDataArraySlice Slice;
bool CopyFromConstant = isMemSrcFromConstant(Src, Slice);
bool isZeroConstant = CopyFromConstant && Slice.Array == nullptr;
unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
if (!TLI.findOptimalMemOpLowering(
- MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align),
+ MemOps, Limit, Size, (DstAlignCanChange ? 0 : Alignment),
(isZeroConstant ? 0 : SrcAlign), /*IsMemset=*/false,
/*ZeroMemset=*/false, /*MemcpyStrSrc=*/CopyFromConstant,
/*AllowOverlap=*/!isVol, DstPtrInfo.getAddrSpace(),
@@ -5818,15 +5805,15 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
// realignment.
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!TRI->needsStackRealignment(MF))
- while (NewAlign > Align &&
- DL.exceedsNaturalStackAlignment(NewAlign))
- NewAlign /= 2;
+ while (NewAlign > Alignment &&
+ DL.exceedsNaturalStackAlignment(Align(NewAlign)))
+ NewAlign /= 2;
- if (NewAlign > Align) {
+ if (NewAlign > Alignment) {
// Give the stack frame object a larger alignment if needed.
if (MFI.getObjectAlignment(FI->getIndex()) < NewAlign)
MFI.setObjectAlignment(FI->getIndex(), NewAlign);
- Align = NewAlign;
+ Alignment = NewAlign;
}
}
@@ -5869,10 +5856,9 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
}
Value = getMemsetStringVal(VT, dl, DAG, TLI, SubSlice);
if (Value.getNode()) {
- Store = DAG.getStore(Chain, dl, Value,
- DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), Align,
- MMOFlags);
+ Store = DAG.getStore(
+ Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
+ DstPtrInfo.getWithOffset(DstOff), Alignment, MMOFlags);
OutChains.push_back(Store);
}
}
@@ -5900,7 +5886,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, const SDLoc &dl,
Store = DAG.getTruncStore(
Chain, dl, Value, DAG.getMemBasePlusOffset(Dst, DstOff, dl),
- DstPtrInfo.getWithOffset(DstOff), VT, Align, MMOFlags);
+ DstPtrInfo.getWithOffset(DstOff), VT, Alignment, MMOFlags);
OutStoreChains.push_back(Store);
}
SrcOff += VTSize;
@@ -6567,7 +6553,7 @@ SDValue SelectionDAG::getMergeValues(ArrayRef<SDValue> Ops, const SDLoc &dl) {
SDValue SelectionDAG::getMemIntrinsicNode(
unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef<SDValue> Ops,
EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align,
- MachineMemOperand::Flags Flags, unsigned Size, const AAMDNodes &AAInfo) {
+ MachineMemOperand::Flags Flags, uint64_t Size, const AAMDNodes &AAInfo) {
if (Align == 0) // Ensure that codegen never sees alignment 0
Align = getEVTAlignment(MemVT);
@@ -6619,7 +6605,9 @@ SDValue SelectionDAG::getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl,
createOperands(N, Ops);
}
InsertNode(N);
- return SDValue(N, 0);
+ SDValue V(N, 0);
+ NewSDValueDbgMsg(V, "Creating new node: ", this);
+ return V;
}
SDValue SelectionDAG::getLifetimeNode(bool IsStart, const SDLoc &dl,
@@ -7022,14 +7010,15 @@ SDValue SelectionDAG::getMaskedStore(SDValue Chain, const SDLoc &dl,
SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MGATHER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedGatherSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7038,7 +7027,7 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
}
auto *N = newSDNode<MaskedGatherSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getPassThru().getValueType() == N->getValueType(0) &&
@@ -7062,14 +7051,15 @@ SDValue SelectionDAG::getMaskedGather(SDVTList VTs, EVT VT, const SDLoc &dl,
SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
ArrayRef<SDValue> Ops,
- MachineMemOperand *MMO) {
+ MachineMemOperand *MMO,
+ ISD::MemIndexType IndexType) {
assert(Ops.size() == 6 && "Incompatible number of operands");
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MSCATTER, VTs, Ops);
ID.AddInteger(VT.getRawBits());
ID.AddInteger(getSyntheticNodeSubclassData<MaskedScatterSDNode>(
- dl.getIROrder(), VTs, VT, MMO));
+ dl.getIROrder(), VTs, VT, MMO, IndexType));
ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
void *IP = nullptr;
if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
@@ -7077,7 +7067,7 @@ SDValue SelectionDAG::getMaskedScatter(SDVTList VTs, EVT VT, const SDLoc &dl,
return SDValue(E, 0);
}
auto *N = newSDNode<MaskedScatterSDNode>(dl.getIROrder(), dl.getDebugLoc(),
- VTs, VT, MMO);
+ VTs, VT, MMO, IndexType);
createOperands(N, Ops);
assert(N->getMask().getValueType().getVectorNumElements() ==
@@ -7766,16 +7756,22 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
case ISD::STRICT_FLOG: NewOpc = ISD::FLOG; break;
case ISD::STRICT_FLOG10: NewOpc = ISD::FLOG10; break;
case ISD::STRICT_FLOG2: NewOpc = ISD::FLOG2; break;
+ case ISD::STRICT_LRINT: NewOpc = ISD::LRINT; break;
+ case ISD::STRICT_LLRINT: NewOpc = ISD::LLRINT; break;
case ISD::STRICT_FRINT: NewOpc = ISD::FRINT; break;
case ISD::STRICT_FNEARBYINT: NewOpc = ISD::FNEARBYINT; break;
case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
case ISD::STRICT_FCEIL: NewOpc = ISD::FCEIL; break;
case ISD::STRICT_FFLOOR: NewOpc = ISD::FFLOOR; break;
+ case ISD::STRICT_LROUND: NewOpc = ISD::LROUND; break;
+ case ISD::STRICT_LLROUND: NewOpc = ISD::LLROUND; break;
case ISD::STRICT_FROUND: NewOpc = ISD::FROUND; break;
case ISD::STRICT_FTRUNC: NewOpc = ISD::FTRUNC; break;
case ISD::STRICT_FP_ROUND: NewOpc = ISD::FP_ROUND; break;
case ISD::STRICT_FP_EXTEND: NewOpc = ISD::FP_EXTEND; break;
+ case ISD::STRICT_FP_TO_SINT: NewOpc = ISD::FP_TO_SINT; break;
+ case ISD::STRICT_FP_TO_UINT: NewOpc = ISD::FP_TO_UINT; break;
}
assert(Node->getNumValues() == 2 && "Unexpected number of results!");
@@ -7925,6 +7921,7 @@ MachineSDNode *SelectionDAG::getMachineNode(unsigned Opcode, const SDLoc &DL,
CSEMap.InsertNode(N, IP);
InsertNode(N);
+ NewSDValueDbgMsg(SDValue(N, 0), "Creating new machine node: ", this);
return N;
}
@@ -8619,7 +8616,7 @@ SDValue SelectionDAG::makeEquivalentMemoryOrdering(LoadSDNode *OldLoad,
// TokenFactor.
SDValue OldChain = SDValue(OldLoad, 1);
SDValue NewChain = SDValue(NewMemOp.getNode(), 1);
- if (!OldLoad->hasAnyUseOfValue(1))
+ if (OldChain == NewChain || !OldLoad->hasAnyUseOfValue(1))
return NewChain;
SDValue TokenFactor =
@@ -8812,7 +8809,7 @@ HandleSDNode::~HandleSDNode() {
GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order,
const DebugLoc &DL,
const GlobalValue *GA, EVT VT,
- int64_t o, unsigned char TF)
+ int64_t o, unsigned TF)
: SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) {
TheGlobal = GA;
}
@@ -8986,7 +8983,7 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
// Loads don't have side effects, look through them.
if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(*this)) {
- if (!Ld->isVolatile())
+ if (Ld->isUnordered())
return Ld->getChain().reachesChainWithoutSideEffects(Dest, Depth-1);
}
return false;
@@ -9005,21 +9002,51 @@ void SDNode::intersectFlagsWith(const SDNodeFlags Flags) {
SDValue
SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
- ArrayRef<ISD::NodeType> CandidateBinOps) {
+ ArrayRef<ISD::NodeType> CandidateBinOps,
+ bool AllowPartials) {
// The pattern must end in an extract from index 0.
if (Extract->getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
!isNullConstant(Extract->getOperand(1)))
return SDValue();
- SDValue Op = Extract->getOperand(0);
- unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
-
// Match against one of the candidate binary ops.
+ SDValue Op = Extract->getOperand(0);
if (llvm::none_of(CandidateBinOps, [Op](ISD::NodeType BinOp) {
return Op.getOpcode() == unsigned(BinOp);
}))
return SDValue();
+ // Floating-point reductions may require relaxed constraints on the final step
+ // of the reduction because they may reorder intermediate operations.
+ unsigned CandidateBinOp = Op.getOpcode();
+ if (Op.getValueType().isFloatingPoint()) {
+ SDNodeFlags Flags = Op->getFlags();
+ switch (CandidateBinOp) {
+ case ISD::FADD:
+ if (!Flags.hasNoSignedZeros() || !Flags.hasAllowReassociation())
+ return SDValue();
+ break;
+ default:
+ llvm_unreachable("Unhandled FP opcode for binop reduction");
+ }
+ }
+
+ // If full matching fails partway, check whether enough stages matched that
+ // a partial reduction from a subvector is still possible.
+ auto PartialReduction = [&](SDValue Op, unsigned NumSubElts) {
+ if (!AllowPartials || !Op)
+ return SDValue();
+ EVT OpVT = Op.getValueType();
+ EVT OpSVT = OpVT.getScalarType();
+ EVT SubVT = EVT::getVectorVT(*getContext(), OpSVT, NumSubElts);
+ if (!TLI->isExtractSubvectorCheap(SubVT, OpVT, 0))
+ return SDValue();
+ BinOp = (ISD::NodeType)CandidateBinOp;
+ return getNode(
+ ISD::EXTRACT_SUBVECTOR, SDLoc(Op), SubVT, Op,
+ getConstant(0, SDLoc(Op), TLI->getVectorIdxTy(getDataLayout())));
+ };
+
// At each stage, we're looking for something that looks like:
// %s = shufflevector <8 x i32> %op, <8 x i32> undef,
// <8 x i32> <i32 2, i32 3, i32 undef, i32 undef,
@@ -9030,10 +9057,16 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// <4,5,6,7,u,u,u,u>
// <2,3,u,u,u,u,u,u>
// <1,u,u,u,u,u,u,u>
- unsigned CandidateBinOp = Op.getOpcode();
+ // While a partial reduction match would be:
+ // <2,3,u,u,u,u,u,u>
+ // <1,u,u,u,u,u,u,u>
+ unsigned Stages = Log2_32(Op.getValueType().getVectorNumElements());
+ SDValue PrevOp;
for (unsigned i = 0; i < Stages; ++i) {
+ unsigned MaskEnd = (1 << i);
+
if (Op.getOpcode() != CandidateBinOp)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
SDValue Op0 = Op.getOperand(0);
SDValue Op1 = Op.getOperand(1);
@@ -9049,12 +9082,14 @@ SelectionDAG::matchBinOpReduction(SDNode *Extract, ISD::NodeType &BinOp,
// The first operand of the shuffle should be the same as the other operand
// of the binop.
if (!Shuffle || Shuffle->getOperand(0) != Op)
- return SDValue();
+ return PartialReduction(PrevOp, MaskEnd);
// Verify the shuffle has the expected (at this stage of the pyramid) mask.
- for (int Index = 0, MaskEnd = 1 << i; Index < MaskEnd; ++Index)
- if (Shuffle->getMaskElt(Index) != MaskEnd + Index)
- return SDValue();
+ for (int Index = 0; Index < (int)MaskEnd; ++Index)
+ if (Shuffle->getMaskElt(Index) != (int)(MaskEnd + Index))
+ return PartialReduction(PrevOp, MaskEnd);
+
+ PrevOp = Op;
}
BinOp = (ISD::NodeType)CandidateBinOp;
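
To summarize the loop above: a full match consumes all Log2(NumElts) shuffle+binop stages; if matching stops after some stage, the new PartialReduction fallback instead reduces the low MaskEnd lanes of the last matched value. Toy model of one matched stage over 4 lanes:

    #include <cassert>

    int main() {
      int V[4] = {1, 2, 3, 4};
      // One stage: S = V + shuffle<2,3,u,u>(V).
      int S[2] = {V[0] + V[2], V[1] + V[3]};
      // The full horizontal sum now lives in the low 2 lanes, so an
      // EXTRACT_SUBVECTOR of those lanes is a valid partial reduction.
      assert(V[0] + V[1] + V[2] + V[3] == S[0] + S[1]);
    }
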
@@ -9114,8 +9149,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
getShiftAmountOperand(Operands[0].getValueType(),
Operands[1])));
break;
- case ISD::SIGN_EXTEND_INREG:
- case ISD::FP_ROUND_INREG: {
+ case ISD::SIGN_EXTEND_INREG: {
EVT ExtVT = cast<VTSDNode>(Operands[1])->getVT().getVectorElementType();
Scalars.push_back(getNode(N->getOpcode(), dl, EltVT,
Operands[0],
@@ -9187,6 +9221,9 @@ bool SelectionDAG::areNonVolatileConsecutiveLoads(LoadSDNode *LD,
int Dist) const {
if (LD->isVolatile() || Base->isVolatile())
return false;
+ // TODO: This is probably too restrictive for atomics; revisit.
+ if (!LD->isSimple())
+ return false;
if (LD->isIndexed() || Base->isIndexed())
return false;
if (LD->getChain() != Base->getChain())
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
index 9592bc30a4e1..3a53ab9717a4 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGAddressAnalysis.cpp
@@ -14,6 +14,7 @@
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
#include <cstdint>
using namespace llvm;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e818dd27c05e..8c15563fcd23 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -833,7 +833,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ if (!Register::isVirtualRegister(Regs[Part + i]) ||
!RegisterVT.isInteger())
continue;
@@ -948,8 +948,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
if (HasMatching)
Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
- else if (!Regs.empty() &&
- TargetRegisterInfo::isVirtualRegister(Regs.front())) {
+ else if (!Regs.empty() && Register::isVirtualRegister(Regs.front())) {
// Put the register class of the virtual registers in the flag word. That
// way, later passes can recompute register class constraints for inline
// assembly as well as normal instructions.
@@ -1810,7 +1809,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
// offsets to its parts don't wrap either.
SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
- SDValue Val = RetOp.getValue(i);
+ SDValue Val = RetOp.getValue(RetOp.getResNo() + i);
if (MemVTs[i] != ValueVTs[i])
Val = DAG.getPtrExtOrTrunc(Val, getCurSDLoc(), MemVTs[i]);
Chains[i] = DAG.getStore(Chain, getCurSDLoc(), Val,
@@ -2263,7 +2262,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
Instruction::BinaryOps Opcode = BOp->getOpcode();
if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
- !I.getMetadata(LLVMContext::MD_unpredictable) &&
+ !I.hasMetadata(LLVMContext::MD_unpredictable) &&
(Opcode == Instruction::And || Opcode == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
Opcode,
@@ -2600,9 +2599,11 @@ void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
void
SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setDiscardResult(true);
SDValue Chain =
TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
- None, false, getCurSDLoc(), false, false).second;
+ None, CallOptions, getCurSDLoc()).second;
// On PS4, the "return address" must still be within the calling function,
// even if it's at the very end, so emit an explicit TRAP here.
// Passing 'true' for doesNotReturn above won't generate the trap for us.
@@ -2618,24 +2619,18 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock *SwitchBB) {
SDLoc dl = getCurSDLoc();
- // Subtract the minimum value
+ // Subtract the minimum value.
SDValue SwitchOp = getValue(B.SValue);
EVT VT = SwitchOp.getValueType();
- SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
- DAG.getConstant(B.First, dl, VT));
-
- // Check range
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- SDValue RangeCmp = DAG.getSetCC(
- dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
- Sub.getValueType()),
- Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
+ SDValue RangeSub =
+ DAG.getNode(ISD::SUB, dl, VT, SwitchOp, DAG.getConstant(B.First, dl, VT));
// Determine the type of the test operands.
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
bool UsePtrType = false;
- if (!TLI.isTypeLegal(VT))
+ if (!TLI.isTypeLegal(VT)) {
UsePtrType = true;
- else {
+ } else {
for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
// Switch table case range are encoded into series of masks.
@@ -2644,6 +2639,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
break;
}
}
+ SDValue Sub = RangeSub;
if (UsePtrType) {
VT = TLI.getPointerTy(DAG.getDataLayout());
Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
@@ -2655,20 +2651,29 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
MachineBasicBlock* MBB = B.Cases[0].ThisBB;
- addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
+ if (!B.OmitRangeCheck)
+ addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
addSuccessorWithProb(SwitchBB, MBB, B.Prob);
SwitchBB->normalizeSuccProbs();
- SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
- MVT::Other, CopyTo, RangeCmp,
- DAG.getBasicBlock(B.Default));
+ SDValue Root = CopyTo;
+ if (!B.OmitRangeCheck) {
+ // Conditional branch to the default block.
+ SDValue RangeCmp = DAG.getSetCC(dl,
+ TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
+ RangeSub.getValueType()),
+ RangeSub, DAG.getConstant(B.Range, dl, RangeSub.getValueType()),
+ ISD::SETUGT);
+
+ Root = DAG.getNode(ISD::BRCOND, dl, MVT::Other, Root, RangeCmp,
+ DAG.getBasicBlock(B.Default));
+ }
// Avoid emitting unnecessary branches to the next block.
if (MBB != NextBlock(SwitchBB))
- BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
- DAG.getBasicBlock(MBB));
+ Root = DAG.getNode(ISD::BR, dl, MVT::Other, Root, DAG.getBasicBlock(MBB));
- DAG.setRoot(BrRange);
+ DAG.setRoot(Root);
}
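
The range check that OmitRangeCheck now elides is the SUB/SETUGT pair built above; a standalone sketch of that unsigned-wraparound trick (hypothetical names):

    #include <cassert>
    #include <cstdint>

    // True iff First <= X <= First + Range, via one subtract and one
    // unsigned compare (out-of-range values wrap to huge unsigned numbers).
    bool inRange(uint32_t X, uint32_t First, uint32_t Range) {
      return X - First <= Range;
    }

    int main() {
      assert(inRange(7, 4, 5));    // 7 is within [4, 9]
      assert(!inRange(3, 4, 5));   // 3 - 4 wraps to 0xFFFFFFFF
    }
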
/// visitBitTestCase - this function produces one "bit test"
@@ -3266,8 +3271,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) {
// We care about the legality of the operation after it has been type
// legalized.
- while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
- VT != TLI.getTypeToTransformTo(Ctx, VT))
+ while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal)
VT = TLI.getTypeToTransformTo(Ctx, VT);
// If the vselect is legal, assume we want to leave this as a vector setcc +
@@ -3534,17 +3538,32 @@ void SelectionDAGBuilder::visitExtractElement(const User &I) {
void SelectionDAGBuilder::visitShuffleVector(const User &I) {
SDValue Src1 = getValue(I.getOperand(0));
SDValue Src2 = getValue(I.getOperand(1));
+ Constant *MaskV = cast<Constant>(I.getOperand(2));
SDLoc DL = getCurSDLoc();
-
- SmallVector<int, 8> Mask;
- ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
- unsigned MaskNumElts = Mask.size();
-
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
EVT SrcVT = Src1.getValueType();
unsigned SrcNumElts = SrcVT.getVectorNumElements();
+ if (MaskV->isNullValue() && VT.isScalableVector()) {
+ // Canonical splat form of first element of first input vector.
+ SDValue FirstElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
+ SrcVT.getScalarType(), Src1,
+ DAG.getConstant(0, DL,
+ TLI.getVectorIdxTy(DAG.getDataLayout())));
+ setValue(&I, DAG.getNode(ISD::SPLAT_VECTOR, DL, VT, FirstElt));
+ return;
+ }
+
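
Toy semantics of the canonical splat recognized above (an all-zeros shuffle mask replicates lane 0 of the first source), sketched over fixed-size arrays rather than scalable vectors:

    #include <cassert>

    int main() {
      int Src[4] = {42, 1, 2, 3};
      int Mask[4] = {0, 0, 0, 0};   // zeroinitializer shuffle mask
      int Dst[4];
      for (int i = 0; i != 4; ++i)
        Dst[i] = Src[Mask[i]];
      for (int i = 0; i != 4; ++i)
        assert(Dst[i] == 42);       // every lane holds Src[0]
    }
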
+ // For now, we only handle splats for scalable vectors.
+ // The DAGCombiner will perform a BUILD_VECTOR -> SPLAT_VECTOR transformation
+ // for targets that support a SPLAT_VECTOR for non-scalable vector types.
+ assert(!VT.isScalableVector() && "Unsupported scalable vector shuffle");
+
+ SmallVector<int, 8> Mask;
+ ShuffleVectorInst::getShuffleMask(MaskV, Mask);
+ unsigned MaskNumElts = Mask.size();
+
if (SrcNumElts == MaskNumElts) {
setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
return;
@@ -3825,7 +3844,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// Normalize Vector GEP - all scalar operands should be converted to the
// splat vector.
unsigned VectorWidth = I.getType()->isVectorTy() ?
- cast<VectorType>(I.getType())->getVectorNumElements() : 0;
+ I.getType()->getVectorNumElements() : 0;
if (VectorWidth && !N.getValueType().isVector()) {
LLVMContext &Context = *DAG.getContext();
@@ -3858,12 +3877,11 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
// If this is a scalar constant or a splat vector of constants,
// handle it quickly.
- const auto *CI = dyn_cast<ConstantInt>(Idx);
- if (!CI && isa<ConstantDataVector>(Idx) &&
- cast<ConstantDataVector>(Idx)->getSplatValue())
- CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
+ const auto *C = dyn_cast<Constant>(Idx);
+ if (C && isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
- if (CI) {
+ if (const auto *CI = dyn_cast_or_null<ConstantInt>(C)) {
if (CI->isZero())
continue;
APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
@@ -3872,7 +3890,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
DAG.getConstant(Offs, dl, IdxTy);
- // In an inbouds GEP with an offset that is nonnegative even when
+ // In an inbounds GEP with an offset that is nonnegative even when
// interpreted as signed, assume there is no unsigned overflow.
SDNodeFlags Flags;
if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
@@ -4002,8 +4020,8 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
Type *Ty = I.getType();
bool isVolatile = I.isVolatile();
- bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
- bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+ bool isNonTemporal = I.hasMetadata(LLVMContext::MD_nontemporal);
+ bool isInvariant = I.hasMetadata(LLVMContext::MD_invariant_load);
bool isDereferenceable =
isDereferenceablePointer(SV, I.getType(), DAG.getDataLayout());
unsigned Alignment = I.getAlignment();
@@ -4118,7 +4136,7 @@ void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
SDValue Src = getValue(SrcV);
// Create a virtual register, then update the virtual register.
- unsigned VReg =
+ Register VReg =
SwiftError.getOrCreateVRegDefAt(&I, FuncInfo.MBB, I.getPointerOperand());
// Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
// Chain can be getRoot or getControlRoot.
@@ -4132,8 +4150,8 @@ void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
"call visitLoadFromSwiftError when backend supports swifterror");
assert(!I.isVolatile() &&
- I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
- I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
+ !I.hasMetadata(LLVMContext::MD_nontemporal) &&
+ !I.hasMetadata(LLVMContext::MD_invariant_load) &&
"Support volatile, non temporal, invariant for load_from_swift_error");
const Value *SV = I.getOperand(0);
@@ -4209,7 +4227,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
auto MMOFlags = MachineMemOperand::MONone;
if (I.isVolatile())
MMOFlags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_nontemporal))
MMOFlags |= MachineMemOperand::MONonTemporal;
MMOFlags |= TLI.getMMOFlags(I);
@@ -4309,8 +4327,9 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
// are looking for. If first operand of the GEP is a splat vector - we
// extract the splat value and use it as a uniform base.
// In all other cases the function returns 'false'.
-static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
- SDValue &Scale, SelectionDAGBuilder* SDB) {
+static bool getUniformBase(const Value *&Ptr, SDValue &Base, SDValue &Index,
+ ISD::MemIndexType &IndexType, SDValue &Scale,
+ SelectionDAGBuilder *SDB) {
SelectionDAG& DAG = SDB->DAG;
LLVMContext &Context = *DAG.getContext();
@@ -4330,8 +4349,13 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
// Ensure all the other indices are 0.
for (unsigned i = 1; i < FinalIndex; ++i) {
- auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
- if (!C || !C->isZero())
+ auto *C = dyn_cast<Constant>(GEP->getOperand(i));
+ if (!C)
+ return false;
+ if (isa<VectorType>(C->getType()))
+ C = C->getSplatValue();
+ auto *CI = dyn_cast_or_null<ConstantInt>(C);
+ if (!CI || !CI->isZero())
return false;
}
@@ -4346,6 +4370,7 @@ static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
SDB->getCurSDLoc(), TLI.getPointerTy(DL));
Base = SDB->getValue(Ptr);
Index = SDB->getValue(IndexVal);
+ IndexType = ISD::SIGNED_SCALED;
if (!Index.getValueType().isVector()) {
unsigned GEPWidth = GEP->getType()->getVectorNumElements();
@@ -4373,9 +4398,11 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
MachineMemOperand *MMO = DAG.getMachineFunction().
@@ -4385,11 +4412,12 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
DAG.setRoot(Scatter);
setValue(&I, Scatter);
}
@@ -4476,9 +4504,11 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
SDValue Root = DAG.getRoot();
SDValue Base;
SDValue Index;
+ ISD::MemIndexType IndexType;
SDValue Scale;
const Value *BasePtr = Ptr;
- bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
+ bool UniformBase = getUniformBase(BasePtr, Base, Index, IndexType, Scale,
+ this);
bool ConstantMemory = false;
if (UniformBase && AA &&
AA->pointsToConstantMemory(
@@ -4500,11 +4530,12 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
if (!UniformBase) {
Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
Index = getValue(Ptr);
+ IndexType = ISD::SIGNED_SCALED;
Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
}
SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
- Ops, MMO);
+ Ops, MMO, IndexType);
SDValue OutChain = Gather.getValue(1);
if (!ConstantMemory)
@@ -4628,7 +4659,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
auto Flags = MachineMemOperand::MOLoad;
if (I.isVolatile())
Flags |= MachineMemOperand::MOVolatile;
- if (I.getMetadata(LLVMContext::MD_invariant_load) != nullptr)
+ if (I.hasMetadata(LLVMContext::MD_invariant_load))
Flags |= MachineMemOperand::MOInvariant;
if (isDereferenceablePointer(I.getPointerOperand(), I.getType(),
DAG.getDataLayout()))
@@ -4645,9 +4676,27 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
AAMDNodes(), nullptr, SSID, Order);
InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
- SDValue L =
- DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
- getValue(I.getPointerOperand()), MMO);
+
+ SDValue Ptr = getValue(I.getPointerOperand());
+
+ if (TLI.lowerAtomicLoadAsLoadSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for loads to prevent future divergence.
+ SDValue L = DAG.getLoad(MemVT, dl, InChain, Ptr, MMO);
+ if (MemVT != VT)
+ L = DAG.getPtrExtOrTrunc(L, dl, VT);
+
+ setValue(&I, L);
+ SDValue OutChain = L.getValue(1);
+ if (!I.isUnordered())
+ DAG.setRoot(OutChain);
+ else
+ PendingLoads.push_back(OutChain);
+ return;
+ }
+
+ SDValue L = DAG.getAtomic(ISD::ATOMIC_LOAD, dl, MemVT, MemVT, InChain,
+ Ptr, MMO);
SDValue OutChain = L.getValue(1);
if (MemVT != VT)
@@ -4686,9 +4735,17 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
SDValue Val = getValue(I.getValueOperand());
if (Val.getValueType() != MemVT)
Val = DAG.getPtrExtOrTrunc(Val, dl, MemVT);
+ SDValue Ptr = getValue(I.getPointerOperand());
+ if (TLI.lowerAtomicStoreAsStoreSDNode(I)) {
+ // TODO: Once this is better exercised by tests, it should be merged with
+ // the normal path for stores to prevent future divergence.
+ SDValue S = DAG.getStore(InChain, dl, Val, Ptr, MMO);
+ DAG.setRoot(S);
+ return;
+ }
SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, MemVT, InChain,
- getValue(I.getPointerOperand()), Val, MMO);
+ Ptr, Val, MMO);
DAG.setRoot(OutChain);
@@ -4731,8 +4788,22 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// Add all operands of the call to the operand list.
for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
- SDValue Op = getValue(I.getArgOperand(i));
- Ops.push_back(Op);
+ const Value *Arg = I.getArgOperand(i);
+ if (!I.paramHasAttr(i, Attribute::ImmArg)) {
+ Ops.push_back(getValue(Arg));
+ continue;
+ }
+
+ // Use TargetConstant instead of a regular constant for immarg.
+ EVT VT = TLI.getValueType(*DL, Arg->getType(), true);
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(Arg)) {
+ assert(CI->getBitWidth() <= 64 &&
+ "large intrinsic immediates not handled");
+ Ops.push_back(DAG.getTargetConstant(*CI, SDLoc(), VT));
+ } else {
+ Ops.push_back(
+ DAG.getTargetConstantFP(*cast<ConstantFP>(Arg), SDLoc(), VT));
+ }
}
SmallVector<EVT, 4> ValueVTs;
@@ -4749,10 +4820,10 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
AAMDNodes AAInfo;
I.getAAMetadata(AAInfo);
- Result =
- DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
- MachinePointerInfo(Info.ptrVal, Info.offset),
- Info.align, Info.flags, Info.size, AAInfo);
+ Result = DAG.getMemIntrinsicNode(
+ Info.opc, getCurSDLoc(), VTs, Ops, Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
+ Info.align ? Info.align->value() : 0, Info.flags, Info.size, AAInfo);
} else if (!HasChain) {
Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
} else if (!I.getType()->isVoidTy()) {
@@ -4918,12 +4989,11 @@ static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
// Put the exponent in the right bit position for later addition to the
// final result:
//
- // #define LOG2OFe 1.4426950f
- // t0 = Op * LOG2OFe
+ // t0 = Op * log2(e)
// TODO: What fast-math-flags should be set here?
SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
- getF32Constant(DAG, 0x3fb8aa3b, dl));
+ DAG.getConstantFP(numbers::log2ef, dl, MVT::f32));
return getLimitedPrecisionExp2(t0, dl, DAG);
}
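
The hex literal 0x3fb8aa3b being replaced was just log2(e) as an f32; a quick numeric check of the identity the expansion relies on, exp(x) = 2^(x * log2(e)):

    #include <cassert>
    #include <cmath>

    int main() {
      const double Log2e = 1.4426950408889634; // value of numbers::log2e
      double X = 1.5;
      assert(std::fabs(std::exp(X) - std::exp2(X * Log2e)) < 1e-12);
    }
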
@@ -4941,10 +5011,11 @@ static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
- // Scale the exponent by log(2) [0.69314718f].
+ // Scale the exponent by log(2).
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
- SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
- getF32Constant(DAG, 0x3f317218, dl));
+ SDValue LogOfExponent =
+ DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
+ DAG.getConstantFP(numbers::ln2f, dl, MVT::f32));
// Get the significand and build it into a floating-point number with
// exponent of 1.
@@ -5311,19 +5382,32 @@ static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
}
-// getUnderlyingArgReg - Find underlying register used for a truncated or
-// bitcasted argument.
-static unsigned getUnderlyingArgReg(const SDValue &N) {
+// getUnderlyingArgRegs - Find underlying registers used for a truncated,
+// bitcasted, or split argument. Returns a list of <Register, size in bits>
+static void
+getUnderlyingArgRegs(SmallVectorImpl<std::pair<unsigned, unsigned>> &Regs,
+ const SDValue &N) {
switch (N.getOpcode()) {
- case ISD::CopyFromReg:
- return cast<RegisterSDNode>(N.getOperand(1))->getReg();
+ case ISD::CopyFromReg: {
+ SDValue Op = N.getOperand(1);
+ Regs.emplace_back(cast<RegisterSDNode>(Op)->getReg(),
+ Op.getValueType().getSizeInBits());
+ return;
+ }
case ISD::BITCAST:
case ISD::AssertZext:
case ISD::AssertSext:
case ISD::TRUNCATE:
- return getUnderlyingArgReg(N.getOperand(0));
+ getUnderlyingArgRegs(Regs, N.getOperand(0));
+ return;
+ case ISD::BUILD_PAIR:
+ case ISD::BUILD_VECTOR:
+ case ISD::CONCAT_VECTORS:
+ for (SDValue Op : N->op_values())
+ getUnderlyingArgRegs(Regs, Op);
+ return;
default:
- return 0;
+ return;
}
}
@@ -5412,11 +5496,16 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
if (FI != std::numeric_limits<int>::max())
Op = MachineOperand::CreateFI(FI);
+ SmallVector<std::pair<unsigned, unsigned>, 8> ArgRegsAndSizes;
if (!Op && N.getNode()) {
- unsigned Reg = getUnderlyingArgReg(N);
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ getUnderlyingArgRegs(ArgRegsAndSizes, N);
+ Register Reg;
+ if (ArgRegsAndSizes.size() == 1)
+ Reg = ArgRegsAndSizes.front().first;
+
+ if (Reg && Reg.isVirtual()) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
- unsigned PR = RegInfo.getLiveInPhysReg(Reg);
+ Register PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
Reg = PR;
}
@@ -5436,29 +5525,42 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
}
if (!Op) {
+ // Create a DBG_VALUE for each decomposed value in ArgRegs to cover Reg
+ auto splitMultiRegDbgValue
+ = [&](ArrayRef<std::pair<unsigned, unsigned>> SplitRegs) {
+ unsigned Offset = 0;
+ for (auto RegAndSize : SplitRegs) {
+ auto FragmentExpr = DIExpression::createFragmentExpression(
+ Expr, Offset, RegAndSize.second);
+ if (!FragmentExpr)
+ continue;
+ assert(!IsDbgDeclare && "DbgDeclare operand is not in memory?");
+ FuncInfo.ArgDbgValues.push_back(
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
+ RegAndSize.first, Variable, *FragmentExpr));
+ Offset += RegAndSize.second;
+ }
+ };
+
// Check if ValueMap has reg number.
- DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
+ DenseMap<const Value *, unsigned>::const_iterator
+ VMI = FuncInfo.ValueMap.find(V);
if (VMI != FuncInfo.ValueMap.end()) {
const auto &TLI = DAG.getTargetLoweringInfo();
RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
V->getType(), getABIRegCopyCC(V));
if (RFV.occupiesMultipleRegs()) {
- unsigned Offset = 0;
- for (auto RegAndSize : RFV.getRegsAndSizes()) {
- Op = MachineOperand::CreateReg(RegAndSize.first, false);
- auto FragmentExpr = DIExpression::createFragmentExpression(
- Expr, Offset, RegAndSize.second);
- if (!FragmentExpr)
- continue;
- FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
- Op->getReg(), Variable, *FragmentExpr));
- Offset += RegAndSize.second;
- }
+ splitMultiRegDbgValue(RFV.getRegsAndSizes());
return true;
}
+
Op = MachineOperand::CreateReg(VMI->second, false);
IsIndirect = IsDbgDeclare;
+ } else if (ArgRegsAndSizes.size() > 1) {
+ // This was split due to the calling convention, and no virtual register
+ // mapping exists for the value.
+ splitMultiRegDbgValue(ArgRegsAndSizes);
+ return true;
}
}
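
The refactored splitMultiRegDbgValue walks <register, size> pairs and gives each a DIExpression fragment at an accumulating bit offset; a toy model of the offset bookkeeping (register numbers are hypothetical):

    #include <cassert>
    #include <utility>
    #include <vector>

    int main() {
      std::vector<std::pair<unsigned, unsigned>> RegsAndSizes = {
          {1, 32}, {2, 32}, {3, 64}};       // <Reg, size in bits>
      unsigned Offset = 0;
      for (auto &RS : RegsAndSizes) {
        // A fragment expression would cover [Offset, Offset + RS.second).
        Offset += RS.second;
      }
      assert(Offset == 128 && "fragments tile the whole 128-bit value");
    }
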
@@ -5468,8 +5570,10 @@ bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
assert(Variable->isValidLocationForIntrinsic(DL) &&
"Expected inlined-at fields to agree");
IsIndirect = (Op->isReg()) ? IsIndirect : true;
+ if (IsIndirect)
+ Expr = DIExpression::append(Expr, {dwarf::DW_OP_deref});
FuncInfo.ArgDbgValues.push_back(
- BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
+ BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), false,
*Op, Variable, Expr));
return true;
@@ -5554,11 +5658,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
return;
case Intrinsic::sponentry:
setValue(&I, DAG.getNode(ISD::SPONENTRY, sdl,
- TLI.getPointerTy(DAG.getDataLayout())));
+ TLI.getFrameIndexTy(DAG.getDataLayout())));
return;
case Intrinsic::frameaddress:
setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
- TLI.getPointerTy(DAG.getDataLayout()),
+ TLI.getFrameIndexTy(DAG.getDataLayout()),
getValue(I.getArgOperand(0))));
return;
case Intrinsic::read_register: {
@@ -5888,65 +5992,6 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::masked_compressstore:
visitMaskedStore(I, true /* IsCompressing */);
return;
- case Intrinsic::x86_mmx_pslli_w:
- case Intrinsic::x86_mmx_pslli_d:
- case Intrinsic::x86_mmx_pslli_q:
- case Intrinsic::x86_mmx_psrli_w:
- case Intrinsic::x86_mmx_psrli_d:
- case Intrinsic::x86_mmx_psrli_q:
- case Intrinsic::x86_mmx_psrai_w:
- case Intrinsic::x86_mmx_psrai_d: {
- SDValue ShAmt = getValue(I.getArgOperand(1));
- if (isa<ConstantSDNode>(ShAmt)) {
- visitTargetIntrinsic(I, Intrinsic);
- return;
- }
- unsigned NewIntrinsic = 0;
- EVT ShAmtVT = MVT::v2i32;
- switch (Intrinsic) {
- case Intrinsic::x86_mmx_pslli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psll_w;
- break;
- case Intrinsic::x86_mmx_pslli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psll_d;
- break;
- case Intrinsic::x86_mmx_pslli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psll_q;
- break;
- case Intrinsic::x86_mmx_psrli_w:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
- break;
- case Intrinsic::x86_mmx_psrli_d:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
- break;
- case Intrinsic::x86_mmx_psrli_q:
- NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
- break;
- case Intrinsic::x86_mmx_psrai_w:
- NewIntrinsic = Intrinsic::x86_mmx_psra_w;
- break;
- case Intrinsic::x86_mmx_psrai_d:
- NewIntrinsic = Intrinsic::x86_mmx_psra_d;
- break;
- default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
- }
-
- // The vector shift intrinsics with scalars uses 32b shift amounts but
- // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits
- // to be zero.
- // We must do this early because v2i32 is not a legal type.
- SDValue ShOps[2];
- ShOps[0] = ShAmt;
- ShOps[1] = DAG.getConstant(0, sdl, MVT::i32);
- ShAmt = DAG.getBuildVector(ShAmtVT, sdl, ShOps);
- EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
- ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt);
- Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT,
- DAG.getConstant(NewIntrinsic, sdl, MVT::i32),
- getValue(I.getArgOperand(0)), ShAmt);
- setValue(&I, Res);
- return;
- }
case Intrinsic::powi:
setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)), DAG));
@@ -6063,6 +6108,8 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_fdiv:
case Intrinsic::experimental_constrained_frem:
case Intrinsic::experimental_constrained_fma:
+ case Intrinsic::experimental_constrained_fptosi:
+ case Intrinsic::experimental_constrained_fptoui:
case Intrinsic::experimental_constrained_fptrunc:
case Intrinsic::experimental_constrained_fpext:
case Intrinsic::experimental_constrained_sqrt:
@@ -6075,12 +6122,16 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
case Intrinsic::experimental_constrained_log:
case Intrinsic::experimental_constrained_log10:
case Intrinsic::experimental_constrained_log2:
+ case Intrinsic::experimental_constrained_lrint:
+ case Intrinsic::experimental_constrained_llrint:
case Intrinsic::experimental_constrained_rint:
case Intrinsic::experimental_constrained_nearbyint:
case Intrinsic::experimental_constrained_maxnum:
case Intrinsic::experimental_constrained_minnum:
case Intrinsic::experimental_constrained_ceil:
case Intrinsic::experimental_constrained_floor:
+ case Intrinsic::experimental_constrained_lround:
+ case Intrinsic::experimental_constrained_llround:
case Intrinsic::experimental_constrained_round:
case Intrinsic::experimental_constrained_trunc:
visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
@@ -6272,6 +6323,14 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
Op3));
return;
}
+ case Intrinsic::umul_fix_sat: {
+ SDValue Op1 = getValue(I.getArgOperand(0));
+ SDValue Op2 = getValue(I.getArgOperand(1));
+ SDValue Op3 = getValue(I.getArgOperand(2));
+ setValue(&I, DAG.getNode(ISD::UMULFIXSAT, sdl, Op1.getValueType(), Op1, Op2,
+ Op3));
+ return;
+ }
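
The new umul.fix.sat wiring maps straight onto ISD::UMULFIXSAT; its toy semantics, an unsigned fixed-point multiply with Scale fractional bits that saturates on overflow (hand-rolled sketch, not the DAG semantics source):

    #include <cassert>
    #include <cstdint>

    uint32_t umulFixSat(uint32_t A, uint32_t B, unsigned Scale) {
      uint64_t P = ((uint64_t)A * B) >> Scale;
      return P > UINT32_MAX ? UINT32_MAX : (uint32_t)P;
    }

    int main() {
      assert(umulFixSat(0x18000, 0x20000, 16) == 0x30000); // 1.5 * 2.0 = 3.0
      assert(umulFixSat(0xFFFFFFFFu, 0xFFFFFFFFu, 0) == 0xFFFFFFFFu); // sat
    }
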
case Intrinsic::stacksave: {
SDValue Op = getRoot();
Res = DAG.getNode(
@@ -6347,29 +6406,11 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
DAG.setRoot(Res);
return;
}
- case Intrinsic::objectsize: {
- // If we don't know by now, we're never going to know.
- ConstantInt *CI = dyn_cast<ConstantInt>(I.getArgOperand(1));
-
- assert(CI && "Non-constant type in __builtin_object_size?");
-
- SDValue Arg = getValue(I.getCalledValue());
- EVT Ty = Arg.getValueType();
-
- if (CI->isZero())
- Res = DAG.getConstant(-1ULL, sdl, Ty);
- else
- Res = DAG.getConstant(0, sdl, Ty);
-
- setValue(&I, Res);
- return;
- }
+ case Intrinsic::objectsize:
+ llvm_unreachable("llvm.objectsize.* should have been lowered already");
case Intrinsic::is_constant:
- // If this wasn't constant-folded away by now, then it's not a
- // constant.
- setValue(&I, DAG.getConstant(0, sdl, MVT::i1));
- return;
+ llvm_unreachable("llvm.is.constant.* should have been lowered already");
case Intrinsic::annotation:
case Intrinsic::ptr_annotation:
@@ -6818,6 +6859,17 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
setValue(&I, Val);
return;
}
+ case Intrinsic::ptrmask: {
+ SDValue Ptr = getValue(I.getOperand(0));
+ SDValue Const = getValue(I.getOperand(1));
+
+ EVT DestVT =
+ EVT(DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()));
+
+ setValue(&I, DAG.getNode(ISD::AND, getCurSDLoc(), DestVT, Ptr,
+ DAG.getZExtOrTrunc(Const, getCurSDLoc(), DestVT)));
+ return;
+ }
}
}
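
The llvm.ptrmask case above lowers to a plain AND of the pointer with the zero-extended or truncated mask; a standalone sketch of the arithmetic:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint64_t Ptr = 0x1003;           // hypothetical address
      uint64_t Mask = ~uint64_t(15);   // e.g. align down to 16 bytes
      assert((Ptr & Mask) == 0x1000);
    }
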
@@ -6845,6 +6897,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_fma:
Opcode = ISD::STRICT_FMA;
break;
+ case Intrinsic::experimental_constrained_fptosi:
+ Opcode = ISD::STRICT_FP_TO_SINT;
+ break;
+ case Intrinsic::experimental_constrained_fptoui:
+ Opcode = ISD::STRICT_FP_TO_UINT;
+ break;
case Intrinsic::experimental_constrained_fptrunc:
Opcode = ISD::STRICT_FP_ROUND;
break;
@@ -6881,6 +6939,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_log2:
Opcode = ISD::STRICT_FLOG2;
break;
+ case Intrinsic::experimental_constrained_lrint:
+ Opcode = ISD::STRICT_LRINT;
+ break;
+ case Intrinsic::experimental_constrained_llrint:
+ Opcode = ISD::STRICT_LLRINT;
+ break;
case Intrinsic::experimental_constrained_rint:
Opcode = ISD::STRICT_FRINT;
break;
@@ -6899,6 +6963,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
case Intrinsic::experimental_constrained_floor:
Opcode = ISD::STRICT_FFLOOR;
break;
+ case Intrinsic::experimental_constrained_lround:
+ Opcode = ISD::STRICT_LROUND;
+ break;
+ case Intrinsic::experimental_constrained_llround:
+ Opcode = ISD::STRICT_LLROUND;
+ break;
case Intrinsic::experimental_constrained_round:
Opcode = ISD::STRICT_FROUND;
break;
@@ -7102,7 +7172,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
if (SwiftErrorVal && TLI.supportSwiftError()) {
// Get the last element of InVals.
SDValue Src = CLI.InVals.back();
- unsigned VReg = SwiftError.getOrCreateVRegDefAt(
+ Register VReg = SwiftError.getOrCreateVRegDefAt(
CS.getInstruction(), FuncInfo.MBB, SwiftErrorVal);
SDValue CopyNode = CLI.DAG.getCopyToReg(Result.second, CLI.DL, VReg, Src);
DAG.setRoot(CopyNode);
@@ -8021,6 +8091,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// Compute the constraint code and ConstraintType to use.
TLI.ComputeConstraintToUse(T, SDValue());
+ if (T.ConstraintType == TargetLowering::C_Immediate &&
+ OpInfo.CallOperand && !isa<ConstantSDNode>(OpInfo.CallOperand))
+ // We've delayed emitting a diagnostic like the "n" constraint because
+ // inlining could cause an integer showing up.
+ return emitInlineAsmError(
+ CS, "constraint '" + Twine(T.ConstraintCode) + "' expects an "
+ "integer constant expression");
+
ExtraInfo.update(T);
}
@@ -8105,7 +8183,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
switch (OpInfo.Type) {
case InlineAsm::isOutput:
if (OpInfo.ConstraintType == TargetLowering::C_Memory ||
- (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)) {
unsigned ConstraintID =
TLI.getInlineAsmMemConstraint(OpInfo.ConstraintCode);
@@ -8119,13 +8198,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
MVT::i32));
AsmNodeOperands.push_back(OpInfo.CallOperand);
break;
- } else if ((OpInfo.ConstraintType == TargetLowering::C_Other &&
+ } else if (((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
!OpInfo.isIndirect) ||
OpInfo.ConstraintType == TargetLowering::C_Register ||
OpInfo.ConstraintType == TargetLowering::C_RegisterClass) {
// Otherwise, this outputs to a register (directly for C_Register /
- // C_RegisterClass, and a target-defined fashion for C_Other). Find a
- // register that we can use.
+ // C_RegisterClass, and a target-defined fashion for
+ // C_Immediate/C_Other). Find a register that we can use.
if (OpInfo.AssignedRegs.Regs.empty()) {
emitInlineAsmError(
CS, "couldn't allocate output register for constraint '" +
@@ -8205,15 +8285,24 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ if ((OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) &&
OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
- if (OpInfo.ConstraintType == TargetLowering::C_Other) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate ||
+ OpInfo.ConstraintType == TargetLowering::C_Other) {
std::vector<SDValue> Ops;
TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode,
Ops, DAG);
if (Ops.empty()) {
+ if (OpInfo.ConstraintType == TargetLowering::C_Immediate)
+ if (isa<ConstantSDNode>(InOperandVal)) {
+ emitInlineAsmError(CS, "value out of range for constraint '" +
+ Twine(OpInfo.ConstraintCode) + "'");
+ return;
+ }
+
emitInlineAsmError(CS, "invalid operand for inline asm constraint '" +
Twine(OpInfo.ConstraintCode) + "'");
return;
@@ -8250,7 +8339,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
assert((OpInfo.ConstraintType == TargetLowering::C_RegisterClass ||
- OpInfo.ConstraintType == TargetLowering::C_Register) &&
+ OpInfo.ConstraintType == TargetLowering::C_Register ||
+ OpInfo.ConstraintType == TargetLowering::C_Immediate) &&
"Unknown constraint type!");
// TODO: Support this.
@@ -8356,6 +8446,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
Val = OpInfo.AssignedRegs.getCopyFromRegs(
DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction());
break;
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
Val = TLI.LowerAsmOutputForConstraint(Chain, Flag, getCurSDLoc(),
OpInfo, DAG);
@@ -9018,7 +9109,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment = getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(getABIAlignmentForCallingConv(ArgTy, DL));
if (Args[i].Ty->isPointerTy()) {
Flags.setPointer();
@@ -9073,7 +9164,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
FrameAlign = Args[i].Alignment;
else
FrameAlign = getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Args[i].IsNest)
Flags.setNest();
@@ -9129,7 +9220,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
if (NumParts > 1 && j == 0)
MyFlags.Flags.setSplit();
else if (j != 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (j == NumParts - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9259,7 +9350,7 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) {
assert((Op.getOpcode() != ISD::CopyFromReg ||
cast<RegisterSDNode>(Op.getOperand(1))->getReg() != Reg) &&
"Copy from a reg to the same reg!");
- assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg");
+ assert(!Register::isPhysicalRegister(Reg) && "Is a physreg");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
// If this is an InlineAsm we have to match the registers required, not the
@@ -9516,8 +9607,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// Certain targets (such as MIPS), may have a different ABI alignment
// for a type depending on the context. Give the target a chance to
// specify the alignment it wants.
- unsigned OriginalAlignment =
- TLI->getABIAlignmentForCallingConv(ArgTy, DL);
+ const Align OriginalAlignment(
+ TLI->getABIAlignmentForCallingConv(ArgTy, DL));
if (Arg.getType()->isPointerTy()) {
Flags.setPointer();
@@ -9577,7 +9668,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FrameAlign = Arg.getParamAlignment();
else
FrameAlign = TLI->getByValTypeAlignment(ElementTy, DL);
- Flags.setByValAlign(FrameAlign);
+ Flags.setByValAlign(Align(FrameAlign));
}
if (Arg.hasAttribute(Attribute::Nest))
Flags.setNest();
@@ -9586,6 +9677,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
Flags.setOrigAlign(OriginalAlignment);
if (ArgCopyElisionCandidates.count(&Arg))
Flags.setCopyElisionCandidate();
+ if (Arg.hasAttribute(Attribute::Returned))
+ Flags.setReturned();
MVT RegisterVT = TLI->getRegisterTypeForCallingConv(
*CurDAG->getContext(), F.getCallingConv(), VT);
@@ -9598,7 +9691,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MyFlags.Flags.setSplit();
// If it isn't the first piece, the alignment must be 1
else if (i > 0) {
- MyFlags.Flags.setOrigAlign(1);
+ MyFlags.Flags.setOrigAlign(Align::None());
if (i == NumRegs - 1)
MyFlags.Flags.setSplitEnd();
}
@@ -9650,7 +9743,8 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
MachineFunction& MF = SDB->DAG.getMachineFunction();
MachineRegisterInfo& RegInfo = MF.getRegInfo();
- unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
+ Register SRetReg =
+ RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT));
FuncInfo->DemoteRegister = SRetReg;
NewRoot =
SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue);
@@ -9748,10 +9842,14 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
FuncInfo->setArgumentFrameIndex(&Arg, FI->getIndex());
}
+ // Analyses past this point are naive and don't expect an assertion.
+ if (Res.getOpcode() == ISD::AssertZext)
+ Res = Res.getOperand(0);
+
// Update the SwiftErrorVRegDefMap.
if (Res.getOpcode() == ISD::CopyFromReg && isSwiftErrorArg) {
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg))
+ if (Register::isVirtualRegister(Reg))
SwiftError->setCurrentVReg(FuncInfo->MBB, SwiftError->getFunctionArg(),
Reg);
}
@@ -9763,7 +9861,7 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
// FIXME: This isn't very clean... it would be nice to make this more
// general.
unsigned Reg = cast<RegisterSDNode>(Res.getOperand(1))->getReg();
- if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (Register::isVirtualRegister(Reg)) {
FuncInfo->ValueMap[&Arg] = Reg;
continue;
}
@@ -10087,8 +10185,6 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
break;
}
case CC_BitTests: {
- // FIXME: If Fallthrough is unreachable, skip the range check.
-
// FIXME: Optimize away range check based on pivot comparisons.
BitTestBlock *BTB = &SL->BitTestCases[I->BTCasesIndex];
@@ -10109,6 +10205,11 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond,
BTB->DefaultProb -= DefaultProb / 2;
}
+ if (FallthroughUnreachable) {
+ // Skip the range check if the fallthrough block is unreachable.
+ BTB->OmitRangeCheck = true;
+ }
+
// If we're in the right place, emit the bit test header right now.
if (CurMBB == SwitchMBB) {
visitBitTestHeader(*BTB, SwitchMBB);
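The OmitRangeCheck path above is easiest to see on a concrete switch. A minimal standalone sketch of the idea (plain C++, not part of the patch; the function and mask are invented for illustration): when the default is unreachable, the scrutinee is already known to be in range, so the bounds check that normally guards the bit-test header is dead weight.

#include <cassert>
#include <cstdint>

// Hand-lowered form of a switch over {1, 3, 5} whose default branch is
// unreachable: x is known to be <= 5, so the bit test needs no range check.
static bool isOneOf135(uint32_t x) {
  const uint32_t Mask = (1u << 1) | (1u << 3) | (1u << 5);
  return (Mask >> x) & 1u; // no 'if (x > 5)' guard needed
}

int main() {
  assert(isOneOf135(3) && !isOneOf135(4));
  return 0;
}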
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 0072e33f23b7..bfcf30b430b6 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -426,7 +426,7 @@ public:
SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo,
SwiftErrorValueTracking &swifterror, CodeGenOpt::Level ol)
: SDNodeOrder(LowestSDNodeOrder), TM(dag.getTarget()), DAG(dag),
- SL(make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
+ SL(std::make_unique<SDAGSwitchLowering>(this, funcinfo)), FuncInfo(funcinfo),
SwiftError(swifterror) {}
void init(GCFunctionInfo *gfi, AliasAnalysis *AA,
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index da3049881d31..bc10f7621239 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -280,6 +280,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
+ case ISD::SPLAT_VECTOR: return "splat_vector";
case ISD::CARRY_FALSE: return "carry_false";
case ISD::ADDC: return "addc";
case ISD::ADDE: return "adde";
@@ -305,6 +306,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::SMULFIX: return "smulfix";
case ISD::SMULFIXSAT: return "smulfixsat";
case ISD::UMULFIX: return "umulfix";
+ case ISD::UMULFIXSAT: return "umulfixsat";
// Conversion operators.
case ISD::SIGN_EXTEND: return "sign_extend";
@@ -318,22 +320,27 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::FP_ROUND: return "fp_round";
case ISD::STRICT_FP_ROUND: return "strict_fp_round";
case ISD::FLT_ROUNDS_: return "flt_rounds";
- case ISD::FP_ROUND_INREG: return "fp_round_inreg";
case ISD::FP_EXTEND: return "fp_extend";
case ISD::STRICT_FP_EXTEND: return "strict_fp_extend";
case ISD::SINT_TO_FP: return "sint_to_fp";
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
+ case ISD::STRICT_FP_TO_SINT: return "strict_fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
+ case ISD::STRICT_FP_TO_UINT: return "strict_fp_to_uint";
case ISD::BITCAST: return "bitcast";
case ISD::ADDRSPACECAST: return "addrspacecast";
case ISD::FP16_TO_FP: return "fp16_to_fp";
case ISD::FP_TO_FP16: return "fp_to_fp16";
case ISD::LROUND: return "lround";
+ case ISD::STRICT_LROUND: return "strict_lround";
case ISD::LLROUND: return "llround";
+ case ISD::STRICT_LLROUND: return "strict_llround";
case ISD::LRINT: return "lrint";
+ case ISD::STRICT_LRINT: return "strict_lrint";
case ISD::LLRINT: return "llrint";
+ case ISD::STRICT_LLRINT: return "strict_llrint";
// Control flow instructions
case ISD::BR: return "br";
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index bdf9f2c166e1..1f07a241a824 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -27,6 +27,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
+#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
@@ -434,9 +435,9 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
TII = MF->getSubtarget().getInstrInfo();
TLI = MF->getSubtarget().getTargetLowering();
RegInfo = &MF->getRegInfo();
- LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+ LibInfo = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(Fn);
GFI = Fn.hasGC() ? &getAnalysis<GCModuleInfo>().getFunctionInfo(Fn) : nullptr;
- ORE = make_unique<OptimizationRemarkEmitter>(&Fn);
+ ORE = std::make_unique<OptimizationRemarkEmitter>(&Fn);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
auto *LIWP = getAnalysisIfAvailable<LoopInfoWrapperPass>();
@@ -524,8 +525,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -572,7 +572,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
bool hasFI = MI->getOperand(0).isFI();
Register Reg =
hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
EntryMBB->insert(EntryMBB->begin(), MI);
else {
MachineInstr *Def = RegInfo->getVRegDef(Reg);
@@ -582,7 +582,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
Def->getParent()->insert(std::next(InsertPos), MI);
} else
LLVM_DEBUG(dbgs() << "Dropping debug info for dead vreg"
- << TargetRegisterInfo::virtReg2Index(Reg) << "\n");
+ << Register::virtReg2Index(Reg) << "\n");
}
// If Reg is live-in then update debug info to track its copy in a vreg.
@@ -671,8 +671,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
To = J->second;
}
// Make sure the new register has a sufficiently constrained register class.
- if (TargetRegisterInfo::isVirtualRegister(From) &&
- TargetRegisterInfo::isVirtualRegister(To))
+ if (Register::isVirtualRegister(From) && Register::isVirtualRegister(To))
MRI.constrainRegClass(To, MRI.getRegClass(From));
// Replace it.
@@ -760,7 +759,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
continue;
unsigned DestReg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
- if (!TargetRegisterInfo::isVirtualRegister(DestReg))
+ if (!Register::isVirtualRegister(DestReg))
continue;
// Ignore non-integer values.
@@ -1652,9 +1651,8 @@ static bool MIIsInTerminatorSequence(const MachineInstr &MI) {
// Make sure that the copy dest is not a vreg when the copy source is a
// physical register.
- if (!OPI2->isReg() ||
- (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) &&
- TargetRegisterInfo::isPhysicalRegister(OPI2->getReg())))
+ if (!OPI2->isReg() || (!Register::isPhysicalRegister(OPI->getReg()) &&
+ Register::isPhysicalRegister(OPI2->getReg())))
return false;
return true;
@@ -2234,9 +2232,9 @@ void SelectionDAGISel::Select_READ_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg =
+ Register Reg =
TLI->getRegisterByName(RegStr->getString().data(), Op->getValueType(0),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyFromReg(
Op->getOperand(0), dl, Reg, Op->getValueType(0));
New->setNodeId(-1);
@@ -2248,9 +2246,9 @@ void SelectionDAGISel::Select_WRITE_REGISTER(SDNode *Op) {
SDLoc dl(Op);
MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(Op->getOperand(1));
const MDString *RegStr = dyn_cast<MDString>(MD->getMD()->getOperand(0));
- unsigned Reg = TLI->getRegisterByName(RegStr->getString().data(),
+ Register Reg = TLI->getRegisterByName(RegStr->getString().data(),
Op->getOperand(2).getValueType(),
- *CurDAG);
+ CurDAG->getMachineFunction());
SDValue New = CurDAG->getCopyToReg(
Op->getOperand(0), dl, Reg, Op->getOperand(2));
New->setNodeId(-1);
@@ -3323,10 +3321,13 @@ void SelectionDAGISel::SelectCodeCommon(SDNode *NodeToMatch,
continue;
}
- case OPC_EmitCopyToReg: {
+ case OPC_EmitCopyToReg:
+ case OPC_EmitCopyToReg2: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+ if (Opcode == OPC_EmitCopyToReg2)
+ DestPhysReg |= MatcherTable[MatcherIndex++] << 8;
if (!InputChain.getNode())
InputChain = CurDAG->getEntryNode();
diff --git a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
index 395e9a8a4fc5..fad98b6f50dc 100644
--- a/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/StatepointLowering.cpp
@@ -378,7 +378,6 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
// We use TargetFrameIndex so that isel will not select it into LEA
Loc = Builder.DAG.getTargetFrameIndex(Index, Builder.getFrameIndexTy());
-#ifndef NDEBUG
// Right now we always allocate spill slots that are of the same
// size as the value we're about to spill (the size of spillee can
// vary since we spill vectors of pointers too). At some point we
@@ -387,12 +386,18 @@ spillIncomingStatepointValue(SDValue Incoming, SDValue Chain,
MachineFrameInfo &MFI = Builder.DAG.getMachineFunction().getFrameInfo();
assert((MFI.getObjectSize(Index) * 8) == Incoming.getValueSizeInBits() &&
"Bad spill: stack slot does not match!");
-#endif
+ // Note: Using the alignment of the spill slot (rather than the ABI or
+ // preferred alignment) is required for correctness when dealing with spill
+ // slots whose preferred alignment is larger than the frame alignment.
auto &MF = Builder.DAG.getMachineFunction();
auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *StoreMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
Chain = Builder.DAG.getStore(Chain, Builder.getCurSDLoc(), Incoming, Loc,
- PtrInfo);
+ StoreMMO);
MMO = getMachineMemOperand(MF, *cast<FrameIndexSDNode>(Loc));
@@ -1011,20 +1016,27 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) {
return;
}
- SDValue SpillSlot =
- DAG.getTargetFrameIndex(*DerivedPtrLocation, getFrameIndexTy());
+ unsigned Index = *DerivedPtrLocation;
+ SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy());
// Note: We know all of these reloads are independent, but don't bother to
// exploit that chain-wise. DAGCombine will happily do so as needed, so
// doing it here would be a small compile-time win at most.
SDValue Chain = getRoot();
- SDValue SpillLoad =
- DAG.getLoad(DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
- Relocate.getType()),
- getCurSDLoc(), Chain, SpillSlot,
- MachinePointerInfo::getFixedStack(DAG.getMachineFunction(),
- *DerivedPtrLocation));
+ auto &MF = DAG.getMachineFunction();
+ auto &MFI = MF.getFrameInfo();
+ auto PtrInfo = MachinePointerInfo::getFixedStack(MF, Index);
+ auto *LoadMMO =
+ MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
+ MFI.getObjectSize(Index),
+ MFI.getObjectAlignment(Index));
+
+ auto LoadVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
+ Relocate.getType());
+
+ SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain,
+ SpillSlot, LoadMMO);
DAG.setRoot(SpillLoad.getValue(1));
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b260cd91d468..9ab1324533f1 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -11,7 +11,6 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/TargetLowering.h"
-#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
@@ -37,7 +36,7 @@ using namespace llvm;
/// NOTE: The TargetMachine owns TLOF.
TargetLowering::TargetLowering(const TargetMachine &tm)
- : TargetLoweringBase(tm) {}
+ : TargetLoweringBase(tm) {}
const char *TargetLowering::getTargetNodeName(unsigned Opcode) const {
return nullptr;
@@ -80,7 +79,7 @@ bool TargetLowering::parametersInCSRMatch(const MachineRegisterInfo &MRI,
const CCValAssign &ArgLoc = ArgLocs[I];
if (!ArgLoc.isRegLoc())
continue;
- unsigned Reg = ArgLoc.getLocReg();
+ Register Reg = ArgLoc.getLocReg();
// Only look at callee saved registers.
if (MachineOperand::clobbersPhysReg(CallerPreservedMask, Reg))
continue;
@@ -121,19 +120,25 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
/// result of type RetVT.
std::pair<SDValue, SDValue>
TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
- ArrayRef<SDValue> Ops, bool isSigned,
- const SDLoc &dl, bool doesNotReturn,
- bool isReturnValueUsed,
- bool isPostTypeLegalization) const {
+ ArrayRef<SDValue> Ops,
+ MakeLibCallOptions CallOptions,
+ const SDLoc &dl) const {
TargetLowering::ArgListTy Args;
Args.reserve(Ops.size());
TargetLowering::ArgListEntry Entry;
- for (SDValue Op : Ops) {
- Entry.Node = Op;
+ for (unsigned i = 0; i < Ops.size(); ++i) {
+ SDValue NewOp = Ops[i];
+ Entry.Node = NewOp;
Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
- Entry.IsSExt = shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
- Entry.IsZExt = !shouldSignExtendTypeInLibCall(Op.getValueType(), isSigned);
+ Entry.IsSExt = shouldSignExtendTypeInLibCall(NewOp.getValueType(),
+ CallOptions.IsSExt);
+ Entry.IsZExt = !Entry.IsSExt;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.OpsVTBeforeSoften[i])) {
+ Entry.IsSExt = Entry.IsZExt = false;
+ }
Args.push_back(Entry);
}
@@ -144,15 +149,22 @@ TargetLowering::makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT,
Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
TargetLowering::CallLoweringInfo CLI(DAG);
- bool signExtend = shouldSignExtendTypeInLibCall(RetVT, isSigned);
+ bool signExtend = shouldSignExtendTypeInLibCall(RetVT, CallOptions.IsSExt);
+ bool zeroExtend = !signExtend;
+
+ if (CallOptions.IsSoften &&
+ !shouldExtendTypeInLibCall(CallOptions.RetVTBeforeSoften)) {
+ signExtend = zeroExtend = false;
+ }
+
CLI.setDebugLoc(dl)
.setChain(DAG.getEntryNode())
.setLibCallee(getLibcallCallingConv(LC), RetTy, Callee, std::move(Args))
- .setNoReturn(doesNotReturn)
- .setDiscardResult(!isReturnValueUsed)
- .setIsPostTypeLegalization(isPostTypeLegalization)
+ .setNoReturn(CallOptions.DoesNotReturn)
+ .setDiscardResult(!CallOptions.IsReturnValueUsed)
+ .setIsPostTypeLegalization(CallOptions.IsPostTypeLegalization)
.setSExtResult(signExtend)
- .setZExtResult(!signExtend);
+ .setZExtResult(zeroExtend);
return LowerCallTo(CLI);
}
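On the caller side, the trailing booleans become a MakeLibCallOptions bag. A hedged sketch of the new call shape (fragment only, not compilable standalone; LC, Ops, VT, and dl stand in for whatever the caller already has, and setSExt is assumed to be the setter for the IsSExt field consumed above):

// Before: makeLibCall(DAG, LC, VT, Ops, /*isSigned=*/true, dl)
TargetLowering::MakeLibCallOptions CallOptions;
CallOptions.setSExt(true); // sign-extend operands/result where needed
std::pair<SDValue, SDValue> Res =
    TLI.makeLibCall(DAG, LC, VT, Ops, CallOptions, dl);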
@@ -263,7 +275,8 @@ TargetLowering::findOptimalMemOpLowering(std::vector<EVT> &MemOps,
void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode,
- const SDLoc &dl) const {
+ const SDLoc &dl, const SDValue OldLHS,
+ const SDValue OldRHS) const {
assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128 || VT == MVT::ppcf128)
&& "Unsupported setcc type!");
@@ -365,8 +378,11 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
// Use the target-specific return value for comparison lib calls.
EVT RetVT = getCmpLibcallReturnType();
SDValue Ops[2] = {NewLHS, NewRHS};
- NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, false /*sign irrelevant*/,
- dl).first;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ EVT OpsVT[2] = { OldLHS.getValueType(),
+ OldRHS.getValueType() };
+ CallOptions.setTypeListBeforeSoften(OpsVT, RetVT, true);
+ NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, CallOptions, dl).first;
NewRHS = DAG.getConstant(0, dl, RetVT);
CCCode = getCmpLibcallCC(LC1);
@@ -378,8 +394,7 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
NewLHS, NewRHS, DAG.getCondCode(CCCode));
- NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, false/*sign irrelevant*/,
- dl).first;
+ NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, CallOptions, dl).first;
NewLHS = DAG.getNode(
ISD::SETCC, dl,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), RetVT),
@@ -564,6 +579,170 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits,
AssumeSingleUse);
}
+// TODO: Can we merge SelectionDAG::GetDemandedBits into this?
+// TODO: Under what circumstances can we create nodes? Constant folding?
+SDValue TargetLowering::SimplifyMultipleUseDemandedBits(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ // Limit search depth.
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
+ return SDValue();
+
+ // Ignore UNDEFs.
+ if (Op.isUndef())
+ return SDValue();
+
+ // Not demanding any bits/elts from Op.
+ if (DemandedBits == 0 || DemandedElts == 0)
+ return DAG.getUNDEF(Op.getValueType());
+
+ unsigned NumElts = DemandedElts.getBitWidth();
+ KnownBits LHSKnown, RHSKnown;
+ switch (Op.getOpcode()) {
+ case ISD::BITCAST: {
+ SDValue Src = peekThroughBitcasts(Op.getOperand(0));
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+ unsigned NumSrcEltBits = SrcVT.getScalarSizeInBits();
+ unsigned NumDstEltBits = DstVT.getScalarSizeInBits();
+
+ if (NumSrcEltBits == NumDstEltBits)
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedBits, DemandedElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+
+ // TODO - bigendian once we have test coverage.
+ if (SrcVT.isVector() && (NumDstEltBits % NumSrcEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumDstEltBits / NumSrcEltBits;
+ unsigned NumSrcElts = SrcVT.getVectorNumElements();
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != Scale; ++i) {
+ unsigned Offset = i * NumSrcEltBits;
+ APInt Sub = DemandedBits.extractBits(NumSrcEltBits, Offset);
+ if (!Sub.isNullValue()) {
+ DemandedSrcBits |= Sub;
+ for (unsigned j = 0; j != NumElts; ++j)
+ if (DemandedElts[j])
+ DemandedSrcElts.setBit((j * Scale) + i);
+ }
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ // TODO - bigendian once we have test coverage.
+ if ((NumSrcEltBits % NumDstEltBits) == 0 &&
+ DAG.getDataLayout().isLittleEndian()) {
+ unsigned Scale = NumSrcEltBits / NumDstEltBits;
+ unsigned NumSrcElts = SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1;
+ APInt DemandedSrcBits = APInt::getNullValue(NumSrcEltBits);
+ APInt DemandedSrcElts = APInt::getNullValue(NumSrcElts);
+ for (unsigned i = 0; i != NumElts; ++i)
+ if (DemandedElts[i]) {
+ unsigned Offset = (i % Scale) * NumDstEltBits;
+ DemandedSrcBits.insertBits(DemandedBits, Offset);
+ DemandedSrcElts.setBit(i / Scale);
+ }
+
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Src, DemandedSrcBits, DemandedSrcElts, DAG, Depth + 1))
+ return DAG.getBitcast(DstVT, V);
+ }
+
+ break;
+ }
+ case ISD::AND: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known 1 on one side, return the other.
+ // These bits cannot contribute to the result of the 'and' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero | RHSKnown.One))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero | LHSKnown.One))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::OR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other. These bits cannot contribute to the result of the 'or' in this
+ // context.
+ if (DemandedBits.isSubsetOf(LHSKnown.One | RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(RHSKnown.One | LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::XOR: {
+ LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
+ RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
+
+ // If all of the demanded bits are known zero on one side, return the
+ // other.
+ if (DemandedBits.isSubsetOf(RHSKnown.Zero))
+ return Op.getOperand(0);
+ if (DemandedBits.isSubsetOf(LHSKnown.Zero))
+ return Op.getOperand(1);
+ break;
+ }
+ case ISD::SIGN_EXTEND_INREG: {
+ // If none of the extended bits are demanded, eliminate the sextinreg.
+ EVT ExVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
+ if (DemandedBits.getActiveBits() <= ExVT.getScalarSizeInBits())
+ return Op.getOperand(0);
+ break;
+ }
+ case ISD::INSERT_VECTOR_ELT: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ EVT VecVT = Vec.getValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ case ISD::VECTOR_SHUFFLE: {
+ ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(Op)->getMask();
+
+ // If all the demanded elts are from one operand and are inline,
+ // then we can use the operand directly.
+ bool AllUndef = true, IdentityLHS = true, IdentityRHS = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ int M = ShuffleMask[i];
+ if (M < 0 || !DemandedElts[i])
+ continue;
+ AllUndef = false;
+ IdentityLHS &= (M == (int)i);
+ IdentityRHS &= ((M - NumElts) == i);
+ }
+
+ if (AllUndef)
+ return DAG.getUNDEF(Op.getValueType());
+ if (IdentityLHS)
+ return Op.getOperand(0);
+ if (IdentityRHS)
+ return Op.getOperand(1);
+ break;
+ }
+ default:
+ if (Op.getOpcode() >= ISD::BUILTIN_OP_END)
+ if (SDValue V = SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth))
+ return V;
+ break;
+ }
+ return SDValue();
+}
+
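The point of returning an existing SDValue instead of combining is that a multi-use node can be looked through without disturbing its other users. A standalone sanity check of one such bypass (plain C++, not LLVM code): if only the low byte is demanded, an OR against 0xFF00 contributes nothing, so its other operand may feed the user directly while the OR survives for everyone else.

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t x = 0; x < 0x20000u; ++x) {
    uint32_t multiUse = x | 0xFF00u; // node kept alive by other users
    // Demanded bits 0x00FF are a subset of the OR operand's known zeros,
    // so the 'and' user can take x directly.
    assert((multiUse & 0xFFu) == (x & 0xFFu));
  }
  return 0;
}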
/// Look at Op. At this point, we know that only the OriginalDemandedBits of the
/// result of Op are ever used downstream. If we can use this information to
/// simplify Op, create a new simplified DAG node and return true, returning the
@@ -619,12 +798,15 @@ bool TargetLowering::SimplifyDemandedBits(
} else if (OriginalDemandedBits == 0 || OriginalDemandedElts == 0) {
// Not demanding any bits/elts from Op.
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
- } else if (Depth == 6) { // Limit search depth.
+ } else if (Depth >= SelectionDAG::MaxRecursionDepth) {
+ // Limit search depth.
return false;
}
KnownBits Known2, KnownOut;
switch (Op.getOpcode()) {
+ case ISD::TargetConstant:
+ llvm_unreachable("Can't simplify this node");
case ISD::SCALAR_TO_VECTOR: {
if (!DemandedElts[0])
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT));
@@ -728,6 +910,21 @@ bool TargetLowering::SimplifyDemandedBits(
}
break;
}
+ case ISD::EXTRACT_SUBVECTOR: {
+ // If index isn't constant, assume we need all the source vector elements.
+ SDValue Src = Op.getOperand(0);
+ ConstantSDNode *SubIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1));
+ unsigned NumSrcElts = Src.getValueType().getVectorNumElements();
+ APInt SrcElts = APInt::getAllOnesValue(NumSrcElts);
+ if (SubIdx && SubIdx->getAPIntValue().ule(NumSrcElts - NumElts)) {
+ // Offset the demanded elts by the subvector index.
+ uint64_t Idx = SubIdx->getZExtValue();
+ SrcElts = DemandedElts.zextOrSelf(NumSrcElts).shl(Idx);
+ }
+ if (SimplifyDemandedBits(Src, DemandedBits, SrcElts, Known, TLO, Depth + 1))
+ return true;
+ break;
+ }
case ISD::CONCAT_VECTORS: {
Known.Zero.setAllBits();
Known.One.setAllBits();
@@ -773,22 +970,37 @@ bool TargetLowering::SimplifyDemandedBits(
}
if (!!DemandedLHS || !!DemandedRHS) {
+ SDValue Op0 = Op.getOperand(0);
+ SDValue Op1 = Op.getOperand(1);
+
Known.Zero.setAllBits();
Known.One.setAllBits();
if (!!DemandedLHS) {
- if (SimplifyDemandedBits(Op.getOperand(0), DemandedBits, DemandedLHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op0, DemandedBits, DemandedLHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
if (!!DemandedRHS) {
- if (SimplifyDemandedBits(Op.getOperand(1), DemandedBits, DemandedRHS,
- Known2, TLO, Depth + 1))
+ if (SimplifyDemandedBits(Op1, DemandedBits, DemandedRHS, Known2, TLO,
+ Depth + 1))
return true;
Known.One &= Known2.One;
Known.Zero &= Known2.Zero;
}
+
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedLHS, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedRHS, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getVectorShuffle(VT, dl, Op0, Op1, ShuffleMask);
+ return TLO.CombineTo(Op, NewOp);
+ }
}
break;
}
@@ -834,6 +1046,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if (DemandedBits.isSubsetOf(Known2.Zero | Known.One))
@@ -869,6 +1095,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if (DemandedBits.isSubsetOf(Known2.One | Known.Zero))
@@ -901,6 +1141,20 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
assert(!Known2.hasConflict() && "Bits known to be one AND zero?");
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!DemandedBits.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, DemandedBits, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if (DemandedBits.isSubsetOf(Known.Zero))
@@ -1034,7 +1288,7 @@ bool TargetLowering::SimplifyDemandedBits(
// out) are never demanded.
// TODO - support non-uniform vector amounts.
if (Op0.getOpcode() == ISD::SRL) {
- if ((DemandedBits & APInt::getLowBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(APInt::getLowBitsSet(BitWidth, ShAmt))) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
if (SA2->getAPIntValue().ult(BitWidth)) {
@@ -1141,7 +1395,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (Op0.getOpcode() == ISD::SHL) {
if (ConstantSDNode *SA2 =
isConstOrConstSplat(Op0.getOperand(1), DemandedElts)) {
- if ((DemandedBits & APInt::getHighBitsSet(BitWidth, ShAmt)) == 0) {
+ if (!DemandedBits.intersects(
+ APInt::getHighBitsSet(BitWidth, ShAmt))) {
if (SA2->getAPIntValue().ult(BitWidth)) {
unsigned C1 = SA2->getZExtValue();
unsigned Opc = ISD::SRL;
@@ -1479,6 +1734,11 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
Known = Known.trunc(BitWidth);
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (SDValue NewSrc = SimplifyMultipleUseDemandedBits(
+ Src, TruncMask, DemandedElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::TRUNCATE, dl, VT, NewSrc));
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Src.getNode()->hasOneUse()) {
@@ -1595,9 +1855,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Bitcast from a vector using SimplifyDemanded Bits/VectorElts.
// Demand the elt/bit if any of the original elts/bits are demanded.
// TODO - bigendian once we have test coverage.
- // TODO - bool vectors once SimplifyDemandedVectorElts has SETCC support.
- if (SrcVT.isVector() && NumSrcEltBits > 1 &&
- (BitWidth % NumSrcEltBits) == 0 &&
+ if (SrcVT.isVector() && (BitWidth % NumSrcEltBits) == 0 &&
TLO.DAG.getDataLayout().isLittleEndian()) {
unsigned Scale = BitWidth / NumSrcEltBits;
unsigned NumSrcElts = SrcVT.getVectorNumElements();
@@ -1663,6 +1921,7 @@ bool TargetLowering::SimplifyDemandedBits(
// Add, Sub, and Mul don't demand any bits in positions beyond that
// of the highest bit demanded of them.
SDValue Op0 = Op.getOperand(0), Op1 = Op.getOperand(1);
+ SDNodeFlags Flags = Op.getNode()->getFlags();
unsigned DemandedBitsLZ = DemandedBits.countLeadingZeros();
APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ);
if (SimplifyDemandedBits(Op0, LoMask, DemandedElts, Known2, TLO,
@@ -1671,7 +1930,6 @@ bool TargetLowering::SimplifyDemandedBits(
Depth + 1) ||
// See if the operation should be performed at a smaller bit width.
ShrinkDemandedOp(Op, BitWidth, DemandedBits, TLO)) {
- SDNodeFlags Flags = Op.getNode()->getFlags();
if (Flags.hasNoSignedWrap() || Flags.hasNoUnsignedWrap()) {
// Disable the nsw and nuw flags. We can no longer guarantee that we
// won't wrap after simplification.
@@ -1684,6 +1942,23 @@ bool TargetLowering::SimplifyDemandedBits(
return true;
}
+ // Attempt to avoid multi-use ops if we don't need anything from them.
+ if (!LoMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) {
+ SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits(
+ Op0, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits(
+ Op1, LoMask, DemandedElts, TLO.DAG, Depth + 1);
+ if (DemandedOp0 || DemandedOp1) {
+ Flags.setNoSignedWrap(false);
+ Flags.setNoUnsignedWrap(false);
+ Op0 = DemandedOp0 ? DemandedOp0 : Op0;
+ Op1 = DemandedOp1 ? DemandedOp1 : Op1;
+ SDValue NewOp =
+ TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Op1, Flags);
+ return TLO.CombineTo(Op, NewOp);
+ }
+ }
+
// If we have a constant operand, we may be able to turn it into -1 if we
// do not demand the high bits. This can make the constant smaller to
// encode, allow more general folding, or match specialized instruction
@@ -1694,10 +1969,8 @@ bool TargetLowering::SimplifyDemandedBits(
if (C && !C->isAllOnesValue() && !C->isOne() &&
(C->getAPIntValue() | HighMask).isAllOnesValue()) {
SDValue Neg1 = TLO.DAG.getAllOnesConstant(dl, VT);
- // We can't guarantee that the new math op doesn't wrap, so explicitly
- // clear those flags to prevent folding with a potential existing node
- // that has those flags set.
- SDNodeFlags Flags;
+ // Disable the nsw and nuw flags. We can no longer guarantee that we
+ // won't wrap after simplification.
Flags.setNoSignedWrap(false);
Flags.setNoUnsignedWrap(false);
SDValue NewOp = TLO.DAG.getNode(Op.getOpcode(), dl, VT, Op0, Neg1, Flags);
@@ -1837,7 +2110,7 @@ bool TargetLowering::SimplifyDemandedVectorElts(
}
// Limit search depth.
- if (Depth >= 6)
+ if (Depth >= SelectionDAG::MaxRecursionDepth)
return false;
SDLoc DL(Op);
@@ -2001,6 +2274,15 @@ bool TargetLowering::SimplifyDemandedVectorElts(
return true;
APInt BaseElts = DemandedElts;
BaseElts.insertBits(APInt::getNullValue(NumSubElts), SubIdx);
+
+ // If none of the base operand elements are demanded, replace it with undef.
+ if (!BaseElts && !Base.isUndef())
+ return TLO.CombineTo(Op,
+ TLO.DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
+ TLO.DAG.getUNDEF(VT),
+ Op.getOperand(1),
+ Op.getOperand(2)));
+
if (SimplifyDemandedVectorElts(Base, BaseElts, KnownUndef, KnownZero, TLO,
Depth + 1))
return true;
@@ -2134,11 +2416,13 @@ bool TargetLowering::SimplifyDemandedVectorElts(
// Update legal shuffle masks based on demanded elements if it won't reduce
// to Identity which can cause premature removal of the shuffle mask.
- if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps &&
- isShuffleMaskLegal(NewMask, VT))
- return TLO.CombineTo(Op,
- TLO.DAG.getVectorShuffle(VT, DL, Op.getOperand(0),
- Op.getOperand(1), NewMask));
+ if (Updated && !IdentityLHS && !IdentityRHS && !TLO.LegalOps) {
+ SDValue LegalShuffle =
+ buildLegalVectorShuffle(VT, DL, Op.getOperand(0), Op.getOperand(1),
+ NewMask, TLO.DAG);
+ if (LegalShuffle)
+ return TLO.CombineTo(Op, LegalShuffle);
+ }
// Propagate undef/zero elements from LHS/RHS.
for (unsigned i = 0; i != NumElts; ++i) {
@@ -2304,6 +2588,13 @@ void TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
Known.resetAll();
}
+void TargetLowering::computeKnownBitsForTargetInstr(
+ GISelKnownBits &Analysis, Register R, KnownBits &Known,
+ const APInt &DemandedElts, const MachineRegisterInfo &MRI,
+ unsigned Depth) const {
+ Known.resetAll();
+}
+
void TargetLowering::computeKnownBitsForFrameIndex(const SDValue Op,
KnownBits &Known,
const APInt &DemandedElts,
@@ -2357,6 +2648,36 @@ bool TargetLowering::SimplifyDemandedBitsForTargetNode(
return false;
}
+SDValue TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ assert(
+ (Op.getOpcode() >= ISD::BUILTIN_OP_END ||
+ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ Op.getOpcode() == ISD::INTRINSIC_VOID) &&
+ "Should use SimplifyMultipleUseDemandedBits if you don't know whether Op"
+ " is a target node!");
+ return SDValue();
+}
+
+SDValue
+TargetLowering::buildLegalVectorShuffle(EVT VT, const SDLoc &DL, SDValue N0,
+ SDValue N1, MutableArrayRef<int> Mask,
+ SelectionDAG &DAG) const {
+ bool LegalMask = isShuffleMaskLegal(Mask, VT);
+ if (!LegalMask) {
+ std::swap(N0, N1);
+ ShuffleVectorSDNode::commuteMask(Mask);
+ LegalMask = isShuffleMaskLegal(Mask, VT);
+ }
+
+ if (!LegalMask)
+ return SDValue();
+
+ return DAG.getVectorShuffle(VT, DL, N0, N1, Mask);
+}
+
const Constant *TargetLowering::getTargetConstantFromLoad(LoadSDNode*) const {
return nullptr;
}
@@ -2610,6 +2931,77 @@ SDValue TargetLowering::optimizeSetCCOfSignedTruncationCheck(
return T2;
}
+// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL) const {
+ assert(isConstOrConstSplat(N1C) &&
+ isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() &&
+ "Should be a comparison with 0.");
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Valid only for [in]equality comparisons.");
+
+ unsigned NewShiftOpcode;
+ SDValue X, C, Y;
+
+ SelectionDAG &DAG = DCI.DAG;
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+
+ // Look for '(C l>>/<< Y)'.
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) {
+ // The shift should be one-use.
+ if (!V.hasOneUse())
+ return false;
+ unsigned OldShiftOpcode = V.getOpcode();
+ switch (OldShiftOpcode) {
+ case ISD::SHL:
+ NewShiftOpcode = ISD::SRL;
+ break;
+ case ISD::SRL:
+ NewShiftOpcode = ISD::SHL;
+ break;
+ default:
+ return false; // must be a logical shift.
+ }
+ // We should be shifting a constant.
+ // FIXME: best to use isConstantOrConstantVector().
+ C = V.getOperand(0);
+ ConstantSDNode *CC =
+ isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ if (!CC)
+ return false;
+ Y = V.getOperand(1);
+
+ ConstantSDNode *XC =
+ isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true);
+ return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
+ X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG);
+ };
+
+ // LHS of comparison should be a one-use 'and'.
+ if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
+ return SDValue();
+
+ X = N0.getOperand(0);
+ SDValue Mask = N0.getOperand(1);
+
+ // 'and' is commutative!
+ if (!Match(Mask)) {
+ std::swap(X, Mask);
+ if (!Match(Mask))
+ return SDValue();
+ }
+
+ EVT VT = X.getValueType();
+
+ // Produce:
+ // ((X 'OppositeShiftOpcode' Y) & C) Cond 0
+ SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y);
+ SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C);
+ SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond);
+ return T2;
+}
+
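A standalone check of the rewrite this helper performs (plain C++, not part of the patch): moving the logical shift off the constant and onto X preserves the ==/!= 0 answer, and leaves C unshifted so it stays cheap to materialize.

#include <cassert>
#include <cstdint>

int main() {
  // (x & (c << y)) == 0  <=>  ((x >> y) & c) == 0, and symmetrically for
  // the other shift direction; bits shifted out drop on both sides alike.
  for (uint32_t x = 0; x < 4096; x += 7)
    for (uint32_t c = 0; c < 64; ++c)
      for (uint32_t y = 0; y < 32; ++y)
        assert(((x & (c << y)) == 0) == (((x >> y) & c) == 0));
  return 0;
}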
/// Try to fold an equality comparison with a {add/sub/xor} binary operation as
/// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
/// handle the commuted versions of these patterns.
@@ -2726,9 +3118,9 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// (ctpop x) u< 2 -> (x & x-1) == 0
// (ctpop x) u> 1 -> (x & x-1) != 0
if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
- SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
- DAG.getConstant(1, dl, CTVT));
- SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
+ SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, dl, CTVT), CC);
}
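A standalone check of the identity this hunk relies on (plain C++; std::popcount needs C++20): clearing the lowest set bit yields zero exactly when at most one bit was set. The only change above is building the decrement as add x, -1, the DAG's canonical form of sub x, 1.

#include <bit>
#include <cassert>
#include <cstdint>

int main() {
  // (ctpop x) u< 2  <=>  (x & (x - 1)) == 0, including x == 0.
  for (uint32_t x = 0; x < (1u << 20); ++x)
    assert((std::popcount(x) < 2) == ((x & (x - 1)) == 0));
  return 0;
}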
@@ -2852,7 +3244,7 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LoadSDNode *Lod = cast<LoadSDNode>(N0.getOperand(0));
APInt bestMask;
unsigned bestWidth = 0, bestOffset = 0;
- if (!Lod->isVolatile() && Lod->isUnindexed()) {
+ if (Lod->isSimple() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueSizeInBits();
unsigned maskWidth = origWidth;
// We can narrow (e.g.) 16-bit extending loads on 32-bit target to
@@ -3178,6 +3570,14 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ if (Cond == ISD::SETEQ || Cond == ISD::SETNE) {
+ // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</l>> Y) & C) ==/!= 0
+ if (C1.isNullValue())
+ if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift(
+ VT, N0, N1, Cond, DCI, dl))
+ return CC;
+ }
+
// If we have "setcc X, C0", check to see if we can shrink the immediate
// by changing cc.
// TODO: Support this for vectors after legalize ops.
@@ -3203,33 +3603,35 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// Back to non-vector simplifications.
// TODO: Can we do these for vector splats?
if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
const APInt &C1 = N1C->getAPIntValue();
+ EVT ShValTy = N0.getValueType();
// Fold bit comparisons when we can.
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
- (VT == N0.getValueType() ||
- (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) &&
+ (VT == ShValTy || (isTypeLegal(VT) && VT.bitsLE(ShValTy))) &&
N0.getOpcode() == ISD::AND) {
auto &DL = DAG.getDataLayout();
if (auto *AndRHS = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
- EVT ShiftTy = getShiftAmountTy(N0.getValueType(), DL,
- !DCI.isBeforeLegalize());
+ EVT ShiftTy = getShiftAmountTy(ShValTy, DL, !DCI.isBeforeLegalize());
if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3
// Perform the xform if the AND RHS is a single bit.
- if (AndRHS->getAPIntValue().isPowerOf2()) {
+ unsigned ShCt = AndRHS->getAPIntValue().logBase2();
+ if (AndRHS->getAPIntValue().isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(AndRHS->getAPIntValue().logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
} else if (Cond == ISD::SETEQ && C1 == AndRHS->getAPIntValue()) {
// (X & 8) == 8 --> (X & 8) >> 3
// Perform the xform if C1 is a single bit.
- if (C1.isPowerOf2()) {
+ unsigned ShCt = C1.logBase2();
+ if (C1.isPowerOf2() &&
+ ShCt <= TLI.getShiftAmountThreshold(ShValTy)) {
return DAG.getNode(ISD::TRUNCATE, dl, VT,
- DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0,
- DAG.getConstant(C1.logBase2(), dl,
- ShiftTy)));
+ DAG.getNode(ISD::SRL, dl, ShValTy, N0,
+ DAG.getConstant(ShCt, dl, ShiftTy)));
}
}
}
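A standalone check of the single-bit transform being guarded here (plain C++, not part of the patch): with a power-of-two mask, the ==/!= answer is just the masked bit moved down to position 0. The new getShiftAmountThreshold query lets a target decline the fold when the resulting shift amount is too expensive.

#include <cassert>
#include <cstdint>

int main() {
  // (X & 8) != 0  -->  (X & 8) >> 3
  for (uint32_t x = 0; x < 4096; ++x)
    assert(((x & 8u) != 0 ? 1u : 0u) == ((x & 8u) >> 3));
  return 0;
}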
@@ -3452,15 +3854,21 @@ SDValue TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
// Fold remainder of division by a constant.
- if (N0.getOpcode() == ISD::UREM && N0.hasOneUse() &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ if ((N0.getOpcode() == ISD::UREM || N0.getOpcode() == ISD::SREM) &&
+ N0.hasOneUse() && (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
// When division is cheap or optimizing for minimum size,
// fall through to DIVREM creation by skipping this fold.
- if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize))
- if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
- return Folded;
+ if (!isIntDivCheap(VT, Attr) && !Attr.hasFnAttribute(Attribute::MinSize)) {
+ if (N0.getOpcode() == ISD::UREM) {
+ if (SDValue Folded = buildUREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ } else if (N0.getOpcode() == ISD::SREM) {
+ if (SDValue Folded = buildSREMEqFold(VT, N0, N1, Cond, DCI, dl))
+ return Folded;
+ }
+ }
}
// Fold away ALL boolean setcc's.
@@ -3567,15 +3975,17 @@ TargetLowering::getConstraintType(StringRef Constraint) const {
if (S == 1) {
switch (Constraint[0]) {
default: break;
- case 'r': return C_RegisterClass;
+ case 'r':
+ return C_RegisterClass;
case 'm': // memory
case 'o': // offsetable
case 'V': // not offsetable
return C_Memory;
- case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
case 'E': // Floating Point Constant
case 'F': // Floating Point Constant
+ return C_Immediate;
+ case 'i': // Simple Integer or Relocatable Constant
case 's': // Relocatable Constant
case 'p': // Address.
case 'X': // Allow ANY value.
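A small GCC-style illustration of the reclassification (hypothetical user code, not part of the patch): 'n' now maps to C_Immediate, so an operand that is not a compile-time constant can hit the 'value out of range' / 'invalid operand' diagnostics added earlier in this patch instead of being handled as a generic C_Other operand.

#include <cstdint>

uint32_t demo(uint32_t v) {
  asm volatile("" ::"n"(42)); // OK: 'n' requires an integer immediate
  asm volatile("" ::"r"(v));  // OK: 'r' takes any value in a register
  // asm volatile("" ::"n"(v)); // not a constant: now cleanly diagnosable
  return v;
}

int main() { return demo(7) - 7; }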
@@ -3950,6 +4360,7 @@ TargetLowering::ParseConstraints(const DataLayout &DL,
/// Return an integer indicating how general CT is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
switch (CT) {
+ case TargetLowering::C_Immediate:
case TargetLowering::C_Other:
case TargetLowering::C_Unknown:
return 0;
@@ -4069,11 +4480,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
TargetLowering::ConstraintType CType =
TLI.getConstraintType(OpInfo.Codes[i]);
- // If this is an 'other' constraint, see if the operand is valid for it.
- // For example, on X86 we might have an 'rI' constraint. If the operand
- // is an integer in the range [0..31] we want to use I (saving a load
- // of a register), otherwise we must use 'r'.
- if (CType == TargetLowering::C_Other && Op.getNode()) {
+ // If this is an 'other' or 'immediate' constraint, see if the operand is
+ // valid for it. For example, on X86 we might have an 'rI' constraint. If
+ // the operand is an integer in the range [0..31] we want to use I (saving a
+ // load of a register), otherwise we must use 'r'.
+ if ((CType == TargetLowering::C_Other ||
+ CType == TargetLowering::C_Immediate) && Op.getNode()) {
assert(OpInfo.Codes[i].size() == 1 &&
"Unhandled multi-letter 'other' constraint");
std::vector<SDValue> ResultOps;
@@ -4455,6 +4867,34 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
return DAG.getSelect(dl, VT, IsOne, N0, Q);
}
+/// If all values in Values that *don't* match the predicate are the same
+/// 'splat' value, then replace all values that do match the predicate with
+/// that splat value.
+/// Else, if AlternativeReplacement was provided, replace all values that do
+/// match the predicate with the AlternativeReplacement value.
+static void
+turnVectorIntoSplatVector(MutableArrayRef<SDValue> Values,
+ std::function<bool(SDValue)> Predicate,
+ SDValue AlternativeReplacement = SDValue()) {
+ SDValue Replacement;
+ // Is there a value for which the Predicate does *NOT* match? What is it?
+ auto SplatValue = llvm::find_if_not(Values, Predicate);
+ if (SplatValue != Values.end()) {
+ // Does Values consist only of copies of SplatValue and predicate matches?
+ if (llvm::all_of(Values, [Predicate, SplatValue](SDValue Value) {
+ return Value == *SplatValue || Predicate(Value);
+ })) // Then we shall replace values matching predicate with SplatValue.
+ Replacement = *SplatValue;
+ }
+ if (!Replacement) {
+ // Oops, we did not find the "baseline" splat value.
+ if (!AlternativeReplacement)
+ return; // Nothing to do.
+ // Let's replace with provided value then.
+ Replacement = AlternativeReplacement;
+ }
+ std::replace_if(Values.begin(), Values.end(), Predicate, Replacement);
+}
+
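The helper's shape, restated on plain ints (a minimal standalone analog, not LLVM code): find a non-matching 'baseline' element, confirm every element either matches the predicate or equals that baseline, then overwrite the matching elements with it.

#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<int> V = {5, 0, 5, 0, 5};
  auto IsZero = [](int X) { return X == 0; };
  // Find the candidate splat value: the first non-matching element.
  auto Splat = std::find_if_not(V.begin(), V.end(), IsZero);
  if (Splat != V.end() &&
      std::all_of(V.begin(), V.end(),
                  [&](int X) { return X == *Splat || IsZero(X); }))
    std::replace_if(V.begin(), V.end(), IsZero, *Splat);
  assert(V == std::vector<int>({5, 5, 5, 5, 5}));
  return 0;
}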
/// Given an ISD::UREM used only by an ISD::SETEQ or ISD::SETNE
/// where the divisor is constant and the comparison target is zero,
/// return a DAG expression that will generate the same comparison result
@@ -4482,77 +4922,409 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
DAGCombinerInfo &DCI, const SDLoc &DL,
SmallVectorImpl<SDNode *> &Created) const {
// fold (seteq/ne (urem N, D), 0) -> (setule/ugt (rotr (mul N, P), K), Q)
- // - D must be constant with D = D0 * 2^K where D0 is odd and D0 != 1
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
// - P is the multiplicative inverse of D0 modulo 2^W
- // - Q = floor((2^W - 1) / D0)
+ // - Q = floor(((2^W) - 1) / D)
// where W is the width of the common type of N and D.
assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
"Only applicable for (in)equality comparisons.");
+ SelectionDAG &DAG = DCI.DAG;
+
EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
// If MUL is unavailable, we cannot proceed in any case.
if (!isOperationLegalOrCustom(ISD::MUL, VT))
return SDValue();
- // TODO: Add non-uniform constant support.
- ConstantSDNode *Divisor = isConstOrConstSplat(REMNode->getOperand(1));
+ // TODO: Could support comparing with non-zero too.
ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
- if (!Divisor || !CompTarget || Divisor->isNullValue() ||
- !CompTarget->isNullValue())
+ if (!CompTarget || !CompTarget->isNullValue())
return SDValue();
- const APInt &D = Divisor->getAPIntValue();
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, KAmts, QAmts;
+
+ auto BuildUREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ const APInt &D = C->getAPIntValue();
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ // D is even if it has trailing zeros.
+ HadEvenDivisor |= (K != 0);
+ // D is a power-of-two if D0 is one.
+ // If all divisors are power-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // Q = floor((2^W - 1) / D)
+ APInt Q = APInt::getAllOnesValue(W).udiv(D);
+
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1, the result can be constant-folded.
+ if (D.isOneValue()) {
+ // Set the P and K amounts to bogus values so we can try to splat them.
+ P = 0;
+ K = -1;
+ assert(Q.isAllOnesValue() &&
+ "Expecting all-ones comparison for one divisor");
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
- // Decompose D into D0 * 2^K
- unsigned K = D.countTrailingZeros();
- bool DivisorIsEven = (K != 0);
- APInt D0 = D.lshr(K);
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildUREMPattern))
+ return SDValue();
- // The fold is invalid when D0 == 1.
- // This is reachable because visitSetCC happens before visitREM.
- if (D0.isOneValue())
+ // If this is a urem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
return SDValue();
- // P = inv(D0, 2^W)
- // 2^W requires W + 1 bits, so we have to extend and then truncate.
- unsigned W = D.getBitWidth();
- APInt P = D0.zext(W + 1)
- .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
- .trunc(W);
- assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
- assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+ // If this is a urem by a power-of-two, avoid the fold since it is better
+ // implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
- // Q = floor((2^W - 1) / D)
- APInt Q = APInt::getAllOnesValue(W).udiv(D);
+ SDValue PVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
- SelectionDAG &DAG = DCI.DAG;
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
- SDValue PVal = DAG.getConstant(P, DL, VT);
- SDValue QVal = DAG.getConstant(Q, DL, VT);
// (mul N, P)
- SDValue Op1 = DAG.getNode(ISD::MUL, DL, VT, REMNode->getOperand(0), PVal);
- Created.push_back(Op1.getNode());
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
- // Rotate right only if D was even.
- if (DivisorIsEven) {
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
// We need ROTR to do this.
if (!isOperationLegalOrCustom(ISD::ROTR, VT))
return SDValue();
- SDValue ShAmt =
- DAG.getConstant(K, DL, getShiftAmountTy(VT, DAG.getDataLayout()));
SDNodeFlags Flags;
Flags.setExact(true);
// UREM: (rotr (mul N, P), K)
- Op1 = DAG.getNode(ISD::ROTR, DL, VT, Op1, ShAmt, Flags);
- Created.push_back(Op1.getNode());
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
}
// UREM: (setule/setugt (rotr (mul N, P), K), Q)
- return DAG.getSetCC(DL, SETCCVT, Op1, QVal,
+ return DAG.getSetCC(DL, SETCCVT, Op0, QVal,
((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
}
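A worked scalar instance of the pattern (plain C++, not part of the patch): take D = 6 = 3 * 2^1, so D0 = 3 and K = 1 at W = 32, giving P = inv(3, 2^32) = 0xAAAAAAAB and Q = floor((2^32 - 1) / 6) = 0x2AAAAAAA.

#include <cassert>
#include <cstdint>

static uint32_t rotr32(uint32_t v, unsigned s) {
  return s ? (v >> s) | (v << (32 - s)) : v;
}

int main() {
  const uint32_t P = 0xAAAAAAABu, Q = 0x2AAAAAAAu;
  for (uint32_t n = 0; n < (1u << 20); ++n)
    assert((n % 6 == 0) == (rotr32(n * P, 1) <= Q)); // urem -> mul+rotr+cmp
  return 0;
}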
+/// Given an ISD::SREM used only by an ISD::SETEQ or ISD::SETNE
+/// where the divisor is constant and the comparison target is zero,
+/// return a DAG expression that will generate the same comparison result
+/// using only multiplications, additions and shifts/rotations.
+/// Ref: "Hacker's Delight" 10-17.
+SDValue TargetLowering::buildSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode,
+ ISD::CondCode Cond,
+ DAGCombinerInfo &DCI,
+ const SDLoc &DL) const {
+ SmallVector<SDNode *, 7> Built;
+ if (SDValue Folded = prepareSREMEqFold(SETCCVT, REMNode, CompTargetNode, Cond,
+ DCI, DL, Built)) {
+ assert(Built.size() <= 7 && "Max size prediction failed.");
+ for (SDNode *N : Built)
+ DCI.AddToWorklist(N);
+ return Folded;
+ }
+
+ return SDValue();
+}
+
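A worked scalar instance of the srem fold prepared below (plain C++, not part of the patch): take D = 3 at W = 32, so D0 = 3 and K = 0, giving P = inv(3, 2^32) = 0xAAAAAAAB, A = floor((2^31 - 1) / 3) = 0x2AAAAAAA, and Q = 2 * A = 0x55555554; with K = 0 the rotate drops out entirely.

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t P = 0xAAAAAAABu, A = 0x2AAAAAAAu, Q = 0x55555554u;
  for (int32_t n = -(1 << 20); n < (1 << 20); ++n)
    assert((n % 3 == 0) == (uint32_t(n) * P + A <= Q)); // srem -> mul+add+cmp
  return 0;
}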
+SDValue
+TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode,
+ SDValue CompTargetNode, ISD::CondCode Cond,
+ DAGCombinerInfo &DCI, const SDLoc &DL,
+ SmallVectorImpl<SDNode *> &Created) const {
+ // Fold:
+ // (seteq/ne (srem N, D), 0)
+ // To:
+ // (setule/ugt (rotr (add (mul N, P), A), K), Q)
+ //
+ // - D must be constant, with D = D0 * 2^K where D0 is odd
+ // - P is the multiplicative inverse of D0 modulo 2^W
+ // - A = bitwiseand(floor((2^(W - 1) - 1) / D0), (-(2^K)))
+ // - Q = floor((2 * A) / (2^K))
+ // where W is the width of the common type of N and D.
+ assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
+ "Only applicable for (in)equality comparisons.");
+
+ SelectionDAG &DAG = DCI.DAG;
+
+ EVT VT = REMNode.getValueType();
+ EVT SVT = VT.getScalarType();
+ EVT ShVT = getShiftAmountTy(VT, DAG.getDataLayout());
+ EVT ShSVT = ShVT.getScalarType();
+
+ // If MUL is unavailable, we cannot proceed in any case.
+ if (!isOperationLegalOrCustom(ISD::MUL, VT))
+ return SDValue();
+
+ // TODO: Could support comparing with non-zero too.
+ ConstantSDNode *CompTarget = isConstOrConstSplat(CompTargetNode);
+ if (!CompTarget || !CompTarget->isNullValue())
+ return SDValue();
+
+ bool HadIntMinDivisor = false;
+ bool HadOneDivisor = false;
+ bool AllDivisorsAreOnes = true;
+ bool HadEvenDivisor = false;
+ bool NeedToApplyOffset = false;
+ bool AllDivisorsArePowerOfTwo = true;
+ SmallVector<SDValue, 16> PAmts, AAmts, KAmts, QAmts;
+
+ auto BuildSREMPattern = [&](ConstantSDNode *C) {
+ // Division by 0 is UB. Leave it to be constant-folded elsewhere.
+ if (C->isNullValue())
+ return false;
+
+ // FIXME: we don't fold `rem %X, -C` to `rem %X, C` in DAGCombine.
+
+ // WARNING: this fold is only valid for positive divisors!
+ APInt D = C->getAPIntValue();
+ if (D.isNegative())
+ D.negate(); // `rem %X, -C` is equivalent to `rem %X, C`
+
+ HadIntMinDivisor |= D.isMinSignedValue();
+
+ // If all divisors are ones, we will prefer to avoid the fold.
+ HadOneDivisor |= D.isOneValue();
+ AllDivisorsAreOnes &= D.isOneValue();
+
+ // Decompose D into D0 * 2^K
+ unsigned K = D.countTrailingZeros();
+ assert((!D.isOneValue() || (K == 0)) && "For divisor '1' we won't rotate.");
+ APInt D0 = D.lshr(K);
+
+ if (!D.isMinSignedValue()) {
+ // D is even if it has trailing zeros, unless it's INT_MIN, in which case
+ // we don't care about this lane in this fold; we'll special-handle it.
+ HadEvenDivisor |= (K != 0);
+ }
+
+ // D is a power-of-two if D0 is one. This includes INT_MIN.
+ // If all divisors are powers-of-two, we will prefer to avoid the fold.
+ AllDivisorsArePowerOfTwo &= D0.isOneValue();
+
+ // P = inv(D0, 2^W)
+ // 2^W requires W + 1 bits, so we have to extend and then truncate.
+ unsigned W = D.getBitWidth();
+ APInt P = D0.zext(W + 1)
+ .multiplicativeInverse(APInt::getSignedMinValue(W + 1))
+ .trunc(W);
+ assert(!P.isNullValue() && "No multiplicative inverse!"); // unreachable
+ assert((D0 * P).isOneValue() && "Multiplicative inverse sanity check.");
+
+ // A = floor((2^(W - 1) - 1) / D0) & -2^K
+ APInt A = APInt::getSignedMaxValue(W).udiv(D0);
+ A.clearLowBits(K);
+
+ if (!D.isMinSignedValue()) {
+ // If the divisor is INT_MIN, we don't care about this lane in this
+ // fold; we'll special-handle it.
+ NeedToApplyOffset |= A != 0;
+ }
+
+ // Q = floor((2 * A) / (2^K))
+ APInt Q = (2 * A).udiv(APInt::getOneBitSet(W, K));
+
+ assert(APInt::getAllOnesValue(SVT.getSizeInBits()).ugt(A) &&
+ "We are expecting that A is always less than all-ones for SVT");
+ assert(APInt::getAllOnesValue(ShSVT.getSizeInBits()).ugt(K) &&
+ "We are expecting that K is always less than all-ones for ShSVT");
+
+ // If the divisor is 1, the result can be constant-folded. Likewise, we
+ // don't care about INT_MIN lanes; those can be set to undef if appropriate.
+ if (D.isOneValue()) {
+ // Set P, A and K to bogus values so we can try to splat them.
+ P = 0;
+ A = -1;
+ K = -1;
+
+ // x ?% 1 == 0 <--> true <--> x u<= -1
+ Q = -1;
+ }
+
+ PAmts.push_back(DAG.getConstant(P, DL, SVT));
+ AAmts.push_back(DAG.getConstant(A, DL, SVT));
+ KAmts.push_back(
+ DAG.getConstant(APInt(ShSVT.getSizeInBits(), K), DL, ShSVT));
+ QAmts.push_back(DAG.getConstant(Q, DL, SVT));
+ return true;
+ };
+
+ SDValue N = REMNode.getOperand(0);
+ SDValue D = REMNode.getOperand(1);
+
+ // Collect the values from each element.
+ if (!ISD::matchUnaryPredicate(D, BuildSREMPattern))
+ return SDValue();
+
+ // If this is an srem by one, avoid the fold since it can be constant-folded.
+ if (AllDivisorsAreOnes)
+ return SDValue();
+
+ // If this is an srem by a power-of-two (including INT_MIN), avoid the fold
+ // since it is best implemented as a bit test.
+ if (AllDivisorsArePowerOfTwo)
+ return SDValue();
+
+ SDValue PVal, AVal, KVal, QVal;
+ if (VT.isVector()) {
+ if (HadOneDivisor) {
+ // Try to turn PAmts into a splat, since we don't care about the values
+ // that are currently '0'. If we can't, just keep the '0's.
+ turnVectorIntoSplatVector(PAmts, isNullConstant);
+ // Try to turn AAmts into a splat, since we don't care about the
+ // values that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(AAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, SVT));
+ // Try to turn KAmts into a splat, since we don't care about the values
+ // that are currently '-1'. If we can't, change them to '0's.
+ turnVectorIntoSplatVector(KAmts, isAllOnesConstant,
+ DAG.getConstant(0, DL, ShSVT));
+ }
+
+ PVal = DAG.getBuildVector(VT, DL, PAmts);
+ AVal = DAG.getBuildVector(VT, DL, AAmts);
+ KVal = DAG.getBuildVector(ShVT, DL, KAmts);
+ QVal = DAG.getBuildVector(VT, DL, QAmts);
+ } else {
+ PVal = PAmts[0];
+ AVal = AAmts[0];
+ KVal = KAmts[0];
+ QVal = QAmts[0];
+ }
+
+ // (mul N, P)
+ SDValue Op0 = DAG.getNode(ISD::MUL, DL, VT, N, PVal);
+ Created.push_back(Op0.getNode());
+
+ if (NeedToApplyOffset) {
+ // We need ADD to do this.
+ if (!isOperationLegalOrCustom(ISD::ADD, VT))
+ return SDValue();
+
+ // (add (mul N, P), A)
+ Op0 = DAG.getNode(ISD::ADD, DL, VT, Op0, AVal);
+ Created.push_back(Op0.getNode());
+ }
+
+ // Rotate right only if any divisor was even. We avoid rotates for all-odd
+ // divisors as a performance improvement, since rotating by 0 is a no-op.
+ if (HadEvenDivisor) {
+ // We need ROTR to do this.
+ if (!isOperationLegalOrCustom(ISD::ROTR, VT))
+ return SDValue();
+ SDNodeFlags Flags;
+ Flags.setExact(true);
+ // SREM: (rotr (add (mul N, P), A), K)
+ Op0 = DAG.getNode(ISD::ROTR, DL, VT, Op0, KVal, Flags);
+ Created.push_back(Op0.getNode());
+ }
+
+ // SREM: (setule/setugt (rotr (add (mul N, P), A), K), Q)
+ SDValue Fold =
+ DAG.getSetCC(DL, SETCCVT, Op0, QVal,
+ ((Cond == ISD::SETEQ) ? ISD::SETULE : ISD::SETUGT));
+
+ // If we didn't have lanes with INT_MIN divisor, then we're done.
+ if (!HadIntMinDivisor)
+ return Fold;
+
+ // The fold above is only valid for positive divisors, which effectively
+ // means it is invalid for INT_MIN divisors. So if we have such a lane,
+ // we must fix up the results for those lanes.
+ assert(VT.isVector() && "Can/should only get here for vectors.");
+
+ if (!isOperationLegalOrCustom(ISD::SETEQ, VT) ||
+ !isOperationLegalOrCustom(ISD::AND, VT) ||
+ !isOperationLegalOrCustom(Cond, VT) ||
+ !isOperationLegalOrCustom(ISD::VSELECT, VT))
+ return SDValue();
+
+ Created.push_back(Fold.getNode());
+
+ SDValue IntMin = DAG.getConstant(
+ APInt::getSignedMinValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue IntMax = DAG.getConstant(
+ APInt::getSignedMaxValue(SVT.getScalarSizeInBits()), DL, VT);
+ SDValue Zero =
+ DAG.getConstant(APInt::getNullValue(SVT.getScalarSizeInBits()), DL, VT);
+
+ // Which lanes had INT_MIN divisors? Divisor is constant, so const-folded.
+ SDValue DivisorIsIntMin = DAG.getSetCC(DL, SETCCVT, D, IntMin, ISD::SETEQ);
+ Created.push_back(DivisorIsIntMin.getNode());
+
+ // (N s% INT_MIN) ==/!= 0 <--> (N & INT_MAX) ==/!= 0
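+ // (the only W-bit multiples of INT_MIN are 0 and INT_MIN itself, and both
+ // have all of their low W - 1 bits clear)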
+ SDValue Masked = DAG.getNode(ISD::AND, DL, VT, N, IntMax);
+ Created.push_back(Masked.getNode());
+ SDValue MaskedIsZero = DAG.getSetCC(DL, SETCCVT, Masked, Zero, Cond);
+ Created.push_back(MaskedIsZero.getNode());
+
+ // To produce the final result we need to blend two vectors: 'Fold' and
+ // 'MaskedIsZero'. If the divisor for a lane was *NOT* INT_MIN, we pick
+ // from 'Fold'; else we pick from 'MaskedIsZero'. Since 'DivisorIsIntMin' is
+ // constant-folded, the select can be lowered to a shuffle with constant mask.
+ SDValue Blended =
+ DAG.getNode(ISD::VSELECT, DL, VT, DivisorIsIntMin, MaskedIsZero, Fold);
+
+ return Blended;
+}
+
bool TargetLowering::
verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
if (!isa<ConstantSDNode>(Op.getOperand(0))) {
@@ -4564,6 +5336,246 @@ verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const {
return false;
}
+char TargetLowering::isNegatibleForFree(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations, bool ForCodeSize,
+ unsigned Depth) const {
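+ // Return 0 if the negated form of Op cannot be computed for free, 1 if it
+ // costs the same as Op itself, and 2 if the negated form is strictly
+ // cheaper (e.g. the negation just removes an existing FNEG).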
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return 2;
+
+ // Don't allow anything with multiple uses unless we know it is free.
+ EVT VT = Op.getValueType();
+ const SDNodeFlags Flags = Op->getFlags();
+ const TargetOptions &Options = DAG.getTarget().Options;
+ if (!Op.hasOneUse() && !(Op.getOpcode() == ISD::FP_EXTEND &&
+ isFPExtFree(VT, Op.getOperand(0).getValueType())))
+ return 0;
+
+ // Don't recurse exponentially.
+ if (Depth > SelectionDAG::MaxRecursionDepth)
+ return 0;
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ if (!LegalOperations)
+ return 1;
+
+ // Don't invert constant FP values after legalization unless the target says
+ // the negated constant is legal.
+ return isOperationLegal(ISD::ConstantFP, VT) ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(Op)->getValueAPF()), VT,
+ ForCodeSize);
+ }
+ case ISD::BUILD_VECTOR: {
+ // Only permit BUILD_VECTOR of constants.
+ if (llvm::any_of(Op->op_values(), [&](SDValue N) {
+ return !N.isUndef() && !isa<ConstantFPSDNode>(N);
+ }))
+ return 0;
+ if (!LegalOperations)
+ return 1;
+ if (isOperationLegal(ISD::ConstantFP, VT) &&
+ isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return 1;
+ return llvm::all_of(Op->op_values(), [&](SDValue N) {
+ return N.isUndef() ||
+ isFPImmLegal(neg(cast<ConstantFPSDNode>(N)->getValueAPF()), VT,
+ ForCodeSize);
+ });
+ }
+ case ISD::FADD:
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // After operation legalization, it might not be legal to create new FSUBs.
+ if (LegalOperations && !isOperationLegalOrCustom(ISD::FSUB, VT))
+ return 0;
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ case ISD::FSUB:
+ // We can't turn -(A-B) into B-A when we honor signed zeros.
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return 1;
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
+ if (char V = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1))
+ return V;
+
+ // Ignore X * 2.0 because that is expected to be canonicalized to X + X.
+ if (auto *C = isConstOrConstSplatFP(Op.getOperand(1)))
+ if (C->isExactlyValue(2.0) && Op.getOpcode() == ISD::FMUL)
+ return 0;
+
+ return isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ if (!Options.NoSignedZerosFPMath && !Flags.hasNoSignedZeros())
+ return 0;
+
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ char V2 = isNegatibleForFree(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (!V2)
+ return 0;
+
+ // One of Op0/Op1 must be cheaply negatible; then select the cheapest.
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V01 = std::max(V0, V1);
+ return V01 ? std::max(V01, V2) : 0;
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FP_ROUND:
+ case ISD::FSIN:
+ return isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ }
+
+ return 0;
+}
+
+SDValue TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
+ bool LegalOperations,
+ bool ForCodeSize,
+ unsigned Depth) const {
+ // fneg is removable even if it has multiple uses.
+ if (Op.getOpcode() == ISD::FNEG)
+ return Op.getOperand(0);
+
+ assert(Depth <= SelectionDAG::MaxRecursionDepth &&
+ "getNegatedExpression doesn't match isNegatibleForFree");
+ const SDNodeFlags Flags = Op->getFlags();
+
+ switch (Op.getOpcode()) {
+ case ISD::ConstantFP: {
+ APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
+ V.changeSign();
+ return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+ }
+ case ISD::BUILD_VECTOR: {
+ SmallVector<SDValue, 4> Ops;
+ for (SDValue C : Op->op_values()) {
+ if (C.isUndef()) {
+ Ops.push_back(C);
+ continue;
+ }
+ APFloat V = cast<ConstantFPSDNode>(C)->getValueAPF();
+ V.changeSign();
+ Ops.push_back(DAG.getConstantFP(V, SDLoc(Op), C.getValueType()));
+ }
+ return DAG.getBuildVector(Op.getValueType(), SDLoc(Op), Ops);
+ }
+ case ISD::FADD:
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+ // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(1), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(0), Flags);
+ case ISD::FSUB:
+ // fold (fneg (fsub 0, B)) -> B
+ if (ConstantFPSDNode *N0CFP =
+ isConstOrConstSplatFP(Op.getOperand(0), /*AllowUndefs*/ true))
+ if (N0CFP->isZero())
+ return Op.getOperand(1);
+
+ // fold (fneg (fsub A, B)) -> (fsub B, A)
+ return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
+ Op.getOperand(1), Op.getOperand(0), Flags);
+
+ case ISD::FMUL:
+ case ISD::FDIV:
+ // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
+ if (isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations, ForCodeSize,
+ Depth + 1))
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1), Flags);
+
+ // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
+ return DAG.getNode(
+ Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0),
+ getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1),
+ Flags);
+
+ case ISD::FMA:
+ case ISD::FMAD: {
+ assert((DAG.getTarget().Options.NoSignedZerosFPMath ||
+ Flags.hasNoSignedZeros()) &&
+ "Expected NSZ fp-flag");
+
+ SDValue Neg2 = getNegatedExpression(Op.getOperand(2), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+
+ char V0 = isNegatibleForFree(Op.getOperand(0), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ char V1 = isNegatibleForFree(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ if (V0 >= V1) {
+ // fold (fneg (fma X, Y, Z)) -> (fma (fneg X), Y, (fneg Z))
+ SDValue Neg0 = getNegatedExpression(
+ Op.getOperand(0), DAG, LegalOperations, ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Neg0,
+ Op.getOperand(1), Neg2, Flags);
+ }
+
+ // fold (fneg (fma X, Y, Z)) -> (fma X, (fneg Y), (fneg Z))
+ SDValue Neg1 = getNegatedExpression(Op.getOperand(1), DAG, LegalOperations,
+ ForCodeSize, Depth + 1);
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ Op.getOperand(0), Neg1, Neg2, Flags);
+ }
+
+ case ISD::FP_EXTEND:
+ case ISD::FSIN:
+ return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1));
+ case ISD::FP_ROUND:
+ return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
+ getNegatedExpression(Op.getOperand(0), DAG,
+ LegalOperations, ForCodeSize,
+ Depth + 1),
+ Op.getOperand(1));
+ }
+
+ llvm_unreachable("Unknown code");
+}
+
//===----------------------------------------------------------------------===//
// Legalization Utilities
//===----------------------------------------------------------------------===//
@@ -4862,7 +5874,8 @@ bool TargetLowering::expandROT(SDNode *Node, SDValue &Result,
bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
SelectionDAG &DAG) const {
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
SDLoc dl(SDValue(Node, 0));
@@ -4871,6 +5884,13 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
if (SrcVT != MVT::f32 || DstVT != MVT::i64)
return false;
+ if (Node->isStrictFPOpcode())
+ // When a NaN is converted to an integer, a trap is allowed. We can't
+ // use this expansion here because it would eliminate that trap. Other
+ // traps are also allowed and cannot be eliminated. See
+ // IEEE 754-2008 sec 5.8.
+ return false;
+
// Expand f32 -> i64 conversion
// This algorithm comes from compiler-rt's implementation of fixsfdi:
// https://github.com/llvm/llvm-project/blob/master/compiler-rt/lib/builtins/fixsfdi.c
@@ -4924,9 +5944,11 @@ bool TargetLowering::expandFP_TO_SINT(SDNode *Node, SDValue &Result,
}
bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
+ SDValue &Chain,
SelectionDAG &DAG) const {
SDLoc dl(SDValue(Node, 0));
- SDValue Src = Node->getOperand(0);
+ unsigned OpNo = Node->isStrictFPOpcode() ? 1 : 0;
+ SDValue Src = Node->getOperand(OpNo);
EVT SrcVT = Src.getValueType();
EVT DstVT = Node->getValueType(0);
@@ -4934,7 +5956,9 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
// Only expand vector types if we have the appropriate vector bit operations.
- if (DstVT.isVector() && (!isOperationLegalOrCustom(ISD::FP_TO_SINT, DstVT) ||
+ unsigned SIntOpcode = Node->isStrictFPOpcode() ? ISD::STRICT_FP_TO_SINT :
+ ISD::FP_TO_SINT;
+ if (DstVT.isVector() && (!isOperationLegalOrCustom(SIntOpcode, DstVT) ||
!isOperationLegalOrCustomOrPromote(ISD::XOR, SrcVT)))
return false;
@@ -4946,14 +5970,21 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
APInt SignMask = APInt::getSignMask(DstVT.getScalarSizeInBits());
if (APFloat::opOverflow &
APF.convertFromAPInt(SignMask, false, APFloat::rmNearestTiesToEven)) {
- Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
+ if (Node->isStrictFPOpcode()) {
+ Result = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { Node->getOperand(0), Src });
+ Chain = Result.getValue(1);
+ } else
+ Result = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Src);
return true;
}
SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT);
- bool Strict = shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+ bool Strict = Node->isStrictFPOpcode() ||
+ shouldUseStrictFP_TO_INT(SrcVT, DstVT, /*IsSigned*/ false);
+
if (Strict) {
// Expand based on maximum range of FP_TO_SINT, if the value exceeds the
// signmask then offset (the result of which should be fully representable).
@@ -4963,12 +5994,23 @@ bool TargetLowering::expandFP_TO_UINT(SDNode *Node, SDValue &Result,
// Result = fp_to_sint(Val) ^ Ofs
// TODO: Should any fast-math-flags be set for the FSUB?
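// e.g. (illustrative, f32 -> u32): Src = 3e9 u>= 2^31, so we convert
// Val = 3e9 - 2^31 = 852516352 and XOR the result with 0x80000000 to
// get 3000000000.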
- SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src,
- DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst));
+ SDValue SrcBiased;
+ if (Node->isStrictFPOpcode())
+ SrcBiased = DAG.getNode(ISD::STRICT_FSUB, dl, { SrcVT, MVT::Other },
+ { Node->getOperand(0), Src, Cst });
+ else
+ SrcBiased = DAG.getNode(ISD::FSUB, dl, SrcVT, Src, Cst);
+ SDValue Val = DAG.getSelect(dl, SrcVT, Sel, Src, SrcBiased);
SDValue Ofs = DAG.getSelect(dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT),
DAG.getConstant(SignMask, dl, DstVT));
- Result = DAG.getNode(ISD::XOR, dl, DstVT,
- DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val), Ofs);
+ SDValue SInt;
+ if (Node->isStrictFPOpcode()) {
+ SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { DstVT, MVT::Other },
+ { SrcBiased.getValue(1), Val });
+ Chain = SInt.getValue(1);
+ } else
+ SInt = DAG.getNode(ISD::FP_TO_SINT, dl, DstVT, Val);
+ Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, Ofs);
} else {
// Expand based on maximum range of FP_TO_SINT:
// True = fp_to_sint(Src)
@@ -5918,7 +6960,8 @@ SDValue
TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
assert((Node->getOpcode() == ISD::SMULFIX ||
Node->getOpcode() == ISD::UMULFIX ||
- Node->getOpcode() == ISD::SMULFIXSAT) &&
+ Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT) &&
"Expected a fixed point multiplication opcode");
SDLoc dl(Node);
@@ -5926,15 +6969,19 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue RHS = Node->getOperand(1);
EVT VT = LHS.getValueType();
unsigned Scale = Node->getConstantOperandVal(2);
- bool Saturating = Node->getOpcode() == ISD::SMULFIXSAT;
+ bool Saturating = (Node->getOpcode() == ISD::SMULFIXSAT ||
+ Node->getOpcode() == ISD::UMULFIXSAT);
+ bool Signed = (Node->getOpcode() == ISD::SMULFIX ||
+ Node->getOpcode() == ISD::SMULFIXSAT);
EVT BoolVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
unsigned VTSize = VT.getScalarSizeInBits();
if (!Scale) {
// [us]mul.fix(a, b, 0) -> mul(a, b)
- if (!Saturating && isOperationLegalOrCustom(ISD::MUL, VT)) {
- return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
- } else if (Saturating && isOperationLegalOrCustom(ISD::SMULO, VT)) {
+ if (!Saturating) {
+ if (isOperationLegalOrCustom(ISD::MUL, VT))
+ return DAG.getNode(ISD::MUL, dl, VT, LHS, RHS);
+ } else if (Signed && isOperationLegalOrCustom(ISD::SMULO, VT)) {
SDValue Result =
DAG.getNode(ISD::SMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
SDValue Product = Result.getValue(0);
@@ -5948,11 +6995,18 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
SDValue ProdNeg = DAG.getSetCC(dl, BoolVT, Product, Zero, ISD::SETLT);
Result = DAG.getSelect(dl, VT, ProdNeg, SatMax, SatMin);
return DAG.getSelect(dl, VT, Overflow, Result, Product);
+ } else if (!Signed && isOperationLegalOrCustom(ISD::UMULO, VT)) {
+ SDValue Result =
+ DAG.getNode(ISD::UMULO, dl, DAG.getVTList(VT, BoolVT), LHS, RHS);
+ SDValue Product = Result.getValue(0);
+ SDValue Overflow = Result.getValue(1);
+
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue SatMax = DAG.getConstant(MaxVal, dl, VT);
+ return DAG.getSelect(dl, VT, Overflow, SatMax, Product);
}
}
- bool Signed =
- Node->getOpcode() == ISD::SMULFIX || Node->getOpcode() == ISD::SMULFIXSAT;
assert(((Signed && Scale < VTSize) || (!Signed && Scale <= VTSize)) &&
"Expected scale to be less than the number of bits if signed or at "
"most the number of bits if unsigned.");
@@ -5978,7 +7032,8 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (Scale == VTSize)
// Result is just the top half since we'd be shifting by the width of the
- // operand.
+ // operand. Overflow is impossible, so this works for both UMULFIX and
+ // UMULFIXSAT.
return Hi;
// The result will need to be shifted right by the scale since both operands
@@ -5990,20 +7045,55 @@ TargetLowering::expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const {
if (!Saturating)
return Result;
- unsigned OverflowBits = VTSize - Scale + 1; // +1 for the sign
- SDValue HiMask =
- DAG.getConstant(APInt::getHighBitsSet(VTSize, OverflowBits), dl, VT);
- SDValue LoMask = DAG.getConstant(
- APInt::getLowBitsSet(VTSize, VTSize - OverflowBits), dl, VT);
- APInt MaxVal = APInt::getSignedMaxValue(VTSize);
- APInt MinVal = APInt::getSignedMinValue(VTSize);
-
- Result = DAG.getSelectCC(dl, Hi, LoMask,
- DAG.getConstant(MaxVal, dl, VT), Result,
- ISD::SETGT);
- return DAG.getSelectCC(dl, Hi, HiMask,
- DAG.getConstant(MinVal, dl, VT), Result,
- ISD::SETLT);
+ if (!Signed) {
+ // Unsigned overflow happened if the upper (VTSize - Scale) bits (of the
+ // widened multiplication) aren't all zeroes.
+
+ // Saturate to max if ((Hi >> Scale) != 0),
+ // which is the same as if (Hi > ((1 << Scale) - 1))
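+ // e.g. (illustrative) for i8 umul.fix.sat with Scale = 4 (Q4.4):
+ // 3.5 * 5.0 is LHS = 56, RHS = 80, Hi:Lo = 0x1180; Hi (17) u> 15, so the
+ // result saturates to 0xFF.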
+ APInt MaxVal = APInt::getMaxValue(VTSize);
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask,
+ DAG.getConstant(MaxVal, dl, VT), Result,
+ ISD::SETUGT);
+
+ return Result;
+ }
+
+ // Signed overflow happened if the upper (VTSize - Scale + 1) bits (of the
+ // widened multiplication) aren't all ones or all zeroes.
+
+ SDValue SatMin = DAG.getConstant(APInt::getSignedMinValue(VTSize), dl, VT);
+ SDValue SatMax = DAG.getConstant(APInt::getSignedMaxValue(VTSize), dl, VT);
+
+ if (Scale == 0) {
+ SDValue Sign = DAG.getNode(ISD::SRA, dl, VT, Lo,
+ DAG.getConstant(VTSize - 1, dl, ShiftTy));
+ SDValue Overflow = DAG.getSetCC(dl, BoolVT, Hi, Sign, ISD::SETNE);
+ // Saturate to SatMin if the wide product is negative, and to SatMax if
+ // the wide product is positive ...
+ SDValue Zero = DAG.getConstant(0, dl, VT);
+ SDValue ResultIfOverflow = DAG.getSelectCC(dl, Hi, Zero, SatMin, SatMax,
+ ISD::SETLT);
+ // ... but only if we overflowed.
+ return DAG.getSelect(dl, VT, Overflow, ResultIfOverflow, Result);
+ }
+
+ // We handled Scale == 0 above, so all the bits to examine are in Hi.
+
+ // Saturate to max if ((Hi >> (Scale - 1)) > 0),
+ // which is the same as if (Hi > (1 << (Scale - 1)) - 1)
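+ // e.g. (illustrative) for i8 smul.fix.sat with Scale = 4 (Q3.4):
+ // 5.0 * 2.0 is Hi:Lo = 0x0A00; Hi (10) > (1 << 3) - 1 = 7, so the result
+ // saturates to SatMax.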
+ SDValue LowMask = DAG.getConstant(APInt::getLowBitsSet(VTSize, Scale - 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, LowMask, SatMax, Result, ISD::SETGT);
+ // Saturate to min if ((Hi >> (Scale - 1)) < -1),
+ // which is the same as if (Hi < (-1 << (Scale - 1)))
+ SDValue HighMask =
+ DAG.getConstant(APInt::getHighBitsSet(VTSize, VTSize - Scale + 1),
+ dl, VT);
+ Result = DAG.getSelectCC(dl, Hi, HighMask, SatMin, Result, ISD::SETLT);
+ return Result;
}
void TargetLowering::expandUADDSUBO(
@@ -6060,24 +7150,19 @@ void TargetLowering::expandSADDSUBO(
SDValue Zero = DAG.getConstant(0, dl, LHS.getValueType());
- // LHSSign -> LHS >= 0
- // RHSSign -> RHS >= 0
- // SumSign -> Result >= 0
- //
- // Add:
- // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
- // Sub:
- // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
- SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE);
- SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE);
- SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign,
- IsAdd ? ISD::SETEQ : ISD::SETNE);
-
- SDValue SumSign = DAG.getSetCC(dl, OType, Result, Zero, ISD::SETGE);
- SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE);
-
- SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE);
- Overflow = DAG.getBoolExtOrTrunc(Cmp, dl, ResultType, ResultType);
+ // For an addition, the result should be less than the LHS operand if and
+ // only if the RHS operand is negative; otherwise there was overflow.
+ // For a subtraction, the result should be less than the LHS operand if
+ // and only if the RHS operand is (non-zero) positive; otherwise there was
+ // overflow.
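+ // e.g. (illustrative, i8 saddo): 100 + 100 wraps to -56; Result < LHS is
+ // true while RHS < 0 is false, so the XOR below signals overflow.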
+ SDValue ResultLowerThanLHS = DAG.getSetCC(dl, OType, Result, LHS, ISD::SETLT);
+ SDValue ConditionRHS =
+ DAG.getSetCC(dl, OType, RHS, Zero, IsAdd ? ISD::SETLT : ISD::SETGT);
+
+ Overflow = DAG.getBoolExtOrTrunc(
+ DAG.getNode(ISD::XOR, dl, OType, ConditionRHS, ResultLowerThanLHS), dl,
+ ResultType, ResultType);
}
bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
@@ -6176,20 +7261,19 @@ bool TargetLowering::expandMULO(SDNode *Node, SDValue &Result,
// being a legal type for the architecture and thus has to be split to
// two arguments.
SDValue Ret;
+ TargetLowering::MakeLibCallOptions CallOptions;
+ CallOptions.setSExt(isSigned);
+ CallOptions.setIsPostTypeLegalization(true);
if (shouldSplitFunctionArgumentsAsLittleEndian(DAG.getDataLayout())) {
// Halves of WideVT are packed into registers in different order
// depending on platform endianness. This is usually handled by
// the C calling convention, but we can't defer to it in
// the legalizer.
SDValue Args[] = { LHS, HiLHS, RHS, HiRHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
} else {
SDValue Args[] = { HiLHS, LHS, HiRHS, RHS };
- Ret = makeLibCall(DAG, LC, WideVT, Args, isSigned, dl,
- /* doesNotReturn */ false, /* isReturnValueUsed */ true,
- /* isPostTypeLegalization */ true).first;
+ Ret = makeLibCall(DAG, LC, WideVT, Args, CallOptions, dl).first;
}
assert(Ret.getOpcode() == ISD::MERGE_VALUES &&
"Ret value is a collection of constituent nodes holding result.");
diff --git a/lib/CodeGen/ShrinkWrap.cpp b/lib/CodeGen/ShrinkWrap.cpp
index 2db0ea570598..412a00095b9b 100644
--- a/lib/CodeGen/ShrinkWrap.cpp
+++ b/lib/CodeGen/ShrinkWrap.cpp
@@ -278,11 +278,10 @@ bool ShrinkWrap::useOrDefCSROrFI(const MachineInstr &MI,
// Ignore instructions like DBG_VALUE which don't read/def the register.
if (!MO.isDef() && !MO.readsReg())
continue;
- unsigned PhysReg = MO.getReg();
+ Register PhysReg = MO.getReg();
if (!PhysReg)
continue;
- assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) &&
- "Unallocated register?!");
+ assert(Register::isPhysicalRegister(PhysReg) && "Unallocated register?!");
// The stack pointer is not normally described as a callee-saved register
// in calling convention definitions, so we need to watch for it
// separately. An SP mentioned by a call instruction, we can ignore,
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index 23e5ce0acae8..db520d4e6403 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -477,7 +477,10 @@ bool SjLjEHPrepare::runOnFunction(Function &F) {
UnregisterFn = M.getOrInsertFunction(
"_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()),
PointerType::getUnqual(FunctionContextTy));
- FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress);
+ FrameAddrFn = Intrinsic::getDeclaration(
+ &M, Intrinsic::frameaddress,
+ {Type::getInt8PtrTy(M.getContext(),
+ M.getDataLayout().getAllocaAddrSpace())});
StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave);
StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore);
BuiltinSetupDispatchFn =
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 5c944fe3f6b3..0c1f1220c421 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -12,7 +12,6 @@
//===----------------------------------------------------------------------===//
#include "SplitKit.h"
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/None.h"
@@ -22,6 +21,7 @@
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -437,7 +437,7 @@ void SplitEditor::addDeadDef(LiveInterval &LI, VNInfo *VNI, bool Original) {
assert(DefMI != nullptr);
LaneBitmask LM;
for (const MachineOperand &DefOp : DefMI->defs()) {
- unsigned R = DefOp.getReg();
+ Register R = DefOp.getReg();
if (R != LI.reg)
continue;
if (unsigned SR = DefOp.getSubReg())
@@ -1373,7 +1373,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) {
assert(LI.hasSubRanges());
LiveRangeCalc SubLRC;
- unsigned Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
+ Register Reg = EP.MO.getReg(), Sub = EP.MO.getSubReg();
LaneBitmask LM = Sub != 0 ? TRI.getSubRegIndexLaneMask(Sub)
: MRI.getMaxLaneMaskForVReg(Reg);
for (LiveInterval::SubRange &S : LI.subranges()) {
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index 86ad3811e3ad..78f0bbd24db5 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -14,7 +14,6 @@
#ifndef LLVM_LIB_CODEGEN_SPLITKIT_H
#define LLVM_LIB_CODEGEN_SPLITKIT_H
-#include "LiveRangeCalc.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/DenseMap.h"
@@ -25,6 +24,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
+#include "llvm/CodeGen/LiveRangeCalc.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/SlotIndexes.h"
diff --git a/lib/CodeGen/StackMaps.cpp b/lib/CodeGen/StackMaps.cpp
index ae9401b89700..383c91259ffc 100644
--- a/lib/CodeGen/StackMaps.cpp
+++ b/lib/CodeGen/StackMaps.cpp
@@ -113,7 +113,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
unsigned Size = DL.getPointerSizeInBits();
assert((Size % 8) == 0 && "Need pointer size in bytes.");
Size /= 8;
- unsigned Reg = (++MOI)->getReg();
+ Register Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
Locs.emplace_back(StackMaps::Location::Direct, Size,
getDwarfRegNum(Reg, TRI), Imm);
@@ -122,7 +122,7 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
case StackMaps::IndirectMemRefOp: {
int64_t Size = (++MOI)->getImm();
assert(Size > 0 && "Need a valid size for indirect memory locations.");
- unsigned Reg = (++MOI)->getReg();
+ Register Reg = (++MOI)->getReg();
int64_t Imm = (++MOI)->getImm();
Locs.emplace_back(StackMaps::Location::Indirect, Size,
getDwarfRegNum(Reg, TRI), Imm);
@@ -148,14 +148,14 @@ StackMaps::parseOperand(MachineInstr::const_mop_iterator MOI,
if (MOI->isImplicit())
return ++MOI;
- assert(TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) &&
+ assert(Register::isPhysicalRegister(MOI->getReg()) &&
"Virtreg operands should have been rewritten before now.");
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(MOI->getReg());
assert(!MOI->getSubReg() && "Physical subreg still around.");
unsigned Offset = 0;
unsigned DwarfRegNum = getDwarfRegNum(MOI->getReg(), TRI);
- unsigned LLVMRegNum = TRI->getLLVMRegNum(DwarfRegNum, false);
+ unsigned LLVMRegNum = *TRI->getLLVMRegNum(DwarfRegNum, false);
unsigned SubRegIdx = TRI->getSubRegIndex(LLVMRegNum, MOI->getReg());
if (SubRegIdx)
Offset = TRI->getSubRegIdxOffset(SubRegIdx);
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 809960c7fdf9..5683d1db473c 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -17,7 +17,6 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/Passes.h"
@@ -157,6 +156,68 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge,
return NeedsProtector;
}
+bool StackProtector::HasAddressTaken(const Instruction *AI) {
+ for (const User *U : AI->users()) {
+ const auto *I = cast<Instruction>(U);
+ switch (I->getOpcode()) {
+ case Instruction::Store:
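+ // Storing the pointer itself, rather than storing to it, takes the
+ // address.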
+ if (AI == cast<StoreInst>(I)->getValueOperand())
+ return true;
+ break;
+ case Instruction::AtomicCmpXchg:
+ // cmpxchg conceptually includes both a load and store from the same
+ // location. So, like store, the value being stored is what matters.
+ if (AI == cast<AtomicCmpXchgInst>(I)->getNewValOperand())
+ return true;
+ break;
+ case Instruction::PtrToInt:
+ if (AI == cast<PtrToIntInst>(I)->getOperand(0))
+ return true;
+ break;
+ case Instruction::Call: {
+ // Ignore intrinsics that do not become real instructions.
+ // TODO: Narrow this to intrinsics that have store-like effects.
+ const auto *CI = cast<CallInst>(I);
+ if (!isa<DbgInfoIntrinsic>(CI) && !CI->isLifetimeStartOrEnd())
+ return true;
+ break;
+ }
+ case Instruction::Invoke:
+ return true;
+ case Instruction::BitCast:
+ case Instruction::GetElementPtr:
+ case Instruction::Select:
+ case Instruction::AddrSpaceCast:
+ if (HasAddressTaken(I))
+ return true;
+ break;
+ case Instruction::PHI: {
+ // Keep track of what PHI nodes we have already visited to ensure
+ // they are only visited once.
+ const auto *PN = cast<PHINode>(I);
+ if (VisitedPHIs.insert(PN).second)
+ if (HasAddressTaken(PN))
+ return true;
+ break;
+ }
+ case Instruction::Load:
+ case Instruction::AtomicRMW:
+ case Instruction::Ret:
+ // These instructions take an address operand, but have load-like or
+ // other innocuous behavior that should not trigger a stack protector.
+ // atomicrmw conceptually has both load and store semantics, but the
+ // value being stored must be an integer; so if a pointer is being stored,
+ // we'll catch it in the PtrToInt case above.
+ break;
+ default:
+ // Conservatively return true for any instruction that takes an address
+ // operand, but is not handled above.
+ return true;
+ }
+ }
+ return false;
+}
+
/// Search for the first call to the llvm.stackprotector intrinsic and return it
/// if present.
static const CallInst *findStackProtectorIntrinsic(Function &F) {
@@ -264,9 +325,7 @@ bool StackProtector::RequiresStackProtector() {
continue;
}
- if (Strong && PointerMayBeCaptured(AI,
- /* ReturnCaptures */ false,
- /* StoreCaptures */ true)) {
+ if (Strong && HasAddressTaken(AI)) {
++NumAddrTaken;
Layout.insert(std::make_pair(AI, MachineFrameInfo::SSPLK_AddrOf));
ORE.emit([&]() {
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 99b533e10b87..9c8143c55dc2 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -221,7 +221,7 @@ void StackSlotColoring::InitializeSlots() {
for (auto *I : Intervals) {
LiveInterval &li = I->second;
LLVM_DEBUG(li.dump());
- int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
+ int FI = Register::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
@@ -268,7 +268,7 @@ StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const {
int StackSlotColoring::ColorSlot(LiveInterval *li) {
int Color = -1;
bool Share = false;
- int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int FI = Register::stackSlot2Index(li->reg);
uint8_t StackID = MFI->getStackID(FI);
if (!DisableSharing) {
@@ -330,7 +330,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg);
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
@@ -343,7 +343,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
+ int SS = Register::stackSlot2Index(li->reg);
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
diff --git a/lib/CodeGen/SwiftErrorValueTracking.cpp b/lib/CodeGen/SwiftErrorValueTracking.cpp
index 96821cadb1b6..c72a04276a4f 100644
--- a/lib/CodeGen/SwiftErrorValueTracking.cpp
+++ b/lib/CodeGen/SwiftErrorValueTracking.cpp
@@ -13,9 +13,10 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/SwiftErrorValueTracking.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/IR/Value.h"
diff --git a/lib/CodeGen/TailDuplicator.cpp b/lib/CodeGen/TailDuplicator.cpp
index a0590a8a6cc6..03c68a37e459 100644
--- a/lib/CodeGen/TailDuplicator.cpp
+++ b/lib/CodeGen/TailDuplicator.cpp
@@ -235,8 +235,8 @@ bool TailDuplicator::tailDuplicateAndUpdate(
MachineInstr *Copy = Copies[i];
if (!Copy->isCopy())
continue;
- unsigned Dst = Copy->getOperand(0).getReg();
- unsigned Src = Copy->getOperand(1).getReg();
+ Register Dst = Copy->getOperand(0).getReg();
+ Register Src = Copy->getOperand(1).getReg();
if (MRI->hasOneNonDBGUse(Src) &&
MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
// Copy is the only use. Do trivial copy propagation here.
@@ -312,7 +312,7 @@ static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
if (!MI.isPHI())
break;
for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
- unsigned SrcReg = MI.getOperand(i).getReg();
+ Register SrcReg = MI.getOperand(i).getReg();
UsedByPhi->insert(SrcReg);
}
}
@@ -340,17 +340,17 @@ void TailDuplicator::processPHI(
DenseMap<unsigned, RegSubRegPair> &LocalVRMap,
SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies,
const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) {
- unsigned DefReg = MI->getOperand(0).getReg();
+ Register DefReg = MI->getOperand(0).getReg();
unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
assert(SrcOpIdx && "Unable to find matching PHI source?");
- unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ Register SrcReg = MI->getOperand(SrcOpIdx).getReg();
unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
// Insert a copy from source to the end of the block. The def register is the
// available value liveout of the block.
- unsigned NewDef = MRI->createVirtualRegister(RC);
+ Register NewDef = MRI->createVirtualRegister(RC);
Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
addSSAUpdateEntry(DefReg, NewDef, PredBB);
@@ -384,12 +384,12 @@ void TailDuplicator::duplicateInstruction(
MachineOperand &MO = NewMI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned Reg = MO.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ Register Reg = MO.getReg();
+ if (!Register::isVirtualRegister(Reg))
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- unsigned NewReg = MRI->createVirtualRegister(RC);
+ Register NewReg = MRI->createVirtualRegister(RC);
MO.setReg(NewReg);
LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
@@ -433,7 +433,7 @@ void TailDuplicator::duplicateInstruction(
auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
if (NewRC == nullptr)
NewRC = OrigRC;
- unsigned NewReg = MRI->createVirtualRegister(NewRC);
+ Register NewReg = MRI->createVirtualRegister(NewRC);
BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
TII->get(TargetOpcode::COPY), NewReg)
.addReg(VI->second.Reg, 0, VI->second.SubReg);
@@ -477,7 +477,7 @@ void TailDuplicator::updateSuccessorsPHIs(
assert(Idx != 0);
MachineOperand &MO0 = MI.getOperand(Idx);
- unsigned Reg = MO0.getReg();
+ Register Reg = MO0.getReg();
if (isDead) {
// Folded into the previous BB.
// There could be duplicate phi source entries. FIXME: Should sdisel
diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp
index 9c4483cb240d..9eeacc2584cb 100644
--- a/lib/CodeGen/TargetFrameLoweringImpl.cpp
+++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp
@@ -18,6 +18,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/MC/MCRegisterInfo.h"
@@ -71,7 +72,9 @@ void TargetFrameLowering::determineCalleeSaves(MachineFunction &MF,
// When interprocedural register allocation is enabled caller saved registers
// are preferred over callee saved registers.
- if (MF.getTarget().Options.EnableIPRA && isSafeForNoCSROpt(MF.getFunction()))
+ if (MF.getTarget().Options.EnableIPRA &&
+ isSafeForNoCSROpt(MF.getFunction()) &&
+ isProfitableForNoCSROpt(MF.getFunction()))
return;
// Get the callee saved register list...
@@ -118,6 +121,18 @@ unsigned TargetFrameLowering::getStackAlignmentSkew(
return 0;
}
+bool TargetFrameLowering::isSafeForNoCSROpt(const Function &F) {
+ if (!F.hasLocalLinkage() || F.hasAddressTaken() ||
+ !F.hasFnAttribute(Attribute::NoRecurse))
+ return false;
+ // The function should not be optimized as a tail call.
+ for (const User *U : F.users())
+ if (auto CS = ImmutableCallSite(U))
+ if (CS.isTailCall())
+ return false;
+ return true;
+}
+
int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
llvm_unreachable("getInitialCFAOffset() not implemented!");
}
@@ -125,4 +140,4 @@ int TargetFrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
unsigned TargetFrameLowering::getInitialCFARegister(const MachineFunction &MF)
const {
llvm_unreachable("getInitialCFARegister() not implemented!");
-} \ No newline at end of file
+}
diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp
index 868617ffe14d..6cae3b869501 100644
--- a/lib/CodeGen/TargetInstrInfo.cpp
+++ b/lib/CodeGen/TargetInstrInfo.cpp
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrItineraries.h"
#include "llvm/Support/CommandLine.h"
@@ -142,7 +143,7 @@ TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
while (Tail != MBB->end()) {
auto MI = Tail++;
if (MI->isCall())
- MBB->getParent()->updateCallSiteInfo(&*MI);
+ MBB->getParent()->eraseCallSiteInfo(&*MI);
MBB->erase(MI);
}
@@ -183,10 +184,10 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
bool Reg2IsInternal = MI.getOperand(Idx2).isInternalRead();
// Avoid calling isRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- bool Reg1IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg1)
+ bool Reg1IsRenamable = Register::isPhysicalRegister(Reg1)
? MI.getOperand(Idx1).isRenamable()
: false;
- bool Reg2IsRenamable = TargetRegisterInfo::isPhysicalRegister(Reg2)
+ bool Reg2IsRenamable = Register::isPhysicalRegister(Reg2)
? MI.getOperand(Idx2).isRenamable()
: false;
// If destination is tied to either of the commuted source register, then
@@ -228,9 +229,9 @@ MachineInstr *TargetInstrInfo::commuteInstructionImpl(MachineInstr &MI,
CommutedMI->getOperand(Idx1).setIsInternalRead(Reg2IsInternal);
// Avoid calling setIsRenamable for virtual registers since we assert that
// renamable property is only queried/set for physical registers.
- if (TargetRegisterInfo::isPhysicalRegister(Reg1))
+ if (Register::isPhysicalRegister(Reg1))
CommutedMI->getOperand(Idx2).setIsRenamable(Reg1IsRenamable);
- if (TargetRegisterInfo::isPhysicalRegister(Reg2))
+ if (Register::isPhysicalRegister(Reg2))
CommutedMI->getOperand(Idx1).setIsRenamable(Reg2IsRenamable);
return CommutedMI;
}
@@ -281,7 +282,7 @@ bool TargetInstrInfo::fixCommutedOpIndices(unsigned &ResultIdx1,
return true;
}
-bool TargetInstrInfo::findCommutedOpIndices(MachineInstr &MI,
+bool TargetInstrInfo::findCommutedOpIndices(const MachineInstr &MI,
unsigned &SrcOpIdx1,
unsigned &SrcOpIdx2) const {
assert(!MI.isBundle() &&
@@ -393,7 +394,7 @@ bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
if (BitOffset < 0 || BitOffset % 8)
return false;
- Size = BitSize /= 8;
+ Size = BitSize / 8;
Offset = (unsigned)BitOffset / 8;
assert(TRI->getSpillSize(*RC) >= (Offset + Size) && "bad subregister range");
@@ -442,16 +443,15 @@ static const TargetRegisterClass *canFoldCopy(const MachineInstr &MI,
if (FoldOp.getSubReg() || LiveOp.getSubReg())
return nullptr;
- unsigned FoldReg = FoldOp.getReg();
- unsigned LiveReg = LiveOp.getReg();
+ Register FoldReg = FoldOp.getReg();
+ Register LiveReg = LiveOp.getReg();
- assert(TargetRegisterInfo::isVirtualRegister(FoldReg) &&
- "Cannot fold physregs");
+ assert(Register::isVirtualRegister(FoldReg) && "Cannot fold physregs");
const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
const TargetRegisterClass *RC = MRI.getRegClass(FoldReg);
- if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg()))
+ if (Register::isPhysicalRegister(LiveOp.getReg()))
return RC->contains(LiveOp.getReg()) ? RC : nullptr;
if (RC->hasSubClassEq(MRI.getRegClass(LiveReg)))
@@ -674,9 +674,9 @@ bool TargetInstrInfo::hasReassociableOperands(
// reassociate.
MachineInstr *MI1 = nullptr;
MachineInstr *MI2 = nullptr;
- if (Op1.isReg() && TargetRegisterInfo::isVirtualRegister(Op1.getReg()))
+ if (Op1.isReg() && Register::isVirtualRegister(Op1.getReg()))
MI1 = MRI.getUniqueVRegDef(Op1.getReg());
- if (Op2.isReg() && TargetRegisterInfo::isVirtualRegister(Op2.getReg()))
+ if (Op2.isReg() && Register::isVirtualRegister(Op2.getReg()))
MI2 = MRI.getUniqueVRegDef(Op2.getReg());
// And they need to be in the trace (otherwise, they won't have a depth).
@@ -805,27 +805,27 @@ void TargetInstrInfo::reassociateOps(
MachineOperand &OpY = Root.getOperand(OpIdx[Row][3]);
MachineOperand &OpC = Root.getOperand(0);
- unsigned RegA = OpA.getReg();
- unsigned RegB = OpB.getReg();
- unsigned RegX = OpX.getReg();
- unsigned RegY = OpY.getReg();
- unsigned RegC = OpC.getReg();
+ Register RegA = OpA.getReg();
+ Register RegB = OpB.getReg();
+ Register RegX = OpX.getReg();
+ Register RegY = OpY.getReg();
+ Register RegC = OpC.getReg();
- if (TargetRegisterInfo::isVirtualRegister(RegA))
+ if (Register::isVirtualRegister(RegA))
MRI.constrainRegClass(RegA, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegB))
+ if (Register::isVirtualRegister(RegB))
MRI.constrainRegClass(RegB, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegX))
+ if (Register::isVirtualRegister(RegX))
MRI.constrainRegClass(RegX, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegY))
+ if (Register::isVirtualRegister(RegY))
MRI.constrainRegClass(RegY, RC);
- if (TargetRegisterInfo::isVirtualRegister(RegC))
+ if (Register::isVirtualRegister(RegC))
MRI.constrainRegClass(RegC, RC);
// Create a new virtual register for the result of (X op Y) instead of
// recycling RegB because the MachineCombiner's computation of the critical
// path requires a new register definition rather than an existing one.
- unsigned NewVR = MRI.createVirtualRegister(RC);
+ Register NewVR = MRI.createVirtualRegister(RC);
InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
unsigned Opcode = Root.getOpcode();
@@ -880,21 +880,21 @@ void TargetInstrInfo::genAlternativeCodeSequence(
}
bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
- const MachineInstr &MI, AliasAnalysis *AA) const {
+ const MachineInstr &MI, AAResults *AA) const {
const MachineFunction &MF = *MI.getMF();
const MachineRegisterInfo &MRI = MF.getRegInfo();
// Remat clients assume operand 0 is the defined register.
if (!MI.getNumOperands() || !MI.getOperand(0).isReg())
return false;
- unsigned DefReg = MI.getOperand(0).getReg();
+ Register DefReg = MI.getOperand(0).getReg();
// A sub-register definition can only be rematerialized if the instruction
// doesn't read the other parts of the register. Otherwise it is really a
// read-modify-write operation on the full virtual register which cannot be
// moved safely.
- if (TargetRegisterInfo::isVirtualRegister(DefReg) &&
- MI.getOperand(0).getSubReg() && MI.readsVirtualRegister(DefReg))
+ if (Register::isVirtualRegister(DefReg) && MI.getOperand(0).getSubReg() &&
+ MI.readsVirtualRegister(DefReg))
return false;
// A load from a fixed stack slot can be rematerialized. This may be
@@ -924,12 +924,12 @@ bool TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg()) continue;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
if (Reg == 0)
continue;
// Check for a well-behaved physical register.
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
if (MO.isUse()) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
@@ -1120,6 +1120,24 @@ bool TargetInstrInfo::hasLowDefLatency(const TargetSchedModel &SchedModel,
return (DefCycle != -1 && DefCycle <= 1);
}
+Optional<ParamLoadedValue>
+TargetInstrInfo::describeLoadedValue(const MachineInstr &MI) const {
+ const MachineFunction *MF = MI.getMF();
+ const MachineOperand *Op = nullptr;
+ DIExpression *Expr = DIExpression::get(MF->getFunction().getContext(), {});
+ const MachineOperand *SrcRegOp, *DestRegOp;
+
+ if (isCopyInstr(MI, SrcRegOp, DestRegOp)) {
+ Op = SrcRegOp;
+ return ParamLoadedValue(*Op, Expr);
+ } else if (MI.isMoveImmediate()) {
+ Op = &MI.getOperand(1);
+ return ParamLoadedValue(*Op, Expr);
+ }
+
+ return None;
+}
+
/// Both DefMI and UseMI must be valid. By default, call directly to the
/// itinerary. This may be overriden by the target.
int TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
@@ -1227,3 +1245,5 @@ bool TargetInstrInfo::getInsertSubregInputs(
InsertedReg.SubIdx = (unsigned)MOSubIdx.getImm();
return true;
}
+
+TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {}
diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp
index 9b28c1a6c450..9b23012f47e3 100644
--- a/lib/CodeGen/TargetLoweringBase.cpp
+++ b/lib/CodeGen/TargetLoweringBase.cpp
@@ -167,6 +167,7 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::BZERO, "__bzero");
break;
case Triple::aarch64:
+ case Triple::aarch64_32:
setLibcallName(RTLIB::BZERO, "bzero");
break;
default:
@@ -197,6 +198,11 @@ void TargetLoweringBase::InitLibcalls(const Triple &TT) {
setLibcallName(RTLIB::SINCOS_PPCF128, "sincosl");
}
+ if (TT.isPS4CPU()) {
+ setLibcallName(RTLIB::SINCOS_F32, "sincosf");
+ setLibcallName(RTLIB::SINCOS_F64, "sincos");
+ }
+
if (TT.isOSOpenBSD()) {
setLibcallName(RTLIB::STACKPROTECTOR_CHECK_FAIL, nullptr);
}
@@ -578,13 +584,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm) : TM(tm) {
BooleanFloatContents = UndefinedBooleanContent;
BooleanVectorContents = UndefinedBooleanContent;
SchedPreferenceInfo = Sched::ILP;
- JumpBufSize = 0;
- JumpBufAlignment = 0;
- MinFunctionAlignment = 0;
- PrefFunctionAlignment = 0;
- PrefLoopAlignment = 0;
GatherAllAliasesMaxDepth = 18;
- MinStackArgumentAlignment = 1;
// TODO: the default will be switched to 0 in the next commit, along
// with the Target-specific changes necessary.
MaxAtomicSizeInBitsSupported = 1024;
@@ -653,6 +653,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::SMULFIX, VT, Expand);
setOperationAction(ISD::SMULFIXSAT, VT, Expand);
setOperationAction(ISD::UMULFIX, VT, Expand);
+ setOperationAction(ISD::UMULFIXSAT, VT, Expand);
// Overflow operations default to expand
setOperationAction(ISD::SADDO, VT, Expand);
@@ -689,6 +690,7 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::ANY_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Expand);
setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Expand);
+ setOperationAction(ISD::SPLAT_VECTOR, VT, Expand);
}
// Constrained floating-point operations default to expand.
@@ -708,16 +710,22 @@ void TargetLoweringBase::initActions() {
setOperationAction(ISD::STRICT_FLOG, VT, Expand);
setOperationAction(ISD::STRICT_FLOG10, VT, Expand);
setOperationAction(ISD::STRICT_FLOG2, VT, Expand);
+ setOperationAction(ISD::STRICT_LRINT, VT, Expand);
+ setOperationAction(ISD::STRICT_LLRINT, VT, Expand);
setOperationAction(ISD::STRICT_FRINT, VT, Expand);
setOperationAction(ISD::STRICT_FNEARBYINT, VT, Expand);
setOperationAction(ISD::STRICT_FCEIL, VT, Expand);
setOperationAction(ISD::STRICT_FFLOOR, VT, Expand);
+ setOperationAction(ISD::STRICT_LROUND, VT, Expand);
+ setOperationAction(ISD::STRICT_LLROUND, VT, Expand);
setOperationAction(ISD::STRICT_FROUND, VT, Expand);
setOperationAction(ISD::STRICT_FTRUNC, VT, Expand);
setOperationAction(ISD::STRICT_FMAXNUM, VT, Expand);
setOperationAction(ISD::STRICT_FMINNUM, VT, Expand);
setOperationAction(ISD::STRICT_FP_ROUND, VT, Expand);
setOperationAction(ISD::STRICT_FP_EXTEND, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Expand);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Expand);
// For most targets @llvm.get.dynamic.area.offset just returns 0.
setOperationAction(ISD::GET_DYNAMIC_AREA_OFFSET, VT, Expand);
@@ -824,7 +832,8 @@ TargetLoweringBase::getTypeConversion(LLVMContext &Context, EVT VT) const {
LegalizeTypeAction LA = ValueTypeActions.getTypeAction(SVT);
assert((LA == TypeLegal || LA == TypeSoftenFloat ||
- ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger) &&
+ (NVT.isVector() ||
+ ValueTypeActions.getTypeAction(NVT) != TypePromoteInteger)) &&
"Promote may not follow Expand or Promote");
if (LA == TypeSplitVector)
@@ -1257,17 +1266,23 @@ void TargetLoweringBase::computeRegisterProperties(
MVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
bool IsLegalWiderType = false;
+ bool IsScalable = VT.isScalableVector();
LegalizeTypeAction PreferredAction = getPreferredVectorAction(VT);
switch (PreferredAction) {
- case TypePromoteInteger:
+ case TypePromoteInteger: {
+ MVT::SimpleValueType EndVT = IsScalable ?
+ MVT::LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE :
+ MVT::LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE;
// Try to promote the elements of integer vectors. If no legal
// promotion was found, fall through to the widen-vector method.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_INTEGER_VECTOR_VALUETYPE; ++nVT) {
+ for (unsigned nVT = i + 1;
+ (MVT::SimpleValueType)nVT <= EndVT; ++nVT) {
MVT SVT = (MVT::SimpleValueType) nVT;
// Promote vectors of integers to vectors with the same number
// of elements, with a wider element type.
if (SVT.getScalarSizeInBits() > EltVT.getSizeInBits() &&
- SVT.getVectorNumElements() == NElts && isTypeLegal(SVT)) {
+ SVT.getVectorNumElements() == NElts &&
+ SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -1279,23 +1294,37 @@ void TargetLoweringBase::computeRegisterProperties(
if (IsLegalWiderType)
break;
LLVM_FALLTHROUGH;
+ }
case TypeWidenVector:
- // Try to widen the vector.
- for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
- MVT SVT = (MVT::SimpleValueType) nVT;
- if (SVT.getVectorElementType() == EltVT
- && SVT.getVectorNumElements() > NElts && isTypeLegal(SVT)) {
- TransformToType[i] = SVT;
- RegisterTypeForVT[i] = SVT;
- NumRegistersForVT[i] = 1;
+ if (isPowerOf2_32(NElts)) {
+ // Try to widen the vector.
+ for (unsigned nVT = i + 1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
+ MVT SVT = (MVT::SimpleValueType) nVT;
+ if (SVT.getVectorElementType() == EltVT
+ && SVT.getVectorNumElements() > NElts
+ && SVT.isScalableVector() == IsScalable && isTypeLegal(SVT)) {
+ TransformToType[i] = SVT;
+ RegisterTypeForVT[i] = SVT;
+ NumRegistersForVT[i] = 1;
+ ValueTypeActions.setTypeAction(VT, TypeWidenVector);
+ IsLegalWiderType = true;
+ break;
+ }
+ }
+ if (IsLegalWiderType)
+ break;
+ } else {
+ // Only widen to the next power of 2 to keep consistency with EVT.
+ MVT NVT = VT.getPow2VectorType();
+ if (isTypeLegal(NVT)) {
+ TransformToType[i] = NVT;
ValueTypeActions.setTypeAction(VT, TypeWidenVector);
- IsLegalWiderType = true;
+ RegisterTypeForVT[i] = NVT;
+ NumRegistersForVT[i] = 1;
break;
}
}
- if (IsLegalWiderType)
- break;
LLVM_FALLTHROUGH;
case TypeSplitVector:
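Aside: the rewritten TypeWidenVector case only scans wider simple types when the element count is already a power of 2; otherwise it jumps straight to VT.getPow2VectorType(). The rounding rule in isolation (plain C++, not LLVM API):

    #include <cassert>

    // Round an element count up to the next power of 2, mirroring what
    // getPow2VectorType() does on the new path (v3i16 widens to v4i16,
    // never past it).
    unsigned nextPow2(unsigned N) {
      unsigned P = 1;
      while (P < N)
        P <<= 1;
      return P;
    }

    int main() {
      assert(nextPow2(3) == 4);
      assert(nextPow2(4) == 4);
      assert(nextPow2(5) == 8);
    }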
@@ -1488,12 +1517,9 @@ unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty,
return DL.getABITypeAlignment(Ty);
}
-bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
- const DataLayout &DL, EVT VT,
- unsigned AddrSpace,
- unsigned Alignment,
- MachineMemOperand::Flags Flags,
- bool *Fast) const {
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
// Check if the specified alignment is sufficient based on the data layout.
// TODO: While using the data layout works in practice, a better solution
// would be to implement this check directly (make this a virtual function).
@@ -1511,6 +1537,21 @@ bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
return allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags, Fast);
}
+bool TargetLoweringBase::allowsMemoryAccessForAlignment(
+ LLVMContext &Context, const DataLayout &DL, EVT VT,
+ const MachineMemOperand &MMO, bool *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, MMO.getAddrSpace(),
+ MMO.getAlignment(), MMO.getFlags(),
+ Fast);
+}
+
+bool TargetLoweringBase::allowsMemoryAccess(
+ LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
+ unsigned Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
+ return allowsMemoryAccessForAlignment(Context, DL, VT, AddrSpace, Alignment,
+ Flags, Fast);
+}
+
bool TargetLoweringBase::allowsMemoryAccess(LLVMContext &Context,
const DataLayout &DL, EVT VT,
const MachineMemOperand &MMO,
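Aside: the old allowsMemoryAccess() body moves into allowsMemoryAccessForAlignment(), with allowsMemoryAccess() reduced to a forwarder, so a target can later override the general query without disturbing the pure alignment check. A usage sketch (assumes TLI, Ctx and DL are in scope; not code from this patch):

    bool Fast = false;
    // Alignment-only query: is an align-4 v4i32 access in address space 0
    // allowed, and is it fast? The MachineMemOperand overload just forwards
    // the operand's address space, alignment and flags.
    if (TLI.allowsMemoryAccessForAlignment(Ctx, DL, MVT::v4i32,
                                           /*AddrSpace=*/0, /*Alignment=*/4,
                                           MachineMemOperand::MONone, &Fast)) {
      // ... the access is legal; Fast reports whether it is also cheap.
    }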
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 4c8f75b237aa..4978f4b9500b 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -43,6 +43,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCSectionMachO.h"
#include "llvm/MC/MCSectionWasm.h"
+#include "llvm/MC/MCSectionXCOFF.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCSymbolELF.h"
@@ -154,6 +155,7 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
break;
case Triple::aarch64:
case Triple::aarch64_be:
+ case Triple::aarch64_32:
// The small model guarantees static code/data size < 4GB, but not where it
// will be in memory. Most of these could end up >2GB away so even a signed
// pc-relative 32-bit address is insufficient, theoretically.
@@ -375,7 +377,7 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(
ELF::SHT_PROGBITS, Flags, 0);
unsigned Size = DL.getPointerSize();
Streamer.SwitchSection(Sec);
- Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0));
+ Streamer.EmitValueToAlignment(DL.getPointerABIAlignment(0).value());
Streamer.EmitSymbolAttribute(Label, MCSA_ELF_TypeObject);
const MCExpr *E = MCConstantExpr::create(Size, getContext());
Streamer.emitELFSize(Label, E);
@@ -524,8 +526,8 @@ static const MCSymbolELF *getAssociatedSymbol(const GlobalObject *GO,
if (!VM)
report_fatal_error("MD_associated operand is not ValueAsMetadata");
- GlobalObject *OtherGO = dyn_cast<GlobalObject>(VM->getValue());
- return OtherGO ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGO)) : nullptr;
+ auto *OtherGV = dyn_cast<GlobalValue>(VM->getValue());
+ return OtherGV ? dyn_cast<MCSymbolELF>(TM.getSymbol(OtherGV)) : nullptr;
}
static unsigned getEntrySizeForKind(SectionKind Kind) {
@@ -566,6 +568,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal(
SectionName = Attrs.getAttribute("bss-section").getValueAsString();
} else if (Attrs.hasAttribute("rodata-section") && Kind.isReadOnly()) {
SectionName = Attrs.getAttribute("rodata-section").getValueAsString();
+ } else if (Attrs.hasAttribute("relro-section") && Kind.isReadOnlyWithRel()) {
+ SectionName = Attrs.getAttribute("relro-section").getValueAsString();
} else if (Attrs.hasAttribute("data-section") && Kind.isData()) {
SectionName = Attrs.getAttribute("data-section").getValueAsString();
}
@@ -1107,8 +1111,8 @@ MCSymbol *TargetLoweringObjectFileMachO::getCFIPersonalitySymbol(
}
const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
- const MCSymbol *Sym, const MCValue &MV, int64_t Offset,
- MachineModuleInfo *MMI, MCStreamer &Streamer) const {
+ const GlobalValue *GV, const MCSymbol *Sym, const MCValue &MV,
+ int64_t Offset, MachineModuleInfo *MMI, MCStreamer &Streamer) const {
// Although MachO 32-bit targets do not explicitly have a GOTPCREL relocation
// as 64-bit do, we replace the GOT equivalent by accessing the final symbol
// through a non_lazy_ptr stub instead. One advantage is that it allows the
@@ -1165,12 +1169,10 @@ const MCExpr *TargetLoweringObjectFileMachO::getIndirectSymViaGOTPCRel(
MCSymbol *Stub = Ctx.getOrCreateSymbol(Name);
MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(Stub);
- if (!StubSym.getPointer()) {
- bool IsIndirectLocal = Sym->isDefined() && !Sym->isExternal();
- // With the assumption that IsIndirectLocal == GV->hasLocalLinkage().
+
+ if (!StubSym.getPointer())
StubSym = MachineModuleInfoImpl::StubValueTy(const_cast<MCSymbol *>(Sym),
- !IsIndirectLocal);
- }
+ !GV->hasLocalLinkage());
const MCExpr *BSymExpr =
MCSymbolRefExpr::create(BaseSym, MCSymbolRefExpr::VK_None, Ctx);
@@ -1519,7 +1521,8 @@ static MCSectionCOFF *getCOFFStaticStructorSection(MCContext &Ctx,
// internally, so we use ".CRT$XCA00001" for them.
SmallString<24> Name;
raw_svector_ostream OS(Name);
- OS << ".CRT$XC" << (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
+ OS << ".CRT$X" << (IsCtor ? "C" : "T") <<
+ (Priority < 200 ? 'A' : 'T') << format("%05u", Priority);
MCSectionCOFF *Sec = Ctx.getCOFFSection(
Name, COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
@@ -1595,7 +1598,8 @@ const MCExpr *TargetLoweringObjectFileCOFF::lowerRelativeReference(
static std::string APIntToHexString(const APInt &AI) {
unsigned Width = (AI.getBitWidth() / 8) * 2;
- std::string HexString = utohexstr(AI.getLimitedValue(), /*LowerCase=*/true);
+ std::string HexString = AI.toString(16, /*Signed=*/false);
+ transform(HexString.begin(), HexString.end(), HexString.begin(), tolower);
unsigned Size = HexString.size();
assert(Width >= Size && "hex string is too large!");
HexString.insert(HexString.begin(), Width - Size, '0');
@@ -1819,3 +1823,82 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection(
llvm_unreachable("@llvm.global_dtors should have been lowered already");
return nullptr;
}
+
+//===----------------------------------------------------------------------===//
+// XCOFF
+//===----------------------------------------------------------------------===//
+MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ report_fatal_error("XCOFF explicit sections not yet implemented.");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::SelectSectionForGlobal(
+ const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const {
+ assert(!TM.getFunctionSections() && !TM.getDataSections() &&
+ "XCOFF unique sections not yet implemented.");
+
+ // Common symbols go into a csect with matching name which will get mapped
+ // into the .bss section.
+ if (Kind.isBSSLocal() || Kind.isCommon()) {
+ SmallString<128> Name;
+ getNameWithPrefix(Name, GO, TM);
+ XCOFF::StorageClass SC =
+ TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GO);
+ return getContext().getXCOFFSection(
+ Name, Kind.isBSSLocal() ? XCOFF::XMC_BS : XCOFF::XMC_RW, XCOFF::XTY_CM,
+ SC, Kind, /* BeginSymbolName */ nullptr);
+ }
+
+ if (Kind.isText())
+ return TextSection;
+
+ if (Kind.isData())
+ return DataSection;
+
+ report_fatal_error("XCOFF other section types not yet implemented.");
+}
+
+bool TargetLoweringObjectFileXCOFF::shouldPutJumpTableInFunctionSection(
+ bool UsesLabelDifference, const Function &F) const {
+ report_fatal_error("TLOF XCOFF not yet implemented.");
+}
+
+void TargetLoweringObjectFileXCOFF::Initialize(MCContext &Ctx,
+ const TargetMachine &TgtM) {
+ TargetLoweringObjectFile::Initialize(Ctx, TgtM);
+ TTypeEncoding = 0;
+ PersonalityEncoding = 0;
+ LSDAEncoding = 0;
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticCtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("XCOFF ctor section not yet implemented.");
+}
+
+MCSection *TargetLoweringObjectFileXCOFF::getStaticDtorSection(
+ unsigned Priority, const MCSymbol *KeySym) const {
+ report_fatal_error("XCOFF dtor section not yet implemented.");
+}
+
+const MCExpr *TargetLoweringObjectFileXCOFF::lowerRelativeReference(
+ const GlobalValue *LHS, const GlobalValue *RHS,
+ const TargetMachine &TM) const {
+ report_fatal_error("XCOFF not yet implemented.");
+}
+
+XCOFF::StorageClass TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(
+ const GlobalObject *GO) {
+ switch (GO->getLinkage()) {
+ case GlobalValue::InternalLinkage:
+ return XCOFF::C_HIDEXT;
+ case GlobalValue::ExternalLinkage:
+ case GlobalValue::CommonLinkage:
+ return XCOFF::C_EXT;
+ case GlobalValue::ExternalWeakLinkage:
+ return XCOFF::C_WEAKEXT;
+ default:
+ report_fatal_error(
+ "Unhandled linkage when mapping linkage to StorageClass.");
+ }
+}
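Aside: most of the new XCOFF hooks are report_fatal_error placeholders; the one piece of real policy is getStorageClassForGlobal(). A standalone mimic of that mapping (simplified enums, not the LLVM types):

    #include <stdexcept>

    enum class Linkage { Internal, External, Common, ExternalWeak, Other };
    enum class StorageClass { C_HIDEXT, C_EXT, C_WEAKEXT };

    // Same shape as the switch above: internal symbols become hidden-external
    // csects, external and common become external, weak-extern stays weak.
    StorageClass storageClassFor(Linkage L) {
      switch (L) {
      case Linkage::Internal:     return StorageClass::C_HIDEXT;
      case Linkage::External:
      case Linkage::Common:       return StorageClass::C_EXT;
      case Linkage::ExternalWeak: return StorageClass::C_WEAKEXT;
      default:
        throw std::runtime_error("Unhandled linkage when mapping to StorageClass");
      }
    }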
diff --git a/lib/CodeGen/TargetPassConfig.cpp b/lib/CodeGen/TargetPassConfig.cpp
index 36df02692f86..f1f4f65adf7c 100644
--- a/lib/CodeGen/TargetPassConfig.cpp
+++ b/lib/CodeGen/TargetPassConfig.cpp
@@ -49,9 +49,10 @@
using namespace llvm;
-cl::opt<bool> EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
- cl::desc("Enable interprocedural register allocation "
- "to reduce load/store at procedure calls."));
+static cl::opt<bool>
+ EnableIPRA("enable-ipra", cl::init(false), cl::Hidden,
+ cl::desc("Enable interprocedural register allocation "
+ "to reduce load/store at procedure calls."));
static cl::opt<bool> DisablePostRASched("disable-post-ra", cl::Hidden,
cl::desc("Disable Post Regalloc Scheduler"));
static cl::opt<bool> DisableBranchFold("disable-branch-fold", cl::Hidden,
@@ -152,8 +153,10 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID).
// Targets can return true in targetSchedulesPostRAScheduling() and
// insert a PostRA scheduling pass wherever it wants.
-cl::opt<bool> MISchedPostRA("misched-postra", cl::Hidden,
- cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)"));
+static cl::opt<bool> MISchedPostRA(
+ "misched-postra", cl::Hidden,
+ cl::desc(
+ "Run MachineScheduler post regalloc (independent of preRA sched)"));
// Experimental option to run live interval analysis early.
static cl::opt<bool> EarlyLiveIntervals("early-live-intervals", cl::Hidden,
@@ -175,10 +178,10 @@ static cl::opt<CFLAAType> UseCFLAA(
/// Option names for limiting the codegen pipeline.
/// Those are used in error reporting and we didn't want
/// to duplicate their names all over the place.
-const char *StartAfterOptName = "start-after";
-const char *StartBeforeOptName = "start-before";
-const char *StopAfterOptName = "stop-after";
-const char *StopBeforeOptName = "stop-before";
+static const char *StartAfterOptName = "start-after";
+static const char *StartBeforeOptName = "start-before";
+static const char *StopAfterOptName = "stop-after";
+static const char *StopBeforeOptName = "stop-before";
static cl::opt<std::string>
StartAfterOpt(StringRef(StartAfterOptName),
@@ -654,6 +657,7 @@ void TargetPassConfig::addIRPasses() {
// TODO: add a pass insertion point here
addPass(createGCLoweringPass());
addPass(createShadowStackGCLoweringPass());
+ addPass(createLowerConstantIntrinsicsPass());
// Make sure that no unreachable blocks are instruction selected.
addPass(createUnreachableBlockEliminationPass());
@@ -1231,5 +1235,5 @@ bool TargetPassConfig::isGISelCSEEnabled() const {
}
std::unique_ptr<CSEConfigBase> TargetPassConfig::getCSEConfig() const {
- return make_unique<CSEConfigBase>();
+ return std::make_unique<CSEConfigBase>();
}
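Aside: llvm::make_unique was a pre-C++14 shim; with the codebase now on C++14 the standard-library spelling is used directly. In isolation:

    #include <memory>

    struct CSEConfigBase { virtual ~CSEConfigBase() = default; };

    std::unique_ptr<CSEConfigBase> makeDefaultCSEConfig() {
      return std::make_unique<CSEConfigBase>(); // previously llvm::make_unique
    }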
diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp
index f1b2ecf3243b..e5592c31098a 100644
--- a/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/lib/CodeGen/TargetRegisterInfo.cpp
@@ -86,22 +86,21 @@ bool TargetRegisterInfo::checkAllSuperRegsMarked(const BitVector &RegisterSet,
namespace llvm {
-Printable printReg(unsigned Reg, const TargetRegisterInfo *TRI,
+Printable printReg(Register Reg, const TargetRegisterInfo *TRI,
unsigned SubIdx, const MachineRegisterInfo *MRI) {
return Printable([Reg, TRI, SubIdx, MRI](raw_ostream &OS) {
if (!Reg)
OS << "$noreg";
- else if (TargetRegisterInfo::isStackSlot(Reg))
- OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg);
- else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ else if (Register::isStackSlot(Reg))
+ OS << "SS#" << Register::stackSlot2Index(Reg);
+ else if (Register::isVirtualRegister(Reg)) {
StringRef Name = MRI ? MRI->getVRegName(Reg) : "";
if (Name != "") {
OS << '%' << Name;
} else {
- OS << '%' << TargetRegisterInfo::virtReg2Index(Reg);
+ OS << '%' << Register::virtReg2Index(Reg);
}
- }
- else if (!TRI)
+ } else if (!TRI)
OS << '$' << "physreg" << Reg;
else if (Reg < TRI->getNumRegs()) {
OS << '$';
@@ -143,8 +142,8 @@ Printable printRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
Printable printVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) {
return Printable([Unit, TRI](raw_ostream &OS) {
- if (TRI && TRI->isVirtualRegister(Unit)) {
- OS << '%' << TargetRegisterInfo::virtReg2Index(Unit);
+ if (Register::isVirtualRegister(Unit)) {
+ OS << '%' << Register::virtReg2Index(Unit);
} else {
OS << printRegUnit(Unit, TRI);
}
@@ -189,7 +188,8 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
/// the right type that contains this physreg.
const TargetRegisterClass *
TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, MVT VT) const {
- assert(isPhysicalRegister(reg) && "reg must be a physical register");
+ assert(Register::isPhysicalRegister(reg) &&
+ "reg must be a physical register");
  // Pick the smallest sub-register class of the right type that contains
  // this physreg.
@@ -238,24 +238,16 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF,
static inline
const TargetRegisterClass *firstCommonClass(const uint32_t *A,
const uint32_t *B,
- const TargetRegisterInfo *TRI,
- const MVT::SimpleValueType SVT =
- MVT::SimpleValueType::Any) {
- const MVT VT(SVT);
+ const TargetRegisterInfo *TRI) {
for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
- if (unsigned Common = *A++ & *B++) {
- const TargetRegisterClass *RC =
- TRI->getRegClass(I + countTrailingZeros(Common));
- if (SVT == MVT::SimpleValueType::Any || TRI->isTypeLegalForClass(*RC, VT))
- return RC;
- }
+ if (unsigned Common = *A++ & *B++)
+ return TRI->getRegClass(I + countTrailingZeros(Common));
return nullptr;
}
const TargetRegisterClass *
TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
- const TargetRegisterClass *B,
- const MVT::SimpleValueType SVT) const {
+ const TargetRegisterClass *B) const {
// First take care of the trivial cases.
if (A == B)
return A;
@@ -264,7 +256,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A,
// Register classes are ordered topologically, so the largest common
  // sub-class is the common sub-class with the smallest ID.
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT);
+ return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this);
}
const TargetRegisterClass *
@@ -409,7 +401,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
// Target-independent hints are either a physical or a virtual register.
unsigned Phys = Reg;
- if (VRM && isVirtualRegister(Phys))
+ if (VRM && Register::isVirtualRegister(Phys))
Phys = VRM->getPhys(Phys);
// Don't add the same reg twice (Hints_MRI may contain multiple virtual
@@ -417,7 +409,7 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
if (!HintedRegs.insert(Phys).second)
continue;
// Check that Phys is a valid hint in VirtReg's register class.
- if (!isPhysicalRegister(Phys))
+ if (!Register::isPhysicalRegister(Phys))
continue;
if (MRI.isReserved(Phys))
continue;
@@ -433,6 +425,20 @@ TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg,
return false;
}
+bool TargetRegisterInfo::isCalleeSavedPhysReg(
+ unsigned PhysReg, const MachineFunction &MF) const {
+ if (PhysReg == 0)
+ return false;
+ const uint32_t *callerPreservedRegs =
+ getCallPreservedMask(MF, MF.getFunction().getCallingConv());
+ if (callerPreservedRegs) {
+ assert(Register::isPhysicalRegister(PhysReg) &&
+ "Expected physical register");
+ return (callerPreservedRegs[PhysReg / 32] >> PhysReg % 32) & 1;
+ }
+ return false;
+}
+
bool TargetRegisterInfo::canRealignStack(const MachineFunction &MF) const {
return !MF.getFunction().hasFnAttribute("no-realign-stack");
}
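Aside: the new isCalleeSavedPhysReg() reads the call-preserved mask, which packs one bit per physical register into 32-bit words. The indexing, spelled out as a self-contained test:

    #include <cassert>
    #include <cstdint>

    // Bit PhysReg lives in word PhysReg/32 at position PhysReg%32; a set bit
    // means the register is preserved across the call.
    bool isPreserved(const uint32_t *Mask, unsigned PhysReg) {
      return (Mask[PhysReg / 32] >> (PhysReg % 32)) & 1;
    }

    int main() {
      const uint32_t Mask[2] = {0x0, 0x1}; // only register 32 preserved
      assert(!isPreserved(Mask, 0));
      assert(isPreserved(Mask, 32));
    }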
@@ -466,7 +472,7 @@ bool TargetRegisterInfo::regmaskSubsetEqual(const uint32_t *mask0,
unsigned TargetRegisterInfo::getRegSizeInBits(unsigned Reg,
const MachineRegisterInfo &MRI) const {
const TargetRegisterClass *RC{};
- if (isPhysicalRegister(Reg)) {
+ if (Register::isPhysicalRegister(Reg)) {
// The size is not directly available for physical registers.
// Instead, we need to access a register class that contains Reg and
// get the size of that register class.
@@ -501,7 +507,7 @@ TargetRegisterInfo::lookThruCopyLike(unsigned SrcReg,
CopySrcReg = MI->getOperand(2).getReg();
}
- if (!isVirtualRegister(CopySrcReg))
+ if (!Register::isVirtualRegister(CopySrcReg))
return CopySrcReg;
SrcReg = CopySrcReg;
diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp
index 195279719ad4..ce59452fd1b8 100644
--- a/lib/CodeGen/TargetSchedule.cpp
+++ b/lib/CodeGen/TargetSchedule.cpp
@@ -300,7 +300,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
// TODO: The following hack exists because predication passes do not
// correctly append imp-use operands, and readsReg() strangely returns false
// for predicated defs.
- unsigned Reg = DefMI->getOperand(DefOperIdx).getReg();
+ Register Reg = DefMI->getOperand(DefOperIdx).getReg();
const MachineFunction &MF = *DefMI->getMF();
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
if (!DepMI->readsRegister(Reg, TRI) && TII->isPredicated(*DepMI))
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 43d876646967..ea971809d4e4 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -230,7 +230,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isUse() && MOReg != SavedReg)
@@ -299,7 +299,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg,
MachineOperand &MO = OtherMI.getOperand(i);
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (DefReg == MOReg)
@@ -418,8 +418,8 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
} else
return false;
- IsSrcPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ IsSrcPhys = Register::isPhysicalRegister(SrcReg);
+ IsDstPhys = Register::isPhysicalRegister(DstReg);
return true;
}
@@ -427,8 +427,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII,
/// given instruction, is killed by the given instruction.
static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg,
LiveIntervals *LIS) {
- if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) &&
- !LIS->isNotInMIMap(*MI)) {
+ if (LIS && Register::isVirtualRegister(Reg) && !LIS->isNotInMIMap(*MI)) {
// FIXME: Sometimes tryInstructionTransform() will add instructions and
// test whether they can be folded before keeping them. In this case it
// sets a kill before recursively calling tryInstructionTransform() again.
@@ -475,12 +474,12 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
MachineInstr *DefMI = &MI;
while (true) {
// All uses of physical registers are likely to be kills.
- if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ if (Register::isPhysicalRegister(Reg) &&
(allowFalsePositives || MRI->hasOneUse(Reg)))
return true;
if (!isPlainlyKilled(DefMI, Reg, LIS))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return true;
MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
// If there are multiple defs, we can't do a simple analysis, so just
@@ -536,7 +535,7 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
}
IsDstPhys = false;
if (isTwoAddrUse(UseMI, Reg, DstReg)) {
- IsDstPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
+ IsDstPhys = Register::isPhysicalRegister(DstReg);
return &UseMI;
}
return nullptr;
@@ -546,13 +545,13 @@ MachineInstr *findOnlyInterestingUse(unsigned Reg, MachineBasicBlock *MBB,
/// to.
static unsigned
getMappedReg(unsigned Reg, DenseMap<unsigned, unsigned> &RegMap) {
- while (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ while (Register::isVirtualRegister(Reg)) {
DenseMap<unsigned, unsigned>::iterator SI = RegMap.find(Reg);
if (SI == RegMap.end())
return 0;
Reg = SI->second;
}
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (Register::isPhysicalRegister(Reg))
return Reg;
return 0;
}
@@ -683,7 +682,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
unsigned RegBIdx,
unsigned RegCIdx,
unsigned Dist) {
- unsigned RegC = MI->getOperand(RegCIdx).getReg();
+ Register RegC = MI->getOperand(RegCIdx).getReg();
LLVM_DEBUG(dbgs() << "2addr: COMMUTING : " << *MI);
MachineInstr *NewMI = TII->commuteInstruction(*MI, false, RegBIdx, RegCIdx);
@@ -700,7 +699,7 @@ bool TwoAddressInstructionPass::commuteInstruction(MachineInstr *MI,
// Update source register map.
unsigned FromRegC = getMappedReg(RegC, SrcRegMap);
if (FromRegC) {
- unsigned RegA = MI->getOperand(DstIdx).getReg();
+ Register RegA = MI->getOperand(DstIdx).getReg();
SrcRegMap[RegA] = FromRegC;
}
@@ -911,7 +910,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : MI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isDef())
@@ -955,7 +954,7 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isDef()) {
@@ -1093,7 +1092,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : KillMI->operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (MO.isUse()) {
if (!MOReg)
continue;
@@ -1105,7 +1104,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
Uses.insert(MOReg);
if (isKill && MOReg != Reg)
Kills.insert(MOReg);
- } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) {
+ } else if (Register::isPhysicalRegister(MOReg)) {
Defs.insert(MOReg);
if (!MO.isDead())
LiveDefs.insert(MOReg);
@@ -1130,7 +1129,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
for (const MachineOperand &MO : OtherMI.operands()) {
if (!MO.isReg())
continue;
- unsigned MOReg = MO.getReg();
+ Register MOReg = MO.getReg();
if (!MOReg)
continue;
if (MO.isUse()) {
@@ -1154,8 +1153,7 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi,
unsigned MOReg = OtherDefs[i];
if (Uses.count(MOReg))
return false;
- if (TargetRegisterInfo::isPhysicalRegister(MOReg) &&
- LiveDefs.count(MOReg))
+ if (Register::isPhysicalRegister(MOReg) && LiveDefs.count(MOReg))
return false;
// Physical register def is seen.
Defs.erase(MOReg);
@@ -1208,8 +1206,8 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
return false;
bool MadeChange = false;
- unsigned DstOpReg = MI->getOperand(DstOpIdx).getReg();
- unsigned BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
+ Register DstOpReg = MI->getOperand(DstOpIdx).getReg();
+ Register BaseOpReg = MI->getOperand(BaseOpIdx).getReg();
unsigned OpsNum = MI->getDesc().getNumOperands();
unsigned OtherOpIdx = MI->getDesc().getNumDefs();
for (; OtherOpIdx < OpsNum; OtherOpIdx++) {
@@ -1221,7 +1219,7 @@ bool TwoAddressInstructionPass::tryInstructionCommute(MachineInstr *MI,
!TII->findCommutedOpIndices(*MI, BaseOpIdx, OtherOpIdx))
continue;
- unsigned OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
+ Register OtherOpReg = MI->getOperand(OtherOpIdx).getReg();
bool AggressiveCommute = false;
// If OtherOp dies but BaseOp does not, swap the OtherOp and BaseOp
@@ -1276,14 +1274,14 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
return false;
MachineInstr &MI = *mi;
- unsigned regA = MI.getOperand(DstIdx).getReg();
- unsigned regB = MI.getOperand(SrcIdx).getReg();
+ Register regA = MI.getOperand(DstIdx).getReg();
+ Register regB = MI.getOperand(SrcIdx).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(regB) &&
+ assert(Register::isVirtualRegister(regB) &&
"cannot make instruction into two-address form");
bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true);
- if (TargetRegisterInfo::isVirtualRegister(regA))
+ if (Register::isVirtualRegister(regA))
scanUses(regA);
bool Commuted = tryInstructionCommute(&MI, DstIdx, SrcIdx, regBKilled, Dist);
@@ -1363,7 +1361,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
const TargetRegisterClass *RC =
TRI->getAllocatableClass(
TII->getRegClass(UnfoldMCID, LoadRegIndex, TRI, *MF));
- unsigned Reg = MRI->createVirtualRegister(RC);
+ Register Reg = MRI->createVirtualRegister(RC);
SmallVector<MachineInstr *, 2> NewMIs;
if (!TII->unfoldMemoryOperand(*MF, MI, Reg,
/*UnfoldLoad=*/true,
@@ -1399,8 +1397,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
- if (MO.isReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
+ if (MO.isReg() && Register::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
if (NewMIs[0]->killsRegister(MO.getReg()))
@@ -1474,8 +1471,8 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
AnyOps = true;
MachineOperand &SrcMO = MI->getOperand(SrcIdx);
MachineOperand &DstMO = MI->getOperand(DstIdx);
- unsigned SrcReg = SrcMO.getReg();
- unsigned DstReg = DstMO.getReg();
+ Register SrcReg = SrcMO.getReg();
+ Register DstReg = DstMO.getReg();
// Tied constraint already satisfied?
if (SrcReg == DstReg)
continue;
@@ -1485,7 +1482,7 @@ collectTiedOperands(MachineInstr *MI, TiedOperandMap &TiedOperands) {
// Deal with undef uses immediately - simply rewrite the src operand.
if (SrcMO.isUndef() && !DstMO.getSubReg()) {
// Constrain the DstReg register class if required.
- if (TargetRegisterInfo::isVirtualRegister(DstReg))
+ if (Register::isVirtualRegister(DstReg))
if (const TargetRegisterClass *RC = TII->getRegClass(MCID, SrcIdx,
TRI, *MF))
MRI->constrainRegClass(DstReg, RC);
@@ -1522,7 +1519,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
unsigned DstIdx = TiedPairs[tpi].second;
const MachineOperand &DstMO = MI->getOperand(DstIdx);
- unsigned RegA = DstMO.getReg();
+ Register RegA = DstMO.getReg();
// Grab RegB from the instruction because it may have changed if the
// instruction was commuted.
@@ -1538,7 +1535,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
LastCopiedReg = RegA;
- assert(TargetRegisterInfo::isVirtualRegister(RegB) &&
+ assert(Register::isVirtualRegister(RegB) &&
"cannot make instruction into two-address form");
#ifndef NDEBUG
@@ -1559,14 +1556,13 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
MIB.addReg(RegB, 0, SubRegB);
const TargetRegisterClass *RC = MRI->getRegClass(RegB);
if (SubRegB) {
- if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ if (Register::isVirtualRegister(RegA)) {
assert(TRI->getMatchingSuperRegClass(RC, MRI->getRegClass(RegA),
SubRegB) &&
"tied subregister must be a truncation");
// The superreg class will not be used to constrain the subreg class.
RC = nullptr;
- }
- else {
+ } else {
assert(TRI->getMatchingSuperReg(RegA, SubRegB, MRI->getRegClass(RegB))
&& "tied subregister must be a truncation");
}
@@ -1581,7 +1577,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
if (LIS) {
LastCopyIdx = LIS->InsertMachineInstrInMaps(*PrevMI).getRegSlot();
- if (TargetRegisterInfo::isVirtualRegister(RegA)) {
+ if (Register::isVirtualRegister(RegA)) {
LiveInterval &LI = LIS->getInterval(RegA);
VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
SlotIndex endIdx =
@@ -1601,8 +1597,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
}
// Make sure regA is a legal regclass for the SrcIdx operand.
- if (TargetRegisterInfo::isVirtualRegister(RegA) &&
- TargetRegisterInfo::isVirtualRegister(RegB))
+ if (Register::isVirtualRegister(RegA) && Register::isVirtualRegister(RegB))
MRI->constrainRegClass(RegA, RC);
MO.setReg(RegA);
// The getMatchingSuper asserts guarantee that the register class projected
@@ -1744,8 +1739,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
if (TiedPairs.size() == 1) {
unsigned SrcIdx = TiedPairs[0].first;
unsigned DstIdx = TiedPairs[0].second;
- unsigned SrcReg = mi->getOperand(SrcIdx).getReg();
- unsigned DstReg = mi->getOperand(DstIdx).getReg();
+ Register SrcReg = mi->getOperand(SrcIdx).getReg();
+ Register DstReg = mi->getOperand(DstIdx).getReg();
if (SrcReg != DstReg &&
tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) {
// The tied operands have been eliminated or shifted further down
@@ -1803,9 +1798,8 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
void TwoAddressInstructionPass::
eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
MachineInstr &MI = *MBBI;
- unsigned DstReg = MI.getOperand(0).getReg();
- if (MI.getOperand(0).getSubReg() ||
- TargetRegisterInfo::isPhysicalRegister(DstReg) ||
+ Register DstReg = MI.getOperand(0).getReg();
+ if (MI.getOperand(0).getSubReg() || Register::isPhysicalRegister(DstReg) ||
!(MI.getNumOperands() & 1)) {
LLVM_DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << MI);
llvm_unreachable(nullptr);
@@ -1821,7 +1815,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
bool DefEmitted = false;
for (unsigned i = 1, e = MI.getNumOperands(); i < e; i += 2) {
MachineOperand &UseMO = MI.getOperand(i);
- unsigned SrcReg = UseMO.getReg();
+ Register SrcReg = UseMO.getReg();
unsigned SubIdx = MI.getOperand(i+1).getImm();
// Nothing needs to be inserted for undef operands.
if (UseMO.isUndef())
@@ -1855,7 +1849,7 @@ eliminateRegSequence(MachineBasicBlock::iterator &MBBI) {
DefEmitted = true;
// Update LiveVariables' kill info.
- if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg))
+ if (LV && isKill && !Register::isPhysicalRegister(SrcReg))
LV->replaceKillInstruction(SrcReg, MI, *CopyMI);
LLVM_DEBUG(dbgs() << "Inserted: " << *CopyMI);
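Aside: this file, like the TargetRegisterInfo and TargetSchedule hunks above, replaces raw unsigned register numbers with llvm::Register, which carries the virtual/physical distinction in the value itself; the static TargetRegisterInfo::is*Register helpers become Register members or statics. A standalone mimic of the encoding (an assumption drawn from Register.h of this era, deliberately simplified; stack slots are ignored):

    #include <cassert>
    #include <cstdint>

    // Virtual registers occupy the top half of the 32-bit space (high bit
    // set); small nonzero values are physical registers; 0 is "no register".
    struct Reg {
      uint32_t Id;
      bool isVirtual()  const { return static_cast<int32_t>(Id) < 0; }
      bool isPhysical() const { return static_cast<int32_t>(Id) > 0; }
      uint32_t virtIndex() const { return Id & ~(1u << 31); }
    };

    int main() {
      Reg V{(1u << 31) | 7}, P{7};
      assert(V.isVirtual() && V.virtIndex() == 7);
      assert(P.isPhysical() && !P.isVirtual());
    }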
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 177bab32bccc..3289eff71336 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -103,7 +103,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
df_iterator_default_set<MachineBasicBlock*> Reachable;
bool ModifiedPHI = false;
- MMI = getAnalysisIfAvailable<MachineModuleInfo>();
+ auto *MMIWP = getAnalysisIfAvailable<MachineModuleInfoWrapperPass>();
+ MMI = MMIWP ? &MMIWP->getMMI() : nullptr;
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
MachineLoopInfo *MLI = getAnalysisIfAvailable<MachineLoopInfo>();
@@ -146,8 +147,14 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
}
// Actually remove the blocks now.
- for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i)
+ for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
+ // Remove any call site information for calls in the block.
+ for (auto &I : DeadBlocks[i]->instrs())
+ if (I.isCall(MachineInstr::IgnoreBundle))
+ DeadBlocks[i]->getParent()->eraseCallSiteInfo(&I);
+
DeadBlocks[i]->eraseFromParent();
+ }
// Cleanup PHI nodes.
for (MachineFunction::iterator I = F.begin(), E = F.end(); I != E; ++I) {
@@ -167,8 +174,8 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (phi->getNumOperands() == 3) {
const MachineOperand &Input = phi->getOperand(1);
const MachineOperand &Output = phi->getOperand(0);
- unsigned InputReg = Input.getReg();
- unsigned OutputReg = Output.getReg();
+ Register InputReg = Input.getReg();
+ Register OutputReg = Output.getReg();
assert(Output.getSubReg() == 0 && "Cannot have output subregister");
ModifiedPHI = true;
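Aside: the new loop above drops call-site entries before each dead block is erased. MachineFunction keeps that information in a side table keyed by MachineInstr pointers, so deleting the block first would leave dangling keys. The shape of the fix, using the same calls as the hunk (MBB standing in for DeadBlocks[i]):

    for (MachineInstr &I : MBB->instrs())
      if (I.isCall(MachineInstr::IgnoreBundle))
        MBB->getParent()->eraseCallSiteInfo(&I);
    MBB->eraseFromParent(); // safe now: no call-site entry references the block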
diff --git a/lib/CodeGen/ValueTypes.cpp b/lib/CodeGen/ValueTypes.cpp
index a911cdcbec9d..73b862d51c0f 100644
--- a/lib/CodeGen/ValueTypes.cpp
+++ b/lib/CodeGen/ValueTypes.cpp
@@ -115,8 +115,8 @@ std::string EVT::getEVTString() const {
switch (V.SimpleTy) {
default:
if (isVector())
- return "v" + utostr(getVectorNumElements()) +
- getVectorElementType().getEVTString();
+ return (isScalableVector() ? "nxv" : "v") + utostr(getVectorNumElements())
+ + getVectorElementType().getEVTString();
if (isInteger())
return "i" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
@@ -144,6 +144,7 @@ std::string EVT::getEVTString() const {
case MVT::v32i1: return "v32i1";
case MVT::v64i1: return "v64i1";
case MVT::v128i1: return "v128i1";
+ case MVT::v256i1: return "v256i1";
case MVT::v512i1: return "v512i1";
case MVT::v1024i1: return "v1024i1";
case MVT::v1i8: return "v1i8";
@@ -157,6 +158,7 @@ std::string EVT::getEVTString() const {
case MVT::v256i8: return "v256i8";
case MVT::v1i16: return "v1i16";
case MVT::v2i16: return "v2i16";
+ case MVT::v3i16: return "v3i16";
case MVT::v4i16: return "v4i16";
case MVT::v8i16: return "v8i16";
case MVT::v16i16: return "v16i16";
@@ -187,8 +189,11 @@ std::string EVT::getEVTString() const {
case MVT::v1f32: return "v1f32";
case MVT::v2f32: return "v2f32";
case MVT::v2f16: return "v2f16";
+ case MVT::v3f16: return "v3f16";
case MVT::v4f16: return "v4f16";
case MVT::v8f16: return "v8f16";
+ case MVT::v16f16: return "v16f16";
+ case MVT::v32f16: return "v32f16";
case MVT::v3f32: return "v3f32";
case MVT::v4f32: return "v4f32";
case MVT::v5f32: return "v5f32";
@@ -205,6 +210,48 @@ std::string EVT::getEVTString() const {
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
case MVT::v8f64: return "v8f64";
+ case MVT::nxv1i1: return "nxv1i1";
+ case MVT::nxv2i1: return "nxv2i1";
+ case MVT::nxv4i1: return "nxv4i1";
+ case MVT::nxv8i1: return "nxv8i1";
+ case MVT::nxv16i1: return "nxv16i1";
+ case MVT::nxv32i1: return "nxv32i1";
+ case MVT::nxv1i8: return "nxv1i8";
+ case MVT::nxv2i8: return "nxv2i8";
+ case MVT::nxv4i8: return "nxv4i8";
+ case MVT::nxv8i8: return "nxv8i8";
+ case MVT::nxv16i8: return "nxv16i8";
+ case MVT::nxv32i8: return "nxv32i8";
+ case MVT::nxv1i16: return "nxv1i16";
+ case MVT::nxv2i16: return "nxv2i16";
+ case MVT::nxv4i16: return "nxv4i16";
+ case MVT::nxv8i16: return "nxv8i16";
+ case MVT::nxv16i16:return "nxv16i16";
+ case MVT::nxv32i16:return "nxv32i16";
+ case MVT::nxv1i32: return "nxv1i32";
+ case MVT::nxv2i32: return "nxv2i32";
+ case MVT::nxv4i32: return "nxv4i32";
+ case MVT::nxv8i32: return "nxv8i32";
+ case MVT::nxv16i32:return "nxv16i32";
+ case MVT::nxv32i32:return "nxv32i32";
+ case MVT::nxv1i64: return "nxv1i64";
+ case MVT::nxv2i64: return "nxv2i64";
+ case MVT::nxv4i64: return "nxv4i64";
+ case MVT::nxv8i64: return "nxv8i64";
+ case MVT::nxv16i64:return "nxv16i64";
+ case MVT::nxv32i64:return "nxv32i64";
+ case MVT::nxv2f16: return "nxv2f16";
+ case MVT::nxv4f16: return "nxv4f16";
+ case MVT::nxv8f16: return "nxv8f16";
+ case MVT::nxv1f32: return "nxv1f32";
+ case MVT::nxv2f32: return "nxv2f32";
+ case MVT::nxv4f32: return "nxv4f32";
+ case MVT::nxv8f32: return "nxv8f32";
+ case MVT::nxv16f32:return "nxv16f32";
+ case MVT::nxv1f64: return "nxv1f64";
+ case MVT::nxv2f64: return "nxv2f64";
+ case MVT::nxv4f64: return "nxv4f64";
+ case MVT::nxv8f64: return "nxv8f64";
case MVT::Metadata:return "Metadata";
case MVT::Untyped: return "Untyped";
case MVT::exnref : return "exnref";
@@ -241,6 +288,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32);
case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64);
case MVT::v128i1: return VectorType::get(Type::getInt1Ty(Context), 128);
+ case MVT::v256i1: return VectorType::get(Type::getInt1Ty(Context), 256);
case MVT::v512i1: return VectorType::get(Type::getInt1Ty(Context), 512);
case MVT::v1024i1: return VectorType::get(Type::getInt1Ty(Context), 1024);
case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1);
@@ -254,6 +302,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v256i8: return VectorType::get(Type::getInt8Ty(Context), 256);
case MVT::v1i16: return VectorType::get(Type::getInt16Ty(Context), 1);
case MVT::v2i16: return VectorType::get(Type::getInt16Ty(Context), 2);
+ case MVT::v3i16: return VectorType::get(Type::getInt16Ty(Context), 3);
case MVT::v4i16: return VectorType::get(Type::getInt16Ty(Context), 4);
case MVT::v8i16: return VectorType::get(Type::getInt16Ty(Context), 8);
case MVT::v16i16: return VectorType::get(Type::getInt16Ty(Context), 16);
@@ -282,8 +331,11 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v32i64: return VectorType::get(Type::getInt64Ty(Context), 32);
case MVT::v1i128: return VectorType::get(Type::getInt128Ty(Context), 1);
case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2);
+ case MVT::v3f16: return VectorType::get(Type::getHalfTy(Context), 3);
case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4);
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
+ case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
+ case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
@@ -302,8 +354,92 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2);
case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4);
case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8);
+ case MVT::nxv1i1:
+ return VectorType::get(Type::getInt1Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i1:
+ return VectorType::get(Type::getInt1Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i1:
+ return VectorType::get(Type::getInt1Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i1:
+ return VectorType::get(Type::getInt1Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i1:
+ return VectorType::get(Type::getInt1Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i1:
+ return VectorType::get(Type::getInt1Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i8:
+ return VectorType::get(Type::getInt8Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i8:
+ return VectorType::get(Type::getInt8Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i8:
+ return VectorType::get(Type::getInt8Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i8:
+ return VectorType::get(Type::getInt8Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i8:
+ return VectorType::get(Type::getInt8Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i8:
+ return VectorType::get(Type::getInt8Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i16:
+ return VectorType::get(Type::getInt16Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i16:
+ return VectorType::get(Type::getInt16Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i16:
+ return VectorType::get(Type::getInt16Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i16:
+ return VectorType::get(Type::getInt16Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i16:
+ return VectorType::get(Type::getInt16Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i16:
+ return VectorType::get(Type::getInt16Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv1i32:
+ return VectorType::get(Type::getInt32Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i32:
+ return VectorType::get(Type::getInt32Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i32:
+ return VectorType::get(Type::getInt32Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i32:
+ return VectorType::get(Type::getInt32Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i32:
+ return VectorType::get(Type::getInt32Ty(Context), 16,/*Scalable=*/ true);
+ case MVT::nxv32i32:
+ return VectorType::get(Type::getInt32Ty(Context), 32,/*Scalable=*/ true);
+ case MVT::nxv1i64:
+ return VectorType::get(Type::getInt64Ty(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2i64:
+ return VectorType::get(Type::getInt64Ty(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4i64:
+ return VectorType::get(Type::getInt64Ty(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8i64:
+ return VectorType::get(Type::getInt64Ty(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16i64:
+ return VectorType::get(Type::getInt64Ty(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv32i64:
+ return VectorType::get(Type::getInt64Ty(Context), 32, /*Scalable=*/ true);
+ case MVT::nxv2f16:
+ return VectorType::get(Type::getHalfTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f16:
+ return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f16:
+ return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv1f32:
+ return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2f32:
+ return VectorType::get(Type::getFloatTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f32:
+ return VectorType::get(Type::getFloatTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f32:
+ return VectorType::get(Type::getFloatTy(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv16f32:
+ return VectorType::get(Type::getFloatTy(Context), 16, /*Scalable=*/ true);
+ case MVT::nxv1f64:
+ return VectorType::get(Type::getDoubleTy(Context), 1, /*Scalable=*/ true);
+ case MVT::nxv2f64:
+ return VectorType::get(Type::getDoubleTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4f64:
+ return VectorType::get(Type::getDoubleTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8f64:
+ return VectorType::get(Type::getDoubleTy(Context), 8, /*Scalable=*/ true);
case MVT::Metadata: return Type::getMetadataTy(Context);
- }
+ }
}
/// Return the value type corresponding to the specified type. This returns all
@@ -329,7 +465,8 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::VectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
return getVectorVT(
- getVT(VTy->getElementType(), false), VTy->getNumElements());
+ getVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
}
}
}
@@ -345,8 +482,9 @@ EVT EVT::getEVT(Type *Ty, bool HandleUnknown){
return getIntegerVT(Ty->getContext(), cast<IntegerType>(Ty)->getBitWidth());
case Type::VectorTyID: {
VectorType *VTy = cast<VectorType>(Ty);
- return getVectorVT(Ty->getContext(), getEVT(VTy->getElementType(), false),
- VTy->getNumElements());
+ return getVectorVT(Ty->getContext(),
+ getEVT(VTy->getElementType(), /*HandleUnknown=*/ false),
+ VTy->getElementCount());
}
}
}
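Aside: getVT() and getEVT() now forward VTy->getElementCount() instead of a bare lane count, which is what lets <vscale x N x T> IR types reach the nxv* MVTs added earlier in this file. A minimal round-trip sketch (assumes an LLVMContext Ctx plus the ValueTypes.h and DerivedTypes.h headers):

    // Build <vscale x 4 x i32> and map it to a value type, as the updated
    // getEVT() path now does.
    Type *STy = VectorType::get(Type::getInt32Ty(Ctx), 4, /*Scalable=*/true);
    EVT VT = EVT::getEVT(STy);
    assert(VT.isScalableVector() && VT.getVectorNumElements() == 4);
    assert(VT.getEVTString() == "nxv4i32");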
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 4a06704a8876..5312e2eea96b 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -80,15 +80,14 @@ void VirtRegMap::grow() {
Virt2SplitMap.resize(NumRegs);
}
-void VirtRegMap::assignVirt2Phys(unsigned virtReg, MCPhysReg physReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg) &&
- TargetRegisterInfo::isPhysicalRegister(physReg));
- assert(Virt2PhysMap[virtReg] == NO_PHYS_REG &&
+void VirtRegMap::assignVirt2Phys(Register virtReg, MCPhysReg physReg) {
+ assert(virtReg.isVirtual() && Register::isPhysicalRegister(physReg));
+ assert(Virt2PhysMap[virtReg.id()] == NO_PHYS_REG &&
"attempt to assign physical register to already mapped "
"virtual register");
assert(!getRegInfo().isReserved(physReg) &&
"Attempt to map virtReg to a reserved physReg");
- Virt2PhysMap[virtReg] = physReg;
+ Virt2PhysMap[virtReg.id()] = physReg;
}
unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
@@ -99,46 +98,46 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
return SS;
}
-bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) {
- unsigned Hint = MRI->getSimpleHint(VirtReg);
- if (!Hint)
+bool VirtRegMap::hasPreferredPhys(Register VirtReg) {
+ Register Hint = MRI->getSimpleHint(VirtReg);
+ if (!Hint.isValid())
return false;
- if (TargetRegisterInfo::isVirtualRegister(Hint))
+ if (Hint.isVirtual())
Hint = getPhys(Hint);
return getPhys(VirtReg) == Hint;
}
-bool VirtRegMap::hasKnownPreference(unsigned VirtReg) {
+bool VirtRegMap::hasKnownPreference(Register VirtReg) {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(VirtReg);
- if (TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ if (Register::isPhysicalRegister(Hint.second))
return true;
- if (TargetRegisterInfo::isVirtualRegister(Hint.second))
+ if (Register::isVirtualRegister(Hint.second))
return hasPhys(Hint.second);
return false;
}
-int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+int VirtRegMap::assignVirt2StackSlot(Register virtReg) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
+ return Virt2StackSlotMap[virtReg.id()] = createSpillSlot(RC);
}
-void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
- assert(TargetRegisterInfo::isVirtualRegister(virtReg));
- assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
+void VirtRegMap::assignVirt2StackSlot(Register virtReg, int SS) {
+ assert(virtReg.isVirtual());
+ assert(Virt2StackSlotMap[virtReg.id()] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
assert((SS >= 0 ||
(SS >= MF->getFrameInfo().getObjectIndexBegin())) &&
"illegal fixed frame index");
- Virt2StackSlotMap[virtReg] = SS;
+ Virt2StackSlotMap[virtReg.id()] = SS;
}
void VirtRegMap::print(raw_ostream &OS, const Module*) const {
OS << "********** REGISTER MAP **********\n";
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
OS << '[' << printReg(Reg, TRI) << " -> "
<< printReg(Virt2PhysMap[Reg], TRI) << "] "
@@ -147,7 +146,7 @@ void VirtRegMap::print(raw_ostream &OS, const Module*) const {
}
for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
- unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ unsigned Reg = Register::index2VirtReg(i);
if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
OS << '[' << printReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
<< "] " << TRI->getRegClassName(MRI->getRegClass(Reg)) << "\n";
@@ -185,10 +184,10 @@ class VirtRegRewriter : public MachineFunctionPass {
void rewrite();
void addMBBLiveIns();
bool readsUndefSubreg(const MachineOperand &MO) const;
- void addLiveInsForSubRanges(const LiveInterval &LI, unsigned PhysReg) const;
+ void addLiveInsForSubRanges(const LiveInterval &LI, Register PhysReg) const;
void handleIdentityCopy(MachineInstr &MI) const;
void expandCopyBundle(MachineInstr &MI) const;
- bool subRegLiveThrough(const MachineInstr &MI, unsigned SuperPhysReg) const;
+ bool subRegLiveThrough(const MachineInstr &MI, Register SuperPhysReg) const;
public:
static char ID;
@@ -265,7 +264,7 @@ bool VirtRegRewriter::runOnMachineFunction(MachineFunction &fn) {
}
void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
- unsigned PhysReg) const {
+ Register PhysReg) const {
assert(!LI.empty());
assert(LI.hasSubRanges());
@@ -312,7 +311,7 @@ void VirtRegRewriter::addLiveInsForSubRanges(const LiveInterval &LI,
// assignments.
void VirtRegRewriter::addMBBLiveIns() {
for (unsigned Idx = 0, IdxE = MRI->getNumVirtRegs(); Idx != IdxE; ++Idx) {
- unsigned VirtReg = TargetRegisterInfo::index2VirtReg(Idx);
+ Register VirtReg = Register::index2VirtReg(Idx);
if (MRI->reg_nodbg_empty(VirtReg))
continue;
LiveInterval &LI = LIS->getInterval(VirtReg);
@@ -320,7 +319,7 @@ void VirtRegRewriter::addMBBLiveIns() {
continue;
// This is a virtual register that is live across basic blocks. Its
// assigned PhysReg must be marked as live-in to those blocks.
- unsigned PhysReg = VRM->getPhys(VirtReg);
+ Register PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG && "Unmapped virtual register.");
if (LI.hasSubRanges()) {
@@ -353,7 +352,7 @@ bool VirtRegRewriter::readsUndefSubreg(const MachineOperand &MO) const {
if (MO.isUndef())
return true;
- unsigned Reg = MO.getReg();
+ Register Reg = MO.getReg();
const LiveInterval &LI = LIS->getInterval(Reg);
const MachineInstr &MI = *MO.getParent();
SlotIndex BaseIndex = LIS->getInstructionIndex(MI);
@@ -469,7 +468,7 @@ void VirtRegRewriter::expandCopyBundle(MachineInstr &MI) const {
/// \pre \p MI defines a subregister of a virtual register that
/// has been assigned to \p SuperPhysReg.
bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
- unsigned SuperPhysReg) const {
+ Register SuperPhysReg) const {
SlotIndex MIIndex = LIS->getInstructionIndex(MI);
SlotIndex BeforeMIUses = MIIndex.getBaseIndex();
SlotIndex AfterMIDefs = MIIndex.getBoundaryIndex();
@@ -493,9 +492,9 @@ bool VirtRegRewriter::subRegLiveThrough(const MachineInstr &MI,
void VirtRegRewriter::rewrite() {
bool NoSubRegLiveness = !MRI->subRegLivenessEnabled();
- SmallVector<unsigned, 8> SuperDeads;
- SmallVector<unsigned, 8> SuperDefs;
- SmallVector<unsigned, 8> SuperKills;
+ SmallVector<Register, 8> SuperDeads;
+ SmallVector<Register, 8> SuperDefs;
+ SmallVector<Register, 8> SuperKills;
for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
MBBI != MBBE; ++MBBI) {
@@ -513,10 +512,10 @@ void VirtRegRewriter::rewrite() {
if (MO.isRegMask())
MRI->addPhysRegsUsedFromRegMask(MO.getRegMask());
- if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (!MO.isReg() || !MO.getReg().isVirtual())
continue;
- unsigned VirtReg = MO.getReg();
- unsigned PhysReg = VRM->getPhys(VirtReg);
+ Register VirtReg = MO.getReg();
+ Register PhysReg = VRM->getPhys(VirtReg);
assert(PhysReg != VirtRegMap::NO_PHYS_REG &&
"Instruction uses unmapped VirtReg");
assert(!MRI->isReserved(PhysReg) && "Reserved register assignment");
@@ -562,7 +561,7 @@ void VirtRegRewriter::rewrite() {
// PhysReg operands cannot have subregister indexes.
PhysReg = TRI->getSubReg(PhysReg, SubReg);
- assert(PhysReg && "Invalid SubReg for physical register");
+ assert(PhysReg.isValid() && "Invalid SubReg for physical register");
MO.setSubReg(0);
}
// Rewrite. Note we could have used MachineOperand::substPhysReg(), but
diff --git a/lib/CodeGen/XRayInstrumentation.cpp b/lib/CodeGen/XRayInstrumentation.cpp
index 19c59e9542b4..119c3fd1ec7f 100644
--- a/lib/CodeGen/XRayInstrumentation.cpp
+++ b/lib/CodeGen/XRayInstrumentation.cpp
@@ -111,7 +111,7 @@ void XRayInstrumentation::replaceRetWithPatchableRet(
MIB.add(MO);
Terminators.push_back(&T);
if (T.isCall())
- MF.updateCallSiteInfo(&T);
+ MF.eraseCallSiteInfo(&T);
}
}
}
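Aside: once replaceRetWithPatchableRet() has rewritten a terminator that is also a call (a tail call) into its patchable pseudo, the original instruction is deleted, so its call-site entry must be erased rather than carried over; the table must never hold an instruction that is about to go away. In brief:

    if (T.isCall())
      MF.eraseCallSiteInfo(&T); // drop the entry for the soon-to-be-deleted call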