author     Dimitry Andric <dim@FreeBSD.org>   2011-02-20 12:57:14 +0000
committer  Dimitry Andric <dim@FreeBSD.org>   2011-02-20 12:57:14 +0000
commit     cf099d11218cb6f6c5cce947d6738e347f07fb12 (patch)
tree       d2b61ce94e654cb01a254d2195259db5f9cc3f3c /lib/CodeGen
parent     49011b52fcba02a6051957b84705159f52fae4e4 (diff)
Vendor import of llvm trunk r126079 (tag vendor/llvm/llvm-r126079)
Notes:
    svn path=/vendor/llvm/dist/; revision=218885
    svn path=/vendor/llvm/llvm-r126079/; revision=218886; tag=vendor/llvm/llvm-r126079
Diffstat (limited to 'lib/CodeGen')
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 45
-rw-r--r--  lib/CodeGen/AllocationOrder.cpp | 68
-rw-r--r--  lib/CodeGen/AllocationOrder.h | 54
-rw-r--r--  lib/CodeGen/Analysis.cpp | 30
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 64
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 61
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 122
-rw-r--r--  lib/CodeGen/AsmPrinter/CMakeLists.txt | 3
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 138
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 681
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 139
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 338
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 155
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfTableException.cpp | 349
-rw-r--r--  lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 1
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 17
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 15
-rw-r--r--  lib/CodeGen/CallingConvLower.cpp | 40
-rw-r--r--  lib/CodeGen/CodeGen.cpp | 61
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.cpp | 99
-rw-r--r--  lib/CodeGen/CriticalAntiDepBreaker.h | 12
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 16
-rw-r--r--  lib/CodeGen/DwarfEHPrepare.cpp | 30
-rw-r--r--  lib/CodeGen/ELF.h | 2
-rw-r--r--  lib/CodeGen/ELFWriter.cpp | 17
-rw-r--r--  lib/CodeGen/EdgeBundles.cpp | 86
-rw-r--r--  lib/CodeGen/ExpandISelPseudos.cpp | 82
-rw-r--r--  lib/CodeGen/GCMetadata.cpp | 7
-rw-r--r--  lib/CodeGen/GCStrategy.cpp | 44
-rw-r--r--  lib/CodeGen/IfConversion.cpp | 247
-rw-r--r--  lib/CodeGen/InlineSpiller.cpp | 287
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 32
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 43
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 26
-rw-r--r--  lib/CodeGen/LiveDebugVariables.cpp | 711
-rw-r--r--  lib/CodeGen/LiveDebugVariables.h | 63
-rw-r--r--  lib/CodeGen/LiveInterval.cpp | 312
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 357
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.cpp | 315
-rw-r--r--  lib/CodeGen/LiveIntervalUnion.h | 258
-rw-r--r--  lib/CodeGen/LiveRangeEdit.cpp | 129
-rw-r--r--  lib/CodeGen/LiveRangeEdit.h | 135
-rw-r--r--  lib/CodeGen/LiveStackAnalysis.cpp | 20
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 42
-rw-r--r--  lib/CodeGen/LocalStackSlotAllocation.cpp | 29
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 79
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 162
-rw-r--r--  lib/CodeGen/MachineDominators.cpp | 3
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 69
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 12
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 171
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 506
-rw-r--r--  lib/CodeGen/MachineLoopInfo.cpp | 7
-rw-r--r--  lib/CodeGen/MachineLoopRanges.cpp | 116
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 76
-rw-r--r--  lib/CodeGen/MachineRegisterInfo.cpp | 64
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 312
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 377
-rw-r--r--  lib/CodeGen/OptimizePHIs.cpp | 6
-rw-r--r--  lib/CodeGen/PBQP/Graph.h | 425
-rw-r--r--  lib/CodeGen/PBQP/HeuristicBase.h | 246
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 616
-rw-r--r--  lib/CodeGen/PBQP/Heuristics/Briggs.h | 460
-rw-r--r--  lib/CodeGen/PBQP/Math.h | 288
-rw-r--r--  lib/CodeGen/PBQP/Solution.h | 89
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 143
-rw-r--r--  lib/CodeGen/PHIElimination.h | 115
-rw-r--r--  lib/CodeGen/PHIEliminationUtils.cpp | 61
-rw-r--r--  lib/CodeGen/PHIEliminationUtils.h | 25
-rw-r--r--  lib/CodeGen/PeepholeOptimizer.cpp | 131
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 74
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 43
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 7
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 62
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.h | 4
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 2
-rw-r--r--  lib/CodeGen/RegAllocBase.h | 181
-rw-r--r--  lib/CodeGen/RegAllocBasic.cpp | 523
-rw-r--r--  lib/CodeGen/RegAllocFast.cpp | 68
-rw-r--r--  lib/CodeGen/RegAllocGreedy.cpp | 1285
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 109
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 994
-rw-r--r--  lib/CodeGen/RegisterCoalescer.cpp | 3
-rw-r--r--  lib/CodeGen/RenderMachineFunction.cpp | 14
-rw-r--r--  lib/CodeGen/RenderMachineFunction.h | 4
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 15
-rw-r--r--  lib/CodeGen/ScheduleDAGEmit.cpp | 2
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 137
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.h | 10
-rw-r--r--  lib/CodeGen/ScoreboardHazardRecognizer.cpp (renamed from lib/CodeGen/PostRAHazardRecognizer.cpp) | 137
-rw-r--r--  lib/CodeGen/SelectionDAG/CMakeLists.txt | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 1211
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 84
-rw-r--r--  lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 1
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 141
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 1056
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 33
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 426
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 49
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.h | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 66
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 325
-rw-r--r--  lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 12
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp | 51
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 1969
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 333
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 35
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 684
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 850
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 791
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 669
-rw-r--r--  lib/CodeGen/ShrinkWrapping.cpp | 4
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 247
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.h | 13
-rw-r--r--  lib/CodeGen/SjLjEHPrepare.cpp | 446
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 33
-rw-r--r--  lib/CodeGen/SpillPlacement.cpp | 330
-rw-r--r--  lib/CodeGen/SpillPlacement.h | 108
-rw-r--r--  lib/CodeGen/Spiller.cpp | 316
-rw-r--r--  lib/CodeGen/Spiller.h | 12
-rw-r--r--  lib/CodeGen/SplitKit.cpp | 1491
-rw-r--r--  lib/CodeGen/SplitKit.h | 419
-rw-r--r--  lib/CodeGen/Splitter.cpp | 32
-rw-r--r--  lib/CodeGen/Splitter.h | 4
-rw-r--r--  lib/CodeGen/StackProtector.cpp | 28
-rw-r--r--  lib/CodeGen/StackSlotColoring.cpp | 30
-rw-r--r--  lib/CodeGen/StrongPHIElimination.cpp | 1694
-rw-r--r--  lib/CodeGen/TailDuplication.cpp | 21
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 50
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 253
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 54
-rw-r--r--  lib/CodeGen/UnreachableBlockElim.cpp | 15
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 165
-rw-r--r--  lib/CodeGen/VirtRegMap.h | 33
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 896
139 files changed, 17732 insertions, 11835 deletions
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 5a634d6ccb01..b520d8fcedc0 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -155,16 +155,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
// In a return block, examine the function live-out regs.
for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(),
E = MRI.liveout_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
}
@@ -176,16 +171,11 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
SE = BB->succ_end(); SI != SE; ++SI)
for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
E = (*SI)->livein_end(); I != E; ++I) {
- unsigned Reg = *I;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
- State->UnionGroups(AliasReg, 0);
- KillIndices[AliasReg] = BB->size();
- DefIndices[AliasReg] = ~0u;
+ for (const unsigned *Alias = TRI->getOverlaps(*I);
+ unsigned Reg = *Alias; ++Alias) {
+ State->UnionGroups(Reg, 0);
+ KillIndices[Reg] = BB->size();
+ DefIndices[Reg] = ~0u;
}
}
@@ -197,12 +187,8 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
if (!IsReturnBlock && !Pristine.test(Reg)) continue;
- State->UnionGroups(Reg, 0);
- KillIndices[Reg] = BB->size();
- DefIndices[Reg] = ~0u;
- // Repeat, for all aliases.
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias) {
State->UnionGroups(AliasReg, 0);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
@@ -435,12 +421,9 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI,
continue;
// Update def for Reg and aliases.
- DefIndices[Reg] = Count;
- for (const unsigned *Alias = TRI->getAliasSet(Reg);
- *Alias; ++Alias) {
- unsigned AliasReg = *Alias;
+ for (const unsigned *Alias = TRI->getOverlaps(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
DefIndices[AliasReg] = Count;
- }
}
}
diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp
new file mode 100644
index 000000000000..20c7625f3253
--- /dev/null
+++ b/lib/CodeGen/AllocationOrder.cpp
@@ -0,0 +1,68 @@
+//===-- llvm/CodeGen/AllocationOrder.cpp - Allocation Order ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#include "AllocationOrder.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+
+using namespace llvm;
+
+// Compare VirtRegMap::getRegAllocPref().
+AllocationOrder::AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const BitVector &ReservedRegs)
+ : Pos(0), Reserved(ReservedRegs) {
+ const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg);
+ std::pair<unsigned, unsigned> HintPair =
+ VRM.getRegInfo().getRegAllocationHint(VirtReg);
+
+ // HintPair.second is a register, phys or virt.
+ Hint = HintPair.second;
+
+ // Translate to physreg, or 0 if not assigned yet.
+ if (TargetRegisterInfo::isVirtualRegister(Hint))
+ Hint = VRM.getPhys(Hint);
+
+ // The remaining allocation order may depend on the hint.
+ tie(Begin, End) = VRM.getTargetRegInfo()
+ .getAllocationOrder(RC, HintPair.first, Hint, VRM.getMachineFunction());
+
+ // Target-dependent hints require resolution.
+ if (HintPair.first)
+ Hint = VRM.getTargetRegInfo().ResolveRegAllocHint(HintPair.first, Hint,
+ VRM.getMachineFunction());
+
+ // The hint must be a valid physreg for allocation.
+ if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) ||
+ !RC->contains(Hint) || ReservedRegs.test(Hint)))
+ Hint = 0;
+}
+
+unsigned AllocationOrder::next() {
+ // First take the hint.
+ if (!Pos) {
+ Pos = Begin;
+ if (Hint)
+ return Hint;
+ }
+ // Then look at the order from TRI.
+ while(Pos != End) {
+ unsigned Reg = *Pos++;
+ if (Reg != Hint && !Reserved.test(Reg))
+ return Reg;
+ }
+ return 0;
+}
diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h
new file mode 100644
index 000000000000..3db4b6925fca
--- /dev/null
+++ b/lib/CodeGen/AllocationOrder.h
@@ -0,0 +1,54 @@
+//===-- llvm/CodeGen/AllocationOrder.h - Allocation Order -*- C++ -*-------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements an allocation order for virtual registers.
+//
+// The preferred allocation order for a virtual register depends on allocation
+// hints and target hooks. The AllocationOrder class encapsulates all of that.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_ALLOCATIONORDER_H
+#define LLVM_CODEGEN_ALLOCATIONORDER_H
+
+namespace llvm {
+
+class BitVector;
+class VirtRegMap;
+
+class AllocationOrder {
+ const unsigned *Begin;
+ const unsigned *End;
+ const unsigned *Pos;
+ const BitVector &Reserved;
+ unsigned Hint;
+public:
+
+ /// AllocationOrder - Create a new AllocationOrder for VirtReg.
+ /// @param VirtReg Virtual register to allocate for.
+ /// @param VRM Virtual register map for function.
+ /// @param ReservedRegs Set of reserved registers as returned by
+ /// TargetRegisterInfo::getReservedRegs().
+ AllocationOrder(unsigned VirtReg,
+ const VirtRegMap &VRM,
+ const BitVector &ReservedRegs);
+
+ /// next - Return the next physical register in the allocation order, or 0.
+ /// It is safe to call next again after it returned 0.
+ /// It will keep returning 0 until rewind() is called.
+ unsigned next();
+
+ /// rewind - Start over from the beginning.
+ void rewind() { Pos = 0; }
+
+};
+
+} // end namespace llvm
+
+#endif
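
A note on the iteration contract of the new class above: next() hands out the hint (if any) exactly once, then walks the target-provided order, skipping both reserved registers and the hint so nothing is offered twice; rewind() restarts the whole walk. A minimal standalone C++ sketch of that logic, using plain containers as stand-ins for VirtRegMap, TargetRegisterInfo and the reserved-register BitVector (names below are illustrative, not LLVM API):

#include <cstdio>
#include <vector>

// Simplified model of AllocationOrder: yield the hint first, then the
// target-provided order, never yielding reserved registers or repeating
// the hint. Register 0 terminates the walk, as in the real class.
struct SimpleAllocationOrder {
  const std::vector<unsigned> &Order;  // stand-in for the TRI allocation order
  const std::vector<bool> &Reserved;   // stand-in for the reserved-reg BitVector
  unsigned Hint;                       // 0 means "no usable hint"
  size_t Pos = 0;
  bool HintDone = false;

  unsigned next() {
    if (!HintDone) {                   // first call: hand out the hint once
      HintDone = true;
      if (Hint)
        return Hint;
    }
    while (Pos != Order.size()) {      // then the order, skipping the hint
      unsigned Reg = Order[Pos++];     // and any reserved register
      if (Reg != Hint && !Reserved[Reg])
        return Reg;
    }
    return 0;
  }

  void rewind() { Pos = 0; HintDone = false; }
};

int main() {
  std::vector<unsigned> Order = {1, 2, 3, 4};
  std::vector<bool> Reserved = {false, false, true, false, false};
  SimpleAllocationOrder AO{Order, Reserved, /*Hint=*/3};
  for (unsigned Reg; (Reg = AO.next()) != 0; )
    std::printf("try r%u\n", Reg);     // prints r3, r1, r4
  return 0;
}

In the real class the constructor additionally drops the hint up front if it is not a valid, unreserved physical register of the right class, as the AllocationOrder.cpp diff above shows.
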
diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp
index e3dd646c952e..36638c36de67 100644
--- a/lib/CodeGen/Analysis.cpp
+++ b/lib/CodeGen/Analysis.cpp
@@ -19,6 +19,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
@@ -30,7 +31,7 @@ using namespace llvm;
/// of insertvalue or extractvalue indices that identify a member, return
/// the linearized index of the start of the member.
///
-unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
+unsigned llvm::ComputeLinearIndex(const Type *Ty,
const unsigned *Indices,
const unsigned *IndicesEnd,
unsigned CurIndex) {
@@ -45,8 +46,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
EE = STy->element_end();
EI != EE; ++EI) {
if (Indices && *Indices == unsigned(EI - EB))
- return ComputeLinearIndex(TLI, *EI, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, *EI, 0, 0, CurIndex);
+ return ComputeLinearIndex(*EI, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(*EI, 0, 0, CurIndex);
}
return CurIndex;
}
@@ -55,8 +56,8 @@ unsigned llvm::ComputeLinearIndex(const TargetLowering &TLI, const Type *Ty,
const Type *EltTy = ATy->getElementType();
for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
if (Indices && *Indices == i)
- return ComputeLinearIndex(TLI, EltTy, Indices+1, IndicesEnd, CurIndex);
- CurIndex = ComputeLinearIndex(TLI, EltTy, 0, 0, CurIndex);
+ return ComputeLinearIndex(EltTy, Indices+1, IndicesEnd, CurIndex);
+ CurIndex = ComputeLinearIndex(EltTy, 0, 0, CurIndex);
}
return CurIndex;
}
@@ -125,7 +126,7 @@ GlobalVariable *llvm::ExtractTypeInfo(Value *V) {
/// hasInlineAsmMemConstraint - Return true if the inline asm instruction being
/// processed uses a memory 'm' constraint.
bool
-llvm::hasInlineAsmMemConstraint(std::vector<InlineAsm::ConstraintInfo> &CInfos,
+llvm::hasInlineAsmMemConstraint(InlineAsm::ConstraintInfoVector &CInfos,
const TargetLowering &TLI) {
for (unsigned i = 0, e = CInfos.size(); i != e; ++i) {
InlineAsm::ConstraintInfo &CI = CInfos[i];
@@ -283,3 +284,20 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr,
return true;
}
+bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
+ const TargetLowering &TLI) {
+ const Function *F = DAG.getMachineFunction().getFunction();
+
+ // Conservatively require the attributes of the call to match those of
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if (CallerRetAttr & ~Attribute::NoAlias)
+ return false;
+
+ // It's not safe to eliminate the sign / zero extension of the return value.
+ if ((CallerRetAttr & Attribute::ZExt) || (CallerRetAttr & Attribute::SExt))
+ return false;
+
+ // Check if the only use is a function return node.
+ return TLI.isUsedByReturnOnly(Node);
+}
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index d358ab20ffc5..43e8990a9da1 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -38,6 +38,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/ErrorHandling.h"
@@ -178,16 +179,24 @@ bool AsmPrinter::doInitialization(Module &M) {
if (!M.getModuleInlineAsm().empty()) {
OutStreamer.AddComment("Start of file scope inline assembly");
OutStreamer.AddBlankLine();
- EmitInlineAsm(M.getModuleInlineAsm()+"\n", 0/*no loc cookie*/);
+ EmitInlineAsm(M.getModuleInlineAsm()+"\n");
OutStreamer.AddComment("End of file scope inline assembly");
OutStreamer.AddBlankLine();
}
if (MAI->doesSupportDebugInformation())
DD = new DwarfDebug(this, &M);
-
+
if (MAI->doesSupportExceptionHandling())
- DE = new DwarfException(this);
+ switch (MAI->getExceptionHandlingType()) {
+ default:
+ case ExceptionHandling::DwarfTable:
+ DE = new DwarfTableException(this);
+ break;
+ case ExceptionHandling::DwarfCFI:
+ DE = new DwarfCFIException(this);
+ break;
+ }
return false;
}
@@ -282,8 +291,12 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle common symbols.
if (GVKind.isCommon()) {
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
+
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
@@ -301,11 +314,15 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
OutStreamer.EmitLocalCommonSymbol(GVSym, Size);
return;
}
+
+ unsigned Align = 1 << AlignLog;
+ if (!getObjFileLowering().getCommDirectiveSupportsAlignment())
+ Align = 0;
// .local _foo
OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Local);
// .comm _foo, 42, 4
- OutStreamer.EmitCommonSymbol(GVSym, Size, 1 << AlignLog);
+ OutStreamer.EmitCommonSymbol(GVSym, Size, Align);
return;
}
@@ -327,6 +344,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) {
// Handle thread local data for mach-o which requires us to output an
// additional structure of data and mangle the original symbol so that we
// can reference it later.
+ //
+ // TODO: This should become an "emit thread local global" method on TLOF.
+ // All of this macho specific stuff should be sunk down into TLOFMachO and
+ // stuff like "TLSExtraDataSection" should no longer be part of the parent
+ // TLOF class. This will also make it more obvious that stuff like
+ // MCStreamer::EmitTBSSSymbol is macho specific and only called from macho
+ // specific code.
if (GVKind.isThreadLocal() && MAI->hasMachoTBSSDirective()) {
// Emit the .tbss symbol
MCSymbol *MangSym =
@@ -623,7 +647,7 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- DD->beginScope(II);
+ DD->beginInstruction(II);
}
if (isVerbose())
@@ -657,7 +681,7 @@ void AsmPrinter::EmitFunctionBody() {
if (ShouldPrintDebugScopes) {
NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled);
- DD->endScope(II);
+ DD->endInstruction(II);
}
}
}
@@ -729,7 +753,20 @@ bool AsmPrinter::doFinalization(Module &M) {
for (Module::const_global_iterator I = M.global_begin(), E = M.global_end();
I != E; ++I)
EmitGlobalVariable(I);
-
+
+ // Emit visibility info for declarations
+ for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) {
+ const Function &F = *I;
+ if (!F.isDeclaration())
+ continue;
+ GlobalValue::VisibilityTypes V = F.getVisibility();
+ if (V == GlobalValue::DefaultVisibility)
+ continue;
+
+ MCSymbol *Name = Mang->getSymbol(&F);
+ EmitVisibility(Name, V);
+ }
+
// Finalize debug and EH information.
if (DE) {
{
@@ -905,14 +942,6 @@ void AsmPrinter::EmitConstantPool() {
const Type *Ty = CPE.getType();
Offset = NewOffset + TM.getTargetData()->getTypeAllocSize(Ty);
-
- // Emit the label with a comment on it.
- if (isVerbose()) {
- OutStreamer.GetCommentOS() << "constant pool ";
- WriteTypeSymbolic(OutStreamer.GetCommentOS(), CPE.getType(),
- MF->getFunction()->getParent());
- OutStreamer.GetCommentOS() << '\n';
- }
OutStreamer.EmitLabel(GetCPISymbol(CPI));
if (CPE.isMachineConstantPoolEntry())
@@ -983,7 +1012,7 @@ void AsmPrinter::EmitJumpTableInfo() {
}
}
- // On some targets (e.g. Darwin) we want to emit two consequtive labels
+ // On some targets (e.g. Darwin) we want to emit two consecutive labels
// before each jump table. The first label is never referenced, but tells
// the assembler and linker the extents of the jump table object. The
// second label is actually referenced by the code.
@@ -1004,6 +1033,7 @@ void AsmPrinter::EmitJumpTableInfo() {
void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI,
const MachineBasicBlock *MBB,
unsigned UID) const {
+ assert(MBB && MBB->getNumber() >= 0 && "Invalid basic block");
const MCExpr *Value = 0;
switch (MJTI->getEntryKind()) {
case MachineJumpTableInfo::EK_Inline:
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
index ce4519c541e3..98a1bf2f1ce4 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp
@@ -19,7 +19,7 @@
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -36,9 +36,8 @@ void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128() && OutStreamer.hasRawTextSupport()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText("\t.sleb128\t" + Twine(Value));
+ if (MAI->hasLEB128()) {
+ OutStreamer.EmitSLEB128IntValue(Value);
return;
}
@@ -60,10 +59,10 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc,
unsigned PadTo) const {
if (isVerbose() && Desc)
OutStreamer.AddComment(Desc);
-
- if (MAI->hasLEB128() && PadTo == 0 && OutStreamer.hasRawTextSupport()) {
- // FIXME: MCize.
- OutStreamer.EmitRawText("\t.uleb128\t" + Twine(Value));
+
+ // FIXME: Should we add a PadTo option to the streamer?
+ if (MAI->hasLEB128() && PadTo == 0) {
+ OutStreamer.EmitULEB128IntValue(Value);
return;
}
@@ -157,7 +156,7 @@ void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const {
const MCExpr *Exp =
TLOF.getExprForDwarfReference(Sym, Mang, MMI, Encoding, OutStreamer);
- OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0);
+ OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding));
}
void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{
@@ -215,8 +214,8 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
const TargetRegisterInfo *RI = TM.getRegisterInfo();
int stackGrowth = TM.getTargetData()->getPointerSize();
- if (TM.getFrameInfo()->getStackGrowthDirection() !=
- TargetFrameInfo::StackGrowsUp)
+ if (TM.getFrameLowering()->getStackGrowthDirection() !=
+ TargetFrameLowering::StackGrowsUp)
stackGrowth *= -1;
for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
@@ -277,3 +276,43 @@ void AsmPrinter::EmitFrameMoves(const std::vector<MachineMove> &Moves,
}
}
}
+
+/// EmitFrameMoves - Emit frame instructions to describe the layout of the
+/// frame.
+void AsmPrinter::EmitCFIFrameMoves(const std::vector<MachineMove> &Moves) const {
+ const TargetRegisterInfo *RI = TM.getRegisterInfo();
+
+ int stackGrowth = TM.getTargetData()->getPointerSize();
+ if (TM.getFrameLowering()->getStackGrowthDirection() !=
+ TargetFrameLowering::StackGrowsUp)
+ stackGrowth *= -1;
+
+ for (unsigned i = 0, N = Moves.size(); i < N; ++i) {
+ const MachineMove &Move = Moves[i];
+ MCSymbol *Label = Move.getLabel();
+ // Throw out move if the label is invalid.
+ if (Label && !Label->isDefined()) continue; // Not emitted, in dead code.
+
+ const MachineLocation &Dst = Move.getDestination();
+ const MachineLocation &Src = Move.getSource();
+
+ // If advancing cfa.
+ if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
+ assert(!Src.isReg() && "Machine move not supported yet.");
+
+ if (Src.getReg() == MachineLocation::VirtualFP) {
+ OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
+ } else {
+ assert("Machine move not supported yet");
+ // Reg + Offset
+ }
+ } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
+ assert(Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
+ } else {
+ assert(!Dst.isReg() && "Machine move not supported yet.");
+ OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
+ Dst.getOffset());
+ }
+ }
+}
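
For context on the EmitULEB128IntValue/EmitSLEB128IntValue changes earlier in this file: ULEB128 is DWARF's variable-length unsigned encoding, seven value bits per byte with the high bit marking continuation. A small standalone encoder (illustrative only, not the MCStreamer implementation) looks like this:

#include <cstdint>
#include <cstdio>
#include <vector>

// Encode an unsigned value as ULEB128: 7 value bits per byte, low-order
// group first, high bit set on every byte except the last.
static std::vector<uint8_t> encodeULEB128(uint64_t Value) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80;                    // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  return Out;
}

int main() {
  // The classic DWARF example: 624485 encodes as e5 8e 26.
  for (uint8_t B : encodeULEB128(624485))
    std::printf("%02x ", B);
  std::printf("\n");
  return 0;
}

SLEB128 differs only in sign-extending the final group; the raw "\t.sleb128"/"\t.uleb128" text removed above produced the same bytes via the system assembler.
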
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index df0316814c08..c6166e2365a5 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -34,15 +34,47 @@
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+namespace {
+ struct SrcMgrDiagInfo {
+ const MDNode *LocInfo;
+ LLVMContext::InlineAsmDiagHandlerTy DiagHandler;
+ void *DiagContext;
+ };
+}
+
+/// SrcMgrDiagHandler - This callback is invoked when the SourceMgr for an
+/// inline asm has an error in it. diagInfo is a pointer to the SrcMgrDiagInfo
+/// struct above.
+static void SrcMgrDiagHandler(const SMDiagnostic &Diag, void *diagInfo) {
+ SrcMgrDiagInfo *DiagInfo = static_cast<SrcMgrDiagInfo *>(diagInfo);
+ assert(DiagInfo && "Diagnostic context not passed down?");
+
+ // If the inline asm had metadata associated with it, pull out a location
+ // cookie corresponding to which line the error occurred on.
+ unsigned LocCookie = 0;
+ if (const MDNode *LocInfo = DiagInfo->LocInfo) {
+ unsigned ErrorLine = Diag.getLineNo()-1;
+ if (ErrorLine >= LocInfo->getNumOperands())
+ ErrorLine = 0;
+
+ if (LocInfo->getNumOperands() != 0)
+ if (const ConstantInt *CI =
+ dyn_cast<ConstantInt>(LocInfo->getOperand(ErrorLine)))
+ LocCookie = CI->getZExtValue();
+ }
+
+ DiagInfo->DiagHandler(Diag, DiagInfo->DiagContext, LocCookie);
+}
+
/// EmitInlineAsm - Emit a blob of inline asm to the output streamer.
-void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
+void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode) const {
assert(!Str.empty() && "Can't emit empty inline asm block");
-
+
// Remember if the buffer is nul terminated or not so we can avoid a copy.
bool isNullTerminated = Str.back() == 0;
if (isNullTerminated)
Str = Str.substr(0, Str.size()-1);
-
+
// If the output streamer is actually a .s file, just emit the blob textually.
// This is useful in case the asm parser doesn't handle something but the
// system assembler does.
@@ -50,18 +82,23 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
OutStreamer.EmitRawText(Str);
return;
}
-
+
SourceMgr SrcMgr;
-
+ SrcMgrDiagInfo DiagInfo;
+
// If the current LLVMContext has an inline asm handler, set it in SourceMgr.
LLVMContext &LLVMCtx = MMI->getModule()->getContext();
bool HasDiagHandler = false;
- if (void *DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler()) {
- SrcMgr.setDiagHandler((SourceMgr::DiagHandlerTy)(intptr_t)DiagHandler,
- LLVMCtx.getInlineAsmDiagnosticContext(), LocCookie);
+ if (LLVMCtx.getInlineAsmDiagnosticHandler() != 0) {
+ // If the source manager has an issue, we arrange for SrcMgrDiagHandler
+ // to be invoked, getting DiagInfo passed into it.
+ DiagInfo.LocInfo = LocMDNode;
+ DiagInfo.DiagHandler = LLVMCtx.getInlineAsmDiagnosticHandler();
+ DiagInfo.DiagContext = LLVMCtx.getInlineAsmDiagnosticContext();
+ SrcMgr.setDiagHandler(SrcMgrDiagHandler, &DiagInfo);
HasDiagHandler = true;
}
-
+
MemoryBuffer *Buffer;
if (isNullTerminated)
Buffer = MemoryBuffer::getMemBuffer(Str, "<inline asm>");
@@ -70,7 +107,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
// Tell SrcMgr about this buffer, it takes ownership of the buffer.
SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
-
+
OwningPtr<MCAsmParser> Parser(createMCAsmParser(TM.getTarget(), SrcMgr,
OutContext, OutStreamer,
*MAI));
@@ -92,15 +129,15 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const {
/// instruction that is an inline asm.
void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
assert(MI->isInlineAsm() && "printInlineAsm only works on inline asms");
-
+
unsigned NumOperands = MI->getNumOperands();
-
+
// Count the number of register definitions to find the asm string.
unsigned NumDefs = 0;
for (; MI->getOperand(NumDefs).isReg() && MI->getOperand(NumDefs).isDef();
++NumDefs)
assert(NumDefs != NumOperands-2 && "No asm string?");
-
+
assert(MI->getOperand(NumDefs).isSymbol() && "No asm string?");
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
@@ -128,22 +165,23 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
// Get the !srcloc metadata node if we have it, and decode the loc cookie from
// it.
unsigned LocCookie = 0;
+ const MDNode *LocMD = 0;
for (unsigned i = MI->getNumOperands(); i != 0; --i) {
- if (MI->getOperand(i-1).isMetadata())
- if (const MDNode *SrcLoc = MI->getOperand(i-1).getMetadata())
- if (SrcLoc->getNumOperands() != 0)
- if (const ConstantInt *CI =
- dyn_cast<ConstantInt>(SrcLoc->getOperand(0))) {
- LocCookie = CI->getZExtValue();
- break;
- }
+ if (MI->getOperand(i-1).isMetadata() &&
+ (LocMD = MI->getOperand(i-1).getMetadata()) &&
+ LocMD->getNumOperands() != 0) {
+ if (const ConstantInt *CI = dyn_cast<ConstantInt>(LocMD->getOperand(0))) {
+ LocCookie = CI->getZExtValue();
+ break;
+ }
+ }
}
-
+
// Emit the inline asm to a temporary string so we can emit it through
// EmitInlineAsm.
SmallString<256> StringData;
raw_svector_ostream OS(StringData);
-
+
OS << '\t';
// The variant of the current asmprinter.
@@ -151,7 +189,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
int CurVariant = -1; // The number of the {.|.|.} region we are in.
const char *LastEmitted = AsmStr; // One past the last character emitted.
-
+
while (*LastEmitted) {
switch (*LastEmitted) {
default: {
@@ -199,18 +237,18 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
++LastEmitted; // consume ')' character.
if (CurVariant == -1)
OS << '}'; // this is gcc's behavior for } outside a variant
- else
+ else
CurVariant = -1;
break;
}
if (Done) break;
-
+
bool HasCurlyBraces = false;
if (*LastEmitted == '{') { // ${variable}
++LastEmitted; // Consume '{' character.
HasCurlyBraces = true;
}
-
+
// If we have ${:foo}, then this is not a real operand reference, it is a
// "magic" string reference, just like in .td files. Arrange to call
// PrintSpecial.
@@ -221,25 +259,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (StrEnd == 0)
report_fatal_error("Unterminated ${:foo} operand in inline asm"
" string: '" + Twine(AsmStr) + "'");
-
+
std::string Val(StrStart, StrEnd);
PrintSpecial(MI, OS, Val.c_str());
LastEmitted = StrEnd+1;
break;
}
-
+
const char *IDStart = LastEmitted;
const char *IDEnd = IDStart;
- while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
-
+ while (*IDEnd >= '0' && *IDEnd <= '9') ++IDEnd;
+
unsigned Val;
if (StringRef(IDStart, IDEnd-IDStart).getAsInteger(10, Val))
report_fatal_error("Bad $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
LastEmitted = IDEnd;
-
+
char Modifier[2] = { 0, 0 };
-
+
if (HasCurlyBraces) {
// If we have curly braces, check for a modifier character. This
// supports syntax like ${0:u}, which correspond to "%u0" in GCC asm.
@@ -248,25 +286,25 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
if (*LastEmitted == 0)
report_fatal_error("Bad ${:} expression in inline asm string: '" +
Twine(AsmStr) + "'");
-
+
Modifier[0] = *LastEmitted;
++LastEmitted; // Consume modifier character.
}
-
+
if (*LastEmitted != '}')
report_fatal_error("Bad ${} expression in inline asm string: '" +
Twine(AsmStr) + "'");
++LastEmitted; // Consume '}' character.
}
-
+
if (Val >= NumOperands-1)
report_fatal_error("Invalid $ operand number in inline asm string: '" +
Twine(AsmStr) + "'");
-
+
// Okay, we finally have a value number. Ask the target to print this
// operand!
if (CurVariant == -1 || CurVariant == AsmPrinterVariant) {
- unsigned OpNo = 2;
+ unsigned OpNo = InlineAsm::MIOp_FirstOperand;
bool Error = false;
@@ -310,8 +348,8 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const {
}
}
OS << '\n' << (char)0; // null terminate string.
- EmitInlineAsm(OS.str(), LocCookie);
-
+ EmitInlineAsm(OS.str(), LocMD);
+
// Emit the #NOAPP end marker. This has to happen even if verbose-asm isn't
// enabled, so we use EmitRawText.
if (OutStreamer.hasRawTextSupport())
@@ -335,7 +373,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
} else if (!strcmp(Code, "uid")) {
// Comparing the address of MI isn't sufficient, because machineinstrs may
// be allocated to the same address across functions.
-
+
// If this is a new LastFn instruction, bump the counter.
if (LastMI != MI || LastFn != getFunctionNumber()) {
++Counter;
@@ -349,7 +387,7 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, raw_ostream &OS,
Msg << "Unknown special formatter '" << Code
<< "' for machine instr: " << *MI;
report_fatal_error(Msg.str());
- }
+ }
}
/// PrintAsmOperand - Print the specified operand of MI, an INLINEASM
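
The SrcMgrDiagHandler added above maps an assembler diagnostic back to a !srcloc cookie: the metadata node carries one cookie per line of the inline-asm blob, the handler indexes it with the zero-based error line, and falls back to operand 0 when the line is out of range. A standalone sketch of that lookup rule, with a plain vector standing in for the MDNode (illustrative only):

#include <cstdio>
#include <vector>

// Stand-in for the !srcloc metadata: one loc cookie per line of the blob.
// Mirrors SrcMgrDiagHandler: index by (line - 1), clamp out-of-range lines
// to the first entry, and return 0 when there is no metadata at all.
static unsigned cookieForError(const std::vector<unsigned> &LocInfo,
                               unsigned DiagLineNo /* 1-based */) {
  if (LocInfo.empty())
    return 0;
  unsigned ErrorLine = DiagLineNo - 1;
  if (ErrorLine >= LocInfo.size())
    ErrorLine = 0;
  return LocInfo[ErrorLine];
}

int main() {
  std::vector<unsigned> Cookies = {101, 102, 103};
  std::printf("%u %u\n",
              cookieForError(Cookies, 2),   // line 2 -> cookie 102
              cookieForError(Cookies, 9));  // out of range -> cookie 101
  return 0;
}
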
diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt
index ca8b8436c11f..306efade7d92 100644
--- a/lib/CodeGen/AsmPrinter/CMakeLists.txt
+++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt
@@ -3,9 +3,10 @@ add_llvm_library(LLVMAsmPrinter
AsmPrinterDwarf.cpp
AsmPrinterInlineAsm.cpp
DIE.cpp
+ DwarfCFIException.cpp
DwarfDebug.cpp
DwarfException.cpp
+ DwarfTableException.cpp
OcamlGCPrinter.cpp
)
-target_link_libraries (LLVMAsmPrinter LLVMMCParser)
diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
new file mode 100644
index 000000000000..68be2eed8f0e
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp
@@ -0,0 +1,138 @@
+//===-- CodeGen/AsmPrinter/DwarfException.cpp - Dwarf Exception Impl ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfCFIException::DwarfCFIException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false), shouldEmitTableModule(false)
+ {}
+
+DwarfCFIException::~DwarfCFIException() {}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfCFIException::EndModule() {
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ if (!shouldEmitTableModule)
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ // Begin eh frame section.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ // Emit references to all used personality functions
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+ for (size_t i = 0, e = Personalities.size(); i != e; ++i) {
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("personality", i));
+ Asm->EmitReference(Personalities[i], PerEncoding);
+ }
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfCFIException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitTable = shouldEmitMoves = false;
+
+ // If any landing pads survive, we need an EH table.
+ shouldEmitTable = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ shouldEmitMoves =
+ !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ shouldEmitTableModule |= shouldEmitTable;
+
+ if (shouldEmitMoves) {
+ const TargetFrameLowering *TFL = Asm->TM.getFrameLowering();
+ Asm->OutStreamer.EmitCFIStartProc();
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ TFL->getInitialFrameState(Moves);
+ Asm->EmitCFIFrameMoves(Moves);
+ Asm->EmitCFIFrameMoves(MMI->getFrameMoves());
+ }
+
+ if (!shouldEmitTable)
+ return;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Provide LSDA information.
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ if (LSDAEncoding != dwarf::DW_EH_PE_omit)
+ Asm->OutStreamer.EmitCFILsda(Asm->GetTempSymbol("exception",
+ Asm->getFunctionNumber()),
+ LSDAEncoding);
+
+ // Indicate personality routine, if any.
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+ if (PerEncoding != dwarf::DW_EH_PE_omit &&
+ MMI->getPersonalities()[MMI->getPersonalityIndex()])
+ Asm->OutStreamer.EmitCFIPersonality(Asm->GetTempSymbol("personality",
+ MMI->getPersonalityIndex()),
+ PerEncoding);
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfCFIException::EndFunction() {
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ if (shouldEmitMoves)
+ Asm->OutStreamer.EmitCFIEndProc();
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (shouldEmitTable)
+ EmitExceptionTable();
+}
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c886a5ecc615..5106d5778c29 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -16,6 +16,7 @@
#include "DIE.h"
#include "llvm/Constants.h"
#include "llvm/Module.h"
+#include "llvm/Instructions.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/MC/MCAsmInfo.h"
@@ -24,12 +25,13 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/CommandLine.h"
@@ -38,7 +40,7 @@
#include "llvm/Support/ValueHandle.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Timer.h"
-#include "llvm/System/Path.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
static cl::opt<bool> PrintDbgScope("print-dbgscope", cl::Hidden,
@@ -52,6 +54,10 @@ static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden,
cl::desc("Make an absense of debug location information explicit."),
cl::init(false));
+#ifndef NDEBUG
+STATISTIC(BlocksWithoutLineNo, "Number of blocks without any line number");
+#endif
+
namespace {
const char *DWARFGroupName = "DWARF Emission";
const char *DbgTimerName = "DWARF Debug Writer";
@@ -507,8 +513,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIVariable V) {
return;
unsigned Line = V.getLineNumber();
- unsigned FileID = GetOrCreateSourceID(V.getContext().getDirectory(),
- V.getContext().getFilename());
+ if (Line == 0)
+ return;
+ unsigned FileID = GetOrCreateSourceID(V.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -522,8 +529,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIGlobalVariable G) {
return;
unsigned Line = G.getLineNumber();
- unsigned FileID = GetOrCreateSourceID(G.getContext().getDirectory(),
- G.getContext().getFilename());
+ if (Line == 0)
+ return;
+ unsigned FileID = GetOrCreateSourceID(G.getContext().getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -542,8 +550,7 @@ void DwarfDebug::addSourceLine(DIE *Die, DISubprogram SP) {
unsigned Line = SP.getLineNumber();
if (!SP.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(SP.getDirectory(),
- SP.getFilename());
+ unsigned FileID = GetOrCreateSourceID(SP.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -557,10 +564,9 @@ void DwarfDebug::addSourceLine(DIE *Die, DIType Ty) {
return;
unsigned Line = Ty.getLineNumber();
- if (!Ty.getContext().Verify())
+ if (Line == 0 || !Ty.getContext().Verify())
return;
- unsigned FileID = GetOrCreateSourceID(Ty.getContext().getDirectory(),
- Ty.getContext().getFilename());
+ unsigned FileID = GetOrCreateSourceID(Ty.getFilename());
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -574,10 +580,11 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
return;
unsigned Line = NS.getLineNumber();
+ if (Line == 0)
+ return;
StringRef FN = NS.getFilename();
- StringRef Dir = NS.getDirectory();
- unsigned FileID = GetOrCreateSourceID(Dir, FN);
+ unsigned FileID = GetOrCreateSourceID(FN);
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -588,8 +595,8 @@ void DwarfDebug::addSourceLine(DIE *Die, DINameSpace NS) {
void DwarfDebug::addVariableAddress(DbgVariable *&DV, DIE *Die, int64_t FI) {
MachineLocation Location;
unsigned FrameReg;
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- int Offset = RI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg);
Location.set(FrameReg, Offset);
if (DV->variableHasComplexAddress())
@@ -620,8 +627,7 @@ void DwarfDebug::addComplexAddress(DbgVariable *&DV, DIE *Die,
if (Reg < 32) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
} else {
- Reg = Reg - dwarf::DW_OP_reg0;
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
@@ -760,8 +766,7 @@ void DwarfDebug::addBlockByrefAddress(DbgVariable *&DV, DIE *Die,
if (Reg < 32)
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
else {
- Reg = Reg - dwarf::DW_OP_reg0;
- addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + Reg);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx);
addUInt(Block, 0, dwarf::DW_FORM_udata, Reg);
}
} else {
@@ -812,6 +817,15 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
unsigned Reg = RI->getDwarfRegNum(Location.getReg(), false);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ if (RI->getFrameRegister(*Asm->MF) == Location.getReg()
+ && Location.getOffset()) {
+ // If variable offset is based in frame register then use fbreg.
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg);
+ addSInt(Block, 0, dwarf::DW_FORM_sdata, Location.getOffset());
+ addBlock(Die, Attribute, 0, Block);
+ return;
+ }
+
if (Location.isReg()) {
if (Reg < 32) {
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + Reg);
@@ -834,35 +848,28 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
}
/// addRegisterAddress - Add register location entry in variable DIE.
-bool DwarfDebug::addRegisterAddress(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addRegisterAddress(DIE *Die, const MachineOperand &MO) {
assert (MO.isReg() && "Invalid machine operand!");
if (!MO.getReg())
return false;
MachineLocation Location;
Location.set(MO.getReg());
addAddress(Die, dwarf::DW_AT_location, Location);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantValue(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addConstantValue(DIE *Die, const MachineOperand &MO) {
assert (MO.isImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
unsigned Imm = MO.getImm();
addUInt(Block, 0, dwarf::DW_FORM_udata, Imm);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
/// addConstantFPValue - Add constant value entry in variable DIE.
-bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
- const MachineOperand &MO) {
+bool DwarfDebug::addConstantFPValue(DIE *Die, const MachineOperand &MO) {
assert (MO.isFPImm() && "Invalid machine operand!");
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
APFloat FPImm = MO.getFPImm()->getValueAPF();
@@ -883,11 +890,42 @@ bool DwarfDebug::addConstantFPValue(DIE *Die, const MCSymbol *VS,
(unsigned char)0xFF & FltPtr[Start]);
addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
- if (VS)
- addLabel(Die, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr, VS);
return true;
}
+/// addConstantValue - Add constant value entry in variable DIE.
+bool DwarfDebug::addConstantValue(DIE *Die, ConstantInt *CI,
+ bool Unsigned) {
+ if (CI->getBitWidth() <= 64) {
+ if (Unsigned)
+ addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ CI->getZExtValue());
+ else
+ addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
+ CI->getSExtValue());
+ return true;
+ }
+
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+
+ // Get the raw data form of the large APInt.
+ const APInt Val = CI->getValue();
+ const char *Ptr = (const char*)Val.getRawData();
+
+ int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte.
+ bool LittleEndian = Asm->getTargetData().isLittleEndian();
+ int Incr = (LittleEndian ? 1 : -1);
+ int Start = (LittleEndian ? 0 : NumBytes - 1);
+ int Stop = (LittleEndian ? NumBytes : -1);
+
+ // Output the constant to DWARF one byte at a time.
+ for (; Start != Stop; Start += Incr)
+ addUInt(Block, 0, dwarf::DW_FORM_data1,
+ (unsigned char)0xFF & Ptr[Start]);
+
+ addBlock(Die, dwarf::DW_AT_const_value, 0, Block);
+ return true;
+}
/// addToContextOwner - Add Die into the list of its context owner's children.
void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
@@ -898,8 +936,7 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context));
ContextDIE->addChild(Die);
} else if (Context.isSubprogram()) {
- DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context),
- /*MakeDecl=*/false);
+ DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context));
ContextDIE->addChild(Die);
} else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context))
ContextDIE->addChild(Die);
@@ -1033,16 +1070,23 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIDescriptor RTy = Elements.getElement(0);
addType(&Buffer, DIType(RTy));
- // Add prototype flag.
- addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
-
+ bool isPrototyped = true;
// Add arguments.
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
- DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- addType(Arg, DIType(Ty));
- Buffer.addChild(Arg);
+ if (Ty.isUnspecifiedParameter()) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters);
+ Buffer.addChild(Arg);
+ isPrototyped = false;
+ } else {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Ty));
+ Buffer.addChild(Arg);
+ }
}
+ // Add prototype flag.
+ if (isPrototyped)
+ addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
}
break;
case dwarf::DW_TAG_structure_type:
@@ -1060,8 +1104,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
for (unsigned i = 0; i < N; ++i) {
DIDescriptor Element = Elements.getElement(i);
DIE *ElemDie = NULL;
- if (Element.isSubprogram())
+ if (Element.isSubprogram()) {
+ DISubprogram SP(Element);
ElemDie = createSubprogramDIE(DISubprogram(Element));
+ if (SP.isProtected())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_protected);
+ else if (SP.isPrivate())
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_private);
+ else
+ addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (SP.isExplicit())
+ addUInt(ElemDie, dwarf::DW_AT_explicit, dwarf::DW_FORM_flag, 1);
+ }
else if (Element.isVariable()) {
DIVariable DV(Element);
ElemDie = new DIE(dwarf::DW_TAG_variable);
@@ -1094,6 +1151,21 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
DIDescriptor Context = CTy.getContext();
addToContextOwner(&Buffer, Context);
}
+
+ if (Tag == dwarf::DW_TAG_class_type) {
+ DIArray TParams = CTy.getTemplateParams();
+ unsigned N = TParams.getNumElements();
+ // Add template parameters.
+ for (unsigned i = 0; i < N; ++i) {
+ DIDescriptor Element = TParams.getElement(i);
+ if (Element.isTemplateTypeParameter())
+ Buffer.addChild(getOrCreateTemplateTypeParameterDIE(
+ DITemplateTypeParameter(Element)));
+ else if (Element.isTemplateValueParameter())
+ Buffer.addChild(getOrCreateTemplateValueParameterDIE(
+ DITemplateValueParameter(Element)));
+ }
+ }
break;
}
default:
@@ -1124,6 +1196,38 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
}
}
+/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateTypeParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) {
+ CompileUnit *TypeCU = getCompileUnit(TP);
+ DIE *ParamDIE = TypeCU->getDIE(TP);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter);
+ addType(ParamDIE, TP.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TP.getName());
+ return ParamDIE;
+}
+
+/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+/// for the given DITemplateValueParameter.
+DIE *
+DwarfDebug::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV) {
+ CompileUnit *TVCU = getCompileUnit(TPV);
+ DIE *ParamDIE = TVCU->getDIE(TPV);
+ if (ParamDIE)
+ return ParamDIE;
+
+ ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter);
+ addType(ParamDIE, TPV.getType());
+ addString(ParamDIE, dwarf::DW_AT_name, dwarf::DW_FORM_string, TPV.getName());
+ addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
+ TPV.getValue());
+ return ParamDIE;
+}
+
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
int64_t L = SR.getLo();
@@ -1258,7 +1362,8 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
else if (DT.isPrivate())
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_private);
- else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+ // Otherwise C++ member and base classes are considered public.
+ else if (DT.getCompileUnit().getLanguage() == dwarf::DW_LANG_C_plus_plus)
addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_public);
if (DT.isVirtual())
@@ -1268,7 +1373,7 @@ DIE *DwarfDebug::createMemberDIE(DIDerivedType DT) {
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
+DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP) {
CompileUnit *SPCU = getCompileUnit(SP);
DIE *SPDie = SPCU->getDIE(SP);
if (SPDie)
@@ -1286,10 +1391,7 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
addSourceLine(SPDie, SP);
- // Add prototyped tag, if C or ObjC.
- unsigned Lang = SP.getCompileUnit().getLanguage();
- if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
- Lang == dwarf::DW_LANG_ObjC)
+ if (SP.isPrototyped())
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
@@ -1307,13 +1409,13 @@ DIE *DwarfDebug::createSubprogramDIE(DISubprogram SP, bool MakeDecl) {
addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
- addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+ addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex());
addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
ContainingTypeMap.insert(std::make_pair(SPDie,
SP.getContainingType()));
}
- if (MakeDecl || !SP.isDefinition()) {
+ if (!SP.isDefinition()) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments. Do not add arguments for subprogram definition. They will
@@ -1603,6 +1705,8 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
if (Tag == dwarf::DW_TAG_formal_parameter && DV->getType().isArtificial())
addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
+ else if (DIVariable(DV->getVariable()).isArtificial())
+ addUInt(VariableDie, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1);
if (Scope->isAbstractScope()) {
DV->setDIE(VariableDie);
@@ -1625,7 +1729,6 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
DbgVariableToDbgInstMap.find(DV);
if (DVI != DbgVariableToDbgInstMap.end()) {
const MachineInstr *DVInsn = DVI->second;
- const MCSymbol *DVLabel = findVariableLabel(DV);
bool updated = false;
// FIXME : Handle getNumOperands != 3
if (DVInsn->getNumOperands() == 3) {
@@ -1637,20 +1740,17 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
addVariableAddress(DV, VariableDie, DVInsn->getOperand(1).getImm());
updated = true;
} else
- updated = addRegisterAddress(VariableDie, DVLabel, RegOp);
+ updated = addRegisterAddress(VariableDie, RegOp);
}
else if (DVInsn->getOperand(0).isImm())
- updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ updated = addConstantValue(VariableDie, DVInsn->getOperand(0));
else if (DVInsn->getOperand(0).isFPImm())
updated =
- addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0));
+ addConstantFPValue(VariableDie, DVInsn->getOperand(0));
} else {
MachineLocation Location = Asm->getDebugValueLocation(DVInsn);
if (Location.getReg()) {
addAddress(VariableDie, dwarf::DW_AT_location, Location);
- if (DVLabel)
- addLabel(VariableDie, dwarf::DW_AT_start_scope, dwarf::DW_FORM_addr,
- DVLabel);
updated = true;
}
}
@@ -1700,6 +1800,16 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
if (!Scope || !Scope->getScopeNode())
return NULL;
+ SmallVector <DIE *, 8> Children;
+ // Collect lexical scope children first.
+ const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i)
+ if (DIE *Variable = constructVariableDIE(Variables[i], Scope))
+ Children.push_back(Variable);
+ const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j)
+ if (DIE *Nested = constructScopeDIE(Scopes[j]))
+ Children.push_back(Nested);
DIScope DS(Scope->getScopeNode());
DIE *ScopeDIE = NULL;
if (Scope->getInlinedAt())
@@ -1715,26 +1825,19 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
else
ScopeDIE = updateSubprogramScopeDIE(DS);
}
- else
+ else {
+ // There is no need to emit an empty lexical block DIE.
+ if (Children.empty())
+ return NULL;
ScopeDIE = constructLexicalScopeDIE(Scope);
- if (!ScopeDIE) return NULL;
-
- // Add variables to scope.
- const SmallVector<DbgVariable *, 8> &Variables = Scope->getDbgVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
- if (VariableDIE)
- ScopeDIE->addChild(VariableDIE);
}
+
+ if (!ScopeDIE) return NULL;
- // Add nested scopes.
- const SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
- for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
- // Define the Scope debug information entry.
- DIE *NestedDIE = constructScopeDIE(Scopes[j]);
- if (NestedDIE)
- ScopeDIE->addChild(NestedDIE);
- }
+ // Add children to the scope DIE.
+ for (SmallVector<DIE *, 8>::iterator I = Children.begin(),
+ E = Children.end(); I != E; ++I)
+ ScopeDIE->addChild(*I);
if (DS.isSubprogram())
addPubTypes(DISubprogram(DS));
@@ -1746,37 +1849,21 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
/// source file names. If none currently exists, create a new id and insert it
/// in the SourceIds map. This can update DirectoryNames and SourceFileNames
/// maps as well.
-unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName){
- unsigned DId;
- assert (DirName.empty() == false && "Invalid directory name!");
- StringMap<unsigned>::iterator DI = DirectoryIdMap.find(DirName);
- if (DI != DirectoryIdMap.end()) {
- DId = DI->getValue();
- } else {
- DId = DirectoryNames.size() + 1;
- DirectoryIdMap[DirName] = DId;
- DirectoryNames.push_back(DirName);
- }
+unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName){
+ // If FE did not provide a file name, then assume stdin.
+ if (FileName.empty())
+ return GetOrCreateSourceID("<stdin>");
- unsigned FId;
- StringMap<unsigned>::iterator FI = SourceFileIdMap.find(FileName);
- if (FI != SourceFileIdMap.end()) {
- FId = FI->getValue();
- } else {
- FId = SourceFileNames.size() + 1;
- SourceFileIdMap[FileName] = FId;
- SourceFileNames.push_back(FileName);
- }
+ StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
+ if (Entry.getValue())
+ return Entry.getValue();
- DenseMap<std::pair<unsigned, unsigned>, unsigned>::iterator SI =
- SourceIdMap.find(std::make_pair(DId, FId));
- if (SI != SourceIdMap.end())
- return SI->second;
+ unsigned SrcId = SourceIdMap.size();
+ Entry.setValue(SrcId);
- unsigned SrcId = SourceIds.size() + 1; // DW_AT_decl_file cannot be 0.
- SourceIdMap[std::make_pair(DId, FId)] = SrcId;
- SourceIds.push_back(std::make_pair(DId, FId));
+ // Print out a .file directive to specify files for .loc directives.
+ Asm->OutStreamer.EmitDwarfFileDirective(SrcId, FileName);
return SrcId;
}
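A minimal standalone sketch of the numbering scheme behind the new GetOrCreateSourceID, using std::map in place of llvm::StringMap and a comment in place of the .file directive; ids start at 1 (DW_AT_decl_file cannot be 0) and an empty name falls back to "<stdin>":

#include <cassert>
#include <map>
#include <string>

// Simplified analogue of the StringMap-based GetOrCreateSourceID above.
static unsigned getOrCreateSourceID(std::map<std::string, unsigned> &Ids,
                                    std::string FileName) {
  if (FileName.empty())
    FileName = "<stdin>";                    // FE gave no name: assume stdin
  unsigned &Id = Ids[FileName];              // value-initialized to 0 when new
  if (Id == 0) {
    Id = static_cast<unsigned>(Ids.size());  // map already holds the new entry
    // The real code emits a ".file <Id> <FileName>" directive at this point.
  }
  return Id;
}

int main() {
  std::map<std::string, unsigned> Ids;
  assert(getOrCreateSourceID(Ids, "a.c") == 1);
  assert(getOrCreateSourceID(Ids, "b.c") == 2);
  assert(getOrCreateSourceID(Ids, "a.c") == 1); // repeated name reuses its id
  assert(getOrCreateSourceID(Ids, "") == 3);    // maps to "<stdin>"
  return 0;
}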
@@ -1802,7 +1889,7 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
StringRef Dir = DIUnit.getDirectory();
- unsigned ID = GetOrCreateSourceID(Dir, FN);
+ unsigned ID = GetOrCreateSourceID(FN);
DIE *Die = new DIE(dwarf::DW_TAG_compile_unit);
addString(Die, dwarf::DW_AT_producer, dwarf::DW_FORM_string,
@@ -1886,6 +1973,32 @@ static bool isUnsignedDIType(DIType Ty) {
return false;
}
+// Return const expression if value is a GEP to access merged global
+// constant. e.g.
+// i8* getelementptr ({ i8, i8, i8, i8 }* @_MergedGlobals, i32 0, i32 0)
+static const ConstantExpr *getMergedGlobalExpr(const Value *V) {
+ const ConstantExpr *CE = dyn_cast_or_null<ConstantExpr>(V);
+ if (!CE || CE->getNumOperands() != 3 ||
+ CE->getOpcode() != Instruction::GetElementPtr)
+ return NULL;
+
+ // First operand points to a global value.
+ if (!isa<GlobalValue>(CE->getOperand(0)))
+ return NULL;
+
+ // Second operand is zero.
+ const ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(CE->getOperand(1));
+ if (!CI || !CI->isZero())
+ return NULL;
+
+ // Third operand is offset.
+ if (!isa<ConstantInt>(CE->getOperand(2)))
+ return NULL;
+
+ return CE;
+}
+
/// constructGlobalVariableDIE - Construct global variable DIE.
void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
DIGlobalVariable GV(N);
@@ -1952,16 +2065,22 @@ void DwarfDebug::constructGlobalVariableDIE(const MDNode *N) {
} else {
addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
- } else if (Constant *C = GV.getConstant()) {
- if (ConstantInt *CI = dyn_cast<ConstantInt>(C)) {
- if (isUnsignedDIType(GTy))
- addUInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata,
- CI->getZExtValue());
- else
- addSInt(VariableDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata,
- CI->getSExtValue());
- }
+ } else if (ConstantInt *CI =
+ dyn_cast_or_null<ConstantInt>(GV.getConstant()))
+ addConstantValue(VariableDIE, CI, isUnsignedDIType(GTy));
+ else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) {
+ // GV is a merged global.
+ DIEBlock *Block = new (DIEValueAllocator) DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr);
+ addLabel(Block, 0, dwarf::DW_FORM_udata,
+ Asm->Mang->getSymbol(cast<GlobalValue>(CE->getOperand(0))));
+ ConstantInt *CII = cast<ConstantInt>(CE->getOperand(2));
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_udata, CII->getZExtValue());
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus);
+ addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block);
}
+
return;
}
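As an illustration (not from the patch), the merged-global branch above fires when something like the global-merge pass has packed several globals into one container symbol; the symbol name and offset below are hypothetical, matching the GEP pattern recognized by getMergedGlobalExpr:

// Hypothetical translation unit. After merging, each variable's DIE gets a
// location of the form: DW_OP_addr <container>, DW_OP_constu <offset>, DW_OP_plus.
static int First  = 1;   // location: container symbol + offset 0
static int Second = 2;   // location: DW_OP_addr _MergedGlobals,
                         //           DW_OP_constu 4, DW_OP_plus

int useBoth() { return First + Second; }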
@@ -2043,25 +2162,12 @@ void DwarfDebug::beginModule(Module *M) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+ if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty"))
+ for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+ getOrCreateTypeDIE(DIType(NMD->getOperand(i)));
+
// Prime section data.
SectionMap.insert(Asm->getObjFileLowering().getTextSection());
-
- // Print out .file directives to specify files for .loc directives. These are
- // printed out early so that they precede any .loc directives.
- if (Asm->MAI->hasDotLocAndDotFile()) {
- for (unsigned i = 1, e = getNumSourceIds()+1; i != e; ++i) {
- // Remember source id starts at 1.
- std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(i);
- // FIXME: don't use sys::path for this! This should not depend on the
- // host.
- sys::Path FullPath(getSourceDirectoryName(Id.first));
- bool AppendOk =
- FullPath.appendComponent(getSourceFileName(Id.second));
- assert(AppendOk && "Could not append filename to directory!");
- AppendOk = false;
- Asm->OutStreamer.EmitDwarfFileDirective(i, FullPath.str());
- }
- }
}
/// endModule - Emit all Dwarf sections that should come after the content.
@@ -2081,8 +2187,7 @@ void DwarfDebug::endModule() {
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName)));
+ NamedMDNode *NMD = getFnSpecificMDNode(*(MMI->getModule()), FName);
if (!NMD) continue;
unsigned E = NMD->getNumOperands();
if (!E) continue;
@@ -2152,9 +2257,6 @@ void DwarfDebug::endModule() {
// Corresponding abbreviations into a abbrev section.
emitAbbreviations();
- // Emit source line correspondence into a debug line section.
- emitDebugLines();
-
// Emit info into a debug pubnames section.
emitDebugPubNames();
@@ -2242,15 +2344,6 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction * MF,
}
}
-/// isDbgValueInUndefinedReg - Return true if debug value, encoded by
-/// DBG_VALUE instruction, is in undefined reg.
-static bool isDbgValueInUndefinedReg(const MachineInstr *MI) {
- assert (MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!");
- if (MI->getOperand(0).isReg() && !MI->getOperand(0).getReg())
- return true;
- return false;
-}
-
/// isDbgValueInDefinedReg - Return true if debug value, encoded by
/// DBG_VALUE instruction, is in a defined reg.
static bool isDbgValueInDefinedReg(const MachineInstr *MI) {
@@ -2275,7 +2368,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
- if (!MInsn->isDebugValue() || isDbgValueInUndefinedReg(MInsn))
+ if (!MInsn->isDebugValue())
continue;
DbgValues.push_back(MInsn);
}
@@ -2297,19 +2390,18 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
ME = DbgValues.end(); MI != ME; ++MI) {
const MDNode *Var =
(*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata();
- if (Var == DV && isDbgValueInDefinedReg(*MI) &&
+ if (Var == DV &&
!PrevMI->isIdenticalTo(*MI))
MultipleValues.push_back(*MI);
PrevMI = *MI;
}
- DbgScope *Scope = findDbgScope(MInsn);
- bool CurFnArg = false;
+ DbgScope *Scope = NULL;
if (DV.getTag() == dwarf::DW_TAG_arg_variable &&
DISubprogram(DV.getContext()).describes(MF->getFunction()))
- CurFnArg = true;
- if (!Scope && CurFnArg)
Scope = CurrentFnDbgScope;
+ else
+ Scope = findDbgScope(MInsn);
// If variable scope is not found then skip this variable.
if (!Scope)
continue;
@@ -2317,8 +2409,6 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
Processed.insert(DV);
DbgVariable *RegVar = new DbgVariable(DV);
Scope->addVariable(RegVar);
- if (!CurFnArg)
- DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn);
if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) {
DbgVariableToDbgInstMap[AbsVar] = MInsn;
VarToAbstractVarMap[RegVar] = AbsVar;
@@ -2375,10 +2465,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF,
// Collect info for variables that were optimized out.
const Function *F = MF->getFunction();
- const Module *M = F->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(F->getName())))) {
+ if (NamedMDNode *NMD = getFnSpecificMDNode(*(F->getParent()), F->getName())) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !Processed.insert(DV))
@@ -2409,8 +2496,8 @@ const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) {
return I->second;
}
-/// beginScope - Process beginning of a scope.
-void DwarfDebug::beginScope(const MachineInstr *MI) {
+/// beginInstruction - Process beginning of an instruction.
+void DwarfDebug::beginInstruction(const MachineInstr *MI) {
if (InsnNeedsLabel.count(MI) == 0) {
LabelsBeforeInsn[MI] = PrevLabel;
return;
@@ -2444,8 +2531,8 @@ void DwarfDebug::beginScope(const MachineInstr *MI) {
assert (0 && "Instruction is not processed!");
}
-/// endScope - Process end of a scope.
-void DwarfDebug::endScope(const MachineInstr *MI) {
+/// endInstruction - Process end of an instruction.
+void DwarfDebug::endInstruction(const MachineInstr *MI) {
if (InsnsEndScopeSet.count(MI) != 0) {
// Emit a label if this instruction ends a scope.
MCSymbol *Label = MMI->getContext().CreateTempSymbol();
@@ -2624,6 +2711,10 @@ bool DwarfDebug::extractScopeInformation() {
continue;
}
+ // Ignore DBG_VALUE; it does not contribute any instructions to the output.
+ if (MInsn->isDebugValue())
+ continue;
+
if (RangeBeginMI) {
// If we have already seen the beginning of an instruction range and
// current instruction scope does not match scope of first instruction
@@ -2727,12 +2818,37 @@ static DebugLoc FindFirstDebugLoc(const MachineFunction *MF) {
return DebugLoc();
}
+#ifndef NDEBUG
+/// CheckLineNumbers - Count basic blocks whose instructions do not have any
+/// line number information.
+static void CheckLineNumbers(const MachineFunction *MF) {
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ bool FoundLineNo = false;
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MI = II;
+ if (!MI->getDebugLoc().isUnknown()) {
+ FoundLineNo = true;
+ break;
+ }
+ }
+ if (!FoundLineNo && I->size())
+ ++BlocksWithoutLineNo;
+ }
+}
+#endif
+
/// beginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::beginFunction(const MachineFunction *MF) {
if (!MMI->hasDebugInfo()) return;
if (!extractScopeInformation()) return;
+#ifndef NDEBUG
+ CheckLineNumbers(MF);
+#endif
+
FunctionBeginSym = Asm->GetTempSymbol("func_begin",
Asm->getFunctionNumber());
// Assumes in correct section after the entry point.
@@ -2775,16 +2891,14 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) {
DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata());
if (!DV.Verify()) continue;
// If DBG_VALUE is for a local variable then it needs a label.
- if (DV.getTag() != dwarf::DW_TAG_arg_variable
- && isDbgValueInUndefinedReg(MI) == false)
+ if (DV.getTag() != dwarf::DW_TAG_arg_variable)
InsnNeedsLabel.insert(MI);
// DBG_VALUE for inlined functions argument needs a label.
else if (!DISubprogram(getDISubprogram(DV.getContext())).
describes(MF->getFunction()))
InsnNeedsLabel.insert(MI);
// DBG_VALUE indicating argument location change needs a label.
- else if (isDbgValueInUndefinedReg(MI) == false
- && !ProcessedArgs.insert(DV))
+ else if (!ProcessedArgs.insert(DV))
InsnNeedsLabel.insert(MI);
} else {
// If location is unknown then instruction needs a location only if
@@ -2820,17 +2934,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
SmallPtrSet<const MDNode *, 16> ProcessedVars;
collectVariableInfo(MF, ProcessedVars);
- // Get function line info.
- if (!Lines.empty()) {
- // Get section line info.
- unsigned ID = SectionMap.insert(Asm->getCurrentSection());
- if (SectionSourceLines.size() < ID) SectionSourceLines.resize(ID);
- std::vector<SrcLineInfo> &SectionLineInfos = SectionSourceLines[ID-1];
- // Append the function info to section info.
- SectionLineInfos.insert(SectionLineInfos.end(),
- Lines.begin(), Lines.end());
- }
-
// Construct abstract scopes.
for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
AE = AbstractScopesList.end(); AI != AE; ++AI) {
@@ -2840,10 +2943,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
StringRef FName = SP.getLinkageName();
if (FName.empty())
FName = SP.getName();
- const Module *M = MF->getFunction()->getParent();
- if (NamedMDNode *NMD =
- M->getNamedMetadata(Twine("llvm.dbg.lv.",
- getRealLinkageName(FName)))) {
+ if (NamedMDNode *NMD =
+ getFnSpecificMDNode(*(MF->getFunction()->getParent()), FName)) {
for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) {
DIVariable DV(cast<MDNode>(NMD->getOperand(i)));
if (!DV || !ProcessedVars.insert(DV))
@@ -2875,7 +2976,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
DbgVariableToFrameIndexMap.clear();
VarToAbstractVarMap.clear();
DbgVariableToDbgInstMap.clear();
- DbgVariableLabelsMap.clear();
DeleteContainerSeconds(DbgScopeMap);
InsnsEndScopeSet.clear();
ConcreteScopes.clear();
@@ -2884,7 +2984,6 @@ void DwarfDebug::endFunction(const MachineFunction *MF) {
AbstractVariables.clear();
LabelsBeforeInsn.clear();
LabelsAfterInsn.clear();
- Lines.clear();
PrevLabel = NULL;
}
@@ -2906,15 +3005,6 @@ bool DwarfDebug::findVariableFrameIndex(const DbgVariable *V, int *FI) {
return true;
}
-/// findVariableLabel - Find MCSymbol for the variable.
-const MCSymbol *DwarfDebug::findVariableLabel(const DbgVariable *V) {
- DenseMap<const DbgVariable *, const MCSymbol *>::iterator I
- = DbgVariableLabelsMap.find(V);
- if (I == DbgVariableLabelsMap.end())
- return NULL;
- else return I->second;
-}
-
/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
@@ -2940,7 +3030,6 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) {
/// the source line list.
MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
const MDNode *S) {
- StringRef Dir;
StringRef Fn;
unsigned Src = 1;
@@ -2949,25 +3038,26 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col,
if (Scope.isCompileUnit()) {
DICompileUnit CU(S);
- Dir = CU.getDirectory();
Fn = CU.getFilename();
+ } else if (Scope.isFile()) {
+ DIFile F(S);
+ Fn = F.getFilename();
} else if (Scope.isSubprogram()) {
DISubprogram SP(S);
- Dir = SP.getDirectory();
Fn = SP.getFilename();
} else if (Scope.isLexicalBlock()) {
DILexicalBlock DB(S);
- Dir = DB.getDirectory();
Fn = DB.getFilename();
} else
assert(0 && "Unexpected scope info");
- Src = GetOrCreateSourceID(Dir, Fn);
+ Src = GetOrCreateSourceID(Fn);
}
- MCSymbol *Label = MMI->getContext().CreateTempSymbol();
- Lines.push_back(SrcLineInfo(Line, Col, Src, Label));
+ Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, DWARF2_FLAG_IS_STMT,
+ 0, 0);
+ MCSymbol *Label = MMI->getContext().CreateTempSymbol();
Asm->OutStreamer.EmitLabel(Label);
return Label;
}
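recordSourceLine now hands the line record to the streamer as a .loc directive and lets the assembler build .debug_line itself. A standalone sketch, not LLVM API, of roughly what that directive looks like in textual assembly output (flags such as is_stmt omitted):

#include <cstdio>

// Rough stand-in for the EmitDwarfLocDirective call above when the output is
// textual assembly.
static void recordSourceLine(unsigned FileID, unsigned Line, unsigned Col) {
  std::printf("\t.loc\t%u %u %u\n", FileID, Line, Col);
}

int main() {
  recordSourceLine(1, 42, 7);  // -> ".loc 1 42 7"
  return 0;
}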
@@ -3151,6 +3241,14 @@ void DwarfDebug::emitDIE(DIE *Die) {
Values[i]->EmitValue(Asm, Form);
break;
}
+ case dwarf::DW_AT_accessibility: {
+ if (Asm->isVerbose()) {
+ DIEInteger *V = cast<DIEInteger>(Values[i]);
+ Asm->OutStreamer.AddComment(dwarf::AccessibilityString(V->getValue()));
+ }
+ Values[i]->EmitValue(Asm, Form);
+ break;
+ }
default:
// Emit an attribute using the defined form.
Values[i]->EmitValue(Asm, Form);
@@ -3270,185 +3368,6 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) {
Asm->EmitInt8(1);
}
-/// emitDebugLines - Emit source line information.
-///
-void DwarfDebug::emitDebugLines() {
- // If the target is using .loc/.file, the assembler will be emitting the
- // .debug_line table automatically.
- if (Asm->MAI->hasDotLocAndDotFile())
- return;
-
- // Minimum line delta, thus ranging from -10..(255-10).
- const int MinLineDelta = -(dwarf::DW_LNS_fixed_advance_pc + 1);
- // Maximum line delta, thus ranging from -10..(255-10).
- const int MaxLineDelta = 255 + MinLineDelta;
-
- // Start the dwarf line section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfLineSection());
-
- // Construct the section header.
- Asm->OutStreamer.AddComment("Length of Source Line Info");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"),
- Asm->GetTempSymbol("line_begin"), 4);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_begin"));
-
- Asm->OutStreamer.AddComment("DWARF version number");
- Asm->EmitInt16(dwarf::DWARF_VERSION);
-
- Asm->OutStreamer.AddComment("Prolog Length");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("line_prolog_end"),
- Asm->GetTempSymbol("line_prolog_begin"), 4);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_begin"));
-
- Asm->OutStreamer.AddComment("Minimum Instruction Length");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("Default is_stmt_start flag");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("Line Base Value (Special Opcodes)");
- Asm->EmitInt8(MinLineDelta);
- Asm->OutStreamer.AddComment("Line Range Value (Special Opcodes)");
- Asm->EmitInt8(MaxLineDelta);
- Asm->OutStreamer.AddComment("Special Opcode Base");
- Asm->EmitInt8(-MinLineDelta);
-
- // Line number standard opcode encodings argument count
- Asm->OutStreamer.AddComment("DW_LNS_copy arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_advance_pc arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_advance_line arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_set_file arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_set_column arg count");
- Asm->EmitInt8(1);
- Asm->OutStreamer.AddComment("DW_LNS_negate_stmt arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_set_basic_block arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_const_add_pc arg count");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("DW_LNS_fixed_advance_pc arg count");
- Asm->EmitInt8(1);
-
- // Emit directories.
- for (unsigned DI = 1, DE = getNumSourceDirectories()+1; DI != DE; ++DI) {
- const std::string &Dir = getSourceDirectoryName(DI);
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Directory");
- Asm->OutStreamer.EmitBytes(StringRef(Dir.c_str(), Dir.size()+1), 0);
- }
-
- Asm->OutStreamer.AddComment("End of directories");
- Asm->EmitInt8(0);
-
- // Emit files.
- for (unsigned SI = 1, SE = getNumSourceIds()+1; SI != SE; ++SI) {
- // Remember source id starts at 1.
- std::pair<unsigned, unsigned> Id = getSourceDirectoryAndFileIds(SI);
- const std::string &FN = getSourceFileName(Id.second);
- if (Asm->isVerbose()) Asm->OutStreamer.AddComment("Source");
- Asm->OutStreamer.EmitBytes(StringRef(FN.c_str(), FN.size()+1), 0);
-
- Asm->EmitULEB128(Id.first, "Directory #");
- Asm->EmitULEB128(0, "Mod date");
- Asm->EmitULEB128(0, "File size");
- }
-
- Asm->OutStreamer.AddComment("End of files");
- Asm->EmitInt8(0);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_prolog_end"));
-
- // A sequence for each text section.
- unsigned SecSrcLinesSize = SectionSourceLines.size();
-
- for (unsigned j = 0; j < SecSrcLinesSize; ++j) {
- // Isolate current sections line info.
- const std::vector<SrcLineInfo> &LineInfos = SectionSourceLines[j];
-
- // Dwarf assumes we start with first line of first source file.
- unsigned Source = 1;
- unsigned Line = 1;
-
- // Construct rows of the address, source, line, column matrix.
- for (unsigned i = 0, N = LineInfos.size(); i < N; ++i) {
- const SrcLineInfo &LineInfo = LineInfos[i];
- MCSymbol *Label = LineInfo.getLabel();
- if (!Label->isDefined()) continue; // Not emitted, in dead code.
-
- if (Asm->isVerbose()) {
- std::pair<unsigned, unsigned> SrcID =
- getSourceDirectoryAndFileIds(LineInfo.getSourceID());
- Asm->OutStreamer.AddComment(Twine(getSourceDirectoryName(SrcID.first)) +
- "/" +
- Twine(getSourceFileName(SrcID.second)) +
- ":" + Twine(LineInfo.getLine()));
- }
-
- // Define the line address.
- Asm->OutStreamer.AddComment("Extended Op");
- Asm->EmitInt8(0);
- Asm->OutStreamer.AddComment("Op size");
- Asm->EmitInt8(Asm->getTargetData().getPointerSize() + 1);
-
- Asm->OutStreamer.AddComment("DW_LNE_set_address");
- Asm->EmitInt8(dwarf::DW_LNE_set_address);
-
- Asm->OutStreamer.AddComment("Location label");
- Asm->OutStreamer.EmitSymbolValue(Label,
- Asm->getTargetData().getPointerSize(),
- 0/*AddrSpace*/);
-
- // If change of source, then switch to the new source.
- if (Source != LineInfo.getSourceID()) {
- Source = LineInfo.getSourceID();
- Asm->OutStreamer.AddComment("DW_LNS_set_file");
- Asm->EmitInt8(dwarf::DW_LNS_set_file);
- Asm->EmitULEB128(Source, "New Source");
- }
-
- // If change of line.
- if (Line != LineInfo.getLine()) {
- // Determine offset.
- int Offset = LineInfo.getLine() - Line;
- int Delta = Offset - MinLineDelta;
-
- // Update line.
- Line = LineInfo.getLine();
-
- // If delta is small enough and in range...
- if (Delta >= 0 && Delta < (MaxLineDelta - 1)) {
- // ... then use fast opcode.
- Asm->OutStreamer.AddComment("Line Delta");
- Asm->EmitInt8(Delta - MinLineDelta);
- } else {
- // ... otherwise use long hand.
- Asm->OutStreamer.AddComment("DW_LNS_advance_line");
- Asm->EmitInt8(dwarf::DW_LNS_advance_line);
- Asm->EmitSLEB128(Offset, "Line Offset");
- Asm->OutStreamer.AddComment("DW_LNS_copy");
- Asm->EmitInt8(dwarf::DW_LNS_copy);
- }
- } else {
- // Copy the previous row (different address or source)
- Asm->OutStreamer.AddComment("DW_LNS_copy");
- Asm->EmitInt8(dwarf::DW_LNS_copy);
- }
- }
-
- emitEndOfLineMatrix(j + 1);
- }
-
- if (SecSrcLinesSize == 0)
- // Because we're emitting a debug_line section, we still need a line
- // table. The linker and friends expect it to exist. If there's nothing to
- // put into it, emit an empty table.
- emitEndOfLineMatrix(1);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("line_end"));
-}
-
/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
void DwarfDebug::emitCommonDebugFrame() {
@@ -3456,8 +3375,8 @@ void DwarfDebug::emitCommonDebugFrame() {
return;
int stackGrowth = Asm->getTargetData().getPointerSize();
- if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsDown)
+ if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown)
stackGrowth *= -1;
// Start the dwarf frame section.
@@ -3480,10 +3399,11 @@ void DwarfDebug::emitCommonDebugFrame() {
Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
Asm->OutStreamer.AddComment("CIE RA Column");
const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), false));
std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
+ TFI->getInitialFrameState(Moves);
Asm->EmitFrameMoves(Moves, 0, false);
@@ -3667,6 +3587,14 @@ void DwarfDebug::emitDebugLoc() {
if (DotDebugLocEntries.empty())
return;
+ for (SmallVector<DotDebugLocEntry, 4>::iterator
+ I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
+ I != E; ++I) {
+ DotDebugLocEntry &Entry = *I;
+ if (I + 1 != DotDebugLocEntries.end())
+ Entry.Merge(I+1);
+ }
+
// Start the dwarf loc section.
Asm->OutStreamer.SwitchSection(
Asm->getObjFileLowering().getDwarfLocSection());
@@ -3676,7 +3604,8 @@ void DwarfDebug::emitDebugLoc() {
for (SmallVector<DotDebugLocEntry, 4>::iterator
I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end();
I != E; ++I, ++index) {
- DotDebugLocEntry Entry = *I;
+ DotDebugLocEntry &Entry = *I;
+ if (Entry.isMerged()) continue;
if (Entry.isEmpty()) {
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index f0ff3bc71699..7df0510fbfba 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -23,6 +23,7 @@
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/UniqueVector.h"
#include "llvm/Support/Allocator.h"
+#include "llvm/Support/DebugLoc.h"
namespace llvm {
@@ -51,6 +52,8 @@ class DIType;
class DINameSpace;
class DISubrange;
class DICompositeType;
+class DITemplateTypeParameter;
+class DITemplateValueParameter;
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
@@ -71,6 +74,28 @@ public:
MCSymbol *getLabel() const { return Label; }
};
+/// DotDebugLocEntry - This struct describes location entries emitted in
+/// .debug_loc section.
+typedef struct DotDebugLocEntry {
+ const MCSymbol *Begin;
+ const MCSymbol *End;
+ MachineLocation Loc;
+ bool Merged;
+ DotDebugLocEntry() : Begin(0), End(0), Merged(false) {}
+ DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L)
+ : Begin(B), End(E), Loc(L), Merged(false) {}
+ /// Empty entries are also used as a trigger to emit a temp label. Such
+ /// labels are referenced to find the debug_loc offset for a given DIE.
+ bool isEmpty() { return Begin == 0 && End == 0; }
+ bool isMerged() { return Merged; }
+ void Merge(DotDebugLocEntry *Next) {
+ if (!(Begin && Loc == Next->Loc && End == Next->Begin))
+ return;
+ Next->Begin = Begin;
+ Merged = true;
+ }
+} DotDebugLocEntry;
+
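The new Merge hook lets emitDebugLoc coalesce two adjacent .debug_loc entries that describe the same location and then skip the one marked merged. A minimal standalone analogue, with strings standing in for the begin/end MCSymbols and an int for MachineLocation:

#include <cassert>
#include <string>

// Standalone analogue of DotDebugLocEntry::Merge: if this entry ends exactly
// where the next one begins and both describe the same location, grow the
// next entry backwards and mark this one merged so emission drops it.
struct LocEntry {
  std::string Begin, End; // stand-ins for the MCSymbol pointers
  int Loc;                // stand-in for MachineLocation
  bool Merged;
  LocEntry(const std::string &B, const std::string &E, int L)
      : Begin(B), End(E), Loc(L), Merged(false) {}
  void Merge(LocEntry *Next) {
    if (Begin.empty() || Loc != Next->Loc || End != Next->Begin)
      return;
    Next->Begin = Begin; // the surviving entry now covers both ranges
    Merged = true;       // the emitter skips entries marked merged
  }
};

int main() {
  LocEntry A("L0", "L1", /*Loc=*/7);
  LocEntry B("L1", "L2", /*Loc=*/7); // adjacent range, same location
  A.Merge(&B);
  assert(A.Merged && B.Begin == "L0" && B.End == "L2");
  return 0;
}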
class DwarfDebug {
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
@@ -93,30 +118,9 @@ class DwarfDebug {
///
std::vector<DIEAbbrev *> Abbreviations;
- /// DirectoryIdMap - Directory name to directory id map.
- ///
- StringMap<unsigned> DirectoryIdMap;
-
- /// DirectoryNames - A list of directory names.
- SmallVector<std::string, 8> DirectoryNames;
-
- /// SourceFileIdMap - Source file name to source file id map.
- ///
- StringMap<unsigned> SourceFileIdMap;
-
- /// SourceFileNames - A list of source file names.
- SmallVector<std::string, 8> SourceFileNames;
-
/// SourceIdMap - Source id map, i.e. pair of directory id and source file
/// id mapped to a unique id.
- DenseMap<std::pair<unsigned, unsigned>, unsigned> SourceIdMap;
-
- /// SourceIds - Reverse map from source id to directory id + file id pair.
- ///
- SmallVector<std::pair<unsigned, unsigned>, 8> SourceIds;
-
- /// Lines - List of source line correspondence.
- std::vector<SrcLineInfo> Lines;
+ StringMap<unsigned> SourceIdMap;
/// DIEBlocks - A list of all the DIEBlocks in use.
std::vector<DIEBlock *> DIEBlocks;
@@ -135,10 +139,6 @@ class DwarfDebug {
///
UniqueVector<const MCSection*> SectionMap;
- /// SectionSourceLines - Tracks line numbers per text section.
- ///
- std::vector<std::vector<SrcLineInfo> > SectionSourceLines;
-
// CurrentFnDbgScope - Top level scope for the current function.
//
DbgScope *CurrentFnDbgScope;
@@ -175,23 +175,6 @@ class DwarfDebug {
/// machine instruction.
DenseMap<const DbgVariable *, const MachineInstr *> DbgVariableToDbgInstMap;
- /// DbgVariableLabelsMap - Maps DbgVariable to corresponding MCSymbol.
- DenseMap<const DbgVariable *, const MCSymbol *> DbgVariableLabelsMap;
-
- /// DotDebugLocEntry - This struct describes location entries emitted in
- /// .debug_loc section.
- typedef struct DotDebugLocEntry {
- const MCSymbol *Begin;
- const MCSymbol *End;
- MachineLocation Loc;
- DotDebugLocEntry() : Begin(0), End(0) {}
- DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E,
- MachineLocation &L) : Begin(B), End(E), Loc(L) {}
- /// Empty entries are also used as a trigger to emit temp label. Such
- /// labels are referenced is used to find debug_loc offset for a given DIE.
- bool isEmpty() { return Begin == 0 && End == 0; }
- } DotDebugLocEntry;
-
/// DotDebugLocEntries - Collection of DotDebugLocEntry.
SmallVector<DotDebugLocEntry, 4> DotDebugLocEntries;
@@ -265,35 +248,10 @@ class DwarfDebug {
DIEInteger *DIEIntegerOne;
private:
-
- /// getSourceDirectoryAndFileIds - Return the directory and file ids that
- /// maps to the source id. Source id starts at 1.
- std::pair<unsigned, unsigned>
- getSourceDirectoryAndFileIds(unsigned SId) const {
- return SourceIds[SId-1];
- }
-
- /// getNumSourceDirectories - Return the number of source directories in the
- /// debug info.
- unsigned getNumSourceDirectories() const {
- return DirectoryNames.size();
- }
-
- /// getSourceDirectoryName - Return the name of the directory corresponding
- /// to the id.
- const std::string &getSourceDirectoryName(unsigned Id) const {
- return DirectoryNames[Id - 1];
- }
-
- /// getSourceFileName - Return the name of the source file corresponding
- /// to the id.
- const std::string &getSourceFileName(unsigned Id) const {
- return SourceFileNames[Id - 1];
- }
/// getNumSourceIds - Return the number of unique source ids.
unsigned getNumSourceIds() const {
- return SourceIds.size();
+ return SourceIdMap.size();
}
/// assignAbbrevNumber - Define a unique number for the abbreviation.
@@ -349,13 +307,14 @@ private:
const MachineLocation &Location);
/// addRegisterAddress - Add register location entry in variable DIE.
- bool addRegisterAddress(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addRegisterAddress(DIE *Die, const MachineOperand &MO);
/// addConstantValue - Add constant value entry in variable DIE.
- bool addConstantValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addConstantValue(DIE *Die, const MachineOperand &MO);
+ bool addConstantValue(DIE *Die, ConstantInt *CI, bool Unsigned);
/// addConstantFPValue - Add constant value entry in variable DIE.
- bool addConstantFPValue(DIE *Die, const MCSymbol *VS, const MachineOperand &MO);
+ bool addConstantFPValue(DIE *Die, const MachineOperand &MO);
/// addComplexAddress - Start with the address based on the location provided,
/// and generate the DWARF information necessary to find the actual variable
@@ -393,6 +352,14 @@ private:
/// given DIType.
DIE *getOrCreateTypeDIE(DIType Ty);
+ /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateTypeParameter.
+ DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP);
+
+ /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE
+ /// for the given DITemplateValueParameter.
+ DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP);
+
void addPubTypes(DISubprogram SP);
/// constructTypeDIE - Construct basic type die from DIBasicType.
@@ -421,7 +388,7 @@ private:
DIE *createMemberDIE(DIDerivedType DT);
/// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(DISubprogram SP, bool MakeDecl = false);
+ DIE *createSubprogramDIE(DISubprogram SP);
/// getOrCreateDbgScope - Create DbgScope for the scope.
DbgScope *getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt);
@@ -481,10 +448,6 @@ private:
///
void emitEndOfLineMatrix(unsigned SectionEnd);
- /// emitDebugLines - Emit source line information.
- ///
- void emitDebugLines();
-
/// emitCommonDebugFrame - Emit common frame info into a debug frame section.
///
void emitCommonDebugFrame();
@@ -543,9 +506,8 @@ private:
/// GetOrCreateSourceID - Look up the source id with the given directory and
/// source file names. If none currently exists, create a new id and insert it
- /// in the SourceIds map. This can update DirectoryNames and SourceFileNames
- /// maps as well.
- unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
+ /// in the SourceIds map.
+ unsigned GetOrCreateSourceID(StringRef FullName);
/// constructCompileUnit - Create new CompileUnit for the given
/// metadata node with tag DW_TAG_compile_unit.
@@ -565,12 +527,6 @@ private:
/// the source line list.
MCSymbol *recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope);
- /// getSourceLineCount - Return the number of source lines in the debug
- /// info.
- unsigned getSourceLineCount() const {
- return Lines.size();
- }
-
/// recordVariableFrameIndex - Record a variable's index.
void recordVariableFrameIndex(const DbgVariable *V, int Index);
@@ -578,9 +534,6 @@ private:
/// is found. Update FI to hold value of the index.
bool findVariableFrameIndex(const DbgVariable *V, int *FI);
- /// findVariableLabel - Find MCSymbol for the variable.
- const MCSymbol *findVariableLabel(const DbgVariable *V);
-
/// findDbgScope - Find DbgScope for the debug loc attached with an
/// instruction.
DbgScope *findDbgScope(const MachineInstr *MI);
@@ -630,11 +583,11 @@ public:
/// getLabelAfterInsn - Return Label immediately following the instruction.
const MCSymbol *getLabelAfterInsn(const MachineInstr *MI);
- /// beginScope - Process beginning of a scope.
- void beginScope(const MachineInstr *MI);
+ /// beginInstruction - Process beginning of an instruction.
+ void beginInstruction(const MachineInstr *MI);
- /// endScope - Prcess end of a scope.
- void endScope(const MachineInstr *MI);
+ /// endInstruction - Process end of an instruction.
+ void endInstruction(const MachineInstr *MI);
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 86a368831e0e..967a2783da14 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -26,7 +26,7 @@
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -39,238 +39,10 @@
using namespace llvm;
DwarfException::DwarfException(AsmPrinter *A)
- : Asm(A), MMI(Asm->MMI), shouldEmitTable(false), shouldEmitMoves(false),
- shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+ : Asm(A), MMI(Asm->MMI) {}
DwarfException::~DwarfException() {}
-/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
-/// is shared among many Frame Description Entries. There is at least one CIE
-/// in every non-empty .debug_frame section.
-void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
- // Size and sign of stack growth.
- int stackGrowth = Asm->getTargetData().getPointerSize();
- if (Asm->TM.getFrameInfo()->getStackGrowthDirection() ==
- TargetFrameInfo::StackGrowsDown)
- stackGrowth *= -1;
-
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- // Begin eh frame section.
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
- MCSymbol *EHFrameSym;
- if (TLOF.isFunctionEHFrameSymbolPrivate())
- EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
- else
- EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
- Twine(Index));
- Asm->OutStreamer.EmitLabel(EHFrameSym);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
-
- // Define base labels.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
-
- // Define the eh frame length.
- Asm->OutStreamer.AddComment("Length of Common Information Entry");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
- Asm->GetTempSymbol("eh_frame_common_begin", Index),
- 4);
-
- // EH frame header.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
- Asm->OutStreamer.AddComment("CIE Identifier Tag");
- Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
- Asm->OutStreamer.AddComment("DW_CIE_VERSION");
- Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
-
- // The personality presence indicates that language specific information will
- // show up in the eh frame. Find out how we are supposed to lower the
- // personality function reference:
-
- unsigned LSDAEncoding = TLOF.getLSDAEncoding();
- unsigned FDEEncoding = TLOF.getFDEEncoding();
- unsigned PerEncoding = TLOF.getPersonalityEncoding();
-
- char Augmentation[6] = { 0 };
- unsigned AugmentationSize = 0;
- char *APtr = Augmentation + 1;
-
- if (PersonalityFn) {
- // There is a personality function.
- *APtr++ = 'P';
- AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
- }
-
- if (UsesLSDA[Index]) {
- // An LSDA pointer is in the FDE augmentation.
- *APtr++ = 'L';
- ++AugmentationSize;
- }
-
- if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
- // A non-default pointer encoding for the FDE.
- *APtr++ = 'R';
- ++AugmentationSize;
- }
-
- if (APtr != Augmentation + 1)
- Augmentation[0] = 'z';
-
- Asm->OutStreamer.AddComment("CIE Augmentation");
- Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
-
- // Round out reader.
- Asm->EmitULEB128(1, "CIE Code Alignment Factor");
- Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
- Asm->OutStreamer.AddComment("CIE Return Address Column");
-
- const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
- Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
-
- if (Augmentation[0]) {
- Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
-
- // If there is a personality, we need to indicate the function's location.
- if (PersonalityFn) {
- Asm->EmitEncodingByte(PerEncoding, "Personality");
- Asm->OutStreamer.AddComment("Personality");
- Asm->EmitReference(PersonalityFn, PerEncoding);
- }
- if (UsesLSDA[Index])
- Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
- if (FDEEncoding != dwarf::DW_EH_PE_absptr)
- Asm->EmitEncodingByte(FDEEncoding, "FDE");
- }
-
- // Indicate locations of general callee saved registers in frame.
- std::vector<MachineMove> Moves;
- RI->getInitialFrameState(Moves);
- Asm->EmitFrameMoves(Moves, 0, true);
-
- // On Darwin the linker honors the alignment of eh_frame, which means it must
- // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
- // holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
-}
-
-/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
-void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
- assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
- "Should not emit 'available externally' functions at all");
-
- const Function *TheFunc = EHFrameInfo.function;
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
-
- unsigned LSDAEncoding = TLOF.getLSDAEncoding();
- unsigned FDEEncoding = TLOF.getFDEEncoding();
-
- Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
-
- // Externally visible entry into the functions eh frame info. If the
- // corresponding function is static, this should not be externally visible.
- if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
-
- // If corresponding function is weak definition, this should be too.
- if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_WeakDefinition);
-
- // If corresponding function is hidden, this should be too.
- if (TheFunc->hasHiddenVisibility())
- if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- HiddenAttr);
-
- // If there are no calls then you can't unwind. This may mean we can omit the
- // EH Frame, but some environments do not handle weak absolute symbols. If
- // UnwindTablesMandatory is set we cannot do this optimization; the unwind
- // info is to be available for non-EH uses.
- if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
- (!TheFunc->isWeakForLinker() ||
- !Asm->MAI->getWeakDefDirective() ||
- TLOF.getSupportsWeakOmittedEHFrame())) {
- Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
- MCConstantExpr::Create(0, Asm->OutContext));
- // This name has no connection to the function, so it might get
- // dead-stripped when the function is not, erroneously. Prohibit
- // dead-stripping unconditionally.
- if (Asm->MAI->hasNoDeadStrip())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_NoDeadStrip);
- } else {
- Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
-
- // EH frame header.
- Asm->OutStreamer.AddComment("Length of Frame Information Entry");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
- Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
- EHFrameInfo.Number));
-
- Asm->OutStreamer.AddComment("FDE CIE offset");
- Asm->EmitLabelDifference(
- Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
- Asm->GetTempSymbol("eh_frame_common",
- EHFrameInfo.PersonalityIndex), 4);
-
- MCSymbol *EHFuncBeginSym =
- Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
-
- Asm->OutStreamer.AddComment("FDE initial location");
- Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
-
- Asm->OutStreamer.AddComment("FDE address range");
- Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
- EHFrameInfo.Number),
- EHFuncBeginSym,
- Asm->GetSizeOfEncodedValue(FDEEncoding));
-
- // If there is a personality and landing pads then point to the language
- // specific data area in the exception table.
- if (MMI->getPersonalities()[0] != NULL) {
- unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
-
- Asm->EmitULEB128(Size, "Augmentation size");
- Asm->OutStreamer.AddComment("Language Specific Data Area");
- if (EHFrameInfo.hasLandingPads)
- Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
- LSDAEncoding);
- else
- Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
-
- } else {
- Asm->EmitULEB128(0, "Augmentation size");
- }
-
- // Indicate locations of function specific callee saved registers in frame.
- Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
-
- // On Darwin the linker honors the alignment of eh_frame, which means it
- // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
- // get holes which confuse readers of eh_frame.
- Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
- EHFrameInfo.Number));
-
- // If the function is marked used, this table should be also. We cannot
- // make the mark unconditional in this case, since retaining the table also
- // retains the function in this case, and there is code around that depends
- // on unused functions (calling undefined externals) being dead-stripped to
- // link correctly. Yes, there really is.
- if (MMI->isUsedFunction(EHFrameInfo.function))
- if (Asm->MAI->hasNoDeadStrip())
- Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
- MCSA_NoDeadStrip);
- }
- Asm->OutStreamer.AddBlankLine();
-}
-
/// SharedTypeIds - How many leading type ids two landing pads have in common.
unsigned DwarfException::SharedTypeIds(const LandingPadInfo *L,
const LandingPadInfo *R) {
@@ -422,7 +194,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
const MachineOperand &MO = MI->getOperand(I);
if (!MO.isGlobal()) continue;
-
+
const Function *F = dyn_cast<Function>(MO.getGlobal());
if (F == 0) continue;
@@ -430,7 +202,7 @@ bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
// Be conservative. If we have more than one function operand for this
// call, then we can't make the assumption that it's the callee and
// not a parameter to the call.
- //
+ //
// FIXME: Determine if there's a way to say that `F' is the callee or
// parameter.
MarkedNoUnwind = false;
@@ -497,8 +269,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// instruction between the previous try-range and this one may throw,
// create a call-site entry with no landing pad for the region between the
// try-ranges.
- if (SawPotentiallyThrowing &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry Site = { LastLabel, BeginLabel, 0, 0 };
CallSites.push_back(Site);
PreviousIsInvoke = false;
@@ -520,8 +291,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
};
// Try to merge with the previous call-site. SJLJ doesn't do this
- if (PreviousIsInvoke &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (PreviousIsInvoke && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry &Prev = CallSites.back();
if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) {
// Extend the range of the previous entry.
@@ -531,7 +301,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
}
// Otherwise, create a new call-site.
- if (Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf)
+ if (Asm->MAI->isExceptionHandlingDwarf())
CallSites.push_back(Site);
else {
// SjLj EH must maintain the call sites in the order assigned
@@ -549,8 +319,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
// If some instruction between the previous try-range and the end of the
// function may throw, create a call-site entry with no landing pad for the
// region following the try-range.
- if (SawPotentiallyThrowing &&
- Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf) {
+ if (SawPotentiallyThrowing && Asm->MAI->isExceptionHandlingDwarf()) {
CallSiteEntry Site = { LastLabel, 0, 0, 0 };
CallSites.push_back(Site);
}
@@ -620,7 +389,7 @@ void DwarfException::EmitExceptionTable() {
// Call sites.
bool IsSJLJ = Asm->MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
-
+
unsigned CallSiteTableLength;
if (IsSJLJ)
CallSiteTableLength = 0;
@@ -628,7 +397,7 @@ void DwarfException::EmitExceptionTable() {
unsigned SiteStartSize = 4; // dwarf::DW_EH_PE_udata4
unsigned SiteLengthSize = 4; // dwarf::DW_EH_PE_udata4
unsigned LandingPadSize = 4; // dwarf::DW_EH_PE_udata4
- CallSiteTableLength =
+ CallSiteTableLength =
CallSites.size() * (SiteStartSize + SiteLengthSize + LandingPadSize);
}
@@ -656,15 +425,15 @@ void DwarfException::EmitExceptionTable() {
// mode, this reference will require a relocation by the dynamic linker.
//
// Because of this, we have a couple of options:
- //
+ //
// 1) If we are in -static mode, we can always use an absolute reference
// from the LSDA, because the static linker will resolve it.
- //
+ //
// 2) Otherwise, if the LSDA section is writable, we can output the direct
// reference to the typeinfo and allow the dynamic linker to relocate
// it. Since it is in a writable section, the dynamic linker won't
// have a problem.
- //
+ //
// 3) Finally, if we're in PIC mode and the LDSA section isn't writable,
// we need to use some form of indirection. For example, on Darwin,
// we can output a statically-relocatable reference to a dyld stub. The
@@ -682,11 +451,14 @@ void DwarfException::EmitExceptionTable() {
}
// Begin the exception table.
- Asm->OutStreamer.SwitchSection(LSDASection);
+ // Sometimes we do not want to emit the data into a separate section (e.g. ARM
+ // EHABI). In that case LSDASection will be NULL.
+ if (LSDASection)
+ Asm->OutStreamer.SwitchSection(LSDASection);
Asm->EmitAlignment(2);
// Emit the LSDA.
- MCSymbol *GCCETSym =
+ MCSymbol *GCCETSym =
Asm->OutContext.GetOrCreateSymbol(Twine("GCC_except_table")+
Twine(Asm->getFunctionNumber()));
Asm->OutStreamer.EmitLabel(GCCETSym);
@@ -764,7 +536,7 @@ void DwarfException::EmitExceptionTable() {
}
} else {
// DWARF Exception handling
- assert(Asm->MAI->getExceptionHandlingType() == ExceptionHandling::Dwarf);
+ assert(Asm->MAI->isExceptionHandlingDwarf());
// The call-site table is a list of all call sites that may throw an
// exception (including C++ 'throw' statements) in the procedure
@@ -793,23 +565,23 @@ void DwarfException::EmitExceptionTable() {
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
const CallSiteEntry &S = *I;
-
+
MCSymbol *EHFuncBeginSym =
Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber());
-
+
MCSymbol *BeginLabel = S.BeginLabel;
if (BeginLabel == 0)
BeginLabel = EHFuncBeginSym;
MCSymbol *EndLabel = S.EndLabel;
if (EndLabel == 0)
EndLabel = Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber());
-
+
// Offset of the call site relative to the previous call site, counted in
// number of 16-byte bundles. The first call site is counted relative to
// the start of the procedure fragment.
Asm->OutStreamer.AddComment("Region start");
Asm->EmitLabelDifference(BeginLabel, EHFuncBeginSym, 4);
-
+
Asm->OutStreamer.AddComment("Region length");
Asm->EmitLabelDifference(EndLabel, BeginLabel, 4);
@@ -834,7 +606,7 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.AddComment("-- Action Record Table --");
Asm->OutStreamer.AddBlankLine();
}
-
+
for (SmallVectorImpl<ActionEntry>::const_iterator
I = Actions.begin(), E = Actions.end(); I != E; ++I) {
const ActionEntry &Action = *I;
@@ -888,73 +660,17 @@ void DwarfException::EmitExceptionTable() {
/// EndModule - Emit all exception information that should come after the
/// content.
void DwarfException::EndModule() {
- if (Asm->MAI->getExceptionHandlingType() != ExceptionHandling::Dwarf)
- return;
-
- if (!shouldEmitMovesModule && !shouldEmitTableModule)
- return;
-
- const std::vector<const Function*> &Personalities = MMI->getPersonalities();
-
- for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
- EmitCIE(Personalities[I], I);
-
- for (std::vector<FunctionEHFrameInfo>::iterator
- I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
- EmitFDE(*I);
+ assert(0 && "Should be implemented");
}
/// BeginFunction - Gather pre-function exception information. Assumes it's
/// being emitted immediately after the function entry point.
void DwarfException::BeginFunction(const MachineFunction *MF) {
- shouldEmitTable = shouldEmitMoves = false;
-
- // If any landing pads survive, we need an EH table.
- shouldEmitTable = !MMI->getLandingPads().empty();
-
- // See if we need frame move info.
- shouldEmitMoves =
- !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
-
- if (shouldEmitMoves || shouldEmitTable)
- // Assumes in correct section after the entry point.
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
- Asm->getFunctionNumber()));
-
- shouldEmitTableModule |= shouldEmitTable;
- shouldEmitMovesModule |= shouldEmitMoves;
+ assert(0 && "Should be implemented");
}
/// EndFunction - Gather and emit post-function exception information.
///
void DwarfException::EndFunction() {
- if (!shouldEmitMoves && !shouldEmitTable) return;
-
- Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
- Asm->getFunctionNumber()));
-
- // Record if this personality index uses a landing pad.
- bool HasLandingPad = !MMI->getLandingPads().empty();
- UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
-
- // Map all labels and get rid of any dead landing pads.
- MMI->TidyLandingPads();
-
- if (HasLandingPad)
- EmitExceptionTable();
-
- const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
- MCSymbol *FunctionEHSym =
- Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
- TLOF.isFunctionEHFrameSymbolPrivate());
-
- // Save EH frame information
- EHFrames.
- push_back(FunctionEHFrameInfo(FunctionEHSym,
- Asm->getFunctionNumber(),
- MMI->getPersonalityIndex(),
- Asm->MF->getFrameInfo()->adjustsStack(),
- !MMI->getLandingPads().empty(),
- MMI->getFrameMoves(),
- Asm->MF->getFunction()));
+ assert(0 && "Should be implemented");
}
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index bc311e67054e..a172e53f8ac7 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -35,60 +35,13 @@ class AsmPrinter;
/// DwarfException - Emits Dwarf exception handling directives.
///
class DwarfException {
+protected:
/// Asm - Target of Dwarf emission.
AsmPrinter *Asm;
/// MMI - Collected machine module information.
MachineModuleInfo *MMI;
- struct FunctionEHFrameInfo {
- MCSymbol *FunctionEHSym; // L_foo.eh
- unsigned Number;
- unsigned PersonalityIndex;
- bool adjustsStack;
- bool hasLandingPads;
- std::vector<MachineMove> Moves;
- const Function *function;
-
- FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
- bool hC, bool hL,
- const std::vector<MachineMove> &M,
- const Function *f):
- FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
- adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
- };
-
- std::vector<FunctionEHFrameInfo> EHFrames;
-
- /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
- /// uses an LSDA. If so, then we need to encode that information in the CIE's
- /// augmentation.
- DenseMap<unsigned, bool> UsesLSDA;
-
- /// shouldEmitTable - Per-function flag to indicate if EH tables should
- /// be emitted.
- bool shouldEmitTable;
-
- /// shouldEmitMoves - Per-function flag to indicate if frame moves info
- /// should be emitted.
- bool shouldEmitMoves;
-
- /// shouldEmitTableModule - Per-module flag to indicate if EH tables
- /// should be emitted.
- bool shouldEmitTableModule;
-
- /// shouldEmitFrameModule - Per-module flag to indicate if frame moves
- /// should be emitted.
- bool shouldEmitMovesModule;
-
- /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
- /// that is shared among many Frame Description Entries. There is at least
- /// one CIE in every non-empty .debug_frame section.
- void EmitCIE(const Function *Personality, unsigned Index);
-
- /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
- void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
-
/// EmitExceptionTable - Emit landing pads and actions.
///
/// The general organization of the table is complex, but the basic concepts
@@ -172,18 +125,116 @@ public:
// Main entry points.
//
DwarfException(AsmPrinter *A);
- ~DwarfException();
+ virtual ~DwarfException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfCFIException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfCFIException(AsmPrinter *A);
+ virtual ~DwarfCFIException();
+
+ /// EndModule - Emit all exception information that should come after the
+ /// content.
+ virtual void EndModule();
+
+ /// BeginFunction - Gather pre-function exception information. Assumes being
+ /// emitted immediately after the function entry point.
+ virtual void BeginFunction(const MachineFunction *MF);
+
+ /// EndFunction - Gather and emit post-function exception information.
+ virtual void EndFunction();
+};
+
+class DwarfTableException : public DwarfException {
+ /// shouldEmitTable - Per-function flag to indicate if EH tables should
+ /// be emitted.
+ bool shouldEmitTable;
+
+ /// shouldEmitMoves - Per-function flag to indicate if frame moves info
+ /// should be emitted.
+ bool shouldEmitMoves;
+
+ /// shouldEmitTableModule - Per-module flag to indicate if EH tables
+ /// should be emitted.
+ bool shouldEmitTableModule;
+
+ /// shouldEmitMovesModule - Per-module flag to indicate if frame moves
+ /// should be emitted.
+ bool shouldEmitMovesModule;
+
+ struct FunctionEHFrameInfo {
+ MCSymbol *FunctionEHSym; // L_foo.eh
+ unsigned Number;
+ unsigned PersonalityIndex;
+ bool adjustsStack;
+ bool hasLandingPads;
+ std::vector<MachineMove> Moves;
+ const Function *function;
+
+ FunctionEHFrameInfo(MCSymbol *EHSym, unsigned Num, unsigned P,
+ bool hC, bool hL,
+ const std::vector<MachineMove> &M,
+ const Function *f):
+ FunctionEHSym(EHSym), Number(Num), PersonalityIndex(P),
+ adjustsStack(hC), hasLandingPads(hL), Moves(M), function (f) { }
+ };
+
+ std::vector<FunctionEHFrameInfo> EHFrames;
+
+ /// UsesLSDA - Indicates whether an FDE that uses the CIE at the given index
+ /// uses an LSDA. If so, then we need to encode that information in the CIE's
+ /// augmentation.
+ DenseMap<unsigned, bool> UsesLSDA;
+
+ /// EmitCIE - Emit a Common Information Entry (CIE). This holds information
+ /// that is shared among many Frame Description Entries. There is at least
+ /// one CIE in every non-empty .debug_frame section.
+ void EmitCIE(const Function *Personality, unsigned Index);
+
+ /// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+ void EmitFDE(const FunctionEHFrameInfo &EHFrameInfo);
+public:
+ //===--------------------------------------------------------------------===//
+ // Main entry points.
+ //
+ DwarfTableException(AsmPrinter *A);
+ virtual ~DwarfTableException();
/// EndModule - Emit all exception information that should come after the
/// content.
- void EndModule();
+ virtual void EndModule();
/// BeginFunction - Gather pre-function exception information. Assumes being
/// emitted immediately after the function entry point.
- void BeginFunction(const MachineFunction *MF);
+ virtual void BeginFunction(const MachineFunction *MF);
/// EndFunction - Gather and emit post-function exception information.
- void EndFunction();
+ virtual void EndFunction();
};
} // End of namespace llvm
diff --git a/lib/CodeGen/AsmPrinter/DwarfTableException.cpp b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
new file mode 100644
index 000000000000..751901183cd0
--- /dev/null
+++ b/lib/CodeGen/AsmPrinter/DwarfTableException.cpp
@@ -0,0 +1,349 @@
+//===-- CodeGen/AsmPrinter/DwarfTableException.cpp - Dwarf Exception Impl --==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for writing DWARF exception info into asm files.
+// The implementation emits all the necessary tables "by hand".
+//
+//===----------------------------------------------------------------------===//
+
+#include "DwarfException.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/MachineModuleInfo.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCExpr.h"
+#include "llvm/MC/MCSection.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameLowering.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Twine.h"
+using namespace llvm;
+
+DwarfTableException::DwarfTableException(AsmPrinter *A)
+ : DwarfException(A),
+ shouldEmitTable(false), shouldEmitMoves(false),
+ shouldEmitTableModule(false), shouldEmitMovesModule(false) {}
+
+DwarfTableException::~DwarfTableException() {}
+
+/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
+/// is shared among many Frame Description Entries. There is at least one CIE
+/// in every non-empty .debug_frame section.
+void DwarfTableException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
+ // Size and sign of stack growth.
+ int stackGrowth = Asm->getTargetData().getPointerSize();
+ if (Asm->TM.getFrameLowering()->getStackGrowthDirection() ==
+ TargetFrameLowering::StackGrowsDown)
+ stackGrowth *= -1;
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ // Begin eh frame section.
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ MCSymbol *EHFrameSym;
+ if (TLOF.isFunctionEHFrameSymbolPrivate())
+ EHFrameSym = Asm->GetTempSymbol("EH_frame", Index);
+ else
+ EHFrameSym = Asm->OutContext.GetOrCreateSymbol(Twine("EH_frame") +
+ Twine(Index));
+ Asm->OutStreamer.EmitLabel(EHFrameSym);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_eh_frame", Index));
+
+ // Define base labels.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common", Index));
+
+ // Define the eh frame length.
+ Asm->OutStreamer.AddComment("Length of Common Information Entry");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_frame_common_end", Index),
+ Asm->GetTempSymbol("eh_frame_common_begin", Index),
+ 4);
+
+ // EH frame header.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_begin",Index));
+ Asm->OutStreamer.AddComment("CIE Identifier Tag");
+ Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/);
+ Asm->OutStreamer.AddComment("DW_CIE_VERSION");
+ Asm->OutStreamer.EmitIntValue(dwarf::DW_CIE_VERSION, 1/*size*/, 0/*addr*/);
+
+ // The personality presence indicates that language specific information will
+ // show up in the eh frame. Find out how we are supposed to lower the
+ // personality function reference:
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+ unsigned PerEncoding = TLOF.getPersonalityEncoding();
+
+ char Augmentation[6] = { 0 };
+ unsigned AugmentationSize = 0;
+ char *APtr = Augmentation + 1;
+
+ if (PersonalityFn) {
+ // There is a personality function.
+ *APtr++ = 'P';
+ AugmentationSize += 1 + Asm->GetSizeOfEncodedValue(PerEncoding);
+ }
+
+ if (UsesLSDA[Index]) {
+ // An LSDA pointer is in the FDE augmentation.
+ *APtr++ = 'L';
+ ++AugmentationSize;
+ }
+
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr) {
+ // A non-default pointer encoding for the FDE.
+ *APtr++ = 'R';
+ ++AugmentationSize;
+ }
+
+ if (APtr != Augmentation + 1)
+ Augmentation[0] = 'z';
+
+ Asm->OutStreamer.AddComment("CIE Augmentation");
+ Asm->OutStreamer.EmitBytes(StringRef(Augmentation, strlen(Augmentation)+1),0);
+
+ // Emit the code and data alignment factors and the return address column.
+ Asm->EmitULEB128(1, "CIE Code Alignment Factor");
+ Asm->EmitSLEB128(stackGrowth, "CIE Data Alignment Factor");
+ Asm->OutStreamer.AddComment("CIE Return Address Column");
+
+ const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo();
+ const TargetFrameLowering *TFI = Asm->TM.getFrameLowering();
+ Asm->EmitInt8(RI->getDwarfRegNum(RI->getRARegister(), true));
+
+ if (Augmentation[0]) {
+ Asm->EmitULEB128(AugmentationSize, "Augmentation Size");
+
+ // If there is a personality, we need to indicate the function's location.
+ if (PersonalityFn) {
+ Asm->EmitEncodingByte(PerEncoding, "Personality");
+ Asm->OutStreamer.AddComment("Personality");
+ Asm->EmitReference(PersonalityFn, PerEncoding);
+ }
+ if (UsesLSDA[Index])
+ Asm->EmitEncodingByte(LSDAEncoding, "LSDA");
+ if (FDEEncoding != dwarf::DW_EH_PE_absptr)
+ Asm->EmitEncodingByte(FDEEncoding, "FDE");
+ }
+
+ // Indicate locations of general callee saved registers in frame.
+ std::vector<MachineMove> Moves;
+ TFI->getInitialFrameState(Moves);
+ Asm->EmitFrameMoves(Moves, 0, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it must
+ // be 8-byte on 64-bit targets to match what gcc does. Otherwise you get
+ // holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_common_end", Index));
+}
+
+/// EmitFDE - Emit the Frame Description Entry (FDE) for the function.
+void DwarfTableException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
+ assert(!EHFrameInfo.function->hasAvailableExternallyLinkage() &&
+ "Should not emit 'available externally' functions at all");
+
+ const Function *TheFunc = EHFrameInfo.function;
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+
+ unsigned LSDAEncoding = TLOF.getLSDAEncoding();
+ unsigned FDEEncoding = TLOF.getFDEEncoding();
+
+ Asm->OutStreamer.SwitchSection(TLOF.getEHFrameSection());
+
+ // Externally visible entry into the function's eh frame info. If the
+ // corresponding function is static, this should not be externally visible.
+ if (!TheFunc->hasLocalLinkage() && TLOF.isFunctionEHSymbolGlobal())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,MCSA_Global);
+
+ // If corresponding function is weak definition, this should be too.
+ if (TheFunc->isWeakForLinker() && Asm->MAI->getWeakDefDirective())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_WeakDefinition);
+
+ // If corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (MCSymbolAttr HiddenAttr = Asm->MAI->getHiddenVisibilityAttr())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ HiddenAttr);
+
+ // If there are no calls then you can't unwind. This may mean we can omit the
+ // EH Frame, but some environments do not handle weak absolute symbols. If
+ // UnwindTablesMandatory is set, we cannot do this optimization; the unwind
+ // info must remain available for non-EH uses.
+ if (!EHFrameInfo.adjustsStack && !UnwindTablesMandatory &&
+ (!TheFunc->isWeakForLinker() ||
+ !Asm->MAI->getWeakDefDirective() ||
+ TLOF.getSupportsWeakOmittedEHFrame())) {
+ Asm->OutStreamer.EmitAssignment(EHFrameInfo.FunctionEHSym,
+ MCConstantExpr::Create(0, Asm->OutContext));
+ // This name has no connection to the function, so it might get
+ // dead-stripped when the function is not, erroneously. Prohibit
+ // dead-stripping unconditionally.
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ } else {
+ Asm->OutStreamer.EmitLabel(EHFrameInfo.FunctionEHSym);
+
+ // EH frame header.
+ Asm->OutStreamer.AddComment("Length of Frame Information Entry");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_end", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number), 4);
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_begin",
+ EHFrameInfo.Number));
+
+ Asm->OutStreamer.AddComment("FDE CIE offset");
+ Asm->EmitLabelDifference(
+ Asm->GetTempSymbol("eh_frame_begin", EHFrameInfo.Number),
+ Asm->GetTempSymbol("eh_frame_common",
+ EHFrameInfo.PersonalityIndex), 4);
+
+ MCSymbol *EHFuncBeginSym =
+ Asm->GetTempSymbol("eh_func_begin", EHFrameInfo.Number);
+
+ Asm->OutStreamer.AddComment("FDE initial location");
+ Asm->EmitReference(EHFuncBeginSym, FDEEncoding);
+
+ Asm->OutStreamer.AddComment("FDE address range");
+ Asm->EmitLabelDifference(Asm->GetTempSymbol("eh_func_end",
+ EHFrameInfo.Number),
+ EHFuncBeginSym,
+ Asm->GetSizeOfEncodedValue(FDEEncoding));
+
+ // If there is a personality and landing pads then point to the language
+ // specific data area in the exception table.
+ if (MMI->getPersonalities()[0] != NULL) {
+ unsigned Size = Asm->GetSizeOfEncodedValue(LSDAEncoding);
+
+ Asm->EmitULEB128(Size, "Augmentation size");
+ Asm->OutStreamer.AddComment("Language Specific Data Area");
+ if (EHFrameInfo.hasLandingPads)
+ Asm->EmitReference(Asm->GetTempSymbol("exception", EHFrameInfo.Number),
+ LSDAEncoding);
+ else
+ Asm->OutStreamer.EmitIntValue(0, Size/*size*/, 0/*addrspace*/);
+
+ } else {
+ Asm->EmitULEB128(0, "Augmentation size");
+ }
+
+ // Indicate locations of function specific callee saved registers in frame.
+ Asm->EmitFrameMoves(EHFrameInfo.Moves, EHFuncBeginSym, true);
+
+ // On Darwin the linker honors the alignment of eh_frame, which means it
+ // must be 8-byte on 64-bit targets to match what gcc does. Otherwise you
+ // get holes which confuse readers of eh_frame.
+ Asm->EmitAlignment(Asm->getTargetData().getPointerSize() == 4 ? 2 : 3);
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_frame_end",
+ EHFrameInfo.Number));
+
+ // If the function is marked used, this table should be also. We cannot
+ // make the mark unconditional in this case, since retaining the table also
+ // retains the function, and there is code around that depends
+ // on unused functions (calling undefined externals) being dead-stripped to
+ // link correctly. Yes, there really is.
+ if (MMI->isUsedFunction(EHFrameInfo.function))
+ if (Asm->MAI->hasNoDeadStrip())
+ Asm->OutStreamer.EmitSymbolAttribute(EHFrameInfo.FunctionEHSym,
+ MCSA_NoDeadStrip);
+ }
+ Asm->OutStreamer.AddBlankLine();
+}
+
+/// EndModule - Emit all exception information that should come after the
+/// content.
+void DwarfTableException::EndModule() {
+ if (!Asm->MAI->isExceptionHandlingDwarf())
+ return;
+
+ if (!shouldEmitMovesModule && !shouldEmitTableModule)
+ return;
+
+ const std::vector<const Function*> &Personalities = MMI->getPersonalities();
+
+ for (unsigned I = 0, E = Personalities.size(); I < E; ++I)
+ EmitCIE(Personalities[I], I);
+
+ for (std::vector<FunctionEHFrameInfo>::iterator
+ I = EHFrames.begin(), E = EHFrames.end(); I != E; ++I)
+ EmitFDE(*I);
+}
+
+/// BeginFunction - Gather pre-function exception information. Assumes it's
+/// being emitted immediately after the function entry point.
+void DwarfTableException::BeginFunction(const MachineFunction *MF) {
+ shouldEmitTable = shouldEmitMoves = false;
+
+ // If any landing pads survive, we need an EH table.
+ shouldEmitTable = !MMI->getLandingPads().empty();
+
+ // See if we need frame move info.
+ shouldEmitMoves =
+ !Asm->MF->getFunction()->doesNotThrow() || UnwindTablesMandatory;
+
+ if (shouldEmitMoves || shouldEmitTable)
+ // Assumes in correct section after the entry point.
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin",
+ Asm->getFunctionNumber()));
+
+ shouldEmitTableModule |= shouldEmitTable;
+ shouldEmitMovesModule |= shouldEmitMoves;
+}
+
+/// EndFunction - Gather and emit post-function exception information.
+///
+void DwarfTableException::EndFunction() {
+ if (!shouldEmitMoves && !shouldEmitTable) return;
+
+ Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end",
+ Asm->getFunctionNumber()));
+
+ // Record if this personality index uses a landing pad.
+ bool HasLandingPad = !MMI->getLandingPads().empty();
+ UsesLSDA[MMI->getPersonalityIndex()] |= HasLandingPad;
+
+ // Map all labels and get rid of any dead landing pads.
+ MMI->TidyLandingPads();
+
+ if (HasLandingPad)
+ EmitExceptionTable();
+
+ const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering();
+ MCSymbol *FunctionEHSym =
+ Asm->GetSymbolWithGlobalValueBase(Asm->MF->getFunction(), ".eh",
+ TLOF.isFunctionEHFrameSymbolPrivate());
+
+ // Save EH frame information
+ EHFrames.
+ push_back(FunctionEHFrameInfo(FunctionEHSym,
+ Asm->getFunctionNumber(),
+ MMI->getPersonalityIndex(),
+ Asm->MF->getFrameInfo()->adjustsStack(),
+ !MMI->getLandingPads().empty(),
+ MMI->getFrameMoves(),
+ Asm->MF->getFunction()));
+}
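The augmentation string assembled in the EmitCIE hunk above follows the usual .eh_frame convention: a leading 'z' whenever any augmentation data is present, then 'P' for a personality routine, 'L' for an LSDA pointer in the FDE, and 'R' for a non-default FDE pointer encoding. A minimal standalone sketch of that assembly step, with hypothetical boolean inputs standing in for the TargetLoweringObjectFile queries and the UsesLSDA map:

#include <string>

// Sketch only: the inputs are assumptions replacing the TLOF/UsesLSDA queries.
std::string buildAugmentation(bool HasPersonality, bool UsesLSDAHere,
                              bool NonDefaultFDEEncoding) {
  char Aug[6] = { 0 };
  char *P = Aug + 1;                  // slot 0 is reserved for a possible 'z'
  if (HasPersonality)        *P++ = 'P';
  if (UsesLSDAHere)          *P++ = 'L';
  if (NonDefaultFDEEncoding) *P++ = 'R';
  if (P != Aug + 1)
    Aug[0] = 'z';                     // any augmentation data implies 'z'
  return Aug[0] ? std::string(Aug) : std::string();
}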
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
index c8a63cf2393b..115381767751 100644
--- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp
@@ -26,6 +26,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
+#include <cctype>
using namespace llvm;
namespace {
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 2ef115dbd205..d7d0e1b3812b 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,15 +1,19 @@
add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
+ AllocationOrder.cpp
Analysis.cpp
BranchFolding.cpp
CalcSpillWeights.cpp
CallingConvLower.cpp
+ CodeGen.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
DwarfEHPrepare.cpp
+ EdgeBundles.cpp
ELFCodeEmitter.cpp
ELFWriter.cpp
+ ExpandISelPseudos.cpp
GCMetadata.cpp
GCMetadataPrinter.cpp
GCStrategy.cpp
@@ -18,10 +22,13 @@ add_llvm_library(LLVMCodeGen
IntrinsicLowering.cpp
LLVMTargetMachine.cpp
LatencyPriorityQueue.cpp
+ LiveDebugVariables.cpp
LiveInterval.cpp
LiveIntervalAnalysis.cpp
+ LiveIntervalUnion.cpp
LiveStackAnalysis.cpp
LiveVariables.cpp
+ LiveRangeEdit.cpp
LocalStackSlotAllocation.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
@@ -34,6 +41,7 @@ add_llvm_library(LLVMCodeGen
MachineInstr.cpp
MachineLICM.cpp
MachineLoopInfo.cpp
+ MachineLoopRanges.cpp
MachineModuleInfo.cpp
MachineModuleInfoImpls.cpp
MachinePassRegistry.cpp
@@ -45,15 +53,17 @@ add_llvm_library(LLVMCodeGen
OcamlGC.cpp
OptimizePHIs.cpp
PHIElimination.cpp
+ PHIEliminationUtils.cpp
Passes.cpp
PeepholeOptimizer.cpp
- PostRAHazardRecognizer.cpp
PostRASchedulerList.cpp
PreAllocSplitting.cpp
ProcessImplicitDefs.cpp
PrologEpilogInserter.cpp
PseudoSourceValue.cpp
+ RegAllocBasic.cpp
RegAllocFast.cpp
+ RegAllocGreedy.cpp
RegAllocLinearScan.cpp
RegAllocPBQP.cpp
RegisterCoalescer.cpp
@@ -63,12 +73,14 @@ add_llvm_library(LLVMCodeGen
ScheduleDAGEmit.cpp
ScheduleDAGInstrs.cpp
ScheduleDAGPrinter.cpp
+ ScoreboardHazardRecognizer.cpp
ShadowStackGC.cpp
ShrinkWrapping.cpp
SimpleRegisterCoalescing.cpp
SjLjEHPrepare.cpp
SlotIndexes.cpp
Spiller.cpp
+ SpillPlacement.cpp
SplitKit.cpp
Splitter.cpp
StackProtector.cpp
@@ -83,4 +95,5 @@ add_llvm_library(LLVMCodeGen
VirtRegRewriter.cpp
)
-target_link_libraries (LLVMCodeGen LLVMCore LLVMScalarOpts)
+add_subdirectory(SelectionDAG)
+add_subdirectory(AsmPrinter)
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 1b7e08a8b6bb..76bb3d148b0b 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -25,8 +25,12 @@
using namespace llvm;
char CalculateSpillWeights::ID = 0;
-INITIALIZE_PASS(CalculateSpillWeights, "calcspillweights",
- "Calculate spill weights", false, false);
+INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights",
+ "Calculate spill weights", false, false)
void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
au.addRequired<LiveIntervals>();
@@ -170,8 +174,7 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) {
totalWeight *= 0.5F;
}
- li.weight = totalWeight;
- lis_.normalizeSpillWeight(li);
+ li.weight = normalizeSpillWeight(totalWeight, li.getSize());
}
void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
@@ -218,7 +221,7 @@ void VirtRegAuxInfo::CalculateRegClass(unsigned reg) {
if (rc == orc)
return;
- DEBUG(dbgs() << "Inflating " << orc->getName() << ":%reg" << reg << " to "
- << rc->getName() <<".\n");
+ DEBUG(dbgs() << "Inflating " << orc->getName() << ':' << PrintReg(reg)
+ << " to " << rc->getName() <<".\n");
mri.setRegClass(reg, rc);
}
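The CalcSpillWeights hunk above replaces the two-step weight assignment with a call to a normalizeSpillWeight helper. A simplified stand-in, under the assumption that normalization just divides the accumulated use/def frequency by the live-range size so that long and short intervals compare fairly (the in-tree helper may bias the size term further):

// normalizeSpillWeightSketch is a hypothetical name; it is not the LLVM API.
static inline float normalizeSpillWeightSketch(float UseDefFreq, unsigned Size) {
  return Size ? UseDefFreq / Size : UseDefFreq;   // avoid dividing by zero
}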
diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp
index 62ad8171a9d4..2ad80b4d3a75 100644
--- a/lib/CodeGen/CallingConvLower.cpp
+++ b/lib/CodeGen/CallingConvLower.cpp
@@ -34,8 +34,8 @@ CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm,
// HandleByVal - Allocate a stack slot large enough to pass an argument by
// value. The size and alignment information of the argument is encoded in its
// parameter attribute.
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
- EVT LocVT, CCValAssign::LocInfo LocInfo,
+void CCState::HandleByVal(unsigned ValNo, MVT ValVT,
+ MVT LocVT, CCValAssign::LocInfo LocInfo,
int MinSize, int MinAlign,
ISD::ArgFlagsTy ArgFlags) {
unsigned Align = ArgFlags.getByValAlign();
@@ -51,11 +51,9 @@ void CCState::HandleByVal(unsigned ValNo, EVT ValVT,
/// MarkAllocated - Mark a register and all of its aliases as allocated.
void CCState::MarkAllocated(unsigned Reg) {
- UsedRegs[Reg/32] |= 1 << (Reg&31);
-
- if (const unsigned *RegAliases = TRI.getAliasSet(Reg))
- for (; (Reg = *RegAliases); ++RegAliases)
- UsedRegs[Reg/32] |= 1 << (Reg&31);
+ for (const unsigned *Alias = TRI.getOverlaps(Reg);
+ unsigned Reg = *Alias; ++Alias)
+ UsedRegs[Reg/32] |= 1 << (Reg&31);
}
/// AnalyzeFormalArguments - Analyze an array of argument values,
@@ -66,12 +64,12 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
unsigned NumArgs = Ins.size();
for (unsigned i = 0; i != NumArgs; ++i) {
- EVT ArgVT = Ins[i].VT;
+ MVT ArgVT = Ins[i].VT;
ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Formal argument #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -84,7 +82,7 @@ bool CCState::CheckReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
return false;
@@ -98,12 +96,12 @@ void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
// Determine which register each value should be copied into.
for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
- EVT VT = Outs[i].VT;
+ MVT VT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Return operand #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -116,12 +114,12 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
CCAssignFn Fn) {
unsigned NumOps = Outs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = Outs[i].VT;
+ MVT ArgVT = Outs[i].VT;
ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -130,17 +128,17 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
/// AnalyzeCallOperands - Same as above except it takes vectors of types
/// and argument flags.
-void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
+void CCState::AnalyzeCallOperands(SmallVectorImpl<MVT> &ArgVTs,
SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
CCAssignFn Fn) {
unsigned NumOps = ArgVTs.size();
for (unsigned i = 0; i != NumOps; ++i) {
- EVT ArgVT = ArgVTs[i];
+ MVT ArgVT = ArgVTs[i];
ISD::ArgFlagsTy ArgFlags = Flags[i];
if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) {
#ifndef NDEBUG
dbgs() << "Call operand #" << i << " has unhandled type "
- << ArgVT.getEVTString();
+ << EVT(ArgVT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -152,12 +150,12 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl<EVT> &ArgVTs,
void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
CCAssignFn Fn) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
- EVT VT = Ins[i].VT;
+ MVT VT = Ins[i].VT;
ISD::ArgFlagsTy Flags = Ins[i].Flags;
if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) {
#ifndef NDEBUG
dbgs() << "Call result #" << i << " has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
@@ -166,11 +164,11 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl<ISD::InputArg> &Ins,
/// AnalyzeCallResult - Same as above except it's specialized for calls which
/// produce a single value.
-void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) {
+void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) {
if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) {
#ifndef NDEBUG
dbgs() << "Call result has unhandled type "
- << VT.getEVTString();
+ << EVT(VT).getEVTString();
#endif
llvm_unreachable(0);
}
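The CallingConvLower hunks above switch the calling-convention analysis from EVT to the simple MVT and wrap back into an EVT only at the diagnostic sites. A small illustration of that pattern (illustration only, not part of the patch):

#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Support/Debug.h"
using namespace llvm;

// The analysis stores the plain MVT; an EVT is constructed just for printing.
static void printUnhandledType(MVT ArgVT) {
  dbgs() << "argument has unhandled type " << EVT(ArgVT).getEVTString() << "\n";
}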
diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp
new file mode 100644
index 000000000000..515e6f9fde87
--- /dev/null
+++ b/lib/CodeGen/CodeGen.cpp
@@ -0,0 +1,61 @@
+//===-- CodeGen.cpp -------------------------------------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the common initialization routines for the
+// CodeGen library.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/InitializePasses.h"
+#include "llvm-c/Initialization.h"
+
+using namespace llvm;
+
+/// initializeCodeGen - Initialize all passes linked into the CodeGen library.
+void llvm::initializeCodeGen(PassRegistry &Registry) {
+ initializeCalculateSpillWeightsPass(Registry);
+ initializeDeadMachineInstructionElimPass(Registry);
+ initializeGCModuleInfoPass(Registry);
+ initializeIfConverterPass(Registry);
+ initializeLiveDebugVariablesPass(Registry);
+ initializeLiveIntervalsPass(Registry);
+ initializeLiveStacksPass(Registry);
+ initializeLiveVariablesPass(Registry);
+ initializeMachineCSEPass(Registry);
+ initializeMachineDominatorTreePass(Registry);
+ initializeMachineLICMPass(Registry);
+ initializeMachineLoopInfoPass(Registry);
+ initializeMachineModuleInfoPass(Registry);
+ initializeMachineSinkingPass(Registry);
+ initializeMachineVerifierPassPass(Registry);
+ initializeOptimizePHIsPass(Registry);
+ initializePHIEliminationPass(Registry);
+ initializePeepholeOptimizerPass(Registry);
+ initializePreAllocSplittingPass(Registry);
+ initializeProcessImplicitDefsPass(Registry);
+ initializePEIPass(Registry);
+ initializeRALinScanPass(Registry);
+ initializeRegisterCoalescerAnalysisGroup(Registry);
+ initializeRenderMachineFunctionPass(Registry);
+ initializeSimpleRegisterCoalescingPass(Registry);
+ initializeSlotIndexesPass(Registry);
+ initializeLoopSplitterPass(Registry);
+ initializeStackProtectorPass(Registry);
+ initializeStackSlotColoringPass(Registry);
+ initializeStrongPHIEliminationPass(Registry);
+ initializeTwoAddressInstructionPassPass(Registry);
+ initializeUnreachableBlockElimPass(Registry);
+ initializeUnreachableMachineBlockElimPass(Registry);
+ initializeVirtRegMapPass(Registry);
+ initializeLowerIntrinsicsPass(Registry);
+}
+
+void LLVMInitializeCodeGen(LLVMPassRegistryRef R) {
+ initializeCodeGen(*unwrap(R));
+}
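A minimal sketch of how a host tool might use the new initialization entry point, assuming it links LLVMCodeGen; this mirrors what the C API wrapper above does through unwrap(R):

#include "llvm/InitializePasses.h"
#include "llvm/PassRegistry.h"
using namespace llvm;

int main() {
  // Register the CodeGen passes with the global registry before any pass
  // manager is built.
  PassRegistry &Registry = *PassRegistry::getPassRegistry();
  initializeCodeGen(Registry);
  return 0;
}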
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 335d2d8e9bac..f79598de1d9e 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -130,21 +130,25 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
return;
assert(Count < InsertPosIndex && "Instruction index out of expected range!");
- // Any register which was defined within the previous scheduling region
- // may have been rescheduled and its lifetime may overlap with registers
- // in ways not reflected in our current liveness state. For each such
- // register, adjust the liveness state to be conservatively correct.
- for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
- if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
- assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
-
- // Mark this register to be non-renamable.
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
+ if (KillIndices[Reg] != ~0u) {
+ // If Reg is currently live, then mark that it can't be renamed as
+ // we don't know the extent of its live-range anymore (now that it
+ // has been scheduled).
+ Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
+ KillIndices[Reg] = Count;
+ } else if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
+ // Any register which was defined within the previous scheduling region
+ // may have been rescheduled and its lifetime may overlap with registers
+ // in ways not reflected in our current liveness state. For each such
+ // register, adjust the liveness state to be conservatively correct.
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
// Move the def index to the end of the previous region, to reflect
// that the def could theoretically have been scheduled at the end.
DefIndices[Reg] = InsertPosIndex;
}
+ }
PrescanInstruction(MI);
ScanInstruction(MI, Count);
@@ -177,7 +181,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) {
// that have special allocation requirements. Also assume all registers
// used in a call must not be changed (ABI).
// FIXME: The issue with predicated instruction is more complex. We are being
- // conservatively here because the kill markers cannot be trusted after
+ // conservative here because the kill markers cannot be trusted after
// if-conversion:
// %R6<def> = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14]
// ...
@@ -321,8 +325,62 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI,
}
}
+// Check all machine operands that reference the antidependent register and must
+// be replaced by NewReg. Return true if any of their parent instructions may
+// clobber the new register.
+//
+// Note: AntiDepReg may be referenced by a two-address instruction such that
+// its use operand is tied to a def operand. We guard against the case in which
+// the two-address instruction also defines NewReg, as may happen with
+// pre/postincrement loads. In this case, both the use and def operands are in
+// RegRefs because the def is inserted by PrescanInstruction and not erased
+// during ScanInstruction. So checking for an instruction with definitions of
+// both NewReg and AntiDepReg covers it.
+bool
+CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg)
+{
+ for (RegRefIter I = RegRefBegin; I != RegRefEnd; ++I ) {
+ MachineOperand *RefOper = I->second;
+
+ // Don't allow the instruction defining AntiDepReg to earlyclobber its
+ // operands, in case they may be assigned to NewReg. In this case antidep
+ // breaking must fail, but it's too rare to bother optimizing.
+ if (RefOper->isDef() && RefOper->isEarlyClobber())
+ return true;
+
+ // Handle cases in which this instruction defines NewReg.
+ MachineInstr *MI = RefOper->getParent();
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &CheckOper = MI->getOperand(i);
+
+ if (!CheckOper.isReg() || !CheckOper.isDef() ||
+ CheckOper.getReg() != NewReg)
+ continue;
+
+ // Don't allow the instruction to define NewReg and AntiDepReg.
+ // When AntiDepReg is renamed it will be an illegal op.
+ if (RefOper->isDef())
+ return true;
+
+ // Don't allow an instruction using AntiDepReg to be earlyclobbered by
+ // NewReg
+ if (CheckOper.isEarlyClobber())
+ return true;
+
+ // Don't allow inline asm to define NewReg at all. Who knows what it's
+ // doing with it.
+ if (MI->isInlineAsm())
+ return true;
+ }
+ }
+ return false;
+}
+
unsigned
-CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
+CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
const TargetRegisterClass *RC)
@@ -338,10 +396,10 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(MachineInstr *MI,
// an anti-dependence with this AntiDepReg, because that would
// re-introduce that anti-dependence.
if (NewReg == LastNewReg) continue;
- // If the instruction already has a def of the NewReg, it's not suitable.
- // For example, Instruction with multiple definitions can result in this
- // condition.
- if (MI->modifiesRegister(NewReg, TRI)) continue;
+ // If any instructions that define AntiDepReg also define the NewReg, it's
+ // not suitable. For example, instructions with multiple definitions can
+ // result in this condition.
+ if (isNewRegClobberedByRefs(RegRefBegin, RegRefEnd, NewReg)) continue;
// If NewReg is dead and NewReg's most recent def is not before
// AntiDepReg's kill, it's safe to replace AntiDepReg with NewReg.
assert(((KillIndices[AntiDepReg] == ~0u) != (DefIndices[AntiDepReg] == ~0u))
@@ -548,7 +606,11 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// TODO: Instead of picking the first free register, consider which might
// be the best.
if (AntiDepReg != 0) {
- if (unsigned NewReg = findSuitableFreeRegister(MI, AntiDepReg,
+ std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
+ std::multimap<unsigned, MachineOperand *>::iterator>
+ Range = RegRefs.equal_range(AntiDepReg);
+ if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second,
+ AntiDepReg,
LastNewReg[AntiDepReg],
RC)) {
DEBUG(dbgs() << "Breaking anti-dependence edge on "
@@ -558,9 +620,6 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
// Update the references to the old register to refer to the new
// register.
- std::pair<std::multimap<unsigned, MachineOperand *>::iterator,
- std::multimap<unsigned, MachineOperand *>::iterator>
- Range = RegRefs.equal_range(AntiDepReg);
for (std::multimap<unsigned, MachineOperand *>::iterator
Q = Range.first, QE = Range.second; Q != QE; ++Q) {
Q->second->setReg(NewReg);
@@ -580,7 +639,7 @@ BreakAntiDependencies(const std::vector<SUnit>& SUnits,
}
// We just went back in time and modified history; the
- // liveness information for the anti-depenence reg is now
+ // liveness information for the anti-dependence reg is now
// inconsistent. Set the state as if it were dead.
Classes[NewReg] = Classes[AntiDepReg];
DefIndices[NewReg] = DefIndices[AntiDepReg];
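The rewritten findSuitableFreeRegister above walks every recorded reference of the anti-dependent register through the RegRefs multimap. A generic sketch of the equal_range iteration it relies on, with the mapped type simplified (the pass stores MachineOperand pointers):

#include <map>

typedef std::multimap<unsigned, int> RefMap;   // simplified stand-in for RegRefs

static unsigned countRefs(const RefMap &RegRefs, unsigned Reg) {
  unsigned N = 0;
  std::pair<RefMap::const_iterator, RefMap::const_iterator> Range =
      RegRefs.equal_range(Reg);                // [begin, end) of all references
  for (RefMap::const_iterator I = Range.first; I != Range.second; ++I)
    ++N;
  return N;
}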
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h
index 0ed7c35b0f0c..0daaef273448 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.h
+++ b/lib/CodeGen/CriticalAntiDepBreaker.h
@@ -48,8 +48,10 @@ class TargetRegisterInfo;
/// pointer.
std::vector<const TargetRegisterClass*> Classes;
- /// RegRegs - Map registers to all their references within a live range.
+ /// RegRefs - Map registers to all their references within a live range.
std::multimap<unsigned, MachineOperand *> RegRefs;
+ typedef std::multimap<unsigned, MachineOperand *>::const_iterator
+ RegRefIter;
/// KillIndices - The index of the most recent kill (proceeding bottom-up),
/// or ~0u if the register is not live.
@@ -90,10 +92,14 @@ class TargetRegisterInfo;
private:
void PrescanInstruction(MachineInstr *MI);
void ScanInstruction(MachineInstr *MI, unsigned Count);
- unsigned findSuitableFreeRegister(MachineInstr *MI,
+ bool isNewRegClobberedByRefs(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
+ unsigned NewReg);
+ unsigned findSuitableFreeRegister(RegRefIter RegRefBegin,
+ RegRefIter RegRefEnd,
unsigned AntiDepReg,
unsigned LastNewReg,
- const TargetRegisterClass *);
+ const TargetRegisterClass *RC);
};
}
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index 318d922adebf..fdc1d9142140 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -36,7 +36,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- DeadMachineInstructionElim() : MachineFunctionPass(ID) {}
+ DeadMachineInstructionElim() : MachineFunctionPass(ID) {
+ initializeDeadMachineInstructionElimPass(*PassRegistry::getPassRegistry());
+ }
private:
bool isDead(const MachineInstr *MI) const;
@@ -45,13 +47,19 @@ namespace {
char DeadMachineInstructionElim::ID = 0;
INITIALIZE_PASS(DeadMachineInstructionElim, "dead-mi-elimination",
- "Remove dead machine instructions", false, false);
+ "Remove dead machine instructions", false, false)
FunctionPass *llvm::createDeadMachineInstructionElimPass() {
return new DeadMachineInstructionElim();
}
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
+ // Technically speaking, inline asm without side effects and with no defs can
+ // still be deleted. But there is so much bad inline asm code out there that
+ // we should leave it alone.
+ if (MI->isInlineAsm())
+ return false;
+
// Don't delete instructions with side effects.
bool SawStore = false;
if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
@@ -151,7 +159,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.reset(Reg);
// Check the subreg set, not the alias set, because a def
// of a super-register may still be partially live after
@@ -168,7 +176,7 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) {
const MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isUse()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
LivePhysRegs.set(Reg);
for (const unsigned *AliasSet = TRI->getAliasSet(Reg);
*AliasSet; ++AliasSet)
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp
index 550fd3e25fb7..0ebb5b0db70e 100644
--- a/lib/CodeGen/DwarfEHPrepare.cpp
+++ b/lib/CodeGen/DwarfEHPrepare.cpp
@@ -43,7 +43,7 @@ namespace {
// The eh.selector intrinsic.
Function *SelectorIntrinsic;
- // _Unwind_Resume_or_Rethrow call.
+ // _Unwind_Resume_or_Rethrow or _Unwind_SjLj_Resume call.
Constant *URoR;
// The EH language-specific catch-all type.
@@ -82,11 +82,11 @@ namespace {
/// FindAllURoRInvokes - Find all URoR invokes in the function.
void FindAllURoRInvokes(SmallPtrSet<InvokeInst*, 32> &URoRInvokes);
- /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow"
- /// calls. The "unwind" part of these invokes jump to a landing pad within
- /// the current function. This is a candidate to merge the selector
- /// associated with the URoR invoke with the one from the URoR's landing
- /// pad.
+ /// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+ /// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to
+ /// a landing pad within the current function. This is a candidate to merge
+ /// the selector associated with the URoR invoke with the one from the
+ /// URoR's landing pad.
bool HandleURoRInvokes();
/// FindSelectorAndURoR - Find the eh.selector call and URoR call associated
@@ -100,7 +100,9 @@ namespace {
DwarfEHPrepare(const TargetMachine *tm) :
FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()),
ExceptionValueIntrinsic(0), SelectorIntrinsic(0),
- URoR(0), EHCatchAllValue(0), RewindFunction(0) {}
+ URoR(0), EHCatchAllValue(0), RewindFunction(0) {
+ initializeDominatorTreePass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnFunction(Function &Fn);
@@ -224,10 +226,11 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke,
return Changed;
}
-/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" calls. The
-/// "unwind" part of these invokes jump to a landing pad within the current
-/// function. This is a candidate to merge the selector associated with the URoR
-/// invoke with the one from the URoR's landing pad.
+/// HandleURoRInvokes - Handle invokes of "_Unwind_Resume_or_Rethrow" or
+/// "_Unwind_SjLj_Resume" calls. The "unwind" part of these invokes jump to a
+/// landing pad within the current function. This is a candidate to merge the
+/// selector associated with the URoR invoke with the one from the URoR's
+/// landing pad.
bool DwarfEHPrepare::HandleURoRInvokes() {
if (!EHCatchAllValue) {
EHCatchAllValue =
@@ -247,7 +250,10 @@ bool DwarfEHPrepare::HandleURoRInvokes() {
if (!URoR) {
URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow");
- if (!URoR) return CleanupSelectors(CatchAllSels);
+ if (!URoR) {
+ URoR = F->getParent()->getFunction("_Unwind_SjLj_Resume");
+ if (!URoR) return CleanupSelectors(CatchAllSels);
+ }
}
SmallPtrSet<InvokeInst*, 32> URoRInvokes;
diff --git a/lib/CodeGen/ELF.h b/lib/CodeGen/ELF.h
index fb884c9e8b71..e08feeb27539 100644
--- a/lib/CodeGen/ELF.h
+++ b/lib/CodeGen/ELF.h
@@ -23,7 +23,7 @@
#include "llvm/CodeGen/BinaryObject.h"
#include "llvm/CodeGen/MachineRelocation.h"
#include "llvm/Support/ELF.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
class GlobalValue;
diff --git a/lib/CodeGen/ELFWriter.cpp b/lib/CodeGen/ELFWriter.cpp
index d14728d8a36c..0fd1e8e83bd7 100644
--- a/lib/CodeGen/ELFWriter.cpp
+++ b/lib/CodeGen/ELFWriter.cpp
@@ -45,6 +45,7 @@
#include "llvm/MC/MCSectionELF.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/Mangler.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetELFWriterInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -64,7 +65,7 @@ char ELFWriter::ID = 0;
ELFWriter::ELFWriter(raw_ostream &o, TargetMachine &tm)
: MachineFunctionPass(ID), O(o), TM(tm),
- OutContext(*new MCContext(*TM.getMCAsmInfo())),
+ OutContext(*new MCContext(*TM.getMCAsmInfo(), new TargetAsmInfo(tm))),
TLOF(TM.getTargetLowering()->getObjFileLowering()),
is64Bit(TM.getTargetData()->getPointerSizeInBits() == 64),
isLittleEndian(TM.getTargetData()->isLittleEndian()),
@@ -327,6 +328,18 @@ void ELFWriter::AddToSymbolList(ELFSym *GblSym) {
}
}
+/// HasCommonSymbols - True if this section holds common symbols; this is
+/// indicated in the ELF object file by a symbol with the SHN_COMMON section
+/// header index.
+static bool HasCommonSymbols(const MCSectionELF &S) {
+ // FIXME: this is wrong; a common symbol can be in .data, for example.
+ if (StringRef(S.getSectionName()).startswith(".gnu.linkonce."))
+ return true;
+
+ return false;
+}
+
+
// EmitGlobal - Choose the right section for global and emit it
void ELFWriter::EmitGlobal(const GlobalValue *GV) {
@@ -363,7 +376,7 @@ void ELFWriter::EmitGlobal(const GlobalValue *GV) {
unsigned Size = TD->getTypeAllocSize(GVar->getInitializer()->getType());
GblSym->Size = Size;
- if (S->HasCommonSymbols()) { // Symbol must go to a common section
+ if (HasCommonSymbols(*S)) { // Symbol must go to a common section
GblSym->SectionIdx = ELF::SHN_COMMON;
// A new linkonce section is created for each global in the
diff --git a/lib/CodeGen/EdgeBundles.cpp b/lib/CodeGen/EdgeBundles.cpp
new file mode 100644
index 000000000000..aed8bc947991
--- /dev/null
+++ b/lib/CodeGen/EdgeBundles.cpp
@@ -0,0 +1,86 @@
+//===-------- EdgeBundles.cpp - Bundles of CFG edges ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the EdgeBundles analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/GraphWriter.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+ViewEdgeBundles("view-edge-bundles", cl::Hidden,
+ cl::desc("Pop up a window to show edge bundle graphs"));
+
+char EdgeBundles::ID = 0;
+
+INITIALIZE_PASS(EdgeBundles, "edge-bundles", "Bundle Machine CFG Edges",
+ /* cfg = */true, /* analysis = */ true)
+
+char &llvm::EdgeBundlesID = EdgeBundles::ID;
+
+void EdgeBundles::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+bool EdgeBundles::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ EC.clear();
+ EC.grow(2 * MF->size());
+
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E;
+ ++I) {
+ const MachineBasicBlock &MBB = *I;
+ unsigned OutE = 2 * MBB.getNumber() + 1;
+ // Join the outgoing bundle with the ingoing bundles of all successors.
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI)
+ EC.join(OutE, 2 * (*SI)->getNumber());
+ }
+ EC.compress();
+ if (ViewEdgeBundles)
+ view();
+ return false;
+}
+
+/// view - Visualize the annotated bipartite CFG with Graphviz.
+void EdgeBundles::view() const {
+ ViewGraph(*this, "EdgeBundles");
+}
+
+/// Specialize WriteGraph; the standard implementation won't work.
+raw_ostream &llvm::WriteGraph(raw_ostream &O, const EdgeBundles &G,
+ bool ShortNames,
+ const std::string &Title) {
+ const MachineFunction *MF = G.getMachineFunction();
+
+ O << "digraph {\n";
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ unsigned BB = I->getNumber();
+ O << "\t\"BB#" << BB << "\" [ shape=box ]\n"
+ << '\t' << G.getBundle(BB, false) << " -> \"BB#" << BB << "\"\n"
+ << "\t\"BB#" << BB << "\" -> " << G.getBundle(BB, true) << '\n';
+ for (MachineBasicBlock::const_succ_iterator SI = I->succ_begin(),
+ SE = I->succ_end(); SI != SE; ++SI)
+ O << "\t\"BB#" << BB << "\" -> \"BB#" << (*SI)->getNumber()
+ << "\" [ color=lightgray ]\n";
+ }
+ O << "}\n";
+ return O;
+}
+
+
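The numbering scheme in EdgeBundles::runOnMachineFunction above gives block N an ingoing bundle 2*N and an outgoing bundle 2*N+1, then joins bundles across every CFG edge. A toy version over a hypothetical adjacency list (Succs stands in for the MachineFunction's successor lists):

#include "llvm/ADT/IntEqClasses.h"
#include <vector>
using namespace llvm;

static unsigned countBundles(const std::vector<std::vector<unsigned> > &Succs) {
  IntEqClasses EC;
  EC.grow(2 * Succs.size());
  for (unsigned BB = 0, E = Succs.size(); BB != E; ++BB)
    for (unsigned i = 0, N = Succs[BB].size(); i != N; ++i)
      EC.join(2 * BB + 1, 2 * Succs[BB][i]);   // outgoing(BB) <-> ingoing(succ)
  EC.compress();
  return EC.getNumClasses();
}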
diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp
new file mode 100644
index 000000000000..b5ec303f5d93
--- /dev/null
+++ b/lib/CodeGen/ExpandISelPseudos.cpp
@@ -0,0 +1,82 @@
+//===-- llvm/CodeGen/ExpandISelPseudos.cpp ----------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Expand Pseudo-instructions produced by ISel. These are usually to allow
+// the expansion to contain control flow, such as a conditional move
+// implemented with a conditional branch and a phi, or an atomic operation
+// implemented with a loop.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "expand-isel-pseudos"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+using namespace llvm;
+
+namespace {
+ class ExpandISelPseudos : public MachineFunctionPass {
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ ExpandISelPseudos() : MachineFunctionPass(ID) {}
+
+ private:
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ const char *getPassName() const {
+ return "Expand ISel Pseudo-instructions";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+} // end anonymous namespace
+
+char ExpandISelPseudos::ID = 0;
+INITIALIZE_PASS(ExpandISelPseudos, "expand-isel-pseudos",
+ "Expand CodeGen Pseudo-instructions", false, false)
+
+FunctionPass *llvm::createExpandISelPseudosPass() {
+ return new ExpandISelPseudos();
+}
+
+bool ExpandISelPseudos::runOnMachineFunction(MachineFunction &MF) {
+ bool Changed = false;
+ const TargetLowering *TLI = MF.getTarget().getTargetLowering();
+
+ // Iterate through each instruction in the function, looking for pseudos.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE; ) {
+ MachineInstr *MI = MBBI++;
+
+ // If MI is a pseudo, expand it.
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (TID.usesCustomInsertionHook()) {
+ Changed = true;
+ MachineBasicBlock *NewMBB =
+ TLI->EmitInstrWithCustomInserter(MI, MBB);
+ // The expansion may involve new basic blocks.
+ if (NewMBB != MBB) {
+ MBB = NewMBB;
+ I = NewMBB;
+ MBBI = NewMBB->begin();
+ MBBE = NewMBB->end();
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
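The loop in runOnMachineFunction above advances MBBI before expanding MI, because EmitInstrWithCustomInserter may move the remaining instructions into a new block. A generic analogue of that advance-then-mutate traversal, using a plain std::list as a stand-in for the instruction list:

#include <list>

// Toy version: negative values stand in for pseudo entries that get expanded.
static void expandAll(std::list<int> &Worklist) {
  for (std::list<int>::iterator I = Worklist.begin(); I != Worklist.end(); ) {
    std::list<int>::iterator Cur = I++;    // step past Cur before mutating it
    if (*Cur < 0) {
      Worklist.insert(Cur, -*Cur);         // expansion produces replacement work
      Worklist.erase(Cur);                 // and removes the pseudo entry
    }
  }
}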
diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp
index 0f6e882a7be4..d757cf409d50 100644
--- a/lib/CodeGen/GCMetadata.cpp
+++ b/lib/CodeGen/GCMetadata.cpp
@@ -30,7 +30,6 @@ namespace {
raw_ostream &OS;
public:
- Printer() : FunctionPass(ID), OS(errs()) {}
explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {}
@@ -56,7 +55,7 @@ namespace {
}
INITIALIZE_PASS(GCModuleInfo, "collector-metadata",
- "Create Garbage Collector Module Metadata", false, false);
+ "Create Garbage Collector Module Metadata", false, false)
// -----------------------------------------------------------------------------
@@ -70,7 +69,9 @@ GCFunctionInfo::~GCFunctionInfo() {}
char GCModuleInfo::ID = 0;
GCModuleInfo::GCModuleInfo()
- : ImmutablePass(ID) {}
+ : ImmutablePass(ID) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+}
GCModuleInfo::~GCModuleInfo() {
clear();
diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp
index 719fa194d8da..766c6ee542a9 100644
--- a/lib/CodeGen/GCStrategy.cpp
+++ b/lib/CodeGen/GCStrategy.cpp
@@ -19,11 +19,12 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/Module.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -123,6 +124,11 @@ GCFunctionInfo *GCStrategy::insertFunctionInfo(const Function &F) {
// -----------------------------------------------------------------------------
+INITIALIZE_PASS_BEGIN(LowerIntrinsics, "gc-lowering", "GC Lowering",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(GCModuleInfo)
+INITIALIZE_PASS_END(LowerIntrinsics, "gc-lowering", "GC Lowering", false, false)
+
FunctionPass *llvm::createGCLoweringPass() {
return new LowerIntrinsics();
}
@@ -130,7 +136,9 @@ FunctionPass *llvm::createGCLoweringPass() {
char LowerIntrinsics::ID = 0;
LowerIntrinsics::LowerIntrinsics()
- : FunctionPass(ID) {}
+ : FunctionPass(ID) {
+ initializeLowerIntrinsicsPass(*PassRegistry::getPassRegistry());
+ }
const char *LowerIntrinsics::getPassName() const {
return "Lower Garbage Collection Instructions";
@@ -139,6 +147,7 @@ const char *LowerIntrinsics::getPassName() const {
void LowerIntrinsics::getAnalysisUsage(AnalysisUsage &AU) const {
FunctionPass::getAnalysisUsage(AU);
AU.addRequired<GCModuleInfo>();
+ AU.addPreserved<DominatorTree>();
}
/// doInitialization - If this module uses the GC intrinsics, find them now.
@@ -249,9 +258,16 @@ bool LowerIntrinsics::runOnFunction(Function &F) {
if (NeedsDefaultLoweringPass(S))
MadeChange |= PerformDefaultLowering(F, S);
- if (NeedsCustomLoweringPass(S))
+ bool UseCustomLoweringPass = NeedsCustomLoweringPass(S);
+ if (UseCustomLoweringPass)
MadeChange |= S.performCustomLowering(F);
-
+
+ // Custom lowering may modify the CFG, so dominators must be recomputed.
+ if (UseCustomLoweringPass) {
+ if (DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>())
+ DT->DT->recalculate(F);
+ }
+
return MadeChange;
}
@@ -345,13 +361,15 @@ void MachineCodeAnalysis::VisitCallPoint(MachineBasicBlock::iterator CI) {
MachineBasicBlock::iterator RAI = CI;
++RAI;
- if (FI->getStrategy().needsSafePoint(GC::PreCall))
- FI->addSafePoint(GC::PreCall, InsertLabel(*CI->getParent(), CI,
- CI->getDebugLoc()));
+ if (FI->getStrategy().needsSafePoint(GC::PreCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), CI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PreCall, Label, CI->getDebugLoc());
+ }
- if (FI->getStrategy().needsSafePoint(GC::PostCall))
- FI->addSafePoint(GC::PostCall, InsertLabel(*CI->getParent(), RAI,
- CI->getDebugLoc()));
+ if (FI->getStrategy().needsSafePoint(GC::PostCall)) {
+ MCSymbol* Label = InsertLabel(*CI->getParent(), RAI, CI->getDebugLoc());
+ FI->addSafePoint(GC::PostCall, Label, CI->getDebugLoc());
+ }
}
void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
@@ -364,12 +382,12 @@ void MachineCodeAnalysis::FindSafePoints(MachineFunction &MF) {
}
void MachineCodeAnalysis::FindStackOffsets(MachineFunction &MF) {
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- assert(TRI && "TargetRegisterInfo not available!");
+ const TargetFrameLowering *TFI = TM->getFrameLowering();
+ assert(TFI && "TargetFrameLowering not available!");
for (GCFunctionInfo::roots_iterator RI = FI->roots_begin(),
RE = FI->roots_end(); RI != RE; ++RI)
- RI->StackOffset = TRI->getFrameIndexOffset(MF, RI->Num);
+ RI->StackOffset = TFI->getFrameIndexOffset(MF, RI->Num);
}
bool MachineCodeAnalysis::runOnMachineFunction(MachineFunction &MF) {
diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp
index 0ea30d7a7929..db53b0473a9a 100644
--- a/lib/CodeGen/IfConversion.cpp
+++ b/lib/CodeGen/IfConversion.cpp
@@ -17,7 +17,9 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -26,6 +28,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
using namespace llvm;
@@ -91,6 +94,8 @@ namespace {
/// ClobbersPred - True if BB could modify predicates (e.g. has
/// cmp, call, etc.)
/// NonPredSize - Number of non-predicated instructions.
+ /// ExtraCost - Extra cost for multi-cycle instructions.
+ /// ExtraCost2 - Some instructions are slower when predicated.
/// BB - Corresponding MachineBasicBlock.
/// TrueBB / FalseBB- See AnalyzeBranch().
/// BrCond - Conditions for end of block conditional branches.
@@ -106,6 +111,8 @@ namespace {
bool CannotBeCopied : 1;
bool ClobbersPred : 1;
unsigned NonPredSize;
+ unsigned ExtraCost;
+ unsigned ExtraCost2;
MachineBasicBlock *BB;
MachineBasicBlock *TrueBB;
MachineBasicBlock *FalseBB;
@@ -115,7 +122,7 @@ namespace {
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
HasFallThrough(false), IsUnpredicable(false),
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- BB(0), TrueBB(0), FalseBB(0) {}
+ ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
};
/// IfcvtToken - Record information about pending if-conversions to attempt:
@@ -150,20 +157,31 @@ namespace {
const TargetLowering *TLI;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
+ const InstrItineraryData *InstrItins;
+ const MachineLoopInfo *MLI;
bool MadeChange;
int FnNum;
public:
static char ID;
- IfConverter() : MachineFunctionPass(ID), FnNum(-1) {}
+ IfConverter() : MachineFunctionPass(ID), FnNum(-1) {
+ initializeIfConverterPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "If Converter"; }
private:
bool ReverseBranchCondition(BBInfo &BBI);
- bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const;
+ bool ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ float Prediction, float Confidence) const;
bool ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
- bool FalseBranch, unsigned &Dups) const;
+ bool FalseBranch, unsigned &Dups,
+ float Prediction, float Confidence) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
unsigned &Dups1, unsigned &Dups2) const;
void ScanInstructions(BBInfo &BBI);
@@ -188,14 +206,21 @@ namespace {
bool IgnoreBr = false);
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
- bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const {
- return Size > 0 && TII->isProfitableToIfCvt(BB, Size);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
+ unsigned Cycle, unsigned Extra,
+ float Prediction, float Confidence) const {
+ return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
+ Prediction, Confidence);
}
- bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
- MachineBasicBlock &FBB, unsigned FSize) const {
- return TSize > 0 && FSize > 0 &&
- TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize);
+ bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
+ unsigned TCycle, unsigned TExtra,
+ MachineBasicBlock &FBB,
+ unsigned FCycle, unsigned FExtra,
+ float Prediction, float Confidence) const {
+ return TCycle > 0 && FCycle > 0 &&
+ TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
+ Prediction, Confidence);
}
// blockAlwaysFallThrough - Block ends without a terminator.
@@ -230,7 +255,9 @@ namespace {
char IfConverter::ID = 0;
}
-INITIALIZE_PASS(IfConverter, "if-converter", "If Converter", false, false);
+INITIALIZE_PASS_BEGIN(IfConverter, "if-converter", "If Converter", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(IfConverter, "if-converter", "If Converter", false, false)
FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); }
@@ -238,6 +265,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
TLI = MF.getTarget().getTargetLowering();
TII = MF.getTarget().getInstrInfo();
TRI = MF.getTarget().getRegisterInfo();
+ MLI = &getAnalysis<MachineLoopInfo>();
+ InstrItins = MF.getTarget().getInstrItineraryData();
if (!TII) return false;
// Tail merging tends to expose more if-conversion opportunities.
@@ -431,7 +460,8 @@ static inline MachineBasicBlock *getNextBlock(MachineBasicBlock *BB) {
/// predecessor) forms a valid simple shape for ifcvt. It also returns the
/// number of instructions that the ifcvt would need to duplicate if performed
/// in Dups.
-bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
+bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups,
+ float Prediction, float Confidence) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -441,7 +471,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
if (TrueBBI.BB->pred_size() > 1) {
if (TrueBBI.CannotBeCopied ||
- !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize))
+ !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize,
+ Prediction, Confidence))
return false;
Dups = TrueBBI.NonPredSize;
}
@@ -456,7 +487,8 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const {
/// returns the number of instructions that the ifcvt would need to duplicate
/// if performed in 'Dups'.
bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
- bool FalseBranch, unsigned &Dups) const {
+ bool FalseBranch, unsigned &Dups,
+ float Prediction, float Confidence) const {
Dups = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone)
return false;
@@ -478,7 +510,8 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
++Size;
}
}
- if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size))
+ if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size,
+ Prediction, Confidence))
return false;
Dups = Size;
}
@@ -493,18 +526,6 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI,
return TExit && TExit == FalseBBI.BB;
}
-static
-MachineBasicBlock::iterator firstNonBranchInst(MachineBasicBlock *BB,
- const TargetInstrInfo *TII) {
- MachineBasicBlock::iterator I = BB->end();
- while (I != BB->begin()) {
- --I;
- if (!I->getDesc().isBranch())
- break;
- }
- return I;
-}
-
/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid diamond shape for ifcvt.
bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
@@ -533,64 +554,70 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
(TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
return false;
- MachineBasicBlock::iterator TI = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FI = FalseBBI.BB->begin();
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
- // Skip dbg_value instructions
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- while (TI != TIE && FI != FIE) {
+ while (TIB != TIE && FIB != FIE) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIE && TI->isDebugValue())
- ++TI;
- if (TI == TIE)
+ if (TIB->isDebugValue()) {
+ while (TIB != TIE && TIB->isDebugValue())
+ ++TIB;
+ if (TIB == TIE)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIE && FI->isDebugValue())
- ++FI;
- if (FI == FIE)
+ if (FIB->isDebugValue()) {
+ while (FIB != FIE && FIB->isDebugValue())
+ ++FIB;
+ if (FIB == FIE)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIB->isIdenticalTo(FIB))
break;
++Dups1;
- ++TI;
- ++FI;
+ ++TIB;
+ ++FIB;
}
- TI = firstNonBranchInst(TrueBBI.BB, TII);
- FI = firstNonBranchInst(FalseBBI.BB, TII);
- MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
- MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
- // Skip dbg_value instructions at end of the bb's.
- while (TI != TIB && TI->isDebugValue())
- --TI;
- while (FI != FIB && FI->isDebugValue())
- --FI;
- while (TI != TIB && FI != FIB) {
+ // Now, in preparation for counting duplicate instructions at the ends of the
+ // blocks, move the end iterators up past any branch instructions.
+ while (TIE != TIB) {
+ --TIE;
+ if (!TIE->getDesc().isBranch())
+ break;
+ }
+ while (FIE != FIB) {
+ --FIE;
+ if (!FIE->getDesc().isBranch())
+ break;
+ }
+
+ // If Dups1 includes all of a block, then don't count duplicate
+ // instructions at the end of the blocks.
+ if (TIB == TIE || FIB == FIE)
+ return true;
+
+ // Count duplicate instructions at the ends of the blocks.
+ while (TIE != TIB && FIE != FIB) {
// Skip dbg_value instructions. These do not count.
- if (TI->isDebugValue()) {
- while (TI != TIB && TI->isDebugValue())
- --TI;
- if (TI == TIB)
+ if (TIE->isDebugValue()) {
+ while (TIE != TIB && TIE->isDebugValue())
+ --TIE;
+ if (TIE == TIB)
break;
}
- if (FI->isDebugValue()) {
- while (FI != FIB && FI->isDebugValue())
- --FI;
- if (FI == FIB)
+ if (FIE->isDebugValue()) {
+ while (FIE != FIB && FIE->isDebugValue())
+ --FIE;
+ if (FIE == FIB)
break;
}
- if (!TI->isIdenticalTo(FI))
+ if (!TIE->isIdenticalTo(FIE))
break;
++Dups2;
- --TI;
- --FI;
+ --TIE;
+ --FIE;
}
return true;
@@ -627,6 +654,8 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
// Then scan all the instructions.
BBI.NonPredSize = 0;
+ BBI.ExtraCost = 0;
+ BBI.ExtraCost2 = 0;
BBI.ClobbersPred = false;
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
I != E; ++I) {
@@ -641,9 +670,15 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();
if (!isCondBr) {
- if (!isPredicated)
+ if (!isPredicated) {
BBI.NonPredSize++;
- else if (!AlreadyPredicated) {
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
+ &ExtraPredCost);
+ if (NumCycles > 1)
+ BBI.ExtraCost += NumCycles-1;
+ BBI.ExtraCost2 += ExtraPredCost;
+ } else if (!AlreadyPredicated) {
// FIXME: This instruction is already predicated before the
// if-conversion pass. It's probably something like a conditional move.
// Mark this block unpredicable for now.
@@ -765,9 +800,35 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
bool TNeedSub = TrueBBI.Predicate.size() > 0;
bool FNeedSub = FalseBBI.Predicate.size() > 0;
bool Enqueued = false;
+
+ // Try to predict the branch, using loop info to guide us.
+ // General heuristics are:
+ // - backedge -> 90% taken
+ // - early exit -> 20% taken
+ // - branch predictor confidence -> 90%
+ float Prediction = 0.5f;
+ float Confidence = 0.9f;
+ MachineLoop *Loop = MLI->getLoopFor(BB);
+ if (Loop) {
+ if (TrueBBI.BB == Loop->getHeader())
+ Prediction = 0.9f;
+ else if (FalseBBI.BB == Loop->getHeader())
+ Prediction = 0.1f;
+
+ MachineLoop *TrueLoop = MLI->getLoopFor(TrueBBI.BB);
+ MachineLoop *FalseLoop = MLI->getLoopFor(FalseBBI.BB);
+ if (!TrueLoop || TrueLoop->getParentLoop() == Loop)
+ Prediction = 0.2f;
+ else if (!FalseLoop || FalseLoop->getParentLoop() == Loop)
+ Prediction = 0.8f;
+ }
+
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2),
- *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
+ TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
+ *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
+ FalseBBI.ExtraCost), FalseBBI.ExtraCost2,
+ Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
// Diamond:
@@ -783,8 +844,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
// Triangle:
// EBB
@@ -797,15 +859,17 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
Enqueued = true;
}
- if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(TrueBBI, Dups) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) &&
+ if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
+ TrueBBI.ExtraCost2, Prediction, Confidence) &&
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
// Simple (split, no rejoin):
// EBB
@@ -820,22 +884,30 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
if (CanRevCond) {
// Try the other path...
- if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidTriangle(FalseBBI, TrueBBI, false, Dups,
+ 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidTriangle(FalseBBI, TrueBBI, true, Dups,
+ 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
Enqueued = true;
}
- if (ValidSimple(FalseBBI, Dups) &&
- MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) &&
+ if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
+ MeetIfcvtSizeLimit(*FalseBBI.BB,
+ FalseBBI.NonPredSize + FalseBBI.ExtraCost,
+ FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
FeasibilityAnalysis(FalseBBI, RevCond)) {
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
Enqueued = true;
@@ -1365,6 +1437,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
MachineInstr *MI = MF.CloneMachineInstr(I);
ToBBI.BB->insert(ToBBI.BB->end(), MI);
ToBBI.NonPredSize++;
+ unsigned ExtraPredCost = 0;
+ unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
+ if (NumCycles > 1)
+ ToBBI.ExtraCost += NumCycles-1;
+ ToBBI.ExtraCost2 += ExtraPredCost;
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
if (!TII->PredicateInstruction(MI, Cond)) {
@@ -1438,7 +1515,11 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
FromBBI.Predicate.clear();
ToBBI.NonPredSize += FromBBI.NonPredSize;
+ ToBBI.ExtraCost += FromBBI.ExtraCost;
+ ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
FromBBI.NonPredSize = 0;
+ FromBBI.ExtraCost = 0;
+ FromBBI.ExtraCost2 = 0;
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
ToBBI.HasFallThrough = FromBBI.HasFallThrough;
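The new MeetIfcvtSizeLimit overloads pass per-block cycle counts (NonPredSize plus ExtraCost), predication penalties (ExtraCost2) and the loop-derived Prediction/Confidence estimates down to TargetInstrInfo::isProfitableToIfCvt. That hook is target-specific; the sketch below only illustrates the shape of the trade-off such a hook can compute from those inputs, and the 10-cycle misprediction penalty is an assumption, not a value from this patch.

    // Illustrative cost comparison; not the TargetInstrInfo implementation.
    bool profitableToIfConvert(unsigned TCycles, unsigned TExtra,
                               unsigned FCycles, unsigned FExtra,
                               float Prediction, float Confidence) {
      const float MispredictPenalty = 10.0f;   // assumed, purely illustrative
      // Branchy code: probability-weighted mix of the two paths plus a penalty
      // scaled by how often the predictor is expected to be wrong.
      float BranchCost = Prediction * TCycles + (1.0f - Prediction) * FCycles +
                         (1.0f - Confidence) * MispredictPenalty;
      // If-converted code: both paths issue, and predication adds its extra cost.
      float IfCvtCost = float(TCycles + TExtra + FCycles + FExtra);
      return IfCvtCost < BranchCost;
    }
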
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp
index b965bfdcf3b8..a1bd972d38e2 100644
--- a/lib/CodeGen/InlineSpiller.cpp
+++ b/lib/CodeGen/InlineSpiller.cpp
@@ -12,28 +12,34 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "spiller"
+#define DEBUG_TYPE "regalloc"
#include "Spiller.h"
-#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool>
+VerifySpills("verify-spills", cl::desc("Verify after each spill/split"));
+
namespace {
class InlineSpiller : public Spiller {
MachineFunctionPass &pass_;
MachineFunction &mf_;
LiveIntervals &lis_;
- MachineLoopInfo &loops_;
+ LiveStacks &lss_;
+ AliasAnalysis *aa_;
VirtRegMap &vrm_;
MachineFrameInfo &mfi_;
MachineRegisterInfo &mri_;
@@ -41,19 +47,12 @@ class InlineSpiller : public Spiller {
const TargetRegisterInfo &tri_;
const BitVector reserved_;
- SplitAnalysis splitAnalysis_;
-
// Variables that are valid during spill(), but used by multiple methods.
- LiveInterval *li_;
- SmallVectorImpl<LiveInterval*> *newIntervals_;
+ LiveRangeEdit *edit_;
const TargetRegisterClass *rc_;
int stackSlot_;
- const SmallVectorImpl<LiveInterval*> *spillIs_;
- // Values of the current interval that can potentially remat.
- SmallPtrSet<VNInfo*, 8> reMattable_;
-
- // Values in reMattable_ that failed to remat at some point.
+ // Values that failed to remat at some point.
SmallPtrSet<VNInfo*, 8> usedValues_;
~InlineSpiller() {}
@@ -65,30 +64,29 @@ public:
: pass_(pass),
mf_(mf),
lis_(pass.getAnalysis<LiveIntervals>()),
- loops_(pass.getAnalysis<MachineLoopInfo>()),
+ lss_(pass.getAnalysis<LiveStacks>()),
+ aa_(&pass.getAnalysis<AliasAnalysis>()),
vrm_(vrm),
mfi_(*mf.getFrameInfo()),
mri_(mf.getRegInfo()),
tii_(*mf.getTarget().getInstrInfo()),
tri_(*mf.getTarget().getRegisterInfo()),
- reserved_(tri_.getReservedRegs(mf_)),
- splitAnalysis_(mf, lis_, loops_) {}
+ reserved_(tri_.getReservedRegs(mf_)) {}
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs);
+ const SmallVectorImpl<LiveInterval*> &spillIs);
-private:
- bool split();
+ void spill(LiveRangeEdit &);
- bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
- SlotIndex UseIdx);
+private:
bool reMaterializeFor(MachineBasicBlock::iterator MI);
void reMaterializeAll();
bool coalesceStackAccess(MachineInstr *MI);
bool foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops);
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI = 0);
void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI);
};
@@ -98,106 +96,41 @@ namespace llvm {
Spiller *createInlineSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
+ if (VerifySpills)
+ mf.verify(&pass, "When creating inline spiller");
return new InlineSpiller(pass, mf, vrm);
}
}
-/// split - try splitting the current interval into pieces that may allocate
-/// separately. Return true if successful.
-bool InlineSpiller::split() {
- splitAnalysis_.analyze(li_);
-
- if (const MachineLoop *loop = splitAnalysis_.getBestSplitLoop()) {
- // We can split, but li_ may be left intact with fewer uses.
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitAroundLoop(loop))
- return true;
- }
-
- // Try splitting into single block intervals.
- SplitAnalysis::BlockPtrSet blocks;
- if (splitAnalysis_.getMultiUseBlocks(blocks)) {
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitSingleBlocks(blocks))
- return true;
- }
-
- // Try splitting inside a basic block.
- if (const MachineBasicBlock *MBB = splitAnalysis_.getBlockForInsideSplit()) {
- if (SplitEditor(splitAnalysis_, lis_, vrm_, *newIntervals_)
- .splitInsideBlock(MBB))
- return true;
- }
-
- // We may have been able to split out some uses, but the original interval is
- // intact, and it should still be spilled.
- return false;
-}
-
-/// allUsesAvailableAt - Return true if all registers used by OrigMI at
-/// OrigIdx are also available with the same value at UseIdx.
-bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI,
- SlotIndex OrigIdx,
- SlotIndex UseIdx) {
- OrigIdx = OrigIdx.getUseIndex();
- UseIdx = UseIdx.getUseIndex();
- for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = OrigMI->getOperand(i);
- if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg)
- continue;
- // Reserved registers are OK.
- if (MO.isUndef() || !lis_.hasInterval(MO.getReg()))
- continue;
- // We don't want to move any defs.
- if (MO.isDef())
- return false;
- // We cannot depend on virtual registers in spillIs_. They will be spilled.
- for (unsigned si = 0, se = spillIs_->size(); si != se; ++si)
- if ((*spillIs_)[si]->reg == MO.getReg())
- return false;
-
- LiveInterval &LI = lis_.getInterval(MO.getReg());
- const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx);
- if (!OVNI)
- continue;
- if (OVNI != LI.getVNInfoAt(UseIdx))
- return false;
- }
- return true;
-}
-
-/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of
+/// reMaterializeFor - Attempt to rematerialize edit_->getReg() before MI instead of
/// reloading it.
bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex();
- VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx);
+ VNInfo *OrigVNI = edit_->getParent().getVNInfoAt(UseIdx);
+
if (!OrigVNI) {
DEBUG(dbgs() << "\tadding <undef> flags: ");
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg)
+ if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg())
MO.setIsUndef();
}
DEBUG(dbgs() << UseIdx << '\t' << *MI);
return true;
}
- if (!reMattable_.count(OrigVNI)) {
- DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": "
- << UseIdx << '\t' << *MI);
- return false;
- }
- MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def);
- if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) {
+
+ LiveRangeEdit::Remat RM(OrigVNI);
+ if (!edit_->canRematerializeAt(RM, UseIdx, false, lis_)) {
usedValues_.insert(OrigVNI);
DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI);
return false;
}
- // If the instruction also writes li_->reg, it had better not require the same
- // register for uses and defs.
+ // If the instruction also writes edit_->getReg(), it had better not require
+ // the same register for uses and defs.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit_->getReg(), &Ops);
if (Writes) {
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
@@ -209,62 +142,57 @@ bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) {
}
}
+ // Before rematerializing into a register for a single instruction, try to
+ // fold a load into the instruction. That avoids allocating a new register.
+ if (RM.OrigMI->getDesc().canFoldAsLoad() &&
+ foldMemoryOperand(MI, Ops, RM.OrigMI)) {
+ edit_->markRematerialized(RM.ParentVNI);
+ return true;
+ }
+
+ // Allocate a new register for the remat.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = edit_->create(mri_, lis_, vrm_);
NewLI.markNotSpillable();
- newIntervals_->push_back(&NewLI);
+
+ // Rematting for a copy: Set allocation hint to be the destination register.
+ if (MI->isCopy())
+ mri_.setRegAllocationHint(NewLI.reg, 0, MI->getOperand(0).getReg());
// Finally we can rematerialize OrigMI before MI.
- MachineBasicBlock &MBB = *MI->getParent();
- tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_);
- MachineBasicBlock::iterator RematMI = MI;
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex();
- DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI);
+ SlotIndex DefIdx = edit_->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM,
+ lis_, tii_, tri_);
+ DEBUG(dbgs() << "\tremat: " << DefIdx << '\t'
+ << *lis_.getInstructionFromIndex(DefIdx));
// Replace operands
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) {
- MO.setReg(NewVReg);
+ if (MO.isReg() && MO.isUse() && MO.getReg() == edit_->getReg()) {
+ MO.setReg(NewLI.reg);
MO.setIsKill();
}
}
DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI);
- VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI));
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
return true;
}
-/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible,
+/// reMaterializeAll - Try to rematerialize as many uses as possible,
/// and trim the live ranges after.
void InlineSpiller::reMaterializeAll() {
// Do a quick scan of the interval values to find if any are remattable.
- reMattable_.clear();
- usedValues_.clear();
- for (LiveInterval::const_vni_iterator I = li_->vni_begin(),
- E = li_->vni_end(); I != E; ++I) {
- VNInfo *VNI = *I;
- if (VNI->isUnused() || !VNI->isDefAccurate())
- continue;
- MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
- if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI))
- continue;
- reMattable_.insert(VNI);
- }
-
- // Often, no defs are remattable.
- if (reMattable_.empty())
+ if (!edit_->anyRematerializable(lis_, tii_, aa_))
return;
- // Try to remat before all uses of li_->reg.
+ usedValues_.clear();
+
+ // Try to remat before all uses of edit_->getReg().
bool anyRemat = false;
for (MachineRegisterInfo::use_nodbg_iterator
- RI = mri_.use_nodbg_begin(li_->reg);
+ RI = mri_.use_nodbg_begin(edit_->getReg());
MachineInstr *MI = RI.skipInstruction();)
anyRemat |= reMaterializeFor(MI);
@@ -273,33 +201,35 @@ void InlineSpiller::reMaterializeAll() {
// Remove any values that were completely rematted.
bool anyRemoved = false;
- for (SmallPtrSet<VNInfo*, 8>::iterator I = reMattable_.begin(),
- E = reMattable_.end(); I != E; ++I) {
+ for (LiveInterval::vni_iterator I = edit_->getParent().vni_begin(),
+ E = edit_->getParent().vni_end(); I != E; ++I) {
VNInfo *VNI = *I;
- if (VNI->hasPHIKill() || usedValues_.count(VNI))
+ if (VNI->hasPHIKill() || !edit_->didRematerialize(VNI) ||
+ usedValues_.count(VNI))
continue;
MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def);
DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI);
lis_.RemoveMachineInstrFromMaps(DefMI);
vrm_.RemoveMachineInstrFromMaps(DefMI);
DefMI->eraseFromParent();
- VNI->setIsDefAccurate(false);
+ VNI->def = SlotIndex();
anyRemoved = true;
}
if (!anyRemoved)
return;
- // Removing values may cause debug uses where li_ is not live.
- for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg);
+ // Removing values may cause debug uses where parent is not live.
+ for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(edit_->getReg());
MachineInstr *MI = RI.skipInstruction();) {
if (!MI->isDebugValue())
continue;
- // Try to preserve the debug value if li_ is live immediately after it.
+ // Try to preserve the debug value if parent is live immediately after it.
MachineBasicBlock::iterator NextMI = MI;
++NextMI;
if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) {
- VNInfo *VNI = li_->getVNInfoAt(lis_.getInstructionIndex(NextMI));
+ SlotIndex Idx = lis_.getInstructionIndex(NextMI);
+ VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
if (VNI && (VNI->hasPHIKill() || usedValues_.count(VNI)))
continue;
}
@@ -317,7 +247,7 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
return false;
// We have a stack access. Is it the right register and slot?
- if (reg != li_->reg || FI != stackSlot_)
+ if (reg != edit_->getReg() || FI != stackSlot_)
return false;
DEBUG(dbgs() << "Coalescing stack access: " << *MI);
@@ -327,9 +257,13 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI) {
}
/// foldMemoryOperand - Try folding stack slot references in Ops into MI.
-/// Return true on success, and MI will be erased.
+/// @param MI Instruction using or defining the current register.
+/// @param Ops Operand indices from readsWritesVirtualRegister().
+/// @param LoadMI Load instruction to use instead of stack slot when non-null.
+/// @return True on success, and MI will be erased.
bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
- const SmallVectorImpl<unsigned> &Ops) {
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr *LoadMI) {
// TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied
// operands.
SmallVector<unsigned, 8> FoldOps;
@@ -341,16 +275,22 @@ bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI,
// FIXME: Teach targets to deal with subregs.
if (MO.getSubReg())
return false;
+ // We cannot fold a load instruction into a def.
+ if (LoadMI && MO.isDef())
+ return false;
// Tied use operands should not be passed to foldMemoryOperand.
if (!MI->isRegTiedToDefOperand(Idx))
FoldOps.push_back(Idx);
}
- MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
+ MachineInstr *FoldMI =
+ LoadMI ? tii_.foldMemoryOperand(MI, FoldOps, LoadMI)
+ : tii_.foldMemoryOperand(MI, FoldOps, stackSlot_);
if (!FoldMI)
return false;
lis_.ReplaceMachineInstrInMaps(MI, FoldMI);
- vrm_.addSpillSlotUse(stackSlot_, FoldMI);
+ if (!LoadMI)
+ vrm_.addSpillSlotUse(stackSlot_, FoldMI);
MI->eraseFromParent();
DEBUG(dbgs() << "\tfolded: " << *FoldMI);
return true;
@@ -366,7 +306,7 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
vrm_.addSpillSlotUse(stackSlot_, MI);
DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI);
- VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true,
+ VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0,
lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI));
}
@@ -375,44 +315,58 @@ void InlineSpiller::insertReload(LiveInterval &NewLI,
void InlineSpiller::insertSpill(LiveInterval &NewLI,
MachineBasicBlock::iterator MI) {
MachineBasicBlock &MBB = *MI->getParent();
+
+ // Get the defined value. It could be an early clobber so keep the def index.
SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex();
+ VNInfo *VNI = edit_->getParent().getVNInfoAt(Idx);
+ assert(VNI && VNI->def.getDefIndex() == Idx && "Inconsistent VNInfo");
+ Idx = VNI->def;
+
tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_);
--MI; // Point to store instruction.
SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
vrm_.addSpillSlotUse(stackSlot_, MI);
DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI);
- VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true,
- lis_.getVNInfoAllocator());
+ VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, lis_.getVNInfoAllocator());
NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI));
}
void InlineSpiller::spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
- DEBUG(dbgs() << "Inline spilling " << *li << "\n");
- assert(li->isSpillable() && "Attempting to spill already spilled value.");
- assert(!li->isStackSlot() && "Trying to spill a stack slot.");
-
- li_ = li;
- newIntervals_ = &newIntervals;
- rc_ = mri_.getRegClass(li->reg);
- spillIs_ = &spillIs;
+ const SmallVectorImpl<LiveInterval*> &spillIs) {
+ LiveRangeEdit edit(*li, newIntervals, spillIs);
+ spill(edit);
+ if (VerifySpills)
+ mf_.verify(&pass_, "After inline spill");
+}
- if (split())
- return;
+void InlineSpiller::spill(LiveRangeEdit &edit) {
+ edit_ = &edit;
+ assert(!TargetRegisterInfo::isStackSlot(edit.getReg())
+ && "Trying to spill a stack slot.");
+ DEBUG(dbgs() << "Inline spilling "
+ << mri_.getRegClass(edit.getReg())->getName()
+ << ':' << edit.getParent() << "\n");
+ assert(edit.getParent().isSpillable() &&
+ "Attempting to spill already spilled value.");
reMaterializeAll();
// Remat may handle everything.
- if (li_->empty())
+ if (edit_->getParent().empty())
return;
- stackSlot_ = vrm_.getStackSlot(li->reg);
- if (stackSlot_ == VirtRegMap::NO_STACK_SLOT)
- stackSlot_ = vrm_.assignVirt2StackSlot(li->reg);
+ rc_ = mri_.getRegClass(edit.getReg());
+ stackSlot_ = vrm_.assignVirt2StackSlot(edit_->getReg());
+
+ // Update LiveStacks now that we are committed to spilling.
+ LiveInterval &stacklvr = lss_.getOrCreateInterval(stackSlot_, rc_);
+ assert(stacklvr.empty() && "Just created stack slot not empty");
+ stacklvr.getNextValue(SlotIndex(), 0, lss_.getVNInfoAllocator());
+ stacklvr.MergeRangesInAsValue(edit_->getParent(), stacklvr.getValNumInfo(0));
// Iterate over instructions using register.
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg);
+ for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(edit.getReg());
MachineInstr *MI = RI.skipInstruction();) {
// Debug values are not allowed to affect codegen.
@@ -440,7 +394,7 @@ void InlineSpiller::spill(LiveInterval *li,
// Analyze instruction.
bool Reads, Writes;
SmallVector<unsigned, 8> Ops;
- tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops);
+ tie(Reads, Writes) = MI->readsWritesVirtualRegister(edit.getReg(), &Ops);
// Attempt to fold memory ops.
if (foldMemoryOperand(MI, Ops))
@@ -448,9 +402,7 @@ void InlineSpiller::spill(LiveInterval *li,
// Allocate interval around instruction.
// FIXME: Infer regclass from instruction alone.
- unsigned NewVReg = mri_.createVirtualRegister(rc_);
- vrm_.grow();
- LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg);
+ LiveInterval &NewLI = edit.create(mri_, lis_, vrm_);
NewLI.markNotSpillable();
if (Reads)
@@ -460,7 +412,7 @@ void InlineSpiller::spill(LiveInterval *li,
bool hasLiveDef = false;
for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(Ops[i]);
- MO.setReg(NewVReg);
+ MO.setReg(NewLI.reg);
if (MO.isUse()) {
if (!MI->isRegTiedToDefOperand(Ops[i]))
MO.setIsKill();
@@ -475,6 +427,5 @@ void InlineSpiller::spill(LiveInterval *li,
insertSpill(NewLI, MI);
DEBUG(dbgs() << "\tinterval: " << NewLI << '\n');
- newIntervals.push_back(&NewLI);
}
}
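InlineSpiller's rewritten reMaterializeFor() tries, in order: fold the defining load straight into the using instruction so no new register is created; otherwise rematerialize into a fresh, non-spillable virtual register; and if the value cannot be rematerialized at the use, fall back to a reload from the stack slot later in spill(). A condensed decision table, with booleans standing in for the canRematerializeAt(), canFoldAsLoad() and foldMemoryOperand() results (the enum and function are illustrative only):

    enum UseHandling { FoldedLoad, RematIntoNewVReg, ReloadFromStackSlot };

    UseHandling classifyUse(bool CanRemat, bool DefFoldsAsLoad, bool FoldSucceeded) {
      if (!CanRemat)
        return ReloadFromStackSlot;    // value not available here; reload later
      if (DefFoldsAsLoad && FoldSucceeded)
        return FoldedLoad;             // no new register needed at all
      return RematIntoNewVReg;         // remat into a fresh, non-spillable vreg
    }
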
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3852ebaf6425..3861ddadf655 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -85,9 +85,11 @@ static CallInst *ReplaceCallWith(const char *NewFn, CallInst *CI,
}
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
void IntrinsicLowering::AddPrototypes(Module &M) {
@@ -536,3 +538,27 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
"Lowering should have eliminated any uses of the intrinsic call!");
CI->eraseFromParent();
}
+
+bool IntrinsicLowering::LowerToByteSwap(CallInst *CI) {
+ // Verify this is a simple bswap.
+ if (CI->getNumArgOperands() != 1 ||
+ CI->getType() != CI->getArgOperand(0)->getType() ||
+ !CI->getType()->isIntegerTy())
+ return false;
+
+ const IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
+ if (!Ty)
+ return false;
+
+ // Okay, we can do this xform, do so now.
+ const Type *Tys[] = { Ty };
+ Module *M = CI->getParent()->getParent()->getParent();
+ Constant *Int = Intrinsic::getDeclaration(M, Intrinsic::bswap, Tys, 1);
+
+ Value *Op = CI->getArgOperand(0);
+ Op = CallInst::Create(Int, Op, CI->getName(), CI);
+
+ CI->replaceAllUsesWith(Op);
+ CI->eraseFromParent();
+ return true;
+}
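LowerToByteSwap is a new entry point: given a call whose single argument has the same integer type as its result, it replaces the call with llvm.bswap and erases it, returning true; otherwise it returns false and leaves the call alone. A hypothetical caller is sketched below; the wrapper and its name are invented, only LowerToByteSwap itself comes from the hunk above, and it is assumed to be callable without an IntrinsicLowering instance.

    #include "llvm/CodeGen/IntrinsicLowering.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Replace CI with llvm.bswap when it has the required shape. CI is erased
    // on success, so callers must not touch it again after a true return.
    static bool maybeLowerByteSwapCall(CallInst *CI) {
      return IntrinsicLowering::LowerToByteSwap(CI);
    }
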
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 36038027b259..80dfc763af69 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -20,9 +20,11 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/GCStrategy.h"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Target/TargetAsmInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Transforms/Scalar.h"
@@ -30,6 +32,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/StandardPasses.h"
using namespace llvm;
namespace llvm {
@@ -140,13 +143,19 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
// Create a code emitter if asked to show the encoding.
MCCodeEmitter *MCE = 0;
- if (ShowMCEncoding)
+ TargetAsmBackend *TAB = 0;
+ if (ShowMCEncoding) {
MCE = getTarget().createCodeEmitter(*this, *Context);
-
- AsmStreamer.reset(createAsmStreamer(*Context, Out,
- getTargetData()->isLittleEndian(),
- getVerboseAsm(), InstPrinter,
- MCE, ShowMCInst));
+ TAB = getTarget().createAsmBackend(TargetTriple);
+ }
+
+ MCStreamer *S = getTarget().createAsmStreamer(*Context, Out,
+ getVerboseAsm(),
+ hasMCUseLoc(),
+ InstPrinter,
+ MCE, TAB,
+ ShowMCInst);
+ AsmStreamer.reset(S);
break;
}
case CGFT_ObjectFile: {
@@ -159,7 +168,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
AsmStreamer.reset(getTarget().createObjectStreamer(TargetTriple, *Context,
*TAB, Out, MCE,
- hasMCRelaxAll()));
+ hasMCRelaxAll(),
+ hasMCNoExecStack()));
+ AsmStreamer.get()->InitSections();
break;
}
case CGFT_Null:
@@ -241,7 +252,7 @@ static void printAndVerify(PassManagerBase &PM,
PM.add(createMachineFunctionPrinterPass(dbgs(), Banner));
if (VerifyMachineCode)
- PM.add(createMachineVerifierPass());
+ PM.add(createMachineVerifierPass(Banner));
}
/// addCommonCodeGenPasses - Add standard LLVM codegen passes used for both
@@ -253,6 +264,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
MCContext *&OutContext) {
// Standard LLVM-Level Passes.
+ // Basic AliasAnalysis support.
+ createStandardAliasAnalysisPasses(&PM);
+
// Before running any passes, run the verifier to determine if the input
// coming from the front-end and/or optimizer is valid.
if (!DisableVerify)
@@ -288,7 +302,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// edge from elsewhere.
PM.add(createSjLjEHPass(getTargetLowering()));
// FALLTHROUGH
- case ExceptionHandling::Dwarf:
+ case ExceptionHandling::DwarfCFI:
+ case ExceptionHandling::DwarfTable:
PM.add(createDwarfEHPass(this));
break;
case ExceptionHandling::None:
@@ -320,7 +335,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Install a MachineModuleInfo class, which is an immutable pass that holds
// all the per-module stuff we're generating, including MCContext.
- MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo());
+ TargetAsmInfo *TAI = new TargetAsmInfo(*this);
+ MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), TAI);
PM.add(MMI);
OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref.
@@ -339,6 +355,9 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Print the instruction selected machine code...
printAndVerify(PM, "After Instruction Selection");
+ // Expand pseudo-instructions emitted by ISel.
+ PM.add(createExpandISelPseudosPass());
+
// Optimize PHIs before DCE: removing dead PHI cycles may make more
// instructions dead.
if (OptLevel != CodeGenOpt::None)
@@ -356,13 +375,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createDeadMachineInstructionElimPass());
printAndVerify(PM, "After codegen DCE pass");
- PM.add(createPeepholeOptimizerPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
PM.add(createMachineCSEPass());
if (!DisableMachineSink)
PM.add(createMachineSinkingPass());
printAndVerify(PM, "After Machine LICM, CSE and Sinking passes");
+
+ PM.add(createPeepholeOptimizerPass());
+ printAndVerify(PM, "After codegen peephole optimization pass");
}
// Pre-ra tail duplication.
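Net effect of the pipeline edits above on the machine-level optimizer, written as a flat sketch with the OptLevel and Disable* guards omitted; the factory calls are the ones that appear in the hunks:

    PM.add(createExpandISelPseudosPass());          // new: expand ISel pseudo-instructions
    PM.add(createDeadMachineInstructionElimPass());
    PM.add(createMachineLICMPass());
    PM.add(createMachineCSEPass());
    PM.add(createMachineSinkingPass());
    PM.add(createPeepholeOptimizerPass());          // moved: now runs after LICM, CSE and sinking
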
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index b9527fafbee8..0eb009ddac29 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "scheduler"
#include "llvm/CodeGen/LatencyPriorityQueue.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
@@ -35,14 +36,14 @@ bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
unsigned RHSLatency = PQ->getLatency(RHSNum);
if (LHSLatency < RHSLatency) return true;
if (LHSLatency > RHSLatency) return false;
-
+
// After that, if two nodes have identical latencies, look to see if one will
// unblock more other nodes than the other.
unsigned LHSBlocked = PQ->getNumSolelyBlockNodes(LHSNum);
unsigned RHSBlocked = PQ->getNumSolelyBlockNodes(RHSNum);
if (LHSBlocked < RHSBlocked) return true;
if (LHSBlocked > RHSBlocked) return false;
-
+
// Finally, just to provide a stable ordering, use the node number as a
// deciding factor.
return LHSNum < RHSNum;
@@ -64,7 +65,7 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
OnlyAvailablePred = &Pred;
}
}
-
+
return OnlyAvailablePred;
}
@@ -78,7 +79,7 @@ void LatencyPriorityQueue::push(SUnit *SU) {
++NumNodesBlocking;
}
NumNodesSolelyBlocking[SU->NodeNum] = NumNodesBlocking;
-
+
Queue.push_back(SU);
}
@@ -102,10 +103,10 @@ void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
/// node of the same priority that will not make a node available.
void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) {
if (SU->isAvailable) return; // All preds scheduled.
-
+
SUnit *OnlyAvailablePred = getSingleUnscheduledPred(SU);
if (OnlyAvailablePred == 0 || !OnlyAvailablePred->isAvailable) return;
-
+
// Okay, we found a single predecessor that is available, but not scheduled.
// Since it is available, it must be in the priority queue. First remove it.
remove(OnlyAvailablePred);
@@ -136,3 +137,16 @@ void LatencyPriorityQueue::remove(SUnit *SU) {
std::swap(*I, Queue.back());
Queue.pop_back();
}
+
+#ifdef NDEBUG
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {}
+#else
+void LatencyPriorityQueue::dump(ScheduleDAG *DAG) const {
+ LatencyPriorityQueue q = *this;
+ while (!q.empty()) {
+ SUnit *su = q.pop();
+ dbgs() << "Height " << su->getHeight() << ": ";
+ su->dump(DAG);
+ }
+}
+#endif
diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp
new file mode 100644
index 000000000000..853ec1ac7c13
--- /dev/null
+++ b/lib/CodeGen/LiveDebugVariables.cpp
@@ -0,0 +1,711 @@
+//===- LiveDebugVariables.cpp - Tracking debug info variables -------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the LiveDebugVariables analysis.
+//
+// Remove all DBG_VALUE instructions referencing virtual registers and replace
+// them with a data structure tracking where live user variables are kept - in a
+// virtual register or in a stack slot.
+//
+// Allow the data structure to be updated during register allocation when values
+// are moved between registers and stack slots. Finally emit new DBG_VALUE
+// instructions after register allocation is complete.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "livedebug"
+#include "LiveDebugVariables.h"
+#include "VirtRegMap.h"
+#include "llvm/Constants.h"
+#include "llvm/Metadata.h"
+#include "llvm/Value.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+static cl::opt<bool>
+EnableLDV("live-debug-variables", cl::init(true),
+ cl::desc("Enable the live debug variables pass"), cl::Hidden);
+
+char LiveDebugVariables::ID = 0;
+
+INITIALIZE_PASS_BEGIN(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LiveDebugVariables, "livedebugvars",
+ "Debug Variable Analysis", false, false)
+
+void LiveDebugVariables::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequiredTransitive<LiveIntervals>();
+ AU.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+LiveDebugVariables::LiveDebugVariables() : MachineFunctionPass(ID), pImpl(0) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+}
+
+/// LocMap - Map of where a user value is live, and its location.
+typedef IntervalMap<SlotIndex, unsigned, 4> LocMap;
+
+/// UserValue - A user value is a part of a debug info user variable.
+///
+/// A DBG_VALUE instruction notes that (a sub-register of) a virtual register
+/// holds part of a user variable. The part is identified by a byte offset.
+///
+/// UserValues are grouped into equivalence classes for easier searching. Two
+/// user values are related if they refer to the same variable, or if they are
+/// held by the same virtual register. The equivalence class is the transitive
+/// closure of that relation.
+namespace {
+class UserValue {
+ const MDNode *variable; ///< The debug info variable we are part of.
+ unsigned offset; ///< Byte offset into variable.
+ DebugLoc dl; ///< The debug location for the variable. This is
+ ///< used by dwarf writer to find lexical scope.
+ UserValue *leader; ///< Equivalence class leader.
+ UserValue *next; ///< Next value in equivalence class, or null.
+
+ /// Numbered locations referenced by locmap.
+ SmallVector<MachineOperand, 4> locations;
+
+ /// Map of slot indices where this value is live.
+ LocMap locInts;
+
+ /// coalesceLocation - After LocNo was changed, check if it has become
+ /// identical to another location, and coalesce them. This may cause LocNo or
+ /// a later location to be erased, but no earlier location will be erased.
+ void coalesceLocation(unsigned LocNo);
+
+ /// insertDebugValue - Insert a DBG_VALUE into MBB at Idx for LocNo.
+ void insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, unsigned LocNo,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+ /// insertDebugKill - Insert an undef DBG_VALUE into MBB at Idx.
+ void insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII);
+
+public:
+ /// UserValue - Create a new UserValue.
+ UserValue(const MDNode *var, unsigned o, DebugLoc L,
+ LocMap::Allocator &alloc)
+ : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc)
+ {}
+
+ /// getLeader - Get the leader of this value's equivalence class.
+ UserValue *getLeader() {
+ UserValue *l = leader;
+ while (l != l->leader)
+ l = l->leader;
+ return leader = l;
+ }
+
+ /// getNext - Return the next UserValue in the equivalence class.
+ UserValue *getNext() const { return next; }
+
+ /// match - Does this UserValue match the parameters?
+ bool match(const MDNode *Var, unsigned Offset) const {
+ return Var == variable && Offset == offset;
+ }
+
+ /// merge - Merge equivalence classes.
+ static UserValue *merge(UserValue *L1, UserValue *L2) {
+ L2 = L2->getLeader();
+ if (!L1)
+ return L2;
+ L1 = L1->getLeader();
+ if (L1 == L2)
+ return L1;
+ // Splice L2 before L1's members.
+ UserValue *End = L2;
+ while (End->next)
+ End->leader = L1, End = End->next;
+ End->leader = L1;
+ End->next = L1->next;
+ L1->next = L2;
+ return L1;
+ }
+
+ /// getLocationNo - Return the location number that matches Loc.
+ unsigned getLocationNo(const MachineOperand &LocMO) {
+ if (LocMO.isReg() && LocMO.getReg() == 0)
+ return ~0u;
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ if (LocMO.isIdenticalTo(locations[i]))
+ return i;
+ locations.push_back(LocMO);
+ // We are storing a MachineOperand outside a MachineInstr.
+ locations.back().clearParent();
+ return locations.size() - 1;
+ }
+
+ /// addDef - Add a definition point to this value.
+ void addDef(SlotIndex Idx, const MachineOperand &LocMO) {
+ // Add a singular (Idx,Idx) -> Loc mapping.
+ LocMap::iterator I = locInts.find(Idx);
+ if (!I.valid() || I.start() != Idx)
+ I.insert(Idx, Idx.getNextSlot(), getLocationNo(LocMO));
+ }
+
+ /// extendDef - Extend the current definition as far as possible down the
+ /// dominator tree. Stop when meeting an existing def or when leaving the live
+ /// range of VNI.
+ /// @param Idx Starting point for the definition.
+ /// @param LocNo Location number to propagate.
+ /// @param LI Restrict liveness to where LI has the value VNI. May be null.
+ /// @param VNI When LI is not null, this is the value to restrict to.
+ /// @param LIS Live intervals analysis.
+ /// @param MDT Dominator tree.
+ void extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// computeIntervals - Compute the live intervals of all locations after
+ /// collecting all their def points.
+ void computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT);
+
+ /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI);
+
+ /// rewriteLocations - Rewrite virtual register locations according to the
+ /// provided virtual register map.
+ void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM,
+ LiveIntervals &LIS, const TargetInstrInfo &TRI);
+
+ /// findDebugLoc - Return the DebugLoc used for this DBG_VALUE instruction. A
+ /// variable may have more than one corresponding DBG_VALUE instruction, but
+ /// only the first one needs a DebugLoc to identify the variable's lexical
+ /// scope in the source file.
+ DebugLoc findDebugLoc();
+ void print(raw_ostream&, const TargetRegisterInfo*);
+};
+} // namespace
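The leader/next fields above implement a small union-find over UserValues: getLeader() compresses paths, and merge() splices one class's member list into the other while re-pointing leaders. A standalone model of the same structure, where EqNode is an invented stand-in for UserValue:

    struct EqNode {
      EqNode *leader, *next;
      EqNode() : leader(this), next(0) {}
    };

    static EqNode *getLeader(EqNode *N) {
      EqNode *L = N->leader;
      while (L != L->leader)
        L = L->leader;
      return N->leader = L;              // path compression, as in UserValue::getLeader
    }

    static EqNode *merge(EqNode *L1, EqNode *L2) {
      L2 = getLeader(L2);
      if (!L1) return L2;
      L1 = getLeader(L1);
      if (L1 == L2) return L1;
      // Splice L2's members in front of L1's, re-pointing their leaders at L1.
      EqNode *End = L2;
      while (End->next) { End->leader = L1; End = End->next; }
      End->leader = L1;
      End->next = L1->next;
      L1->next = L2;
      return L1;
    }
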
+
+/// LDVImpl - Implementation of the LiveDebugVariables pass.
+namespace {
+class LDVImpl {
+ LiveDebugVariables &pass;
+ LocMap::Allocator allocator;
+ MachineFunction *MF;
+ LiveIntervals *LIS;
+ MachineDominatorTree *MDT;
+ const TargetRegisterInfo *TRI;
+
+ /// userValues - All allocated UserValue instances.
+ SmallVector<UserValue*, 8> userValues;
+
+ /// Map virtual register to eq class leader.
+ typedef DenseMap<unsigned, UserValue*> VRMap;
+ VRMap virtRegToEqClass;
+
+ /// Map user variable to eq class leader.
+ typedef DenseMap<const MDNode *, UserValue*> UVMap;
+ UVMap userVarMap;
+
+ /// getUserValue - Find or create a UserValue.
+ UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL);
+
+ /// lookupVirtReg - Find the EC leader for VirtReg or null.
+ UserValue *lookupVirtReg(unsigned VirtReg);
+
+ /// mapVirtReg - Map virtual register to an equivalence class.
+ void mapVirtReg(unsigned VirtReg, UserValue *EC);
+
+ /// handleDebugValue - Add DBG_VALUE instruction to our maps.
+ /// @param MI DBG_VALUE instruction
+ /// @param Idx Last valid SlotIndex before the instruction.
+ /// @return True if the DBG_VALUE instruction should be deleted.
+ bool handleDebugValue(MachineInstr *MI, SlotIndex Idx);
+
+ /// collectDebugValues - Collect and erase all DBG_VALUE instructions, adding
+ /// a UserValue def for each instruction.
+ /// @param mf MachineFunction to be scanned.
+ /// @return True if any debug values were found.
+ bool collectDebugValues(MachineFunction &mf);
+
+ /// computeIntervals - Compute the live intervals of all user values after
+ /// collecting all their def points.
+ void computeIntervals();
+
+public:
+ LDVImpl(LiveDebugVariables *ps) : pass(*ps) {}
+ bool runOnMachineFunction(MachineFunction &mf);
+
+ /// clear - Release all memory.
+ void clear() {
+ DeleteContainerPointers(userValues);
+ userValues.clear();
+ virtRegToEqClass.clear();
+ userVarMap.clear();
+ }
+
+ /// renameRegister - Replace all references to OldReg with NewReg:SubIdx.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// emitDebugValues - Recreate DBG_VALUE instructions from data structures.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ void print(raw_ostream&);
+};
+} // namespace
+
+void UserValue::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
+ if (const MDString *MDS = dyn_cast<MDString>(variable->getOperand(2)))
+ OS << "!\"" << MDS->getString() << "\"\t";
+ if (offset)
+ OS << '+' << offset;
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I) {
+ OS << " [" << I.start() << ';' << I.stop() << "):";
+ if (I.value() == ~0u)
+ OS << "undef";
+ else
+ OS << I.value();
+ }
+ for (unsigned i = 0, e = locations.size(); i != e; ++i)
+ OS << " Loc" << i << '=' << locations[i];
+ OS << '\n';
+}
+
+void LDVImpl::print(raw_ostream &OS) {
+ OS << "********** DEBUG VARIABLES **********\n";
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->print(OS, TRI);
+}
+
+void UserValue::coalesceLocation(unsigned LocNo) {
+ unsigned KeepLoc = 0;
+ for (unsigned e = locations.size(); KeepLoc != e; ++KeepLoc) {
+ if (KeepLoc == LocNo)
+ continue;
+ if (locations[KeepLoc].isIdenticalTo(locations[LocNo]))
+ break;
+ }
+ // No matches.
+ if (KeepLoc == locations.size())
+ return;
+
+ // Keep the smaller location, erase the larger one.
+ unsigned EraseLoc = LocNo;
+ if (KeepLoc > EraseLoc)
+ std::swap(KeepLoc, EraseLoc);
+ locations.erase(locations.begin() + EraseLoc);
+
+ // Rewrite values.
+ for (LocMap::iterator I = locInts.begin(); I.valid(); ++I) {
+ unsigned v = I.value();
+ if (v == EraseLoc)
+ I.setValue(KeepLoc); // Coalesce when possible.
+ else if (v > EraseLoc)
+ I.setValueUnchecked(v-1); // Avoid coalescing with untransformed values.
+ }
+}
+
+UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset,
+ DebugLoc DL) {
+ UserValue *&Leader = userVarMap[Var];
+ if (Leader) {
+ UserValue *UV = Leader->getLeader();
+ Leader = UV;
+ for (; UV; UV = UV->getNext())
+ if (UV->match(Var, Offset))
+ return UV;
+ }
+
+ UserValue *UV = new UserValue(Var, Offset, DL, allocator);
+ userValues.push_back(UV);
+ Leader = UserValue::merge(Leader, UV);
+ return UV;
+}
+
+void LDVImpl::mapVirtReg(unsigned VirtReg, UserValue *EC) {
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) && "Only map VirtRegs");
+ UserValue *&Leader = virtRegToEqClass[VirtReg];
+ Leader = UserValue::merge(Leader, EC);
+}
+
+UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) {
+ if (UserValue *UV = virtRegToEqClass.lookup(VirtReg))
+ return UV->getLeader();
+ return 0;
+}
+
+bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) {
+ // DBG_VALUE loc, offset, variable
+ if (MI->getNumOperands() != 3 ||
+ !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) {
+ DEBUG(dbgs() << "Can't handle " << *MI);
+ return false;
+ }
+
+ // Get or create the UserValue for (variable,offset).
+ unsigned Offset = MI->getOperand(1).getImm();
+ const MDNode *Var = MI->getOperand(2).getMetadata();
+ UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc());
+
+ // If the location is a virtual register, make sure it is mapped.
+ if (MI->getOperand(0).isReg()) {
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ mapVirtReg(Reg, UV);
+ }
+
+ UV->addDef(Idx, MI->getOperand(0));
+ return true;
+}
+
+bool LDVImpl::collectDebugValues(MachineFunction &mf) {
+ bool Changed = false;
+ for (MachineFunction::iterator MFI = mf.begin(), MFE = mf.end(); MFI != MFE;
+ ++MFI) {
+ MachineBasicBlock *MBB = MFI;
+ for (MachineBasicBlock::iterator MBBI = MBB->begin(), MBBE = MBB->end();
+ MBBI != MBBE;) {
+ if (!MBBI->isDebugValue()) {
+ ++MBBI;
+ continue;
+ }
+ // DBG_VALUE has no slot index, use the previous instruction instead.
+ SlotIndex Idx = MBBI == MBB->begin() ?
+ LIS->getMBBStartIdx(MBB) :
+ LIS->getInstructionIndex(llvm::prior(MBBI)).getDefIndex();
+ // Handle consecutive DBG_VALUE instructions with the same slot index.
+ do {
+ if (handleDebugValue(MBBI, Idx)) {
+ MBBI = MBB->erase(MBBI);
+ Changed = true;
+ } else
+ ++MBBI;
+ } while (MBBI != MBBE && MBBI->isDebugValue());
+ }
+ }
+ return Changed;
+}
+
+void UserValue::extendDef(SlotIndex Idx, unsigned LocNo,
+ LiveInterval *LI, const VNInfo *VNI,
+ LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<SlotIndex, 16> Todo;
+ Todo.push_back(Idx);
+
+ do {
+ SlotIndex Start = Todo.pop_back_val();
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex Stop = LIS.getMBBEndIdx(MBB);
+ LocMap::iterator I = locInts.find(Start);
+
+ // Limit to VNI's live range.
+ bool ToEnd = true;
+ if (LI && VNI) {
+ LiveRange *Range = LI->getLiveRangeContaining(Start);
+ if (!Range || Range->valno != VNI)
+ continue;
+ if (Range->end < Stop)
+ Stop = Range->end, ToEnd = false;
+ }
+
+ // There could already be a short def at Start.
+ if (I.valid() && I.start() <= Start) {
+ // Stop when meeting a different location or an already extended interval.
+ Start = Start.getNextSlot();
+ if (I.value() != LocNo || I.stop() != Start)
+ continue;
+ // This is a one-slot placeholder. Just skip it.
+ ++I;
+ }
+
+ // Limited by the next def.
+ if (I.valid() && I.start() < Stop)
+ Stop = I.start(), ToEnd = false;
+
+ if (Start >= Stop)
+ continue;
+
+ I.insert(Start, Stop, LocNo);
+
+ // If we extended to the MBB end, propagate down the dominator tree.
+ if (!ToEnd)
+ continue;
+ const std::vector<MachineDomTreeNode*> &Children =
+ MDT.getNode(MBB)->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Todo.push_back(LIS.getMBBStartIdx(Children[i]->getBlock()));
+ } while (!Todo.empty());
+}
+
+void
+UserValue::computeIntervals(LiveIntervals &LIS, MachineDominatorTree &MDT) {
+ SmallVector<std::pair<SlotIndex, unsigned>, 16> Defs;
+
+ // Collect all defs to be extended (skipping undefs).
+ for (LocMap::const_iterator I = locInts.begin(); I.valid(); ++I)
+ if (I.value() != ~0u)
+ Defs.push_back(std::make_pair(I.start(), I.value()));
+
+ for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
+ SlotIndex Idx = Defs[i].first;
+ unsigned LocNo = Defs[i].second;
+ const MachineOperand &Loc = locations[LocNo];
+
+ // Register locations are constrained to where the register value is live.
+ if (Loc.isReg() && LIS.hasInterval(Loc.getReg())) {
+ LiveInterval *LI = &LIS.getInterval(Loc.getReg());
+ const VNInfo *VNI = LI->getVNInfoAt(Idx);
+ extendDef(Idx, LocNo, LI, VNI, LIS, MDT);
+ } else
+ extendDef(Idx, LocNo, 0, 0, LIS, MDT);
+ }
+
+ // Finally, erase all the undefs.
+ for (LocMap::iterator I = locInts.begin(); I.valid();)
+ if (I.value() == ~0u)
+ I.erase();
+ else
+ ++I;
+}
+
+void LDVImpl::computeIntervals() {
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i)
+ userValues[i]->computeIntervals(*LIS, *MDT);
+}
+
+bool LDVImpl::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ LIS = &pass.getAnalysis<LiveIntervals>();
+ MDT = &pass.getAnalysis<MachineDominatorTree>();
+ TRI = mf.getTarget().getRegisterInfo();
+ clear();
+ DEBUG(dbgs() << "********** COMPUTING LIVE DEBUG VARIABLES: "
+ << ((Value*)mf.getFunction())->getName()
+ << " **********\n");
+
+ bool Changed = collectDebugValues(mf);
+ computeIntervals();
+ DEBUG(print(dbgs()));
+ return Changed;
+}
+
+bool LiveDebugVariables::runOnMachineFunction(MachineFunction &mf) {
+ if (!EnableLDV)
+ return false;
+ if (!pImpl)
+ pImpl = new LDVImpl(this);
+ return static_cast<LDVImpl*>(pImpl)->runOnMachineFunction(mf);
+}
+
+void LiveDebugVariables::releaseMemory() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->clear();
+}
+
+LiveDebugVariables::~LiveDebugVariables() {
+ if (pImpl)
+ delete static_cast<LDVImpl*>(pImpl);
+}
+
+void UserValue::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx,
+ const TargetRegisterInfo *TRI) {
+ for (unsigned i = locations.size(); i; --i) {
+ unsigned LocNo = i - 1;
+ MachineOperand &Loc = locations[LocNo];
+ if (!Loc.isReg() || Loc.getReg() != OldReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(NewReg))
+ Loc.substPhysReg(NewReg, *TRI);
+ else
+ Loc.substVirtReg(NewReg, SubIdx, *TRI);
+ coalesceLocation(LocNo);
+ }
+}
+
+void LDVImpl::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ UserValue *UV = lookupVirtReg(OldReg);
+ if (!UV)
+ return;
+
+ if (TargetRegisterInfo::isVirtualRegister(NewReg))
+ mapVirtReg(NewReg, UV);
+ virtRegToEqClass.erase(OldReg);
+
+ do {
+ UV->renameRegister(OldReg, NewReg, SubIdx, TRI);
+ UV = UV->getNext();
+ } while (UV);
+}
+
+void LiveDebugVariables::
+renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->renameRegister(OldReg, NewReg, SubIdx);
+}
+
+void
+UserValue::rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI) {
+ // Iterating over locations in reverse makes it easier to handle coalescing.
+ for (unsigned i = locations.size(); i ; --i) {
+ unsigned LocNo = i-1;
+ MachineOperand &Loc = locations[LocNo];
+ // Only virtual registers are rewritten.
+ if (!Loc.isReg() || !Loc.getReg() ||
+ !TargetRegisterInfo::isVirtualRegister(Loc.getReg()))
+ continue;
+ unsigned VirtReg = Loc.getReg();
+ if (VRM.isAssignedReg(VirtReg) &&
+ TargetRegisterInfo::isPhysicalRegister(VRM.getPhys(VirtReg))) {
+ Loc.substPhysReg(VRM.getPhys(VirtReg), TRI);
+ } else if (VRM.getStackSlot(VirtReg) != VirtRegMap::NO_STACK_SLOT &&
+ VRM.isSpillSlotUsed(VRM.getStackSlot(VirtReg))) {
+ // FIXME: Translate SubIdx to a stackslot offset.
+ Loc = MachineOperand::CreateFI(VRM.getStackSlot(VirtReg));
+ } else {
+ Loc.setReg(0);
+ Loc.setSubReg(0);
+ }
+ coalesceLocation(LocNo);
+ }
+ DEBUG(print(dbgs(), &TRI));
+}
+
+/// findInsertLocation - Find an iterator for inserting a DBG_VALUE
+/// instruction.
+static MachineBasicBlock::iterator
+findInsertLocation(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS) {
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ Idx = Idx.getBaseIndex();
+
+ // Try to find an insert location by going backwards from Idx.
+ MachineInstr *MI;
+ while (!(MI = LIS.getInstructionFromIndex(Idx))) {
+ // We've reached the beginning of MBB.
+ if (Idx == Start) {
+ MachineBasicBlock::iterator I = MBB->SkipPHIsAndLabels(MBB->begin());
+ return I;
+ }
+ Idx = Idx.getPrevIndex();
+ }
+
+ // Don't insert anything after the first terminator, though.
+ return MI->getDesc().isTerminator() ? MBB->getFirstTerminator() :
+ llvm::next(MachineBasicBlock::iterator(MI));
+}
+
+DebugLoc UserValue::findDebugLoc() {
+ DebugLoc D = dl;
+ dl = DebugLoc();
+ return D;
+}
+void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx,
+ unsigned LocNo,
+ LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ MachineOperand &Loc = locations[LocNo];
+
+ // Frame index locations may require a target callback.
+ if (Loc.isFI()) {
+ MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(),
+ Loc.getIndex(), offset, variable,
+ findDebugLoc());
+ if (MI) {
+ MBB->insert(I, MI);
+ return;
+ }
+ }
+ // This is not a frame index, or the target is happy with a standard FI.
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE))
+ .addOperand(Loc).addImm(offset).addMetadata(variable);
+}
+
+void UserValue::insertDebugKill(MachineBasicBlock *MBB, SlotIndex Idx,
+ LiveIntervals &LIS, const TargetInstrInfo &TII) {
+ MachineBasicBlock::iterator I = findInsertLocation(MBB, Idx, LIS);
+ BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)).addReg(0)
+ .addImm(offset).addMetadata(variable);
+}
+
+void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS,
+ const TargetInstrInfo &TII) {
+ MachineFunction::iterator MFEnd = VRM->getMachineFunction().end();
+
+ for (LocMap::const_iterator I = locInts.begin(); I.valid();) {
+ SlotIndex Start = I.start();
+ SlotIndex Stop = I.stop();
+ unsigned LocNo = I.value();
+ DEBUG(dbgs() << "\t[" << Start << ';' << Stop << "):" << LocNo);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ SlotIndex MBBEnd = LIS.getMBBEndIdx(MBB);
+
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+
+ // This interval may span multiple basic blocks.
+ // Insert a DBG_VALUE into each one.
+ while (Stop > MBBEnd) {
+ // Move to the next block.
+ Start = MBBEnd;
+ if (++MBB == MFEnd)
+ break;
+ MBBEnd = LIS.getMBBEndIdx(MBB);
+ DEBUG(dbgs() << " BB#" << MBB->getNumber() << '-' << MBBEnd);
+ insertDebugValue(MBB, Start, LocNo, LIS, TII);
+ }
+ DEBUG(dbgs() << '\n');
+ if (MBB == MFEnd)
+ break;
+
+ ++I;
+ if (Stop == MBBEnd)
+ continue;
+ // The current interval ends before MBB.
+ // Insert a kill if there is a gap.
+ if (!I.valid() || I.start() > Stop)
+ insertDebugKill(MBB, Stop, LIS, TII);
+ }
+}
+
+void LDVImpl::emitDebugValues(VirtRegMap *VRM) {
+ DEBUG(dbgs() << "********** EMITTING LIVE DEBUG VARIABLES **********\n");
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ for (unsigned i = 0, e = userValues.size(); i != e; ++i) {
+ userValues[i]->rewriteLocations(*VRM, *TRI);
+ userValues[i]->emitDebugValues(VRM, *LIS, *TII);
+ }
+}
+
+void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->emitDebugValues(VRM);
+}
+
+
+#ifndef NDEBUG
+void LiveDebugVariables::dump() {
+ if (pImpl)
+ static_cast<LDVImpl*>(pImpl)->print(dbgs());
+}
+#endif
+
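For orientation, the location bookkeeping above (UserValue::coalesceLocation and the value rewrite that follows it) amounts to renumbering location indices once two location operands are found to be identical. Below is a minimal standalone sketch of that renumbering, not part of the patch, using plain std::vector in place of the pass's IntervalMap and MachineOperand types; every name in it is invented for illustration.

    // coalesce_demo.cpp - standalone illustration only.
    #include <cstdio>
    #include <vector>

    struct Seg { unsigned Start, Stop, LocNo; };   // [Start;Stop) -> location #

    // Erase location EraseLoc, redirect its users to KeepLoc, and renumber
    // the locations above EraseLoc, mirroring UserValue::coalesceLocation().
    static void coalesce(std::vector<const char*> &Locs, std::vector<Seg> &Map,
                         unsigned KeepLoc, unsigned EraseLoc) {
      Locs.erase(Locs.begin() + EraseLoc);
      for (unsigned i = 0, e = Map.size(); i != e; ++i) {
        if (Map[i].LocNo == EraseLoc)
          Map[i].LocNo = KeepLoc;     // coalesce identical locations
        else if (Map[i].LocNo > EraseLoc)
          --Map[i].LocNo;             // close the gap left by the erased entry
      }
    }

    int main() {
      std::vector<const char*> Locs;  // Loc1 turns out to be identical to Loc0.
      Locs.push_back("%vreg1"); Locs.push_back("%vreg1"); Locs.push_back("fi#2");
      std::vector<Seg> Map;
      Seg A = {4, 12, 0}, B = {12, 20, 1}, C = {20, 36, 2};
      Map.push_back(A); Map.push_back(B); Map.push_back(C);
      coalesce(Locs, Map, 0, 1);
      for (unsigned i = 0, e = Map.size(); i != e; ++i)
        std::printf("[%u;%u): Loc%u=%s\n", Map[i].Start, Map[i].Stop,
                    Map[i].LocNo, Locs[Map[i].LocNo]);
      return 0;
    }

The two branches of the rewrite loop match the ones in coalesceLocation(): users of the erased location are redirected to the kept one, and higher-numbered locations slide down by one so the location vector stays dense.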
diff --git a/lib/CodeGen/LiveDebugVariables.h b/lib/CodeGen/LiveDebugVariables.h
new file mode 100644
index 000000000000..a6e40a198456
--- /dev/null
+++ b/lib/CodeGen/LiveDebugVariables.h
@@ -0,0 +1,63 @@
+//===- LiveDebugVariables.h - Tracking debug info variables ----*- c++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface to the LiveDebugVariables analysis.
+//
+// The analysis removes DBG_VALUE instructions for virtual registers and tracks
+// live user variables in a data structure that can be updated during register
+// allocation.
+//
+// After register allocation new DBG_VALUE instructions are emitted to reflect
+// the new locations of user variables.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+#define LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class VirtRegMap;
+
+class LiveDebugVariables : public MachineFunctionPass {
+ void *pImpl;
+public:
+ static char ID; // Pass identification, replacement for typeid
+
+ LiveDebugVariables();
+ ~LiveDebugVariables();
+
+ /// renameRegister - Move any user variables in OldReg to NewReg:SubIdx.
+ /// @param OldReg Old virtual register that is going away.
+ /// @param NewReg New register holding the user variables.
+ /// @param SubIdx If NewReg is a virtual register, SubIdx may indicate a sub-
+ /// register.
+ void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx);
+
+ /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes
+ /// that happened during register allocation.
+ /// @param VRM Rename virtual registers according to map.
+ void emitDebugValues(VirtRegMap *VRM);
+
+ /// dump - Print data structures to dbgs().
+ void dump();
+
+private:
+
+ virtual bool runOnMachineFunction(MachineFunction &);
+ virtual void releaseMemory();
+ virtual void getAnalysisUsage(AnalysisUsage &) const;
+
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_LIVEDEBUGVARIABLES_H
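As a usage note, the interface above is meant to be driven by the register allocator. The sketch below shows a hypothetical client; the pass name DebugAwareAllocator and everything except the methods declared in this header and the standard pass-manager hooks are invented, so treat it as an assumption-laden outline rather than code from the tree.

    // Hypothetical client of LiveDebugVariables (illustration only).
    #include "LiveDebugVariables.h"
    #include "llvm/CodeGen/MachineFunctionPass.h"

    using namespace llvm;

    namespace {
    class DebugAwareAllocator : public MachineFunctionPass {
    public:
      static char ID;
      DebugAwareAllocator() : MachineFunctionPass(ID) {}

      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<LiveDebugVariables>();
        AU.addPreserved<LiveDebugVariables>();
        MachineFunctionPass::getAnalysisUsage(AU);
      }

      virtual bool runOnMachineFunction(MachineFunction &MF) {
        LiveDebugVariables &LDV = getAnalysis<LiveDebugVariables>();
        // ... run allocation; whenever vreg OldReg is merged into
        //     NewReg:SubIdx, keep the user variables in sync:
        //       LDV.renameRegister(OldReg, NewReg, SubIdx);
        // ... once the VirtRegMap VRM is final, re-emit DBG_VALUEs:
        //       LDV.emitDebugValues(&VRM);
        return true;
      }
    };
    char DebugAwareAllocator::ID = 0;
    } // end anonymous namespace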
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 59f380ad2641..c2dbd6ab75a1 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -30,58 +30,19 @@
#include <algorithm>
using namespace llvm;
-// An example for liveAt():
-//
-// this = [1,4), liveAt(0) will return false. The instruction defining this
-// spans slots [0,3]. The interval belongs to an spilled definition of the
-// variable it represents. This is because slot 1 is used (def slot) and spans
-// up to slot 3 (store slot).
-//
-bool LiveInterval::liveAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- return r->contains(I);
-}
-
-// liveBeforeAndAt - Check if the interval is live at the index and the index
-// just before it. If index is liveAt, check if it starts a new live range.
-// If it does, then check if the previous live range ends at index-1.
-bool LiveInterval::liveBeforeAndAt(SlotIndex I) const {
- Ranges::const_iterator r = std::upper_bound(ranges.begin(), ranges.end(), I);
-
- if (r == ranges.begin())
- return false;
-
- --r;
- if (!r->contains(I))
- return false;
- if (I != r->start)
- return true;
- // I is the start of a live range. Check if the previous live range ends
- // at I-1.
- if (r == ranges.begin())
- return false;
- return r->end == I;
+// CompEnd - Compare LiveRange ends.
+namespace {
+struct CompEnd {
+ bool operator()(const LiveRange &A, const LiveRange &B) const {
+ return A.end < B.end;
+ }
+};
}
-/// killedAt - Return true if a live range ends at index. Note that the kill
-/// point is not contained in the half-open live range. It is usually the
-/// getDefIndex() slot following its last use.
-bool LiveInterval::killedAt(SlotIndex I) const {
- Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I);
-
- // Now r points to the first interval with start >= I, or ranges.end().
- if (r == ranges.begin())
- return false;
-
- --r;
- // Now r points to the last interval with end <= I.
- // r->end is the kill point.
- return r->end == I;
+LiveInterval::iterator LiveInterval::find(SlotIndex Pos) {
+ assert(Pos.isValid() && "Cannot search for an invalid index");
+ return std::upper_bound(begin(), end(), LiveRange(SlotIndex(), Pos, 0),
+ CompEnd());
}
/// killedInRange - Return true if the interval has kills in [Start,End).
@@ -330,25 +291,14 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) {
return ranges.insert(it, LR);
}
-/// isInOneLiveRange - Return true if the range specified is entirely in
-/// a single LiveRange of the live interval.
-bool LiveInterval::isInOneLiveRange(SlotIndex Start, SlotIndex End) {
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- if (I == ranges.begin())
- return false;
- --I;
- return I->containsRange(Start, End);
-}
-
/// removeRange - Remove the specified range from this interval. Note that
/// the range must be in a single LiveRange in its entirety.
void LiveInterval::removeRange(SlotIndex Start, SlotIndex End,
bool RemoveDeadValNo) {
// Find the LiveRange containing this span.
- Ranges::iterator I = std::upper_bound(ranges.begin(), ranges.end(), Start);
- assert(I != ranges.begin() && "Range is not in interval!");
- --I;
+ Ranges::iterator I = find(Start);
+ assert(I != ranges.end() && "Range is not in interval!");
assert(I->containsRange(Start, End) && "Range is not entirely in interval!");
// If the span we are removing is at the start of the LiveRange, adjust it.
@@ -405,32 +355,6 @@ void LiveInterval::removeValNo(VNInfo *ValNo) {
markValNoForDeletion(ValNo);
}
-/// getLiveRangeContaining - Return the live range that contains the
-/// specified index, or null if there is none.
-LiveInterval::const_iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) const {
- const_iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != ranges.begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
-LiveInterval::iterator
-LiveInterval::FindLiveRangeContaining(SlotIndex Idx) {
- iterator It = std::upper_bound(begin(), end(), Idx);
- if (It != begin()) {
- --It;
- if (It->contains(Idx))
- return It;
- }
-
- return end();
-}
-
/// findDefinedVNInfo - Find the VNInfo defined by the specified
/// index (register interval).
VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
@@ -443,17 +367,6 @@ VNInfo *LiveInterval::findDefinedVNInfoForRegInt(SlotIndex Idx) const {
return 0;
}
-/// findDefinedVNInfo - Find the VNInfo defined by the specified
-/// register (stack inteval).
-VNInfo *LiveInterval::findDefinedVNInfoForStackInt(unsigned reg) const {
- for (LiveInterval::const_vni_iterator i = vni_begin(), e = vni_end();
- i != e; ++i) {
- if ((*i)->getReg() == reg)
- return *i;
- }
- return 0;
-}
-
/// join - Join two live intervals (this, and other) together. This applies
/// mappings to the value numbers in the LHS/RHS intervals as specified. If
/// the intervals are not joinable, this aborts.
@@ -616,103 +529,6 @@ void LiveInterval::MergeValueInAsValue(
}
-/// MergeInClobberRanges - For any live ranges that are not defined in the
-/// current interval, but are defined in the Clobbers interval, mark them
-/// used with an unknown definition value.
-void LiveInterval::MergeInClobberRanges(LiveIntervals &li_,
- const LiveInterval &Clobbers,
- VNInfo::Allocator &VNInfoAllocator) {
- if (Clobbers.empty()) return;
-
- DenseMap<VNInfo*, VNInfo*> ValNoMaps;
- VNInfo *UnusedValNo = 0;
- iterator IP = begin();
- for (const_iterator I = Clobbers.begin(), E = Clobbers.end(); I != E; ++I) {
- // For every val# in the Clobbers interval, create a new "unknown" val#.
- VNInfo *ClobberValNo = 0;
- DenseMap<VNInfo*, VNInfo*>::iterator VI = ValNoMaps.find(I->valno);
- if (VI != ValNoMaps.end())
- ClobberValNo = VI->second;
- else if (UnusedValNo)
- ClobberValNo = UnusedValNo;
- else {
- UnusedValNo = ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
- ValNoMaps.insert(std::make_pair(I->valno, ClobberValNo));
- }
-
- bool Done = false;
- SlotIndex Start = I->start, End = I->end;
- // If a clobber range starts before an existing range and ends after
- // it, the clobber range will need to be split into multiple ranges.
- // Loop until the entire clobber range is handled.
- while (!Done) {
- Done = true;
- IP = std::upper_bound(IP, end(), Start);
- SlotIndex SubRangeStart = Start;
- SlotIndex SubRangeEnd = End;
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > SubRangeStart) {
- SubRangeStart = IP[-1].end;
- // Trimmed away the whole range?
- if (SubRangeStart >= SubRangeEnd) continue;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && SubRangeEnd > IP->start) {
- // If the clobber live range extends beyond the existing live range,
- // it'll need at least another live range, so set the flag to keep
- // iterating.
- if (SubRangeEnd > IP->end) {
- Start = IP->end;
- Done = false;
- }
- SubRangeEnd = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (SubRangeStart == SubRangeEnd) continue;
- }
-
- // Insert the clobber interval.
- IP = addRangeFrom(LiveRange(SubRangeStart, SubRangeEnd, ClobberValNo),
- IP);
- UnusedValNo = 0;
- }
- }
-
- if (UnusedValNo) {
- // Delete the last unused val#.
- valnos.pop_back();
- }
-}
-
-void LiveInterval::MergeInClobberRange(LiveIntervals &li_,
- SlotIndex Start,
- SlotIndex End,
- VNInfo::Allocator &VNInfoAllocator) {
- // Find a value # to use for the clobber ranges. If there is already a value#
- // for unknown values, use it.
- VNInfo *ClobberValNo =
- getNextValue(li_.getInvalidIndex(), 0, false, VNInfoAllocator);
-
- iterator IP = begin();
- IP = std::upper_bound(IP, end(), Start);
-
- // If the start of this range overlaps with an existing liverange, trim it.
- if (IP != begin() && IP[-1].end > Start) {
- Start = IP[-1].end;
- // Trimmed away the whole range?
- if (Start >= End) return;
- }
- // If the end of this range overlaps with an existing liverange, trim it.
- if (IP != end() && End > IP->start) {
- End = IP->start;
- // If this trimmed away the whole range, ignore it.
- if (Start == End) return;
- }
-
- // Insert the clobber interval.
- addRangeFrom(LiveRange(Start, End, ClobberValNo), IP);
-}
/// MergeValueNumberInto - This method is called when two value numbers
/// are found to be equivalent. This eliminates V1, replacing all
@@ -767,6 +583,9 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) {
}
}
+ // Merge the relevant flags.
+ V2->mergeFlags(V1);
+
// Now that V1 is dead, remove it.
markValNoForDeletion(V1);
@@ -831,14 +650,9 @@ void LiveRange::dump() const {
}
void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
- if (isStackSlot())
- OS << "SS#" << getStackSlotIndex();
- else if (TRI && TargetRegisterInfo::isPhysicalRegister(reg))
- OS << TRI->getName(reg);
- else
- OS << "%reg" << reg;
-
- OS << ',' << weight;
+ OS << PrintReg(reg, TRI);
+ if (weight != 0)
+ OS << ',' << weight;
if (empty())
OS << " EMPTY";
@@ -863,10 +677,9 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
if (vni->isUnused()) {
OS << "x";
} else {
- if (!vni->isDefAccurate() && !vni->isPHIDef())
- OS << "?";
- else
- OS << vni->def;
+ OS << vni->def;
+ if (vni->isPHIDef())
+ OS << "-phidef";
if (vni->hasPHIKill())
OS << "-phikill";
if (vni->hasRedefByEC())
@@ -884,3 +697,84 @@ void LiveInterval::dump() const {
void LiveRange::print(raw_ostream &os) const {
os << *this;
}
+
+unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) {
+ // Create initial equivalence classes.
+ eqClass_.clear();
+ eqClass_.grow(LI->getNumValNums());
+
+ const VNInfo *used = 0, *unused = 0;
+
+ // Determine connections.
+ for (LiveInterval::const_vni_iterator I = LI->vni_begin(), E = LI->vni_end();
+ I != E; ++I) {
+ const VNInfo *VNI = *I;
+ // Group all unused values into one class.
+ if (VNI->isUnused()) {
+ if (unused)
+ eqClass_.join(unused->id, VNI->id);
+ unused = VNI;
+ continue;
+ }
+ used = VNI;
+ if (VNI->isPHIDef()) {
+ const MachineBasicBlock *MBB = lis_.getMBBFromIndex(VNI->def);
+ assert(MBB && "Phi-def has no defining MBB");
+ // Connect to values live out of predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI)
+ if (const VNInfo *PVNI =
+ LI->getVNInfoAt(lis_.getMBBEndIdx(*PI).getPrevSlot()))
+ eqClass_.join(VNI->id, PVNI->id);
+ } else {
+ // Normal value defined by an instruction. Check for two-addr redef.
+ // FIXME: This could be coincidental. Should we really check for a tied
+ // operand constraint?
+ // Note that VNI->def may be a use slot for an early clobber def.
+ if (const VNInfo *UVNI = LI->getVNInfoAt(VNI->def.getPrevSlot()))
+ eqClass_.join(VNI->id, UVNI->id);
+ }
+ }
+
+ // Lump all the unused values in with the last used value.
+ if (used && unused)
+ eqClass_.join(used->id, unused->id);
+
+ eqClass_.compress();
+ return eqClass_.getNumClasses();
+}
+
+void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[]) {
+ assert(LIV[0] && "LIV[0] must be set");
+ LiveInterval &LI = *LIV[0];
+
+ // First move runs to new intervals.
+ LiveInterval::iterator J = LI.begin(), E = LI.end();
+ while (J != E && eqClass_[J->valno->id] == 0)
+ ++J;
+ for (LiveInterval::iterator I = J; I != E; ++I) {
+ if (unsigned eq = eqClass_[I->valno->id]) {
+ assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) &&
+ "New intervals should be empty");
+ LIV[eq]->ranges.push_back(*I);
+ } else
+ *J++ = *I;
+ }
+ LI.ranges.erase(J, E);
+
+ // Transfer VNInfos to their new owners and renumber them.
+ unsigned j = 0, e = LI.getNumValNums();
+ while (j != e && eqClass_[j] == 0)
+ ++j;
+ for (unsigned i = j; i != e; ++i) {
+ VNInfo *VNI = LI.getValNumInfo(i);
+ if (unsigned eq = eqClass_[i]) {
+ VNI->id = LIV[eq]->getNumValNums();
+ LIV[eq]->valnos.push_back(VNI);
+ } else {
+ VNI->id = j;
+ LI.valnos[j++] = VNI;
+ }
+ }
+ LI.valnos.resize(j);
+}
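The CompEnd comparator and LiveInterval::find() added above rely on a small property of sorted, non-overlapping half-open ranges: an upper_bound on the end points lands on the only segment that can possibly contain the query index. A self-contained sketch of the same trick with plain integers follows; it is not part of the patch and all names in it are invented.

    // find_demo.cpp - standalone illustration only.
    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct Seg { int start, end; };                  // half-open [start;end)

    // Order segments by their end point, like CompEnd above.
    struct CompSegEnd {
      bool operator()(const Seg &A, const Seg &B) const { return A.end < B.end; }
    };

    // Return the first segment whose end is strictly greater than Pos.  For
    // sorted, non-overlapping half-open segments that is the only candidate
    // which can contain Pos, which is what LiveInterval::find() exploits.
    static std::vector<Seg>::const_iterator findSeg(const std::vector<Seg> &Segs,
                                                    int Pos) {
      Seg Key = { Pos, Pos };     // only Key.end participates in the compare
      return std::upper_bound(Segs.begin(), Segs.end(), Key, CompSegEnd());
    }

    int main() {
      std::vector<Seg> Segs;
      Seg A = {4, 12}, B = {16, 20}, C = {28, 40};
      Segs.push_back(A); Segs.push_back(B); Segs.push_back(C);
      int Queries[] = {10, 12, 18, 41};
      for (unsigned i = 0; i != 4; ++i) {
        std::vector<Seg>::const_iterator I = findSeg(Segs, Queries[i]);
        if (I != Segs.end() && I->start <= Queries[i])
          std::printf("%d is inside [%d;%d)\n", Queries[i], I->start, I->end);
        else
          std::printf("%d is not covered\n", Queries[i]);
      }
      return 0;
    }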
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2726fc337539..aef5b5f77e78 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -20,6 +20,7 @@
#include "VirtRegMap.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -55,8 +56,17 @@ STATISTIC(numFolds , "Number of loads/stores folded into instructions");
STATISTIC(numSplits , "Number of intervals split");
char LiveIntervals::ID = 0;
-INITIALIZE_PASS(LiveIntervals, "liveintervals",
- "Live Interval Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_PASS_DEPENDENCY(ProcessImplicitDefs)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(LiveIntervals, "liveintervals",
+ "Live Interval Analysis", false, false)
void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -132,19 +142,7 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const {
void LiveIntervals::printInstrs(raw_ostream &OS) const {
OS << "********** MACHINEINSTRS **********\n";
-
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
- mbbi != mbbe; ++mbbi) {
- OS << "BB#" << mbbi->getNumber()
- << ":\t\t# derived from " << mbbi->getName() << "\n";
- for (MachineBasicBlock::iterator mii = mbbi->begin(),
- mie = mbbi->end(); mii != mie; ++mii) {
- if (mii->isDebugValue())
- OS << " \t" << *mii;
- else
- OS << getInstructionIndex(mii) << '\t' << *mii;
- }
- }
+ mf_->print(OS, indexes_);
}
void LiveIntervals::dumpInstrs() const {
@@ -248,15 +246,6 @@ bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg,
return false;
}
-#ifndef NDEBUG
-static void printRegName(unsigned reg, const TargetRegisterInfo* tri_) {
- if (TargetRegisterInfo::isPhysicalRegister(reg))
- dbgs() << tri_->getName(reg);
- else
- dbgs() << "%reg" << reg;
-}
-#endif
-
static
bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) {
unsigned Reg = MI.getOperand(MOIdx).getReg();
@@ -285,8 +274,8 @@ bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO,
SlotIndex RedefIndex = MIIdx.getDefIndex();
const LiveRange *OldLR =
interval.getLiveRangeContaining(RedefIndex.getUseIndex());
- if (OldLR->valno->isDefAccurate()) {
- MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def);
+ if (DefMI != 0) {
return DefMI->findRegisterDefOperandIdx(interval.reg) != -1;
}
return false;
@@ -298,10 +287,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineOperand& MO,
unsigned MOIdx,
LiveInterval &interval) {
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
// Virtual registers may be defined multiple times (due to phi
// elimination and 2-addr elimination). Much of what we do only has to be
@@ -326,8 +312,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
CopyMI = mi;
}
- VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true,
- VNInfoAllocator);
+ VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
assert(ValNo->id == 0 && "First value in interval is not 0?");
// Loop over all of the blocks that the vreg is defined in. There are
@@ -393,8 +378,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Create interval with one of a NEW value number. Note that this value
// number isn't actually defined by an instruction, weird huh? :)
if (PHIJoin) {
- ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
- VNInfoAllocator);
+ assert(getInstructionFromIndex(Start) == 0 &&
+ "PHI def index points at actual instruction.");
+ ValNo = interval.getNextValue(Start, 0, VNInfoAllocator);
ValNo->setIsPHIDef(true);
}
LiveRange LR(Start, killIdx, ValNo);
@@ -440,10 +426,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// The new value number (#1) is defined by the instruction we claimed
// defined value #0.
- VNInfo *ValNo = interval.getNextValue(OldValNo->def, OldValNo->getCopy(),
- false, // update at *
- VNInfoAllocator);
- ValNo->setFlags(OldValNo->getFlags()); // * <- updating here
+ VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator);
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
@@ -481,7 +464,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
MachineInstr *CopyMI = NULL;
if (mi->isCopyLike())
CopyMI = mi;
- ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator);
+ ValNo = interval.getNextValue(defIndex, CopyMI, VNInfoAllocator);
SlotIndex killIndex = getMBBEndIdx(mbb);
LiveRange LR(defIndex, killIndex, ValNo);
@@ -504,10 +487,7 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
MachineInstr *CopyMI) {
// A physical register cannot be live across basic block, so its
// lifetime must end somewhere in its defining basic block.
- DEBUG({
- dbgs() << "\t\tregister: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, tri_));
SlotIndex baseIndex = MIIdx;
SlotIndex start = baseIndex.getDefIndex();
@@ -573,11 +553,11 @@ exit:
assert(start < end && "did not find end of interval?");
// Already exists? Extend old live interval.
- LiveInterval::iterator OldLR = interval.FindLiveRangeContaining(start);
- bool Extend = OldLR != interval.end();
- VNInfo *ValNo = Extend
- ? OldLR->valno : interval.getNextValue(start, CopyMI, true, VNInfoAllocator);
- if (MO.isEarlyClobber() && Extend)
+ VNInfo *ValNo = interval.getVNInfoAt(start);
+ bool Extend = ValNo != 0;
+ if (!Extend)
+ ValNo = interval.getNextValue(start, CopyMI, VNInfoAllocator);
+ if (Extend && MO.isEarlyClobber())
ValNo->setHasRedefByEC(true);
LiveRange LR(start, end, ValNo);
interval.addRange(LR);
@@ -611,10 +591,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB,
void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
SlotIndex MIIdx,
LiveInterval &interval, bool isAlias) {
- DEBUG({
- dbgs() << "\t\tlivein register: ";
- printRegName(interval.reg, tri_);
- });
+ DEBUG(dbgs() << "\t\tlivein register: " << PrintReg(interval.reg, tri_));
// Look for kills, if it reaches a def before it's killed, then it shouldn't
// be considered a livein.
@@ -672,9 +649,11 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
}
}
+ SlotIndex defIdx = getMBBStartIdx(MBB);
+ assert(getInstructionFromIndex(defIdx) == 0 &&
+ "PHI def index points at actual instruction.");
VNInfo *vni =
- interval.getNextValue(SlotIndex(getMBBStartIdx(MBB), true),
- 0, false, VNInfoAllocator);
+ interval.getNextValue(defIdx, 0, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
@@ -764,10 +743,177 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) {
return NewLI;
}
+/// shrinkToUses - After removing some uses of a register, shrink its live
+/// range to just the remaining uses. This method does not compute reaching
+/// defs for new uses, and it doesn't remove dead defs.
+void LiveIntervals::shrinkToUses(LiveInterval *li) {
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+ assert(TargetRegisterInfo::isVirtualRegister(li->reg)
+ && "Can't only shrink physical registers");
+ // Find all the values used, including PHI kills.
+ SmallVector<std::pair<SlotIndex, VNInfo*>, 16> WorkList;
+
+ // Visit all instructions reading li->reg.
+ for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li->reg);
+ MachineInstr *UseMI = I.skipInstruction();) {
+ if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg))
+ continue;
+ SlotIndex Idx = getInstructionIndex(UseMI).getUseIndex();
+ VNInfo *VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Live interval not live into reading instruction");
+ if (VNI->def == Idx) {
+ // Special case: An early-clobber tied operand reads and writes the
+ // register one slot early.
+ Idx = Idx.getPrevSlot();
+ VNI = li->getVNInfoAt(Idx);
+ assert(VNI && "Early-clobber tied value not available");
+ }
+ WorkList.push_back(std::make_pair(Idx, VNI));
+ }
+
+ // Create a new live interval with only minimal live segments per def.
+ LiveInterval NewLI(li->reg, 0);
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ NewLI.addRange(LiveRange(VNI->def, VNI->def.getNextSlot(), VNI));
+ }
+
+ // Extend intervals to reach all uses in WorkList.
+ while (!WorkList.empty()) {
+ SlotIndex Idx = WorkList.back().first;
+ VNInfo *VNI = WorkList.back().second;
+ WorkList.pop_back();
+
+ // Extend the live range for VNI to be live at Idx.
+ LiveInterval::iterator I = NewLI.find(Idx);
+
+ // Already got it?
+ if (I != NewLI.end() && I->start <= Idx) {
+ assert(I->valno == VNI && "Unexpected existing value number");
+ continue;
+ }
+
+ // Is there already a live range in the block containing Idx?
+ const MachineBasicBlock *MBB = getMBBFromIndex(Idx);
+ SlotIndex BlockStart = getMBBStartIdx(MBB);
+ DEBUG(dbgs() << "Shrink: Use val#" << VNI->id << " at " << Idx
+ << " in BB#" << MBB->getNumber() << '@' << BlockStart);
+ if (I != NewLI.begin() && (--I)->end > BlockStart) {
+ assert(I->valno == VNI && "Wrong reaching def");
+ DEBUG(dbgs() << " extend [" << I->start << ';' << I->end << ")\n");
+ // Is this the first use of a PHIDef in its defining block?
+ if (VNI->isPHIDef() && I->end == VNI->def.getNextSlot()) {
+ // The PHI is live, make sure the predecessors are live-out.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ VNInfo *PVNI = li->getVNInfoAt(Stop);
+ // A predecessor is not required to have a live-out value for a PHI.
+ if (PVNI) {
+ assert(PVNI->hasPHIKill() && "Missing hasPHIKill flag");
+ WorkList.push_back(std::make_pair(Stop, PVNI));
+ }
+ }
+ }
+
+ // Extend the live range in the block to include Idx.
+ NewLI.addRange(LiveRange(I->end, Idx.getNextSlot(), VNI));
+ continue;
+ }
+
+ // VNI is live-in to MBB.
+ DEBUG(dbgs() << " live-in at " << BlockStart << '\n');
+ NewLI.addRange(LiveRange(BlockStart, Idx.getNextSlot(), VNI));
+
+ // Make sure VNI is live-out from the predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex Stop = getMBBEndIdx(*PI).getPrevSlot();
+ assert(li->getVNInfoAt(Stop) == VNI && "Wrong value out of predecessor");
+ WorkList.push_back(std::make_pair(Stop, VNI));
+ }
+ }
+
+ // Handle dead values.
+ for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end();
+ I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def);
+ assert(LII != NewLI.end() && "Missing live range for PHI");
+ if (LII->end != VNI->def.getNextSlot())
+ continue;
+ if (VNI->isPHIDef()) {
+ // This is a dead PHI. Remove it.
+ VNI->setIsUnused(true);
+ NewLI.removeRange(*LII);
+ } else {
+ // This is a dead def. Make sure the instruction knows.
+ MachineInstr *MI = getInstructionFromIndex(VNI->def);
+ assert(MI && "No instruction defining live value");
+ MI->addRegisterDead(li->reg, tri_);
+ }
+ }
+
+ // Move the trimmed ranges back.
+ li->ranges.swap(NewLI.ranges);
+ DEBUG(dbgs() << "Shrink: " << *li << '\n');
+}
+
+
//===----------------------------------------------------------------------===//
// Register allocator hooks.
//
+MachineBasicBlock::iterator
+LiveIntervals::getLastSplitPoint(const LiveInterval &li,
+ MachineBasicBlock *mbb) const {
+ const MachineBasicBlock *lpad = mbb->getLandingPadSuccessor();
+
+ // If li is not live into a landing pad, we can insert spill code before the
+ // first terminator.
+ if (!lpad || !isLiveInToMBB(li, lpad))
+ return mbb->getFirstTerminator();
+
+ // When there is a landing pad, spill code must go before the call instruction
+ // that can throw.
+ MachineBasicBlock::iterator I = mbb->end(), B = mbb->begin();
+ while (I != B) {
+ --I;
+ if (I->getDesc().isCall())
+ return I;
+ }
+ // The block contains no calls that can throw, so use the first terminator.
+ return mbb->getFirstTerminator();
+}
+
+void LiveIntervals::addKillFlags() {
+ for (iterator I = begin(), E = end(); I != E; ++I) {
+ unsigned Reg = I->first;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (mri_->reg_nodbg_empty(Reg))
+ continue;
+ LiveInterval *LI = I->second;
+
+ // Every instruction that kills Reg corresponds to a live range end point.
+ for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE;
+ ++RI) {
+ // A LOAD index indicates an MBB edge.
+ if (RI->end.isLoad())
+ continue;
+ MachineInstr *MI = getInstructionFromIndex(RI->end);
+ if (!MI)
+ continue;
+ MI->addRegisterKilled(Reg, NULL);
+ }
+ }
+}
+
/// getReMatImplicitUse - If the remat definition MI has one (for now, we only
/// allow one) virtual register operand, then its uses are implicitly using
/// the register. Returns the virtual register.
@@ -800,18 +946,17 @@ unsigned LiveIntervals::getReMatImplicitUse(const LiveInterval &li,
/// which reaches the given instruction also reaches the specified use index.
bool LiveIntervals::isValNoAvailableAt(const LiveInterval &li, MachineInstr *MI,
SlotIndex UseIdx) const {
- SlotIndex Index = getInstructionIndex(MI);
- VNInfo *ValNo = li.FindLiveRangeContaining(Index)->valno;
- LiveInterval::const_iterator UI = li.FindLiveRangeContaining(UseIdx);
- return UI != li.end() && UI->valno == ValNo;
+ VNInfo *UValNo = li.getVNInfoAt(UseIdx);
+ return UValNo && UValNo == li.getVNInfoAt(getInstructionIndex(MI));
}
/// isReMaterializable - Returns true if the definition MI of the specified
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- const VNInfo *ValNo, MachineInstr *MI,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const VNInfo *ValNo, MachineInstr *MI,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
if (DisableReMat)
return false;
@@ -829,7 +974,7 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
ri != re; ++ri) {
MachineInstr *UseMI = &*ri;
SlotIndex UseIdx = getInstructionIndex(UseMI);
- if (li.FindLiveRangeContaining(UseIdx)->valno != ValNo)
+ if (li.getVNInfoAt(UseIdx) != ValNo)
continue;
if (!isValNoAvailableAt(ImpLi, MI, UseIdx))
return false;
@@ -855,9 +1000,10 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
/// isReMaterializable - Returns true if every definition of MI of every
/// val# of the specified interval is re-materializable.
-bool LiveIntervals::isReMaterializable(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
- bool &isLoad) {
+bool
+LiveIntervals::isReMaterializable(const LiveInterval &li,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
+ bool &isLoad) {
isLoad = false;
for (LiveInterval::const_vni_iterator i = li.vni_begin(), e = li.vni_end();
i != e; ++i) {
@@ -865,9 +1011,9 @@ bool LiveIntervals::isReMaterializable(const LiveInterval &li,
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- if (!VNI->isDefAccurate())
- return false;
MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
+ if (!ReMatDefMI)
+ return false;
bool DefIsLoad = false;
if (!ReMatDefMI ||
!isReMaterializable(li, VNI, ReMatDefMI, SpillIs, DefIsLoad))
@@ -1010,7 +1156,7 @@ void LiveIntervals::rewriteImplicitOps(const LiveInterval &li,
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (!vrm.isReMaterialized(Reg))
continue;
@@ -1044,7 +1190,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (!mop.isReg())
continue;
unsigned Reg = mop.getReg();
- if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (Reg != li.reg)
continue;
@@ -1140,11 +1286,14 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
rewriteImplicitOps(li, MI, NewVReg, vrm);
// Reuse NewVReg for other reads.
+ bool HasEarlyClobber = false;
for (unsigned j = 0, e = Ops.size(); j != e; ++j) {
MachineOperand &mopj = MI->getOperand(Ops[j]);
mopj.setReg(NewVReg);
if (mopj.isImplicit())
rewriteImplicitOps(li, MI, NewVReg, vrm);
+ if (mopj.isEarlyClobber())
+ HasEarlyClobber = true;
}
if (CreatedNewVReg) {
@@ -1190,7 +1339,7 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
if (HasUse) {
if (CreatedNewVReg) {
LiveRange LR(index.getLoadIndex(), index.getDefIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
} else {
@@ -1203,8 +1352,12 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI,
}
}
if (HasDef) {
- LiveRange LR(index.getDefIndex(), index.getStoreIndex(),
- nI.getNextValue(SlotIndex(), 0, false, VNInfoAllocator));
+ // An early clobber starts at the use slot, except for an early clobber
+ // tied to a use operand (yes, that is a thing).
+ LiveRange LR(HasEarlyClobber && !HasUse ?
+ index.getUseIndex() : index.getDefIndex(),
+ index.getStoreIndex(),
+ nI.getNextValue(SlotIndex(), 0, VNInfoAllocator));
DEBUG(dbgs() << " +" << LR);
nI.addRange(LR);
}
@@ -1554,15 +1707,15 @@ LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
return (isDef + isUse) * lc;
}
-void
-LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+static void normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
- normalizeSpillWeight(*NewLIs[i]);
+ NewLIs[i]->weight =
+ normalizeSpillWeight(NewLIs[i]->weight, NewLIs[i]->getSize());
}
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpills(const LiveInterval &li,
- SmallVectorImpl<LiveInterval*> &SpillIs,
+ const SmallVectorImpl<LiveInterval*> &SpillIs,
const MachineLoopInfo *loopInfo, VirtRegMap &vrm) {
assert(li.isSpillable() && "attempt to spill already spilled interval!");
@@ -1653,8 +1806,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (VNI->isUnused())
continue; // Dead val#.
// Is the def for the val# rematerializable?
- MachineInstr *ReMatDefMI = VNI->isDefAccurate()
- ? getInstructionFromIndex(VNI->def) : 0;
+ MachineInstr *ReMatDefMI = getInstructionFromIndex(VNI->def);
bool dummy;
if (ReMatDefMI && isReMaterializable(li, VNI, ReMatDefMI, SpillIs, dummy)) {
// Remember how to remat the def of this val#.
@@ -1926,6 +2078,9 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
unsigned PhysReg, VirtRegMap &vrm) {
unsigned SpillReg = getRepresentativeReg(PhysReg);
+ DEBUG(dbgs() << "spillPhysRegAroundRegDefsUses " << tri_->getName(PhysReg)
+ << " represented by " << tri_->getName(SpillReg) << '\n');
+
for (const unsigned *AS = tri_->getAliasSet(PhysReg); *AS; ++AS)
// If there are registers which alias PhysReg, but which are not a
// sub-register of the chosen representative super register. Assert
@@ -1937,15 +2092,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
SmallVector<unsigned, 4> PRegs;
if (hasInterval(SpillReg))
PRegs.push_back(SpillReg);
- else {
- SmallSet<unsigned, 4> Added;
- for (const unsigned* AS = tri_->getSubRegisters(SpillReg); *AS; ++AS)
- if (Added.insert(*AS) && hasInterval(*AS)) {
- PRegs.push_back(*AS);
- for (const unsigned* ASS = tri_->getSubRegisters(*AS); *ASS; ++ASS)
- Added.insert(*ASS);
- }
- }
+ for (const unsigned *SR = tri_->getSubRegisters(SpillReg); *SR; ++SR)
+ if (hasInterval(*SR))
+ PRegs.push_back(*SR);
+
+ DEBUG({
+ dbgs() << "Trying to spill:";
+ for (unsigned i = 0, e = PRegs.size(); i != e; ++i)
+ dbgs() << ' ' << tri_->getName(PRegs[i]);
+ dbgs() << '\n';
+ });
SmallPtrSet<MachineInstr*, 8> SeenMIs;
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(li.reg),
@@ -1956,18 +2112,16 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
continue;
SeenMIs.insert(MI);
SlotIndex Index = getInstructionIndex(MI);
+ bool LiveReg = false;
for (unsigned i = 0, e = PRegs.size(); i != e; ++i) {
unsigned PReg = PRegs[i];
LiveInterval &pli = getInterval(PReg);
if (!pli.liveAt(Index))
continue;
- vrm.addEmergencySpill(PReg, MI);
+ LiveReg = true;
SlotIndex StartIdx = Index.getLoadIndex();
SlotIndex EndIdx = Index.getNextIndex().getBaseIndex();
- if (pli.isInOneLiveRange(StartIdx, EndIdx)) {
- pli.removeRange(StartIdx, EndIdx);
- Cut = true;
- } else {
+ if (!pli.isInOneLiveRange(StartIdx, EndIdx)) {
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Ran out of registers during register allocation!";
@@ -1978,15 +2132,14 @@ bool LiveIntervals::spillPhysRegAroundRegDefsUses(const LiveInterval &li,
}
report_fatal_error(Msg.str());
}
- for (const unsigned* AS = tri_->getSubRegisters(PReg); *AS; ++AS) {
- if (!hasInterval(*AS))
- continue;
- LiveInterval &spli = getInterval(*AS);
- if (spli.liveAt(Index))
- spli.removeRange(Index.getLoadIndex(),
- Index.getNextIndex().getBaseIndex());
- }
+ pli.removeRange(StartIdx, EndIdx);
+ LiveReg = true;
}
+ if (!LiveReg)
+ continue;
+ DEBUG(dbgs() << "Emergency spill around " << Index << '\t' << *MI);
+ vrm.addEmergencySpill(SpillReg, MI);
+ Cut = true;
}
return Cut;
}
@@ -1996,7 +2149,7 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg,
LiveInterval& Interval = getOrCreateInterval(reg);
VNInfo* VN = Interval.getNextValue(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
- startInst, true, getVNInfoAllocator());
+ startInst, getVNInfoAllocator());
VN->setHasPHIKill(true);
LiveRange LR(
SlotIndex(getInstructionIndex(startInst).getDefIndex()),
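Among the helpers added above, getLastSplitPoint() encodes a simple placement rule for spill code around invokes: use the first terminator unless the interval is live into a landing-pad successor, in which case the split point must precede the call that may throw. A standalone sketch of that rule, not part of the patch, with an invented Instr record standing in for MachineInstr:

    // split_point_demo.cpp - standalone illustration only.
    #include <cstdio>
    #include <vector>

    struct Instr { const char *Name; bool IsCall, IsTerminator; };

    // Spill code normally goes before the first terminator, but when the
    // interval is live into a landing-pad successor it must be inserted
    // before the call that may throw -- the rule getLastSplitPoint() encodes.
    static unsigned lastSplitPoint(const std::vector<Instr> &Block,
                                   bool LiveIntoLandingPad) {
      unsigned FirstTerm = Block.size();
      for (unsigned i = 0, e = Block.size(); i != e; ++i)
        if (Block[i].IsTerminator) { FirstTerm = i; break; }
      if (!LiveIntoLandingPad)
        return FirstTerm;
      for (unsigned i = Block.size(); i != 0; --i)
        if (Block[i - 1].IsCall)
          return i - 1;
      return FirstTerm;    // no call that can throw; first terminator is fine
    }

    int main() {
      std::vector<Instr> Block;
      Instr I0 = {"t1 = load", false, false};
      Instr I1 = {"call foo",  true,  false};   // may throw into a landing pad
      Instr I2 = {"t2 = add",  false, false};
      Instr I3 = {"br exit",   false, true};
      Block.push_back(I0); Block.push_back(I1);
      Block.push_back(I2); Block.push_back(I3);
      std::printf("no landing pad: split at %u, landing pad: split at %u\n",
                  lastSplitPoint(Block, false), lastSplitPoint(Block, true));
      return 0;
    }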
diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp
new file mode 100644
index 000000000000..205f28a0d65a
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalUnion.cpp
@@ -0,0 +1,315 @@
+//===-- LiveIntervalUnion.cpp - Live interval union data structure --------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion represents a coalesced set of live intervals. This may be
+// used during coalescing to represent a congruence class, or during register
+// allocation to model liveness of a physical register.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "llvm/ADT/SparseBitVector.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+
+// Merge a LiveInterval's segments. Guarantee no overlaps.
+void LiveIntervalUnion::unify(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Insert each of the virtual register's live segments into the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ SegPos.insert(RegPos->start, RegPos->end, &VirtReg);
+ if (++RegPos == RegEnd)
+ return;
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+// Remove a live virtual register's segments from this union.
+void LiveIntervalUnion::extract(LiveInterval &VirtReg) {
+ if (VirtReg.empty())
+ return;
+ ++Tag;
+
+ // Remove each of the virtual register's live segments from the map.
+ LiveInterval::iterator RegPos = VirtReg.begin();
+ LiveInterval::iterator RegEnd = VirtReg.end();
+ SegmentIter SegPos = Segments.find(RegPos->start);
+
+ for (;;) {
+ assert(SegPos.value() == &VirtReg && "Inconsistent LiveInterval");
+ SegPos.erase();
+ if (!SegPos.valid())
+ return;
+
+ // Skip all segments that may have been coalesced.
+ RegPos = VirtReg.advanceTo(RegPos, SegPos.start());
+ if (RegPos == RegEnd)
+ return;
+
+ SegPos.advanceTo(RegPos->start);
+ }
+}
+
+void
+LiveIntervalUnion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
+ OS << "LIU " << PrintReg(RepReg, TRI);
+ if (empty()) {
+ OS << " empty\n";
+ return;
+ }
+ for (LiveSegments::const_iterator SI = Segments.begin(); SI.valid(); ++SI) {
+ OS << " [" << SI.start() << ' ' << SI.stop() << "):"
+ << PrintReg(SI.value()->reg, TRI);
+ }
+ OS << '\n';
+}
+
+void LiveIntervalUnion::InterferenceResult::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) const {
+ OS << '[' << start() << ';' << stop() << "):"
+ << PrintReg(interference()->reg, TRI);
+}
+
+void LiveIntervalUnion::Query::print(raw_ostream &OS,
+ const TargetRegisterInfo *TRI) {
+ OS << "Interferences with ";
+ LiveUnion->print(OS, TRI);
+ InterferenceResult IR = firstInterference();
+ while (isInterference(IR)) {
+ OS << " ";
+ IR.print(OS, TRI);
+ OS << '\n';
+ nextInterference(IR);
+ }
+}
+
+#ifndef NDEBUG
+// Verify the live intervals in this union and add them to the visited set.
+void LiveIntervalUnion::verify(LiveVirtRegBitSet& VisitedVRegs) {
+ for (SegmentIter SI = Segments.begin(); SI.valid(); ++SI)
+ VisitedVRegs.set(SI.value()->reg);
+}
+#endif //!NDEBUG
+
+// Private interface accessed by Query.
+//
+// Find a pair of segments that intersect, one in the live virtual register
+// (LiveInterval), and the other in this LiveIntervalUnion. The caller (Query)
+// is responsible for advancing the LiveIntervalUnion segments to find a
+// "notable" intersection, which requires query-specific logic.
+//
+// This design assumes only a fast mechanism for intersecting a single live
+// virtual register segment with a set of LiveIntervalUnion segments. This may
+// be ok since most virtual registers have very few segments. If we had a data
+// structure that optimized MxN intersection of segments, then we would bypass
+// the loop that advances within the LiveInterval.
+//
+// If no intersection exists, set VirtRegI = VirtRegEnd, and set SI to the first
+// segment whose start point is greater than LiveInterval's end point.
+//
+// Assumes that segments are sorted by start position in both
+// LiveInterval and LiveSegments.
+void LiveIntervalUnion::Query::findIntersection(InterferenceResult &IR) const {
+ // Search until reaching the end of the LiveUnion segments.
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ if (IR.VirtRegI == VirtRegEnd)
+ return;
+ while (IR.LiveUnionI.valid()) {
+ // Slowly advance the live virtual reg iterator until we surpass the next
+ // segment in LiveUnion.
+ //
+ // Note: If this is ever used for coalescing of fixed registers and we have
+ // a live vreg with thousands of segments, then change this code to use
+ // upperBound instead.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break; // Retain current (nonoverlapping) LiveUnionI
+
+ // VirtRegI may have advanced far beyond LiveUnionI, catch up.
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+
+ // Check if no LiveUnionI exists with VirtRegI->Start < LiveUnionI.end
+ if (!IR.LiveUnionI.valid())
+ break;
+ if (IR.LiveUnionI.start() < IR.VirtRegI->end) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "upperBound postcondition");
+ break;
+ }
+ }
+ if (!IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtRegEnd;
+}
+
+// Find the first intersection, and cache interference info
+// (retain segment iterators into both VirtReg and LiveUnion).
+const LiveIntervalUnion::InterferenceResult &
+LiveIntervalUnion::Query::firstInterference() {
+ if (CheckedFirstInterference)
+ return FirstInterference;
+ CheckedFirstInterference = true;
+ InterferenceResult &IR = FirstInterference;
+
+ // Quickly skip interference check for empty sets.
+ if (VirtReg->empty() || LiveUnion->empty()) {
+ IR.VirtRegI = VirtReg->end();
+ } else if (VirtReg->beginIndex() < LiveUnion->startIndex()) {
+ // VirtReg starts first, perform double binary search.
+ IR.VirtRegI = VirtReg->find(LiveUnion->startIndex());
+ if (IR.VirtRegI != VirtReg->end())
+ IR.LiveUnionI = LiveUnion->find(IR.VirtRegI->start);
+ } else {
+ // LiveUnion starts first, perform double binary search.
+ IR.LiveUnionI = LiveUnion->find(VirtReg->beginIndex());
+ if (IR.LiveUnionI.valid())
+ IR.VirtRegI = VirtReg->find(IR.LiveUnionI.start());
+ else
+ IR.VirtRegI = VirtReg->end();
+ }
+ findIntersection(FirstInterference);
+ assert((IR.VirtRegI == VirtReg->end() || IR.LiveUnionI.valid())
+ && "Uninitialized iterator");
+ return FirstInterference;
+}
+
+// Treat the result as an iterator and advance to the next interfering pair
+// of segments. This is a plain iterator with no filter.
+bool LiveIntervalUnion::Query::nextInterference(InterferenceResult &IR) const {
+ assert(isInterference(IR) && "iteration past end of interferences");
+
+ // Advance either the VirtReg or LiveUnion segment to ensure that we visit all
+ // unique overlapping pairs.
+ if (IR.VirtRegI->end < IR.LiveUnionI.stop()) {
+ if (++IR.VirtRegI == VirtReg->end())
+ return false;
+ }
+ else {
+ if (!(++IR.LiveUnionI).valid()) {
+ IR.VirtRegI = VirtReg->end();
+ return false;
+ }
+ }
+ // Short-circuit findIntersection() if possible.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI))
+ return true;
+
+ // Find the next intersection.
+ findIntersection(IR);
+ return isInterference(IR);
+}
+
+// Scan the vector of interfering virtual registers in this union. Assume it's
+// quite small.
+bool LiveIntervalUnion::Query::isSeenInterference(LiveInterval *VirtReg) const {
+ SmallVectorImpl<LiveInterval*>::const_iterator I =
+ std::find(InterferingVRegs.begin(), InterferingVRegs.end(), VirtReg);
+ return I != InterferingVRegs.end();
+}
+
+// Count the number of virtual registers in this union that interfere with this
+// query's live virtual register.
+//
+// The number of times that we either advance IR.VirtRegI or call
+// LiveUnion.upperBound() will be no more than the number of holes in
+// VirtReg. So each invocation of collectInterferingVRegs() takes
+// time proportional to |VirtReg Holes| * time(LiveUnion.upperBound()).
+//
+// For comments on how to speed it up, see Query::findIntersection().
+unsigned LiveIntervalUnion::Query::
+collectInterferingVRegs(unsigned MaxInterferingRegs) {
+ InterferenceResult IR = firstInterference();
+ LiveInterval::iterator VirtRegEnd = VirtReg->end();
+ LiveInterval *RecentInterferingVReg = NULL;
+ if (IR.VirtRegI != VirtRegEnd) while (IR.LiveUnionI.valid()) {
+ // Advance the union's iterator to reach an unseen interfering vreg.
+ do {
+ if (IR.LiveUnionI.value() == RecentInterferingVReg)
+ continue;
+
+ if (!isSeenInterference(IR.LiveUnionI.value()))
+ break;
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+
+ } while ((++IR.LiveUnionI).valid());
+ if (!IR.LiveUnionI.valid())
+ break;
+
+ // Advance the VirtReg iterator until surpassing the next segment in
+ // LiveUnion.
+ IR.VirtRegI = VirtReg->advanceTo(IR.VirtRegI, IR.LiveUnionI.start());
+ if (IR.VirtRegI == VirtRegEnd)
+ break;
+
+ // Check for intersection with the union's segment.
+ if (overlap(*IR.VirtRegI, IR.LiveUnionI)) {
+
+ if (!IR.LiveUnionI.value()->isSpillable())
+ SeenUnspillableVReg = true;
+
+ if (InterferingVRegs.size() == MaxInterferingRegs)
+ // Leave SeenAllInterferences set to false to indicate that at least one
+ // interference exists beyond those we collected.
+ return MaxInterferingRegs;
+
+ InterferingVRegs.push_back(IR.LiveUnionI.value());
+
+ // Cache the most recent interfering vreg to bypass isSeenInterference.
+ RecentInterferingVReg = IR.LiveUnionI.value();
+ ++IR.LiveUnionI;
+ continue;
+ }
+ // VirtRegI may have advanced far beyond LiveUnionI,
+ // do a fast intersection test to "catch up"
+ IR.LiveUnionI.advanceTo(IR.VirtRegI->start);
+ }
+ SeenAllInterferences = true;
+ return InterferingVRegs.size();
+}
+
+bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) {
+ // VirtReg is likely live throughout the loop, so start by checking LIU-Loop
+ // overlaps.
+ IntervalMapOverlaps<LiveIntervalUnion::Map, MachineLoopRange::Map>
+ Overlaps(LiveUnion->getMap(), Loop->getMap());
+ if (!Overlaps.valid())
+ return false;
+
+ // The loop is overlapping an LIU assignment. Check VirtReg as well.
+ LiveInterval::iterator VRI = VirtReg->find(Overlaps.start());
+
+ for (;;) {
+ if (VRI == VirtReg->end())
+ return false;
+ if (VRI->start < Overlaps.stop())
+ return true;
+
+ Overlaps.advanceTo(VRI->start);
+ if (!Overlaps.valid())
+ return false;
+ if (Overlaps.start() < VRI->end)
+ return true;
+
+ VRI = VirtReg->advanceTo(VRI, Overlaps.start());
+ }
+}
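
As an illustration of the iteration contract implemented above, a caller can walk every overlapping pair with firstInterference()/nextInterference(). This is a sketch only; the helper name and the assumption that Q was already initialized with Q.init(&VirtReg, &LiveUnion) are mine, not part of this file.

// Sketch: enumerate all interfering segment pairs for an initialized query.
static void printInterferences(LiveIntervalUnion::Query &Q, raw_ostream &OS,
                               const TargetRegisterInfo *TRI) {
  LiveIntervalUnion::InterferenceResult IR = Q.firstInterference();
  while (Q.isInterference(IR)) {
    // Each result exposes the overlap window and the interfering register.
    OS << "overlap [" << IR.start() << ';' << IR.stop() << ") with "
       << PrintReg(IR.interference()->reg, TRI) << '\n';
    if (!Q.nextInterference(IR))
      break;
  }
}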
diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h
new file mode 100644
index 000000000000..6f9c5f4455e9
--- /dev/null
+++ b/lib/CodeGen/LiveIntervalUnion.h
@@ -0,0 +1,258 @@
+//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// LiveIntervalUnion is a union of live segments across multiple live virtual
+// registers. This may be used during coalescing to represent a congruence
+// class, or during register allocation to model liveness of a physical
+// register.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVEINTERVALUNION
+#define LLVM_CODEGEN_LIVEINTERVALUNION
+
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/CodeGen/LiveInterval.h"
+
+#include <algorithm>
+
+namespace llvm {
+
+class MachineLoopRange;
+class TargetRegisterInfo;
+
+#ifndef NDEBUG
+// forward declaration
+template <unsigned Element> class SparseBitVector;
+typedef SparseBitVector<128> LiveVirtRegBitSet;
+#endif
+
+/// Compare a live virtual register segment to a LiveIntervalUnion segment.
+inline bool
+overlap(const LiveRange &VRSeg,
+ const IntervalMap<SlotIndex, LiveInterval*>::const_iterator &LUSeg) {
+ return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end;
+}
+
+/// Union of live intervals that are strong candidates for coalescing into a
+/// single register (either physical or virtual depending on the context). We
+/// expect the constituent live intervals to be disjoint, although we may
+/// eventually make exceptions to handle value-based interference.
+class LiveIntervalUnion {
+ // A set of live virtual register segments that supports fast insertion,
+ // intersection, and removal.
+ // Mapping SlotIndex intervals to virtual register numbers.
+ typedef IntervalMap<SlotIndex, LiveInterval*> LiveSegments;
+
+public:
+ // SegmentIter can advance to the next segment ordered by starting position,
+ // which may belong to a different live virtual register. We also must be able
+ // to reach the current segment's containing virtual register.
+ typedef LiveSegments::iterator SegmentIter;
+
+ // LiveIntervalUnions share an external allocator.
+ typedef LiveSegments::Allocator Allocator;
+
+ class InterferenceResult;
+ class Query;
+
+private:
+ const unsigned RepReg; // representative register number
+ unsigned Tag; // unique tag for current contents.
+ LiveSegments Segments; // union of virtual reg segments
+
+public:
+ LiveIntervalUnion(unsigned r, Allocator &a) : RepReg(r), Tag(0), Segments(a)
+ {}
+
+ // Iterate over all segments in the union of live virtual registers ordered
+ // by their starting position.
+ SegmentIter begin() { return Segments.begin(); }
+ SegmentIter end() { return Segments.end(); }
+ SegmentIter find(SlotIndex x) { return Segments.find(x); }
+ bool empty() const { return Segments.empty(); }
+ SlotIndex startIndex() const { return Segments.start(); }
+
+ // Provide public access to the underlying map to allow overlap iteration.
+ typedef LiveSegments Map;
+ const Map &getMap() { return Segments; }
+
+ /// getTag - Return an opaque tag representing the current state of the union.
+ unsigned getTag() const { return Tag; }
+
+ /// changedSince - Return true if the union has changed since getTag returned tag.
+ bool changedSince(unsigned tag) const { return tag != Tag; }
+
+ // Add a live virtual register to this union and merge its segments.
+ void unify(LiveInterval &VirtReg);
+
+ // Remove a live virtual register's segments from this union.
+ void extract(LiveInterval &VirtReg);
+
+ // Print union, using TRI to translate register names
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+
+#ifndef NDEBUG
+ // Verify the live intervals in this union and add them to the visited set.
+ void verify(LiveVirtRegBitSet& VisitedVRegs);
+#endif
+
+ /// Cache a single interference test result in the form of two intersecting
+ /// segments. This allows efficiently iterating over the interferences. The
+ /// iteration logic is handled by LiveIntervalUnion::Query which may
+ /// filter interferences depending on the type of query.
+ class InterferenceResult {
+ friend class Query;
+
+ LiveInterval::iterator VirtRegI; // current position in VirtReg
+ SegmentIter LiveUnionI; // current position in LiveUnion
+
+ // Internal ctor.
+ InterferenceResult(LiveInterval::iterator VRegI, SegmentIter UnionI)
+ : VirtRegI(VRegI), LiveUnionI(UnionI) {}
+
+ public:
+ // Public default ctor.
+ InterferenceResult(): VirtRegI(), LiveUnionI() {}
+
+ /// start - Return the start of the current overlap.
+ SlotIndex start() const {
+ return std::max(VirtRegI->start, LiveUnionI.start());
+ }
+
+ /// stop - Return the end of the current overlap.
+ SlotIndex stop() const {
+ return std::min(VirtRegI->end, LiveUnionI.stop());
+ }
+
+ /// interference - Return the register that is interfering here.
+ LiveInterval *interference() const { return LiveUnionI.value(); }
+
+ // Note: this interface provides raw access to the iterators because the
+ // result has no way to tell if it's valid to dereference them.
+
+ // Access the VirtReg segment.
+ LiveInterval::iterator virtRegPos() const { return VirtRegI; }
+
+ // Access the LiveUnion segment.
+ const SegmentIter &liveUnionPos() const { return LiveUnionI; }
+
+ bool operator==(const InterferenceResult &IR) const {
+ return VirtRegI == IR.VirtRegI && LiveUnionI == IR.LiveUnionI;
+ }
+ bool operator!=(const InterferenceResult &IR) const {
+ return !operator==(IR);
+ }
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const;
+ };
+
+ /// Query interferences between a single live virtual register and a live
+ /// interval union.
+ class Query {
+ LiveIntervalUnion *LiveUnion;
+ LiveInterval *VirtReg;
+ InterferenceResult FirstInterference;
+ SmallVector<LiveInterval*,4> InterferingVRegs;
+ bool CheckedFirstInterference;
+ bool SeenAllInterferences;
+ bool SeenUnspillableVReg;
+ unsigned Tag;
+
+ public:
+ Query(): LiveUnion(), VirtReg() {}
+
+ Query(LiveInterval *VReg, LiveIntervalUnion *LIU):
+ LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false),
+ SeenAllInterferences(false), SeenUnspillableVReg(false)
+ {}
+
+ void clear() {
+ LiveUnion = NULL;
+ VirtReg = NULL;
+ InterferingVRegs.clear();
+ CheckedFirstInterference = false;
+ SeenAllInterferences = false;
+ SeenUnspillableVReg = false;
+ Tag = 0;
+ }
+
+ void init(LiveInterval *VReg, LiveIntervalUnion *LIU) {
+ assert(VReg && LIU && "Invalid arguments");
+ if (VirtReg == VReg && LiveUnion == LIU && !LIU->changedSince(Tag)) {
+ // Retain cached results, e.g. firstInterference.
+ return;
+ }
+ clear();
+ LiveUnion = LIU;
+ VirtReg = VReg;
+ Tag = LIU->getTag();
+ }
+
+ LiveInterval &virtReg() const {
+ assert(VirtReg && "uninitialized");
+ return *VirtReg;
+ }
+
+ bool isInterference(const InterferenceResult &IR) const {
+ if (IR.VirtRegI != VirtReg->end()) {
+ assert(overlap(*IR.VirtRegI, IR.LiveUnionI) &&
+ "invalid segment iterators");
+ return true;
+ }
+ return false;
+ }
+
+ // Does this live virtual register interfere with the union?
+ bool checkInterference() { return isInterference(firstInterference()); }
+
+ // Get the first pair of interfering segments, or a noninterfering result.
+ // This initializes the FirstInterference cache.
+ const InterferenceResult &firstInterference();
+
+ // Treat the result as an iterator and advance to the next interfering pair
+ // of segments. Visiting each unique interfering pairs means that the same
+ // VirtReg or LiveUnion segment may be visited multiple times.
+ bool nextInterference(InterferenceResult &IR) const;
+
+ // Count the virtual registers in this union that interfere with this
+ // query's live virtual register, up to maxInterferingRegs.
+ unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX);
+
+ // Was this virtual register visited during collectInterferingVRegs?
+ bool isSeenInterference(LiveInterval *VReg) const;
+
+ // Did collectInterferingVRegs collect all interferences?
+ bool seenAllInterferences() const { return SeenAllInterferences; }
+
+ // Did collectInterferingVRegs encounter an unspillable vreg?
+ bool seenUnspillableVReg() const { return SeenUnspillableVReg; }
+
+ // Vector generated by collectInterferingVRegs.
+ const SmallVectorImpl<LiveInterval*> &interferingVRegs() const {
+ return InterferingVRegs;
+ }
+
+ /// checkLoopInterference - Return true if there is interference overlapping
+ /// Loop.
+ bool checkLoopInterference(MachineLoopRange*);
+
+ void print(raw_ostream &OS, const TargetRegisterInfo *TRI);
+ private:
+ Query(const Query&); // DO NOT IMPLEMENT
+ void operator=(const Query&); // DO NOT IMPLEMENT
+
+ // Private interface for queries
+ void findIntersection(InterferenceResult &IR) const;
+ };
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION)
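
A usage sketch for the Query interface declared above, as a register allocator might drive it when testing one candidate physical register. The function name, the eviction threshold, and the surrounding policy are illustrative assumptions, not part of this header.

// Sketch: can VirtReg be assigned to (or cheaply evicted into) PhysUnion?
static bool checkPhysRegCandidate(LiveInterval &VirtReg,
                                  LiveIntervalUnion &PhysUnion,
                                  LiveIntervalUnion::Query &Q) {
  Q.init(&VirtReg, &PhysUnion);      // reuses cached results if the union's
                                     // tag is unchanged since the last init
  if (!Q.checkInterference())
    return true;                     // no interference at all
  // Collect a bounded number of interfering vregs as eviction candidates.
  unsigned Count = Q.collectInterferingVRegs(8);
  if (!Q.seenAllInterferences() || Q.seenUnspillableVReg())
    return false;                    // too many, or blocked by an unspillable vreg
  return Count <= 1;                 // e.g. allow evicting a single interference
}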
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp
new file mode 100644
index 000000000000..3bbda1c2e609
--- /dev/null
+++ b/lib/CodeGen/LiveRangeEdit.cpp
@@ -0,0 +1,129 @@
+//===--- LiveRangeEdit.cpp - Basic tools for editing a register live range --===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//===----------------------------------------------------------------------===//
+
+#include "LiveRangeEdit.h"
+#include "VirtRegMap.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+
+using namespace llvm;
+
+LiveInterval &LiveRangeEdit::create(MachineRegisterInfo &mri,
+ LiveIntervals &lis,
+ VirtRegMap &vrm) {
+ const TargetRegisterClass *RC = mri.getRegClass(getReg());
+ unsigned VReg = mri.createVirtualRegister(RC);
+ vrm.grow();
+ vrm.setIsSplitFromReg(VReg, vrm.getOriginal(getReg()));
+ LiveInterval &li = lis.getOrCreateInterval(VReg);
+ newRegs_.push_back(&li);
+ return li;
+}
+
+void LiveRangeEdit::scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ for (LiveInterval::vni_iterator I = parent_.vni_begin(),
+ E = parent_.vni_end(); I != E; ++I) {
+ VNInfo *VNI = *I;
+ if (VNI->isUnused())
+ continue;
+ MachineInstr *DefMI = lis.getInstructionFromIndex(VNI->def);
+ if (!DefMI)
+ continue;
+ if (tii.isTriviallyReMaterializable(DefMI, aa))
+ remattable_.insert(VNI);
+ }
+ scannedRemattable_ = true;
+}
+
+bool LiveRangeEdit::anyRematerializable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa) {
+ if (!scannedRemattable_)
+ scanRemattable(lis, tii, aa);
+ return !remattable_.empty();
+}
+
+/// allUsesAvailableAt - Return true if all registers used by OrigMI at
+/// OrigIdx are also available with the same value at UseIdx.
+bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI,
+ SlotIndex OrigIdx,
+ SlotIndex UseIdx,
+ LiveIntervals &lis) {
+ OrigIdx = OrigIdx.getUseIndex();
+ UseIdx = UseIdx.getUseIndex();
+ for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = OrigMI->getOperand(i);
+ if (!MO.isReg() || !MO.getReg() || MO.getReg() == getReg())
+ continue;
+ // Reserved registers are OK.
+ if (MO.isUndef() || !lis.hasInterval(MO.getReg()))
+ continue;
+ // We don't want to move any defs.
+ if (MO.isDef())
+ return false;
+ // We cannot depend on virtual registers in uselessRegs_.
+ for (unsigned ui = 0, ue = uselessRegs_.size(); ui != ue; ++ui)
+ if (uselessRegs_[ui]->reg == MO.getReg())
+ return false;
+
+ LiveInterval &li = lis.getInterval(MO.getReg());
+ const VNInfo *OVNI = li.getVNInfoAt(OrigIdx);
+ if (!OVNI)
+ continue;
+ if (OVNI != li.getVNInfoAt(UseIdx))
+ return false;
+ }
+ return true;
+}
+
+bool LiveRangeEdit::canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis) {
+ assert(scannedRemattable_ && "Call anyRematerializable first");
+
+ // Use scanRemattable info.
+ if (!remattable_.count(RM.ParentVNI))
+ return false;
+
+ // No defining instruction.
+ RM.OrigMI = lis.getInstructionFromIndex(RM.ParentVNI->def);
+ assert(RM.OrigMI && "Defining instruction for remattable value disappeared");
+
+ // If only cheap remats were requested, bail out early.
+ if (cheapAsAMove && !RM.OrigMI->getDesc().isAsCheapAsAMove())
+ return false;
+
+ // Verify that all used registers are available with the same values.
+ if (!allUsesAvailableAt(RM.OrigMI, RM.ParentVNI->def, UseIdx, lis))
+ return false;
+
+ return true;
+}
+
+SlotIndex LiveRangeEdit::rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ const TargetRegisterInfo &tri) {
+ assert(RM.OrigMI && "Invalid remat");
+ tii.reMaterialize(MBB, MI, DestReg, 0, RM.OrigMI, tri);
+ rematted_.insert(RM.ParentVNI);
+ return lis.InsertMachineInstrInMaps(--MI).getDefIndex();
+}
+
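
The functions above are intended to be called in a fixed order: scan once, check the specific value, then emit the copy. A sketch of that sequence follows; tryRematBeforeMI and its parameter list are hypothetical, not part of this file.

// Sketch: rematerialize the parent value live at UseIdx instead of reloading.
static bool tryRematBeforeMI(LiveRangeEdit &Edit, SlotIndex UseIdx,
                             unsigned DestReg, MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MI,
                             LiveIntervals &LIS, AliasAnalysis *AA,
                             const TargetInstrInfo &TII,
                             const TargetRegisterInfo &TRI) {
  // 1. Scan the parent interval once; bail out if nothing is remattable.
  if (!Edit.anyRematerializable(LIS, TII, AA))
    return false;
  // 2. Check the specific value live at the use point.
  LiveRangeEdit::Remat RM(Edit.getParent().getVNInfoAt(UseIdx));
  if (!RM.ParentVNI || !Edit.canRematerializeAt(RM, UseIdx, false, LIS))
    return false;
  // 3. Emit a copy of the defining instruction; the caller still has to grow
  //    DestReg's live range from the returned index to the use.
  Edit.rematerializeAt(MBB, MI, DestReg, RM, LIS, TII, TRI);
  return true;
}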
diff --git a/lib/CodeGen/LiveRangeEdit.h b/lib/CodeGen/LiveRangeEdit.h
new file mode 100644
index 000000000000..73f69ed63983
--- /dev/null
+++ b/lib/CodeGen/LiveRangeEdit.h
@@ -0,0 +1,135 @@
+//===---- LiveRangeEdit.h - Basic tools for split and spill -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// The LiveRangeEdit class represents changes done to a virtual register when it
+// is spilled or split.
+//
+// The parent register is never changed. Instead, a number of new virtual
+// registers are created and added to the newRegs vector.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_LIVERANGEEDIT_H
+#define LLVM_CODEGEN_LIVERANGEEDIT_H
+
+#include "llvm/CodeGen/LiveInterval.h"
+#include "llvm/ADT/SmallPtrSet.h"
+
+namespace llvm {
+
+class AliasAnalysis;
+class LiveIntervals;
+class MachineRegisterInfo;
+class VirtRegMap;
+
+class LiveRangeEdit {
+ LiveInterval &parent_;
+ SmallVectorImpl<LiveInterval*> &newRegs_;
+ const SmallVectorImpl<LiveInterval*> &uselessRegs_;
+
+ /// firstNew_ - Index of the first register added to newRegs_.
+ const unsigned firstNew_;
+
+ /// scannedRemattable_ - true when remattable values have been identified.
+ bool scannedRemattable_;
+
+ /// remattable_ - Values defined by remattable instructions as identified by
+ /// tii.isTriviallyReMaterializable().
+ SmallPtrSet<VNInfo*,4> remattable_;
+
+ /// rematted_ - Values that were actually rematted, and so need to have their
+ /// live range trimmed or entirely removed.
+ SmallPtrSet<VNInfo*,4> rematted_;
+
+ /// scanRemattable - Identify the parent_ values that may rematerialize.
+ void scanRemattable(LiveIntervals &lis,
+ const TargetInstrInfo &tii,
+ AliasAnalysis *aa);
+
+ /// allUsesAvailableAt - Return true if all registers used by OrigMI at
+ /// OrigIdx are also available with the same value at UseIdx.
+ bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx,
+ SlotIndex UseIdx, LiveIntervals &lis);
+
+public:
+ /// Create a LiveRangeEdit for breaking down parent into smaller pieces.
+ /// @param parent The register being spilled or split.
+ /// @param newRegs List to receive any new registers created. This needn't be
+ /// empty initially; any existing registers are ignored.
+ /// @param uselessRegs List of registers that can't be used when
+ /// rematerializing values because they are about to be removed.
+ LiveRangeEdit(LiveInterval &parent,
+ SmallVectorImpl<LiveInterval*> &newRegs,
+ const SmallVectorImpl<LiveInterval*> &uselessRegs)
+ : parent_(parent), newRegs_(newRegs), uselessRegs_(uselessRegs),
+ firstNew_(newRegs.size()), scannedRemattable_(false) {}
+
+ LiveInterval &getParent() const { return parent_; }
+ unsigned getReg() const { return parent_.reg; }
+
+ /// Iterator for accessing the new registers added by this edit.
+ typedef SmallVectorImpl<LiveInterval*>::const_iterator iterator;
+ iterator begin() const { return newRegs_.begin()+firstNew_; }
+ iterator end() const { return newRegs_.end(); }
+ unsigned size() const { return newRegs_.size()-firstNew_; }
+ bool empty() const { return size() == 0; }
+ LiveInterval *get(unsigned idx) const { return newRegs_[idx+firstNew_]; }
+
+ /// create - Create a new register with the same class and stack slot as
+ /// parent.
+ LiveInterval &create(MachineRegisterInfo&, LiveIntervals&, VirtRegMap&);
+
+ /// anyRematerializable - Return true if any parent values may be
+ /// rematerializable.
+ /// This function must be called before any rematerialization is attempted.
+ bool anyRematerializable(LiveIntervals&, const TargetInstrInfo&,
+ AliasAnalysis*);
+
+ /// Remat - Information needed to rematerialize at a specific location.
+ struct Remat {
+ VNInfo *ParentVNI; // parent_'s value at the remat location.
+ MachineInstr *OrigMI; // Instruction defining ParentVNI.
+ explicit Remat(VNInfo *ParentVNI) : ParentVNI(ParentVNI), OrigMI(0) {}
+ };
+
+ /// canRematerializeAt - Determine if ParentVNI can be rematerialized at
+ /// UseIdx. It is assumed that parent_.getVNInfoAt(UseIdx) == ParentVNI.
+ /// When cheapAsAMove is set, only cheap remats are allowed.
+ bool canRematerializeAt(Remat &RM,
+ SlotIndex UseIdx,
+ bool cheapAsAMove,
+ LiveIntervals &lis);
+
+ /// rematerializeAt - Rematerialize RM.ParentVNI into DestReg by inserting an
+ /// instruction into MBB before MI. The new instruction is mapped, but
+ /// liveness is not updated.
+ /// Return the SlotIndex of the new instruction.
+ SlotIndex rematerializeAt(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg,
+ const Remat &RM,
+ LiveIntervals&,
+ const TargetInstrInfo&,
+ const TargetRegisterInfo&);
+
+ /// markRematerialized - explicitly mark a value as rematerialized after doing
+ /// it manually.
+ void markRematerialized(VNInfo *ParentVNI) {
+ rematted_.insert(ParentVNI);
+ }
+
+ /// didRematerialize - Return true if ParentVNI was rematerialized anywhere.
+ bool didRematerialize(VNInfo *ParentVNI) const {
+ return rematted_.count(ParentVNI);
+ }
+};
+
+}
+
+#endif
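
As a construction sketch for this interface (the spiller-side names are assumptions): new registers are appended to a caller-owned vector, and begin()/end() expose only the registers added by this particular edit.

// Sketch: split Parent and queue only the pieces created by this edit.
static void splitAndRequeue(LiveInterval &Parent,
                            SmallVectorImpl<LiveInterval*> &NewRegs,
                            SmallVectorImpl<LiveInterval*> &WorkList) {
  SmallVector<LiveInterval*, 4> NoUseless;  // nothing is being deleted here
  LiveRangeEdit Edit(Parent, NewRegs, NoUseless);

  // ... splitting code would call Edit.create(MRI, LIS, VRM) per new piece ...

  // Even if NewRegs already held entries from earlier edits, the iterator
  // starts at firstNew_, so only this edit's registers are requeued.
  for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
    WorkList.push_back(*I);
}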
diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp
index b5c385f77239..c75196a47210 100644
--- a/lib/CodeGen/LiveStackAnalysis.cpp
+++ b/lib/CodeGen/LiveStackAnalysis.cpp
@@ -26,7 +26,9 @@ using namespace llvm;
char LiveStacks::ID = 0;
INITIALIZE_PASS(LiveStacks, "livestacks",
- "Live Stack Slot Analysis", false, false);
+ "Live Stack Slot Analysis", false, false)
+
+char &llvm::LiveStacksID = LiveStacks::ID;
void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -48,6 +50,22 @@ bool LiveStacks::runOnMachineFunction(MachineFunction &) {
return false;
}
+LiveInterval &
+LiveStacks::getOrCreateInterval(int Slot, const TargetRegisterClass *RC) {
+ assert(Slot >= 0 && "Spill slot index must be >= 0");
+ SS2IntervalMap::iterator I = S2IMap.find(Slot);
+ if (I == S2IMap.end()) {
+ I = S2IMap.insert(I, std::make_pair(Slot,
+ LiveInterval(TargetRegisterInfo::index2StackSlot(Slot), 0.0F)));
+ S2RCMap.insert(std::make_pair(Slot, RC));
+ } else {
+ // Use the largest common subclass register class.
+ const TargetRegisterClass *OldRC = S2RCMap[Slot];
+ S2RCMap[Slot] = getCommonSubClass(OldRC, RC);
+ }
+ return I->second;
+}
+
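
The else-branch above is what lets one spill slot be shared by values of related register classes; a brief sketch (the wide/narrow class names are hypothetical):

// Sketch: the first call creates the slot's interval, the second reuses it
// and narrows the recorded class to getCommonSubClass(WideRC, NarrowRC).
static void noteSharedSlot(LiveStacks &LS, int Slot,
                           const TargetRegisterClass *WideRC,
                           const TargetRegisterClass *NarrowRC) {
  LS.getOrCreateInterval(Slot, WideRC);
  LS.getOrCreateInterval(Slot, NarrowRC);
}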
/// print - Implement the dump method.
void LiveStacks::print(raw_ostream &OS, const Module*) const {
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 375307b973a9..dd43ef2530c1 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -42,8 +41,11 @@
using namespace llvm;
char LiveVariables::ID = 0;
-INITIALIZE_PASS(LiveVariables, "livevars",
- "Live Variable Analysis", false, false);
+INITIALIZE_PASS_BEGIN(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
+INITIALIZE_PASS_DEPENDENCY(UnreachableMachineBlockElim)
+INITIALIZE_PASS_END(LiveVariables, "livevars",
+ "Live Variable Analysis", false, false)
void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
@@ -79,13 +81,7 @@ void LiveVariables::VarInfo::dump() const {
LiveVariables::VarInfo &LiveVariables::getVarInfo(unsigned RegIdx) {
assert(TargetRegisterInfo::isVirtualRegister(RegIdx) &&
"getVarInfo: not a virtual register!");
- RegIdx -= TargetRegisterInfo::FirstVirtualRegister;
- if (RegIdx >= VirtRegInfo.size()) {
- if (RegIdx >= 2*VirtRegInfo.size())
- VirtRegInfo.resize(RegIdx*2);
- else
- VirtRegInfo.resize(2*VirtRegInfo.size());
- }
+ VirtRegInfo.grow(RegIdx);
return VirtRegInfo[RegIdx];
}
@@ -498,9 +494,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
PHIJoins.clear();
- /// Get some space for a respectable number of registers.
- VirtRegInfo.resize(64);
-
analyzePHINodes(mf);
// Calculate live variable information in depth first order on the CFG of the
@@ -628,19 +621,14 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
// Convert and transfer the dead / killed information we have gathered into
// VirtRegInfo onto MI's.
- for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i)
- for (unsigned j = 0, e2 = VirtRegInfo[i].Kills.size(); j != e2; ++j)
- if (VirtRegInfo[i].Kills[j] ==
- MRI->getVRegDef(i + TargetRegisterInfo::FirstVirtualRegister))
- VirtRegInfo[i]
- .Kills[j]->addRegisterDead(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ for (unsigned i = 0, e1 = VirtRegInfo.size(); i != e1; ++i) {
+ const unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ for (unsigned j = 0, e2 = VirtRegInfo[Reg].Kills.size(); j != e2; ++j)
+ if (VirtRegInfo[Reg].Kills[j] == MRI->getVRegDef(Reg))
+ VirtRegInfo[Reg].Kills[j]->addRegisterDead(Reg, TRI);
else
- VirtRegInfo[i]
- .Kills[j]->addRegisterKilled(i +
- TargetRegisterInfo::FirstVirtualRegister,
- TRI);
+ VirtRegInfo[Reg].Kills[j]->addRegisterKilled(Reg, TRI);
+ }
// Check to make sure there are no unreachable blocks in the MC CFG for the
// function. If so, it is due to a bug in the instruction selector or some
@@ -775,8 +763,8 @@ void LiveVariables::addNewBlock(MachineBasicBlock *BB,
getVarInfo(BBI->getOperand(i).getReg()).AliveBlocks.set(NumNew);
// Update info for all live variables
- for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
- E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
VarInfo &VI = getVarInfo(Reg);
if (!VI.AliveBlocks.test(NumNew) && VI.isLiveIn(*SuccBB, Reg, *MRI))
VI.AliveBlocks.set(NumNew);
diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp
index 7e366f0ceec0..1318d6212497 100644
--- a/lib/CodeGen/LocalStackSlotAllocation.cpp
+++ b/lib/CodeGen/LocalStackSlotAllocation.cpp
@@ -9,7 +9,7 @@
//
// This pass assigns local frame indices to stack slots relative to one another
// and allocates additional base registers to access them when the target
-// estimates the are likely to be out of range of stack pointer and frame
+// estimates they are likely to be out of range of stack pointer and frame
// pointer relative addressing.
//
//===----------------------------------------------------------------------===//
@@ -34,7 +34,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
using namespace llvm;
@@ -152,9 +152,9 @@ void LocalStackSlotPass::AdjustStackOffset(MachineFrameInfo *MFI,
void LocalStackSlotPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int64_t Offset = 0;
unsigned MaxAlign = 0;
@@ -227,27 +227,28 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
MachineFrameInfo *MFI = Fn.getFrameInfo();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
- MachineBasicBlock::iterator InsertionPt = Fn.begin()->begin();
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Collect all of the instructions in the block that reference
// a frame index. Also store the frame index referenced to ease later
// lookup. (For any insn that has more than one FI reference, we arbitrarily
// choose the first one).
SmallVector<FrameRef, 64> FrameReferenceInsns;
- // A base register definition is a register+offset pair.
- SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
+ // A base register definition is a register + offset pair.
+ SmallVector<std::pair<unsigned, int64_t>, 8> BaseRegisters;
for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
MachineInstr *MI = I;
+
// Debug value instructions can't be out of range, so they don't need
// any updates.
if (MI->isDebugValue())
continue;
+
// For now, allocate the base register(s) within the basic block
// where they're used, and don't try to keep them around outside
// of that. It may be beneficial to try sharing them more broadly
@@ -268,11 +269,13 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
}
}
}
+
// Sort the frame references by local offset
array_pod_sort(FrameReferenceInsns.begin(), FrameReferenceInsns.end());
+ MachineBasicBlock *Entry = Fn.begin();
- // Loop throught the frame references and allocate for them as necessary
+ // Loop through the frame references and allocate for them as necessary.
for (int ref = 0, e = FrameReferenceInsns.size(); ref < e ; ++ref) {
MachineBasicBlock::iterator I =
FrameReferenceInsns[ref].getMachineInstr();
@@ -321,10 +324,12 @@ bool LocalStackSlotPass::insertFrameReferenceRegisters(MachineFunction &Fn) {
DEBUG(dbgs() << " Materializing base register " << BaseReg <<
" at frame local offset " <<
LocalOffsets[FrameIdx] + InstrOffset << "\n");
+
// Tell the target to insert the instruction to initialize
// the base register.
- TRI->materializeFrameBaseRegister(InsertionPt, BaseReg,
- FrameIdx, InstrOffset);
+ // MachineBasicBlock::iterator InsertionPt = Entry->begin();
+ TRI->materializeFrameBaseRegister(Entry, BaseReg, FrameIdx,
+ InstrOffset);
// The base register already includes any offset specified
// by the instruction, so account for that so it doesn't get
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 50f3f672dced..ccbff0af5b2c 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -146,27 +147,46 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() {
return I;
}
+MachineBasicBlock::iterator
+MachineBasicBlock::SkipPHIsAndLabels(MachineBasicBlock::iterator I) {
+ while (I != end() && (I->isPHI() || I->isLabel() || I->isDebugValue()))
+ ++I;
+ return I;
+}
+
MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
iterator I = end();
- while (I != begin() && (--I)->getDesc().isTerminator())
+ while (I != begin() && ((--I)->getDesc().isTerminator() || I->isDebugValue()))
; /*noop */
- if (I != end() && !I->getDesc().isTerminator()) ++I;
+ while (I != end() && !I->getDesc().isTerminator())
+ ++I;
return I;
}
-void MachineBasicBlock::dump() const {
- print(dbgs());
+MachineBasicBlock::iterator MachineBasicBlock::getLastNonDebugInstr() {
+ iterator B = begin(), I = end();
+ while (I != B) {
+ --I;
+ if (I->isDebugValue())
+ continue;
+ return I;
+ }
+ // The block is all debug values.
+ return end();
+}
+
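
A sketch of the intended use (the caller is illustrative, not part of this patch): code inspecting the last real instruction must not be fooled by trailing DBG_VALUEs.

// Sketch: does MBB end in an unconditional branch, ignoring debug values?
static bool endsInUncondBranch(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;                 // empty block, or nothing but debug values
  return I->getDesc().isUnconditionalBranch();
}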
+const MachineBasicBlock *MachineBasicBlock::getLandingPadSuccessor() const {
+ // A block with a landing pad successor only has one other successor.
+ if (succ_size() > 2)
+ return 0;
+ for (const_succ_iterator I = succ_begin(), E = succ_end(); I != E; ++I)
+ if ((*I)->isLandingPad())
+ return *I;
+ return 0;
}
-static inline void OutputReg(raw_ostream &os, unsigned RegNo,
- const TargetRegisterInfo *TRI = 0) {
- if (RegNo != 0 && TargetRegisterInfo::isPhysicalRegister(RegNo)) {
- if (TRI)
- os << " %" << TRI->get(RegNo).Name;
- else
- os << " %physreg" << RegNo;
- } else
- os << " %reg" << RegNo;
+void MachineBasicBlock::dump() const {
+ print(dbgs());
}
StringRef MachineBasicBlock::getName() const {
@@ -176,7 +196,7 @@ StringRef MachineBasicBlock::getName() const {
return "(null)";
}
-void MachineBasicBlock::print(raw_ostream &OS) const {
+void MachineBasicBlock::print(raw_ostream &OS, SlotIndexes *Indexes) const {
const MachineFunction *MF = getParent();
if (!MF) {
OS << "Can't print out MachineBasicBlock because parent MachineFunction"
@@ -186,6 +206,9 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (Alignment) { OS << "Alignment " << Alignment << "\n"; }
+ if (Indexes)
+ OS << Indexes->getMBBStartIdx(this) << '\t';
+
OS << "BB#" << getNumber() << ": ";
const char *Comma = "";
@@ -198,28 +221,36 @@ void MachineBasicBlock::print(raw_ostream &OS) const {
if (hasAddressTaken()) { OS << Comma << "ADDRESS TAKEN"; Comma = ", "; }
OS << '\n';
- const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
if (!livein_empty()) {
+ if (Indexes) OS << '\t';
OS << " Live Ins:";
for (livein_iterator I = livein_begin(),E = livein_end(); I != E; ++I)
- OutputReg(OS, *I, TRI);
+ OS << ' ' << PrintReg(*I, TRI);
OS << '\n';
}
// Print the preds of this block according to the CFG.
if (!pred_empty()) {
+ if (Indexes) OS << '\t';
OS << " Predecessors according to CFG:";
for (const_pred_iterator PI = pred_begin(), E = pred_end(); PI != E; ++PI)
OS << " BB#" << (*PI)->getNumber();
OS << '\n';
}
-
+
for (const_iterator I = begin(); I != end(); ++I) {
+ if (Indexes) {
+ if (Indexes->hasIndex(I))
+ OS << Indexes->getInstructionIndex(I);
+ OS << '\t';
+ }
OS << '\t';
I->print(OS, &getParent()->getTarget());
}
// Print the successors of this block according to the CFG.
if (!succ_empty()) {
+ if (Indexes) OS << '\t';
OS << " Successors according to CFG:";
for (const_succ_iterator SI = succ_begin(), E = succ_end(); SI != E; ++SI)
OS << " BB#" << (*SI)->getNumber();
@@ -431,14 +462,24 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) {
MachineFunction *MF = getParent();
DebugLoc dl; // FIXME: this is nowhere
- // We may need to update this's terminator, but we can't do that if AnalyzeBranch
- // fails. If this uses a jump table, we won't touch it.
+ // We may need to update this's terminator, but we can't do that if
+ // AnalyzeBranch fails. If this uses a jump table, we won't touch it.
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (TII->AnalyzeBranch(*this, TBB, FBB, Cond))
return NULL;
+ // Avoid bugpoint weirdness: A block may end with a conditional branch but
+ // jump to the same MBB in either case. That creates duplicate CFG edges
+ // that we can't handle. Since this never happens in properly optimized
+ // code, just skip those edges.
+ if (TBB && TBB == FBB) {
+ DEBUG(dbgs() << "Won't split critical edge after degenerate BB#"
+ << getNumber() << '\n');
+ return NULL;
+ }
+
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB);
DEBUG(dbgs() << "Splitting critical edge:"
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
index 272b54dea1fa..07a7d27b019f 100644
--- a/lib/CodeGen/MachineCSE.cpp
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -22,15 +22,18 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/RecyclingAllocator.h"
using namespace llvm;
STATISTIC(NumCoalesces, "Number of copies coalesced");
STATISTIC(NumCSEs, "Number of common subexpression eliminated");
-STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated");
+STATISTIC(NumPhysCSEs,
+ "Number of physreg referencing common subexpr eliminated");
+STATISTIC(NumCommutes, "Number of copies coalesced after commuting");
namespace {
class MachineCSE : public MachineFunctionPass {
@@ -41,7 +44,9 @@ namespace {
MachineRegisterInfo *MRI;
public:
static char ID; // Pass identification
- MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {}
+ MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
+ initializeMachineCSEPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -61,10 +66,13 @@ namespace {
private:
const unsigned LookAheadLimit;
- typedef ScopedHashTableScope<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> ScopeType;
+ typedef RecyclingAllocator<BumpPtrAllocator,
+ ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
+ typedef ScopedHashTable<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
+ typedef ScopedHTType::ScopeTy ScopeType;
DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
- ScopedHashTable<MachineInstr*, unsigned, MachineInstrExpressionTrait> VNT;
+ ScopedHTType VNT;
SmallVector<MachineInstr*, 64> Exps;
unsigned CurrVN;
@@ -72,11 +80,11 @@ namespace {
bool isPhysDefTriviallyDead(unsigned Reg,
MachineBasicBlock::const_iterator I,
MachineBasicBlock::const_iterator E) const ;
- bool hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const;
- bool PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const;
+ bool hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const;
+ bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const;
bool isCSECandidate(MachineInstr *MI);
bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI);
@@ -91,8 +99,12 @@ namespace {
} // end anonymous namespace
char MachineCSE::ID = 0;
-INITIALIZE_PASS(MachineCSE, "machine-cse",
- "Machine Common Subexpression Elimination", false, false);
+INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineCSE, "machine-cse",
+ "Machine Common Subexpression Elimination", false, false)
FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
@@ -104,7 +116,7 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
if (!MO.isReg() || !MO.isUse())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (!MRI->hasOneNonDBGUse(Reg))
// Only coalesce single use copies. This ensure the copy will be
@@ -120,17 +132,12 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
continue;
if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg())
continue;
- const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
- const TargetRegisterClass *RC = MRI->getRegClass(Reg);
- const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC);
- if (!NewRC)
+ if (!MRI->constrainRegClass(SrcReg, MRI->getRegClass(Reg)))
continue;
DEBUG(dbgs() << "Coalescing: " << *DefMI);
- DEBUG(dbgs() << "*** to: " << *MI);
+ DEBUG(dbgs() << "*** to: " << *MI);
MO.setReg(SrcReg);
MRI->clearKillFlags(SrcReg);
- if (NewRC != SRC)
- MRI->setRegClass(SrcReg, NewRC);
DefMI->eraseFromParent();
++NumCoalesces;
Changed = true;
@@ -176,14 +183,14 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
return false;
}
-/// hasLivePhysRegDefUse - Return true if the specified instruction read / write
+/// hasLivePhysRegDefUses - Return true if the specified instruction reads/writes
/// physical registers (except for dead defs of physical registers). It also
/// returns the physical register def by reference if it's the only one and the
/// instruction does not uses a physical register.
-bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
- const MachineBasicBlock *MBB,
- unsigned &PhysDef) const {
- PhysDef = 0;
+bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
+ const MachineBasicBlock *MBB,
+ SmallSet<unsigned,8> &PhysRefs) const {
+ MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg())
@@ -193,35 +200,22 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI,
continue;
if (TargetRegisterInfo::isVirtualRegister(Reg))
continue;
- if (MO.isUse()) {
- // Can't touch anything to read a physical register.
- PhysDef = 0;
- return true;
- }
- if (MO.isDead())
- // If the def is dead, it's ok.
- continue;
- // Ok, this is a physical register def that's not marked "dead". That's
+ // If the def is dead, it's ok. But the def may not be marked "dead". That's
// common since this pass is run before livevariables. We can scan
// forward a few instructions and check if it is obviously dead.
- if (PhysDef) {
- // Multiple physical register defs. These are rare, forget about it.
- PhysDef = 0;
- return true;
- }
- PhysDef = Reg;
+ if (MO.isDef() &&
+ (MO.isDead() || isPhysDefTriviallyDead(Reg, I, MBB->end())))
+ continue;
+ PhysRefs.insert(Reg);
+ for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
+ PhysRefs.insert(*Alias);
}
- if (PhysDef) {
- MachineBasicBlock::const_iterator I = MI; I = llvm::next(I);
- if (!isPhysDefTriviallyDead(PhysDef, I, MBB->end()))
- return true;
- }
- return false;
+ return !PhysRefs.empty();
}
-bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
- unsigned PhysDef) const {
+bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
+ SmallSet<unsigned,8> &PhysRefs) const {
// For now conservatively returns false if the common subexpression is
// not in the same basic block as the given instruction.
MachineBasicBlock *MBB = MI->getParent();
@@ -237,8 +231,17 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI,
if (I == E)
return true;
- if (I->modifiesRegister(PhysDef, TRI))
- return false;
+
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = I->getOperand(i);
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (TargetRegisterInfo::isVirtualRegister(MOReg))
+ continue;
+ if (PhysRefs.count(MOReg))
+ return false;
+ }
--LookAheadLeft;
++I;
@@ -259,7 +262,7 @@ bool MachineCSE::isCSECandidate(MachineInstr *MI) {
// Ignore stuff that we obviously can't move.
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayStore() || TID.isCall() || TID.isTerminator() ||
- TID.hasUnmodeledSideEffects())
+ MI->hasUnmodeledSideEffects())
return false;
if (TID.mayLoad()) {
@@ -281,14 +284,13 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
MachineInstr *CSMI, MachineInstr *MI) {
// FIXME: Heuristics that works around the lack the live range splitting.
- // Heuristics #1: Don't cse "cheap" computating if the def is not local or in an
- // immediate predecessor. We don't want to increase register pressure and end up
- // causing other computation to be spilled.
+ // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
+ // an immediate predecessor. We don't want to increase register pressure and
+ // end up causing other computation to be spilled.
if (MI->getDesc().isAsCheapAsAMove()) {
MachineBasicBlock *CSBB = CSMI->getParent();
MachineBasicBlock *BB = MI->getParent();
- if (CSBB != BB &&
- find(CSBB->succ_begin(), CSBB->succ_end(), BB) == CSBB->succ_end())
+ if (CSBB != BB && !CSBB->isSuccessor(BB))
return false;
}
@@ -297,7 +299,7 @@ bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
bool HasVRegUse = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isUse() && MO.getReg() &&
+ if (MO.isReg() && MO.isUse() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
HasVRegUse = true;
break;
@@ -359,7 +361,6 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
if (!isCSECandidate(MI))
continue;
- bool DefPhys = false;
bool FoundCSE = VNT.count(MI);
if (!FoundCSE) {
// Look for trivial copy coalescing opportunities.
@@ -370,24 +371,37 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
FoundCSE = VNT.count(MI);
}
}
- // FIXME: commute commutable instructions?
- // If the instruction defines a physical register and the value *may* be
+ // Commute commutable instructions.
+ bool Commuted = false;
+ if (!FoundCSE && MI->getDesc().isCommutable()) {
+ MachineInstr *NewMI = TII->commuteInstruction(MI);
+ if (NewMI) {
+ Commuted = true;
+ FoundCSE = VNT.count(NewMI);
+ if (NewMI != MI)
+ // New instruction. It doesn't need to be kept.
+ NewMI->eraseFromParent();
+ else if (!FoundCSE)
+ // MI was changed but it didn't help, commute it back!
+ (void)TII->commuteInstruction(MI);
+ }
+ }
+
+ // If the instruction defines physical registers and the values *may* be
// used, then it's not safe to replace it with a common subexpression.
- unsigned PhysDef = 0;
- if (FoundCSE && hasLivePhysRegDefUse(MI, MBB, PhysDef)) {
+ // It's also not safe if the instruction uses physical registers.
+ SmallSet<unsigned,8> PhysRefs;
+ if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs)) {
FoundCSE = false;
// ... Unless the CS is local and it also defines the physical register
- // which is not clobbered in between.
- if (PhysDef) {
- unsigned CSVN = VNT.lookup(MI);
- MachineInstr *CSMI = Exps[CSVN];
- if (PhysRegDefReaches(CSMI, MI, PhysDef)) {
- FoundCSE = true;
- DefPhys = true;
- }
- }
+ // which is not clobbered in between and the physical register uses
+ // were not clobbered.
+ unsigned CSVN = VNT.lookup(MI);
+ MachineInstr *CSMI = Exps[CSVN];
+ if (PhysRegDefsReach(CSMI, MI, PhysRefs))
+ FoundCSE = true;
}
if (!FoundCSE) {
@@ -432,8 +446,10 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
}
MI->eraseFromParent();
++NumCSEs;
- if (DefPhys)
+ if (!PhysRefs.empty())
++NumPhysCSEs;
+ if (Commuted)
+ ++NumCommutes;
} else {
DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
VNT.insert(MI, CurrVN++);
diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp
index 3c674789244a..04c8ecbf9bdc 100644
--- a/lib/CodeGen/MachineDominators.cpp
+++ b/lib/CodeGen/MachineDominators.cpp
@@ -25,7 +25,7 @@ TEMPLATE_INSTANTIATION(class DominatorTreeBase<MachineBasicBlock>);
char MachineDominatorTree::ID = 0;
INITIALIZE_PASS(MachineDominatorTree, "machinedomtree",
- "MachineDominator Tree Construction", true, true);
+ "MachineDominator Tree Construction", true, true)
char &llvm::MachineDominatorsID = MachineDominatorTree::ID;
@@ -42,6 +42,7 @@ bool MachineDominatorTree::runOnMachineFunction(MachineFunction &F) {
MachineDominatorTree::MachineDominatorTree()
: MachineFunctionPass(ID) {
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
DT = new DominatorTreeBase<MachineBasicBlock>(false);
}
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 017170076ceb..85532407ca43 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -33,7 +33,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/GraphWriter.h"
@@ -52,14 +52,15 @@ void ilist_traits<MachineBasicBlock>::deleteNode(MachineBasicBlock *MBB) {
}
MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM,
- unsigned FunctionNum, MachineModuleInfo &mmi)
- : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi) {
+ unsigned FunctionNum, MachineModuleInfo &mmi,
+ GCModuleInfo* gmi)
+ : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) {
if (TM.getRegisterInfo())
RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo());
else
RegInfo = 0;
MFInfo = 0;
- FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameInfo());
+ FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering());
if (Fn->hasFnAttr(Attribute::StackAlignment))
FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
Fn->getAttributes().getFnAttributes()));
@@ -190,20 +191,21 @@ MachineFunction::DeleteMachineBasicBlock(MachineBasicBlock *MBB) {
}
MachineMemOperand *
-MachineFunction::getMachineMemOperand(const Value *v, unsigned f,
- int64_t o, uint64_t s,
- unsigned base_alignment) {
- return new (Allocator) MachineMemOperand(v, f, o, s, base_alignment);
+MachineFunction::getMachineMemOperand(MachinePointerInfo PtrInfo, unsigned f,
+ uint64_t s, unsigned base_alignment,
+ const MDNode *TBAAInfo) {
+ return new (Allocator) MachineMemOperand(PtrInfo, f, s, base_alignment,
+ TBAAInfo);
}
MachineMemOperand *
MachineFunction::getMachineMemOperand(const MachineMemOperand *MMO,
int64_t Offset, uint64_t Size) {
return new (Allocator)
- MachineMemOperand(MMO->getValue(), MMO->getFlags(),
- int64_t(uint64_t(MMO->getOffset()) +
- uint64_t(Offset)),
- Size, MMO->getBaseAlignment());
+ MachineMemOperand(MachinePointerInfo(MMO->getValue(),
+ MMO->getOffset()+Offset),
+ MMO->getFlags(), Size,
+ MMO->getBaseAlignment(), 0);
}
MachineInstr::mmo_iterator
@@ -231,10 +233,10 @@ MachineFunction::extractLoadMemRefs(MachineInstr::mmo_iterator Begin,
else {
// Clone the MMO and unset the store flag.
MachineMemOperand *JustLoad =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOStore,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustLoad;
}
++Index;
@@ -263,10 +265,10 @@ MachineFunction::extractStoreMemRefs(MachineInstr::mmo_iterator Begin,
else {
// Clone the MMO and unset the load flag.
MachineMemOperand *JustStore =
- getMachineMemOperand((*I)->getValue(),
+ getMachineMemOperand((*I)->getPointerInfo(),
(*I)->getFlags() & ~MachineMemOperand::MOLoad,
- (*I)->getOffset(), (*I)->getSize(),
- (*I)->getBaseAlignment());
+ (*I)->getSize(), (*I)->getBaseAlignment(),
+ (*I)->getTBAAInfo());
Result[Index] = JustStore;
}
++Index;
@@ -279,7 +281,7 @@ void MachineFunction::dump() const {
print(dbgs());
}
-void MachineFunction::print(raw_ostream &OS) const {
+void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const {
OS << "# Machine code for function " << Fn->getName() << ":\n";
// Print Frame Information
@@ -328,7 +330,7 @@ void MachineFunction::print(raw_ostream &OS) const {
for (const_iterator BB = begin(), E = end(); BB != E; ++BB) {
OS << '\n';
- BB->print(OS);
+ BB->print(OS, Indexes);
}
OS << "\n# End machine code for function " << Fn->getName() << ".\n\n";
@@ -346,17 +348,15 @@ namespace llvm {
std::string getNodeLabel(const MachineBasicBlock *Node,
const MachineFunction *Graph) {
- if (isSimple () && Node->getBasicBlock() &&
- !Node->getBasicBlock()->getName().empty())
- return Node->getBasicBlock()->getNameStr() + ":";
-
std::string OutStr;
{
raw_string_ostream OSS(OutStr);
-
- if (isSimple())
- OSS << Node->getNumber() << ':';
- else
+
+ if (isSimple()) {
+ OSS << "BB#" << Node->getNumber();
+ if (const BasicBlock *BB = Node->getBasicBlock())
+ OSS << ": " << BB->getName();
+ } else
Node->print(OSS);
}
@@ -396,7 +396,8 @@ void MachineFunction::viewCFGOnly() const
/// addLiveIn - Add the specified physical register as a live-in value and
/// create a corresponding virtual register for it.
unsigned MachineFunction::addLiveIn(unsigned PReg,
- const TargetRegisterClass *RC) {
+ const TargetRegisterClass *RC,
+ DebugLoc DL) {
MachineRegisterInfo &MRI = getRegInfo();
unsigned VReg = MRI.getLiveInVirtReg(PReg);
if (VReg) {
@@ -405,6 +406,7 @@ unsigned MachineFunction::addLiveIn(unsigned PReg,
}
VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(PReg, VReg);
+ MRI.addLiveInLoc(VReg, DL);
return VReg;
}
@@ -426,6 +428,13 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx,
return Ctx.GetOrCreateSymbol(Name.str());
}
+/// getPICBaseSymbol - Return a function-local symbol to represent the PIC
+/// base.
+MCSymbol *MachineFunction::getPICBaseSymbol() const {
+ const MCAsmInfo &MAI = *Target.getMCAsmInfo();
+ return Ctx.GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix())+
+ Twine(getFunctionNumber())+"$pb");
+}
//===----------------------------------------------------------------------===//
// MachineFrameInfo implementation
@@ -485,7 +494,7 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const {
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
if (Objects.empty()) return;
- const TargetFrameInfo *FI = MF.getTarget().getFrameInfo();
+ const TargetFrameLowering *FI = MF.getTarget().getFrameLowering();
int ValOffset = (FI ? FI->getOffsetOfLocalArea() : 0);
OS << "Frame Objects:\n";
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 4f84b952e061..054c750c9f2b 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -12,22 +12,17 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
+#include "llvm/CodeGen/GCMetadata.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
using namespace llvm;
-// Register this pass with PassInfo directly to avoid having to define
-// a default constructor.
-static PassInfo
-X("Machine Function Analysis", "machine-function-analysis",
- &MachineFunctionAnalysis::ID, 0,
- /*CFGOnly=*/false, /*is_analysis=*/true);
-
char MachineFunctionAnalysis::ID = 0;
MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(ID), TM(tm), OptLevel(OL), MF(0) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
}
MachineFunctionAnalysis::~MachineFunctionAnalysis() {
@@ -52,7 +47,8 @@ bool MachineFunctionAnalysis::doInitialization(Module &M) {
bool MachineFunctionAnalysis::runOnFunction(Function &F) {
assert(!MF && "MachineFunctionAnalysis already initialized!");
MF = new MachineFunction(&F, TM, NextFnNum++,
- getAnalysis<MachineModuleInfo>());
+ getAnalysis<MachineModuleInfo>(),
+ getAnalysisIfAvailable<GCModuleInfo>());
return false;
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 446e461d5460..aa9ea61acec7 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -102,13 +102,13 @@ void MachineOperand::setReg(unsigned Reg) {
if (MachineBasicBlock *MBB = MI->getParent())
if (MachineFunction *MF = MBB->getParent()) {
RemoveRegOperandFromRegInfo();
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
AddRegOperandToRegInfo(&MF->getRegInfo());
return;
}
// Otherwise, just change the register, no problem. :)
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
}
void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx,
@@ -159,7 +159,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp,
} else {
// Otherwise, change this to a register and set the reg#.
OpKind = MO_Register;
- Contents.Reg.RegNo = Reg;
+ SmallContents.RegNo = Reg;
// If this operand is embedded in a function, add the operand to the
// register's use/def list.
@@ -227,24 +227,11 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
if (const MachineBasicBlock *MBB = MI->getParent())
if (const MachineFunction *MF = MBB->getParent())
TM = &MF->getTarget();
+ const TargetRegisterInfo *TRI = TM ? TM->getRegisterInfo() : 0;
switch (getType()) {
case MachineOperand::MO_Register:
- if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
- OS << "%reg" << getReg();
- } else {
- if (TM)
- OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
- else
- OS << "%physreg" << getReg();
- }
-
- if (getSubReg() != 0) {
- if (TM)
- OS << ':' << TM->getRegisterInfo()->getSubRegIndexName(getSubReg());
- else
- OS << ':' << getSubReg();
- }
+ OS << PrintReg(getReg(), TRI, getSubReg());
if (isDef() || isKill() || isDead() || isImplicit() || isUndef() ||
isEarlyClobber()) {
@@ -335,10 +322,45 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
// MachineMemOperand Implementation
//===----------------------------------------------------------------------===//
-MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
- int64_t o, uint64_t s, unsigned int a)
- : Offset(o), Size(s), V(v),
- Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)) {
+/// getAddrSpace - Return the LLVM IR address space number that this pointer
+/// points into.
+unsigned MachinePointerInfo::getAddrSpace() const {
+ if (V == 0) return 0;
+ return cast<PointerType>(V->getType())->getAddressSpace();
+}
+
+/// getConstantPool - Return a MachinePointerInfo record that refers to the
+/// constant pool.
+MachinePointerInfo MachinePointerInfo::getConstantPool() {
+ return MachinePointerInfo(PseudoSourceValue::getConstantPool());
+}
+
+/// getFixedStack - Return a MachinePointerInfo record that refers to the
+/// specified FrameIndex.
+MachinePointerInfo MachinePointerInfo::getFixedStack(int FI, int64_t offset) {
+ return MachinePointerInfo(PseudoSourceValue::getFixedStack(FI), offset);
+}
+
+MachinePointerInfo MachinePointerInfo::getJumpTable() {
+ return MachinePointerInfo(PseudoSourceValue::getJumpTable());
+}
+
+MachinePointerInfo MachinePointerInfo::getGOT() {
+ return MachinePointerInfo(PseudoSourceValue::getGOT());
+}
+
+MachinePointerInfo MachinePointerInfo::getStack(int64_t Offset) {
+ return MachinePointerInfo(PseudoSourceValue::getStack(), Offset);
+}
+
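
These factories pair with the MachinePointerInfo-based getMachineMemOperand overload added in MachineFunction.cpp above. A sketch of building a spill-slot load operand (the helper name and the 4-byte size/alignment are assumptions):

// Sketch: describe a 4-byte, 4-aligned load from spill slot FI.
static MachineMemOperand *makeSpillLoadMMO(MachineFunction &MF, int FI) {
  return MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI, 0),
                                 MachineMemOperand::MOLoad,
                                 /*Size=*/4, /*BaseAlignment=*/4,
                                 /*TBAAInfo=*/0);
}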
+MachineMemOperand::MachineMemOperand(MachinePointerInfo ptrinfo, unsigned f,
+ uint64_t s, unsigned int a,
+ const MDNode *TBAAInfo)
+ : PtrInfo(ptrinfo), Size(s),
+ Flags((f & ((1 << MOMaxBits) - 1)) | ((Log2_32(a) + 1) << MOMaxBits)),
+ TBAAInfo(TBAAInfo) {
+ assert((PtrInfo.V == 0 || isa<PointerType>(PtrInfo.V->getType())) &&
+ "invalid pointer value");
assert(getBaseAlignment() == a && "Alignment is not a power of 2!");
assert((isLoad() || isStore()) && "Not a load/store!");
}
@@ -346,9 +368,9 @@ MachineMemOperand::MachineMemOperand(const Value *v, unsigned int f,
/// Profile - Gather unique data for the object.
///
void MachineMemOperand::Profile(FoldingSetNodeID &ID) const {
- ID.AddInteger(Offset);
+ ID.AddInteger(getOffset());
ID.AddInteger(Size);
- ID.AddPointer(V);
+ ID.AddPointer(getValue());
ID.AddInteger(Flags);
}
@@ -364,8 +386,7 @@ void MachineMemOperand::refineAlignment(const MachineMemOperand *MMO) {
((Log2_32(MMO->getBaseAlignment()) + 1) << MOMaxBits);
// Also update the base and offset, because the new alignment may
// not be applicable with the old ones.
- V = MMO->getValue();
- Offset = MMO->getOffset();
+ PtrInfo = MMO->PtrInfo;
}
}
@@ -410,6 +431,16 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
MMO.getBaseAlignment() != MMO.getSize())
OS << "(align=" << MMO.getAlignment() << ")";
+ // Print TBAA info.
+ if (const MDNode *TBAAInfo = MMO.getTBAAInfo()) {
+ OS << "(tbaa=";
+ if (TBAAInfo->getNumOperands() > 0)
+ WriteAsOperand(OS, TBAAInfo->getOperand(0), /*PrintType=*/false);
+ else
+ OS << "<unknown>";
+ OS << ")";
+ }
+
return OS;
}
@@ -782,6 +813,14 @@ unsigned MachineInstr::getNumExplicitOperands() const {
return NumOperands;
}
+bool MachineInstr::isStackAligningInlineAsm() const {
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ return true;
+ }
+ return false;
+}
/// findRegisterUseOperandIdx() - Returns the MachineOperand that is a use of
/// the specific register or -1 if it is not found. It further tightens
@@ -881,14 +920,15 @@ int MachineInstr::findFirstPredOperandIdx() const {
bool MachineInstr::
isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
if (isInlineAsm()) {
- assert(DefOpIdx >= 3);
+ assert(DefOpIdx > InlineAsm::MIOp_FirstOperand);
const MachineOperand &MO = getOperand(DefOpIdx);
if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0)
return false;
// Determine the actual operand index that corresponds to this index.
unsigned DefNo = 0;
unsigned DefPart = 0;
- for (unsigned i = 2, e = getNumOperands(); i < e; ) {
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i < e; ) {
const MachineOperand &FMO = getOperand(i);
// After the normal asm operands there may be additional imp-def regs.
if (!FMO.isImm())
@@ -903,7 +943,8 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const {
}
++DefNo;
}
- for (unsigned i = 2, e = getNumOperands(); i != e; ++i) {
+ for (unsigned i = InlineAsm::MIOp_FirstOperand, e = getNumOperands();
+ i != e; ++i) {
const MachineOperand &FMO = getOperand(i);
if (!FMO.isImm())
continue;
@@ -946,7 +987,8 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
// Find the flag operand corresponding to UseOpIdx
unsigned FlagIdx, NumOps=0;
- for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
+ for (FlagIdx = InlineAsm::MIOp_FirstOperand;
+ FlagIdx < UseOpIdx; FlagIdx += NumOps+1) {
const MachineOperand &UFMO = getOperand(FlagIdx);
// After the normal asm operands there may be additional imp-def regs.
if (!UFMO.isImm())
@@ -964,9 +1006,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const {
if (!DefOpIdx)
return true;
- unsigned DefIdx = 2;
+ unsigned DefIdx = InlineAsm::MIOp_FirstOperand;
// Remember to adjust the index. First operand is asm string, second is
- // the AlignStack bit, then there is a flag for each.
+ // the HasSideEffects and AlignStack bits, then there is a flag for each.
while (DefNo) {
const MachineOperand &FMO = getOperand(DefIdx);
assert(FMO.isImm());
@@ -1071,7 +1113,9 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
SawStore = true;
return false;
}
- if (TID->isTerminator() || TID->hasUnmodeledSideEffects())
+
+ if (isLabel() || isDebugValue() ||
+ TID->isTerminator() || hasUnmodeledSideEffects())
return false;
// See if this instruction does a load. If so, we have to guarantee that the
@@ -1122,7 +1166,7 @@ bool MachineInstr::hasVolatileMemoryRef() const {
if (!TID->mayStore() &&
!TID->mayLoad() &&
!TID->isCall() &&
- !TID->hasUnmodeledSideEffects())
+ !hasUnmodeledSideEffects())
return false;
// Otherwise, if the instruction has no memory reference information,
@@ -1166,7 +1210,9 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
if (PSV->isConstant(MFI))
continue;
// If we have an AliasAnalysis, ask it whether the memory is constant.
- if (AA && AA->pointsToConstantMemory(V))
+ if (AA && AA->pointsToConstantMemory(
+ AliasAnalysis::Location(V, (*I)->getSize(),
+ (*I)->getTBAAInfo())))
continue;
}
@@ -1194,6 +1240,18 @@ unsigned MachineInstr::isConstantValuePHI() const {
return Reg;
}
+bool MachineInstr::hasUnmodeledSideEffects() const {
+ if (getDesc().hasUnmodeledSideEffects())
+ return true;
+ if (isInlineAsm()) {
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ return true;
+ }
+
+ return false;
+}
+
/// allDefsAreDead - Return true if all the defs of this instruction are dead.
///
bool MachineInstr::allDefsAreDead() const {
@@ -1207,6 +1265,17 @@ bool MachineInstr::allDefsAreDead() const {
return true;
}
+/// copyImplicitOps - Copy implicit register operands from specified
+/// instruction to this instruction.
+void MachineInstr::copyImplicitOps(const MachineInstr *MI) {
+ for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
+ i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isImplicit())
+ addOperand(MO);
+ }
+}
+
void MachineInstr::dump() const {
dbgs() << " " << *this;
}
@@ -1257,7 +1326,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (StartOp != 0) OS << ", ";
getOperand(StartOp).print(OS, TM);
unsigned Reg = getOperand(StartOp).getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg))
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
VirtRegs.push_back(Reg);
}
@@ -1270,11 +1339,28 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
// Print the rest of the operands.
bool OmittedAnyCallClobbers = false;
bool FirstOp = true;
+
+ if (isInlineAsm()) {
+ // Print asm string.
+ OS << " ";
+ getOperand(InlineAsm::MIOp_AsmString).print(OS, TM);
+
+ // Print HasSideEffects, IsAlignStack
+ unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_HasSideEffects)
+ OS << " [sideeffect]";
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
+ OS << " [alignstack]";
+
+ StartOp = InlineAsm::MIOp_FirstOperand;
+ FirstOp = false;
+ }
+
+
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
- if (MO.isReg() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
VirtRegs.push_back(MO.getReg());
// Omit call-clobbered registers which aren't used anywhere. This makes
@@ -1284,7 +1370,7 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (MF && getDesc().isCall() &&
MO.isReg() && MO.isImplicit() && MO.isDef()) {
unsigned Reg = MO.getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
const MachineRegisterInfo &MRI = MF->getRegInfo();
if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
bool HasAliasLive = false;
@@ -1348,14 +1434,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
if (!HaveSemi) OS << ";"; HaveSemi = true;
for (unsigned i = 0; i != VirtRegs.size(); ++i) {
const TargetRegisterClass *RC = MRI->getRegClass(VirtRegs[i]);
- OS << " " << RC->getName() << ":%reg" << VirtRegs[i];
+ OS << " " << RC->getName() << ':' << PrintReg(VirtRegs[i]);
for (unsigned j = i+1; j != VirtRegs.size();) {
if (MRI->getRegClass(VirtRegs[j]) != RC) {
++j;
continue;
}
if (VirtRegs[i] != VirtRegs[j])
- OS << "," << VirtRegs[j];
+ OS << "," << PrintReg(VirtRegs[j]);
VirtRegs.erase(VirtRegs.begin()+j);
}
}
@@ -1533,8 +1619,7 @@ MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) {
switch (MO.getType()) {
default: break;
case MachineOperand::MO_Register:
- if (MO.isDef() && MO.getReg() &&
- TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
continue; // Skip virtual register defs.
Key |= MO.getReg();
break;
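A hedged usage note (not from the patch): the PrintReg helper adopted above can be used by any debug-printing code without needing a TargetMachine; Reg, TRI and SlotIdx below are placeholders.
// Sketch only: prints the target name for physical registers, "%vregN" for
// virtuals, and an optional sub-register index when one is passed.
dbgs() << "spilling " << PrintReg(Reg, TRI) << " at " << SlotIdx << '\n';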
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 1a74b747e9f2..443fc2d97bdf 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -28,8 +28,10 @@
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/ADT/DenseMap.h"
@@ -40,8 +42,14 @@
using namespace llvm;
-STATISTIC(NumHoisted, "Number of machine instructions hoisted out of loops");
-STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
+STATISTIC(NumHoisted,
+ "Number of machine instructions hoisted out of loops");
+STATISTIC(NumLowRP,
+ "Number of instructions hoisted in low reg pressure situation");
+STATISTIC(NumHighLatency,
+ "Number of high latency instructions hoisted");
+STATISTIC(NumCSEed,
+ "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
"Number of machine instructions hoisted out of loops post regalloc");
@@ -51,9 +59,11 @@ namespace {
const TargetMachine *TM;
const TargetInstrInfo *TII;
+ const TargetLowering *TLI;
const TargetRegisterInfo *TRI;
const MachineFrameInfo *MFI;
- MachineRegisterInfo *RegInfo;
+ MachineRegisterInfo *MRI;
+ const InstrItineraryData *InstrItins;
// Various analyses that we use...
AliasAnalysis *AA; // Alias analysis info.
@@ -68,23 +78,37 @@ namespace {
BitVector AllocatableSet;
+ // Track 'estimated' register pressure.
+ SmallSet<unsigned, 32> RegSeen;
+ SmallVector<unsigned, 8> RegPressure;
+
+ // Register pressure "limit" per register class. If the pressure
+ // is higher than the limit, then it's considered high.
+ SmallVector<unsigned, 8> RegLimit;
+
+ // Register pressure on path leading from loop preheader to current BB.
+ SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
+
// For each opcode, keep a list of potential CSE instructions.
DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
public:
static char ID; // Pass identification, replacement for typeid
MachineLICM() :
- MachineFunctionPass(ID), PreRegAlloc(true) {}
+ MachineFunctionPass(ID), PreRegAlloc(true) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
explicit MachineLICM(bool PreRA) :
- MachineFunctionPass(ID), PreRegAlloc(PreRA) {}
+ MachineFunctionPass(ID), PreRegAlloc(PreRA) {
+ initializeMachineLICMPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
const char *getPassName() const { return "Machine Instruction LICM"; }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<MachineDominatorTree>();
AU.addRequired<AliasAnalysis>();
@@ -94,6 +118,13 @@ namespace {
}
virtual void releaseMemory() {
+ RegSeen.clear();
+ RegPressure.clear();
+ RegLimit.clear();
+ BackTrace.clear();
+ for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
+ CI->second.clear();
CSEMap.clear();
}
@@ -138,6 +169,24 @@ namespace {
///
bool IsLoopInvariantInst(MachineInstr &I);
+ /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+ /// and a use in the current loop, return true if the target considers
+ /// it 'high'.
+ bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
+ unsigned Reg) const;
+
+ bool IsCheapInstruction(MachineInstr &MI) const;
+
+ /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+ /// if hoisting an instruction with the given per-register-class cost can
+ /// cause high register pressure.
+ bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost);
+
+ /// UpdateBackTraceRegPressure - Traverse the back trace from header to
+ /// the current block and update their register pressures to reflect the
+ /// effect of hoisting MI from the current block to the preheader.
+ void UpdateBackTraceRegPressure(const MachineInstr *MI);
+
/// IsProfitableToHoist - Return true if it is potentially profitable to
/// hoist the given loop invariant.
bool IsProfitableToHoist(MachineInstr &MI);
@@ -148,11 +197,16 @@ namespace {
/// visit definitions before uses, allowing us to hoist a loop body in one
/// pass without iteration.
///
- void HoistRegion(MachineDomTreeNode *N);
+ void HoistRegion(MachineDomTreeNode *N, bool IsHeader = false);
+
+ /// InitRegPressure - Find all virtual register references that are liveout
+ /// of the preheader to initialize the starting "register pressure". Note
+ /// this does not count live through (livein but not used) registers.
+ void InitRegPressure(MachineBasicBlock *BB);
- /// isLoadFromConstantMemory - Return true if the given instruction is a
- /// load from constant memory.
- bool isLoadFromConstantMemory(MachineInstr *MI);
+ /// UpdateRegPressure - Update estimate of register pressure after the
+ /// specified instruction.
+ void UpdateRegPressure(const MachineInstr *MI);
/// ExtractHoistableLoad - Unfold a load from the given machineinstr if
/// the load itself could be hoisted. Return the unfolded and hoistable
@@ -174,8 +228,8 @@ namespace {
/// Hoist - When an instruction is found to only use loop invariant operands
/// that are safe to hoist, this function is called to do the dirty work.
- ///
- void Hoist(MachineInstr *MI);
+ /// It returns true if the instruction is hoisted.
+ bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
/// InitCSEMap - Initialize the CSE map with instructions that are in the
/// current loop preheader that may become duplicates of instructions that
@@ -189,8 +243,13 @@ namespace {
} // end anonymous namespace
char MachineLICM::ID = 0;
-INITIALIZE_PASS(MachineLICM, "machinelicm",
- "Machine Loop Invariant Code Motion", false, false);
+INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineLICM, "machinelicm",
+ "Machine Loop Invariant Code Motion", false, false)
FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) {
return new MachineLICM(PreRegAlloc);
@@ -212,18 +271,32 @@ static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
if (PreRegAlloc)
- DEBUG(dbgs() << "******** Pre-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
else
- DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n");
+ DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
+ DEBUG(dbgs() << MF.getFunction()->getName() << " ********\n");
Changed = FirstInLoop = false;
TM = &MF.getTarget();
TII = TM->getInstrInfo();
+ TLI = TM->getTargetLowering();
TRI = TM->getRegisterInfo();
MFI = MF.getFrameInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
+ InstrItins = TM->getInstrItineraryData();
AllocatableSet = TRI->getAllocatableSet(MF);
+ if (PreRegAlloc) {
+ // Estimate register pressure during pre-regalloc pass.
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegPressure.resize(NumRC);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ RegLimit.resize(NumRC);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = TLI->getRegPressureLimit(*I, MF);
+ }
+
// Get our Loop information...
MLI = &getAnalysis<MachineLoopInfo>();
DT = &getAnalysis<MachineDominatorTree>();
@@ -248,7 +321,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
// being hoisted.
MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
FirstInLoop = true;
- HoistRegion(N);
+ HoistRegion(N, true);
CSEMap.clear();
}
}
@@ -474,17 +547,33 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
/// first order w.r.t the DominatorTree. This allows us to visit definitions
/// before uses, allowing us to hoist a loop body in one pass without iteration.
///
-void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
+void MachineLICM::HoistRegion(MachineDomTreeNode *N, bool IsHeader) {
assert(N != 0 && "Null dominator tree node?");
MachineBasicBlock *BB = N->getBlock();
// If this subregion is not in the top level loop at all, exit.
if (!CurLoop->contains(BB)) return;
+ MachineBasicBlock *Preheader = getCurPreheader();
+ if (!Preheader)
+ return;
+
+ if (IsHeader) {
+ // Compute registers which are livein into the loop headers.
+ RegSeen.clear();
+ BackTrace.clear();
+ InitRegPressure(Preheader);
+ }
+
+ // Remember livein register pressure.
+ BackTrace.push_back(RegPressure);
+
for (MachineBasicBlock::iterator
MII = BB->begin(), E = BB->end(); MII != E; ) {
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
- Hoist(&*MII);
+ MachineInstr *MI = &*MII;
+ if (!Hoist(MI, Preheader))
+ UpdateRegPressure(MI);
MII = NextMII;
}
@@ -496,6 +585,99 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) {
for (unsigned I = 0, E = Children.size(); I != E; ++I)
HoistRegion(Children[I]);
}
+
+ BackTrace.pop_back();
+}
+
+static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
+ return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
+}
+
+/// InitRegPressure - Find all virtual register references that are liveout of
+/// the preheader to initialize the starting "register pressure". Note this
+/// does not count live through (livein but not used) registers.
+void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+
+ // If the preheader has only a single predecessor and it ends with a
+ // fallthrough or an unconditional branch, then scan its predecessor for live
+ // defs as well. This happens whenever the preheader is created by splitting
+ // the critical edge from the loop predecessor to the loop header.
+ if (BB->pred_size() == 1) {
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
+ InitRegPressure(*BB->pred_begin());
+ }
+
+ for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
+ MII != E; ++MII) {
+ MachineInstr *MI = &*MII;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (MO.isDef())
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else {
+ bool isKill = isOperandKill(MO, MRI);
+ if (isNew && !isKill)
+ // Haven't seen this, it must be a livein.
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ else if (!isNew && isKill)
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ }
+}
+
+/// UpdateRegPressure - Update estimate of register pressure after the
+/// specified instruction.
+void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ SmallVector<unsigned, 4> Defs;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ bool isNew = RegSeen.insert(Reg);
+ if (MO.isDef())
+ Defs.push_back(Reg);
+ else if (!isNew && isOperandKill(MO, MRI)) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+
+ if (RCCost > RegPressure[RCId])
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= RCCost;
+ }
+ }
+
+ while (!Defs.empty()) {
+ unsigned Reg = Defs.pop_back_val();
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ RegPressure[RCId] += RCCost;
+ }
}
/// IsLICMCandidate - Returns true if the instruction may be a suitable
@@ -535,14 +717,14 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
return false;
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
return false;
@@ -562,12 +744,12 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
if (!MO.isUse())
continue;
- assert(RegInfo->getVRegDef(Reg) &&
+ assert(MRI->getVRegDef(Reg) &&
"Machine instr not mapped for this vreg?!");
// If the loop contains the definition of an operand, then the instruction
// isn't loop invariant.
- if (CurLoop->contains(RegInfo->getVRegDef(Reg)))
+ if (CurLoop->contains(MRI->getVRegDef(Reg)))
return false;
}
@@ -577,9 +759,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
/// HasPHIUses - Return true if the specified register has any PHI use.
-static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
- for (MachineRegisterInfo::use_iterator UI = RegInfo->use_begin(Reg),
- UE = RegInfo->use_end(); UI != UE; ++UI) {
+static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
if (UseMI->isPHI())
return true;
@@ -587,37 +769,210 @@ static bool HasPHIUses(unsigned Reg, MachineRegisterInfo *RegInfo) {
return false;
}
-/// isLoadFromConstantMemory - Return true if the given instruction is a
-/// load from constant memory. Machine LICM will hoist these even if they are
-/// not re-materializable.
-bool MachineLICM::isLoadFromConstantMemory(MachineInstr *MI) {
- if (!MI->getDesc().mayLoad()) return false;
- if (!MI->hasOneMemOperand()) return false;
- MachineMemOperand *MMO = *MI->memoperands_begin();
- if (MMO->isVolatile()) return false;
- if (!MMO->getValue()) return false;
- const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(MMO->getValue());
- if (PSV) {
- MachineFunction &MF = *MI->getParent()->getParent();
- return PSV->isConstant(MF.getFrameInfo());
- } else {
- return AA->pointsToConstantMemory(MMO->getValue());
+
+/// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
+/// and a use in the current loop, return true if the target considers
+/// it 'high'.
+bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
+ unsigned DefIdx, unsigned Reg) const {
+ if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
+ return false;
+
+ for (MachineRegisterInfo::use_nodbg_iterator I = MRI->use_nodbg_begin(Reg),
+ E = MRI->use_nodbg_end(); I != E; ++I) {
+ MachineInstr *UseMI = &*I;
+ if (UseMI->isCopyLike())
+ continue;
+ if (!CurLoop->contains(UseMI->getParent()))
+ continue;
+ for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = UseMI->getOperand(i);
+ if (!MO.isReg() || !MO.isUse())
+ continue;
+ unsigned MOReg = MO.getReg();
+ if (MOReg != Reg)
+ continue;
+
+ if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, UseMI, i))
+ return true;
+ }
+
+ // Only look at the first in-loop use.
+ break;
+ }
+
+ return false;
+}
+
+/// IsCheapInstruction - Return true if the instruction is marked "cheap" or
+/// the operand latency between its def and a use is one or less.
+bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
+ if (MI.getDesc().isAsCheapAsAMove() || MI.isCopyLike())
+ return true;
+ if (!InstrItins || InstrItins->isEmpty())
+ return false;
+
+ bool isCheap = false;
+ unsigned NumDefs = MI.getDesc().getNumDefs();
+ for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
+ MachineOperand &DefMO = MI.getOperand(i);
+ if (!DefMO.isReg() || !DefMO.isDef())
+ continue;
+ --NumDefs;
+ unsigned Reg = DefMO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+
+ if (!TII->hasLowDefLatency(InstrItins, &MI, i))
+ return false;
+ isCheap = true;
+ }
+
+ return isCheap;
+}
+
+/// CanCauseHighRegPressure - Visit BBs from header to current BB, check
+/// if hoisting an instruction with the given per-register-class cost can
+/// cause high register pressure.
+bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost) {
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ if (CI->second <= 0)
+ continue;
+
+ unsigned RCId = CI->first;
+ for (unsigned i = BackTrace.size(); i != 0; --i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i-1];
+ if (RP[RCId] + CI->second >= RegLimit[RCId])
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// UpdateBackTraceRegPressure - Traverse the back trace from header to the
+/// current block and update their register pressures to reflect the effect
+/// of hoisting MI from the current block to the preheader.
+void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
+ if (MI->isImplicitDef())
+ return;
+
+ // First compute the 'cost' of the instruction, i.e. its contribution
+ // to register pressure.
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ if (MO.isDef()) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Update register pressure of blocks from loop header to current block.
+ for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
+ SmallVector<unsigned, 8> &RP = BackTrace[i];
+ for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
+ CI != CE; ++CI) {
+ unsigned RCId = CI->first;
+ RP[RCId] += CI->second;
+ }
}
}
/// IsProfitableToHoist - Return true if it is potentially profitable to hoist
/// the given loop invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
+ if (MI.isImplicitDef())
+ return true;
+
+ // If the instruction is cheap, only hoist it if it is re-materializable.
+ // LICM will increase register pressure, and that is probably not worth it
+ // if the instruction is cheap.
// Also hoist loads from constant memory, e.g. loads from stubs and the GOT.
// Hoisting these tends to help performance in low register pressure
// situations. The trade-off is that it may cause a spill in high pressure
// situations, since it ends up adding a store in the loop preheader. But the
// reload is no more expensive. The side benefit is these loads are
// frequently CSE'ed.
- if (!TII->isTriviallyReMaterializable(&MI, AA)) {
- if (!isLoadFromConstantMemory(&MI))
+ if (IsCheapInstruction(MI)) {
+ if (!TII->isTriviallyReMaterializable(&MI, AA))
+ return false;
+ } else {
+ // Estimate register pressure to determine whether to LICM the instruction.
+ // In low register pressure situations, we can be more aggressive about
+ // hoisting. Also, favor hoisting long latency instructions even in
+ // moderately high pressure situations.
+ // FIXME: If there are long latency loop-invariant instructions inside the
+ // loop at this point, why didn't the optimizer's LICM hoist them?
+ DenseMap<unsigned, int> Cost;
+ for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.isImplicit())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ if (HasHighOperandLatency(MI, i, Reg)) {
+ ++NumHighLatency;
+ return true;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second += RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, RCCost));
+ } else if (isOperandKill(MO, MRI)) {
+ // If a virtual register use is a kill, hoisting it out of the loop
+ // may actually reduce register pressure or be register pressure
+ // neutral.
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ EVT VT = *RC->vt_begin();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned RCCost = TLI->getRepRegClassCostFor(VT);
+ DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
+ if (CI != Cost.end())
+ CI->second -= RCCost;
+ else
+ Cost.insert(std::make_pair(RCId, -RCCost));
+ }
+ }
+
+ // Visit BBs from header to current BB, if hoisting this doesn't cause
+ // high register pressure, then it's safe to proceed.
+ if (!CanCauseHighRegPressure(Cost)) {
+ ++NumLowRP;
+ return true;
+ }
+
+ // In a high register pressure situation, only hoist if the instruction is
+ // going to be rematerialized.
+ if (!TII->isTriviallyReMaterializable(&MI, AA) &&
+ !MI.isInvariantLoad(AA))
return false;
}
@@ -628,7 +983,7 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
const MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.isDef())
continue;
- if (HasPHIUses(MO.getReg(), RegInfo))
+ if (HasPHIUses(MO.getReg(), MRI))
return false;
}
@@ -636,10 +991,14 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
}
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
+ // Don't unfold simple loads.
+ if (MI->getDesc().canFoldAsLoad())
+ return 0;
+
// If not, we may be able to unfold a load and hoist that.
// First test whether the instruction is loading from an amenable
// memory location.
- if (!isLoadFromConstantMemory(MI))
+ if (!MI->isInvariantLoad(AA))
return 0;
// Next determine the register class for a temporary register.
@@ -654,7 +1013,7 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
if (TID.getNumDefs() != 1) return 0;
const TargetRegisterClass *RC = TID.OpInfo[LoadRegIndex].getRegClass(TRI);
// Ok, we're unfolding. Create a temporary register and do the unfold.
- unsigned Reg = RegInfo->createVirtualRegister(RC);
+ unsigned Reg = MRI->createVirtualRegister(RC);
MachineFunction &MF = *MI->getParent()->getParent();
SmallVector<MachineInstr *, 2> NewMIs;
@@ -678,6 +1037,10 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
NewMIs[1]->eraseFromParent();
return 0;
}
+
+ // Update register pressure for the unfolded instruction.
+ UpdateRegPressure(NewMIs[1]);
+
// Otherwise we successfully unfolded a load that we can hoist.
MI->eraseFromParent();
return NewMIs[0];
@@ -686,20 +1049,15 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
const MachineInstr *MI = &*I;
- // FIXME: For now, only hoist re-materilizable instructions. LICM will
- // increase register pressure. We want to make sure it doesn't increase
- // spilling.
- if (TII->isTriviallyReMaterializable(MI, AA)) {
- unsigned Opcode = MI->getOpcode();
- DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
- CI = CSEMap.find(Opcode);
- if (CI != CSEMap.end())
- CI->second.push_back(MI);
- else {
- std::vector<const MachineInstr*> CSEMIs;
- CSEMIs.push_back(MI);
- CSEMap.insert(std::make_pair(Opcode, CSEMIs));
- }
+ unsigned Opcode = MI->getOpcode();
+ DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
+ CI = CSEMap.find(Opcode);
+ if (CI != CSEMap.end())
+ CI->second.push_back(MI);
+ else {
+ std::vector<const MachineInstr*> CSEMIs;
+ CSEMIs.push_back(MI);
+ CSEMap.insert(std::make_pair(Opcode, CSEMIs));
}
}
}
@@ -709,7 +1067,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->produceSameValue(MI, PrevMI))
+ if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : 0)))
return PrevMI;
}
return 0;
@@ -738,8 +1096,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
if (MO.isReg() && MO.isDef() &&
!TargetRegisterInfo::isPhysicalRegister(MO.getReg())) {
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- RegInfo->clearKillFlags(Dup->getOperand(i).getReg());
+ MRI->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
+ MRI->clearKillFlags(Dup->getOperand(i).getReg());
}
}
MI->eraseFromParent();
@@ -752,15 +1110,12 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
/// Hoist - When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this function is called to do the dirty work.
///
-void MachineLICM::Hoist(MachineInstr *MI) {
- MachineBasicBlock *Preheader = getCurPreheader();
- if (!Preheader) return;
-
+bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
// First check whether we should hoist this instruction.
if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
// If not, try unfolding a hoistable load.
MI = ExtractHoistableLoad(MI);
- if (!MI) return;
+ if (!MI) return false;
}
// Now move the instructions to the predecessor, inserting it before any
@@ -791,13 +1146,16 @@ void MachineLICM::Hoist(MachineInstr *MI) {
// Otherwise, splice the instruction to the preheader.
Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
+ // Update register pressure for BBs from header to this block.
+ UpdateBackTraceRegPressure(MI);
+
// Clear the kill flags of any register this instruction defines,
// since they may need to be live throughout the entire loop
// rather than just live for part of it.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
if (MO.isReg() && MO.isDef() && !MO.isDead())
- RegInfo->clearKillFlags(MO.getReg());
+ MRI->clearKillFlags(MO.getReg());
}
// Add to the CSE map.
@@ -812,6 +1170,8 @@ void MachineLICM::Hoist(MachineInstr *MI) {
++NumHoisted;
Changed = true;
+
+ return true;
}
MachineBasicBlock *MachineLICM::getCurPreheader() {
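For orientation (an editor's sketch, not code from this commit): the new pressure bookkeeping keys each virtual register by the representative register class of its first value type, roughly as below; Reg is a placeholder, and MRI/TLI are assumed to be set up as in runOnMachineFunction().
// Sketch only: compute the class id and cost used by RegPressure/RegLimit.
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
EVT VT = *RC->vt_begin();
unsigned RCId   = TLI->getRepRegClassFor(VT)->getID();
unsigned RCCost = TLI->getRepRegClassCostFor(VT);
RegPressure[RCId] += RCCost;   // e.g. account for a new def of Reg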
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index bca4b0c28985..189cb2ba5d1d 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -30,8 +30,11 @@ TEMPLATE_INSTANTIATION(MLIB);
}
char MachineLoopInfo::ID = 0;
-INITIALIZE_PASS(MachineLoopInfo, "machine-loops",
- "Machine Natural Loop Construction", true, true);
+INITIALIZE_PASS_BEGIN(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(MachineLoopInfo, "machine-loops",
+ "Machine Natural Loop Construction", true, true)
char &llvm::MachineLoopInfoID = MachineLoopInfo::ID;
diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp
new file mode 100644
index 000000000000..17fe67f65045
--- /dev/null
+++ b/lib/CodeGen/MachineLoopRanges.cpp
@@ -0,0 +1,116 @@
+//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MachineLoopRanges analysis.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+char MachineLoopRanges::ID = 0;
+INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges",
+ "Machine Loop Ranges", true, true)
+
+char &llvm::MachineLoopRangesID = MachineLoopRanges::ID;
+
+void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<SlotIndexes>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// runOnMachineFunction - Don't do much, loop ranges are computed on demand.
+bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) {
+ releaseMemory();
+ Indexes = &getAnalysis<SlotIndexes>();
+ return false;
+}
+
+void MachineLoopRanges::releaseMemory() {
+ DeleteContainerSeconds(Cache);
+ Cache.clear();
+}
+
+MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) {
+ MachineLoopRange *&Range = Cache[Loop];
+ if (!Range)
+ Range = new MachineLoopRange(Loop, Allocator, *Indexes);
+ return Range;
+}
+
+/// Create a MachineLoopRange, only accessible to MachineLoopRanges.
+MachineLoopRange::MachineLoopRange(const MachineLoop *loop,
+ MachineLoopRange::Allocator &alloc,
+ SlotIndexes &Indexes)
+ : Loop(loop), Intervals(alloc), Area(0) {
+ // Compute loop coverage.
+ for (MachineLoop::block_iterator I = Loop->block_begin(),
+ E = Loop->block_end(); I != E; ++I) {
+ const std::pair<SlotIndex, SlotIndex> &Range = Indexes.getMBBRange(*I);
+ Intervals.insert(Range.first, Range.second, 1u);
+ Area += Range.first.distance(Range.second);
+ }
+}
+
+/// overlaps - Return true if this loop overlaps the given range of machine
+/// instructions.
+bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) {
+ Map::const_iterator I = Intervals.find(Start);
+ return I.valid() && Stop > I.start();
+}
+
+unsigned MachineLoopRange::getNumber() const {
+ return Loop->getHeader()->getNumber();
+}
+
+/// byNumber - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by number.
+int MachineLoopRange::byNumber(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ unsigned na = a->getNumber();
+ unsigned nb = b->getNumber();
+ if (na < nb)
+ return -1;
+ if (na > nb)
+ return 1;
+ return 0;
+}
+
+/// byAreaDesc - Comparator for array_pod_sort that sorts a list of
+/// MachineLoopRange pointers by:
+/// 1. Descending area.
+/// 2. Ascending number.
+int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) {
+ const MachineLoopRange *a = *static_cast<MachineLoopRange *const *>(pa);
+ const MachineLoopRange *b = *static_cast<MachineLoopRange *const *>(pb);
+ if (a->getArea() != b->getArea())
+ return a->getArea() > b->getArea() ? -1 : 1;
+ return byNumber(pa, pb);
+}
+
+void MachineLoopRange::print(raw_ostream &OS) const {
+ OS << "Loop#" << getNumber() << " =";
+ for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I)
+ OS << " [" << I.start() << ';' << I.stop() << ')';
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) {
+ MLR.print(OS);
+ return OS;
+}
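A hedged sketch of how a client pass might query the new analysis (not part of this commit); Loop, Start and Stop are placeholders, and the pass is assumed to have added MachineLoopRanges to its getAnalysisUsage().
// Sketch only.
MachineLoopRanges &MLR = getAnalysis<MachineLoopRanges>();
MachineLoopRange *Range = MLR.getLoopRange(Loop);
if (Range->overlaps(Start, Stop)) {
  // The slot index range [Start, Stop) touches at least one block of Loop.
}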
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index b647a4dcc530..fadc594efcb2 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -29,7 +29,7 @@ using namespace llvm::dwarf;
// Handle the Pass registration stuff necessary to use TargetData's.
INITIALIZE_PASS(MachineModuleInfo, "machinemoduleinfo",
- "Machine Module Information", false, false);
+ "Machine Module Information", false, false)
char MachineModuleInfo::ID = 0;
// Out of line virtual method.
@@ -41,30 +41,30 @@ class MMIAddrLabelMapCallbackPtr : CallbackVH {
public:
MMIAddrLabelMapCallbackPtr() : Map(0) {}
MMIAddrLabelMapCallbackPtr(Value *V) : CallbackVH(V), Map(0) {}
-
+
void setPtr(BasicBlock *BB) {
ValueHandleBase::operator=(BB);
}
-
+
void setMap(MMIAddrLabelMap *map) { Map = map; }
-
+
virtual void deleted();
virtual void allUsesReplacedWith(Value *V2);
};
-
+
class MMIAddrLabelMap {
MCContext &Context;
struct AddrLabelSymEntry {
/// Symbols - The symbols for the label. This is a pointer union that is
/// either one symbol (the common case) or a list of symbols.
PointerUnion<MCSymbol *, std::vector<MCSymbol*>*> Symbols;
-
+
Function *Fn; // The containing function of the BasicBlock.
unsigned Index; // The index in BBCallbacks for the BasicBlock.
};
-
+
DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry> AddrLabelSymbols;
-
+
/// BBCallbacks - Callbacks for the BasicBlock's that we have entries for. We
/// use this so we get notified if a block is deleted or RAUWd.
std::vector<MMIAddrLabelMapCallbackPtr> BBCallbacks;
@@ -76,23 +76,23 @@ class MMIAddrLabelMap {
DenseMap<AssertingVH<Function>, std::vector<MCSymbol*> >
DeletedAddrLabelsNeedingEmission;
public:
-
+
MMIAddrLabelMap(MCContext &context) : Context(context) {}
~MMIAddrLabelMap() {
assert(DeletedAddrLabelsNeedingEmission.empty() &&
"Some labels for deleted blocks never got emitted");
-
+
// Deallocate any of the 'list of symbols' case.
for (DenseMap<AssertingVH<BasicBlock>, AddrLabelSymEntry>::iterator
I = AddrLabelSymbols.begin(), E = AddrLabelSymbols.end(); I != E; ++I)
if (I->second.Symbols.is<std::vector<MCSymbol*>*>())
delete I->second.Symbols.get<std::vector<MCSymbol*>*>();
}
-
+
MCSymbol *getAddrLabelSymbol(BasicBlock *BB);
std::vector<MCSymbol*> getAddrLabelSymbolToEmit(BasicBlock *BB);
- void takeDeletedSymbolsForFunction(Function *F,
+ void takeDeletedSymbolsForFunction(Function *F,
std::vector<MCSymbol*> &Result);
void UpdateForDeletedBlock(BasicBlock *BB);
@@ -104,7 +104,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
// If we already had an entry for this block, just return it.
if (!Entry.Symbols.isNull()) {
assert(BB->getParent() == Entry.Fn && "Parent changed");
@@ -112,7 +112,7 @@ MCSymbol *MMIAddrLabelMap::getAddrLabelSymbol(BasicBlock *BB) {
return Entry.Symbols.get<MCSymbol*>();
return (*Entry.Symbols.get<std::vector<MCSymbol*>*>())[0];
}
-
+
// Otherwise, this is a new entry, create a new symbol for it and add an
// entry to BBCallbacks so we can be notified if the BB is deleted or RAUWd.
BBCallbacks.push_back(BB);
@@ -129,9 +129,9 @@ MMIAddrLabelMap::getAddrLabelSymbolToEmit(BasicBlock *BB) {
assert(BB->hasAddressTaken() &&
"Shouldn't get label for block without address taken");
AddrLabelSymEntry &Entry = AddrLabelSymbols[BB];
-
+
std::vector<MCSymbol*> Result;
-
+
// If we already had an entry for this block, just return it.
if (Entry.Symbols.isNull())
Result.push_back(getAddrLabelSymbol(BB));
@@ -152,7 +152,7 @@ takeDeletedSymbolsForFunction(Function *F, std::vector<MCSymbol*> &Result) {
// If there are no entries for the function, just return.
if (I == DeletedAddrLabelsNeedingEmission.end()) return;
-
+
// Otherwise, take the list.
std::swap(Result, I->second);
DeletedAddrLabelsNeedingEmission.erase(I);
@@ -175,7 +175,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
if (MCSymbol *Sym = Entry.Symbols.dyn_cast<MCSymbol*>()) {
if (Sym->isDefined())
return;
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -187,7 +187,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
for (unsigned i = 0, e = Syms->size(); i != e; ++i) {
MCSymbol *Sym = (*Syms)[i];
if (Sym->isDefined()) continue; // Ignore already emitted labels.
-
+
// If the block is not yet defined, we need to emit it at the end of the
// function. Add the symbol to the DeletedAddrLabelsNeedingEmission list
// for the containing Function. Since the block is being deleted, its
@@ -195,7 +195,7 @@ void MMIAddrLabelMap::UpdateForDeletedBlock(BasicBlock *BB) {
// 'Entry'.
DeletedAddrLabelsNeedingEmission[Entry.Fn].push_back(Sym);
}
-
+
// The entry is deleted, free the memory associated with the symbol list.
delete Syms;
}
@@ -225,7 +225,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
SymList->push_back(PrevSym);
NewEntry.Symbols = SymList;
}
-
+
std::vector<MCSymbol*> *SymList =
NewEntry.Symbols.get<std::vector<MCSymbol*>*>();
@@ -234,7 +234,7 @@ void MMIAddrLabelMap::UpdateForRAUWBlock(BasicBlock *Old, BasicBlock *New) {
SymList->push_back(Sym);
return;
}
-
+
// Otherwise, concatenate the list.
std::vector<MCSymbol*> *Syms =OldEntry.Symbols.get<std::vector<MCSymbol*>*>();
SymList->insert(SymList->end(), Syms->begin(), Syms->end());
@@ -253,10 +253,13 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) {
//===----------------------------------------------------------------------===//
-MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
-: ImmutablePass(ID), Context(MAI),
+MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI,
+ const TargetAsmInfo *TAI)
+: ImmutablePass(ID), Context(MAI, TAI),
ObjFileMMI(0),
- CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false){
+ CurCallSite(0), CallsEHReturn(0), CallsUnwindInit(0), DbgInfoAvailable(false),
+ CallsExternalVAFunctionWithFloatingPointArguments(false) {
+ initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry());
// Always emit some info, by default "no personality" info.
Personalities.push_back(NULL);
AddrLabelSymbols = 0;
@@ -264,7 +267,7 @@ MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI)
}
MachineModuleInfo::MachineModuleInfo()
-: ImmutablePass(ID), Context(*(MCAsmInfo*)0) {
+: ImmutablePass(ID), Context(*(MCAsmInfo*)0, NULL) {
assert(0 && "This MachineModuleInfo constructor should never be called, MMI "
"should always be explicitly constructed by LLVMTargetMachine");
abort();
@@ -272,7 +275,7 @@ MachineModuleInfo::MachineModuleInfo()
MachineModuleInfo::~MachineModuleInfo() {
delete ObjFileMMI;
-
+
// FIXME: Why isn't doFinalization being called??
//assert(AddrLabelSymbols == 0 && "doFinalization not called");
delete AddrLabelSymbols;
@@ -472,7 +475,7 @@ void MachineModuleInfo::TidyLandingPads(DenseMap<MCSymbol*, uintptr_t> *LPMap) {
(LPMap && (*LPMap)[BeginLabel] != 0)) &&
(EndLabel->isDefined() ||
(LPMap && (*LPMap)[EndLabel] != 0))) continue;
-
+
LandingPad.BeginLabels.erase(LandingPad.BeginLabels.begin() + j);
LandingPad.EndLabels.erase(LandingPad.EndLabels.begin() + j);
--j, --e;
@@ -562,20 +565,3 @@ unsigned MachineModuleInfo::getPersonalityIndex() const {
// in the zero index.
return 0;
}
-
-namespace {
- /// VariableDebugSorter - Comparison to sort the VariableDbgInfo map
- /// by source location, to avoid depending on the arbitrary order that
- /// instruction selection visits variables in.
- struct VariableDebugSorter {
- bool operator()(const MachineModuleInfo::VariableDbgInfoMapTy::value_type &A,
- const MachineModuleInfo::VariableDbgInfoMapTy::value_type &B)
- const {
- if (A.second.second.getLine() != B.second.second.getLine())
- return A.second.second.getLine() < B.second.second.getLine();
- if (A.second.second.getCol() != B.second.second.getCol())
- return A.second.second.getCol() < B.second.second.getCol();
- return false;
- }
- };
-}
diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp
index 5d852f26beda..b3fb33736ffc 100644
--- a/lib/CodeGen/MachineRegisterInfo.cpp
+++ b/lib/CodeGen/MachineRegisterInfo.cpp
@@ -30,8 +30,9 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) {
MachineRegisterInfo::~MachineRegisterInfo() {
#ifndef NDEBUG
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i)
- assert(VRegInfo[i].second == 0 && "Vreg use list non-empty still?");
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i)
+ assert(VRegInfo[TargetRegisterInfo::index2VirtReg(i)].second == 0 &&
+ "Vreg use list non-empty still?");
for (unsigned i = 0, e = UsedPhysRegs.size(); i != e; ++i)
assert(!PhysRegUseDefLists[i] &&
"PhysRegUseDefLists has entries after all instructions are deleted");
@@ -44,20 +45,32 @@ MachineRegisterInfo::~MachineRegisterInfo() {
///
void
MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
- unsigned VR = Reg;
- Reg -= TargetRegisterInfo::FirstVirtualRegister;
- assert(Reg < VRegInfo.size() && "Invalid vreg!");
const TargetRegisterClass *OldRC = VRegInfo[Reg].first;
VRegInfo[Reg].first = RC;
// Remove from old register class's vregs list. This may be slow but
// fortunately this operation is rarely needed.
std::vector<unsigned> &VRegs = RegClass2VRegMap[OldRC->getID()];
- std::vector<unsigned>::iterator I = std::find(VRegs.begin(), VRegs.end(), VR);
+ std::vector<unsigned>::iterator I =
+ std::find(VRegs.begin(), VRegs.end(), Reg);
VRegs.erase(I);
// Add to new register class's vregs list.
- RegClass2VRegMap[RC->getID()].push_back(VR);
+ RegClass2VRegMap[RC->getID()].push_back(Reg);
+}
+
+const TargetRegisterClass *
+MachineRegisterInfo::constrainRegClass(unsigned Reg,
+ const TargetRegisterClass *RC) {
+ const TargetRegisterClass *OldRC = getRegClass(Reg);
+ if (OldRC == RC)
+ return RC;
+ const TargetRegisterClass *NewRC = getCommonSubClass(OldRC, RC);
+ if (!NewRC)
+ return 0;
+ if (NewRC != OldRC)
+ setRegClass(Reg, NewRC);
+ return NewRC;
}
/// createVirtualRegister - Create and return a new virtual register in the
@@ -66,17 +79,22 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) {
unsigned
MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
assert(RegClass && "Cannot create register without RegClass!");
+
+ // New virtual register number.
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(getNumVirtRegs());
+
// Add a reg, but keep track of whether the vector reallocated or not.
- void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0];
- VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0));
- RegAllocHints.push_back(std::make_pair(0, 0));
+ const unsigned FirstVirtReg = TargetRegisterInfo::index2VirtReg(0);
+ void *ArrayBase = getNumVirtRegs() == 0 ? 0 : &VRegInfo[FirstVirtReg];
+ VRegInfo.grow(Reg);
+ VRegInfo[Reg].first = RegClass;
+ RegAllocHints.grow(Reg);
- if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1)))
+ if (ArrayBase && &VRegInfo[FirstVirtReg] != ArrayBase)
// The vector reallocated, handle this now.
HandleVRegListReallocation();
- unsigned VR = getLastVirtReg();
- RegClass2VRegMap[RegClass->getID()].push_back(VR);
- return VR;
+ RegClass2VRegMap[RegClass->getID()].push_back(Reg);
+ return Reg;
}
/// HandleVRegListReallocation - We just added a virtual register to the
@@ -85,11 +103,12 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){
void MachineRegisterInfo::HandleVRegListReallocation() {
// The back pointers for the vreg lists point into the previous vector.
// Update them to point to their correct slots.
- for (unsigned i = 0, e = VRegInfo.size(); i != e; ++i) {
- MachineOperand *List = VRegInfo[i].second;
+ for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ MachineOperand *List = VRegInfo[Reg].second;
if (!List) continue;
// Update the back-pointer to be accurate once more.
- List->Contents.Reg.Prev = &VRegInfo[i].second;
+ List->Contents.Reg.Prev = &VRegInfo[Reg].second;
}
}
@@ -112,8 +131,6 @@ void MachineRegisterInfo::replaceRegWith(unsigned FromReg, unsigned ToReg) {
/// register or null if none is found. This assumes that the code is in SSA
/// form, so there should only be one definition.
MachineInstr *MachineRegisterInfo::getVRegDef(unsigned Reg) const {
- assert(Reg-TargetRegisterInfo::FirstVirtualRegister < VRegInfo.size() &&
- "Invalid vreg!");
// Since we are in SSA form, we can use the first definition.
if (!def_empty(Reg))
return &*def_begin(Reg);
@@ -193,8 +210,15 @@ MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB,
LiveIns.erase(LiveIns.begin() + i);
--i; --e;
} else {
+ DebugLoc DL;
+ // If there is a location for this live-in, then use it.
+ DenseMap<unsigned, DebugLoc>::iterator DLI =
+ LiveInLocs.find(LiveIns[i].second);
+ if (DLI != LiveInLocs.end())
+ DL = DLI->second;
+
// Emit a copy.
- BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(),
+ BuildMI(*EntryMBB, EntryMBB->begin(), DL,
TII.get(TargetOpcode::COPY), LiveIns[i].second)
.addReg(LiveIns[i].first);
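As a sketch (an editor's illustration, not from the patch): the new constrainRegClass() lets a pass narrow a virtual register's class to satisfy an instruction constraint without discarding a tighter class chosen earlier; Reg and RequiredRC below are placeholders.
// Sketch only: returns the common subclass, or 0 if Reg cannot be constrained.
if (!MRI->constrainRegClass(Reg, RequiredRC)) {
  // Incompatible classes: insert a COPY into a fresh register of RequiredRC
  // instead of reusing Reg directly.
}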
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index c8f8fafe227e..8a93a24287b6 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -25,6 +25,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -34,27 +35,31 @@ using namespace llvm;
static cl::opt<bool>
SplitEdges("machine-sink-split",
cl::desc("Split critical edges during machine sinking"),
- cl::init(false), cl::Hidden);
-static cl::opt<unsigned>
-SplitLimit("split-limit",
- cl::init(~0u), cl::Hidden);
+ cl::init(true), cl::Hidden);
-STATISTIC(NumSunk, "Number of machine instructions sunk");
-STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumSunk, "Number of machine instructions sunk");
+STATISTIC(NumSplit, "Number of critical edges split");
+STATISTIC(NumCoalesces, "Number of copies coalesced");
namespace {
class MachineSinking : public MachineFunctionPass {
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
- MachineRegisterInfo *RegInfo; // Machine register information
+ MachineRegisterInfo *MRI; // Machine register information
MachineDominatorTree *DT; // Machine dominator tree
MachineLoopInfo *LI;
AliasAnalysis *AA;
BitVector AllocatableSet; // Which physregs are allocatable?
+ // Remember which edges have been considered for breaking.
+ SmallSet<std::pair<MachineBasicBlock*,MachineBasicBlock*>, 8>
+ CEBCandidates;
+
public:
static char ID; // Pass identification
- MachineSinking() : MachineFunctionPass(ID) {}
+ MachineSinking() : MachineFunctionPass(ID) {
+ initializeMachineSinkingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -67,43 +72,125 @@ namespace {
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
}
+
+ virtual void releaseMemory() {
+ CEBCandidates.clear();
+ }
+
private:
bool ProcessBlock(MachineBasicBlock &MBB);
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *From,
- MachineBasicBlock *To);
+ bool isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To);
+ MachineBasicBlock *SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To,
+ bool BreakPHIEdge);
bool SinkInstruction(MachineInstr *MI, bool &SawStore);
bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB, bool &LocalUse) const;
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge, bool &LocalUse) const;
+ bool PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB);
};
} // end anonymous namespace
char MachineSinking::ID = 0;
-INITIALIZE_PASS(MachineSinking, "machine-sink",
- "Machine code sinking", false, false);
+INITIALIZE_PASS_BEGIN(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(MachineSinking, "machine-sink",
+ "Machine code sinking", false, false)
FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); }
+bool MachineSinking::PerformTrivialForwardCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ if (!MI->isCopy())
+ return false;
+
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(SrcReg) ||
+ !TargetRegisterInfo::isVirtualRegister(DstReg) ||
+ !MRI->hasOneNonDBGUse(SrcReg))
+ return false;
+
+ const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg);
+ const TargetRegisterClass *DRC = MRI->getRegClass(DstReg);
+ if (SRC != DRC)
+ return false;
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI->isCopyLike())
+ return false;
+ DEBUG(dbgs() << "Coalescing: " << *DefMI);
+ DEBUG(dbgs() << "*** to: " << *MI);
+ MRI->replaceRegWith(DstReg, SrcReg);
+ MI->eraseFromParent();
+ ++NumCoalesces;
+ return true;
+}
+
/// AllUsesDominatedByBlock - Return true if all uses of the specified register
/// occur in blocks dominated by the specified block. If any use is in the
/// definition block, then return false since it is never legal to move def
/// after uses.
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
- MachineBasicBlock *MBB,
- MachineBasicBlock *DefMBB,
- bool &LocalUse) const {
+bool
+MachineSinking::AllUsesDominatedByBlock(unsigned Reg,
+ MachineBasicBlock *MBB,
+ MachineBasicBlock *DefMBB,
+ bool &BreakPHIEdge,
+ bool &LocalUse) const {
assert(TargetRegisterInfo::isVirtualRegister(Reg) &&
"Only makes sense for vregs");
+
+ if (MRI->use_nodbg_empty(Reg))
+ return true;
+
// Ignoring debug uses is necessary so debug info doesn't affect the code.
// This may leave a referencing dbg_value in the original block, before
// the definition of the vreg. Dwarf generator handles this although the
// user might not get the right info at runtime.
+
+ // BreakPHIEdge is true if all the uses are in the successor MBB being sunken
+ // into and they are all PHI nodes. In this case, machine-sink must break
+ // the critical edge first. e.g.
+ //
+ // BB#1: derived from LLVM BB %bb4.preheader
+ // Predecessors according to CFG: BB#0
+ // ...
+ // %reg16385<def> = DEC64_32r %reg16437, %EFLAGS<imp-def,dead>
+ // ...
+ // JE_4 <BB#37>, %EFLAGS<imp-use>
+ // Successors according to CFG: BB#37 BB#2
+ //
+ // BB#2: derived from LLVM BB %bb.nph
+ // Predecessors according to CFG: BB#0 BB#1
+ // %reg16386<def> = PHI %reg16434, <BB#0>, %reg16385, <BB#1>
+ BreakPHIEdge = true;
for (MachineRegisterInfo::use_nodbg_iterator
- I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end();
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
I != E; ++I) {
- // Determine the block of the use.
MachineInstr *UseInst = &*I;
MachineBasicBlock *UseBlock = UseInst->getParent();
+ if (!(UseBlock == MBB && UseInst->isPHI() &&
+ UseInst->getOperand(I.getOperandNo()+1).getMBB() == DefMBB)) {
+ BreakPHIEdge = false;
+ break;
+ }
+ }
+ if (BreakPHIEdge)
+ return true;
+ for (MachineRegisterInfo::use_nodbg_iterator
+ I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end();
+ I != E; ++I) {
+ // Determine the block of the use.
+ MachineInstr *UseInst = &*I;
+ MachineBasicBlock *UseBlock = UseInst->getParent();
if (UseInst->isPHI()) {
// PHI nodes use the operand in the predecessor block, not the block with
// the PHI.
@@ -127,7 +214,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
const TargetMachine &TM = MF.getTarget();
TII = TM.getInstrInfo();
TRI = TM.getRegisterInfo();
- RegInfo = &MF.getRegInfo();
+ MRI = &MF.getRegInfo();
DT = &getAnalysis<MachineDominatorTree>();
LI = &getAnalysis<MachineLoopInfo>();
AA = &getAnalysis<AliasAnalysis>();
@@ -139,6 +226,7 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) {
bool MadeChange = false;
// Process all basic blocks.
+ CEBCandidates.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end();
I != E; ++I)
MadeChange |= ProcessBlock(*I);
@@ -177,6 +265,9 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
if (MI->isDebugValue())
continue;
+ if (PerformTrivialForwardCoalescing(MI, &MBB))
+ continue;
+
if (SinkInstruction(MI, SawStore))
++NumSunk, MadeChange = true;
@@ -186,51 +277,92 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
return MadeChange;
}
-MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
- MachineBasicBlock *ToBB) {
+bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *From,
+ MachineBasicBlock *To) {
+ // FIXME: Need much better heuristics.
+
+ // If the pass has already considered breaking this edge (during this pass
+ // through the function), then let's go ahead and break it. This means
+ // sinking multiple "cheap" instructions into the same block.
+ if (!CEBCandidates.insert(std::make_pair(From, To)))
+ return true;
+
+ if (!MI->isCopy() && !MI->getDesc().isAsCheapAsAMove())
+ return true;
+
+ // MI is cheap, we probably don't want to break the critical edge for it.
+ // However, if this would allow some definitions of its source operands
+ // to be sunk then it's probably worth it.
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MRI->hasOneNonDBGUse(Reg))
+ return true;
+ }
+
+ return false;
+}
+
+MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineInstr *MI,
+ MachineBasicBlock *FromBB,
+ MachineBasicBlock *ToBB,
+ bool BreakPHIEdge) {
+ if (!isWorthBreakingCriticalEdge(MI, FromBB, ToBB))
+ return 0;
+
// Avoid breaking back edge. From == To means backedge for single BB loop.
- if (!SplitEdges || NumSplit == SplitLimit || FromBB == ToBB)
+ if (!SplitEdges || FromBB == ToBB)
+ return 0;
+
+ // Check for backedges of more "complex" loops.
+ if (LI->getLoopFor(FromBB) == LI->getLoopFor(ToBB) &&
+ LI->isLoopHeader(ToBB))
return 0;
- // Check for more "complex" loops.
- if (LI->getLoopFor(FromBB) != LI->getLoopFor(ToBB) ||
- !LI->isLoopHeader(ToBB)) {
- // It's not always legal to break critical edges and sink the computation
- // to the edge.
- //
- // BB#1:
- // v1024
- // Beq BB#3
- // <fallthrough>
- // BB#2:
- // ... no uses of v1024
- // <fallthrough>
- // BB#3:
- // ...
- // = v1024
- //
- // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
- //
- // BB#1:
- // ...
- // Bne BB#2
- // BB#4:
- // v1024 =
- // B BB#3
- // BB#2:
- // ... no uses of v1024
- // <fallthrough>
- // BB#3:
- // ...
- // = v1024
- //
- // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
- // flow. We need to ensure the new basic block where the computation is
- // sunk to dominates all the uses.
- // It's only legal to break critical edge and sink the computation to the
- // new block if all the predecessors of "To", except for "From", are
- // not dominated by "From". Given SSA property, this means these
- // predecessors are dominated by "To".
+ // It's not always legal to break critical edges and sink the computation
+ // to the edge.
+ //
+ // BB#1:
+ // v1024
+ // Beq BB#3
+ // <fallthrough>
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // If BB#1 -> BB#3 edge is broken and computation of v1024 is inserted:
+ //
+ // BB#1:
+ // ...
+ // Bne BB#2
+ // BB#4:
+ // v1024 =
+ // B BB#3
+ // BB#2:
+ // ... no uses of v1024
+ // <fallthrough>
+ // BB#3:
+ // ...
+ // = v1024
+ //
+ // This is incorrect since v1024 is not computed along the BB#1->BB#2->BB#3
+ // flow. We need to ensure the new basic block where the computation is
+ // sunk to dominates all the uses.
+ // It's only legal to break critical edge and sink the computation to the
+ // new block if all the predecessors of "To", except for "From", are
+ // not dominated by "From". Given SSA property, this means these
+ // predecessors are dominated by "To".
+ //
+ // There is no need to do this check if all the uses are PHI nodes. PHI
+ // sources are only defined on the specific predecessor edges.
+ if (!BreakPHIEdge) {
for (MachineBasicBlock::pred_iterator PI = ToBB->pred_begin(),
E = ToBB->pred_end(); PI != E; ++PI) {
if (*PI == FromBB)
@@ -238,17 +370,23 @@ MachineBasicBlock *MachineSinking::SplitCriticalEdge(MachineBasicBlock *FromBB,
if (!DT->dominates(ToBB, *PI))
return 0;
}
-
- // FIXME: Determine if it's cost effective to break this edge.
- return FromBB->SplitCriticalEdge(ToBB, this);
}
- return 0;
+ return FromBB->SplitCriticalEdge(ToBB, this);
+}
+
+static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
+ return MI->isInsertSubreg() || MI->isSubregToReg() || MI->isRegSequence();
}
/// SinkInstruction - Determine whether it is safe to sink the specified machine
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
+ // Don't sink insert_subreg, subreg_to_reg, reg_sequence. These are meant to
+ // be close to the source to make it easier to coalesce.
+ if (AvoidsSinking(MI, MRI))
+ return false;
+
// Check if it's safe to move the instruction.
if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
@@ -269,6 +407,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// decide.
MachineBasicBlock *SuccToSinkTo = 0;
+ bool BreakPHIEdge = false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue; // Ignore non-register operands.
@@ -281,7 +420,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the physreg has no defs anywhere, it's just an ambient register
// and we can freely move its uses. Alternatively, if it's allocatable,
// it could get allocated to something with a def during allocation.
- if (!RegInfo->def_empty(Reg))
+ if (!MRI->def_empty(Reg))
return false;
if (AllocatableSet.test(Reg))
@@ -290,7 +429,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check for a def among the register's aliases too.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
- if (!RegInfo->def_empty(AliasReg))
+ if (!MRI->def_empty(AliasReg))
return false;
if (AllocatableSet.test(AliasReg))
@@ -305,7 +444,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (MO.isUse()) continue;
// If it's not safe to move defs of the register class, then abort.
- if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg)))
+ if (!TII->isSafeToMoveRegClassDefs(MRI->getRegClass(Reg)))
return false;
// FIXME: This picks a successor to sink into based on having one
@@ -327,7 +466,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If a previous operand picked a block to sink to, then this operand
// must be sinkable to the same block.
bool LocalUse = false;
- if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock, LocalUse))
+ if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo, ParentBlock,
+ BreakPHIEdge, LocalUse))
return false;
continue;
@@ -338,7 +478,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(),
E = ParentBlock->succ_end(); SI != E; ++SI) {
bool LocalUse = false;
- if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock, LocalUse)) {
+ if (AllUsesDominatedByBlock(Reg, *SI, ParentBlock,
+ BreakPHIEdge, LocalUse)) {
SuccToSinkTo = *SI;
break;
}
@@ -384,7 +525,6 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// If the block has multiple predecessors, this would introduce computation on
// a path where it doesn't already exist. We could split the critical edge,
// but for now we just punt.
- // FIXME: Split critical edges if not backedges.
if (SuccToSinkTo->pred_size() > 1) {
// We cannot sink a load across a critical edge - there may be stores in
// other code paths.
@@ -412,10 +552,11 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
if (!TryBreak)
DEBUG(dbgs() << "Sinking along critical edge.\n");
else {
- MachineBasicBlock *NewSucc = SplitCriticalEdge(ParentBlock, SuccToSinkTo);
+ MachineBasicBlock *NewSucc =
+ SplitCriticalEdge(MI, ParentBlock, SuccToSinkTo, BreakPHIEdge);
if (!NewSucc) {
- DEBUG(dbgs() <<
- " *** PUNTING: Not legal or profitable to break critical edge\n");
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
return false;
} else {
DEBUG(dbgs() << " *** Splitting critical edge:"
@@ -424,10 +565,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
<< " -- BB#" << SuccToSinkTo->getNumber() << '\n');
SuccToSinkTo = NewSucc;
++NumSplit;
+ BreakPHIEdge = false;
}
}
}
+ if (BreakPHIEdge) {
+ // BreakPHIEdge is true if all the uses are in the successor MBB being
+ // sunken into and they are all PHI nodes. In this case, machine-sink must
+ // break the critical edge first.
+ MachineBasicBlock *NewSucc = SplitCriticalEdge(MI, ParentBlock,
+ SuccToSinkTo, BreakPHIEdge);
+ if (!NewSucc) {
+ DEBUG(dbgs() << " *** PUNTING: Not legal or profitable to "
+ "break critical edge\n");
+ return false;
+ }
+
+ DEBUG(dbgs() << " *** Splitting critical edge:"
+ " BB#" << ParentBlock->getNumber()
+ << " -- BB#" << NewSucc->getNumber()
+ << " -- BB#" << SuccToSinkTo->getNumber() << '\n');
+ SuccToSinkTo = NewSucc;
+ ++NumSplit;
+ }
+
// Determine where to insert into. Skip phi nodes.
MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
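The new isWorthBreakingCriticalEdge heuristic above remembers every (From, To) edge it has already considered in CEBCandidates, so later cheap instructions can keep sinking across an edge the pass has already decided to split. A small self-contained sketch of that memoization idiom, assuming this revision's SmallSet::insert, which returns true only when a new element is added; EdgeKey and alreadyConsidered are illustrative names and plain pointers stand in for MachineBasicBlock*.

#include <utility>
#include "llvm/ADT/SmallSet.h"

typedef std::pair<const void*, const void*> EdgeKey;   // stand-in for a (From, To) block pair

// True if this (From, To) edge was queried before; the first query records it.
// isWorthBreakingCriticalEdge above inverts the same insert() result.
static bool alreadyConsidered(llvm::SmallSet<EdgeKey, 8> &Seen,
                              const void *From, const void *To) {
  return !Seen.insert(std::make_pair(From, To));
}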
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 1e88562935ea..7351119f4728 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -26,6 +26,7 @@
#include "llvm/Function.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -45,14 +46,16 @@ using namespace llvm;
namespace {
struct MachineVerifier {
- MachineVerifier(Pass *pass) :
+ MachineVerifier(Pass *pass, const char *b) :
PASS(pass),
+ Banner(b),
OutFileName(getenv("LLVM_VERIFY_MACHINEINSTRS"))
{}
bool runOnMachineFunction(MachineFunction &MF);
Pass *const PASS;
+ const char *Banner;
const char *const OutFileName;
raw_ostream *OS;
const MachineFunction *MF;
@@ -71,6 +74,8 @@ namespace {
RegVector regsDefined, regsDead, regsKilled;
RegSet regsLiveInButUnused;
+ SlotIndex lastIndex;
+
// Add Reg and any sub-registers to RV
void addRegWithSubRegs(RegVector &RV, unsigned Reg) {
RV.push_back(Reg);
@@ -167,7 +172,9 @@ namespace {
// Analysis information if available
LiveVariables *LiveVars;
- const LiveIntervals *LiveInts;
+ LiveIntervals *LiveInts;
+ LiveStacks *LiveStks;
+ SlotIndexes *Indexes;
void visitMachineFunctionBefore();
void visitMachineBasicBlockBefore(const MachineBasicBlock *MBB);
@@ -193,9 +200,12 @@ namespace {
struct MachineVerifierPass : public MachineFunctionPass {
static char ID; // Pass ID, replacement for typeid
+ const char *const Banner;
- MachineVerifierPass()
- : MachineFunctionPass(ID) {}
+ MachineVerifierPass(const char *b = 0)
+ : MachineFunctionPass(ID), Banner(b) {
+ initializeMachineVerifierPassPass(*PassRegistry::getPassRegistry());
+ }
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
@@ -203,7 +213,7 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &MF) {
- MF.verify(this);
+ MF.verify(this, Banner);
return false;
}
};
@@ -212,14 +222,15 @@ namespace {
char MachineVerifierPass::ID = 0;
INITIALIZE_PASS(MachineVerifierPass, "machineverifier",
- "Verify generated machine code", false, false);
+ "Verify generated machine code", false, false)
-FunctionPass *llvm::createMachineVerifierPass() {
- return new MachineVerifierPass();
+FunctionPass *llvm::createMachineVerifierPass(const char *Banner) {
+ return new MachineVerifierPass(Banner);
}
-void MachineFunction::verify(Pass *p) const {
- MachineVerifier(p).runOnMachineFunction(const_cast<MachineFunction&>(*this));
+void MachineFunction::verify(Pass *p, const char *Banner) const {
+ MachineVerifier(p, Banner)
+ .runOnMachineFunction(const_cast<MachineFunction&>(*this));
}
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
@@ -247,11 +258,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
LiveVars = NULL;
LiveInts = NULL;
+ LiveStks = NULL;
+ Indexes = NULL;
if (PASS) {
LiveInts = PASS->getAnalysisIfAvailable<LiveIntervals>();
// We don't want to verify LiveVariables if LiveIntervals is available.
if (!LiveInts)
LiveVars = PASS->getAnalysisIfAvailable<LiveVariables>();
+ LiveStks = PASS->getAnalysisIfAvailable<LiveStacks>();
+ Indexes = PASS->getAnalysisIfAvailable<SlotIndexes>();
}
visitMachineFunctionBefore();
@@ -260,6 +275,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
visitMachineBasicBlockBefore(MFI);
for (MachineBasicBlock::const_iterator MBBI = MFI->begin(),
MBBE = MFI->end(); MBBI != MBBE; ++MBBI) {
+ if (MBBI->getParent() != MFI) {
+ report("Bad instruction parent pointer", MFI);
+ *OS << "Instruction: " << *MBBI;
+ continue;
+ }
visitMachineInstrBefore(MBBI);
for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I)
visitMachineOperand(&MBBI->getOperand(I), I);
@@ -288,8 +308,11 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
void MachineVerifier::report(const char *msg, const MachineFunction *MF) {
assert(MF);
*OS << '\n';
- if (!foundErrors++)
- MF->print(*OS);
+ if (!foundErrors++) {
+ if (Banner)
+ *OS << "# " << Banner << '\n';
+ MF->print(*OS, Indexes);
+ }
*OS << "*** Bad machine code: " << msg << " ***\n"
<< "- function: " << MF->getFunction()->getNameStr() << "\n";
}
@@ -299,13 +322,19 @@ void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB) {
report(msg, MBB->getParent());
*OS << "- basic block: " << MBB->getName()
<< " " << (void*)MBB
- << " (BB#" << MBB->getNumber() << ")\n";
+ << " (BB#" << MBB->getNumber() << ")";
+ if (Indexes)
+ *OS << " [" << Indexes->getMBBStartIdx(MBB)
+ << ';' << Indexes->getMBBEndIdx(MBB) << ')';
+ *OS << '\n';
}
void MachineVerifier::report(const char *msg, const MachineInstr *MI) {
assert(MI);
report(msg, MI->getParent());
*OS << "- instruction: ";
+ if (Indexes && Indexes->hasIndex(MI))
+ *OS << Indexes->getInstructionIndex(MI) << '\t';
MI->print(*OS, TM);
}
@@ -329,6 +358,7 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
}
void MachineVerifier::visitMachineFunctionBefore() {
+ lastIndex = SlotIndex();
regsReserved = TRI->getReservedRegs(*MF);
// A sub-register of a reserved register is also reserved
@@ -357,6 +387,16 @@ void
MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ // Count the number of landing pad successors.
+ SmallPtrSet<MachineBasicBlock*, 4> LandingPadSuccs;
+ for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(),
+ E = MBB->succ_end(); I != E; ++I) {
+ if ((*I)->isLandingPad())
+ LandingPadSuccs.insert(*I);
+ }
+ if (LandingPadSuccs.size() > 1)
+ report("MBB has more than one landing pad successor", MBB);
+
// Call AnalyzeBranch. If it succeeds, there are several more conditions to check.
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
@@ -372,14 +412,14 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
// It's possible that the block legitimately ends with a noreturn
// call or an unreachable, in which case it won't actually fall
// out the bottom of the function.
- } else if (MBB->succ_empty()) {
+ } else if (MBB->succ_size() == LandingPadSuccs.size()) {
// It's possible that the block legitimately ends with a noreturn
// call or an unreachable, in which case it won't actually fall
// out of the block.
- } else if (MBB->succ_size() != 1) {
+ } else if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional fall-through but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (MBB->succ_begin()[0] != MBBI) {
+ } else if (!MBB->isSuccessor(MBBI)) {
report("MBB exits via unconditional fall-through but its successor "
"differs from its CFG successor!", MBB);
}
@@ -394,10 +434,10 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
}
} else if (TBB && !FBB && Cond.empty()) {
// Block unconditionally branches somewhere.
- if (MBB->succ_size() != 1) {
+ if (MBB->succ_size() != 1+LandingPadSuccs.size()) {
report("MBB exits via unconditional branch but doesn't have "
"exactly one CFG successor!", MBB);
- } else if (MBB->succ_begin()[0] != TBB) {
+ } else if (!MBB->isSuccessor(TBB)) {
report("MBB exits via unconditional branch but the CFG "
"successor doesn't match the actual successor!", MBB);
}
@@ -487,6 +527,9 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
regsKilled.clear();
regsDefined.clear();
+
+ if (Indexes)
+ lastIndex = Indexes->getMBBStartIdx(MBB);
}
void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) {
@@ -525,6 +568,7 @@ void
MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
const MachineInstr *MI = MO->getParent();
const TargetInstrDesc &TI = MI->getDesc();
+ const TargetOperandInfo &TOI = TI.OpInfo[MONum];
// The first TI.NumDefs operands must be explicit register defines
if (MONum < TI.getNumDefs()) {
@@ -535,9 +579,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
else if (MO->isImplicit())
report("Explicit definition marked as implicit", MO, MONum);
} else if (MONum < TI.getNumOperands()) {
- if (MO->isReg()) {
- if (MO->isDef())
- report("Explicit operand marked as def", MO, MONum);
+ // Don't check if it's the last operand in a variadic instruction. See,
+ // e.g., LDM_RET in the arm back end.
+ if (MO->isReg() && !(TI.isVariadic() && MONum == TI.getNumOperands()-1)) {
+ if (MO->isDef() && !TOI.isOptionalDef())
+ report("Explicit operand marked as def", MO, MONum);
if (MO->isImplicit())
report("Explicit operand marked as implicit", MO, MONum);
}
@@ -554,7 +600,9 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
return;
// Check Live Variables.
- if (MO->isUndef()) {
+ if (MI->isDebugValue()) {
+ // Liveness checks are not valid for debug values.
+ } else if (MO->isUndef()) {
// An <undef> doesn't refer to any register, so just skip it.
} else if (MO->isUse()) {
regsLiveInButUnused.erase(Reg);
@@ -566,7 +614,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
unsigned DefReg = MI->getOperand(defIdx).getReg();
if (Reg == DefReg) {
isKill = true;
- // ANd in that case an explicit kill flag is not allowed.
+ // And in that case an explicit kill flag is not allowed.
if (MO->isKill())
report("Illegal kill flag on two-address instruction operand",
MO, MONum);
@@ -590,7 +638,8 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
}
// Check LiveInts liveness and kill.
- if (LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
SlotIndex UseIdx = LiveInts->getInstructionIndex(MI).getUseIndex();
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
@@ -598,8 +647,13 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("No live range at use", MO, MONum);
*OS << UseIdx << " is not live in " << LI << '\n';
}
- // TODO: Verify isKill == LI.killedAt.
- } else if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ // Check for extra kill flags.
+ // Note that we allow missing kill flags for now.
+ if (MO->isKill() && !LI.killedAt(UseIdx.getDefIndex())) {
+ report("Live range continues after kill flag", MO, MONum);
+ *OS << "Live range: " << LI << '\n';
+ }
+ } else {
report("Virtual register has no Live interval", MO, MONum);
}
}
@@ -636,11 +690,11 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
SlotIndex DefIdx = LiveInts->getInstructionIndex(MI).getDefIndex();
if (LiveInts->hasInterval(Reg)) {
const LiveInterval &LI = LiveInts->getInterval(Reg);
- if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) {
- assert(LR->valno && "NULL valno is not allowed");
- if (LR->valno->def != DefIdx) {
+ if (const VNInfo *VNI = LI.getVNInfoAt(DefIdx)) {
+ assert(VNI && "NULL valno is not allowed");
+ if (VNI->def != DefIdx && !MO->isEarlyClobber()) {
report("Inconsistent valno->def", MO, MONum);
- *OS << "Valno " << LR->valno->id << " is not defined at "
+ *OS << "Valno " << VNI->id << " is not defined at "
<< DefIdx << " in " << LI << '\n';
}
} else {
@@ -655,7 +709,6 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
// Check register classes.
if (MONum < TI.getNumOperands() && !MO->isImplicit()) {
- const TargetOperandInfo &TOI = TI.OpInfo[MONum];
unsigned SubIdx = MO->getSubReg();
if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
@@ -706,6 +759,22 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) {
report("PHI operand is not in the CFG", MO, MONum);
break;
+ case MachineOperand::MO_FrameIndex:
+ if (LiveStks && LiveStks->hasInterval(MO->getIndex()) &&
+ LiveInts && !LiveInts->isNotInMIMap(MI)) {
+ LiveInterval &LI = LiveStks->getInterval(MO->getIndex());
+ SlotIndex Idx = LiveInts->getInstructionIndex(MI);
+ if (TI.mayLoad() && !LI.liveAt(Idx.getUseIndex())) {
+ report("Instruction loads from dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ if (TI.mayStore() && !LI.liveAt(Idx.getDefIndex())) {
+ report("Instruction stores to dead spill slot", MO, MONum);
+ *OS << "Live stack: " << LI << '\n';
+ }
+ }
+ break;
+
default:
break;
}
@@ -717,12 +786,31 @@ void MachineVerifier::visitMachineInstrAfter(const MachineInstr *MI) {
set_subtract(regsLive, regsKilled); regsKilled.clear();
set_subtract(regsLive, regsDead); regsDead.clear();
set_union(regsLive, regsDefined); regsDefined.clear();
+
+ if (Indexes && Indexes->hasIndex(MI)) {
+ SlotIndex idx = Indexes->getInstructionIndex(MI);
+ if (!(idx > lastIndex)) {
+ report("Instruction index out of order", MI);
+ *OS << "Last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = idx;
+ }
}
void
MachineVerifier::visitMachineBasicBlockAfter(const MachineBasicBlock *MBB) {
MBBInfoMap[MBB].regsLiveOut = regsLive;
regsLive.clear();
+
+ if (Indexes) {
+ SlotIndex stop = Indexes->getMBBEndIdx(MBB);
+ if (!(stop > lastIndex)) {
+ report("Block ends before last instruction index", MBB);
+ *OS << "Block ends at " << stop
+ << " last instruction was at " << lastIndex << '\n';
+ }
+ lastIndex = stop;
+ }
}
// Calculate the largest possible vregsPassed sets. These are the registers that
@@ -854,8 +942,8 @@ void MachineVerifier::visitMachineFunctionAfter() {
void MachineVerifier::verifyLiveVariables() {
assert(LiveVars && "Don't call verifyLiveVariables without LiveVars");
- for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
- RegE = MRI->getLastVirtReg()-1; Reg != RegE; ++Reg) {
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
LiveVariables::VarInfo &VI = LiveVars->getVarInfo(Reg);
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end();
MFI != MFE; ++MFI) {
@@ -865,13 +953,13 @@ void MachineVerifier::verifyLiveVariables() {
if (MInfo.vregsRequired.count(Reg)) {
if (!VI.AliveBlocks.test(MFI->getNumber())) {
report("LiveVariables: Block missing from AliveBlocks", MFI);
- *OS << "Virtual register %reg" << Reg
+ *OS << "Virtual register " << PrintReg(Reg)
<< " must be live through the block.\n";
}
} else {
if (VI.AliveBlocks.test(MFI->getNumber())) {
report("LiveVariables: Block should not be in AliveBlocks", MFI);
- *OS << "Virtual register %reg" << Reg
+ *OS << "Virtual register " << PrintReg(Reg)
<< " is not needed live through the block.\n";
}
}
@@ -884,14 +972,24 @@ void MachineVerifier::verifyLiveIntervals() {
for (LiveIntervals::const_iterator LVI = LiveInts->begin(),
LVE = LiveInts->end(); LVI != LVE; ++LVI) {
const LiveInterval &LI = *LVI->second;
+
+ // Spilling and splitting may leave unused registers around. Skip them.
+ if (MRI->use_empty(LI.reg))
+ continue;
+
+ // Physical registers have much weirdness going on, mostly from coalescing.
+ // We should probably fix it, but for now just ignore them.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg))
+ continue;
+
assert(LVI->first == LI.reg && "Invalid reg to interval mapping");
for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end();
I!=E; ++I) {
VNInfo *VNI = *I;
- const LiveRange *DefLR = LI.getLiveRangeContaining(VNI->def);
+ const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def);
- if (!DefLR) {
+ if (!DefVNI) {
if (!VNI->isUnused()) {
report("Valno not live at def and not marked unused", MF);
*OS << "Valno #" << VNI->id << " in " << LI << '\n';
@@ -902,31 +1000,216 @@ void MachineVerifier::verifyLiveIntervals() {
if (VNI->isUnused())
continue;
- if (DefLR->valno != VNI) {
+ if (DefVNI != VNI) {
report("Live range at def has different valno", MF);
- DefLR->print(*OS);
- *OS << " should use valno #" << VNI->id << " in " << LI << '\n';
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " where valno #" << DefVNI->id << " is live in " << LI << '\n';
+ continue;
}
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def);
+ if (!MBB) {
+ report("Invalid definition index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ continue;
+ }
+
+ if (VNI->isPHIDef()) {
+ if (VNI->def != LiveInts->getMBBStartIdx(MBB)) {
+ report("PHIDef value is not defined at MBB start", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << ", not at the beginning of BB#" << MBB->getNumber()
+ << " in " << LI << '\n';
+ }
+ } else {
+ // Non-PHI def.
+ const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def);
+ if (!MI) {
+ report("No instruction at def index", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ } else if (!MI->modifiesRegister(LI.reg, TRI)) {
+ report("Defining instruction does not modify register", MI);
+ *OS << "Valno #" << VNI->id << " in " << LI << '\n';
+ }
+
+ bool isEarlyClobber = false;
+ if (MI) {
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() &&
+ MOI->isEarlyClobber()) {
+ isEarlyClobber = true;
+ break;
+ }
+ }
+ }
+
+ // Early clobber defs begin at USE slots, but other defs must begin at
+ // DEF slots.
+ if (isEarlyClobber) {
+ if (!VNI->def.isUse()) {
+ report("Early clobber def must be at a USE slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ } else if (!VNI->def.isDef()) {
+ report("Non-PHI, non-early clobber def must be at a DEF slot", MF);
+ *OS << "Valno #" << VNI->id << " is defined at " << VNI->def
+ << " in " << LI << '\n';
+ }
+ }
}
for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) {
- const LiveRange &LR = *I;
- assert(LR.valno && "Live range has no valno");
+ const VNInfo *VNI = I->valno;
+ assert(VNI && "Live range has no valno");
- if (LR.valno->id >= LI.getNumValNums() ||
- LR.valno != LI.getValNumInfo(LR.valno->id)) {
+ if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) {
report("Foreign valno in live range", MF);
- LR.print(*OS);
+ I->print(*OS);
*OS << " has a valno not in " << LI << '\n';
}
- if (LR.valno->isUnused()) {
+ if (VNI->isUnused()) {
report("Live range valno is marked unused", MF);
- LR.print(*OS);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+
+ const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start);
+ if (!MBB) {
+ report("Bad start of live segment, no basic block", MF);
+ I->print(*OS);
*OS << " in " << LI << '\n';
+ continue;
+ }
+ SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB);
+ if (I->start != MBBStartIdx && I->start != VNI->def) {
+ report("Live segment must begin at MBB entry or valno def", MBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ }
+
+ const MachineBasicBlock *EndMBB =
+ LiveInts->getMBBFromIndex(I->end.getPrevSlot());
+ if (!EndMBB) {
+ report("Bad end of live segment, no basic block", MF);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ continue;
+ }
+ if (I->end != LiveInts->getMBBEndIdx(EndMBB)) {
+ // The live segment is ending inside EndMBB
+ const MachineInstr *MI =
+ LiveInts->getInstructionFromIndex(I->end.getPrevSlot());
+ if (!MI) {
+ report("Live segment doesn't end at a valid instruction", EndMBB);
+ I->print(*OS);
+ *OS << " in " << LI << '\n' << "Basic block starts at "
+ << MBBStartIdx << '\n';
+ } else if (TargetRegisterInfo::isVirtualRegister(LI.reg) &&
+ !MI->readsVirtualRegister(LI.reg)) {
+ // A live range can end with either a redefinition, a kill flag on a
+ // use, or a dead flag on a def.
+ // FIXME: Should we check for each of these?
+ bool hasDeadDef = false;
+ for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ if (MOI->isReg() && MOI->getReg() == LI.reg && MOI->isDef() && MOI->isDead()) {
+ hasDeadDef = true;
+ break;
+ }
+ }
+
+ if (!hasDeadDef) {
+ report("Instruction killing live segment neither defines nor reads "
+ "register", MI);
+ I->print(*OS);
+ *OS << " in " << LI << '\n';
+ }
+ }
+ }
+
+ // Now check all the basic blocks in this live segment.
+ MachineFunction::const_iterator MFI = MBB;
+ // Is this live range the beginning of a non-PHIDef VN?
+ if (I->start == VNI->def && !VNI->isPHIDef()) {
+ // Not live-in to any blocks.
+ if (MBB == EndMBB)
+ continue;
+ // Skip this block.
+ ++MFI;
+ }
+ for (;;) {
+ assert(LiveInts->isLiveInToMBB(LI, MFI));
+ // We don't know how to track physregs into a landing pad.
+ if (TargetRegisterInfo::isPhysicalRegister(LI.reg) &&
+ MFI->isLandingPad()) {
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
+ continue;
+ }
+ // Check that VNI is live-out of all predecessors.
+ for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(),
+ PE = MFI->pred_end(); PI != PE; ++PI) {
+ SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI).getPrevSlot();
+ const VNInfo *PVNI = LI.getVNInfoAt(PEnd);
+
+ if (VNI->isPHIDef() && VNI->def == LiveInts->getMBBStartIdx(MFI)) {
+ if (PVNI && !PVNI->hasPHIKill()) {
+ report("Value live out of predecessor doesn't have PHIKill", MF);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << " doesn't have PHIKill, but Valno #" << VNI->id
+ << " is PHIDef and defined at the beginning of BB#"
+ << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI)
+ << " in " << LI << '\n';
+ }
+ continue;
+ }
+
+ if (!PVNI) {
+ report("Register not marked live out of predecessor", *PI);
+ *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live at "
+ << PEnd << " in " << LI << '\n';
+ continue;
+ }
+
+ if (PVNI != VNI) {
+ report("Different value live out of predecessor", *PI);
+ *OS << "Valno #" << PVNI->id << " live out of BB#"
+ << (*PI)->getNumber() << '@' << PEnd
+ << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber()
+ << '@' << LiveInts->getMBBStartIdx(MFI) << " in " << LI << '\n';
+ }
+ }
+ if (&*MFI == EndMBB)
+ break;
+ ++MFI;
}
+ }
+ // Check the LI only has one connected component.
+ if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
+ ConnectedVNInfoEqClasses ConEQ(*LiveInts);
+ unsigned NumComp = ConEQ.Classify(&LI);
+ if (NumComp > 1) {
+ report("Multiple connected components in live interval", MF);
+ *OS << NumComp << " components in " << LI << '\n';
+ for (unsigned comp = 0; comp != NumComp; ++comp) {
+ *OS << comp << ": valnos";
+ for (LiveInterval::const_vni_iterator I = LI.vni_begin(),
+ E = LI.vni_end(); I!=E; ++I)
+ if (comp == ConEQ.getEqClass(*I))
+ *OS << ' ' << (*I)->id;
+ *OS << '\n';
+ }
+ }
}
}
}
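With the Banner plumbing added above, callers can tag verifier output so interleaved verification runs are distinguishable: on the first error the banner is printed as "# <banner>" ahead of the machine-function dump. A hedged usage sketch; the banner string and the helper name are illustrative, only the verify(Pass*, const char*) signature comes from this import.

#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Pass.h"

// Verify a function in place, tagging any report with a banner.
static void verifyAfterSinking(llvm::MachineFunction &MF, llvm::Pass *P) {
  MF.verify(P, "After machine sinking");
}

// Alternatively, the standalone pass form added above (scheduled via a pass manager):
//   PM.add(llvm::createMachineVerifierPass("After machine sinking"));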
diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp
index edb4eea71b8a..c05be130ec61 100644
--- a/lib/CodeGen/OptimizePHIs.cpp
+++ b/lib/CodeGen/OptimizePHIs.cpp
@@ -33,7 +33,9 @@ namespace {
public:
static char ID; // Pass identification
- OptimizePHIs() : MachineFunctionPass(ID) {}
+ OptimizePHIs() : MachineFunctionPass(ID) {
+ initializeOptimizePHIsPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -55,7 +57,7 @@ namespace {
char OptimizePHIs::ID = 0;
INITIALIZE_PASS(OptimizePHIs, "opt-phis",
- "Optimize machine instruction PHIs", false, false);
+ "Optimize machine instruction PHIs", false, false)
FunctionPass *llvm::createOptimizePHIsPass() { return new OptimizePHIs(); }
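The OptimizePHIs change follows the registration idiom applied throughout this import: the constructor eagerly initializes the pass with the global registry, and INITIALIZE_PASS no longer takes a trailing semicolon. A sketch of the pattern under a hypothetical pass name; MyMachinePass and its initialize hook are stand-ins, with the forward declaration playing the role InitializePasses.h plays in-tree.

#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/PassRegistry.h"

namespace llvm { void initializeMyMachinePassPass(PassRegistry &); }

namespace {
  class MyMachinePass : public llvm::MachineFunctionPass {
  public:
    static char ID;    // Pass identification
    MyMachinePass() : llvm::MachineFunctionPass(ID) {
      // Register with the global registry up front instead of lazily.
      llvm::initializeMyMachinePassPass(*llvm::PassRegistry::getPassRegistry());
    }
    virtual bool runOnMachineFunction(llvm::MachineFunction &) { return false; }
  };
} // end anonymous namespace

char MyMachinePass::ID = 0;
INITIALIZE_PASS(MyMachinePass, "my-machine-pass",
                "Illustrative machine pass", false, false)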
diff --git a/lib/CodeGen/PBQP/Graph.h b/lib/CodeGen/PBQP/Graph.h
deleted file mode 100644
index b2224cb051dc..000000000000
--- a/lib/CodeGen/PBQP/Graph.h
+++ /dev/null
@@ -1,425 +0,0 @@
-//===-------------------- Graph.h - PBQP Graph ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Graph class.
-//
-//===----------------------------------------------------------------------===//
-
-
-#ifndef LLVM_CODEGEN_PBQP_GRAPH_H
-#define LLVM_CODEGEN_PBQP_GRAPH_H
-
-#include "Math.h"
-
-#include <list>
-#include <vector>
-#include <map>
-
-namespace PBQP {
-
- /// PBQP Graph class.
- /// Instances of this class describe PBQP problems.
- class Graph {
- private:
-
- // ----- TYPEDEFS -----
- class NodeEntry;
- class EdgeEntry;
-
- typedef std::list<NodeEntry> NodeList;
- typedef std::list<EdgeEntry> EdgeList;
-
- public:
-
- typedef NodeList::iterator NodeItr;
- typedef NodeList::const_iterator ConstNodeItr;
-
- typedef EdgeList::iterator EdgeItr;
- typedef EdgeList::const_iterator ConstEdgeItr;
-
- private:
-
- typedef std::list<EdgeItr> AdjEdgeList;
-
- public:
-
- typedef AdjEdgeList::iterator AdjEdgeItr;
-
- private:
-
- class NodeEntry {
- private:
- Vector costs;
- AdjEdgeList adjEdges;
- unsigned degree;
- void *data;
- public:
- NodeEntry(const Vector &costs) : costs(costs), degree(0) {}
- Vector& getCosts() { return costs; }
- const Vector& getCosts() const { return costs; }
- unsigned getDegree() const { return degree; }
- AdjEdgeItr edgesBegin() { return adjEdges.begin(); }
- AdjEdgeItr edgesEnd() { return adjEdges.end(); }
- AdjEdgeItr addEdge(EdgeItr e) {
- ++degree;
- return adjEdges.insert(adjEdges.end(), e);
- }
- void removeEdge(AdjEdgeItr ae) {
- --degree;
- adjEdges.erase(ae);
- }
- void setData(void *data) { this->data = data; }
- void* getData() { return data; }
- };
-
- class EdgeEntry {
- private:
- NodeItr node1, node2;
- Matrix costs;
- AdjEdgeItr node1AEItr, node2AEItr;
- void *data;
- public:
- EdgeEntry(NodeItr node1, NodeItr node2, const Matrix &costs)
- : node1(node1), node2(node2), costs(costs) {}
- NodeItr getNode1() const { return node1; }
- NodeItr getNode2() const { return node2; }
- Matrix& getCosts() { return costs; }
- const Matrix& getCosts() const { return costs; }
- void setNode1AEItr(AdjEdgeItr ae) { node1AEItr = ae; }
- AdjEdgeItr getNode1AEItr() { return node1AEItr; }
- void setNode2AEItr(AdjEdgeItr ae) { node2AEItr = ae; }
- AdjEdgeItr getNode2AEItr() { return node2AEItr; }
- void setData(void *data) { this->data = data; }
- void *getData() { return data; }
- };
-
- // ----- MEMBERS -----
-
- NodeList nodes;
- unsigned numNodes;
-
- EdgeList edges;
- unsigned numEdges;
-
- // ----- INTERNAL METHODS -----
-
- NodeEntry& getNode(NodeItr nItr) { return *nItr; }
- const NodeEntry& getNode(ConstNodeItr nItr) const { return *nItr; }
-
- EdgeEntry& getEdge(EdgeItr eItr) { return *eItr; }
- const EdgeEntry& getEdge(ConstEdgeItr eItr) const { return *eItr; }
-
- NodeItr addConstructedNode(const NodeEntry &n) {
- ++numNodes;
- return nodes.insert(nodes.end(), n);
- }
-
- EdgeItr addConstructedEdge(const EdgeEntry &e) {
- assert(findEdge(e.getNode1(), e.getNode2()) == edges.end() &&
- "Attempt to add duplicate edge.");
- ++numEdges;
- EdgeItr edgeItr = edges.insert(edges.end(), e);
- EdgeEntry &ne = getEdge(edgeItr);
- NodeEntry &n1 = getNode(ne.getNode1());
- NodeEntry &n2 = getNode(ne.getNode2());
- // Sanity check on matrix dimensions:
- assert((n1.getCosts().getLength() == ne.getCosts().getRows()) &&
- (n2.getCosts().getLength() == ne.getCosts().getCols()) &&
- "Edge cost dimensions do not match node costs dimensions.");
- ne.setNode1AEItr(n1.addEdge(edgeItr));
- ne.setNode2AEItr(n2.addEdge(edgeItr));
- return edgeItr;
- }
-
- inline void copyFrom(const Graph &other);
- public:
-
- /// \brief Construct an empty PBQP graph.
- Graph() : numNodes(0), numEdges(0) {}
-
- /// \brief Copy construct this graph from "other". Note: Does not copy node
- /// and edge data, only graph structure and costs.
- /// @param other Source graph to copy from.
- Graph(const Graph &other) : numNodes(0), numEdges(0) {
- copyFrom(other);
- }
-
- /// \brief Make this graph a copy of "other". Note: Does not copy node and
- /// edge data, only graph structure and costs.
- /// @param other The graph to copy from.
- /// @return A reference to this graph.
- ///
- /// This will clear the current graph, erasing any nodes and edges added,
- /// before copying from other.
- Graph& operator=(const Graph &other) {
- clear();
- copyFrom(other);
- return *this;
- }
-
- /// \brief Add a node with the given costs.
- /// @param costs Cost vector for the new node.
- /// @return Node iterator for the added node.
- NodeItr addNode(const Vector &costs) {
- return addConstructedNode(NodeEntry(costs));
- }
-
- /// \brief Add an edge between the given nodes with the given costs.
- /// @param n1Itr First node.
- /// @param n2Itr Second node.
- /// @return Edge iterator for the added edge.
- EdgeItr addEdge(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr,
- const Matrix &costs) {
- assert(getNodeCosts(n1Itr).getLength() == costs.getRows() &&
- getNodeCosts(n2Itr).getLength() == costs.getCols() &&
- "Matrix dimensions mismatch.");
- return addConstructedEdge(EdgeEntry(n1Itr, n2Itr, costs));
- }
-
- /// \brief Get the number of nodes in the graph.
- /// @return Number of nodes in the graph.
- unsigned getNumNodes() const { return numNodes; }
-
- /// \brief Get the number of edges in the graph.
- /// @return Number of edges in the graph.
- unsigned getNumEdges() const { return numEdges; }
-
- /// \brief Get a node's cost vector.
- /// @param nItr Node iterator.
- /// @return Node cost vector.
- Vector& getNodeCosts(NodeItr nItr) { return getNode(nItr).getCosts(); }
-
- /// \brief Get a node's cost vector (const version).
- /// @param nItr Node iterator.
- /// @return Node cost vector.
- const Vector& getNodeCosts(ConstNodeItr nItr) const {
- return getNode(nItr).getCosts();
- }
-
- /// \brief Set a node's data pointer.
- /// @param nItr Node iterator.
- /// @param data Pointer to node data.
- ///
- /// Typically used by a PBQP solver to attach data to aid in solution.
- void setNodeData(NodeItr nItr, void *data) { getNode(nItr).setData(data); }
-
- /// \brief Get the node's data pointer.
- /// @param nItr Node iterator.
- /// @return Pointer to node data.
- void* getNodeData(NodeItr nItr) { return getNode(nItr).getData(); }
-
- /// \brief Get an edge's cost matrix.
- /// @param eItr Edge iterator.
- /// @return Edge cost matrix.
- Matrix& getEdgeCosts(EdgeItr eItr) { return getEdge(eItr).getCosts(); }
-
- /// \brief Get an edge's cost matrix (const version).
- /// @param eItr Edge iterator.
- /// @return Edge cost matrix.
- const Matrix& getEdgeCosts(ConstEdgeItr eItr) const {
- return getEdge(eItr).getCosts();
- }
-
- /// \brief Set an edge's data pointer.
- /// @param eItr Edge iterator.
- /// @param data Pointer to edge data.
- ///
- /// Typically used by a PBQP solver to attach data to aid in solution.
- void setEdgeData(EdgeItr eItr, void *data) { getEdge(eItr).setData(data); }
-
- /// \brief Get an edge's data pointer.
- /// @param eItr Edge iterator.
- /// @return Pointer to edge data.
- void* getEdgeData(EdgeItr eItr) { return getEdge(eItr).getData(); }
-
- /// \brief Get a node's degree.
- /// @param nItr Node iterator.
- /// @return The degree of the node.
- unsigned getNodeDegree(NodeItr nItr) const {
- return getNode(nItr).getDegree();
- }
-
- /// \brief Begin iterator for node set.
- NodeItr nodesBegin() { return nodes.begin(); }
-
- /// \brief Begin const iterator for node set.
- ConstNodeItr nodesBegin() const { return nodes.begin(); }
-
- /// \brief End iterator for node set.
- NodeItr nodesEnd() { return nodes.end(); }
-
- /// \brief End const iterator for node set.
- ConstNodeItr nodesEnd() const { return nodes.end(); }
-
- /// \brief Begin iterator for edge set.
- EdgeItr edgesBegin() { return edges.begin(); }
-
- /// \brief End iterator for edge set.
- EdgeItr edgesEnd() { return edges.end(); }
-
- /// \brief Get begin iterator for adjacent edge set.
- /// @param nItr Node iterator.
- /// @return Begin iterator for the set of edges connected to the given node.
- AdjEdgeItr adjEdgesBegin(NodeItr nItr) {
- return getNode(nItr).edgesBegin();
- }
-
- /// \brief Get end iterator for adjacent edge set.
- /// @param nItr Node iterator.
- /// @return End iterator for the set of edges connected to the given node.
- AdjEdgeItr adjEdgesEnd(NodeItr nItr) {
- return getNode(nItr).edgesEnd();
- }
-
- /// \brief Get the first node connected to this edge.
- /// @param eItr Edge iterator.
- /// @return The first node connected to the given edge.
- NodeItr getEdgeNode1(EdgeItr eItr) {
- return getEdge(eItr).getNode1();
- }
-
- /// \brief Get the second node connected to this edge.
- /// @param eItr Edge iterator.
- /// @return The second node connected to the given edge.
- NodeItr getEdgeNode2(EdgeItr eItr) {
- return getEdge(eItr).getNode2();
- }
-
- /// \brief Get the "other" node connected to this edge.
- /// @param eItr Edge iterator.
- /// @param nItr Node iterator for the "given" node.
- /// @return The iterator for the "other" node connected to this edge.
- NodeItr getEdgeOtherNode(EdgeItr eItr, NodeItr nItr) {
- EdgeEntry &e = getEdge(eItr);
- if (e.getNode1() == nItr) {
- return e.getNode2();
- } // else
- return e.getNode1();
- }
-
- /// \brief Get the edge connecting two nodes.
- /// @param n1Itr First node iterator.
- /// @param n2Itr Second node iterator.
- /// @return An iterator for edge (n1Itr, n2Itr) if such an edge exists,
- /// otherwise returns edgesEnd().
- EdgeItr findEdge(NodeItr n1Itr, NodeItr n2Itr) {
- for (AdjEdgeItr aeItr = adjEdgesBegin(n1Itr), aeEnd = adjEdgesEnd(n1Itr);
- aeItr != aeEnd; ++aeItr) {
- if ((getEdgeNode1(*aeItr) == n2Itr) ||
- (getEdgeNode2(*aeItr) == n2Itr)) {
- return *aeItr;
- }
- }
- return edges.end();
- }
-
- /// \brief Remove a node from the graph.
- /// @param nItr Node iterator.
- void removeNode(NodeItr nItr) {
- NodeEntry &n = getNode(nItr);
- for (AdjEdgeItr itr = n.edgesBegin(), end = n.edgesEnd(); itr != end;) {
- EdgeItr eItr = *itr;
- ++itr;
- removeEdge(eItr);
- }
- nodes.erase(nItr);
- --numNodes;
- }
-
- /// \brief Remove an edge from the graph.
- /// @param eItr Edge iterator.
- void removeEdge(EdgeItr eItr) {
- EdgeEntry &e = getEdge(eItr);
- NodeEntry &n1 = getNode(e.getNode1());
- NodeEntry &n2 = getNode(e.getNode2());
- n1.removeEdge(e.getNode1AEItr());
- n2.removeEdge(e.getNode2AEItr());
- edges.erase(eItr);
- --numEdges;
- }
-
- /// \brief Remove all nodes and edges from the graph.
- void clear() {
- nodes.clear();
- edges.clear();
- numNodes = numEdges = 0;
- }
-
- /// \brief Print a representation of this graph in DOT format.
- /// @param os Output stream to print on.
- template <typename OStream>
- void printDot(OStream &os) {
-
- os << "graph {\n";
-
- for (NodeItr nodeItr = nodesBegin(), nodeEnd = nodesEnd();
- nodeItr != nodeEnd; ++nodeItr) {
-
- os << " node" << nodeItr << " [ label=\""
- << nodeItr << ": " << getNodeCosts(nodeItr) << "\" ]\n";
- }
-
- os << " edge [ len=" << getNumNodes() << " ]\n";
-
- for (EdgeItr edgeItr = edgesBegin(), edgeEnd = edgesEnd();
- edgeItr != edgeEnd; ++edgeItr) {
-
- os << " node" << getEdgeNode1(edgeItr)
- << " -- node" << getEdgeNode2(edgeItr)
- << " [ label=\"";
-
- const Matrix &edgeCosts = getEdgeCosts(edgeItr);
-
- for (unsigned i = 0; i < edgeCosts.getRows(); ++i) {
- os << edgeCosts.getRowAsVector(i) << "\\n";
- }
- os << "\" ]\n";
- }
- os << "}\n";
- }
-
- };
-
- class NodeItrComparator {
- public:
- bool operator()(Graph::NodeItr n1, Graph::NodeItr n2) const {
- return &*n1 < &*n2;
- }
-
- bool operator()(Graph::ConstNodeItr n1, Graph::ConstNodeItr n2) const {
- return &*n1 < &*n2;
- }
- };
-
- class EdgeItrCompartor {
- public:
- bool operator()(Graph::EdgeItr e1, Graph::EdgeItr e2) const {
- return &*e1 < &*e2;
- }
-
- bool operator()(Graph::ConstEdgeItr e1, Graph::ConstEdgeItr e2) const {
- return &*e1 < &*e2;
- }
- };
-
- void Graph::copyFrom(const Graph &other) {
- std::map<Graph::ConstNodeItr, Graph::NodeItr,
- NodeItrComparator> nodeMap;
-
- for (Graph::ConstNodeItr nItr = other.nodesBegin(),
- nEnd = other.nodesEnd();
- nItr != nEnd; ++nItr) {
- nodeMap[nItr] = addNode(other.getNodeCosts(nItr));
- }
-
- }
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_GRAPH_HPP
diff --git a/lib/CodeGen/PBQP/HeuristicBase.h b/lib/CodeGen/PBQP/HeuristicBase.h
deleted file mode 100644
index 791c227f0d07..000000000000
--- a/lib/CodeGen/PBQP/HeuristicBase.h
+++ /dev/null
@@ -1,246 +0,0 @@
-//===-- HeuristcBase.h --- Heuristic base class for PBQP --------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-#define LLVM_CODEGEN_PBQP_HEURISTICBASE_H
-
-#include "HeuristicSolver.h"
-
-namespace PBQP {
-
- /// \brief Abstract base class for heuristic implementations.
- ///
- /// This class provides a handy base for heuristic implementations with common
- /// solver behaviour implemented for a number of methods.
- ///
- /// To implement your own heuristic using this class as a base you'll have to
- /// implement, as a minimum, the following methods:
- /// <ul>
- /// <li> void addToHeuristicList(Graph::NodeItr) : Add a node to the
- /// heuristic reduction list.
- /// <li> void heuristicReduce() : Perform a single heuristic reduction.
- /// <li> void preUpdateEdgeCosts(Graph::EdgeItr) : Handle the (imminent)
- /// change to the cost matrix on the given edge (by R2).
- /// <li> void postUpdateEdgeCostts(Graph::EdgeItr) : Handle the new
- /// costs on the given edge.
- /// <li> void handleAddEdge(Graph::EdgeItr) : Handle the addition of a new
- /// edge into the PBQP graph (by R2).
- /// <li> void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) : Handle the
- /// disconnection of the given edge from the given node.
- /// <li> A constructor for your derived class : to pass back a reference to
- /// the solver which is using this heuristic.
- /// </ul>
- ///
- /// These methods are implemented in this class for documentation purposes,
- /// but will assert if called.
- ///
- /// Note that this class uses the curiously recursive template idiom to
- /// forward calls to the derived class. These methods need not be made
- /// virtual, and indeed probably shouldn't for performance reasons.
- ///
- /// You'll also need to provide NodeData and EdgeData structs in your class.
- /// These can be used to attach data relevant to your heuristic to each
- /// node/edge in the PBQP graph.
-
- template <typename HImpl>
- class HeuristicBase {
- private:
-
- typedef std::list<Graph::NodeItr> OptimalList;
-
- HeuristicSolverImpl<HImpl> &s;
- Graph &g;
- OptimalList optimalList;
-
- // Return a reference to the derived heuristic.
- HImpl& impl() { return static_cast<HImpl&>(*this); }
-
- // Add the given node to the optimal reductions list. Keep an iterator to
- // its location for fast removal.
- void addToOptimalReductionList(Graph::NodeItr nItr) {
- optimalList.insert(optimalList.end(), nItr);
- }
-
- public:
-
- /// \brief Construct an instance with a reference to the given solver.
- /// @param solver The solver which is using this heuristic instance.
- HeuristicBase(HeuristicSolverImpl<HImpl> &solver)
- : s(solver), g(s.getGraph()) { }
-
- /// \brief Get the solver which is using this heuristic instance.
- /// @return The solver which is using this heuristic instance.
- ///
- /// You can use this method to get access to the solver in your derived
- /// heuristic implementation.
- HeuristicSolverImpl<HImpl>& getSolver() { return s; }
-
- /// \brief Get the graph representing the problem to be solved.
- /// @return The graph representing the problem to be solved.
- Graph& getGraph() { return g; }
-
- /// \brief Tell the solver to simplify the graph before the reduction phase.
- /// @return Whether or not the solver should run a simplification phase
- /// prior to the main setup and reduction.
- ///
- /// HeuristicBase returns true from this method as it's a sensible default,
- /// however you can over-ride it in your derived class if you want different
- /// behaviour.
- bool solverRunSimplify() const { return true; }
-
- /// \brief Decide whether a node should be optimally or heuristically
- /// reduced.
- /// @return Whether or not the given node should be listed for optimal
- /// reduction (via R0, R1 or R2).
- ///
- /// HeuristicBase returns true for any node with degree less than 3. This is
- /// sane and sensible for many situations, but not all. You can over-ride
- /// this method in your derived class if you want a different selection
- /// criteria. Note however that your criteria for selecting optimal nodes
- /// should be <i>at least</i> as strong as this. I.e. Nodes of degree 3 or
- /// higher should not be selected under any circumstances.
- bool shouldOptimallyReduce(Graph::NodeItr nItr) {
- if (g.getNodeDegree(nItr) < 3)
- return true;
- // else
- return false;
- }
-
- /// \brief Add the given node to the list of nodes to be optimally reduced.
- /// @return nItr Node iterator to be added.
- ///
- /// You probably don't want to over-ride this, except perhaps to record
- /// statistics before calling this implementation. HeuristicBase relies on
- /// its behaviour.
- void addToOptimalReduceList(Graph::NodeItr nItr) {
- optimalList.push_back(nItr);
- }
-
- /// \brief Initialise the heuristic.
- ///
- /// HeuristicBase iterates over all nodes in the problem and adds them to
- /// the appropriate list using addToOptimalReduceList or
- /// addToHeuristicReduceList based on the result of shouldOptimallyReduce.
- ///
- /// This behaviour should be fine for most situations.
- void setup() {
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
- if (impl().shouldOptimallyReduce(nItr)) {
- addToOptimalReduceList(nItr);
- } else {
- impl().addToHeuristicReduceList(nItr);
- }
- }
- }
-
- /// \brief Optimally reduce one of the nodes in the optimal reduce list.
- /// @return True if a reduction takes place, false if the optimal reduce
- /// list is empty.
- ///
- /// Selects a node from the optimal reduce list and removes it, applying
- /// R0, R1 or R2 as appropriate based on the selected node's degree.
- bool optimalReduce() {
- if (optimalList.empty())
- return false;
-
- Graph::NodeItr nItr = optimalList.front();
- optimalList.pop_front();
-
- switch (s.getSolverDegree(nItr)) {
- case 0: s.applyR0(nItr); break;
- case 1: s.applyR1(nItr); break;
- case 2: s.applyR2(nItr); break;
- default: assert(false &&
- "Optimal reductions of degree > 2 nodes is invalid.");
- }
-
- return true;
- }
-
- /// \brief Perform the PBQP reduction process.
- ///
- /// Reduces the problem to the empty graph by repeated application of the
- /// reduction rules R0, R1, R2 and RN.
- /// R0, R1 or R2 are always applied if possible before RN is used.
- void reduce() {
- bool finished = false;
-
- while (!finished) {
- if (!optimalReduce()) {
- if (impl().heuristicReduce()) {
- getSolver().recordRN();
- } else {
- finished = true;
- }
- }
- }
- }
-
- /// \brief Add a node to the heuristic reduce list.
- /// @param nItr Node iterator to add to the heuristic reduce list.
- void addToHeuristicReduceList(Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Heuristically reduce one of the nodes in the heuristic
- /// reduce list.
- /// @return True if a reduction takes place, false if the heuristic reduce
- /// list is empty.
- bool heuristicReduce() {
- assert(false && "Must be implemented in derived class.");
- return false;
- }
-
- /// \brief Prepare a change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle the change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle the addition of a new edge into the PBQP graph.
- /// @param eItr Edge iterator for the added edge.
- void handleAddEdge(Graph::EdgeItr eItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Handle disconnection of an edge from a node.
- /// @param eItr Edge iterator for edge being disconnected.
- /// @param nItr Node iterator for the node being disconnected from.
- ///
- /// Edges are frequently removed due to the removal of a node. This
- /// method allows for the effect to be computed only for the remaining
- /// node in the graph.
- void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- assert(false && "Must be implemented in derived class.");
- }
-
- /// \brief Clean up any structures used by HeuristicBase.
- ///
- /// At present this just performs a sanity check: that the optimal reduce
- /// list is empty now that reduction has completed.
- ///
- /// If your derived class has more complex structures which need tearing
- /// down you should over-ride this method but include a call back to this
- /// implementation.
- void cleanup() {
- assert(optimalList.empty() && "Nodes left over in optimal reduce list?");
- }
-
- };
-
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICBASE_H
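For reference, the CRTP surface that HeuristicBase expects from a derived heuristic boils down to a handful of hooks. The sketch below is illustrative only and is not part of this import; the Trivial class, its rnList member, and the disconnect() helper are hypothetical names, and the only heuristic actually shipped is Briggs, which appears further down this diff.

#include "HeuristicSolver.h"
#include "HeuristicBase.h"
#include <vector>

namespace PBQP {
namespace Heuristics {

// Reduces every heuristic node in LIFO order, without trying to prove
// allocability. Valid, but gives much weaker solutions than Briggs.
class Trivial : public HeuristicBase<Trivial> {
public:
  struct NodeData {};   // no per-node bookkeeping needed
  struct EdgeData {};   // no per-edge bookkeeping needed

  Trivial(HeuristicSolverImpl<Trivial> &solver)
    : HeuristicBase<Trivial>(solver) {}

  // Called from HeuristicBase::setup() for nodes of degree 3 or more.
  void addToHeuristicReduceList(Graph::NodeItr nItr) {
    rnList.push_back(nItr);
  }

  // Disconnect one node and push it, so it is solved during backpropagation.
  bool heuristicReduce() {
    if (rnList.empty())
      return false;
    Graph::NodeItr nItr = rnList.back();
    rnList.pop_back();
    disconnect(nItr);
    getSolver().pushToStack(nItr);
    return true;
  }

  // This heuristic caches no costs, so change notifications are no-ops.
  void preUpdateEdgeCosts(Graph::EdgeItr) {}
  void postUpdateEdgeCosts(Graph::EdgeItr) {}
  void handleAddEdge(Graph::EdgeItr) {}
  void handleRemoveEdge(Graph::EdgeItr, Graph::NodeItr) {}

private:
  // Remove all solver edges incident on xnItr (cf. Briggs::handleRemoveNode).
  void disconnect(Graph::NodeItr xnItr) {
    typedef HeuristicSolverImpl<Trivial>::SolverEdgeItr SolverEdgeItr;
    std::vector<Graph::EdgeItr> edges;
    for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
                       aeEnd = getSolver().solverEdgesEnd(xnItr);
         aeItr != aeEnd; ++aeItr)
      edges.push_back(*aeItr);
    while (!edges.empty()) {
      getSolver().removeSolverEdge(edges.back());
      edges.pop_back();
    }
  }

  std::vector<Graph::NodeItr> rnList;
};

} // namespace Heuristics
} // namespace PBQP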
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
deleted file mode 100644
index 35514f967478..000000000000
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ /dev/null
@@ -1,616 +0,0 @@
-//===-- HeuristicSolver.h - Heuristic PBQP Solver --------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// Heuristic PBQP solver. This solver is able to perform optimal reductions for
-// nodes of degree 0, 1 or 2. For nodes of degree >2 a pluggable heuristic is
-// used to select a node for reduction.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-#define LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
-
-#include "Graph.h"
-#include "Solution.h"
-#include <vector>
-#include <limits>
-
-namespace PBQP {
-
- /// \brief Heuristic PBQP solver implementation.
- ///
- /// This class should usually be created (and destroyed) indirectly via a call
- /// to HeuristicSolver<HImpl>::solve(Graph&).
- /// See the comments for HeuristicSolver.
- ///
- /// HeuristicSolverImpl provides the R0, R1 and R2 reduction rules,
- /// backpropagation phase, and maintains the internal copy of the graph on
- /// which the reduction is carried out (the original being kept to facilitate
- /// backpropagation).
- template <typename HImpl>
- class HeuristicSolverImpl {
- private:
-
- typedef typename HImpl::NodeData HeuristicNodeData;
- typedef typename HImpl::EdgeData HeuristicEdgeData;
-
- typedef std::list<Graph::EdgeItr> SolverEdges;
-
- public:
-
- /// \brief Iterator type for edges in the solver graph.
- typedef SolverEdges::iterator SolverEdgeItr;
-
- private:
-
- class NodeData {
- public:
- NodeData() : solverDegree(0) {}
-
- HeuristicNodeData& getHeuristicData() { return hData; }
-
- SolverEdgeItr addSolverEdge(Graph::EdgeItr eItr) {
- ++solverDegree;
- return solverEdges.insert(solverEdges.end(), eItr);
- }
-
- void removeSolverEdge(SolverEdgeItr seItr) {
- --solverDegree;
- solverEdges.erase(seItr);
- }
-
- SolverEdgeItr solverEdgesBegin() { return solverEdges.begin(); }
- SolverEdgeItr solverEdgesEnd() { return solverEdges.end(); }
- unsigned getSolverDegree() const { return solverDegree; }
- void clearSolverEdges() {
- solverDegree = 0;
- solverEdges.clear();
- }
-
- private:
- HeuristicNodeData hData;
- unsigned solverDegree;
- SolverEdges solverEdges;
- };
-
- class EdgeData {
- public:
- HeuristicEdgeData& getHeuristicData() { return hData; }
-
- void setN1SolverEdgeItr(SolverEdgeItr n1SolverEdgeItr) {
- this->n1SolverEdgeItr = n1SolverEdgeItr;
- }
-
- SolverEdgeItr getN1SolverEdgeItr() { return n1SolverEdgeItr; }
-
- void setN2SolverEdgeItr(SolverEdgeItr n2SolverEdgeItr){
- this->n2SolverEdgeItr = n2SolverEdgeItr;
- }
-
- SolverEdgeItr getN2SolverEdgeItr() { return n2SolverEdgeItr; }
-
- private:
-
- HeuristicEdgeData hData;
- SolverEdgeItr n1SolverEdgeItr, n2SolverEdgeItr;
- };
-
- Graph &g;
- HImpl h;
- Solution s;
- std::vector<Graph::NodeItr> stack;
-
- typedef std::list<NodeData> NodeDataList;
- NodeDataList nodeDataList;
-
- typedef std::list<EdgeData> EdgeDataList;
- EdgeDataList edgeDataList;
-
- public:
-
- /// \brief Construct a heuristic solver implementation to solve the given
- /// graph.
- /// @param g The graph representing the problem instance to be solved.
- HeuristicSolverImpl(Graph &g) : g(g), h(*this) {}
-
- /// \brief Get the graph being solved by this solver.
- /// @return The graph representing the problem instance being solved by this
- /// solver.
- Graph& getGraph() { return g; }
-
- /// \brief Get the heuristic data attached to the given node.
- /// @param nItr Node iterator.
- /// @return The heuristic data attached to the given node.
- HeuristicNodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).getHeuristicData();
- }
-
- /// \brief Get the heuristic data attached to the given edge.
- /// @param eItr Edge iterator.
- /// @return The heuristic data attached to the given edge.
- HeuristicEdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
- return getSolverEdgeData(eItr).getHeuristicData();
- }
-
- /// \brief Begin iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- /// @param nItr Node iterator.
- /// @return Begin iterator for the set of edges adjacent to the given node
- /// in the solver graph.
- SolverEdgeItr solverEdgesBegin(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).solverEdgesBegin();
- }
-
- /// \brief End iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- /// @param nItr Node iterator.
- /// @return End iterator for the set of edges adjacent to the given node in
- /// the solver graph.
- SolverEdgeItr solverEdgesEnd(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).solverEdgesEnd();
- }
-
- /// \brief Remove an edge from the solver graph.
- /// @param eItr Edge iterator for edge to be removed.
- ///
- /// Does <i>not</i> notify the heuristic of the removal. That should be
- /// done manually if necessary.
- void removeSolverEdge(Graph::EdgeItr eItr) {
- EdgeData &eData = getSolverEdgeData(eItr);
- NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
- &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
- n1Data.removeSolverEdge(eData.getN1SolverEdgeItr());
- n2Data.removeSolverEdge(eData.getN2SolverEdgeItr());
- }
-
- /// \brief Compute a solution to the PBQP problem instance with which this
- /// heuristic solver was constructed.
- /// @return A solution to the PBQP problem.
- ///
- /// Performs the full PBQP heuristic solver algorithm, including setup,
- /// calls to the heuristic (which will call back to the reduction rules in
- /// this class), and cleanup.
- Solution computeSolution() {
- setup();
- h.setup();
- h.reduce();
- backpropagate();
- h.cleanup();
- cleanup();
- return s;
- }
-
- /// \brief Add to the end of the stack.
- /// @param nItr Node iterator to add to the reduction stack.
- void pushToStack(Graph::NodeItr nItr) {
- getSolverNodeData(nItr).clearSolverEdges();
- stack.push_back(nItr);
- }
-
- /// \brief Returns the solver degree of the given node.
- /// @param nItr Node iterator for which degree is requested.
- /// @return Node degree in the <i>solver</i> graph (not the original graph).
- unsigned getSolverDegree(Graph::NodeItr nItr) {
- return getSolverNodeData(nItr).getSolverDegree();
- }
-
- /// \brief Set the solution of the given node.
- /// @param nItr Node iterator to set solution for.
- /// @param selection Selection for node.
- void setSolution(const Graph::NodeItr &nItr, unsigned selection) {
- s.setSelection(nItr, selection);
-
- for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
- aeEnd = g.adjEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
- Graph::EdgeItr eItr(*aeItr);
- Graph::NodeItr anItr(g.getEdgeOtherNode(eItr, nItr));
- getSolverNodeData(anItr).addSolverEdge(eItr);
- }
- }
-
- /// \brief Apply rule R0.
- /// @param nItr Node iterator for node to apply R0 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR0(Graph::NodeItr nItr) {
- assert(getSolverNodeData(nItr).getSolverDegree() == 0 &&
- "R0 applied to node with degree != 0.");
-
- // Nothing to do. Just push the node onto the reduction stack.
- pushToStack(nItr);
-
- s.recordR0();
- }
-
- /// \brief Apply rule R1.
- /// @param xnItr Node iterator for node to apply R1 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR1(Graph::NodeItr xnItr) {
- NodeData &nd = getSolverNodeData(xnItr);
- assert(nd.getSolverDegree() == 1 &&
- "R1 applied to node with degree != 1.");
-
- Graph::EdgeItr eItr = *nd.solverEdgesBegin();
-
- const Matrix &eCosts = g.getEdgeCosts(eItr);
- const Vector &xCosts = g.getNodeCosts(xnItr);
-
- // Duplicate a little to avoid transposing matrices.
- if (xnItr == g.getEdgeNode1(eItr)) {
- Graph::NodeItr ynItr = g.getEdgeNode2(eItr);
- Vector &yCosts = g.getNodeCosts(ynItr);
- for (unsigned j = 0; j < yCosts.getLength(); ++j) {
- PBQPNum min = eCosts[0][j] + xCosts[0];
- for (unsigned i = 1; i < xCosts.getLength(); ++i) {
- PBQPNum c = eCosts[i][j] + xCosts[i];
- if (c < min)
- min = c;
- }
- yCosts[j] += min;
- }
- h.handleRemoveEdge(eItr, ynItr);
- } else {
- Graph::NodeItr ynItr = g.getEdgeNode1(eItr);
- Vector &yCosts = g.getNodeCosts(ynItr);
- for (unsigned i = 0; i < yCosts.getLength(); ++i) {
- PBQPNum min = eCosts[i][0] + xCosts[0];
- for (unsigned j = 1; j < xCosts.getLength(); ++j) {
- PBQPNum c = eCosts[i][j] + xCosts[j];
- if (c < min)
- min = c;
- }
- yCosts[i] += min;
- }
- h.handleRemoveEdge(eItr, ynItr);
- }
- removeSolverEdge(eItr);
- assert(nd.getSolverDegree() == 0 &&
- "Degree 1 with edge removed should be 0.");
- pushToStack(xnItr);
- s.recordR1();
- }
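The folding loop in applyR1 above is easier to see with concrete numbers. The snippet below is illustrative only; it uses plain arrays rather than the PBQP Vector and Matrix classes, and the costs are made up.

#include <algorithm>
#include <cassert>

int main() {
  // x has two options with costs xCosts, y has two options with costs yCosts,
  // and eCosts[i][j] is the edge cost of picking option i for x and j for y.
  float xCosts[2] = {0, 1};
  float yCosts[2] = {0, 0};
  float eCosts[2][2] = {{2, 0}, {0, 2}};

  // R1: fold x away by adding, for each choice j of y, the best combined
  // cost min_i (eCosts[i][j] + xCosts[i]) into yCosts[j].
  for (unsigned j = 0; j < 2; ++j) {
    float min = eCosts[0][j] + xCosts[0];
    for (unsigned i = 1; i < 2; ++i)
      min = std::min(min, eCosts[i][j] + xCosts[i]);
    yCosts[j] += min;
  }

  // Choosing y's second option lets x take its first option for free; y's
  // first option forces a combined cost of at least 1.
  assert(yCosts[0] == 1 && yCosts[1] == 0);
  return 0;
}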
-
- /// \brief Apply rule R2.
- /// @param xnItr Node iterator for node to apply R2 to.
- ///
- /// Node will be automatically pushed to the solver stack.
- void applyR2(Graph::NodeItr xnItr) {
- assert(getSolverNodeData(xnItr).getSolverDegree() == 2 &&
- "R2 applied to node with degree != 2.");
-
- NodeData &nd = getSolverNodeData(xnItr);
- const Vector &xCosts = g.getNodeCosts(xnItr);
-
- SolverEdgeItr aeItr = nd.solverEdgesBegin();
- Graph::EdgeItr yxeItr = *aeItr,
- zxeItr = *(++aeItr);
-
- Graph::NodeItr ynItr = g.getEdgeOtherNode(yxeItr, xnItr),
- znItr = g.getEdgeOtherNode(zxeItr, xnItr);
-
- bool flipEdge1 = (g.getEdgeNode1(yxeItr) == xnItr),
- flipEdge2 = (g.getEdgeNode1(zxeItr) == xnItr);
-
- const Matrix *yxeCosts = flipEdge1 ?
- new Matrix(g.getEdgeCosts(yxeItr).transpose()) :
- &g.getEdgeCosts(yxeItr);
-
- const Matrix *zxeCosts = flipEdge2 ?
- new Matrix(g.getEdgeCosts(zxeItr).transpose()) :
- &g.getEdgeCosts(zxeItr);
-
- unsigned xLen = xCosts.getLength(),
- yLen = yxeCosts->getRows(),
- zLen = zxeCosts->getRows();
-
- Matrix delta(yLen, zLen);
-
- for (unsigned i = 0; i < yLen; ++i) {
- for (unsigned j = 0; j < zLen; ++j) {
- PBQPNum min = (*yxeCosts)[i][0] + (*zxeCosts)[j][0] + xCosts[0];
- for (unsigned k = 1; k < xLen; ++k) {
- PBQPNum c = (*yxeCosts)[i][k] + (*zxeCosts)[j][k] + xCosts[k];
- if (c < min) {
- min = c;
- }
- }
- delta[i][j] = min;
- }
- }
-
- if (flipEdge1)
- delete yxeCosts;
-
- if (flipEdge2)
- delete zxeCosts;
-
- Graph::EdgeItr yzeItr = g.findEdge(ynItr, znItr);
- bool addedEdge = false;
-
- if (yzeItr == g.edgesEnd()) {
- yzeItr = g.addEdge(ynItr, znItr, delta);
- addedEdge = true;
- } else {
- Matrix &yzeCosts = g.getEdgeCosts(yzeItr);
- h.preUpdateEdgeCosts(yzeItr);
- if (ynItr == g.getEdgeNode1(yzeItr)) {
- yzeCosts += delta;
- } else {
- yzeCosts += delta.transpose();
- }
- }
-
- bool nullCostEdge = tryNormaliseEdgeMatrix(yzeItr);
-
- if (!addedEdge) {
- // If we modified the edge costs let the heuristic know.
- h.postUpdateEdgeCosts(yzeItr);
- }
-
- if (nullCostEdge) {
- // If this edge ended up null remove it.
- if (!addedEdge) {
- // We didn't just add it, so we need to notify the heuristic
- // and remove it from the solver.
- h.handleRemoveEdge(yzeItr, ynItr);
- h.handleRemoveEdge(yzeItr, znItr);
- removeSolverEdge(yzeItr);
- }
- g.removeEdge(yzeItr);
- } else if (addedEdge) {
- // If the edge was added, and non-null, finish setting it up, add it to
- // the solver & notify heuristic.
- edgeDataList.push_back(EdgeData());
- g.setEdgeData(yzeItr, &edgeDataList.back());
- addSolverEdge(yzeItr);
- h.handleAddEdge(yzeItr);
- }
-
- h.handleRemoveEdge(yxeItr, ynItr);
- removeSolverEdge(yxeItr);
- h.handleRemoveEdge(zxeItr, znItr);
- removeSolverEdge(zxeItr);
-
- pushToStack(xnItr);
- s.recordR2();
- }
-
- /// \brief Record an application of the RN rule.
- ///
- /// For use by the HeuristicBase.
- void recordRN() { s.recordRN(); }
-
- private:
-
- NodeData& getSolverNodeData(Graph::NodeItr nItr) {
- return *static_cast<NodeData*>(g.getNodeData(nItr));
- }
-
- EdgeData& getSolverEdgeData(Graph::EdgeItr eItr) {
- return *static_cast<EdgeData*>(g.getEdgeData(eItr));
- }
-
- void addSolverEdge(Graph::EdgeItr eItr) {
- EdgeData &eData = getSolverEdgeData(eItr);
- NodeData &n1Data = getSolverNodeData(g.getEdgeNode1(eItr)),
- &n2Data = getSolverNodeData(g.getEdgeNode2(eItr));
-
- eData.setN1SolverEdgeItr(n1Data.addSolverEdge(eItr));
- eData.setN2SolverEdgeItr(n2Data.addSolverEdge(eItr));
- }
-
- void setup() {
- if (h.solverRunSimplify()) {
- simplify();
- }
-
- // Create node data objects.
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
- nodeDataList.push_back(NodeData());
- g.setNodeData(nItr, &nodeDataList.back());
- }
-
- // Create edge data objects.
- for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
- eItr != eEnd; ++eItr) {
- edgeDataList.push_back(EdgeData());
- g.setEdgeData(eItr, &edgeDataList.back());
- addSolverEdge(eItr);
- }
- }
-
- void simplify() {
- disconnectTrivialNodes();
- eliminateIndependentEdges();
- }
-
- // Eliminate trivial nodes.
- void disconnectTrivialNodes() {
- unsigned numDisconnected = 0;
-
- for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
- nItr != nEnd; ++nItr) {
-
- if (g.getNodeCosts(nItr).getLength() == 1) {
-
- std::vector<Graph::EdgeItr> edgesToRemove;
-
- for (Graph::AdjEdgeItr aeItr = g.adjEdgesBegin(nItr),
- aeEnd = g.adjEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
-
- Graph::EdgeItr eItr = *aeItr;
-
- if (g.getEdgeNode1(eItr) == nItr) {
- Graph::NodeItr otherNodeItr = g.getEdgeNode2(eItr);
- g.getNodeCosts(otherNodeItr) +=
- g.getEdgeCosts(eItr).getRowAsVector(0);
- }
- else {
- Graph::NodeItr otherNodeItr = g.getEdgeNode1(eItr);
- g.getNodeCosts(otherNodeItr) +=
- g.getEdgeCosts(eItr).getColAsVector(0);
- }
-
- edgesToRemove.push_back(eItr);
- }
-
- if (!edgesToRemove.empty())
- ++numDisconnected;
-
- while (!edgesToRemove.empty()) {
- g.removeEdge(edgesToRemove.back());
- edgesToRemove.pop_back();
- }
- }
- }
- }
-
- void eliminateIndependentEdges() {
- std::vector<Graph::EdgeItr> edgesToProcess;
- unsigned numEliminated = 0;
-
- for (Graph::EdgeItr eItr = g.edgesBegin(), eEnd = g.edgesEnd();
- eItr != eEnd; ++eItr) {
- edgesToProcess.push_back(eItr);
- }
-
- while (!edgesToProcess.empty()) {
- if (tryToEliminateEdge(edgesToProcess.back()))
- ++numEliminated;
- edgesToProcess.pop_back();
- }
- }
-
- bool tryToEliminateEdge(Graph::EdgeItr eItr) {
- if (tryNormaliseEdgeMatrix(eItr)) {
- g.removeEdge(eItr);
- return true;
- }
- return false;
- }
-
- bool tryNormaliseEdgeMatrix(Graph::EdgeItr &eItr) {
-
- const PBQPNum infinity = std::numeric_limits<PBQPNum>::infinity();
-
- Matrix &edgeCosts = g.getEdgeCosts(eItr);
- Vector &uCosts = g.getNodeCosts(g.getEdgeNode1(eItr)),
- &vCosts = g.getNodeCosts(g.getEdgeNode2(eItr));
-
- for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
- PBQPNum rowMin = infinity;
-
- for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
- if (vCosts[c] != infinity && edgeCosts[r][c] < rowMin)
- rowMin = edgeCosts[r][c];
- }
-
- uCosts[r] += rowMin;
-
- if (rowMin != infinity) {
- edgeCosts.subFromRow(r, rowMin);
- }
- else {
- edgeCosts.setRow(r, 0);
- }
- }
-
- for (unsigned c = 0; c < edgeCosts.getCols(); ++c) {
- PBQPNum colMin = infinity;
-
- for (unsigned r = 0; r < edgeCosts.getRows(); ++r) {
- if (uCosts[r] != infinity && edgeCosts[r][c] < colMin)
- colMin = edgeCosts[r][c];
- }
-
- vCosts[c] += colMin;
-
- if (colMin != infinity) {
- edgeCosts.subFromCol(c, colMin);
- }
- else {
- edgeCosts.setCol(c, 0);
- }
- }
-
- return edgeCosts.isZero();
- }
-
- void backpropagate() {
- while (!stack.empty()) {
- computeSolution(stack.back());
- stack.pop_back();
- }
- }
-
- void computeSolution(Graph::NodeItr nItr) {
-
- NodeData &nodeData = getSolverNodeData(nItr);
-
- Vector v(g.getNodeCosts(nItr));
-
- // Solve based on existing solved edges.
- for (SolverEdgeItr solvedEdgeItr = nodeData.solverEdgesBegin(),
- solvedEdgeEnd = nodeData.solverEdgesEnd();
- solvedEdgeItr != solvedEdgeEnd; ++solvedEdgeItr) {
-
- Graph::EdgeItr eItr(*solvedEdgeItr);
- Matrix &edgeCosts = g.getEdgeCosts(eItr);
-
- if (nItr == g.getEdgeNode1(eItr)) {
- Graph::NodeItr adjNode(g.getEdgeNode2(eItr));
- unsigned adjSolution = s.getSelection(adjNode);
- v += edgeCosts.getColAsVector(adjSolution);
- }
- else {
- Graph::NodeItr adjNode(g.getEdgeNode1(eItr));
- unsigned adjSolution = s.getSelection(adjNode);
- v += edgeCosts.getRowAsVector(adjSolution);
- }
-
- }
-
- setSolution(nItr, v.minIndex());
- }
-
- void cleanup() {
- h.cleanup();
- nodeDataList.clear();
- edgeDataList.clear();
- }
- };
-
- /// \brief PBQP heuristic solver class.
- ///
- /// Given a PBQP Graph g representing a PBQP problem, you can find a solution
- /// by calling
- /// <tt>Solution s = HeuristicSolver<H>::solve(g);</tt>
- ///
- /// The choice of heuristic for the H parameter will affect both the solver
- /// speed and solution quality. The heuristic should be chosen based on the
- /// nature of the problem being solved.
- /// Currently the only heuristic included with LLVM is the Briggs heuristic
- /// for register allocation.
- template <typename HImpl>
- class HeuristicSolver {
- public:
- static Solution solve(Graph &g) {
- HeuristicSolverImpl<HImpl> hs(g);
- return hs.computeSolution();
- }
- };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICSOLVER_H
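As the comment on HeuristicSolver says, client code only calls solve(). Below is a minimal sketch of driving it end to end. It is illustrative only: the relative include paths follow the convention used inside lib/CodeGen/PBQP, Graph::addNode(const Vector&) is assumed from Graph.h (deleted earlier in this change) rather than shown in this hunk, and the Briggs heuristic is the one defined later in this diff.

#include "Graph.h"
#include "HeuristicSolver.h"
#include "Heuristics/Briggs.h"
#include <limits>

using namespace PBQP;

Solution solveTinyProblem() {
  Graph g;

  // Two nodes, each with a spill option (element 0) and a single register.
  Graph::NodeItr n1 = g.addNode(Vector(2, 0));  // assumed Graph::addNode
  Graph::NodeItr n2 = g.addNode(Vector(2, 0));

  // The two nodes interfere: giving the register to both is infinitely bad.
  Matrix costs(2, 2, 0);
  costs[1][1] = std::numeric_limits<PBQPNum>::infinity();
  g.addEdge(n1, n2, costs);

  // Reduce the graph to nothing, then back-propagate the selections.
  return HeuristicSolver<Heuristics::Briggs>::solve(g);
}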
diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h
deleted file mode 100644
index 18eaf7c0da9b..000000000000
--- a/lib/CodeGen/PBQP/Heuristics/Briggs.h
+++ /dev/null
@@ -1,460 +0,0 @@
-//===-- Briggs.h --- Briggs Heuristic for PBQP ------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This class implements the Briggs test for "allocability" of nodes in a
-// PBQP graph representing a register allocation problem. Nodes which can be
-// proven allocable (by a safe and relatively accurate test) are removed from
-// the PBQP graph first. If no provably allocable node is present in the graph
-// then the node with the minimal spill-cost to degree ratio is removed.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-#define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
-
-#include "../HeuristicSolver.h"
-#include "../HeuristicBase.h"
-
-#include <set>
-#include <limits>
-
-namespace PBQP {
- namespace Heuristics {
-
- /// \brief PBQP Heuristic which applies an allocability test based on
- /// Briggs.
- ///
- /// This heuristic assumes that the elements of cost vectors in the PBQP
- /// problem represent storage options, with the first being the spill
- /// option and subsequent elements representing legal registers for the
- /// corresponding node. Edge cost matrices are likewise assumed to represent
- /// register constraints.
- /// If one or more nodes can be proven allocable by this heuristic (by
- /// inspection of their constraint matrices) then the allocable node of
- /// highest degree is selected for the next reduction and pushed to the
- /// solver stack. If no nodes can be proven allocable then the node with
- /// the lowest estimated spill cost is selected and pushed to the solver stack
- /// instead.
- ///
- /// This implementation is built on top of HeuristicBase.
- class Briggs : public HeuristicBase<Briggs> {
- private:
-
- class LinkDegreeComparator {
- public:
- LinkDegreeComparator(HeuristicSolverImpl<Briggs> &s) : s(&s) {}
- bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
- if (s->getSolverDegree(n1Itr) > s->getSolverDegree(n2Itr))
- return true;
- return false;
- }
- private:
- HeuristicSolverImpl<Briggs> *s;
- };
-
- class SpillCostComparator {
- public:
- SpillCostComparator(HeuristicSolverImpl<Briggs> &s)
- : s(&s), g(&s.getGraph()) {}
- bool operator()(Graph::NodeItr n1Itr, Graph::NodeItr n2Itr) const {
- PBQPNum cost1 = g->getNodeCosts(n1Itr)[0] / s->getSolverDegree(n1Itr),
- cost2 = g->getNodeCosts(n2Itr)[0] / s->getSolverDegree(n2Itr);
- if (cost1 < cost2)
- return true;
- return false;
- }
-
- private:
- HeuristicSolverImpl<Briggs> *s;
- Graph *g;
- };
-
- typedef std::list<Graph::NodeItr> RNAllocableList;
- typedef RNAllocableList::iterator RNAllocableListItr;
-
- typedef std::list<Graph::NodeItr> RNUnallocableList;
- typedef RNUnallocableList::iterator RNUnallocableListItr;
-
- public:
-
- struct NodeData {
- typedef std::vector<unsigned> UnsafeDegreesArray;
- bool isHeuristic, isAllocable, isInitialized;
- unsigned numDenied, numSafe;
- UnsafeDegreesArray unsafeDegrees;
- RNAllocableListItr rnaItr;
- RNUnallocableListItr rnuItr;
-
- NodeData()
- : isHeuristic(false), isAllocable(false), isInitialized(false),
- numDenied(0), numSafe(0) { }
- };
-
- struct EdgeData {
- typedef std::vector<unsigned> UnsafeArray;
- unsigned worst, reverseWorst;
- UnsafeArray unsafe, reverseUnsafe;
- bool isUpToDate;
-
- EdgeData() : worst(0), reverseWorst(0), isUpToDate(false) {}
- };
-
- /// \brief Construct an instance of the Briggs heuristic.
- /// @param solver A reference to the solver which is using this heuristic.
- Briggs(HeuristicSolverImpl<Briggs> &solver) :
- HeuristicBase<Briggs>(solver) {}
-
- /// \brief Determine whether a node should be reduced using optimal
- /// reduction.
- /// @param nItr Node iterator to be considered.
- /// @return True if the given node should be optimally reduced, false
- /// otherwise.
- ///
- /// Selects nodes of degree 0, 1 or 2 for optimal reduction, with one
- /// exception. Nodes whose spill cost (element 0 of their cost vector) is
- /// infinite are checked for allocability first. Allocable nodes may be
- /// optimally reduced, but nodes whose allocability cannot be proven are
- /// selected for heuristic reduction instead.
- bool shouldOptimallyReduce(Graph::NodeItr nItr) {
- if (getSolver().getSolverDegree(nItr) < 3) {
- return true;
- }
- // else
- return false;
- }
-
- /// \brief Add a node to the heuristic reduce list.
- /// @param nItr Node iterator to add to the heuristic reduce list.
- void addToHeuristicReduceList(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
- initializeNode(nItr);
- nd.isHeuristic = true;
- if (nd.isAllocable) {
- nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
- } else {
- nd.rnuItr = rnUnallocableList.insert(rnUnallocableList.end(), nItr);
- }
- }
-
- /// \brief Heuristically reduce one of the nodes in the heuristic
- /// reduce list.
- /// @return True if a reduction takes place, false if the heuristic reduce
- /// list is empty.
- ///
- /// If the list of allocable nodes is non-empty a node is selected
- /// from it and pushed to the stack. Otherwise if the non-allocable list
- /// is non-empty a node is selected from it and pushed to the stack.
- /// If both lists are empty the method simply returns false with no action
- /// taken.
- bool heuristicReduce() {
- if (!rnAllocableList.empty()) {
- RNAllocableListItr rnaItr =
- min_element(rnAllocableList.begin(), rnAllocableList.end(),
- LinkDegreeComparator(getSolver()));
- Graph::NodeItr nItr = *rnaItr;
- rnAllocableList.erase(rnaItr);
- handleRemoveNode(nItr);
- getSolver().pushToStack(nItr);
- return true;
- } else if (!rnUnallocableList.empty()) {
- RNUnallocableListItr rnuItr =
- min_element(rnUnallocableList.begin(), rnUnallocableList.end(),
- SpillCostComparator(getSolver()));
- Graph::NodeItr nItr = *rnuItr;
- rnUnallocableList.erase(rnuItr);
- handleRemoveNode(nItr);
- getSolver().pushToStack(nItr);
- return true;
- }
- // else
- return false;
- }
-
- /// \brief Prepare a change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void preUpdateEdgeCosts(Graph::EdgeItr eItr) {
- Graph &g = getGraph();
- Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
- n2Itr = g.getEdgeNode2(eItr);
- NodeData &n1 = getHeuristicNodeData(n1Itr),
- &n2 = getHeuristicNodeData(n2Itr);
-
- if (n1.isHeuristic)
- subtractEdgeContributions(eItr, getGraph().getEdgeNode1(eItr));
- if (n2.isHeuristic)
- subtractEdgeContributions(eItr, getGraph().getEdgeNode2(eItr));
-
- EdgeData &ed = getHeuristicEdgeData(eItr);
- ed.isUpToDate = false;
- }
-
- /// \brief Handle the change in the costs on the given edge.
- /// @param eItr Edge iterator.
- void postUpdateEdgeCosts(Graph::EdgeItr eItr) {
- // This is effectively the same as adding a new edge now, since
- // we've factored out the costs of the old one.
- handleAddEdge(eItr);
- }
-
- /// \brief Handle the addition of a new edge into the PBQP graph.
- /// @param eItr Edge iterator for the added edge.
- ///
- /// Updates allocability of any nodes connected by this edge which are
- /// being managed by the heuristic. If allocability changes they are
- /// moved to the appropriate list.
- void handleAddEdge(Graph::EdgeItr eItr) {
- Graph &g = getGraph();
- Graph::NodeItr n1Itr = g.getEdgeNode1(eItr),
- n2Itr = g.getEdgeNode2(eItr);
- NodeData &n1 = getHeuristicNodeData(n1Itr),
- &n2 = getHeuristicNodeData(n2Itr);
-
- // If neither node is managed by the heuristic there's nothing to be
- // done.
- if (!n1.isHeuristic && !n2.isHeuristic)
- return;
-
- // Ok - we need to update at least one node.
- computeEdgeContributions(eItr);
-
- // Update node 1 if it's managed by the heuristic.
- if (n1.isHeuristic) {
- bool n1WasAllocable = n1.isAllocable;
- addEdgeContributions(eItr, n1Itr);
- updateAllocability(n1Itr);
- if (n1WasAllocable && !n1.isAllocable) {
- rnAllocableList.erase(n1.rnaItr);
- n1.rnuItr =
- rnUnallocableList.insert(rnUnallocableList.end(), n1Itr);
- }
- }
-
- // Likewise for node 2.
- if (n2.isHeuristic) {
- bool n2WasAllocable = n2.isAllocable;
- addEdgeContributions(eItr, n2Itr);
- updateAllocability(n2Itr);
- if (n2WasAllocable && !n2.isAllocable) {
- rnAllocableList.erase(n2.rnaItr);
- n2.rnuItr =
- rnUnallocableList.insert(rnUnallocableList.end(), n2Itr);
- }
- }
- }
-
- /// \brief Handle disconnection of an edge from a node.
- /// @param eItr Edge iterator for edge being disconnected.
- /// @param nItr Node iterator for the node being disconnected from.
- ///
- /// Updates allocability of the given node and, if appropriate, moves the
- /// node to a new list.
- void handleRemoveEdge(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
-
- // If the node is not managed by the heuristic there's nothing to be
- // done.
- if (!nd.isHeuristic)
- return;
-
- EdgeData &ed = getHeuristicEdgeData(eItr);
- (void)ed;
- assert(ed.isUpToDate && "Edge data is not up to date.");
-
- // Update node.
- bool ndWasAllocable = nd.isAllocable;
- subtractEdgeContributions(eItr, nItr);
- updateAllocability(nItr);
-
- // If the node has gone optimal...
- if (shouldOptimallyReduce(nItr)) {
- nd.isHeuristic = false;
- addToOptimalReduceList(nItr);
- if (ndWasAllocable) {
- rnAllocableList.erase(nd.rnaItr);
- } else {
- rnUnallocableList.erase(nd.rnuItr);
- }
- } else {
- // Node didn't go optimal, but we might have to move it
- // from "unallocable" to "allocable".
- if (!ndWasAllocable && nd.isAllocable) {
- rnUnallocableList.erase(nd.rnuItr);
- nd.rnaItr = rnAllocableList.insert(rnAllocableList.end(), nItr);
- }
- }
- }
-
- private:
-
- NodeData& getHeuristicNodeData(Graph::NodeItr nItr) {
- return getSolver().getHeuristicNodeData(nItr);
- }
-
- EdgeData& getHeuristicEdgeData(Graph::EdgeItr eItr) {
- return getSolver().getHeuristicEdgeData(eItr);
- }
-
- // Work out what this edge will contribute to the allocability of the
- // nodes connected to it.
- void computeEdgeContributions(Graph::EdgeItr eItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- if (ed.isUpToDate)
- return; // Edge data is already up to date.
-
- Matrix &eCosts = getGraph().getEdgeCosts(eItr);
-
- unsigned numRegs = eCosts.getRows() - 1,
- numReverseRegs = eCosts.getCols() - 1;
-
- std::vector<unsigned> rowInfCounts(numRegs, 0),
- colInfCounts(numReverseRegs, 0);
-
- ed.worst = 0;
- ed.reverseWorst = 0;
- ed.unsafe.clear();
- ed.unsafe.resize(numRegs, 0);
- ed.reverseUnsafe.clear();
- ed.reverseUnsafe.resize(numReverseRegs, 0);
-
- for (unsigned i = 0; i < numRegs; ++i) {
- for (unsigned j = 0; j < numReverseRegs; ++j) {
- if (eCosts[i + 1][j + 1] ==
- std::numeric_limits<PBQPNum>::infinity()) {
- ed.unsafe[i] = 1;
- ed.reverseUnsafe[j] = 1;
- ++rowInfCounts[i];
- ++colInfCounts[j];
-
- if (colInfCounts[j] > ed.worst) {
- ed.worst = colInfCounts[j];
- }
-
- if (rowInfCounts[i] > ed.reverseWorst) {
- ed.reverseWorst = rowInfCounts[i];
- }
- }
- }
- }
-
- ed.isUpToDate = true;
- }
-
- // Add the contributions of the given edge to the given node's
- // numDenied and safe members. No action is taken other than to update
- // these member values. Once updated these numbers can be used by clients
- // to update the node's allocability.
- void addEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
- EdgeData::UnsafeArray &unsafe =
- nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
- nd.numDenied += nIsNode1 ? ed.worst : ed.reverseWorst;
-
- for (unsigned r = 0; r < numRegs; ++r) {
- if (unsafe[r]) {
- if (nd.unsafeDegrees[r]==0) {
- --nd.numSafe;
- }
- ++nd.unsafeDegrees[r];
- }
- }
- }
-
- // Subtract the contributions of the given edge to the given node's
- // numDenied and safe members. No action is taken other than to update
- // these member values. Once updated these numbers can be used by clients
- // to update the node's allocability.
- void subtractEdgeContributions(Graph::EdgeItr eItr, Graph::NodeItr nItr) {
- EdgeData &ed = getHeuristicEdgeData(eItr);
-
- assert(ed.isUpToDate && "Using out-of-date edge numbers.");
-
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- bool nIsNode1 = nItr == getGraph().getEdgeNode1(eItr);
- EdgeData::UnsafeArray &unsafe =
- nIsNode1 ? ed.unsafe : ed.reverseUnsafe;
- nd.numDenied -= nIsNode1 ? ed.worst : ed.reverseWorst;
-
- for (unsigned r = 0; r < numRegs; ++r) {
- if (unsafe[r]) {
- if (nd.unsafeDegrees[r] == 1) {
- ++nd.numSafe;
- }
- --nd.unsafeDegrees[r];
- }
- }
- }
-
- void updateAllocability(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
- nd.isAllocable = nd.numDenied < numRegs || nd.numSafe > 0;
- }
-
- void initializeNode(Graph::NodeItr nItr) {
- NodeData &nd = getHeuristicNodeData(nItr);
-
- if (nd.isInitialized)
- return; // Node data is already up to date.
-
- unsigned numRegs = getGraph().getNodeCosts(nItr).getLength() - 1;
-
- nd.numDenied = 0;
- nd.numSafe = numRegs;
- nd.unsafeDegrees.resize(numRegs, 0);
-
- typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
-
- for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(nItr),
- aeEnd = getSolver().solverEdgesEnd(nItr);
- aeItr != aeEnd; ++aeItr) {
-
- Graph::EdgeItr eItr = *aeItr;
- computeEdgeContributions(eItr);
- addEdgeContributions(eItr, nItr);
- }
-
- updateAllocability(nItr);
- nd.isInitialized = true;
- }
-
- void handleRemoveNode(Graph::NodeItr xnItr) {
- typedef HeuristicSolverImpl<Briggs>::SolverEdgeItr SolverEdgeItr;
- std::vector<Graph::EdgeItr> edgesToRemove;
- for (SolverEdgeItr aeItr = getSolver().solverEdgesBegin(xnItr),
- aeEnd = getSolver().solverEdgesEnd(xnItr);
- aeItr != aeEnd; ++aeItr) {
- Graph::NodeItr ynItr = getGraph().getEdgeOtherNode(*aeItr, xnItr);
- handleRemoveEdge(*aeItr, ynItr);
- edgesToRemove.push_back(*aeItr);
- }
- while (!edgesToRemove.empty()) {
- getSolver().removeSolverEdge(edgesToRemove.back());
- edgesToRemove.pop_back();
- }
- }
-
- RNAllocableList rnAllocableList;
- RNUnallocableList rnUnallocableList;
- };
-
- }
-}
-
-
-#endif // LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H
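The allocability test that drives the class above reduces to the comparison in updateAllocability(): a node is provably colourable if its neighbours cannot deny all of its registers in the worst case, or if at least one register is never denied. A toy illustration with made-up counts (not code from this tree):

#include <cassert>

int main() {
  unsigned numRegs = 4;    // legal registers for the node (cost vector length - 1)
  unsigned numDenied = 3;  // worst-case registers denied by all adjacent edges
  unsigned numSafe = 1;    // registers no adjacent edge can ever deny

  // Same condition as Briggs::updateAllocability().
  bool isAllocable = numDenied < numRegs || numSafe > 0;
  assert(isAllocable && "A register is always left over, so the node is safe.");
  return 0;
}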
diff --git a/lib/CodeGen/PBQP/Math.h b/lib/CodeGen/PBQP/Math.h
deleted file mode 100644
index e7598bf3e3f1..000000000000
--- a/lib/CodeGen/PBQP/Math.h
+++ /dev/null
@@ -1,288 +0,0 @@
-//===------ Math.h - PBQP Vector and Matrix classes -------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_MATH_H
-#define LLVM_CODEGEN_PBQP_MATH_H
-
-#include <cassert>
-#include <algorithm>
-#include <functional>
-
-namespace PBQP {
-
-typedef float PBQPNum;
-
-/// \brief PBQP Vector class.
-class Vector {
- public:
-
- /// \brief Construct a PBQP vector of the given size.
- explicit Vector(unsigned length) :
- length(length), data(new PBQPNum[length]) {
- }
-
- /// \brief Construct a PBQP vector with initializer.
- Vector(unsigned length, PBQPNum initVal) :
- length(length), data(new PBQPNum[length]) {
- std::fill(data, data + length, initVal);
- }
-
- /// \brief Copy construct a PBQP vector.
- Vector(const Vector &v) :
- length(v.length), data(new PBQPNum[length]) {
- std::copy(v.data, v.data + length, data);
- }
-
- /// \brief Destroy this vector, return its memory.
- ~Vector() { delete[] data; }
-
- /// \brief Assignment operator.
- Vector& operator=(const Vector &v) {
- delete[] data;
- length = v.length;
- data = new PBQPNum[length];
- std::copy(v.data, v.data + length, data);
- return *this;
- }
-
- /// \brief Return the length of the vector
- unsigned getLength() const {
- return length;
- }
-
- /// \brief Element access.
- PBQPNum& operator[](unsigned index) {
- assert(index < length && "Vector element access out of bounds.");
- return data[index];
- }
-
- /// \brief Const element access.
- const PBQPNum& operator[](unsigned index) const {
- assert(index < length && "Vector element access out of bounds.");
- return data[index];
- }
-
- /// \brief Add another vector to this one.
- Vector& operator+=(const Vector &v) {
- assert(length == v.length && "Vector length mismatch.");
- std::transform(data, data + length, v.data, data, std::plus<PBQPNum>());
- return *this;
- }
-
- /// \brief Subtract another vector from this one.
- Vector& operator-=(const Vector &v) {
- assert(length == v.length && "Vector length mismatch.");
- std::transform(data, data + length, v.data, data, std::minus<PBQPNum>());
- return *this;
- }
-
- /// \brief Returns the index of the minimum value in this vector
- unsigned minIndex() const {
- return std::min_element(data, data + length) - data;
- }
-
- private:
- unsigned length;
- PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given vector on the given
-/// output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Vector &v) {
- assert((v.getLength() != 0) && "Zero-length vector badness.");
-
- os << "[ " << v[0];
- for (unsigned i = 1; i < v.getLength(); ++i) {
- os << ", " << v[i];
- }
- os << " ]";
-
- return os;
-}
-
-
-/// \brief PBQP Matrix class
-class Matrix {
- public:
-
- /// \brief Construct a PBQP Matrix with the given dimensions.
- Matrix(unsigned rows, unsigned cols) :
- rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
- }
-
- /// \brief Construct a PBQP Matrix with the given dimensions and initial
- /// value.
- Matrix(unsigned rows, unsigned cols, PBQPNum initVal) :
- rows(rows), cols(cols), data(new PBQPNum[rows * cols]) {
- std::fill(data, data + (rows * cols), initVal);
- }
-
- /// \brief Copy construct a PBQP matrix.
- Matrix(const Matrix &m) :
- rows(m.rows), cols(m.cols), data(new PBQPNum[rows * cols]) {
- std::copy(m.data, m.data + (rows * cols), data);
- }
-
- /// \brief Destroy this matrix, return its memory.
- ~Matrix() { delete[] data; }
-
- /// \brief Assignment operator.
- Matrix& operator=(const Matrix &m) {
- delete[] data;
- rows = m.rows; cols = m.cols;
- data = new PBQPNum[rows * cols];
- std::copy(m.data, m.data + (rows * cols), data);
- return *this;
- }
-
- /// \brief Return the number of rows in this matrix.
- unsigned getRows() const { return rows; }
-
- /// \brief Return the number of cols in this matrix.
- unsigned getCols() const { return cols; }
-
- /// \brief Matrix element access.
- PBQPNum* operator[](unsigned r) {
- assert(r < rows && "Row out of bounds.");
- return data + (r * cols);
- }
-
- /// \brief Matrix element access.
- const PBQPNum* operator[](unsigned r) const {
- assert(r < rows && "Row out of bounds.");
- return data + (r * cols);
- }
-
- /// \brief Returns the given row as a vector.
- Vector getRowAsVector(unsigned r) const {
- Vector v(cols);
- for (unsigned c = 0; c < cols; ++c)
- v[c] = (*this)[r][c];
- return v;
- }
-
- /// \brief Returns the given column as a vector.
- Vector getColAsVector(unsigned c) const {
- Vector v(rows);
- for (unsigned r = 0; r < rows; ++r)
- v[r] = (*this)[r][c];
- return v;
- }
-
- /// \brief Reset the matrix to the given value.
- Matrix& reset(PBQPNum val = 0) {
- std::fill(data, data + (rows * cols), val);
- return *this;
- }
-
- /// \brief Set a single row of this matrix to the given value.
- Matrix& setRow(unsigned r, PBQPNum val) {
- assert(r < rows && "Row out of bounds.");
- std::fill(data + (r * cols), data + ((r + 1) * cols), val);
- return *this;
- }
-
- /// \brief Set a single column of this matrix to the given value.
- Matrix& setCol(unsigned c, PBQPNum val) {
- assert(c < cols && "Column out of bounds.");
- for (unsigned r = 0; r < rows; ++r)
- (*this)[r][c] = val;
- return *this;
- }
-
- /// \brief Matrix transpose.
- Matrix transpose() const {
- Matrix m(cols, rows);
- for (unsigned r = 0; r < rows; ++r)
- for (unsigned c = 0; c < cols; ++c)
- m[c][r] = (*this)[r][c];
- return m;
- }
-
- /// \brief Returns the diagonal of the matrix as a vector.
- ///
- /// Matrix must be square.
- Vector diagonalize() const {
- assert(rows == cols && "Attempt to diagonalize non-square matrix.");
-
- Vector v(rows);
- for (unsigned r = 0; r < rows; ++r)
- v[r] = (*this)[r][r];
- return v;
- }
-
- /// \brief Add the given matrix to this one.
- Matrix& operator+=(const Matrix &m) {
- assert(rows == m.rows && cols == m.cols &&
- "Matrix dimensions mismatch.");
- std::transform(data, data + (rows * cols), m.data, data,
- std::plus<PBQPNum>());
- return *this;
- }
-
- /// \brief Returns the minimum of the given row
- PBQPNum getRowMin(unsigned r) const {
- assert(r < rows && "Row out of bounds");
- return *std::min_element(data + (r * cols), data + ((r + 1) * cols));
- }
-
- /// \brief Returns the minimum of the given column
- PBQPNum getColMin(unsigned c) const {
- PBQPNum minElem = (*this)[0][c];
- for (unsigned r = 1; r < rows; ++r)
- if ((*this)[r][c] < minElem) minElem = (*this)[r][c];
- return minElem;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given row.
- Matrix& subFromRow(unsigned r, PBQPNum val) {
- assert(r < rows && "Row out of bounds");
- std::transform(data + (r * cols), data + ((r + 1) * cols),
- data + (r * cols),
- std::bind2nd(std::minus<PBQPNum>(), val));
- return *this;
- }
-
- /// \brief Subtracts the given scalar from the elements of the given column.
- Matrix& subFromCol(unsigned c, PBQPNum val) {
- for (unsigned r = 0; r < rows; ++r)
- (*this)[r][c] -= val;
- return *this;
- }
-
- /// \brief Returns true if this is a zero matrix.
- bool isZero() const {
- return std::find_if(data, data + (rows * cols),
- std::bind2nd(std::not_equal_to<PBQPNum>(), 0)) ==
- data + (rows * cols);
- }
-
- private:
- unsigned rows, cols;
- PBQPNum *data;
-};
-
-/// \brief Output a textual representation of the given matrix on the given
-/// output stream.
-template <typename OStream>
-OStream& operator<<(OStream &os, const Matrix &m) {
-
- assert((m.getRows() != 0) && "Zero-row matrix badness.");
-
- for (unsigned i = 0; i < m.getRows(); ++i) {
- os << m.getRowAsVector(i);
- }
-
- return os;
-}
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_MATH_H
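A short, self-contained sketch of the Vector and Matrix interfaces deleted above. It is illustrative only; the values are arbitrary and the include path follows the convention used inside lib/CodeGen/PBQP.

#include "Math.h"
#include <cassert>

using namespace PBQP;

int main() {
  Vector v(3, 1);            // [1, 1, 1]
  v[2] = 0;
  assert(v.minIndex() == 2); // index of the smallest element

  Matrix m(2, 3, 5);         // 2x3 matrix filled with 5
  m.setRow(0, 2);            // first row becomes [2, 2, 2]
  assert(m.getRowMin(0) == 2 && m.getColMin(1) == 2);

  Matrix t = m.transpose();  // 3x2
  assert(t.getRows() == 3 && t.getCols() == 2);

  m.subFromRow(0, 2);        // first row becomes [0, 0, 0]
  assert(!m.isZero());       // second row is still all 5s
  return 0;
}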
diff --git a/lib/CodeGen/PBQP/Solution.h b/lib/CodeGen/PBQP/Solution.h
deleted file mode 100644
index 047fd04c7cb8..000000000000
--- a/lib/CodeGen/PBQP/Solution.h
+++ /dev/null
@@ -1,89 +0,0 @@
-//===-- Solution.h ------- PBQP Solution ------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// PBQP Solution class.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PBQP_SOLUTION_H
-#define LLVM_CODEGEN_PBQP_SOLUTION_H
-
-#include "Math.h"
-#include "Graph.h"
-
-#include <map>
-
-namespace PBQP {
-
- /// \brief Represents a solution to a PBQP problem.
- ///
- /// To get the selection for each node in the problem use the getSelection method.
- class Solution {
- private:
-
- typedef std::map<Graph::NodeItr, unsigned, NodeItrComparator> SelectionsMap;
- SelectionsMap selections;
-
- unsigned r0Reductions, r1Reductions, r2Reductions, rNReductions;
-
- public:
-
- /// \brief Construct an empty solution with all reduction counters zeroed.
- Solution()
- : r0Reductions(0), r1Reductions(0), r2Reductions(0), rNReductions(0) {}
-
- /// \brief Number of nodes for which selections have been made.
- /// @return Number of nodes for which selections have been made.
- unsigned numNodes() const { return selections.size(); }
-
- /// \brief Records a reduction via the R0 rule. Should be called from the
- /// solver only.
- void recordR0() { ++r0Reductions; }
-
- /// \brief Returns the number of R0 reductions applied to solve the problem.
- unsigned numR0Reductions() const { return r0Reductions; }
-
- /// \brief Records a reduction via the R1 rule. Should be called from the
- /// solver only.
- void recordR1() { ++r1Reductions; }
-
- /// \brief Returns the number of R1 reductions applied to solve the problem.
- unsigned numR1Reductions() const { return r1Reductions; }
-
- /// \brief Records a reduction via the R2 rule. Should be called from the
- /// solver only.
- void recordR2() { ++r2Reductions; }
-
- /// \brief Returns the number of R2 reductions applied to solve the problem.
- unsigned numR2Reductions() const { return r2Reductions; }
-
- /// \brief Records a reduction via the RN rule. Should be called from the
- /// solver only.
- void recordRN() { ++rNReductions; }
-
- /// \brief Returns the number of RN reductions applied to solve the problem.
- unsigned numRNReductions() const { return rNReductions; }
-
- /// \brief Set the selection for a given node.
- /// @param nItr Node iterator.
- /// @param selection Selection for nItr.
- void setSelection(Graph::NodeItr nItr, unsigned selection) {
- selections[nItr] = selection;
- }
-
- /// \brief Get a node's selection.
- /// @param nItr Node iterator.
- /// @return The selection for nItr.
- unsigned getSelection(Graph::NodeItr nItr) const {
- SelectionsMap::const_iterator sItr = selections.find(nItr);
- assert(sItr != selections.end() && "No selection for node.");
- return sItr->second;
- }
-
- };
-
-}
-
-#endif // LLVM_CODEGEN_PBQP_SOLUTION_H
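Reading results back out of a Solution, as a sketch. It assumes a Graph g that has already been built, uses the Briggs heuristic defined earlier in this diff, and the reading of index 0 as the spill option comes from the Briggs comments rather than from Solution itself.

#include "Graph.h"
#include "HeuristicSolver.h"
#include "Heuristics/Briggs.h"

using namespace PBQP;

void summarise(Graph &g) {
  Solution s = HeuristicSolver<Heuristics::Briggs>::solve(g);

  // One selection per node: 0 picks the spill option, >0 picks a register.
  for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd();
       nItr != nEnd; ++nItr)
    (void)s.getSelection(nItr);

  // The counters describe how the graph was reduced.
  unsigned optimallyReduced = s.numR0Reductions() + s.numR1Reductions() +
                              s.numR2Reductions();
  unsigned heuristicallyReduced = s.numRNReductions();
  (void)optimallyReduced;        // silence unused-variable warnings in
  (void)heuristicallyReduced;    // this sketch
}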
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index d4df4c548711..5f7cf582c960 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -14,7 +14,7 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "phielim"
-#include "PHIElimination.h"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -34,23 +34,72 @@
#include <map>
using namespace llvm;
+namespace {
+ class PHIElimination : public MachineFunctionPass {
+ MachineRegisterInfo *MRI; // Machine register information
+
+ public:
+ static char ID; // Pass identification, replacement for typeid
+ PHIElimination() : MachineFunctionPass(ID) {
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ private:
+ /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
+ /// in predecessor basic blocks.
+ ///
+ bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
+ void LowerAtomicPHINode(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator AfterPHIsIt);
+
+ /// analyzePHINodes - Gather information about the PHI nodes in
+ /// here. In particular, we want to map the number of uses of a virtual
+ /// register which is used in a PHI node. We map that to the BB the
+ /// vreg is coming from. This is used later to determine when the vreg
+ /// is killed in the BB.
+ ///
+ void analyzePHINodes(const MachineFunction& Fn);
+
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
+ LiveVariables &LV, MachineLoopInfo *MLI);
+
+ typedef std::pair<unsigned, unsigned> BBVRegPair;
+ typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
+
+ VRegPHIUse VRegPHIUseCount;
+
+ // Defs of PHI sources which are implicit_def.
+ SmallPtrSet<MachineInstr*, 4> ImpDefs;
+
+ // Map reusable lowered PHI node -> incoming join register.
+ typedef DenseMap<MachineInstr*, unsigned,
+ MachineInstrExpressionTrait> LoweredPHIMap;
+ LoweredPHIMap LoweredPHIs;
+ };
+}
+
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split");
STATISTIC(NumReused, "Number of reused lowered phis");
char PHIElimination::ID = 0;
INITIALIZE_PASS(PHIElimination, "phi-node-elimination",
- "Eliminate PHI nodes for register allocation", false, false);
+ "Eliminate PHI nodes for register allocation", false, false)
-char &llvm::PHIEliminationID = PHIElimination::ID;
+char& llvm::PHIEliminationID = PHIElimination::ID;
-void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addPreserved<LiveVariables>();
AU.addPreserved<MachineDominatorTree>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}
-bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
+bool PHIElimination::runOnMachineFunction(MachineFunction &MF) {
MRI = &MF.getRegInfo();
bool Changed = false;
@@ -93,14 +142,14 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &MF) {
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
-bool llvm::PHIElimination::EliminatePHINodes(MachineFunction &MF,
+bool PHIElimination::EliminatePHINodes(MachineFunction &MF,
MachineBasicBlock &MBB) {
if (MBB.empty() || !MBB.front().isPHI())
return false; // Quick exit for basic blocks without PHIs.
// Get an iterator to the first instruction after the last PHI node (this may
// also be the end of the basic block).
- MachineBasicBlock::iterator AfterPHIsIt = SkipPHIsAndLabels(MBB, MBB.begin());
+ MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin());
while (MBB.front().isPHI())
LowerAtomicPHINode(MBB, AfterPHIsIt);
@@ -121,58 +170,14 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
-// when following the CFG edge to SuccMBB. This needs to be after any def of
-// SrcReg, but before any subsequent point where control flow might jump out of
-// the basic block.
-MachineBasicBlock::iterator
-llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
- MachineBasicBlock &SuccMBB,
- unsigned SrcReg) {
- // Handle the trivial case trivially.
- if (MBB.empty())
- return MBB.begin();
-
- // Usually, we just want to insert the copy before the first terminator
- // instruction. However, for the edge going to a landing pad, we must insert
- // the copy before the call/invoke instruction.
- if (!SuccMBB.isLandingPad())
- return MBB.getFirstTerminator();
-
- // Discover any defs/uses in this basic block.
- SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
- for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
- MachineInstr *DefUseMI = &*RI;
- if (DefUseMI->getParent() == &MBB)
- DefUsesInMBB.insert(DefUseMI);
- }
- MachineBasicBlock::iterator InsertPoint;
- if (DefUsesInMBB.empty()) {
- // No defs. Insert the copy at the start of the basic block.
- InsertPoint = MBB.begin();
- } else if (DefUsesInMBB.size() == 1) {
- // Insert the copy immediately after the def/use.
- InsertPoint = *DefUsesInMBB.begin();
- ++InsertPoint;
- } else {
- // Insert the copy immediately after the last def/use.
- InsertPoint = MBB.end();
- while (!DefUsesInMBB.count(&*--InsertPoint)) {}
- ++InsertPoint;
- }
-
- // Make sure the copy goes after any phi nodes however.
- return SkipPHIsAndLabels(MBB, InsertPoint);
-}
/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
///
-void llvm::PHIElimination::LowerAtomicPHINode(
+void PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
++NumAtomic;
@@ -207,7 +212,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
IncomingReg = entry;
reusedIncoming = true;
++NumReused;
- DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi);
+ DEBUG(dbgs() << "Reusing " << PrintReg(IncomingReg) << " for " << *MPhi);
} else {
const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg);
entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC);
@@ -294,7 +299,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
MachineBasicBlock::iterator InsertPos =
- FindCopyInsertPoint(opBlock, MBB, SrcReg);
+ findPHICopyInsertPoint(&opBlock, &MBB, SrcReg);
// Insert the copy.
if (!reusedIncoming && IncomingReg)
@@ -335,6 +340,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = llvm::next(Term);
TI != opBlock.end(); ++TI) {
+ if (TI->isDebugValue())
+ continue;
assert(!TI->readsRegister(SrcReg) &&
"Terminator instructions cannot use virtual registers unless"
"they are the first terminator in a block!");
@@ -343,9 +350,13 @@ void llvm::PHIElimination::LowerAtomicPHINode(
} else if (reusedIncoming || !IncomingReg) {
// We may have to rewind a bit if we didn't insert a copy this time.
KillInst = Term;
- while (KillInst != opBlock.begin())
- if ((--KillInst)->readsRegister(SrcReg))
+ while (KillInst != opBlock.begin()) {
+ --KillInst;
+ if (KillInst->isDebugValue())
+ continue;
+ if (KillInst->readsRegister(SrcReg))
break;
+ }
} else {
// We just inserted this copy.
KillInst = prior(InsertPos);
@@ -371,7 +382,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
/// used in a PHI node. We map that to the BB the vreg is coming from. This is
/// used later to determine when the vreg is killed in the BB.
///
-void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
+void PHIElimination::analyzePHINodes(const MachineFunction& MF) {
for (MachineFunction::const_iterator I = MF.begin(), E = MF.end();
I != E; ++I)
for (MachineBasicBlock::const_iterator BBI = I->begin(), BBE = I->end();
@@ -381,10 +392,10 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& MF) {
BBI->getOperand(i).getReg())];
}
-bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
- MachineBasicBlock &MBB,
- LiveVariables &LV,
- MachineLoopInfo *MLI) {
+bool PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ LiveVariables &LV,
+ MachineLoopInfo *MLI) {
if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad())
return false; // Quick exit for basic blocks without PHIs.
@@ -403,10 +414,14 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) {
if (!MLI ||
!(MLI->getLoopFor(PreMBB) == MLI->getLoopFor(&MBB) &&
- MLI->isLoopHeader(&MBB)))
- Changed |= PreMBB->SplitCriticalEdge(&MBB, this) != 0;
+ MLI->isLoopHeader(&MBB))) {
+ if (PreMBB->SplitCriticalEdge(&MBB, this)) {
+ Changed = true;
+ ++NumCriticalEdgesSplit;
+ }
+ }
}
}
}
- return true;
+ return Changed;
}
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
deleted file mode 100644
index 45a97182e71c..000000000000
--- a/lib/CodeGen/PHIElimination.h
+++ /dev/null
@@ -1,115 +0,0 @@
-//===-- lib/CodeGen/PHIElimination.h ----------------------------*- C++ -*-===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_CODEGEN_PHIELIMINATION_HPP
-#define LLVM_CODEGEN_PHIELIMINATION_HPP
-
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
-
-namespace llvm {
- class LiveVariables;
- class MachineRegisterInfo;
- class MachineLoopInfo;
-
- /// Lower PHI instructions to copies.
- class PHIElimination : public MachineFunctionPass {
- MachineRegisterInfo *MRI; // Machine register information
-
- public:
- static char ID; // Pass identification, replacement for typeid
- PHIElimination() : MachineFunctionPass(ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const;
-
- private:
- /// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
- /// in predecessor basic blocks.
- ///
- bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB);
- void LowerAtomicPHINode(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator AfterPHIsIt);
-
- /// analyzePHINodes - Gather information about the PHI nodes in
- /// here. In particular, we want to map the number of uses of a virtual
- /// register which is used in a PHI node. We map that to the BB the
- /// vreg is coming from. This is used later to determine when the vreg
- /// is killed in the BB.
- ///
- void analyzePHINodes(const MachineFunction& Fn);
-
- /// Split critical edges where necessary for good coalescer performance.
- bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
- LiveVariables &LV, MachineLoopInfo *MLI);
-
- /// SplitCriticalEdge - Split a critical edge from A to B by
- /// inserting a new MBB. Update branches in A and PHI instructions
- /// in B. Return the new block.
- MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
- MachineBasicBlock *B);
-
- /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
- /// any def of SrcReg, but before any subsequent point where control flow
- /// might jump out of the basic block.
- MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
- MachineBasicBlock &SuccMBB,
- unsigned SrcReg);
-
- // SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
- // also after any exception handling labels: in landing pads execution
- // starts at the label, so any copies placed before it won't be executed!
- // We also deal with DBG_VALUEs, which are a bit tricky:
- // PHI
- // DBG_VALUE
- // LABEL
- // Here the DBG_VALUE needs to be skipped, and if it refers to a PHI it
- // needs to be annulled or, better, moved to follow the label, as well.
- // PHI
- // DBG_VALUE
- // no label
- // Here it is not a good idea to skip the DBG_VALUE.
- // FIXME: For now we skip and annul all DBG_VALUEs, maximally simple and
- // maximally stupid.
- MachineBasicBlock::iterator SkipPHIsAndLabels(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- // Rather than assuming that EH labels come before other kinds of labels,
- // just skip all labels.
- while (I != MBB.end() &&
- (I->isPHI() || I->isLabel() || I->isDebugValue())) {
- if (I->isDebugValue() && I->getNumOperands()==3 &&
- I->getOperand(0).isReg())
- I->getOperand(0).setReg(0U);
- ++I;
- }
- return I;
- }
-
- typedef std::pair<unsigned, unsigned> BBVRegPair;
- typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
-
- VRegPHIUse VRegPHIUseCount;
-
- // Defs of PHI sources which are implicit_def.
- SmallPtrSet<MachineInstr*, 4> ImpDefs;
-
- // Map reusable lowered PHI node -> incoming join register.
- typedef DenseMap<MachineInstr*, unsigned,
- MachineInstrExpressionTrait> LoweredPHIMap;
- LoweredPHIMap LoweredPHIs;
- };
-
-}
-
-#endif /* LLVM_CODEGEN_PHIELIMINATION_HPP */
diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp
new file mode 100644
index 000000000000..10bfdcce6769
--- /dev/null
+++ b/lib/CodeGen/PHIEliminationUtils.cpp
@@ -0,0 +1,61 @@
+//===-- PHIEliminationUtils.cpp - Helper functions for PHI elimination ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PHIEliminationUtils.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/ADT/SmallPtrSet.h"
+using namespace llvm;
+
+// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
+MachineBasicBlock::iterator
+llvm::findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg) {
+ // Handle the trivial case trivially.
+ if (MBB->empty())
+ return MBB->begin();
+
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB->isLandingPad())
+ return MBB->getFirstTerminator();
+
+ // Discover any defs/uses in this basic block.
+ SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
+ MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(SrcReg),
+ RE = MRI.reg_end(); RI != RE; ++RI) {
+ MachineInstr* DefUseMI = &*RI;
+ if (DefUseMI->getParent() == MBB)
+ DefUsesInMBB.insert(DefUseMI);
+ }
+
+ MachineBasicBlock::iterator InsertPoint;
+ if (DefUsesInMBB.empty()) {
+ // No defs. Insert the copy at the start of the basic block.
+ InsertPoint = MBB->begin();
+ } else if (DefUsesInMBB.size() == 1) {
+ // Insert the copy immediately after the def/use.
+ InsertPoint = *DefUsesInMBB.begin();
+ ++InsertPoint;
+ } else {
+ // Insert the copy immediately after the last def/use.
+ InsertPoint = MBB->end();
+ while (!DefUsesInMBB.count(&*--InsertPoint)) {}
+ ++InsertPoint;
+ }
+
+ // Make sure the copy goes after any phi nodes, however.
+ return MBB->SkipPHIsAndLabels(InsertPoint);
+}
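A minimal standalone sketch of the insertion-point rule implemented above, under simplifying assumptions (the block is a plain vector and the "instructions" are hypothetical records, not MachineInstrs): place the copy immediately after the last def or use of the source register, but never before the block's leading PHIs.

    #include <cassert>
    #include <string>
    #include <vector>

    // Hypothetical, simplified instruction record: an opcode tag plus the
    // registers it defines or uses. Not the LLVM MachineInstr API.
    struct Instr {
      std::string Op;
      std::vector<int> DefsUses;
      bool touches(int Reg) const {
        for (int R : DefsUses)
          if (R == Reg)
            return true;
        return false;
      }
    };

    // Return the index at which a copy from SrcReg may be inserted when the
    // successor is a landing pad: after the last def/use of SrcReg in the
    // block, but never before the leading PHI instructions.
    static size_t findCopyInsertIndex(const std::vector<Instr> &Block, int SrcReg) {
      size_t Insert = 0;                  // default: start of the block
      for (size_t i = 0; i != Block.size(); ++i)
        if (Block[i].touches(SrcReg))
          Insert = i + 1;                 // immediately after the def/use
      size_t AfterPHIs = 0;               // copies must follow the PHIs
      while (AfterPHIs != Block.size() && Block[AfterPHIs].Op == "PHI")
        ++AfterPHIs;
      return Insert < AfterPHIs ? AfterPHIs : Insert;
    }

    int main() {
      std::vector<Instr> Block = {
          {"PHI", {7}}, {"def", {5}}, {"use", {5}}, {"invoke", {}}};
      assert(findCopyInsertIndex(Block, 5) == 3); // after the last use of %5
      assert(findCopyInsertIndex(Block, 9) == 1); // no def/use: right after PHIs
      return 0;
    }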
diff --git a/lib/CodeGen/PHIEliminationUtils.h b/lib/CodeGen/PHIEliminationUtils.h
new file mode 100644
index 000000000000..9ac47fb4c505
--- /dev/null
+++ b/lib/CodeGen/PHIEliminationUtils.h
@@ -0,0 +1,25 @@
+//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+#define LLVM_CODEGEN_PHIELIMINATIONUTILS_H
+
+#include "llvm/CodeGen/MachineBasicBlock.h"
+
+namespace llvm {
+ /// findPHICopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
+ MachineBasicBlock::iterator
+ findPHICopyInsertPoint(MachineBasicBlock* MBB, MachineBasicBlock* SuccMBB,
+ unsigned SrcReg);
+}
+
+#endif
diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp
index 17cee46ca16c..5d7123caa017 100644
--- a/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/lib/CodeGen/PeepholeOptimizer.cpp
@@ -41,7 +41,9 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -50,8 +52,13 @@ static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
cl::desc("Aggressive extension optimization"));
+static cl::opt<bool>
+DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
+ cl::desc("Disable the peephole optimizer"));
+
STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumEliminated, "Number of compares eliminated");
+STATISTIC(NumImmFold,  "Number of move immediates folded");
namespace {
class PeepholeOptimizer : public MachineFunctionPass {
@@ -62,7 +69,9 @@ namespace {
public:
static char ID; // Pass identification
- PeepholeOptimizer() : MachineFunctionPass(ID) {}
+ PeepholeOptimizer() : MachineFunctionPass(ID) {
+ initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -79,12 +88,21 @@ namespace {
bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs);
+ bool isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
+ bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
};
}
char PeepholeOptimizer::ID = 0;
-INITIALIZE_PASS(PeepholeOptimizer, "peephole-opts",
- "Peephole Optimizations", false, false);
+INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
+ "Peephole Optimizations", false, false)
FunctionPass *llvm::createPeepholeOptimizerPass() {
return new PeepholeOptimizer();
@@ -102,12 +120,10 @@ FunctionPass *llvm::createPeepholeOptimizerPass() {
bool PeepholeOptimizer::
OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
- LocalMIs.insert(MI);
-
unsigned SrcReg, DstReg, SubIdx;
if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
return false;
-
+
if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
@@ -232,22 +248,17 @@ OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
/// set) the same flag as the compare, then we can remove the comparison and use
/// the flag from the previous instruction.
bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
- MachineBasicBlock *MBB) {
+ MachineBasicBlock *MBB){
// If this instruction is a comparison against zero and isn't comparing a
// physical register, we can try to optimize it.
unsigned SrcReg;
- int CmpValue;
- if (!TII->AnalyzeCompare(MI, SrcReg, CmpValue) ||
- TargetRegisterInfo::isPhysicalRegister(SrcReg) || CmpValue != 0)
- return false;
-
- MachineRegisterInfo::def_iterator DI = MRI->def_begin(SrcReg);
- if (llvm::next(DI) != MRI->def_end())
- // Only support one definition.
+ int CmpMask, CmpValue;
+ if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
+ TargetRegisterInfo::isPhysicalRegister(SrcReg))
return false;
- // Attempt to convert the defining instruction to set the "zero" flag.
- if (TII->ConvertToSetZeroFlag(&*DI, MI)) {
+ // Attempt to optimize the comparison instruction.
+ if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
++NumEliminated;
return true;
}
@@ -255,7 +266,53 @@ bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
return false;
}
+bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ const TargetInstrDesc &TID = MI->getDesc();
+ if (!TID.isMoveImmediate())
+ return false;
+ if (TID.getNumDefs() != 1)
+ return false;
+ unsigned Reg = MI->getOperand(0).getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ ImmDefMIs.insert(std::make_pair(Reg, MI));
+ ImmDefRegs.insert(Reg);
+ return true;
+ }
+
+ return false;
+}
+
+/// FoldImmediate - Try folding register operands that are defined by move
+/// immediate instructions, i.e. a trivial constant folding optimization, if
+/// and only if the def and use are in the same BB.
+bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
+ SmallSet<unsigned, 4> &ImmDefRegs,
+ DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
+ for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg() || MO.isDef())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
+ if (ImmDefRegs.count(Reg) == 0)
+ continue;
+ DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
+ assert(II != ImmDefMIs.end());
+ if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
+ ++NumImmFold;
+ return true;
+ }
+ }
+ return false;
+}
+
bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
+ if (DisablePeephole)
+ return false;
+
TM = &MF.getTarget();
TII = TM->getInstrInfo();
MRI = &MF.getRegInfo();
@@ -264,22 +321,50 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
SmallPtrSet<MachineInstr*, 8> LocalMIs;
+ SmallSet<unsigned, 4> ImmDefRegs;
+ DenseMap<unsigned, MachineInstr*> ImmDefMIs;
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
MachineBasicBlock *MBB = &*I;
+
+ bool SeenMoveImm = false;
LocalMIs.clear();
+ ImmDefRegs.clear();
+ ImmDefMIs.clear();
+ bool First = true;
+ MachineBasicBlock::iterator PMII;
for (MachineBasicBlock::iterator
- MII = I->begin(), ME = I->end(); MII != ME; ) {
+ MII = I->begin(), MIE = I->end(); MII != MIE; ) {
MachineInstr *MI = &*MII;
+ LocalMIs.insert(MI);
- if (MI->getDesc().isCompare() &&
- !MI->getDesc().hasUnmodeledSideEffects()) {
- ++MII; // The iterator may become invalid if the compare is deleted.
- Changed |= OptimizeCmpInstr(MI, MBB);
+ if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
+ MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
+ MI->hasUnmodeledSideEffects()) {
+ ++MII;
+ continue;
+ }
+
+ if (MI->getDesc().isCompare()) {
+ if (OptimizeCmpInstr(MI, MBB)) {
+ // MI is deleted.
+ Changed = true;
+ MII = First ? I->begin() : llvm::next(PMII);
+ continue;
+ }
+ }
+
+ if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
+ SeenMoveImm = true;
} else {
Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
- ++MII;
+ if (SeenMoveImm)
+ Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
}
+
+ First = false;
+ PMII = MII;
+ ++MII;
}
}
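A rough standalone sketch of the per-block pattern the new isMoveImmediate/FoldImmediate path implements, with a made-up three-address instruction form (Dst = Src + Imm) standing in for MachineInstr: remember which registers the block defined with a move-immediate, and fold that constant into later uses inside the same block.

    #include <cassert>
    #include <map>
    #include <vector>

    // Hypothetical instruction: Dst = Src + Imm, or a plain move-immediate
    // (Dst = Imm) when IsMoveImm is set. This is not the MachineInstr API.
    struct Instr {
      bool IsMoveImm;
      int Dst;
      int Src;      // register operand (0 = none)
      long Imm;     // immediate operand
    };

    // Scan one block: record move-immediate defs, then fold the constant into
    // later uses within the same block, dropping the register operand.
    static unsigned foldImmediatesInBlock(std::vector<Instr> &Block) {
      std::map<int, long> ImmDefs;   // vreg -> constant it was defined to
      unsigned NumFolded = 0;
      for (Instr &I : Block) {
        if (I.IsMoveImm) {
          ImmDefs[I.Dst] = I.Imm;    // remember the constant definition
          continue;
        }
        auto It = ImmDefs.find(I.Src);
        if (It != ImmDefs.end()) {
          I.Imm += It->second;       // fold: use the constant directly
          I.Src = 0;                 // drop the register operand
          ++NumFolded;
        }
      }
      return NumFolded;
    }

    int main() {
      // %1 = 40; %2 = %1 + 2  ==>  %2 = 42
      std::vector<Instr> Block = {{true, 1, 0, 40}, {false, 2, 1, 2}};
      assert(foldImmediatesInBlock(Block) == 1);
      assert(Block[1].Src == 0 && Block[1].Imm == 42);
      return 0;
    }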
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index f0bd6d1372be..60c24b710792 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -133,18 +133,12 @@ namespace {
std::vector<unsigned> KillIndices;
public:
- SchedulePostRATDList(MachineFunction &MF,
- const MachineLoopInfo &MLI,
- const MachineDominatorTree &MDT,
- ScheduleHazardRecognizer *HR,
- AntiDepBreaker *ADB,
- AliasAnalysis *aa)
- : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits),
- HazardRec(HR), AntiDepBreak(ADB), AA(aa),
- KillIndices(TRI->getNumRegs()) {}
-
- ~SchedulePostRATDList() {
- }
+ SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs);
+
+ ~SchedulePostRATDList();
/// StartBlock - Initialize register live-range state for scheduling in
/// this block.
@@ -183,9 +177,34 @@ namespace {
};
}
+SchedulePostRATDList::SchedulePostRATDList(
+ MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT,
+ AliasAnalysis *AA, TargetSubtarget::AntiDepBreakMode AntiDepMode,
+ SmallVectorImpl<TargetRegisterClass*> &CriticalPathRCs)
+ : ScheduleDAGInstrs(MF, MLI, MDT), Topo(SUnits), AA(AA),
+ KillIndices(TRI->getNumRegs())
+{
+ const TargetMachine &TM = MF.getTarget();
+ const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
+ HazardRec =
+ TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins, this);
+ AntiDepBreak =
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(MF, CriticalPathRCs) :
+ ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
+ (AntiDepBreaker *)new CriticalAntiDepBreaker(MF) : NULL));
+}
+
+SchedulePostRATDList::~SchedulePostRATDList() {
+ delete HazardRec;
+ delete AntiDepBreak;
+}
+
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
- AA = &getAnalysis<AliasAnalysis>();
TII = Fn.getTarget().getInstrInfo();
+ MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
+ MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
+ AliasAnalysis *AA = &getAnalysis<AliasAnalysis>();
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
@@ -195,6 +214,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
+ // This may upgrade the AntiDepMode.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
@@ -210,19 +230,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
DEBUG(dbgs() << "PostRAScheduler\n");
- const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
- const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
- const TargetMachine &TM = Fn.getTarget();
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- ScheduleHazardRecognizer *HR =
- TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
- AntiDepBreaker *ADB =
- ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
- ((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
- (AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
-
- SchedulePostRATDList Scheduler(Fn, MLI, MDT, HR, ADB, AA);
+ SchedulePostRATDList Scheduler(Fn, MLI, MDT, AA, AntiDepMode,
+ CriticalPathRCs);
// Loop over all of the basic blocks
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
@@ -270,9 +279,6 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
Scheduler.FixupKills(MBB);
}
- delete HR;
- delete ADB;
-
return true;
}
@@ -617,13 +623,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
MinDepth = PendingQueue[i]->getDepth();
}
- DEBUG(dbgs() << "\n*** Examining Available\n";
- LatencyPriorityQueue q = AvailableQueue;
- while (!q.empty()) {
- SUnit *su = q.pop();
- dbgs() << "Height " << su->getHeight() << ": ";
- su->dump(this);
- });
+ DEBUG(dbgs() << "\n*** Examining Available\n"; AvailableQueue.dump(this));
SUnit *FoundSUnit = 0;
bool HasNoopHazards = false;
@@ -631,7 +631,7 @@ void SchedulePostRATDList::ListScheduleTopDown() {
SUnit *CurSUnit = AvailableQueue.pop();
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
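For context, the loop above keeps popping AvailableQueue until a candidate passes the hazard check. A self-contained sketch of that selection step follows; the Unit record, priority rule, and toy hazard test are invented stand-ins for SUnit and the target hazard recognizer.

    #include <cassert>
    #include <queue>
    #include <vector>

    // Toy scheduling unit: a priority (higher schedules first) and a latency
    // class used by the stand-in hazard rule below. Not the LLVM SUnit type.
    struct Unit {
      int Priority;
      int LatencyClass;
      bool operator<(const Unit &O) const { return Priority < O.Priority; }
    };

    // Stand-in hazard rule: refuse to issue two units of the same latency
    // class back to back. A real recognizer models pipeline resources.
    static bool hasHazard(const Unit &U, int LastIssuedClass) {
      return U.LatencyClass == LastIssuedClass;
    }

    // Pick the highest-priority available unit that does not trigger a hazard,
    // pushing the skipped candidates back so they stay available next cycle.
    static bool pickNext(std::priority_queue<Unit> &Available,
                         int LastIssuedClass, Unit &Out) {
      std::vector<Unit> NotReady;
      bool Found = false;
      while (!Available.empty()) {
        Unit U = Available.top();
        Available.pop();
        if (!hasHazard(U, LastIssuedClass)) {
          Out = U;
          Found = true;
          break;
        }
        NotReady.push_back(U);       // hazard: try the next candidate
      }
      for (const Unit &U : NotReady) // re-queue everything we skipped
        Available.push(U);
      return Found;
    }

    int main() {
      std::priority_queue<Unit> Available;
      Available.push({10, 1});
      Available.push({5, 2});
      Unit Next;
      assert(pickNext(Available, /*LastIssuedClass=*/1, Next)); // {10,1} hazards
      assert(Next.Priority == 5);                               // so {5,2} issues
      return 0;
    }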
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index cd9d83eeb684..d6e31dae9d13 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -91,8 +91,9 @@ namespace {
public:
static char ID;
- PreAllocSplitting()
- : MachineFunctionPass(ID) {}
+ PreAllocSplitting() : MachineFunctionPass(ID) {
+ initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+ }
virtual bool runOnMachineFunction(MachineFunction &MF);
@@ -106,10 +107,8 @@ namespace {
AU.addPreserved<LiveStacks>();
AU.addPreserved<RegisterCoalescer>();
AU.addPreserved<CalculateSpillWeights>();
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
AU.addRequired<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
@@ -203,9 +202,18 @@ namespace {
char PreAllocSplitting::ID = 0;
-INITIALIZE_PASS(PreAllocSplitting, "pre-alloc-splitting",
+INITIALIZE_PASS_BEGIN(PreAllocSplitting, "pre-alloc-splitting",
+ "Pre-Register Allocation Live Interval Splitting",
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_END(PreAllocSplitting, "pre-alloc-splitting",
"Pre-Register Allocation Live Interval Splitting",
- false, false);
+ false, false)
char &llvm::PreAllocSplittingID = PreAllocSplitting::ID;
@@ -324,7 +332,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
LSs->getVNInfoAllocator());
return SS;
}
@@ -585,7 +593,7 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us
SlotIndex StartIndex = LIs->getMBBStartIdx(MBB);
VNInfo *RetVNI = Phis[MBB] =
- LI->getNextValue(SlotIndex(), /*FIXME*/ 0, false,
+ LI->getNextValue(SlotIndex(), /*FIXME*/ 0,
LIs->getVNInfoAllocator());
if (!IsIntraBlock) LiveOut[MBB] = RetVNI;
@@ -674,7 +682,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DefIdx = DefIdx.getDefIndex();
assert(!DI->isPHI() && "PHI instr in code during pre-alloc splitting.");
- VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
+ VNInfo* NewVN = LI->getNextValue(DefIdx, 0, Alloc);
// If the def is a move, set the copy field.
if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg)
@@ -807,7 +815,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
+ if (!DefMI || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
@@ -872,7 +880,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (CurrSLI->hasAtLeastOneValue())
CurrSValNo = CurrSLI->getValNumInfo(0);
else
- CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0, false,
+ CurrSValNo = CurrSLI->getNextValue(SlotIndex(), 0,
LSs->getVNInfoAllocator());
}
@@ -967,8 +975,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
assert(!ValNo->isUnused() && "Val# is defined by a dead def?");
- MachineInstr *DefMI = ValNo->isDefAccurate()
- ? LIs->getInstructionFromIndex(ValNo->def) : NULL;
+ MachineInstr *DefMI = LIs->getInstructionFromIndex(ValNo->def);
// If this would create a new join point, do not split.
if (DefMI && createsNewJoin(LR, DefMI->getParent(), Barrier->getParent())) {
@@ -1005,7 +1012,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SlotIndex SpillIndex;
MachineInstr *SpillMI = NULL;
int SS = -1;
- if (!ValNo->isDefAccurate()) {
+ if (!DefMI) {
// If we don't know where the def is we must split just before the barrier.
if ((SpillMI = FoldSpill(LI->reg, RC, 0, Barrier,
BarrierMBB, SS, RefsInMBB))) {
@@ -1199,12 +1206,12 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// We also don't try to handle the results of PHI joins, since there's
// no defining instruction to analyze.
- if (!CurrVN->isDefAccurate() || CurrVN->isUnused()) continue;
+ MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
+ if (!DefMI || CurrVN->isUnused()) continue;
// We're only interested in eliminating cruft introduced by the splitter,
// is of the form load-use or load-use-store. First, check that the
// definition is a load, and remember what stack slot we loaded it from.
- MachineInstr* DefMI = LIs->getInstructionFromIndex(CurrVN->def);
int FrameIndex;
if (!TII->isLoadFromStackSlot(DefMI, FrameIndex)) continue;
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index b8831db1d118..9cd9941e56b3 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -26,8 +26,11 @@
using namespace llvm;
char ProcessImplicitDefs::ID = 0;
-INITIALIZE_PASS(ProcessImplicitDefs, "processimpdefs",
- "Process Implicit Definitions.", false, false);
+INITIALIZE_PASS_BEGIN(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveVariables)
+INITIALIZE_PASS_END(ProcessImplicitDefs, "processimpdefs",
+ "Process Implicit Definitions", false, false)
void ProcessImplicitDefs::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index e2802c1fdf4a..ad7b6e4aa97f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -21,6 +21,7 @@
#define DEBUG_TYPE "pei"
#include "PrologEpilogInserter.h"
+#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
@@ -29,7 +30,7 @@
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
@@ -44,8 +45,12 @@ using namespace llvm;
char PEI::ID = 0;
-INITIALIZE_PASS(PEI, "prologepilog",
- "Prologue/Epilogue Insertion", false, false);
+INITIALIZE_PASS_BEGIN(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_END(PEI, "prologepilog",
+ "Prologue/Epilogue Insertion", false, false)
STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered");
STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged");
@@ -61,6 +66,8 @@ FunctionPass *llvm::createPrologEpilogCodeInserter() { return new PEI(); }
bool PEI::runOnMachineFunction(MachineFunction &Fn) {
const Function* F = Fn.getFunction();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
+
RS = TRI->requiresRegisterScavenging(Fn) ? new RegScavenger() : NULL;
FrameIndexVirtualScavenging = TRI->requiresFrameIndexScavenging(Fn);
@@ -71,7 +78,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make some adjustments to the function
// e.g. UsedPhysRegs before calculateCalleeSavedRegisters.
- TRI->processFunctionBeforeCalleeSavedScan(Fn, RS);
+ TFI->processFunctionBeforeCalleeSavedScan(Fn, RS);
// Scan the function for modified callee saved registers and insert spill code
// for any callee saved registers that are modified.
@@ -91,7 +98,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// Allow the target machine to make final modifications to the function
// before the frame layout is finalized.
- TRI->processFunctionBeforeFrameFinalized(Fn);
+ TFI->processFunctionBeforeFrameFinalized(Fn);
// Calculate actual frame offsets for all abstract stack objects...
calculateFrameObjectOffsets(Fn);
@@ -138,6 +145,7 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
@@ -165,7 +173,8 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
FrameSDOps.push_back(I);
} else if (I->isInlineAsm()) {
// Some inline asm's need a stack frame, as indicated by operand 1.
- if (I->getOperand(1).getImm())
+ unsigned ExtraInfo = I->getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
+ if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
AdjustsStack = true;
}
@@ -180,7 +189,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
// the target doesn't indicate otherwise, remove the call frame pseudos
// here. The sub/add sp instruction pairs are still inserted, but we don't
// need to track the SP adjustment for frame index elimination.
- if (RegInfo->canSimplifyCallFramePseudos(Fn))
+ if (TFI->canSimplifyCallFramePseudos(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
@@ -190,7 +199,7 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
/// registers.
void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- const TargetFrameInfo *TFI = Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
MachineFrameInfo *MFI = Fn.getFrameInfo();
// Get the callee saved register list...
@@ -229,7 +238,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
return; // Early exit if no callee saved registers are modified!
unsigned NumFixedSpillSlots;
- const TargetFrameInfo::SpillSlot *FixedSpillSlots =
+ const TargetFrameLowering::SpillSlot *FixedSpillSlots =
TFI->getCalleeSavedSpillSlots(NumFixedSpillSlots);
// Now that we know which registers need to be saved and restored, allocate
@@ -247,7 +256,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
// Check to see if this physreg must be spilled to a particular stack slot
// on this target.
- const TargetFrameInfo::SpillSlot *FixedSlot = FixedSpillSlots;
+ const TargetFrameLowering::SpillSlot *FixedSlot = FixedSpillSlots;
while (FixedSlot != FixedSpillSlots+NumFixedSpillSlots &&
FixedSlot->Reg != Reg)
++FixedSlot;
@@ -290,13 +299,14 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
return;
const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo();
+ const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering();
const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
MachineBasicBlock::iterator I;
if (! ShrinkWrapThisFunction) {
// Spill using target interface.
I = EntryBlock->begin();
- if (!TII.spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
+ if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
// Add the callee-saved register as live-in.
// It's killed at the spill.
@@ -328,7 +338,7 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) {
// Restore all registers immediately before the return and any
// terminators that precede it.
- if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
+ if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) {
for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
unsigned Reg = CSI[i].getReg();
const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
@@ -480,10 +490,10 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
/// abstract stack objects.
///
void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
- const TargetFrameInfo &TFI = *Fn.getTarget().getFrameInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
bool StackGrowsDown =
- TFI.getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
// Loop over all of the stack objects, assigning sequential addresses...
MachineFrameInfo *MFI = Fn.getFrameInfo();
@@ -549,7 +559,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// frame pointer if a frame pointer is required.
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
- if (RS && RegInfo->hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
+ if (RS && TFI.hasFP(Fn) && !RegInfo->needsStackRealignment(Fn)) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
@@ -631,17 +641,17 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
- if (RS && (!RegInfo->hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
+ if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn))) {
int SFI = RS->getScavengingFrameIndex();
if (SFI >= 0)
AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign);
}
- if (!RegInfo->targetHandlesStackFrameRounding()) {
+ if (!TFI.targetHandlesStackFrameRounding()) {
// If we have reserved argument space for call sites in the function
// immediately on entry to the current function, count it as part of the
// overall stack size.
- if (MFI->adjustsStack() && RegInfo->hasReservedCallFrame(Fn))
+ if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
Offset += MFI->getMaxCallFrameSize();
// Round up the size to a multiple of the alignment. If the function has
@@ -672,16 +682,16 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
/// prolog and epilog code to the function.
///
void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
- const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();
+ const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
// Add prologue to the function...
- TRI->emitPrologue(Fn);
+ TFI.emitPrologue(Fn);
// Add epilogue to restore the callee-save registers in each exiting block
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
// If last instruction is a return instruction, add an epilogue
if (!I->empty() && I->back().getDesc().isReturn())
- TRI->emitEpilogue(Fn, *I);
+ TFI.emitEpilogue(Fn, *I);
}
}
@@ -694,9 +704,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
const TargetMachine &TM = Fn.getTarget();
assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!");
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
- const TargetFrameInfo *TFI = TM.getFrameInfo();
+ const TargetFrameLowering *TFI = TM.getFrameLowering();
bool StackGrowsDown =
- TFI->getStackGrowthDirection() == TargetFrameInfo::StackGrowsDown;
+ TFI->getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
int FrameSetupOpcode = TRI.getCallFrameSetupOpcode();
int FrameDestroyOpcode = TRI.getCallFrameDestroyOpcode();
@@ -755,8 +765,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// If this instruction has a FrameIndex operand, we need to
// use that target machine register info object to eliminate
// it.
- TRI.eliminateFrameIndex(MI, SPAdj,
- FrameIndexVirtualScavenging ? NULL : RS);
+ TRI.eliminateFrameIndex(MI, SPAdj,
+ FrameIndexVirtualScavenging ? NULL : RS);
// Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) {
@@ -825,7 +835,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
ScratchReg = RS->scavengeRegister(RC, I, SPAdj);
++NumScavengedRegs;
}
- // replace this reference to the virtual register with the
+ // Replace this reference to the virtual register with the
// scratch register.
assert (ScratchReg && "Missing scratch register!");
MI->getOperand(i).setReg(ScratchReg);
diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h
index d575124a6b3e..e2391591ad06 100644
--- a/lib/CodeGen/PrologEpilogInserter.h
+++ b/lib/CodeGen/PrologEpilogInserter.h
@@ -36,7 +36,9 @@ namespace llvm {
class PEI : public MachineFunctionPass {
public:
static char ID;
- PEI() : MachineFunctionPass(ID) {}
+ PEI() : MachineFunctionPass(ID) {
+ initializePEIPass(*PassRegistry::getPassRegistry());
+ }
const char *getPassName() const {
return "Prolog/Epilog Insertion & Frame Finalization";
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 5e86e5a9447e..73b66d868f3d 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -18,7 +18,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include <map>
using namespace llvm;
diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h
new file mode 100644
index 000000000000..8c7e5f53b824
--- /dev/null
+++ b/lib/CodeGen/RegAllocBase.h
@@ -0,0 +1,181 @@
+//===-- RegAllocBase.h - basic regalloc interface and driver --*- C++ -*---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RegAllocBase class, which is the skeleton of a basic
+// register allocation algorithm and interface for extending it. It provides the
+// building blocks on which to construct other experimental allocators and test
+// the validity of two principles:
+//
+// - If virtual and physical register liveness is modeled using intervals, then
+// on-the-fly interference checking is cheap. Furthermore, interferences can be
+// lazily cached and reused.
+//
+// - Register allocation complexity, and generated code performance is
+// determined by the effectiveness of live range splitting rather than optimal
+// coloring.
+//
+// Following the first principle, interference checking revolves around the
+// LiveIntervalUnion data structure.
+//
+// To fulfill the second principle, the basic allocator provides a driver for
+// incremental splitting. It essentially punts on the problem of register
+// coloring, instead driving the assignment of virtual to physical registers by
+// the cost of splitting. The basic allocator allows for heuristic reassignment
+// of registers, if a more sophisticated allocator chooses to do that.
+//
+// This framework provides a way to engineer the compile time vs. code
+// quality trade-off without relying on a particular theoretical solver.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_REGALLOCBASE
+#define LLVM_CODEGEN_REGALLOCBASE
+
+#include "llvm/ADT/OwningPtr.h"
+#include "LiveIntervalUnion.h"
+#include <queue>
+
+namespace llvm {
+
+template<typename T> class SmallVectorImpl;
+class TargetRegisterInfo;
+class VirtRegMap;
+class LiveIntervals;
+class Spiller;
+
+// Forward declare a priority queue of live virtual registers. If an
+// implementation needs to prioritize by anything other than spill weight, then
+// this will become an abstract base class with virtual calls to push/get.
+class LiveVirtRegQueue;
+
+/// RegAllocBase provides the register allocation driver and interface that can
+/// be extended to add interesting heuristics.
+///
+/// Register allocators must override the selectOrSplit() method to implement
+/// live range splitting. They may also override getPriority() which otherwise
+/// defaults to the spill weight computed by CalculateSpillWeights.
+class RegAllocBase {
+ LiveIntervalUnion::Allocator UnionAllocator;
+protected:
+ // Array of LiveIntervalUnions indexed by physical register.
+ class LiveUnionArray {
+ unsigned NumRegs;
+ LiveIntervalUnion *Array;
+ public:
+ LiveUnionArray(): NumRegs(0), Array(0) {}
+ ~LiveUnionArray() { clear(); }
+
+ unsigned numRegs() const { return NumRegs; }
+
+ void init(LiveIntervalUnion::Allocator &, unsigned NRegs);
+
+ void clear();
+
+ LiveIntervalUnion& operator[](unsigned PhysReg) {
+ assert(PhysReg < NumRegs && "physReg out of bounds");
+ return Array[PhysReg];
+ }
+ };
+
+ const TargetRegisterInfo *TRI;
+ MachineRegisterInfo *MRI;
+ VirtRegMap *VRM;
+ LiveIntervals *LIS;
+ LiveUnionArray PhysReg2LiveUnion;
+
+ // Current queries, one per physreg. They must be reinitialized each time we
+ // query on a new live virtual register.
+ OwningArrayPtr<LiveIntervalUnion::Query> Queries;
+
+ RegAllocBase(): TRI(0), MRI(0), VRM(0), LIS(0) {}
+
+ virtual ~RegAllocBase() {}
+
+ // A RegAlloc pass should call this before allocatePhysRegs.
+ void init(VirtRegMap &vrm, LiveIntervals &lis);
+
+ // Get an initialized query to check interferences between VirtReg and PhysReg. Note
+ // that Query::init must be called at least once for each physical register
+ // before querying a new live virtual register. This ties Queries and
+ // PhysReg2LiveUnion together.
+ LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned PhysReg) {
+ Queries[PhysReg].init(&VirtReg, &PhysReg2LiveUnion[PhysReg]);
+ return Queries[PhysReg];
+ }
+
+ // The top-level driver. The output is a VirtRegMap that is updated with
+ // physical register assignments.
+ //
+ // If an implementation wants to override the LiveInterval comparator, we
+ // should modify this interface to allow passing in an instance derived from
+ // LiveVirtRegQueue.
+ void allocatePhysRegs();
+
+ // Get a temporary reference to a Spiller instance.
+ virtual Spiller &spiller() = 0;
+
+ // getPriority - Calculate the allocation priority for VirtReg.
+ // Virtual registers with higher priorities are allocated first.
+ virtual float getPriority(LiveInterval *LI) = 0;
+
+ // A RegAlloc pass should override this to provide the allocation heuristics.
+ // Each call must guarantee forward progress by returning an available PhysReg
+ // or new set of split live virtual registers. It is up to the splitter to
+ // converge quickly toward fully spilled live ranges.
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &splitLVRs) = 0;
+
+ // A RegAlloc pass should call this when PassManager releases its memory.
+ virtual void releaseMemory();
+
+ // Helper for checking interference between a live virtual register and a
+ // physical register, including all its register aliases. If an interference
+ // exists, return the interfering register, which may be PhysReg or an alias.
+ unsigned checkPhysRegInterference(LiveInterval& VirtReg, unsigned PhysReg);
+
+ /// assign - Assign VirtReg to PhysReg.
+ /// This should not be called from selectOrSplit for the current register.
+ void assign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ /// unassign - Undo a previous assignment of VirtReg to PhysReg.
+ /// This can be invoked from selectOrSplit, but be careful to guarantee that
+ /// allocation is making progress.
+ void unassign(LiveInterval &VirtReg, unsigned PhysReg);
+
+ // Helper for spilling all live virtual registers currently unified under
+ // PhysReg that interfere with VirtReg. Return true if spilling was
+ // successful, and append any new spilled/split intervals to SplitVRegs.
+ bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// addMBBLiveIns - Add physreg liveins to basic blocks.
+ void addMBBLiveIns(MachineFunction *);
+
+#ifndef NDEBUG
+ // Verify each LiveIntervalUnion.
+ void verify();
+#endif
+
+ // Use this group name for NamedRegionTimer.
+ static const char *TimerGroupName;
+
+public:
+ /// VerifyEnabled - True when -verify-regalloc is given.
+ static bool VerifyEnabled;
+
+private:
+ void seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> >&);
+
+ void spillReg(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+};
+
+} // end namespace llvm
+
+#endif // !defined(LLVM_CODEGEN_REGALLOCBASE)
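The header's first principle rests on the observation that two sorted lists of disjoint live segments can be tested for overlap in a single linear sweep. A minimal standalone sketch of that check, using plain pairs in place of LiveIntervalUnion and its queries (segment values are hypothetical):

    #include <cassert>
    #include <utility>
    #include <vector>

    // A live range as a sorted list of disjoint half-open segments [start, end).
    using Segments = std::vector<std::pair<int, int>>;

    // Return true if any segment of A overlaps any segment of B. Both inputs
    // are sorted and disjoint, so a merge-style sweep decides this in
    // O(|A| + |B|) time.
    static bool interferes(const Segments &A, const Segments &B) {
      size_t i = 0, j = 0;
      while (i != A.size() && j != B.size()) {
        if (A[i].second <= B[j].first)
          ++i;                       // A[i] ends before B[j] starts
        else if (B[j].second <= A[i].first)
          ++j;                       // B[j] ends before A[i] starts
        else
          return true;               // the segments overlap
      }
      return false;
    }

    int main() {
      Segments VirtReg = {{4, 8}, {20, 30}};
      Segments PhysRegUnion = {{0, 4}, {10, 12}};
      assert(!interferes(VirtReg, PhysRegUnion)); // [4,8) only touches [0,4)
      PhysRegUnion.push_back({25, 26});
      assert(interferes(VirtReg, PhysRegUnion));  // [20,30) covers [25,26)
      return 0;
    }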
diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp
new file mode 100644
index 000000000000..045c8db9dadb
--- /dev/null
+++ b/lib/CodeGen/RegAllocBasic.cpp
@@ -0,0 +1,523 @@
+//===-- RegAllocBasic.cpp - basic register allocator ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RABasic function pass, which provides a minimal
+// implementation of the basic register allocator.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "LiveIntervalUnion.h"
+#include "RegAllocBase.h"
+#include "RenderMachineFunction.h"
+#include "Spiller.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/OwningPtr.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#ifndef NDEBUG
+#include "llvm/ADT/SparseBitVector.h"
+#endif
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+#include <cstdlib>
+
+using namespace llvm;
+
+STATISTIC(NumAssigned , "Number of registers assigned");
+STATISTIC(NumUnassigned , "Number of registers unassigned");
+STATISTIC(NumNewQueued , "Number of new live ranges queued");
+
+static RegisterRegAlloc basicRegAlloc("basic", "basic register allocator",
+ createBasicRegisterAllocator);
+
+// Temporary verification option until we can put verification inside
+// MachineVerifier.
+static cl::opt<bool, true>
+VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled),
+ cl::desc("Verify during register allocation"));
+
+const char *RegAllocBase::TimerGroupName = "Register Allocation";
+bool RegAllocBase::VerifyEnabled = false;
+
+namespace {
+/// RABasic provides a minimal implementation of the basic register allocation
+/// algorithm. It prioritizes live virtual registers by spill weight and spills
+/// whenever a register is unavailable. This is not practical in production but
+/// provides a useful baseline both for measuring other allocators and for
+/// comparing the speed of the basic algorithm against other allocator styles.
+class RABasic : public MachineFunctionPass, public RegAllocBase
+{
+ // context
+ MachineFunction *MF;
+ BitVector ReservedRegs;
+
+ // analyses
+ LiveStacks *LS;
+ RenderMachineFunction *RMF;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+
+public:
+ RABasic();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Basic Register Allocator";
+ }
+
+ /// RABasic analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI) { return LI->weight; }
+
+ virtual unsigned selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+};
+
+char RABasic::ID = 0;
+
+} // end anonymous namespace
+
+RABasic::RABasic(): MachineFunctionPass(ID) {
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+}
+
+void RABasic::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(MachineDominatorsID);
+ AU.addPreservedID(MachineDominatorsID);
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ DEBUG(AU.addRequired<RenderMachineFunction>());
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RABasic::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+#ifndef NDEBUG
+// Verify each LiveIntervalUnion.
+void RegAllocBase::verify() {
+ LiveVirtRegBitSet VisitedVRegs;
+ OwningArrayPtr<LiveVirtRegBitSet>
+ unionVRegs(new LiveVirtRegBitSet[PhysReg2LiveUnion.numRegs()]);
+
+ // Verify disjoint unions.
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ DEBUG(PhysReg2LiveUnion[PhysReg].print(dbgs(), TRI));
+ LiveVirtRegBitSet &VRegs = unionVRegs[PhysReg];
+ PhysReg2LiveUnion[PhysReg].verify(VRegs);
+ // Union + intersection test could be done efficiently in one pass, but
+ // don't add a method to SparseBitVector unless we really need it.
+ assert(!VisitedVRegs.intersects(VRegs) && "vreg in multiple unions");
+ VisitedVRegs |= VRegs;
+ }
+
+ // Verify vreg coverage.
+ for (LiveIntervals::iterator liItr = LIS->begin(), liEnd = LIS->end();
+ liItr != liEnd; ++liItr) {
+ unsigned reg = liItr->first;
+ if (TargetRegisterInfo::isPhysicalRegister(reg)) continue;
+ if (!VRM->hasPhys(reg)) continue; // spilled?
+ unsigned PhysReg = VRM->getPhys(reg);
+ if (!unionVRegs[PhysReg].test(reg)) {
+ dbgs() << "LiveVirtReg " << reg << " not in union " <<
+ TRI->getName(PhysReg) << "\n";
+ llvm_unreachable("unallocated live vreg");
+ }
+ }
+ // FIXME: I'm not sure how to verify spilled intervals.
+}
+#endif //!NDEBUG
+
+//===----------------------------------------------------------------------===//
+// RegAllocBase Implementation
+//===----------------------------------------------------------------------===//
+
+// Instantiate a LiveIntervalUnion for each physical register.
+void RegAllocBase::LiveUnionArray::init(LiveIntervalUnion::Allocator &allocator,
+ unsigned NRegs) {
+ NumRegs = NRegs;
+ Array =
+ static_cast<LiveIntervalUnion*>(malloc(sizeof(LiveIntervalUnion)*NRegs));
+ for (unsigned r = 0; r != NRegs; ++r)
+ new(Array + r) LiveIntervalUnion(r, allocator);
+}
+
+void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis) {
+ NamedRegionTimer T("Initialize", TimerGroupName, TimePassesIsEnabled);
+ TRI = &vrm.getTargetRegInfo();
+ MRI = &vrm.getRegInfo();
+ VRM = &vrm;
+ LIS = &lis;
+ PhysReg2LiveUnion.init(UnionAllocator, TRI->getNumRegs());
+ // Cache an interference query for each physical reg
+ Queries.reset(new LiveIntervalUnion::Query[PhysReg2LiveUnion.numRegs()]);
+}
+
+void RegAllocBase::LiveUnionArray::clear() {
+ if (!Array)
+ return;
+ for (unsigned r = 0; r != NumRegs; ++r)
+ Array[r].~LiveIntervalUnion();
+ free(Array);
+ NumRegs = 0;
+ Array = 0;
+}
+
+void RegAllocBase::releaseMemory() {
+ PhysReg2LiveUnion.clear();
+}
+
+// Visit all the live virtual registers. If they are already assigned to a
+// physical register, unify them with the corresponding LiveIntervalUnion,
+// otherwise push them on the priority queue for later assignment.
+void RegAllocBase::
+seedLiveVirtRegs(std::priority_queue<std::pair<float, unsigned> > &VirtRegQ) {
+ for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); I != E; ++I) {
+ unsigned RegNum = I->first;
+ LiveInterval &VirtReg = *I->second;
+ if (TargetRegisterInfo::isPhysicalRegister(RegNum))
+ PhysReg2LiveUnion[RegNum].unify(VirtReg);
+ else
+ VirtRegQ.push(std::make_pair(getPriority(&VirtReg), RegNum));
+ }
+}
+
+void RegAllocBase::assign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "assigning " << PrintReg(VirtReg.reg, TRI)
+ << " to " << PrintReg(PhysReg, TRI) << '\n');
+ assert(!VRM->hasPhys(VirtReg.reg) && "Duplicate VirtReg assignment");
+ VRM->assignVirt2Phys(VirtReg.reg, PhysReg);
+ PhysReg2LiveUnion[PhysReg].unify(VirtReg);
+ ++NumAssigned;
+}
+
+void RegAllocBase::unassign(LiveInterval &VirtReg, unsigned PhysReg) {
+ DEBUG(dbgs() << "unassigning " << PrintReg(VirtReg.reg, TRI)
+ << " from " << PrintReg(PhysReg, TRI) << '\n');
+ assert(VRM->getPhys(VirtReg.reg) == PhysReg && "Inconsistent unassign");
+ PhysReg2LiveUnion[PhysReg].extract(VirtReg);
+ VRM->clearVirt(VirtReg.reg);
+ ++NumUnassigned;
+}
+
+// Top-level driver to manage the queue of unassigned VirtRegs and call the
+// selectOrSplit implementation.
+void RegAllocBase::allocatePhysRegs() {
+
+ // Push each vreg onto a queue or "precolor" by adding it to a physreg union.
+ std::priority_queue<std::pair<float, unsigned> > VirtRegQ;
+ seedLiveVirtRegs(VirtRegQ);
+
+ // Continue assigning vregs one at a time to available physical registers.
+ while (!VirtRegQ.empty()) {
+ // Pop the highest priority vreg.
+ LiveInterval &VirtReg = LIS->getInterval(VirtRegQ.top().second);
+ VirtRegQ.pop();
+
+ // selectOrSplit requests the allocator to return an available physical
+ // register if possible and populate a list of new live intervals that
+ // result from splitting.
+ DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg.reg)->getName()
+ << ':' << VirtReg << '\n');
+ typedef SmallVector<LiveInterval*, 4> VirtRegVec;
+ VirtRegVec SplitVRegs;
+ unsigned AvailablePhysReg = selectOrSplit(VirtReg, SplitVRegs);
+
+ if (AvailablePhysReg)
+ assign(VirtReg, AvailablePhysReg);
+
+ for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end();
+ I != E; ++I) {
+ LiveInterval* SplitVirtReg = *I;
+ if (SplitVirtReg->empty()) continue;
+ DEBUG(dbgs() << "queuing new interval: " << *SplitVirtReg << "\n");
+ assert(TargetRegisterInfo::isVirtualRegister(SplitVirtReg->reg) &&
+ "expect split value in virtual register");
+ VirtRegQ.push(std::make_pair(getPriority(SplitVirtReg),
+ SplitVirtReg->reg));
+ ++NumNewQueued;
+ }
+ }
+}
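Stripped of the LLVM machinery, allocatePhysRegs above is a worklist ordered by priority in which a failed assignment either spills (nothing to requeue) or produces split ranges that go back onto the queue. A toy version of that loop follows; the selectOrSplit stand-in, its threshold, and the weights and register numbers are all made up for illustration.

    #include <cassert>
    #include <queue>
    #include <utility>
    #include <vector>

    // Toy stand-in for selectOrSplit: ranges with weight below the threshold
    // "fit" and get a physreg; heavier ones are split into two cheaper ranges
    // that must be requeued. A weight of 0 would mean "spilled": nothing new.
    static unsigned selectOrSplitToy(float Weight, std::vector<float> &NewRanges) {
      if (Weight < 1.0f)
        return 7;                        // hypothetical available physreg
      if (Weight > 0.0f)
        NewRanges.assign(2, Weight / 4); // "split": two cheaper ranges
      return 0;                          // no assignment this round
    }

    int main() {
      // Worklist ordered by weight: the most expensive range is handled first.
      std::priority_queue<std::pair<float, int>> Queue;
      Queue.push({2.0f, 1});             // virtual register %1, weight 2.0
      int NumAssigned = 0, NextVReg = 2;
      while (!Queue.empty()) {
        float Weight = Queue.top().first;
        Queue.pop();
        std::vector<float> NewRanges;
        if (selectOrSplitToy(Weight, NewRanges)) {
          ++NumAssigned;                 // got a physical register
          continue;
        }
        for (float W : NewRanges)        // requeue the ranges from splitting
          Queue.push({W, NextVReg++});
      }
      assert(NumAssigned == 2);          // %1 was split into two assignable halves
      return 0;
    }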
+
+// Check if this live virtual register interferes with a physical register. If
+// not, then check for interference on each register that aliases with the
+// physical register. Return the interfering register.
+unsigned RegAllocBase::checkPhysRegInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ if (query(VirtReg, *AliasI).checkInterference())
+ return *AliasI;
+ return 0;
+}
+
+// Helper for spillInterferences() that spills all interfering vregs currently
+// assigned to this physical register.
+void RegAllocBase::spillReg(LiveInterval& VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, PhysReg);
+ assert(Q.seenAllInterferences() && "need collectInterferences()");
+ const SmallVectorImpl<LiveInterval*> &PendingSpills = Q.interferingVRegs();
+
+ for (SmallVectorImpl<LiveInterval*>::const_iterator I = PendingSpills.begin(),
+ E = PendingSpills.end(); I != E; ++I) {
+ LiveInterval &SpilledVReg = **I;
+ DEBUG(dbgs() << "extracting from " <<
+ TRI->getName(PhysReg) << " " << SpilledVReg << '\n');
+
+ // Deallocate the interfering vreg by removing it from the union.
+ // A LiveInterval instance may not be in a union during modification!
+ unassign(SpilledVReg, PhysReg);
+
+ // Spill the extracted interval.
+ spiller().spill(&SpilledVReg, SplitVRegs, PendingSpills);
+ }
+ // After extracting segments, the query's results are invalid. But keep the
+ // contents valid until we're done accessing PendingSpills.
+ Q.clear();
+}
+
+// Spill or split all live virtual registers currently unified under PhysReg
+// that interfere with VirtReg. The newly spilled or split live intervals are
+// returned by appending them to SplitVRegs.
+bool
+RegAllocBase::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Record each interference and determine if all are spillable before mutating
+ // either the union or live intervals.
+ unsigned NumInterferences = 0;
+ // Collect interferences assigned to any alias of the physical register.
+ for (const unsigned *asI = TRI->getOverlaps(PhysReg); *asI; ++asI) {
+ LiveIntervalUnion::Query &QAlias = query(VirtReg, *asI);
+ NumInterferences += QAlias.collectInterferingVRegs();
+ if (QAlias.seenUnspillableVReg()) {
+ return false;
+ }
+ }
+ DEBUG(dbgs() << "spilling " << TRI->getName(PhysReg) <<
+ " interferences with " << VirtReg << "\n");
+ assert(NumInterferences > 0 && "expect interference");
+
+ // Spill each interfering vreg allocated to PhysReg or an alias.
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI)
+ spillReg(VirtReg, *AliasI, SplitVRegs);
+ return true;
+}
+
+// Add newly allocated physical registers to the MBB live in sets.
+void RegAllocBase::addMBBLiveIns(MachineFunction *MF) {
+ NamedRegionTimer T("MBB Live Ins", TimerGroupName, TimePassesIsEnabled);
+ typedef SmallVector<MachineBasicBlock*, 8> MBBVec;
+ MBBVec liveInMBBs;
+ MachineBasicBlock &entryMBB = *MF->begin();
+
+ for (unsigned PhysReg = 0; PhysReg < PhysReg2LiveUnion.numRegs(); ++PhysReg) {
+ LiveIntervalUnion &LiveUnion = PhysReg2LiveUnion[PhysReg];
+ if (LiveUnion.empty())
+ continue;
+ for (LiveIntervalUnion::SegmentIter SI = LiveUnion.begin(); SI.valid();
+ ++SI) {
+
+ // Find the set of basic blocks which this range is live into...
+ liveInMBBs.clear();
+ if (!LIS->findLiveInMBBs(SI.start(), SI.stop(), liveInMBBs)) continue;
+
+ // And add the physreg for this interval to their live-in sets.
+ for (MBBVec::iterator I = liveInMBBs.begin(), E = liveInMBBs.end();
+ I != E; ++I) {
+ MachineBasicBlock *MBB = *I;
+ if (MBB == &entryMBB) continue;
+ if (MBB->isLiveIn(PhysReg)) continue;
+ MBB->addLiveIn(PhysReg);
+ }
+ }
+ }
+}
+
+
+//===----------------------------------------------------------------------===//
+// RABasic Implementation
+//===----------------------------------------------------------------------===//
+
+// Driver for the register assignment and splitting heuristics.
+// Manages iteration over the LiveIntervalUnions.
+//
+// This is a minimal implementation of register assignment and splitting that
+// spills whenever we run out of registers.
+//
+// selectOrSplit can only be called once per live virtual register. We then do a
+// single interference test for each register in the correct class until we
+// find an
+// available register. So, the number of interference tests in the worst case is
+// |vregs| * |machineregs|. And since the number of interference tests is
+// minimal, there is no value in caching them outside the scope of
+// selectOrSplit().
+unsigned RABasic::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &SplitVRegs) {
+ // Populate a list of physical register spill candidates.
+ SmallVector<unsigned, 8> PhysRegSpillCands;
+
+ // Check for an available register in this class.
+ const TargetRegisterClass *TRC = MRI->getRegClass(VirtReg.reg);
+
+ for (TargetRegisterClass::iterator I = TRC->allocation_order_begin(*MF),
+ E = TRC->allocation_order_end(*MF);
+ I != E; ++I) {
+
+ unsigned PhysReg = *I;
+ if (ReservedRegs.test(PhysReg)) continue;
+
+ // Check interference and, as a side effect, initialize queries for this
+ // VirtReg and its aliases.
+ unsigned interfReg = checkPhysRegInterference(VirtReg, PhysReg);
+ if (interfReg == 0) {
+ // Found an available register.
+ return PhysReg;
+ }
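+ // Look at the first interference on the blocked register (or alias) to
+ // decide whether evicting it could be cheaper than spilling VirtReg.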
+ LiveInterval *interferingVirtReg =
+ Queries[interfReg].firstInterference().liveUnionPos().value();
+
+ // The current VirtReg must either be spillable, or one of its interferences
+ // must have less spill weight.
+ if (interferingVirtReg->weight < VirtReg.weight) {
+ PhysRegSpillCands.push_back(PhysReg);
+ }
+ }
+ // Try to spill another interfering reg with less spill weight.
+ for (SmallVectorImpl<unsigned>::iterator PhysRegI = PhysRegSpillCands.begin(),
+ PhysRegE = PhysRegSpillCands.end(); PhysRegI != PhysRegE; ++PhysRegI) {
+
+ if (!spillInterferences(VirtReg, *PhysRegI, SplitVRegs)) continue;
+
+ assert(checkPhysRegInterference(VirtReg, *PhysRegI) == 0 &&
+ "Interference after spill.");
+ // Tell the caller to allocate to this newly freed physical register.
+ return *PhysRegI;
+ }
+ // No other spill candidates were found, so spill the current VirtReg.
+ DEBUG(dbgs() << "spilling: " << VirtReg << '\n');
+ SmallVector<LiveInterval*, 1> pendingSpills;
+
+ spiller().spill(&VirtReg, SplitVRegs, pendingSpills);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RABasic::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** BASIC REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ DEBUG(RMF = &getAnalysis<RenderMachineFunction>());
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+
+ ReservedRegs = TRI->getReservedRegs(*MF);
+
+ SpillerInstance.reset(createSpiller(*this, *MF, *VRM));
+
+ allocatePhysRegs();
+
+ addMBBLiveIns(MF);
+
+ // Diagnostic output before rewriting
+ DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *VRM << "\n");
+
+ // optional HTML output
+ DEBUG(RMF->renderMachineFunction("After basic register allocation.", VRM));
+
+ // FIXME: Verification currently must run before VirtRegRewriter. We should
+ // make the rewriter a separate pass and override verifyAnalysis instead. When
+ // that happens, verification naturally falls under VerifyMachineCode.
+#ifndef NDEBUG
+ if (VerifyEnabled) {
+ // Verify accuracy of LiveIntervals. The standard machine code verifier
+ // ensures that each LiveInterval covers all uses of the virtual reg.
+
+ // FIXME: MachineVerifier is badly broken when using the standard
+ // spiller. Always use -spiller=inline with -verify-regalloc. Even with the
+ // inline spiller, some tests fail to verify because the coalescer does not
+ // always generate verifiable code.
+ MF->verify(this, "In RABasic::verify");
+
+ // Verify that LiveIntervals are partitioned into unions and disjoint within
+ // the unions.
+ verify();
+ }
+#endif // !NDEBUG
+
+ // Run rewriter
+ VRM->rewrite(LIS->getSlotIndexes());
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
+
+FunctionPass* llvm::createBasicRegisterAllocator()
+{
+ return new RABasic();
+}
diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp
index fc150d55e226..15036e38b893 100644
--- a/lib/CodeGen/RegAllocFast.cpp
+++ b/lib/CodeGen/RegAllocFast.cpp
@@ -48,7 +48,10 @@ namespace {
public:
static char ID;
RAFast() : MachineFunctionPass(ID), StackSlotForVirtReg(-1),
- isBulkSpilling(false) {}
+ isBulkSpilling(false) {
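+ // Register the passes this allocator requires (PHI elimination and
+ // two-address lowering) with the pass registry.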
+ initializePHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
private:
const TargetMachine *TM;
MachineFunction *MF;
@@ -259,8 +262,8 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI,
// instruction, not on the spill.
bool SpillKill = LR.LastUse != MI;
LR.Dirty = false;
- DEBUG(dbgs() << "Spilling %reg" << LRI->first
- << " in " << TRI->getName(LR.PhysReg));
+ DEBUG(dbgs() << "Spilling " << PrintReg(LRI->first, TRI)
+ << " in " << PrintReg(LR.PhysReg, TRI));
const TargetRegisterClass *RC = MRI->getRegClass(LRI->first);
int FI = getStackSpaceFor(LRI->first, RC);
DEBUG(dbgs() << " to stack slot #" << FI << "\n");
@@ -331,7 +334,7 @@ void RAFast::usePhysReg(MachineOperand &MO) {
MO.setIsKill();
return;
default:
- // The physreg was allocated to a virtual register. That means to value we
+ // The physreg was allocated to a virtual register. That means the value we
// wanted has been clobbered.
llvm_unreachable("Instruction uses an allocated register");
}
@@ -458,8 +461,8 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const {
/// register must not be used for anything else when this is called.
///
void RAFast::assignVirtToPhysReg(LiveRegEntry &LRE, unsigned PhysReg) {
- DEBUG(dbgs() << "Assigning %reg" << LRE.first << " to "
- << TRI->getName(PhysReg) << "\n");
+ DEBUG(dbgs() << "Assigning " << PrintReg(LRE.first, TRI) << " to "
+ << PrintReg(PhysReg, TRI) << "\n");
PhysRegState[PhysReg] = LRE.first;
assert(!LRE.second.PhysReg && "Already assigned a physreg");
LRE.second.PhysReg = PhysReg;
@@ -503,8 +506,8 @@ void RAFast::allocVirtReg(MachineInstr *MI, LiveRegEntry &LRE, unsigned Hint) {
return assignVirtToPhysReg(LRE, PhysReg);
}
- DEBUG(dbgs() << "Allocating %reg" << VirtReg << " from " << RC->getName()
- << "\n");
+ DEBUG(dbgs() << "Allocating " << PrintReg(VirtReg) << " from "
+ << RC->getName() << "\n");
unsigned BestReg = 0, BestCost = spillImpossible;
for (TargetRegisterClass::iterator I = AOB; I != AOE; ++I) {
@@ -584,8 +587,8 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum,
allocVirtReg(MI, *LRI, Hint);
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
int FrameIndex = getStackSpaceFor(VirtReg, RC);
- DEBUG(dbgs() << "Reloading %reg" << VirtReg << " into "
- << TRI->getName(LR.PhysReg) << "\n");
+ DEBUG(dbgs() << "Reloading " << PrintReg(VirtReg, TRI) << " into "
+ << PrintReg(LR.PhysReg, TRI) << "\n");
TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FrameIndex, RC, TRI);
++NumLoads;
} else if (LR.Dirty) {
@@ -653,11 +656,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
+ continue;
if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) ||
(MO.getSubReg() && MI->readsVirtualRegister(Reg))) {
if (ThroughRegs.insert(Reg))
- DEBUG(dbgs() << " %reg" << Reg);
+ DEBUG(dbgs() << ' ' << PrintReg(Reg));
}
}
@@ -685,7 +689,7 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
unsigned DefIdx = 0;
if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue;
@@ -731,6 +735,27 @@ void RAFast::handleThroughOperands(MachineInstr *MI,
void RAFast::AllocateBasicBlock() {
DEBUG(dbgs() << "\nAllocating " << *MBB);
+ // FIXME: This should probably be added by instruction selection instead?
+ // If the last instruction in the block is a return, make sure to mark it as
+ // using all of the live-out values in the function. Things marked both call
+ // and return are tail calls; do not do this for them. The tail callee need
+ // not take the same registers as input that it produces as output, and there
+ // are dependencies for its input registers elsewhere.
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() &&
+ !MBB->back().getDesc().isCall()) {
+ MachineInstr *Ret = &MBB->back();
+
+ for (MachineRegisterInfo::liveout_iterator
+ I = MF->getRegInfo().liveout_begin(),
+ E = MF->getRegInfo().liveout_end(); I != E; ++I) {
+ assert(TargetRegisterInfo::isPhysicalRegister(*I) &&
+ "Cannot have a live-out virtual register.");
+
+ // Add live-out registers as implicit uses.
+ Ret->addRegisterKilled(*I, TRI, true);
+ }
+ }
+
PhysRegState.assign(TRI->getNumRegs(), regDisabled);
assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
@@ -761,7 +786,7 @@ void RAFast::AllocateBasicBlock() {
dbgs() << "*";
break;
default:
- dbgs() << "=%reg" << PhysRegState[Reg];
+ dbgs() << '=' << PrintReg(PhysRegState[Reg]);
if (LiveVirtRegs[PhysRegState[Reg]].Dirty)
dbgs() << "*";
assert(LiveVirtRegs[PhysRegState[Reg]].PhysReg == Reg &&
@@ -791,16 +816,18 @@ void RAFast::AllocateBasicBlock() {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
LiveDbgValueMap[Reg] = MI;
LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg);
if (LRI != LiveVirtRegs.end())
setPhysReg(MI, i, LRI->second.PhysReg);
else {
int SS = StackSlotForVirtReg[Reg];
- if (SS == -1)
+ if (SS == -1) {
// We can't allocate a physreg for a DebugValue, sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
MO.setReg(0);
+ }
else {
// Modify DBG_VALUE now that the value is in a spill slot.
int64_t Offset = MI->getOperand(1).getImm();
@@ -817,9 +844,11 @@ void RAFast::AllocateBasicBlock() {
MI = NewDV;
ScanDbgValue = true;
break;
- } else
+ } else {
// We can't allocate a physreg for a DebugValue; sorry!
+ DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE");
MO.setReg(0);
+ }
}
}
}
@@ -902,7 +931,7 @@ void RAFast::AllocateBasicBlock() {
MachineOperand &MO = MI->getOperand(i);
if (!MO.isReg()) continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+ if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue;
if (MO.isUse()) {
LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, CopyDst);
unsigned PhysReg = LRI->second.PhysReg;
@@ -1017,8 +1046,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) {
// initialize the virtual->physical register map to have a 'null'
// mapping for all virtual registers
- unsigned LastVirtReg = MRI->getLastVirtReg();
- StackSlotForVirtReg.grow(LastVirtReg);
+ StackSlotForVirtReg.resize(MRI->getNumVirtRegs());
// Loop over all of the basic blocks, eliminating virtual register references
for (MachineFunction::iterator MBBi = Fn.begin(), MBBe = Fn.end();
diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp
new file mode 100644
index 000000000000..c1372cd038cf
--- /dev/null
+++ b/lib/CodeGen/RegAllocGreedy.cpp
@@ -0,0 +1,1285 @@
+//===-- RegAllocGreedy.cpp - greedy register allocator --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the RAGreedy function pass for register allocation in
+// optimized builds.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "regalloc"
+#include "AllocationOrder.h"
+#include "LiveIntervalUnion.h"
+#include "LiveRangeEdit.h"
+#include "RegAllocBase.h"
+#include "Spiller.h"
+#include "SpillPlacement.h"
+#include "SplitKit.h"
+#include "VirtRegMap.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Function.h"
+#include "llvm/PassAnalysisSupport.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineLoopRanges.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/CodeGen/RegisterCoalescer.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Timer.h"
+
+using namespace llvm;
+
+STATISTIC(NumGlobalSplits, "Number of split global live ranges");
+STATISTIC(NumLocalSplits, "Number of split local live ranges");
+STATISTIC(NumReassigned, "Number of interferences reassigned");
+STATISTIC(NumEvicted, "Number of interferences evicted");
+
+static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator",
+ createGreedyRegisterAllocator);
+
+namespace {
+class RAGreedy : public MachineFunctionPass, public RegAllocBase {
+ // context
+ MachineFunction *MF;
+ BitVector ReservedRegs;
+
+ // analyses
+ SlotIndexes *Indexes;
+ LiveStacks *LS;
+ MachineDominatorTree *DomTree;
+ MachineLoopInfo *Loops;
+ MachineLoopRanges *LoopRanges;
+ EdgeBundles *Bundles;
+ SpillPlacement *SpillPlacer;
+
+ // state
+ std::auto_ptr<Spiller> SpillerInstance;
+ std::auto_ptr<SplitAnalysis> SA;
+
+ // splitting state.
+
+ /// All basic blocks where the current register is live.
+ SmallVector<SpillPlacement::BlockConstraint, 8> SpillConstraints;
+
+ /// For every instruction in SA->UseSlots, store the previous non-copy
+ /// instruction.
+ SmallVector<SlotIndex, 8> PrevSlot;
+
+public:
+ RAGreedy();
+
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "Greedy Register Allocator";
+ }
+
+ /// RAGreedy analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const;
+
+ virtual void releaseMemory();
+
+ virtual Spiller &spiller() { return *SpillerInstance; }
+
+ virtual float getPriority(LiveInterval *LI);
+
+ virtual unsigned selectOrSplit(LiveInterval&,
+ SmallVectorImpl<LiveInterval*>&);
+
+ /// Perform register allocation.
+ virtual bool runOnMachineFunction(MachineFunction &mf);
+
+ static char ID;
+
+private:
+ bool checkUncachedInterference(LiveInterval&, unsigned);
+ LiveInterval *getSingleInterference(LiveInterval&, unsigned);
+ bool reassignVReg(LiveInterval &InterferingVReg, unsigned OldPhysReg);
+ float calcInterferenceWeight(LiveInterval&, unsigned);
+ float calcInterferenceInfo(LiveInterval&, unsigned);
+ float calcGlobalSplitCost(const BitVector&);
+ void splitAroundRegion(LiveInterval&, unsigned, const BitVector&,
+ SmallVectorImpl<LiveInterval*>&);
+ void calcGapWeights(unsigned, SmallVectorImpl<float>&);
+ SlotIndex getPrevMappedIndex(const MachineInstr*);
+ void calcPrevSlots();
+ unsigned nextSplitPoint(unsigned);
+
+ unsigned tryReassignOrEvict(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryRegionSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned tryLocalSplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySplit(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+ unsigned trySpillInterferences(LiveInterval&, AllocationOrder&,
+ SmallVectorImpl<LiveInterval*>&);
+};
+} // end anonymous namespace
+
+char RAGreedy::ID = 0;
+
+FunctionPass* llvm::createGreedyRegisterAllocator() {
+ return new RAGreedy();
+}
+
+RAGreedy::RAGreedy(): MachineFunctionPass(ID) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopRangesPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeEdgeBundlesPass(*PassRegistry::getPassRegistry());
+ initializeSpillPlacementPass(*PassRegistry::getPassRegistry());
+}
+
+void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
+ AU.addRequired<LiveIntervals>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ if (StrongPHIElim)
+ AU.addRequiredID(StrongPHIEliminationID);
+ AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
+ AU.addRequired<LiveStacks>();
+ AU.addPreserved<LiveStacks>();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ AU.addRequired<MachineLoopInfo>();
+ AU.addPreserved<MachineLoopInfo>();
+ AU.addRequired<MachineLoopRanges>();
+ AU.addPreserved<MachineLoopRanges>();
+ AU.addRequired<VirtRegMap>();
+ AU.addPreserved<VirtRegMap>();
+ AU.addRequired<EdgeBundles>();
+ AU.addRequired<SpillPlacement>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+void RAGreedy::releaseMemory() {
+ SpillerInstance.reset(0);
+ RegAllocBase::releaseMemory();
+}
+
+float RAGreedy::getPriority(LiveInterval *LI) {
+ float Priority = LI->weight;
+
+ // Prioritize hinted registers so they are allocated first.
+ std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(LI->reg);
+ if (Hint.first || Hint.second) {
+ // The hint can be target specific, a virtual register, or a physreg.
+ Priority *= 2;
+
+ // Prefer physreg hints above anything else.
+ if (Hint.first == 0 && TargetRegisterInfo::isPhysicalRegister(Hint.second))
+ Priority *= 2;
+ }
+ return Priority;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Register Reassignment
+//===----------------------------------------------------------------------===//
+
+// Check interference without using the cache.
+bool RAGreedy::checkUncachedInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query subQ(&VirtReg, &PhysReg2LiveUnion[*AliasI]);
+ if (subQ.checkInterference())
+ return true;
+ }
+ return false;
+}
+
+/// getSingleInterference - Return the single interfering virtual register
+/// assigned to PhysReg. Return 0 if more than one virtual register is
+/// interfering.
+LiveInterval *RAGreedy::getSingleInterference(LiveInterval &VirtReg,
+ unsigned PhysReg) {
+ // Check physreg and aliases.
+ LiveInterval *Interference = 0;
+ for (const unsigned *AliasI = TRI->getOverlaps(PhysReg); *AliasI; ++AliasI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AliasI);
+ if (Q.checkInterference()) {
+ if (Interference)
+ return 0;
+ Q.collectInterferingVRegs(1);
+ if (!Q.seenAllInterferences())
+ return 0;
+ Interference = Q.interferingVRegs().front();
+ }
+ }
+ return Interference;
+}
+
+// Attempt to reassign this virtual register to a different physical register.
+//
+// FIXME: we are not yet caching these "second-level" interferences discovered
+// in the sub-queries. These interferences can change with each call to
+// selectOrSplit. However, we could implement a "may-interfere" cache that
+// could be conservatively dirtied when we reassign or split.
+//
+// FIXME: This may result in a lot of alias queries. We could summarize alias
+// live intervals in their parent register's live union, but it's messy.
+bool RAGreedy::reassignVReg(LiveInterval &InterferingVReg,
+ unsigned WantedPhysReg) {
+ assert(TargetRegisterInfo::isVirtualRegister(InterferingVReg.reg) &&
+ "Can only reassign virtual registers");
+ assert(TRI->regsOverlap(WantedPhysReg, VRM->getPhys(InterferingVReg.reg)) &&
+ "inconsistent phys reg assignment");
+
+ AllocationOrder Order(InterferingVReg.reg, *VRM, ReservedRegs);
+ while (unsigned PhysReg = Order.next()) {
+ // Don't reassign to a WantedPhysReg alias.
+ if (TRI->regsOverlap(PhysReg, WantedPhysReg))
+ continue;
+
+ if (checkUncachedInterference(InterferingVReg, PhysReg))
+ continue;
+
+ // Reassign the interfering virtual reg to this physical reg.
+ unsigned OldAssign = VRM->getPhys(InterferingVReg.reg);
+ DEBUG(dbgs() << "reassigning: " << InterferingVReg << " from " <<
+ TRI->getName(OldAssign) << " to " << TRI->getName(PhysReg) << '\n');
+ unassign(InterferingVReg, OldAssign);
+ assign(InterferingVReg, PhysReg);
+ ++NumReassigned;
+ return true;
+ }
+ return false;
+}
+
+/// tryReassignOrEvict - Try to reassign a single interference to a different
+/// physreg, or evict a single interference with a lower spill weight.
+/// @param VirtReg Currently unassigned virtual register.
+/// @param Order Physregs to try.
+/// @return Physreg to assign VirtReg, or 0.
+unsigned RAGreedy::tryReassignOrEvict(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs){
+ NamedRegionTimer T("Reassign", TimerGroupName, TimePassesIsEnabled);
+
+ // Keep track of the lightest single interference seen so far.
+ float BestWeight = VirtReg.weight;
+ LiveInterval *BestVirt = 0;
+ unsigned BestPhys = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ LiveInterval *InterferingVReg = getSingleInterference(VirtReg, PhysReg);
+ if (!InterferingVReg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(InterferingVReg->reg))
+ continue;
+ if (reassignVReg(*InterferingVReg, PhysReg))
+ return PhysReg;
+
+ // Cannot reassign, is this an eviction candidate?
+ if (InterferingVReg->weight < BestWeight) {
+ BestVirt = InterferingVReg;
+ BestPhys = PhysReg;
+ BestWeight = InterferingVReg->weight;
+ }
+ }
+
+ // Nothing reassigned, can we evict a lighter single interference?
+ if (BestVirt) {
+ DEBUG(dbgs() << "evicting lighter " << *BestVirt << '\n');
+ unassign(*BestVirt, VRM->getPhys(BestVirt->reg));
+ ++NumEvicted;
+ NewVRegs.push_back(BestVirt);
+ return BestPhys;
+ }
+
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Region Splitting
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceInfo - Compute per-block outgoing and ingoing constraints
+/// when considering interference from PhysReg. Also compute an optimistic local
+/// cost of this interference pattern.
+///
+/// The final cost of a split is the local cost + global cost of preferences
+/// broken by SpillPlacement.
+///
+float RAGreedy::calcInterferenceInfo(LiveInterval &VirtReg, unsigned PhysReg) {
+ // Reset interference dependent info.
+ SpillConstraints.resize(SA->LiveBlocks.size());
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ BC.Number = BI.MBB->getNumber();
+ BC.Entry = (BI.Uses && BI.LiveIn) ?
+ SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BC.Exit = (BI.Uses && BI.LiveOut) ?
+ SpillPlacement::PrefReg : SpillPlacement::DontCare;
+ BI.OverlapEntry = BI.OverlapExit = false;
+ }
+
+ // Add interference info from each PhysReg alias.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(VirtReg, *AI).checkInterference())
+ continue;
+ LiveIntervalUnion::SegmentIter IntI =
+ PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+ if (!IntI.valid())
+ continue;
+
+ // Determine which blocks have interference live in or after the last split
+ // point.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // Is the interference live-in?
+ if (BI.LiveIn) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() <= Start)
+ BC.Entry = SpillPlacement::MustSpill;
+ }
+
+ // Is the interference overlapping the last split point?
+ if (BI.LiveOut) {
+ if (IntI.stop() < BI.LastSplitPoint)
+ IntI.advanceTo(BI.LastSplitPoint.getPrevSlot());
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < Stop)
+ BC.Exit = SpillPlacement::MustSpill;
+ }
+ }
+
+ // Rewind iterator and check other interferences.
+ IntI.find(VirtReg.beginIndex());
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // Handle transparent blocks with interference separately.
+ // Transparent blocks never incur any fixed cost.
+ if (BI.LiveThrough && !BI.Uses) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() >= Stop)
+ continue;
+
+ if (BC.Entry != SpillPlacement::MustSpill)
+ BC.Entry = SpillPlacement::PrefSpill;
+ if (BC.Exit != SpillPlacement::MustSpill)
+ BC.Exit = SpillPlacement::PrefSpill;
+ continue;
+ }
+
+ // Now we only have blocks with uses left.
+ // Check if the interference overlaps the uses.
+ assert(BI.Uses && "Non-transparent block without any uses");
+
+ // Check interference on entry.
+ if (BI.LiveIn && BC.Entry != SpillPlacement::MustSpill) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ // The interference is not live-in, but it starts before the first use.
+ if (IntI.start() < BI.FirstUse)
+ BC.Entry = SpillPlacement::PrefSpill;
+ }
+
+ // Does interference overlap the uses in the entry segment
+ // [FirstUse;Kill)?
+ if (BI.LiveIn && !BI.OverlapEntry) {
+ IntI.advanceTo(BI.FirstUse);
+ if (!IntI.valid())
+ break;
+ // A live-through interval has no kill.
+ // Check [FirstUse;LastUse) instead.
+ if (IntI.start() < (BI.LiveThrough ? BI.LastUse : BI.Kill))
+ BI.OverlapEntry = true;
+ }
+
+ // Does interference overlap the uses in the exit segment [Def;LastUse)?
+ if (BI.LiveOut && !BI.LiveThrough && !BI.OverlapExit) {
+ IntI.advanceTo(BI.Def);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < BI.LastUse)
+ BI.OverlapExit = true;
+ }
+
+ // Check interference on exit.
+ if (BI.LiveOut && BC.Exit != SpillPlacement::MustSpill) {
+ // Check interference between LastUse and Stop.
+ if (BC.Exit != SpillPlacement::PrefSpill) {
+ IntI.advanceTo(BI.LastUse);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() < Stop)
+ BC.Exit = SpillPlacement::PrefSpill;
+ }
+ }
+ }
+ }
+
+ // Accumulate a local cost of this interference pattern.
+ float LocalCost = 0;
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ if (!BI.Uses)
+ continue;
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ unsigned Inserts = 0;
+
+ // Do we need spill code for the entry segment?
+ if (BI.LiveIn)
+ Inserts += BI.OverlapEntry || BC.Entry != SpillPlacement::PrefReg;
+
+ // For the exit segment?
+ if (BI.LiveOut)
+ Inserts += BI.OverlapExit || BC.Exit != SpillPlacement::PrefReg;
+
+ // The local cost of spill code in this block is the block frequency times
+ // the number of spill instructions inserted.
+ if (Inserts)
+ LocalCost += Inserts * SpillPlacer->getBlockFrequency(BI.MBB);
+ }
+ DEBUG(dbgs() << "Local cost of " << PrintReg(PhysReg, TRI) << " = "
+ << LocalCost << '\n');
+ return LocalCost;
+}
+
+/// calcGlobalSplitCost - Return the global split cost of following the split
+/// pattern in LiveBundles. This cost should be added to the local cost of the
+/// interference pattern in SpillConstraints.
+///
+float RAGreedy::calcGlobalSplitCost(const BitVector &LiveBundles) {
+ float GlobalCost = 0;
+ for (unsigned i = 0, e = SpillConstraints.size(); i != e; ++i) {
+ SpillPlacement::BlockConstraint &BC = SpillConstraints[i];
+ unsigned Inserts = 0;
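+ // A spill or reload must be inserted whenever the bundle placement
+ // disagrees with the block's own register preference at that boundary.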
+ // Broken entry preference?
+ Inserts += LiveBundles[Bundles->getBundle(BC.Number, 0)] !=
+ (BC.Entry == SpillPlacement::PrefReg);
+ // Broken exit preference?
+ Inserts += LiveBundles[Bundles->getBundle(BC.Number, 1)] !=
+ (BC.Exit == SpillPlacement::PrefReg);
+ if (Inserts)
+ GlobalCost +=
+ Inserts * SpillPlacer->getBlockFrequency(SA->LiveBlocks[i].MBB);
+ }
+ DEBUG(dbgs() << "Global cost = " << GlobalCost << '\n');
+ return GlobalCost;
+}
+
+/// splitAroundRegion - Split VirtReg around the region determined by
+/// LiveBundles. Make an effort to avoid interference from PhysReg.
+///
+/// The 'register' interval is going to contain as many uses as possible while
+/// avoiding interference. The 'stack' interval is the complement constructed by
+/// SplitEditor. It will contain the rest.
+///
+void RAGreedy::splitAroundRegion(LiveInterval &VirtReg, unsigned PhysReg,
+ const BitVector &LiveBundles,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ DEBUG({
+ dbgs() << "Splitting around region for " << PrintReg(PhysReg, TRI)
+ << " with bundles";
+ for (int i = LiveBundles.find_first(); i>=0; i = LiveBundles.find_next(i))
+ dbgs() << " EB#" << i;
+ dbgs() << ".\n";
+ });
+
+ // First compute interference ranges in the live blocks.
+ typedef std::pair<SlotIndex, SlotIndex> IndexPair;
+ SmallVector<IndexPair, 8> InterferenceRanges;
+ InterferenceRanges.resize(SA->LiveBlocks.size());
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(VirtReg, *AI).checkInterference())
+ continue;
+ LiveIntervalUnion::SegmentIter IntI =
+ PhysReg2LiveUnion[*AI].find(VirtReg.beginIndex());
+ if (!IntI.valid())
+ continue;
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+ // Skip interference-free blocks.
+ if (IntI.start() >= Stop)
+ continue;
+
+ // First interference in block.
+ if (BI.LiveIn) {
+ IntI.advanceTo(Start);
+ if (!IntI.valid())
+ break;
+ if (IntI.start() >= Stop)
+ continue;
+ if (!IP.first.isValid() || IntI.start() < IP.first)
+ IP.first = IntI.start();
+ }
+
+ // Last interference in block.
+ if (BI.LiveOut) {
+ IntI.advanceTo(Stop);
+ if (!IntI.valid() || IntI.start() >= Stop)
+ --IntI;
+ if (IntI.stop() <= Start)
+ continue;
+ if (!IP.second.isValid() || IntI.stop() > IP.second)
+ IP.second = IntI.stop();
+ }
+ }
+ }
+
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+ // Create the main cross-block interval.
+ SE.openIntv();
+
+ // First add all defs that are live out of a block.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Should the register be live out?
+ if (!BI.LiveOut || !RegOut)
+ continue;
+
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " -> EB#"
+ << Bundles->getBundle(BI.MBB->getNumber(), 1)
+ << " intf [" << IP.first << ';' << IP.second << ')');
+
+ // The interference interval should either be invalid or overlap MBB.
+ assert((!IP.first.isValid() || IP.first < Stop) && "Bad interference");
+ assert((!IP.second.isValid() || IP.second > Start) && "Bad interference");
+
+ // Check interference leaving the block.
+ if (!IP.second.isValid()) {
+ // Block is interference-free.
+ DEBUG(dbgs() << ", no interference");
+ if (!BI.Uses) {
+ assert(BI.LiveThrough && "No uses, but not live through block?");
+ // Block is live-through without interference.
+ DEBUG(dbgs() << ", no uses"
+ << (RegIn ? ", live-through.\n" : ", stack in.\n"));
+ if (!RegIn)
+ SE.enterIntvAtEnd(*BI.MBB);
+ continue;
+ }
+ if (!BI.LiveThrough) {
+ DEBUG(dbgs() << ", not live-through.\n");
+ SE.useIntv(SE.enterIntvBefore(BI.Def), Stop);
+ continue;
+ }
+ if (!RegIn) {
+ // Block is live-through, but entry bundle is on the stack.
+ // Reload just before the first use.
+ DEBUG(dbgs() << ", not live-in, enter before first use.\n");
+ SE.useIntv(SE.enterIntvBefore(BI.FirstUse), Stop);
+ continue;
+ }
+ DEBUG(dbgs() << ", live-through.\n");
+ continue;
+ }
+
+ // Block has interference.
+ DEBUG(dbgs() << ", interference to " << IP.second);
+
+ if (!BI.LiveThrough && IP.second <= BI.Def) {
+ // The interference doesn't reach the outgoing segment.
+ DEBUG(dbgs() << " doesn't affect def from " << BI.Def << '\n');
+ SE.useIntv(BI.Def, Stop);
+ continue;
+ }
+
+
+ // No uses in block, avoid interference by reloading as late as possible.
+ DEBUG(dbgs() << ", no uses.\n");
+ SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ continue;
+ }
+
+ if (IP.second.getBoundaryIndex() < BI.LastUse) {
+ // There are interference-free uses at the end of the block.
+ // Find the first use that can get the live-out register.
+ SmallVectorImpl<SlotIndex>::const_iterator UI =
+ std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+ IP.second.getBoundaryIndex());
+ assert(UI != SA->UseSlots.end() && "Couldn't find last use");
+ SlotIndex Use = *UI;
+ assert(Use <= BI.LastUse && "Couldn't find last use");
+ // Only attempt a split before the last split point.
+ if (Use.getBaseIndex() <= BI.LastSplitPoint) {
+ DEBUG(dbgs() << ", free use at " << Use << ".\n");
+ SlotIndex SegStart = SE.enterIntvBefore(Use);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ assert(SegStart < BI.LastSplitPoint && "Impossible split point");
+ SE.useIntv(SegStart, Stop);
+ continue;
+ }
+ }
+
+ // Interference is after the last use.
+ DEBUG(dbgs() << " after last use.\n");
+ SlotIndex SegStart = SE.enterIntvAtEnd(*BI.MBB);
+ assert(SegStart >= IP.second && "Couldn't avoid interference");
+ }
+
+ // Now all defs leading to live bundles are handled, do everything else.
+ for (unsigned i = 0, e = SA->LiveBlocks.size(); i != e; ++i) {
+ SplitAnalysis::BlockInfo &BI = SA->LiveBlocks[i];
+ bool RegIn = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 0)];
+ bool RegOut = LiveBundles[Bundles->getBundle(BI.MBB->getNumber(), 1)];
+
+ // Is the register live-in?
+ if (!BI.LiveIn || !RegIn)
+ continue;
+
+ // We have an incoming register. Check for interference.
+ IndexPair &IP = InterferenceRanges[i];
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = Indexes->getMBBRange(BI.MBB);
+
+ DEBUG(dbgs() << "EB#" << Bundles->getBundle(BI.MBB->getNumber(), 0)
+ << " -> BB#" << BI.MBB->getNumber());
+
+ // Check interference entering the block.
+ if (!IP.first.isValid()) {
+ // Block is interference-free.
+ DEBUG(dbgs() << ", no interference");
+ if (!BI.Uses) {
+ assert(BI.LiveThrough && "No uses, but not live through block?");
+ // Block is live-through without interference.
+ if (RegOut) {
+ DEBUG(dbgs() << ", no uses, live-through.\n");
+ SE.useIntv(Start, Stop);
+ } else {
+ DEBUG(dbgs() << ", no uses, stack-out.\n");
+ SE.leaveIntvAtTop(*BI.MBB);
+ }
+ continue;
+ }
+ if (!BI.LiveThrough) {
+ DEBUG(dbgs() << ", killed in block.\n");
+ SE.useIntv(Start, SE.leaveIntvAfter(BI.Kill));
+ continue;
+ }
+ if (!RegOut) {
+ // Block is live-through, but exit bundle is on the stack.
+ // Spill immediately after the last use.
+ if (BI.LastUse < BI.LastSplitPoint) {
+ DEBUG(dbgs() << ", uses, stack-out.\n");
+ SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse));
+ continue;
+ }
+ // The last use is after the last split point, it is probably an
+ // indirect jump.
+ DEBUG(dbgs() << ", uses at " << BI.LastUse << " after split point "
+ << BI.LastSplitPoint << ", stack-out.\n");
+ SlotIndex SegEnd = SE.leaveIntvBefore(BI.LastSplitPoint);
+ SE.useIntv(Start, SegEnd);
+ // Run a double interval from the split to the last use.
+ // This makes it possible to spill the complement without affecting the
+ // indirect branch.
+ SE.overlapIntv(SegEnd, BI.LastUse);
+ continue;
+ }
+ // Register is live-through.
+ DEBUG(dbgs() << ", uses, live-through.\n");
+ SE.useIntv(Start, Stop);
+ continue;
+ }
+
+ // Block has interference.
+ DEBUG(dbgs() << ", interference from " << IP.first);
+
+ if (!BI.LiveThrough && IP.first >= BI.Kill) {
+ // The interference doesn't reach the outgoing segment.
+ DEBUG(dbgs() << " doesn't affect kill at " << BI.Kill << '\n');
+ SE.useIntv(Start, BI.Kill);
+ continue;
+ }
+
+ if (!BI.Uses) {
+ // No uses in block, avoid interference by spilling as soon as possible.
+ DEBUG(dbgs() << ", no uses.\n");
+ SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ continue;
+ }
+ if (IP.first.getBaseIndex() > BI.FirstUse) {
+ // There are interference-free uses at the beginning of the block.
+ // Find the last use that can get the register.
+ SmallVectorImpl<SlotIndex>::const_iterator UI =
+ std::lower_bound(SA->UseSlots.begin(), SA->UseSlots.end(),
+ IP.first.getBaseIndex());
+ assert(UI != SA->UseSlots.begin() && "Couldn't find first use");
+ SlotIndex Use = (--UI)->getBoundaryIndex();
+ DEBUG(dbgs() << ", free use at " << *UI << ".\n");
+ SlotIndex SegEnd = SE.leaveIntvAfter(Use);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ SE.useIntv(Start, SegEnd);
+ continue;
+ }
+
+ // Interference is before the first use.
+ DEBUG(dbgs() << " before first use.\n");
+ SlotIndex SegEnd = SE.leaveIntvAtTop(*BI.MBB);
+ assert(SegEnd <= IP.first && "Couldn't avoid interference");
+ }
+
+ SE.closeIntv();
+
+ // FIXME: Should we be more aggressive about splitting the stack region into
+ // per-block segments? The current approach allows the stack region to
+ // separate into connected components. Some components may be allocatable.
+ SE.finish();
+ ++NumGlobalSplits;
+
+ if (VerifyEnabled) {
+ MF->verify(this, "After splitting live range around region");
+
+#ifndef NDEBUG
+ // Make sure that at least one of the new intervals can allocate to PhysReg.
+ // That was the whole point of splitting the live range.
+ bool found = false;
+ for (LiveRangeEdit::iterator I = LREdit.begin(), E = LREdit.end(); I != E;
+ ++I)
+ if (!checkUncachedInterference(**I, PhysReg)) {
+ found = true;
+ break;
+ }
+ assert(found && "No allocatable intervals after pointless splitting");
+#endif
+ }
+}
+
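+/// tryRegionSplit - Try splitting VirtReg around a region spanning several
+/// basic blocks, picking the candidate PhysReg with the lowest combined local
+/// and global split cost. Always returns 0; any intervals created by the
+/// split are returned through NewVRegs.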
+unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ BitVector LiveBundles, BestBundles;
+ float BestCost = 0;
+ unsigned BestReg = 0;
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ float Cost = calcInterferenceInfo(VirtReg, PhysReg);
+ if (BestReg && Cost >= BestCost)
+ continue;
+
+ SpillPlacer->placeSpills(SpillConstraints, LiveBundles);
+ // No live bundles, defer to splitSingleBlocks().
+ if (!LiveBundles.any())
+ continue;
+
+ Cost += calcGlobalSplitCost(LiveBundles);
+ if (!BestReg || Cost < BestCost) {
+ BestReg = PhysReg;
+ BestCost = Cost;
+ BestBundles.swap(LiveBundles);
+ }
+ }
+
+ if (!BestReg)
+ return 0;
+
+ splitAroundRegion(VirtReg, BestReg, BestBundles, NewVRegs);
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Local Splitting
+//===----------------------------------------------------------------------===//
+
+
+/// calcGapWeights - Compute the maximum spill weight that needs to be evicted
+/// in order to use PhysReg between two entries in SA->UseSlots.
+///
+/// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1].
+///
+void RAGreedy::calcGapWeights(unsigned PhysReg,
+ SmallVectorImpl<float> &GapWeight) {
+ assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned NumGaps = Uses.size()-1;
+
+ // Start and end points for the interference check.
+ SlotIndex StartIdx = BI.LiveIn ? BI.FirstUse.getBaseIndex() : BI.FirstUse;
+ SlotIndex StopIdx = BI.LiveOut ? BI.LastUse.getBoundaryIndex() : BI.LastUse;
+
+ GapWeight.assign(NumGaps, 0.0f);
+
+ // Add interference from each overlapping register.
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ if (!query(const_cast<LiveInterval&>(SA->getParent()), *AI)
+ .checkInterference())
+ continue;
+
+ // We know that VirtReg is a continuous interval from FirstUse to LastUse,
+ // so we don't need InterferenceQuery.
+ //
+ // Interference that overlaps an instruction is counted in both gaps
+ // surrounding the instruction. The exception is interference before
+ // StartIdx and after StopIdx.
+ //
+ LiveIntervalUnion::SegmentIter IntI = PhysReg2LiveUnion[*AI].find(StartIdx);
+ for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) {
+ // Skip the gaps before IntI.
+ while (Uses[Gap+1].getBoundaryIndex() < IntI.start())
+ if (++Gap == NumGaps)
+ break;
+ if (Gap == NumGaps)
+ break;
+
+ // Update the gaps covered by IntI.
+ const float weight = IntI.value()->weight;
+ for (; Gap != NumGaps; ++Gap) {
+ GapWeight[Gap] = std::max(GapWeight[Gap], weight);
+ if (Uses[Gap+1].getBaseIndex() >= IntI.stop())
+ break;
+ }
+ if (Gap == NumGaps)
+ break;
+ }
+ }
+}
+
+/// getPrevMappedIndex - Return the slot index of the last non-copy instruction
+/// before MI that has a slot index. If MI is the first mapped instruction in
+/// its block, return the block start index instead.
+///
+SlotIndex RAGreedy::getPrevMappedIndex(const MachineInstr *MI) {
+ assert(MI && "Missing MachineInstr");
+ const MachineBasicBlock *MBB = MI->getParent();
+ MachineBasicBlock::const_iterator B = MBB->begin(), I = MI;
+ while (I != B)
+ if (!(--I)->isDebugValue() && !I->isCopy())
+ return Indexes->getInstructionIndex(I);
+ return Indexes->getMBBStartIdx(MBB);
+}
+
+/// calcPrevSlots - Fill in the PrevSlot array with the index of the previous
+/// real non-copy instruction for each instruction in SA->UseSlots.
+///
+void RAGreedy::calcPrevSlots() {
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ PrevSlot.clear();
+ PrevSlot.reserve(Uses.size());
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
+ const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i]);
+ PrevSlot.push_back(getPrevMappedIndex(MI).getDefIndex());
+ }
+}
+
+/// nextSplitPoint - Find the next index j > i into SA->UseSlots such that it
+/// may be beneficial to split before UseSlots[j].
+///
+/// 0 is always a valid split point
+unsigned RAGreedy::nextSplitPoint(unsigned i) {
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ const unsigned Size = Uses.size();
+ assert(i != Size && "No split points after the end");
+ // Allow split before i when Uses[i] is not adjacent to the previous use.
+ while (++i != Size && PrevSlot[i].getBaseIndex() <= Uses[i-1].getBaseIndex())
+ ;
+ return i;
+}
+
+/// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only
+/// basic block.
+///
+unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ assert(SA->LiveBlocks.size() == 1 && "Not a local interval");
+ const SplitAnalysis::BlockInfo &BI = SA->LiveBlocks.front();
+
+ // Note that it is possible to have an interval that is live-in or live-out
+ // while only covering a single block - A phi-def can use undef values from
+ // predecessors, and the block could be a single-block loop.
+ // We don't bother doing anything clever about such a case, we simply assume
+ // that the interval is continuous from FirstUse to LastUse. We should make
+ // sure that we don't do anything illegal to such an interval, though.
+
+ const SmallVectorImpl<SlotIndex> &Uses = SA->UseSlots;
+ if (Uses.size() <= 2)
+ return 0;
+ const unsigned NumGaps = Uses.size()-1;
+
+ DEBUG({
+ dbgs() << "tryLocalSplit: ";
+ for (unsigned i = 0, e = Uses.size(); i != e; ++i)
+ dbgs() << ' ' << SA->UseSlots[i];
+ dbgs() << '\n';
+ });
+
+ // For every use, find the previous mapped non-copy instruction.
+ // We use this to detect valid split points, and to estimate new interval
+ // sizes.
+ calcPrevSlots();
+
+ unsigned BestBefore = NumGaps;
+ unsigned BestAfter = 0;
+ float BestDiff = 0;
+
+ const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB);
+ SmallVector<float, 8> GapWeight;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ // Keep track of the largest spill weight that would need to be evicted in
+ // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1].
+ calcGapWeights(PhysReg, GapWeight);
+
+ // Try to find the best sequence of gaps to close.
+ // The new spill weight must be larger than any gap interference.
+
+ // We will split before Uses[SplitBefore] and after Uses[SplitAfter].
+ unsigned SplitBefore = 0, SplitAfter = nextSplitPoint(1) - 1;
+
+ // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]).
+ // It is the spill weight that needs to be evicted.
+ float MaxGap = GapWeight[0];
+ for (unsigned i = 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+
+ for (;;) {
+ // Live before/after split?
+ const bool LiveBefore = SplitBefore != 0 || BI.LiveIn;
+ const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut;
+
+ DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' '
+ << Uses[SplitBefore] << '-' << Uses[SplitAfter]
+ << " i=" << MaxGap);
+
+ // Stop before the interval gets so big we wouldn't be making progress.
+ if (!LiveBefore && !LiveAfter) {
+ DEBUG(dbgs() << " all\n");
+ break;
+ }
+ // Should the interval be extended or shrunk?
+ bool Shrink = true;
+ if (MaxGap < HUGE_VALF) {
+ // Estimate the new spill weight.
+ //
+ // Each instruction reads and writes the register, except the first
+ // instr doesn't read when !LiveBefore, and the last instr doesn't write
+ // when !LiveAfter.
+ //
+ // We will be inserting copies before and after, so the total number of
+ // reads and writes is 2 * EstUses.
+ //
+ const unsigned EstUses = 2*(SplitAfter - SplitBefore) +
+ 2*(LiveBefore + LiveAfter);
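+ // For example, a window covering 3 gaps that is live both before and
+ // after the split gives EstUses = 2*3 + 2*(1+1) = 10.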
+
+ // Try to guess the size of the new interval. This should be trivial,
+ // but the slot index of an inserted copy can be a lot smaller than the
+ // instruction it is inserted before if there are many dead indexes
+ // between them.
+ //
+ // We measure the distance from the instruction before SplitBefore to
+ // get a conservative estimate.
+ //
+ // The final distance can still be different if inserting copies
+ // triggers a slot index renumbering.
+ //
+ const float EstWeight = normalizeSpillWeight(blockFreq * EstUses,
+ PrevSlot[SplitBefore].distance(Uses[SplitAfter]));
+ // Would this split be possible to allocate?
+ // Never allocate all gaps, we wouldn't be making progress.
+ float Diff = EstWeight - MaxGap;
+ DEBUG(dbgs() << " w=" << EstWeight << " d=" << Diff);
+ if (Diff > 0) {
+ Shrink = false;
+ if (Diff > BestDiff) {
+ DEBUG(dbgs() << " (best)");
+ BestDiff = Diff;
+ BestBefore = SplitBefore;
+ BestAfter = SplitAfter;
+ }
+ }
+ }
+
+ // Try to shrink.
+ if (Shrink) {
+ SplitBefore = nextSplitPoint(SplitBefore);
+ if (SplitBefore < SplitAfter) {
+ DEBUG(dbgs() << " shrink\n");
+ // Recompute the max when necessary.
+ if (GapWeight[SplitBefore - 1] >= MaxGap) {
+ MaxGap = GapWeight[SplitBefore];
+ for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i)
+ MaxGap = std::max(MaxGap, GapWeight[i]);
+ }
+ continue;
+ }
+ MaxGap = 0;
+ }
+
+ // Try to extend the interval.
+ if (SplitAfter >= NumGaps) {
+ DEBUG(dbgs() << " end\n");
+ break;
+ }
+
+ DEBUG(dbgs() << " extend\n");
+ for (unsigned e = nextSplitPoint(SplitAfter + 1) - 1;
+ SplitAfter != e; ++SplitAfter)
+ MaxGap = std::max(MaxGap, GapWeight[SplitAfter]);
+ continue;
+ }
+ }
+
+ // Didn't find any candidates?
+ if (BestBefore == NumGaps)
+ return 0;
+
+ DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore]
+ << '-' << Uses[BestAfter] << ", " << BestDiff
+ << ", " << (BestAfter - BestBefore + 1) << " instrs\n");
+
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor SE(*SA, *LIS, *VRM, *DomTree, LREdit);
+
+ SE.openIntv();
+ SlotIndex SegStart = SE.enterIntvBefore(Uses[BestBefore]);
+ SlotIndex SegStop = SE.leaveIntvAfter(Uses[BestAfter]);
+ SE.useIntv(SegStart, SegStop);
+ SE.closeIntv();
+ SE.finish();
+ ++NumLocalSplits;
+
+ return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// Live Range Splitting
+//===----------------------------------------------------------------------===//
+
+/// trySplit - Try to split VirtReg or one of its interferences, making it
+/// assignable.
+/// @return Physreg when VirtReg may be assigned and/or new NewVRegs.
+unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*>&NewVRegs) {
+ SA->analyze(&VirtReg);
+
+ // Local intervals are handled separately.
+ if (LIS->intervalIsInOneMBB(VirtReg)) {
+ NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled);
+ return tryLocalSplit(VirtReg, Order, NewVRegs);
+ }
+
+ NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled);
+
+ // First try to split around a region spanning multiple blocks.
+ unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Then isolate blocks with multiple uses.
+ SplitAnalysis::BlockPtrSet Blocks;
+ if (SA->getMultiUseBlocks(Blocks)) {
+ SmallVector<LiveInterval*, 4> SpillRegs;
+ LiveRangeEdit LREdit(VirtReg, NewVRegs, SpillRegs);
+ SplitEditor(*SA, *LIS, *VRM, *DomTree, LREdit).splitSingleBlocks(Blocks);
+ if (VerifyEnabled)
+ MF->verify(this, "After splitting live range around basic blocks");
+ }
+
+ // Don't assign any physregs.
+ return 0;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Spilling
+//===----------------------------------------------------------------------===//
+
+/// calcInterferenceWeight - Calculate the combined spill weight of
+/// interferences when assigning VirtReg to PhysReg.
+float RAGreedy::calcInterferenceWeight(LiveInterval &VirtReg, unsigned PhysReg){
+ float Sum = 0;
+ for (const unsigned *AI = TRI->getOverlaps(PhysReg); *AI; ++AI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+ Q.collectInterferingVRegs();
+ if (Q.seenUnspillableVReg())
+ return HUGE_VALF;
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i)
+ Sum += Q.interferingVRegs()[i]->weight;
+ }
+ return Sum;
+}
+
+/// trySpillInterferences - Try to spill interfering registers instead of the
+/// current one. Only do it if the accumulated spill weight is smaller than the
+/// current spill weight.
+unsigned RAGreedy::trySpillInterferences(LiveInterval &VirtReg,
+ AllocationOrder &Order,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ NamedRegionTimer T("Spill Interference", TimerGroupName, TimePassesIsEnabled);
+ unsigned BestPhys = 0;
+ float BestWeight = 0;
+
+ Order.rewind();
+ while (unsigned PhysReg = Order.next()) {
+ float Weight = calcInterferenceWeight(VirtReg, PhysReg);
+ if (Weight == HUGE_VALF || Weight >= VirtReg.weight)
+ continue;
+ if (!BestPhys || Weight < BestWeight)
+ BestPhys = PhysReg, BestWeight = Weight;
+ }
+
+ // No candidates found.
+ if (!BestPhys)
+ return 0;
+
+ // Collect all interfering registers.
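+ // These queries were already filled in by calcInterferenceWeight() when
+ // BestPhys was selected, so interferingVRegs() is valid here.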
+ SmallVector<LiveInterval*, 8> Spills;
+ for (const unsigned *AI = TRI->getOverlaps(BestPhys); *AI; ++AI) {
+ LiveIntervalUnion::Query &Q = query(VirtReg, *AI);
+ Spills.append(Q.interferingVRegs().begin(), Q.interferingVRegs().end());
+ for (unsigned i = 0, e = Q.interferingVRegs().size(); i != e; ++i) {
+ LiveInterval *VReg = Q.interferingVRegs()[i];
+ unassign(*VReg, *AI);
+ }
+ }
+
+ // Spill them all.
+ DEBUG(dbgs() << "spilling " << Spills.size() << " interferences with weight "
+ << BestWeight << '\n');
+ for (unsigned i = 0, e = Spills.size(); i != e; ++i)
+ spiller().spill(Spills[i], NewVRegs, Spills);
+ return BestPhys;
+}
+
+
+//===----------------------------------------------------------------------===//
+// Main Entry Point
+//===----------------------------------------------------------------------===//
+
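+/// selectOrSplit - Try the strategies in order of increasing cost: assign a
+/// free register, reassign or evict a single interference, split the live
+/// range, spill cheaper interferences, and finally spill VirtReg itself.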
+unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg,
+ SmallVectorImpl<LiveInterval*> &NewVRegs) {
+ // First try assigning a free register.
+ AllocationOrder Order(VirtReg.reg, *VRM, ReservedRegs);
+ while (unsigned PhysReg = Order.next()) {
+ if (!checkPhysRegInterference(VirtReg, PhysReg))
+ return PhysReg;
+ }
+
+ // Try to reassign interferences.
+ if (unsigned PhysReg = tryReassignOrEvict(VirtReg, Order, NewVRegs))
+ return PhysReg;
+
+ assert(NewVRegs.empty() && "Cannot append to existing NewVRegs");
+
+ // Try splitting VirtReg or interferences.
+ unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs);
+ if (PhysReg || !NewVRegs.empty())
+ return PhysReg;
+
+ // Try to spill another interfering reg with less spill weight.
+ PhysReg = trySpillInterferences(VirtReg, Order, NewVRegs);
+ if (PhysReg)
+ return PhysReg;
+
+ // Finally spill VirtReg itself.
+ NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled);
+ SmallVector<LiveInterval*, 1> pendingSpills;
+ spiller().spill(&VirtReg, NewVRegs, pendingSpills);
+
+ // The live virtual register requesting allocation was spilled, so tell
+ // the caller not to allocate anything during this round.
+ return 0;
+}
+
+bool RAGreedy::runOnMachineFunction(MachineFunction &mf) {
+ DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n"
+ << "********** Function: "
+ << ((Value*)mf.getFunction())->getName() << '\n');
+
+ MF = &mf;
+ if (VerifyEnabled)
+ MF->verify(this, "Before greedy register allocator");
+
+ RegAllocBase::init(getAnalysis<VirtRegMap>(), getAnalysis<LiveIntervals>());
+ Indexes = &getAnalysis<SlotIndexes>();
+ DomTree = &getAnalysis<MachineDominatorTree>();
+ ReservedRegs = TRI->getReservedRegs(*MF);
+ SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM));
+ Loops = &getAnalysis<MachineLoopInfo>();
+ LoopRanges = &getAnalysis<MachineLoopRanges>();
+ Bundles = &getAnalysis<EdgeBundles>();
+ SpillPlacer = &getAnalysis<SpillPlacement>();
+
+ SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops));
+
+ allocatePhysRegs();
+ addMBBLiveIns(MF);
+ LIS->addKillFlags();
+
+ // Run rewriter
+ {
+ NamedRegionTimer T("Rewriter", TimerGroupName, TimePassesIsEnabled);
+ VRM->rewrite(Indexes);
+ }
+
+ // The pass output is in VirtRegMap. Release all the transient data.
+ releaseMemory();
+
+ return true;
+}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 5c62354a8872..b959878bcdba 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -12,13 +12,14 @@
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "regalloc"
+#include "LiveDebugVariables.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
#include "Spiller.h"
+#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
-#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -91,6 +92,19 @@ namespace {
struct RALinScan : public MachineFunctionPass {
static char ID;
RALinScan() : MachineFunctionPass(ID) {
+ initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(
+ *PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializePreAllocSplittingPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry());
+
// Initialize the queue to record recently-used registers.
if (NumRecentlyUsedRegs > 0)
RecentRegs.resize(NumRecentlyUsedRegs, 0);
@@ -127,7 +141,6 @@ namespace {
BitVector allocatableRegs_;
BitVector reservedRegs_;
LiveIntervals* li_;
- LiveStacks* ls_;
MachineLoopInfo *loopInfo;
/// handled_ - Intervals are added to the handled_ set in the order of their
@@ -183,6 +196,8 @@ namespace {
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+ AU.addRequired<AliasAnalysis>();
+ AU.addPreserved<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<SlotIndexes>();
if (StrongPHIElim)
@@ -193,12 +208,15 @@ namespace {
AU.addRequired<CalculateSpillWeights>();
if (PreSplitIntervals)
AU.addRequiredID(PreAllocSplittingID);
- AU.addRequired<LiveStacks>();
- AU.addPreserved<LiveStacks>();
+ AU.addRequiredID(LiveStacksID);
+ AU.addPreservedID(LiveStacksID);
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addRequired<VirtRegMap>();
AU.addPreserved<VirtRegMap>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
+ AU.addRequiredID(MachineDominatorsID);
AU.addPreservedID(MachineDominatorsID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -370,8 +388,19 @@ namespace {
char RALinScan::ID = 0;
}
-INITIALIZE_PASS(RALinScan, "linearscan-regalloc",
- "Linear Scan Register Allocator", false, false);
+INITIALIZE_PASS_BEGIN(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(CalculateSpillWeights)
+INITIALIZE_PASS_DEPENDENCY(PreAllocSplitting)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_AG_DEPENDENCY(RegisterCoalescer)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(RALinScan, "linearscan-regalloc",
+ "Linear Scan Register Allocator", false, false)
void RALinScan::ComputeRelatedRegClasses() {
// First pass, add all reg classes to the union, and determine at least one
@@ -402,8 +431,12 @@ void RALinScan::ComputeRelatedRegClasses() {
for (DenseMap<unsigned, const TargetRegisterClass*>::iterator
I = OneClassForEachPhysReg.begin(), E = OneClassForEachPhysReg.end();
I != E; ++I)
- for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS)
- RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
+ for (const unsigned *AS = tri_->getAliasSet(I->first); *AS; ++AS) {
+ const TargetRegisterClass *AliasClass =
+ OneClassForEachPhysReg.lookup(*AS);
+ if (AliasClass)
+ RelatedRegClasses.unionSets(I->second, AliasClass);
+ }
}
/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
@@ -431,8 +464,7 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned CandReg;
{
MachineInstr *CopyMI;
- if (vni->def != SlotIndex() && vni->isDefAccurate() &&
- (CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
+ if ((CopyMI = li_->getInstructionFromIndex(vni->def)) && CopyMI->isCopy())
// Defined by a copy, try to extend SrcReg forward
CandReg = CopyMI->getOperand(1).getReg();
else if (TrivCoalesceEnds &&
@@ -442,6 +474,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
CandReg = CopyMI->getOperand(0).getReg();
else
return Reg;
+
+ // If the target of the copy is a sub-register then don't coalesce.
+ if (CopyMI->getOperand(0).getSubReg())
+ return Reg;
}
if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
@@ -478,7 +514,6 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
allocatableRegs_ = tri_->getAllocatableSet(fn);
reservedRegs_ = tri_->getReservedRegs(fn);
li_ = &getAnalysis<LiveIntervals>();
- ls_ = &getAnalysis<LiveStacks>();
loopInfo = &getAnalysis<MachineLoopInfo>();
// We don't run the coalescer here because we have no reason to
@@ -505,6 +540,9 @@ bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
// Rewrite spill code and update the PhysRegsUsed set.
rewriter_->runOnMachineFunction(*mf_, *vrm_, li_);
+ // Write out new DBG_VALUE instructions.
+ getAnalysis<LiveDebugVariables>().emitDebugValues(vrm_);
+
assert(unhandled_.empty() && "Unhandled live intervals remain!");
finalizeRegUses();
@@ -638,8 +676,6 @@ void RALinScan::linearScan() {
// Look for physical registers that end up not being allocated even though
// register allocator had to spill other registers in its register class.
- if (ls_->getNumIntervals() == 0)
- return;
if (!vrm_->FindUnusedRegisters(li_))
return;
}
@@ -784,30 +820,6 @@ static void RevertVectorIteratorsTo(RALinScan::IntervalPtrs &V,
}
}
-/// addStackInterval - Create a LiveInterval for stack if the specified live
-/// interval has been spilled.
-static void addStackInterval(LiveInterval *cur, LiveStacks *ls_,
- LiveIntervals *li_,
- MachineRegisterInfo* mri_, VirtRegMap &vrm_) {
- int SS = vrm_.getStackSlot(cur->reg);
- if (SS == VirtRegMap::NO_STACK_SLOT)
- return;
-
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
- LiveInterval &SI = ls_->getOrCreateInterval(SS, RC);
-
- VNInfo *VNI;
- if (SI.hasAtLeastOneValue())
- VNI = SI.getValNumInfo(0);
- else
- VNI = SI.getNextValue(SlotIndex(), 0, false,
- ls_->getVNInfoAllocator());
-
- LiveInterval &RI = li_->getInterval(cur->reg);
- // FIXME: This may be overly conservative.
- SI.MergeRangesInAsValue(RI, VNI);
-}
-
/// getConflictWeight - Return the number of conflicts between cur
/// live interval and defs and uses of Reg weighted by loop depths.
static
@@ -925,13 +937,9 @@ LiveInterval *RALinScan::hasNextReloadInterval(LiveInterval *cur) {
}
void RALinScan::DowngradeRegister(LiveInterval *li, unsigned Reg) {
- bool isNew = DowngradedRegs.insert(Reg);
- isNew = isNew; // Silence compiler warning.
- assert(isNew && "Multiple reloads holding the same register?");
- DowngradeMap.insert(std::make_pair(li->reg, Reg));
- for (const unsigned *AS = tri_->getAliasSet(Reg); *AS; ++AS) {
- isNew = DowngradedRegs.insert(*AS);
- isNew = isNew; // Silence compiler warning.
+ for (const unsigned *AS = tri_->getOverlaps(Reg); *AS; ++AS) {
+ bool isNew = DowngradedRegs.insert(*AS);
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Multiple reloads holding the same register?");
DowngradeMap.insert(std::make_pair(li->reg, *AS));
}
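// Illustrative sketch, not part of the patch: the hunk above replaces
// "insert Reg, then insert each alias" with one walk over getOverlaps, which
// yields Reg itself plus its aliases. A hypothetical standalone model of
// that change; aliasesOf and overlapsOf are made-up stand-ins:
#include <cassert>
#include <cstdio>
#include <set>
#include <vector>

// Pretend register file: 1 and 2 are the two halves of the pair register 3.
static std::vector<unsigned> aliasesOf(unsigned R) {
  if (R == 3) return {1, 2};
  if (R == 1 || R == 2) return {3};
  return {};
}
static std::vector<unsigned> overlapsOf(unsigned R) {
  std::vector<unsigned> V{R};                      // a register overlaps itself...
  for (unsigned A : aliasesOf(R))
    V.push_back(A);                                // ...and each of its aliases
  return V;
}

int main() {
  std::set<unsigned> Downgraded;
  for (unsigned R : overlapsOf(3)) {
    bool IsNew = Downgraded.insert(R).second;
    assert(IsNew && "Multiple reloads holding the same register?");
    (void)IsNew;
  }
  std::printf("downgraded %zu registers\n", Downgraded.size());
  return 0;
}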
@@ -957,10 +965,11 @@ namespace {
/// assignRegOrStackSlotAtInterval - assign a register if one is available, or
/// spill.
void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
- DEBUG(dbgs() << "\tallocating current interval: ");
+ const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
+ DEBUG(dbgs() << "\tallocating current interval from "
+ << RC->getName() << ": ");
// This is an implicitly defined live interval; just assign any register.
- const TargetRegisterClass *RC = mri_->getRegClass(cur->reg);
if (cur->empty()) {
unsigned physReg = vrm_->getRegAllocPref(cur->reg);
if (!physReg)
@@ -984,8 +993,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// one, e.g. X86::mov32to32_. These move instructions are not coalescable.
if (!vrm_->getRegAllocPref(cur->reg) && cur->hasAtLeastOneValue()) {
VNInfo *vni = cur->begin()->valno;
- if ((vni->def != SlotIndex()) && !vni->isUnused() &&
- vni->isDefAccurate()) {
+ if (!vni->isUnused()) {
MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
if (CopyMI && CopyMI->isCopy()) {
unsigned DstSubReg = CopyMI->getOperand(0).getSubReg();
@@ -1225,7 +1233,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
spiller_->spill(cur, added, spillIs);
std::sort(added.begin(), added.end(), LISorter());
- addStackInterval(cur, ls_, li_, mri_, *vrm_);
if (added.empty())
return; // Early exit if all spills were folded.
@@ -1300,7 +1307,6 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
if (sli->beginIndex() < earliestStart)
earliestStart = sli->beginIndex();
spiller_->spill(sli, added, spillIs);
- addStackInterval(sli, ls_, li_, mri_, *vrm_);
spilled.insert(sli->reg);
}
@@ -1419,8 +1425,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval* cur,
std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(cur->reg);
// Resolve second part of the hint (if possible) given the current allocation.
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && vrm_->hasPhys(physReg))
physReg = vrm_->getPhys(physReg);
TargetRegisterClass::iterator I, E;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 61f337bab49c..ea0d1fe0233f 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -31,9 +31,6 @@
#define DEBUG_TYPE "regalloc"
-#include "PBQP/HeuristicSolver.h"
-#include "PBQP/Graph.h"
-#include "PBQP/Heuristics/Briggs.h"
#include "RenderMachineFunction.h"
#include "Splitter.h"
#include "VirtRegMap.h"
@@ -41,9 +38,13 @@
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
+#include "llvm/CodeGen/RegAllocPBQP.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/PBQP/HeuristicSolver.h"
+#include "llvm/CodeGen/PBQP/Graph.h"
+#include "llvm/CodeGen/PBQP/Heuristics/Briggs.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Support/Debug.h"
@@ -51,7 +52,6 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include <limits>
-#include <map>
#include <memory>
#include <set>
#include <vector>
@@ -60,7 +60,7 @@ using namespace llvm;
static RegisterRegAlloc
registerPBQPRepAlloc("pbqp", "PBQP register allocator",
- llvm::createPBQPRegisterAllocator);
+ createDefaultPBQPRegisterAllocator);
static cl::opt<bool>
pbqpCoalescing("pbqp-coalescing",
@@ -69,698 +69,471 @@ pbqpCoalescing("pbqp-coalescing",
static cl::opt<bool>
pbqpPreSplitting("pbqp-pre-splitting",
- cl::desc("Pre-splite before PBQP register allocation."),
+ cl::desc("Pre-split before PBQP register allocation."),
cl::init(false), cl::Hidden);
namespace {
- ///
- /// PBQP based allocators solve the register allocation problem by mapping
- /// register allocation problems to Partitioned Boolean Quadratic
- /// Programming problems.
- class PBQPRegAlloc : public MachineFunctionPass {
- public:
+///
+/// PBQP based allocators solve the register allocation problem by mapping
+/// register allocation problems to Partitioned Boolean Quadratic
+/// Programming problems.
+class RegAllocPBQP : public MachineFunctionPass {
+public:
+
+ static char ID;
+
+ /// Construct a PBQP register allocator.
+ RegAllocPBQP(std::auto_ptr<PBQPBuilder> b)
+ : MachineFunctionPass(ID), builder(b) {
+ initializeSlotIndexesPass(*PassRegistry::getPassRegistry());
+ initializeLiveIntervalsPass(*PassRegistry::getPassRegistry());
+ initializeRegisterCoalescerAnalysisGroup(*PassRegistry::getPassRegistry());
+ initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry());
+ initializeLiveStacksPass(*PassRegistry::getPassRegistry());
+ initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry());
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ initializeVirtRegMapPass(*PassRegistry::getPassRegistry());
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
- static char ID;
+ /// Return the pass name.
+ virtual const char* getPassName() const {
+ return "PBQP Register Allocator";
+ }
- /// Construct a PBQP register allocator.
- PBQPRegAlloc() : MachineFunctionPass(ID) {}
+ /// PBQP analysis usage.
+ virtual void getAnalysisUsage(AnalysisUsage &au) const;
- /// Return the pass name.
- virtual const char* getPassName() const {
- return "PBQP Register Allocator";
- }
+ /// Perform register allocation
+ virtual bool runOnMachineFunction(MachineFunction &MF);
- /// PBQP analysis usage.
- virtual void getAnalysisUsage(AnalysisUsage &au) const {
- au.addRequired<SlotIndexes>();
- au.addPreserved<SlotIndexes>();
- au.addRequired<LiveIntervals>();
- //au.addRequiredID(SplitCriticalEdgesID);
- au.addRequired<RegisterCoalescer>();
- au.addRequired<CalculateSpillWeights>();
- au.addRequired<LiveStacks>();
- au.addPreserved<LiveStacks>();
- au.addRequired<MachineLoopInfo>();
- au.addPreserved<MachineLoopInfo>();
- if (pbqpPreSplitting)
- au.addRequired<LoopSplitter>();
- au.addRequired<VirtRegMap>();
- au.addRequired<RenderMachineFunction>();
- MachineFunctionPass::getAnalysisUsage(au);
- }
+private:
- /// Perform register allocation
- virtual bool runOnMachineFunction(MachineFunction &MF);
+ typedef std::map<const LiveInterval*, unsigned> LI2NodeMap;
+ typedef std::vector<const LiveInterval*> Node2LIMap;
+ typedef std::vector<unsigned> AllowedSet;
+ typedef std::vector<AllowedSet> AllowedSetMap;
+ typedef std::pair<unsigned, unsigned> RegPair;
+ typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
+ typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
+ typedef std::set<unsigned> RegSet;
- private:
- class LIOrdering {
- public:
- bool operator()(const LiveInterval *li1, const LiveInterval *li2) const {
- return li1->reg < li2->reg;
- }
- };
-
- typedef std::map<const LiveInterval*, unsigned, LIOrdering> LI2NodeMap;
- typedef std::vector<const LiveInterval*> Node2LIMap;
- typedef std::vector<unsigned> AllowedSet;
- typedef std::vector<AllowedSet> AllowedSetMap;
- typedef std::set<unsigned> RegSet;
- typedef std::pair<unsigned, unsigned> RegPair;
- typedef std::map<RegPair, PBQP::PBQPNum> CoalesceMap;
-
- typedef std::set<LiveInterval*, LIOrdering> LiveIntervalSet;
-
- typedef std::vector<PBQP::Graph::NodeItr> NodeVector;
-
- MachineFunction *mf;
- const TargetMachine *tm;
- const TargetRegisterInfo *tri;
- const TargetInstrInfo *tii;
- const MachineLoopInfo *loopInfo;
- MachineRegisterInfo *mri;
- RenderMachineFunction *rmf;
-
- LiveIntervals *lis;
- LiveStacks *lss;
- VirtRegMap *vrm;
-
- LI2NodeMap li2Node;
- Node2LIMap node2LI;
- AllowedSetMap allowedSets;
- LiveIntervalSet vregIntervalsToAlloc,
- emptyVRegIntervals;
- NodeVector problemNodes;
-
-
- /// Builds a PBQP cost vector.
- template <typename RegContainer>
- PBQP::Vector buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &cealesces,
- PBQP::PBQPNum spillCost) const;
-
- /// \brief Builds a PBQP interference matrix.
- ///
- /// @return Either a pointer to a non-zero PBQP matrix representing the
- /// allocation option costs, or a null pointer for a zero matrix.
- ///
- /// Expects allowed sets for two interfering LiveIntervals. These allowed
- /// sets should contain only allocable registers from the LiveInterval's
- /// register class, with any interfering pre-colored registers removed.
- template <typename RegContainer>
- PBQP::Matrix* buildInterferenceMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2) const;
-
- ///
- /// Expects allowed sets for two potentially coalescable LiveIntervals,
- /// and an estimated benefit due to coalescing. The allowed sets should
- /// contain only allocable registers from the LiveInterval's register
- /// classes, with any interfering pre-colored registers removed.
- template <typename RegContainer>
- PBQP::Matrix* buildCoalescingMatrix(const RegContainer &allowed1,
- const RegContainer &allowed2,
- PBQP::PBQPNum cBenefit) const;
-
- /// \brief Finds coalescing opportunities and returns them as a map.
- ///
- /// Any entries in the map are guaranteed coalescable, even if their
- /// corresponding live intervals overlap.
- CoalesceMap findCoalesces();
-
- /// \brief Finds the initial set of vreg intervals to allocate.
- void findVRegIntervalsToAlloc();
-
- /// \brief Constructs a PBQP problem representation of the register
- /// allocation problem for this function.
- ///
- /// @return a PBQP solver object for the register allocation problem.
- PBQP::Graph constructPBQPProblem();
-
- /// \brief Adds a stack interval if the given live interval has been
- /// spilled. Used to support stack slot coloring.
- void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
-
- /// \brief Given a solved PBQP problem maps this solution back to a register
- /// assignment.
- bool mapPBQPToRegAlloc(const PBQP::Solution &solution);
-
- /// \brief Postprocessing before final spilling. Sets basic block "live in"
- /// variables.
- void finalizeAlloc() const;
-
- };
-
- char PBQPRegAlloc::ID = 0;
-}
+ std::auto_ptr<PBQPBuilder> builder;
+ MachineFunction *mf;
+ const TargetMachine *tm;
+ const TargetRegisterInfo *tri;
+ const TargetInstrInfo *tii;
+ const MachineLoopInfo *loopInfo;
+ MachineRegisterInfo *mri;
+ RenderMachineFunction *rmf;
-template <typename RegContainer>
-PBQP::Vector PBQPRegAlloc::buildCostVector(unsigned vReg,
- const RegContainer &allowed,
- const CoalesceMap &coalesces,
- PBQP::PBQPNum spillCost) const {
+ LiveIntervals *lis;
+ LiveStacks *lss;
+ VirtRegMap *vrm;
- typedef typename RegContainer::const_iterator AllowedItr;
+ RegSet vregsToAlloc, emptyIntervalVRegs;
- // Allocate vector. Additional element (0th) used for spill option
- PBQP::Vector v(allowed.size() + 1, 0);
+ /// \brief Finds the initial set of vreg intervals to allocate.
+ void findVRegIntervalsToAlloc();
- v[0] = spillCost;
+ /// \brief Adds a stack interval if the given live interval has been
+ /// spilled. Used to support stack slot coloring.
+ void addStackInterval(const LiveInterval *spilled,MachineRegisterInfo* mri);
- // Iterate over the allowed registers inserting coalesce benefits if there
- // are any.
- unsigned ai = 0;
- for (AllowedItr itr = allowed.begin(), end = allowed.end();
- itr != end; ++itr, ++ai) {
+ /// \brief Given a solved PBQP problem maps this solution back to a register
+ /// assignment.
+ bool mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution);
- unsigned pReg = *itr;
+ /// \brief Postprocessing before final spilling. Sets basic block "live in"
+ /// variables.
+ void finalizeAlloc() const;
- CoalesceMap::const_iterator cmItr =
- coalesces.find(RegPair(vReg, pReg));
+};
- // No coalesce - on to the next preg.
- if (cmItr == coalesces.end())
- continue;
+char RegAllocPBQP::ID = 0;
- // We have a coalesce - insert the benefit.
- v[ai + 1] = -cmItr->second;
- }
+} // End anonymous namespace.
- return v;
+unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const {
+ Node2VReg::const_iterator vregItr = node2VReg.find(node);
+ assert(vregItr != node2VReg.end() && "No vreg for node.");
+ return vregItr->second;
}
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildInterferenceMatrix(
- const RegContainer &allowed1, const RegContainer &allowed2) const {
-
- typedef typename RegContainer::const_iterator RegContainerIterator;
-
- // Construct a PBQP matrix representing the cost of allocation options. The
- // rows and columns correspond to the allocation options for the two live
- // intervals. Elements will be infinite where corresponding registers alias,
- // since we cannot allocate aliasing registers to interfering live intervals.
- // All other elements (non-aliasing combinations) will have zero cost. Note
- // that the spill option (element 0,0) has zero cost, since we can allocate
- // both intervals to memory safely (the cost for each individual allocation
- // to memory is accounted for by the cost vectors for each live interval).
- PBQP::Matrix *m =
- new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
- // Assume this is a zero matrix until proven otherwise. Zero matrices occur
- // between interfering live ranges with non-overlapping register sets (e.g.
- // non-overlapping reg classes, or disjoint sets of allowed regs within the
- // same class). The term "overlapping" is used advisedly: sets which do not
- // intersect, but contain registers which alias, will have non-zero matrices.
- // We optimize zero matrices away to improve solver speed.
- bool isZeroMatrix = true;
-
-
- // Row index. Starts at 1, since the 0th row is for the spill option, which
- // is always zero.
- unsigned ri = 1;
-
- // Iterate over allowed sets, insert infinities where required.
- for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
- a1Itr != a1End; ++a1Itr) {
-
- // Column index, starts at 1 as for row index.
- unsigned ci = 1;
- unsigned reg1 = *a1Itr;
-
- for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
- a2Itr != a2End; ++a2Itr) {
-
- unsigned reg2 = *a2Itr;
-
- // If the row/column regs are identical or alias insert an infinity.
- if (tri->regsOverlap(reg1, reg2)) {
- (*m)[ri][ci] = std::numeric_limits<PBQP::PBQPNum>::infinity();
- isZeroMatrix = false;
- }
-
- ++ci;
- }
-
- ++ri;
- }
-
- // If this turns out to be a zero matrix...
- if (isZeroMatrix) {
- // free it and return null.
- delete m;
- return 0;
- }
-
- // ...otherwise return the cost matrix.
- return m;
+PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const {
+ VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg);
+ assert(nodeItr != vreg2Node.end() && "No node for vreg.");
+ return nodeItr->second;
+
}
-template <typename RegContainer>
-PBQP::Matrix* PBQPRegAlloc::buildCoalescingMatrix(
- const RegContainer &allowed1, const RegContainer &allowed2,
- PBQP::PBQPNum cBenefit) const {
-
- typedef typename RegContainer::const_iterator RegContainerIterator;
-
- // Construct a PBQP Matrix representing the benefits of coalescing. As with
- // interference matrices the rows and columns represent allowed registers
- // for the LiveIntervals which are (potentially) to be coalesced. The amount
- // -cBenefit will be placed in any element representing the same register
- // for both intervals.
- PBQP::Matrix *m =
- new PBQP::Matrix(allowed1.size() + 1, allowed2.size() + 1, 0);
-
- // Reset costs to zero.
- m->reset(0);
-
- // Assume the matrix is zero till proven otherwise. Zero matrices will be
- // optimized away as in the interference case.
- bool isZeroMatrix = true;
-
- // Row index. Starts at 1, since the 0th row is for the spill option, which
- // is always zero.
- unsigned ri = 1;
-
- // Iterate over the allowed sets, insert coalescing benefits where
- // appropriate.
- for (RegContainerIterator a1Itr = allowed1.begin(), a1End = allowed1.end();
- a1Itr != a1End; ++a1Itr) {
-
- // Column index, starts at 1 as for row index.
- unsigned ci = 1;
- unsigned reg1 = *a1Itr;
-
- for (RegContainerIterator a2Itr = allowed2.begin(), a2End = allowed2.end();
- a2Itr != a2End; ++a2Itr) {
-
- // If the row and column represent the same register insert a beneficial
- // cost to preference this allocation - it would allow us to eliminate a
- // move instruction.
- if (reg1 == *a2Itr) {
- (*m)[ri][ci] = -cBenefit;
- isZeroMatrix = false;
- }
-
- ++ci;
- }
-
- ++ri;
- }
-
- // If this turns out to be a zero matrix...
- if (isZeroMatrix) {
- // ...free it and return null.
- delete m;
- return 0;
- }
-
- return m;
+const PBQPRAProblem::AllowedSet&
+ PBQPRAProblem::getAllowedSet(unsigned vreg) const {
+ AllowedSetMap::const_iterator allowedSetItr = allowedSets.find(vreg);
+ assert(allowedSetItr != allowedSets.end() && "No pregs for vreg.");
+ const AllowedSet &allowedSet = allowedSetItr->second;
+ return allowedSet;
}
-PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() {
-
- typedef MachineFunction::const_iterator MFIterator;
- typedef MachineBasicBlock::const_iterator MBBIterator;
- typedef LiveInterval::const_vni_iterator VNIIterator;
+unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const {
+ assert(isPRegOption(vreg, option) && "Not a preg option.");
- CoalesceMap coalescesFound;
+ const AllowedSet& allowedSet = getAllowedSet(vreg);
+ assert(option <= allowedSet.size() && "Option outside allowed set.");
+ return allowedSet[option - 1];
+}
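// Illustrative sketch, not part of the patch: in the encoding used by
// getPRegForOption above, solution option 0 means "spill" and option i >= 1
// selects allowedSet[i - 1]. A hypothetical standalone decoder:
#include <cassert>
#include <cstdio>
#include <vector>

static unsigned decodeOption(const std::vector<unsigned> &Allowed, unsigned Option) {
  if (Option == 0)
    return 0;                                      // spill option, no preg
  assert(Option <= Allowed.size() && "Option outside allowed set.");
  return Allowed[Option - 1];                      // shift by 1 for the spill slot
}

int main() {
  std::vector<unsigned> Allowed = {10, 11, 12};    // hypothetical preg numbers
  std::printf("option 0 -> %u (spill)\n", decodeOption(Allowed, 0));
  std::printf("option 2 -> preg %u\n", decodeOption(Allowed, 2));
  return 0;
}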
- // To find coalesces we need to iterate over the function looking for
- // copy instructions.
- for (MFIterator bbItr = mf->begin(), bbEnd = mf->end();
- bbItr != bbEnd; ++bbItr) {
+std::auto_ptr<PBQPRAProblem> PBQPBuilder::build(MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
- const MachineBasicBlock *mbb = &*bbItr;
+ typedef std::vector<const LiveInterval*> LIVector;
- for (MBBIterator iItr = mbb->begin(), iEnd = mbb->end();
- iItr != iEnd; ++iItr) {
+ MachineRegisterInfo *mri = &mf->getRegInfo();
+ const TargetRegisterInfo *tri = mf->getTarget().getRegisterInfo();
- const MachineInstr *instr = &*iItr;
+ std::auto_ptr<PBQPRAProblem> p(new PBQPRAProblem());
+ PBQP::Graph &g = p->getGraph();
+ RegSet pregs;
- // If this isn't a copy then continue to the next instruction.
- if (!instr->isCopy())
- continue;
-
- unsigned srcReg = instr->getOperand(1).getReg();
- unsigned dstReg = instr->getOperand(0).getReg();
+ // Collect the set of preg intervals, record that they're used in the MF.
+ for (LiveIntervals::const_iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
+ pregs.insert(itr->first);
+ mri->setPhysRegUsed(itr->first);
+ }
+ }
- // If the registers are already the same our job is nice and easy.
- if (dstReg == srcReg)
- continue;
+ BitVector reservedRegs = tri->getReservedRegs(*mf);
+
+ // Iterate over vregs.
+ for (RegSet::const_iterator vregItr = vregs.begin(), vregEnd = vregs.end();
+ vregItr != vregEnd; ++vregItr) {
+ unsigned vreg = *vregItr;
+ const TargetRegisterClass *trc = mri->getRegClass(vreg);
+ const LiveInterval *vregLI = &lis->getInterval(vreg);
+
+ // Compute an initial allowed set for the current vreg.
+ typedef std::vector<unsigned> VRAllowed;
+ VRAllowed vrAllowed;
+ for (TargetRegisterClass::iterator aoItr = trc->allocation_order_begin(*mf),
+ aoEnd = trc->allocation_order_end(*mf);
+ aoItr != aoEnd; ++aoItr) {
+ unsigned preg = *aoItr;
+ if (!reservedRegs.test(preg)) {
+ vrAllowed.push_back(preg);
+ }
+ }
- bool srcRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(srcReg),
- dstRegIsPhysical = TargetRegisterInfo::isPhysicalRegister(dstReg);
+ // Remove any physical registers which overlap.
+ for (RegSet::const_iterator pregItr = pregs.begin(),
+ pregEnd = pregs.end();
+ pregItr != pregEnd; ++pregItr) {
+ unsigned preg = *pregItr;
+ const LiveInterval *pregLI = &lis->getInterval(preg);
- // If both registers are physical then we can't coalesce.
- if (srcRegIsPhysical && dstRegIsPhysical)
+ if (pregLI->empty()) {
continue;
+ }
- // If it's a copy that includes two virtual register but the source and
- // destination classes differ then we can't coalesce.
- if (!srcRegIsPhysical && !dstRegIsPhysical &&
- mri->getRegClass(srcReg) != mri->getRegClass(dstReg))
+ if (!vregLI->overlaps(*pregLI)) {
continue;
-
- // If one is physical and one is virtual, check that the physical is
- // allocatable in the class of the virtual.
- if (srcRegIsPhysical && !dstRegIsPhysical) {
- const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg);
- if (std::find(dstRegClass->allocation_order_begin(*mf),
- dstRegClass->allocation_order_end(*mf), srcReg) ==
- dstRegClass->allocation_order_end(*mf))
- continue;
}
- if (!srcRegIsPhysical && dstRegIsPhysical) {
- const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg);
- if (std::find(srcRegClass->allocation_order_begin(*mf),
- srcRegClass->allocation_order_end(*mf), dstReg) ==
- srcRegClass->allocation_order_end(*mf))
- continue;
- }
-
- // If we've made it here we have a copy with compatible register classes.
- // We can probably coalesce, but we need to consider overlap.
- const LiveInterval *srcLI = &lis->getInterval(srcReg),
- *dstLI = &lis->getInterval(dstReg);
- if (srcLI->overlaps(*dstLI)) {
- // Even in the case of an overlap we might still be able to coalesce,
- // but we need to make sure that no definition of either range occurs
- // while the other range is live.
+ // Remove the register from the allowed set.
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), preg);
- // Otherwise start by assuming we're ok.
- bool badDef = false;
-
- // Test all defs of the source range.
- for (VNIIterator
- vniItr = srcLI->vni_begin(), vniEnd = srcLI->vni_end();
- vniItr != vniEnd; ++vniItr) {
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
+ }
- // If we find a poorly defined def we err on the side of caution.
- if (!(*vniItr)->def.isValid()) {
- badDef = true;
- break;
- }
+ // Also remove any aliases.
+ const unsigned *aliasItr = tri->getAliasSet(preg);
+ if (aliasItr != 0) {
+ for (; *aliasItr != 0; ++aliasItr) {
+ VRAllowed::iterator eraseItr =
+ std::find(vrAllowed.begin(), vrAllowed.end(), *aliasItr);
- // If we find a def that kills the coalescing opportunity then
- // record it and break from the loop.
- if (dstLI->liveAt((*vniItr)->def)) {
- badDef = true;
- break;
+ if (eraseItr != vrAllowed.end()) {
+ vrAllowed.erase(eraseItr);
}
}
+ }
+ }
- // If we have a bad def give up, continue to the next instruction.
- if (badDef)
- continue;
-
- // Otherwise test definitions of the destination range.
- for (VNIIterator
- vniItr = dstLI->vni_begin(), vniEnd = dstLI->vni_end();
- vniItr != vniEnd; ++vniItr) {
+ // Construct the node.
+ PBQP::Graph::NodeItr node =
+ g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0));
- // We want to make sure we skip the copy instruction itself.
- if ((*vniItr)->getCopy() == instr)
- continue;
+ // Record the mapping and allowed set in the problem.
+ p->recordVReg(vreg, node, vrAllowed.begin(), vrAllowed.end());
- if (!(*vniItr)->def.isValid()) {
- badDef = true;
- break;
- }
+ PBQP::PBQPNum spillCost = (vregLI->weight != 0.0) ?
+ vregLI->weight : std::numeric_limits<PBQP::PBQPNum>::min();
- if (srcLI->liveAt((*vniItr)->def)) {
- badDef = true;
- break;
- }
- }
+ addSpillCosts(g.getNodeCosts(node), spillCost);
+ }
- // As before a bad def we give up and continue to the next instr.
- if (badDef)
- continue;
+ for (RegSet::const_iterator vr1Itr = vregs.begin(), vrEnd = vregs.end();
+ vr1Itr != vrEnd; ++vr1Itr) {
+ unsigned vr1 = *vr1Itr;
+ const LiveInterval &l1 = lis->getInterval(vr1);
+ const PBQPRAProblem::AllowedSet &vr1Allowed = p->getAllowedSet(vr1);
+
+ for (RegSet::const_iterator vr2Itr = llvm::next(vr1Itr);
+ vr2Itr != vrEnd; ++vr2Itr) {
+ unsigned vr2 = *vr2Itr;
+ const LiveInterval &l2 = lis->getInterval(vr2);
+ const PBQPRAProblem::AllowedSet &vr2Allowed = p->getAllowedSet(vr2);
+
+ assert(!l2.empty() && "Empty interval in vreg set?");
+ if (l1.overlaps(l2)) {
+ PBQP::Graph::EdgeItr edge =
+ g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2),
+ PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0));
+
+ addInterferenceCosts(g.getEdgeCosts(edge), vr1Allowed, vr2Allowed, tri);
}
-
- // If we make it to here then either the ranges didn't overlap, or they
- // did, but none of their definitions would prevent us from coalescing.
- // We're good to go with the coalesce.
-
- float cBenefit = std::pow(10.0f, (float)loopInfo->getLoopDepth(mbb)) / 5.0;
-
- coalescesFound[RegPair(srcReg, dstReg)] = cBenefit;
- coalescesFound[RegPair(dstReg, srcReg)] = cBenefit;
}
-
}
- return coalescesFound;
+ return p;
}
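// Illustrative sketch, not part of the patch: the builder above forms each
// vreg's allowed set by taking the class allocation order, dropping reserved
// registers, then erasing any physreg (or alias) whose live interval overlaps
// the vreg. A hypothetical standalone filter with the same shape:
#include <algorithm>
#include <cstdio>
#include <set>
#include <vector>

static std::vector<unsigned>
allowedSetSketch(const std::vector<unsigned> &AllocOrder,
                 const std::set<unsigned> &Reserved,
                 const std::set<unsigned> &OverlappingPRegs) {
  std::vector<unsigned> Allowed;
  for (unsigned PReg : AllocOrder)
    if (!Reserved.count(PReg))                     // skip reserved registers
      Allowed.push_back(PReg);
  for (unsigned PReg : OverlappingPRegs) {         // then drop interference
    auto It = std::find(Allowed.begin(), Allowed.end(), PReg);
    if (It != Allowed.end())
      Allowed.erase(It);
  }
  return Allowed;
}

int main() {
  std::vector<unsigned> Order = {10, 11, 12, 13};
  std::printf("%zu registers remain allowed\n",
              allowedSetSketch(Order, {13}, {11}).size());
  return 0;
}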
-void PBQPRegAlloc::findVRegIntervalsToAlloc() {
-
- // Iterate over all live ranges.
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
-
- // Ignore physical ones.
- if (TargetRegisterInfo::isPhysicalRegister(itr->first))
- continue;
-
- LiveInterval *li = itr->second;
-
- // If this live interval is non-empty we will use pbqp to allocate it.
- // Empty intervals we allocate in a simple post-processing stage in
- // finalizeAlloc.
- if (!li->empty()) {
- vregIntervalsToAlloc.insert(li);
- }
- else {
- emptyVRegIntervals.insert(li);
- }
- }
+void PBQPBuilder::addSpillCosts(PBQP::Vector &costVec,
+ PBQP::PBQPNum spillCost) {
+ costVec[0] = spillCost;
}
-PBQP::Graph PBQPRegAlloc::constructPBQPProblem() {
-
- typedef std::vector<const LiveInterval*> LIVector;
- typedef std::vector<unsigned> RegVector;
+void PBQPBuilder::addInterferenceCosts(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ const TargetRegisterInfo *tri) {
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Matrix height mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Matrix width mismatch.");
- // This will store the physical intervals for easy reference.
- LIVector physIntervals;
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
- // Start by clearing the old node <-> live interval mappings & allowed sets
- li2Node.clear();
- node2LI.clear();
- allowedSets.clear();
-
- // Populate physIntervals, update preg use:
- for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
- itr != end; ++itr) {
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
- if (TargetRegisterInfo::isPhysicalRegister(itr->first)) {
- physIntervals.push_back(itr->second);
- mri->setPhysRegUsed(itr->second->reg);
+ if (tri->regsOverlap(preg1, preg2)) {
+ costMat[i + 1][j + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
+ }
}
}
+}
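// Illustrative sketch, not part of the patch: addInterferenceCosts above
// writes +infinity into every (i+1, j+1) cell whose registers overlap, and
// leaves row/column 0 (the spill option) at zero. A tiny standalone version
// using a plain matrix and equality as a stand-in for regsOverlap:
#include <cstdio>
#include <limits>
#include <vector>

using MatrixSketch = std::vector<std::vector<double>>;

static MatrixSketch buildInterference(const std::vector<unsigned> &A1,
                                      const std::vector<unsigned> &A2) {
  MatrixSketch M(A1.size() + 1, std::vector<double>(A2.size() + 1, 0.0));
  for (unsigned I = 0; I != A1.size(); ++I)
    for (unsigned J = 0; J != A2.size(); ++J)
      if (A1[I] == A2[J])                          // stand-in for regsOverlap
        M[I + 1][J + 1] = std::numeric_limits<double>::infinity();
  return M;
}

int main() {
  MatrixSketch M = buildInterference({10, 11}, {11, 12});
  std::printf("cost of giving both ranges reg 11: %f\n", M[2][1]);
  return 0;
}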
- // Iterate over vreg intervals, construct live interval <-> node number
- // mappings.
- for (LiveIntervalSet::const_iterator
- itr = vregIntervalsToAlloc.begin(), end = vregIntervalsToAlloc.end();
- itr != end; ++itr) {
- const LiveInterval *li = *itr;
-
- li2Node[li] = node2LI.size();
- node2LI.push_back(li);
- }
-
- // Get the set of potential coalesces.
- CoalesceMap coalesces;
-
- if (pbqpCoalescing) {
- coalesces = findCoalesces();
- }
-
- // Construct a PBQP solver for this problem
- PBQP::Graph problem;
- problemNodes.resize(vregIntervalsToAlloc.size());
-
- // Resize allowedSets container appropriately.
- allowedSets.resize(vregIntervalsToAlloc.size());
-
- BitVector ReservedRegs = tri->getReservedRegs(*mf);
-
- // Iterate over virtual register intervals to compute allowed sets...
- for (unsigned node = 0; node < node2LI.size(); ++node) {
-
- // Grab pointers to the interval and its register class.
- const LiveInterval *li = node2LI[node];
- const TargetRegisterClass *liRC = mri->getRegClass(li->reg);
+std::auto_ptr<PBQPRAProblem> PBQPBuilderWithCoalescing::build(
+ MachineFunction *mf,
+ const LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo,
+ const RegSet &vregs) {
- // Start by assuming all allocable registers in the class are allowed...
- RegVector liAllowed;
- TargetRegisterClass::iterator aob = liRC->allocation_order_begin(*mf);
- TargetRegisterClass::iterator aoe = liRC->allocation_order_end(*mf);
- for (TargetRegisterClass::iterator it = aob; it != aoe; ++it)
- if (!ReservedRegs.test(*it))
- liAllowed.push_back(*it);
+ std::auto_ptr<PBQPRAProblem> p = PBQPBuilder::build(mf, lis, loopInfo, vregs);
+ PBQP::Graph &g = p->getGraph();
- // Eliminate the physical registers which overlap with this range, along
- // with all their aliases.
- for (LIVector::iterator pItr = physIntervals.begin(),
- pEnd = physIntervals.end(); pItr != pEnd; ++pItr) {
+ const TargetMachine &tm = mf->getTarget();
+ CoalescerPair cp(*tm.getInstrInfo(), *tm.getRegisterInfo());
- if (!li->overlaps(**pItr))
- continue;
+ // Scan the machine function and add a coalescing cost whenever CoalescerPair
+ // gives the OK.
+ for (MachineFunction::const_iterator mbbItr = mf->begin(),
+ mbbEnd = mf->end();
+ mbbItr != mbbEnd; ++mbbItr) {
+ const MachineBasicBlock *mbb = &*mbbItr;
- unsigned pReg = (*pItr)->reg;
-
- // If we get here then the live intervals overlap, but we're still ok
- // if they're coalescable.
- if (coalesces.find(RegPair(li->reg, pReg)) != coalesces.end())
- continue;
+ for (MachineBasicBlock::const_iterator miItr = mbb->begin(),
+ miEnd = mbb->end();
+ miItr != miEnd; ++miItr) {
+ const MachineInstr *mi = &*miItr;
- // If we get here then we have a genuine exclusion.
+ if (!cp.setRegisters(mi)) {
+ continue; // Not coalescable.
+ }
- // Remove the overlapping reg...
- RegVector::iterator eraseItr =
- std::find(liAllowed.begin(), liAllowed.end(), pReg);
+ if (cp.getSrcReg() == cp.getDstReg()) {
+ continue; // Already coalesced.
+ }
- if (eraseItr != liAllowed.end())
- liAllowed.erase(eraseItr);
+ unsigned dst = cp.getDstReg(),
+ src = cp.getSrcReg();
- const unsigned *aliasItr = tri->getAliasSet(pReg);
+ const float copyFactor = 0.5; // Cost of copy relative to load. Current
+ // value plucked randomly out of the air.
+
+ PBQP::PBQPNum cBenefit =
+ copyFactor * LiveIntervals::getSpillWeight(false, true,
+ loopInfo->getLoopDepth(mbb));
- if (aliasItr != 0) {
- // ...and its aliases.
- for (; *aliasItr != 0; ++aliasItr) {
- RegVector::iterator eraseItr =
- std::find(liAllowed.begin(), liAllowed.end(), *aliasItr);
+ if (cp.isPhys()) {
+ if (!lis->isAllocatable(dst)) {
+ continue;
+ }
- if (eraseItr != liAllowed.end()) {
- liAllowed.erase(eraseItr);
+ const PBQPRAProblem::AllowedSet &allowed = p->getAllowedSet(src);
+ unsigned pregOpt = 0;
+ while (pregOpt < allowed.size() && allowed[pregOpt] != dst) {
+ ++pregOpt;
+ }
+ if (pregOpt < allowed.size()) {
+ ++pregOpt; // +1 to account for spill option.
+ PBQP::Graph::NodeItr node = p->getNodeForVReg(src);
+ addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit);
+ }
+ } else {
+ const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst);
+ const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src);
+ PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst);
+ PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src);
+ PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2);
+ if (edge == g.edgesEnd()) {
+ edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1,
+ allowed2->size() + 1,
+ 0));
+ } else {
+ if (g.getEdgeNode1(edge) == node2) {
+ std::swap(node1, node2);
+ std::swap(allowed1, allowed2);
}
}
+
+ addVirtRegCoalesce(g.getEdgeCosts(edge), *allowed1, *allowed2,
+ cBenefit);
}
}
+ }
- // Copy the allowed set into a member vector for use when constructing cost
- // vectors & matrices, and mapping PBQP solutions back to assignments.
- allowedSets[node] = AllowedSet(liAllowed.begin(), liAllowed.end());
+ return p;
+}
- // Set the spill cost to the interval weight, or epsilon if the
- // interval weight is zero
- PBQP::PBQPNum spillCost = (li->weight != 0.0) ?
- li->weight : std::numeric_limits<PBQP::PBQPNum>::min();
+void PBQPBuilderWithCoalescing::addPhysRegCoalesce(PBQP::Vector &costVec,
+ unsigned pregOption,
+ PBQP::PBQPNum benefit) {
+ costVec[pregOption] += -benefit;
+}
- // Build a cost vector for this interval.
- problemNodes[node] =
- problem.addNode(
- buildCostVector(li->reg, allowedSets[node], coalesces, spillCost));
+void PBQPBuilderWithCoalescing::addVirtRegCoalesce(
+ PBQP::Matrix &costMat,
+ const PBQPRAProblem::AllowedSet &vr1Allowed,
+ const PBQPRAProblem::AllowedSet &vr2Allowed,
+ PBQP::PBQPNum benefit) {
- }
+ assert(costMat.getRows() == vr1Allowed.size() + 1 && "Size mismatch.");
+ assert(costMat.getCols() == vr2Allowed.size() + 1 && "Size mismatch.");
+ for (unsigned i = 0; i != vr1Allowed.size(); ++i) {
+ unsigned preg1 = vr1Allowed[i];
+ for (unsigned j = 0; j != vr2Allowed.size(); ++j) {
+ unsigned preg2 = vr2Allowed[j];
+
+ if (preg1 == preg2) {
+ costMat[i + 1][j + 1] += -benefit;
+ }
+ }
+ }
+}
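// Illustrative sketch, not part of the patch: addVirtRegCoalesce above
// subtracts a benefit from every cell where both ranges would receive the
// same physical register, biasing the solver toward eliminating the copy. A
// hypothetical standalone form using the same plain-matrix idea:
#include <cstdio>
#include <vector>

static void addCoalesceBenefit(std::vector<std::vector<double>> &M,
                               const std::vector<unsigned> &A1,
                               const std::vector<unsigned> &A2,
                               double Benefit) {
  for (unsigned I = 0; I != A1.size(); ++I)
    for (unsigned J = 0; J != A2.size(); ++J)
      if (A1[I] == A2[J])
        M[I + 1][J + 1] += -Benefit;               // cheaper when the copy disappears
}

int main() {
  std::vector<std::vector<double>> M(3, std::vector<double>(3, 0.0));
  addCoalesceBenefit(M, {10, 11}, {11, 12}, 0.5);
  std::printf("coalesced cell cost: %f\n", M[2][1]);
  return 0;
}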
- // Now add the cost matrices...
- for (unsigned node1 = 0; node1 < node2LI.size(); ++node1) {
- const LiveInterval *li = node2LI[node1];
- // Test for live range overlaps and insert interference matrices.
- for (unsigned node2 = node1 + 1; node2 < node2LI.size(); ++node2) {
- const LiveInterval *li2 = node2LI[node2];
+void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<SlotIndexes>();
+ au.addPreserved<SlotIndexes>();
+ au.addRequired<LiveIntervals>();
+ //au.addRequiredID(SplitCriticalEdgesID);
+ au.addRequired<RegisterCoalescer>();
+ au.addRequired<CalculateSpillWeights>();
+ au.addRequired<LiveStacks>();
+ au.addPreserved<LiveStacks>();
+ au.addRequired<MachineLoopInfo>();
+ au.addPreserved<MachineLoopInfo>();
+ if (pbqpPreSplitting)
+ au.addRequired<LoopSplitter>();
+ au.addRequired<VirtRegMap>();
+ au.addRequired<RenderMachineFunction>();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
- CoalesceMap::const_iterator cmItr =
- coalesces.find(RegPair(li->reg, li2->reg));
+void RegAllocPBQP::findVRegIntervalsToAlloc() {
- PBQP::Matrix *m = 0;
+ // Iterate over all live ranges.
+ for (LiveIntervals::iterator itr = lis->begin(), end = lis->end();
+ itr != end; ++itr) {
- if (cmItr != coalesces.end()) {
- m = buildCoalescingMatrix(allowedSets[node1], allowedSets[node2],
- cmItr->second);
- }
- else if (li->overlaps(*li2)) {
- m = buildInterferenceMatrix(allowedSets[node1], allowedSets[node2]);
- }
+ // Ignore physical ones.
+ if (TargetRegisterInfo::isPhysicalRegister(itr->first))
+ continue;
- if (m != 0) {
- problem.addEdge(problemNodes[node1],
- problemNodes[node2],
- *m);
+ LiveInterval *li = itr->second;
- delete m;
- }
+ // If this live interval is non-empty we will use pbqp to allocate it.
+ // Empty intervals we allocate in a simple post-processing stage in
+ // finalizeAlloc.
+ if (!li->empty()) {
+ vregsToAlloc.insert(li->reg);
+ } else {
+ emptyIntervalVRegs.insert(li->reg);
}
}
-
- assert(problem.getNumNodes() == allowedSets.size());
-/*
- std::cerr << "Allocating for " << problem.getNumNodes() << " nodes, "
- << problem.getNumEdges() << " edges.\n";
-
- problem.printDot(std::cerr);
-*/
- // We're done, PBQP problem constructed - return it.
- return problem;
}
-void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
+void RegAllocPBQP::addStackInterval(const LiveInterval *spilled,
MachineRegisterInfo* mri) {
int stackSlot = vrm->getStackSlot(spilled->reg);
- if (stackSlot == VirtRegMap::NO_STACK_SLOT)
+ if (stackSlot == VirtRegMap::NO_STACK_SLOT) {
return;
+ }
const TargetRegisterClass *RC = mri->getRegClass(spilled->reg);
LiveInterval &stackInterval = lss->getOrCreateInterval(stackSlot, RC);
VNInfo *vni;
- if (stackInterval.getNumValNums() != 0)
+ if (stackInterval.getNumValNums() != 0) {
vni = stackInterval.getValNumInfo(0);
- else
+ } else {
vni = stackInterval.getNextValue(
- SlotIndex(), 0, false, lss->getVNInfoAllocator());
+ SlotIndex(), 0, lss->getVNInfoAllocator());
+ }
LiveInterval &rhsInterval = lis->getInterval(spilled->reg);
stackInterval.MergeRangesInAsValue(rhsInterval, vni);
}
-bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
-
+bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem,
+ const PBQP::Solution &solution) {
// Set to true if we have any spills
bool anotherRoundNeeded = false;
// Clear the existing allocation.
vrm->clearAllVirt();
- // Iterate over the nodes mapping the PBQP solution to a register assignment.
- for (unsigned node = 0; node < node2LI.size(); ++node) {
- unsigned virtReg = node2LI[node]->reg,
- allocSelection = solution.getSelection(problemNodes[node]);
-
-
- // If the PBQP solution is non-zero it's a physical register...
- if (allocSelection != 0) {
- // Get the physical reg, subtracting 1 to account for the spill option.
- unsigned physReg = allowedSets[node][allocSelection - 1];
-
- DEBUG(dbgs() << "VREG " << virtReg << " -> "
- << tri->getName(physReg) << "\n");
-
- assert(physReg != 0);
-
- // Add to the virt reg map and update the used phys regs.
- vrm->assignVirt2Phys(virtReg, physReg);
- }
- // ...Otherwise it's a spill.
- else {
-
- // Make sure we ignore this virtual reg on the next round
- // of allocation
- vregIntervalsToAlloc.erase(&lis->getInterval(virtReg));
-
- // Insert spill ranges for this live range
- const LiveInterval *spillInterval = node2LI[node];
- double oldSpillWeight = spillInterval->weight;
+ const PBQP::Graph &g = problem.getGraph();
+ // Iterate over the nodes mapping the PBQP solution to a register
+ // assignment.
+ for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(),
+ nodeEnd = g.nodesEnd();
+ node != nodeEnd; ++node) {
+ unsigned vreg = problem.getVRegForNode(node);
+ unsigned alloc = solution.getSelection(node);
+
+ if (problem.isPRegOption(vreg, alloc)) {
+ unsigned preg = problem.getPRegForOption(vreg, alloc);
+ DEBUG(dbgs() << "VREG " << vreg << " -> " << tri->getName(preg) << "\n");
+ assert(preg != 0 && "Invalid preg selected.");
+ vrm->assignVirt2Phys(vreg, preg);
+ } else if (problem.isSpillOption(vreg, alloc)) {
+ vregsToAlloc.erase(vreg);
+ const LiveInterval* spillInterval = &lis->getInterval(vreg);
+ double oldWeight = spillInterval->weight;
SmallVector<LiveInterval*, 8> spillIs;
rmf->rememberUseDefs(spillInterval);
std::vector<LiveInterval*> newSpills =
@@ -768,42 +541,42 @@ bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
addStackInterval(spillInterval, mri);
rmf->rememberSpills(spillInterval, newSpills);
- (void) oldSpillWeight;
- DEBUG(dbgs() << "VREG " << virtReg << " -> SPILLED (Cost: "
- << oldSpillWeight << ", New vregs: ");
+ (void) oldWeight;
+ DEBUG(dbgs() << "VREG " << vreg << " -> SPILLED (Cost: "
+ << oldWeight << ", New vregs: ");
// Copy any newly inserted live intervals into the list of regs to
// allocate.
for (std::vector<LiveInterval*>::const_iterator
itr = newSpills.begin(), end = newSpills.end();
itr != end; ++itr) {
-
assert(!(*itr)->empty() && "Empty spill range.");
-
DEBUG(dbgs() << (*itr)->reg << " ");
-
- vregIntervalsToAlloc.insert(*itr);
+ vregsToAlloc.insert((*itr)->reg);
}
DEBUG(dbgs() << ")\n");
// We need another round if spill intervals were added.
anotherRoundNeeded |= !newSpills.empty();
+ } else {
+ assert(false && "Unknown allocation option.");
}
}
return !anotherRoundNeeded;
}
-void PBQPRegAlloc::finalizeAlloc() const {
+
+void RegAllocPBQP::finalizeAlloc() const {
typedef LiveIntervals::iterator LIIterator;
typedef LiveInterval::Ranges::const_iterator LRIterator;
// First allocate registers for the empty intervals.
- for (LiveIntervalSet::const_iterator
- itr = emptyVRegIntervals.begin(), end = emptyVRegIntervals.end();
+ for (RegSet::const_iterator
+ itr = emptyIntervalVRegs.begin(), end = emptyIntervalVRegs.end();
itr != end; ++itr) {
- LiveInterval *li = *itr;
+ LiveInterval *li = &lis->getInterval(*itr);
unsigned physReg = vrm->getRegAllocPref(li->reg);
@@ -828,11 +601,9 @@ void PBQPRegAlloc::finalizeAlloc() const {
// Get the physical register for this interval
if (TargetRegisterInfo::isPhysicalRegister(li->reg)) {
reg = li->reg;
- }
- else if (vrm->isAssignedReg(li->reg)) {
+ } else if (vrm->isAssignedReg(li->reg)) {
reg = vrm->getPhys(li->reg);
- }
- else {
+ } else {
// Ranges which are assigned a stack slot only are ignored.
continue;
}
@@ -849,7 +620,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
// Find the set of basic blocks which this range is live into...
if (lis->findLiveInMBBs(lrItr->start, lrItr->end, liveInMBBs)) {
// And add the physreg for this interval to their live-in sets.
- for (unsigned i = 0; i < liveInMBBs.size(); ++i) {
+ for (unsigned i = 0; i != liveInMBBs.size(); ++i) {
if (liveInMBBs[i] != entryMBB) {
if (!liveInMBBs[i]->isLiveIn(reg)) {
liveInMBBs[i]->addLiveIn(reg);
@@ -863,7 +634,7 @@ void PBQPRegAlloc::finalizeAlloc() const {
}
-bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
+bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) {
mf = &MF;
tm = &mf->getTarget();
@@ -894,7 +665,7 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
findVRegIntervalsToAlloc();
// If there are non-empty intervals allocate them using pbqp.
- if (!vregIntervalsToAlloc.empty()) {
+ if (!vregsToAlloc.empty()) {
bool pbqpAllocComplete = false;
unsigned round = 0;
@@ -902,11 +673,13 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
while (!pbqpAllocComplete) {
DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n");
- PBQP::Graph problem = constructPBQPProblem();
+ std::auto_ptr<PBQPRAProblem> problem =
+ builder->build(mf, lis, loopInfo, vregsToAlloc);
PBQP::Solution solution =
- PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(problem);
+ PBQP::HeuristicSolver<PBQP::Heuristics::Briggs>::solve(
+ problem->getGraph());
- pbqpAllocComplete = mapPBQPToRegAlloc(solution);
+ pbqpAllocComplete = mapPBQPToRegAlloc(*problem, solution);
++round;
}
@@ -917,12 +690,8 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
rmf->renderMachineFunction("After PBQP register allocation.", vrm);
- vregIntervalsToAlloc.clear();
- emptyVRegIntervals.clear();
- li2Node.clear();
- node2LI.clear();
- allowedSets.clear();
- problemNodes.clear();
+ vregsToAlloc.clear();
+ emptyIntervalVRegs.clear();
DEBUG(dbgs() << "Post alloc VirtRegMap:\n" << *vrm << "\n");
@@ -934,9 +703,18 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
return true;
}
-FunctionPass* llvm::createPBQPRegisterAllocator() {
- return new PBQPRegAlloc();
+FunctionPass* llvm::createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder> builder) {
+ return new RegAllocPBQP(builder);
}
+FunctionPass* llvm::createDefaultPBQPRegisterAllocator() {
+ if (pbqpCoalescing) {
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilderWithCoalescing()));
+ } // else
+ return createPBQPRegisterAllocator(
+ std::auto_ptr<PBQPBuilder>(new PBQPBuilder()));
+}
#undef DEBUG_TYPE
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index 02b5539f0f4f..407559a211a0 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -24,7 +24,8 @@
using namespace llvm;
// Register the RegisterCoalescer interface, providing a nice name to refer to.
-static RegisterAnalysisGroup<RegisterCoalescer> Z("Register Coalescer");
+INITIALIZE_ANALYSIS_GROUP(RegisterCoalescer, "Register Coalescer",
+ SimpleRegisterCoalescing)
char RegisterCoalescer::ID = 0;
// RegisterCoalescer destructor: DO NOT move this to the header file
diff --git a/lib/CodeGen/RenderMachineFunction.cpp b/lib/CodeGen/RenderMachineFunction.cpp
index 93426eecbbc1..cbfd5a23d63d 100644
--- a/lib/CodeGen/RenderMachineFunction.cpp
+++ b/lib/CodeGen/RenderMachineFunction.cpp
@@ -30,9 +30,14 @@
using namespace llvm;
char RenderMachineFunction::ID = 0;
-INITIALIZE_PASS(RenderMachineFunction, "rendermf",
+INITIALIZE_PASS_BEGIN(RenderMachineFunction, "rendermf",
"Render machine functions (and related info) to HTML pages",
- false, false);
+ false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(RenderMachineFunction, "rendermf",
+ "Render machine functions (and related info) to HTML pages",
+ false, false)
static cl::opt<std::string>
outputFileSuffix("rmf-file-suffix",
@@ -458,14 +463,9 @@ namespace llvm {
liItr != liEnd; ++liItr) {
LiveInterval *li = liItr->second;
- const TargetRegisterClass *liTRC;
-
if (TargetRegisterInfo::isPhysicalRegister(li->reg))
continue;
- liTRC = mri->getRegClass(li->reg);
-
-
// For all ranges in the current interval.
for (LiveInterval::iterator lrItr = li->begin(),
lrEnd = li->end();
diff --git a/lib/CodeGen/RenderMachineFunction.h b/lib/CodeGen/RenderMachineFunction.h
index 8d56a8292ac5..85719923c0c6 100644
--- a/lib/CodeGen/RenderMachineFunction.h
+++ b/lib/CodeGen/RenderMachineFunction.h
@@ -202,7 +202,9 @@ namespace llvm {
public:
static char ID;
- RenderMachineFunction() : MachineFunctionPass(ID) {}
+ RenderMachineFunction() : MachineFunctionPass(ID) {
+ initializeRenderMachineFunctionPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 7d39dc496afe..3388889c9e91 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "pre-RA-sched"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
+#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -33,6 +34,12 @@ ScheduleDAG::ScheduleDAG(MachineFunction &mf)
ScheduleDAG::~ScheduleDAG() {}
+/// getInstrDesc helper to handle SDNodes.
+const TargetInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
+ if (!Node || !Node->isMachineOpcode()) return NULL;
+ return &TII->get(Node->getMachineOpcode());
+}
+
/// dump - dump the schedule.
void ScheduleDAG::dumpSchedule() const {
for (unsigned i = 0, e = Sequence.size(); i != e; i++) {
@@ -68,12 +75,12 @@ void ScheduleDAG::Run(MachineBasicBlock *bb,
/// addPred - This adds the specified edge as a pred of the current node if
/// not already. It also adds the current node as a successor of the
/// specified node.
-void SUnit::addPred(const SDep &D) {
+bool SUnit::addPred(const SDep &D) {
// If this node already has this depenence, don't add a redundant one.
for (SmallVector<SDep, 4>::const_iterator I = Preds.begin(), E = Preds.end();
I != E; ++I)
if (*I == D)
- return;
+ return false;
// Now add a corresponding succ to N.
SDep P = D;
P.setSUnit(this);
@@ -99,6 +106,7 @@ void SUnit::addPred(const SDep &D) {
this->setDepthDirty();
N->setHeightDirty();
}
+ return true;
}
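// Illustrative sketch, not part of the patch: addPred now reports whether a
// new dependence edge was actually added, so callers can distinguish a fresh
// edge from a redundant one. A hypothetical standalone "insert and report"
// analogue:
#include <algorithm>
#include <cstdio>
#include <vector>

struct EdgeSketch {
  int From, To;
  bool operator==(const EdgeSketch &O) const { return From == O.From && To == O.To; }
};

static bool addEdgeOnce(std::vector<EdgeSketch> &Edges, EdgeSketch E) {
  if (std::find(Edges.begin(), Edges.end(), E) != Edges.end())
    return false;                                  // already present, nothing added
  Edges.push_back(E);
  return true;
}

int main() {
  std::vector<EdgeSketch> Edges;
  std::printf("first insert: %d\n", addEdgeOnce(Edges, {1, 2}));
  std::printf("duplicate:    %d\n", addEdgeOnce(Edges, {1, 2}));
  return 0;
}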
/// removePred - This removes the specified edge as a pred of the current
@@ -278,6 +286,7 @@ void SUnit::dumpAll(const ScheduleDAG *G) const {
dbgs() << " # preds left : " << NumPredsLeft << "\n";
dbgs() << " # succs left : " << NumSuccsLeft << "\n";
+ dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n";
dbgs() << " Latency : " << Latency << "\n";
dbgs() << " Depth : " << Depth << "\n";
dbgs() << " Height : " << Height << "\n";
@@ -492,7 +501,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) {
/// all nodes affected by the edge insertion. These nodes will later get new
/// topological indexes by means of the Shift method.
void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound,
- bool& HasLoop) {
+ bool &HasLoop) {
std::vector<const SUnit*> WorkList;
WorkList.reserve(SUnits.size());
diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp
index 0a2fb3796a42..6b7a8c6491bd 100644
--- a/lib/CodeGen/ScheduleDAGEmit.cpp
+++ b/lib/CodeGen/ScheduleDAGEmit.cpp
@@ -57,7 +57,7 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
assert(I->getReg() && "Unknown physical register!");
unsigned VRBase = MRI.createVirtualRegister(SU->CopyDstRC);
bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase)
.addReg(I->getReg());
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index ea93dd5c6663..f17023eabb72 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -16,6 +16,7 @@
#include "ScheduleDAGInstrs.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -32,9 +33,9 @@ using namespace llvm;
ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
const MachineLoopInfo &mli,
const MachineDominatorTree &mdt)
- : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
- Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
- MFI = mf.getFrameInfo();
+ : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+ InstrItins(mf.getTarget().getInstrItineraryData()),
+ Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
DbgValueVec.clear();
}
@@ -78,12 +79,12 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} while (1);
}
-/// getUnderlyingObject - This is a wrapper around Value::getUnderlyingObject
+/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject
/// and adds support for basic ptrtoint+arithmetic+inttoptr sequences.
static const Value *getUnderlyingObject(const Value *V) {
// First just call Value::getUnderlyingObject to let it do what it does.
do {
- V = V->getUnderlyingObject();
+ V = GetUnderlyingObject(V);
    // If it found an inttoptr, use special code to continue climbing.
if (Operator::getOpcode(V) != Instruction::IntToPtr)
break;
@@ -141,6 +142,46 @@ void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
}
}
+/// AddSchedBarrierDeps - Add dependencies from instructions in the current
+/// list of instructions being scheduled to the scheduling barrier by adding
+/// the exit SU to the register defs and use list. This is because we want to
+/// make sure instructions which define registers that are either used by
+/// the terminator or are live-out are properly scheduled. This is
+/// especially important when the definition latency of the return value(s)
+/// is too high to be hidden by the branch or when the live-out registers
+/// are used by instructions in the fallthrough block.
+void ScheduleDAGInstrs::AddSchedBarrierDeps() {
+ MachineInstr *ExitMI = InsertPos != BB->end() ? &*InsertPos : 0;
+ ExitSU.setInstr(ExitMI);
+ bool AllDepKnown = ExitMI &&
+ (ExitMI->getDesc().isCall() || ExitMI->getDesc().isBarrier());
+ if (ExitMI && AllDepKnown) {
+ // If it's a call or a barrier, add dependencies on the defs and uses of
+ // instruction.
+ for (unsigned i = 0, e = ExitMI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = ExitMI->getOperand(i);
+ if (!MO.isReg() || MO.isDef()) continue;
+ unsigned Reg = MO.getReg();
+ if (Reg == 0) continue;
+
+ assert(TRI->isPhysicalRegister(Reg) && "Virtual register encountered!");
+ Uses[Reg].push_back(&ExitSU);
+ }
+ } else {
+ // For others, e.g. fallthrough, conditional branch, assume the exit
+ // uses all the registers that are livein to the successor blocks.
+ SmallSet<unsigned, 8> Seen;
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(),
+ E = (*SI)->livein_end(); I != E; ++I) {
+ unsigned Reg = *I;
+ if (Seen.insert(Reg))
+ Uses[Reg].push_back(&ExitSU);
+ }
+ }
+}
+
void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We'll be allocating one SUnit for each instruction, plus one for
// the region exit node.
@@ -175,6 +216,10 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// without emitting the info from the previous call.
DbgValueVec.clear();
+ // Model data dependencies between instructions being scheduled and the
+ // ExitSU.
+ AddSchedBarrierDeps();
+
// Walk the list of instructions, from bottom moving up.
for (MachineBasicBlock::iterator MII = InsertPos, MIE = Begin;
MII != MIE; --MII) {
@@ -194,6 +239,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
"Cannot schedule terminators or labels!");
// Create the SUnit for this MI.
SUnit *SU = NewSUnit(MI);
+ SU->isCall = TID.isCall();
+ SU->isCommutable = TID.isCommutable();
// Assign the Latency field of SU using target-provided information.
if (UnitLatencies)
@@ -228,6 +275,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
unsigned AOLatency = (Kind == SDep::Anti) ? 0 : 1;
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(Reg)))
@@ -237,6 +286,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
std::vector<SUnit *> &DefList = Defs[*Alias];
for (unsigned i = 0, e = DefList.size(); i != e; ++i) {
SUnit *DefSU = DefList[i];
+ if (DefSU == &ExitSU)
+ continue;
if (DefSU != SU &&
(Kind != SDep::Output || !MO.isDead() ||
!DefSU->getInstr()->registerDefIsDead(*Alias)))
@@ -258,12 +309,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// TODO: Perhaps we should get rid of
// SpecialAddressLatency and just move this into
// adjustSchedDependency for the targets that care about it.
- if (SpecialAddressLatency != 0 && !UnitLatencies) {
+ if (SpecialAddressLatency != 0 && !UnitLatencies &&
+ UseSU != &ExitSU) {
MachineInstr *UseMI = UseSU->getInstr();
const TargetInstrDesc &UseTID = UseMI->getDesc();
int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
          assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
- if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+ if (RegUseIndex >= 0 &&
+ (UseTID.mayLoad() || UseTID.mayStore()) &&
(unsigned)RegUseIndex < UseTID.getNumOperands() &&
UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
LDataLatency += SpecialAddressLatency;
@@ -357,7 +410,7 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ if (TID.isCall() || MI->hasUnmodeledSideEffects() ||
(MI->hasVolatileMemoryRef() &&
(!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
// Be conservative with these and add dependencies on all memory
@@ -446,6 +499,14 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// Treat all other stores conservatively.
goto new_alias_chain;
}
+
+ if (!ExitSU.isPred(SU))
+      // Push stores up a bit to avoid them getting in between cmp
+ // and branches.
+ ExitSU.addPred(SDep(SU, SDep::Order, 0,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
} else if (TID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
@@ -498,23 +559,22 @@ void ScheduleDAGInstrs::FinishBlock() {
}
void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-
// Compute the latency for the node.
- SU->Latency =
- InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+ if (!InstrItins || InstrItins->isEmpty()) {
+ SU->Latency = 1;
- // Simplistic target-independent heuristic: assume that loads take
- // extra time.
- if (InstrItins.isEmpty())
+ // Simplistic target-independent heuristic: assume that loads take
+ // extra time.
if (SU->getInstr()->getDesc().mayLoad())
SU->Latency += 2;
+ } else {
+ SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
+ }
}
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
SDep& dep) const {
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
+ if (!InstrItins || InstrItins->isEmpty())
return;
// For a data dependency with a known register...
@@ -528,14 +588,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
MachineInstr *DefMI = Def->getInstr();
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
if (DefIdx != -1) {
- int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
- DefIdx);
- if (DefCycle >= 0) {
- MachineInstr *UseMI = Use->getInstr();
- const unsigned UseClass = UseMI->getDesc().getSchedClass();
-
- // For all uses of the register, calculate the maxmimum latency
- int Latency = -1;
+ const MachineOperand &MO = DefMI->getOperand(DefIdx);
+ if (MO.isReg() && MO.isImplicit() &&
+ DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
+ // This is an implicit def, getOperandLatency() won't return the correct
+ // latency. e.g.
+ // %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
+ // %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
+ // What we want is to compute latency between def of %D6/%D7 and use of
+ // %Q3 instead.
+ DefIdx = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
+ }
+ MachineInstr *UseMI = Use->getInstr();
+  // For all uses of the register, calculate the maximum latency
+ int Latency = -1;
+ if (UseMI) {
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = UseMI->getOperand(i);
if (!MO.isReg() || !MO.isUse())
@@ -544,15 +611,21 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
if (MOReg != Reg)
continue;
- int UseCycle = InstrItins.getOperandCycle(UseClass, i);
- if (UseCycle >= 0)
- Latency = std::max(Latency, DefCycle - UseCycle + 1);
+ int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
+ UseMI, i);
+ Latency = std::max(Latency, UseCycle);
}
-
- // If we found a latency, then replace the existing dependence latency.
- if (Latency >= 0)
- dep.setLatency(Latency);
+ } else {
+    // If UseMI is null, it must be a scheduling barrier.
+ if (!InstrItins || InstrItins->isEmpty())
+ return;
+ unsigned DefClass = DefMI->getDesc().getSchedClass();
+ Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
}
+
+ // If we found a latency, then replace the existing dependence latency.
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
}
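
The reworked ComputeOperandLatency above now takes the maximum per-use latency over every operand of UseMI that reads the defined register, seeding with -1 so the caller only overrides the dependence latency when a use was actually found. A minimal sketch of that reduction with hypothetical toy types (not MachineInstr or the itinerary API):

#include <algorithm>
#include <vector>

// Toy stand-in for the register-use operands of a single UseMI.
struct ToyUseOperand {
  unsigned Reg;   // register this operand reads
  int Cycle;      // operand latency reported for it
};

// Mirrors the max-over-uses loop: -1 means "no use of Reg found".
int maxOperandLatency(const std::vector<ToyUseOperand> &Uses, unsigned Reg) {
  int Latency = -1;
  for (std::vector<ToyUseOperand>::const_iterator I = Uses.begin(),
       E = Uses.end(); I != E; ++I)
    if (I->Reg == Reg)
      Latency = std::max(Latency, I->Cycle);
  return Latency;
}
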
diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h
index c8f543f7146d..c878287d9c8c 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.h
+++ b/lib/CodeGen/ScheduleDAGInstrs.h
@@ -101,6 +101,7 @@ namespace llvm {
const MachineLoopInfo &MLI;
const MachineDominatorTree &MDT;
const MachineFrameInfo *MFI;
+ const InstrItineraryData *InstrItins;
/// Defs, Uses - Remember where defs and uses of each physical register
/// are as we iterate upward through the instructions. This is allocated
@@ -163,6 +164,15 @@ namespace llvm {
/// input.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// AddSchedBarrierDeps - Add dependencies from instructions in the current
+    /// list of instructions being scheduled to the scheduling barrier. We want to
+    /// make sure instructions which define registers that are either used by
+    /// the terminator or are live-out are properly scheduled. This is
+    /// especially important when the definition latency of the return value(s)
+    /// is too high to be hidden by the branch or when the live-out registers
+    /// are used by instructions in the fallthrough block.
+ void AddSchedBarrierDeps();
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
index cbde2b01eeaf..e6d7ded8a784 100644
--- a/lib/CodeGen/PostRAHazardRecognizer.cpp
+++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp
@@ -1,4 +1,4 @@
-//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===//
+//===----- ScoreboardHazardRecognizer.cpp - Scheduler Support -------------===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,56 +7,81 @@
//
//===----------------------------------------------------------------------===//
//
-// This implements a hazard recognizer using the instructions itineraries
-// defined for the current target.
+// This file implements the ScoreboardHazardRecognizer class, which
+// encapsulates hazard-avoidance heuristics for scheduling, based on the
+// scheduling itineraries specified for the target.
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "post-RA-sched"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#define DEBUG_TYPE ::llvm::ScoreboardHazardRecognizer::DebugType
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/ScheduleDAG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetInstrItineraries.h"
using namespace llvm;
-PostRAHazardRecognizer::
-PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
- ScheduleHazardRecognizer(), ItinData(LItinData) {
+#ifndef NDEBUG
+const char *ScoreboardHazardRecognizer::DebugType = "";
+#endif
+
+ScoreboardHazardRecognizer::
+ScoreboardHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *SchedDAG,
+ const char *ParentDebugType) :
+ ScheduleHazardRecognizer(), ItinData(II), DAG(SchedDAG), IssueWidth(0),
+ IssueCount(0) {
+
+#ifndef NDEBUG
+ DebugType = ParentDebugType;
+#endif
+
// Determine the maximum depth of any itinerary. This determines the
// depth of the scoreboard. We always make the scoreboard at least 1
// cycle deep to avoid dealing with the boundary condition.
unsigned ScoreboardDepth = 1;
- if (!ItinData.isEmpty()) {
+ if (ItinData && !ItinData->isEmpty()) {
+ IssueWidth = ItinData->IssueWidth;
+
for (unsigned idx = 0; ; ++idx) {
- if (ItinData.isEndMarker(idx))
+ if (ItinData->isEndMarker(idx))
break;
- const InstrStage *IS = ItinData.beginStage(idx);
- const InstrStage *E = ItinData.endStage(idx);
+ const InstrStage *IS = ItinData->beginStage(idx);
+ const InstrStage *E = ItinData->endStage(idx);
+ unsigned CurCycle = 0;
unsigned ItinDepth = 0;
- for (; IS != E; ++IS)
- ItinDepth += IS->getCycles();
+ for (; IS != E; ++IS) {
+ unsigned StageDepth = CurCycle + IS->getCycles();
+ if (ItinDepth < StageDepth) ItinDepth = StageDepth;
+ CurCycle += IS->getNextCycles();
+ }
- ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
+ // Find the next power-of-2 >= ItinDepth
+ while (ItinDepth > ScoreboardDepth) {
+ ScoreboardDepth *= 2;
+ }
}
+ MaxLookAhead = ScoreboardDepth;
}
ReservedScoreboard.reset(ScoreboardDepth);
RequiredScoreboard.reset(ScoreboardDepth);
- DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = "
+ DEBUG(dbgs() << "Using scoreboard hazard recognizer: Depth = "
<< ScoreboardDepth << '\n');
}
-void PostRAHazardRecognizer::Reset() {
+void ScoreboardHazardRecognizer::Reset() {
+ IssueCount = 0;
RequiredScoreboard.reset();
ReservedScoreboard.reset();
}
-void PostRAHazardRecognizer::ScoreBoard::dump() const {
+void ScoreboardHazardRecognizer::Scoreboard::dump() const {
dbgs() << "Scoreboard:\n";
unsigned last = Depth - 1;
@@ -72,24 +97,46 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const {
}
}
+bool ScoreboardHazardRecognizer::atIssueLimit() const {
+ if (IssueWidth == 0)
+ return false;
+
+ return IssueCount == IssueWidth;
+}
+
ScheduleHazardRecognizer::HazardType
-PostRAHazardRecognizer::getHazardType(SUnit *SU) {
- if (ItinData.isEmpty())
+ScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
+ if (!ItinData || ItinData->isEmpty())
return NoHazard;
- unsigned cycle = 0;
+ // Note that stalls will be negative for bottom-up scheduling.
+ int cycle = Stalls;
// Use the itinerary for the underlying instruction to check for
// free FU's in the scoreboard at the appropriate future cycles.
- unsigned idx = SU->getInstr()->getDesc().getSchedClass();
- for (const InstrStage *IS = ItinData.beginStage(idx),
- *E = ItinData.endStage(idx); IS != E; ++IS) {
+
+ const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+ if (TID == NULL) {
+ // Don't check hazards for non-machineinstr Nodes.
+ return NoHazard;
+ }
+ unsigned idx = TID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
// We must find one of the stage's units free for every cycle the
// stage is occupied. FIXME it would be more accurate to find the
// same unit free in all the cycles.
for (unsigned int i = 0; i < IS->getCycles(); ++i) {
- assert(((cycle + i) < RequiredScoreboard.getDepth()) &&
- "Scoreboard depth exceeded!");
+ int StageCycle = cycle + (int)i;
+ if (StageCycle < 0)
+ continue;
+
+ if (StageCycle >= (int)RequiredScoreboard.getDepth()) {
+ assert((StageCycle - Stalls) < (int)RequiredScoreboard.getDepth() &&
+ "Scoreboard depth exceeded!");
+ // This stage was stalled beyond pipeline depth, so cannot conflict.
+ break;
+ }
unsigned freeUnits = IS->getUnits();
switch (IS->getReservationKind()) {
@@ -97,18 +144,18 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
assert(0 && "Invalid FU reservation");
case InstrStage::Required:
// Required FUs conflict with both reserved and required ones
- freeUnits &= ~ReservedScoreboard[cycle + i];
+ freeUnits &= ~ReservedScoreboard[StageCycle];
// FALLTHROUGH
case InstrStage::Reserved:
// Reserved FUs can conflict only with required ones.
- freeUnits &= ~RequiredScoreboard[cycle + i];
+ freeUnits &= ~RequiredScoreboard[StageCycle];
break;
}
if (!freeUnits) {
DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", ");
DEBUG(dbgs() << "SU(" << SU->NodeNum << "): ");
- DEBUG(SU->getInstr()->dump());
+ DEBUG(DAG->dumpNode(SU));
return Hazard;
}
}
@@ -120,17 +167,24 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
return NoHazard;
}
-void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
- if (ItinData.isEmpty())
+void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
+ if (!ItinData || ItinData->isEmpty())
return;
- unsigned cycle = 0;
-
// Use the itinerary for the underlying instruction to reserve FU's
// in the scoreboard at the appropriate future cycles.
- unsigned idx = SU->getInstr()->getDesc().getSchedClass();
- for (const InstrStage *IS = ItinData.beginStage(idx),
- *E = ItinData.endStage(idx); IS != E; ++IS) {
+ const TargetInstrDesc *TID = DAG->getInstrDesc(SU);
+ assert(TID && "The scheduler must filter non-machineinstrs");
+ if (DAG->TII->isZeroCost(TID->Opcode))
+ return;
+
+ ++IssueCount;
+
+ unsigned cycle = 0;
+
+ unsigned idx = TID->getSchedClass();
+ for (const InstrStage *IS = ItinData->beginStage(idx),
+ *E = ItinData->endStage(idx); IS != E; ++IS) {
// We must reserve one of the stage's units for every cycle the
// stage is occupied. FIXME it would be more accurate to reserve
// the same unit free in all the cycles.
@@ -174,7 +228,16 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
DEBUG(RequiredScoreboard.dump());
}
-void PostRAHazardRecognizer::AdvanceCycle() {
+void ScoreboardHazardRecognizer::AdvanceCycle() {
+ IssueCount = 0;
ReservedScoreboard[0] = 0; ReservedScoreboard.advance();
RequiredScoreboard[0] = 0; RequiredScoreboard.advance();
}
+
+void ScoreboardHazardRecognizer::RecedeCycle() {
+ IssueCount = 0;
+ ReservedScoreboard[ReservedScoreboard.getDepth()-1] = 0;
+ ReservedScoreboard.recede();
+ RequiredScoreboard[RequiredScoreboard.getDepth()-1] = 0;
+ RequiredScoreboard.recede();
+}
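
A standalone sketch of the new depth calculation in the ScoreboardHazardRecognizer constructor above: each itinerary's depth is the furthest cycle any of its stages can reach (its start cycle plus its occupancy), and the scoreboard is sized to the next power of two that covers the deepest itinerary, never less than one cycle. ToyStage and scoreboardDepth are illustrative names, not LLVM APIs:

#include <vector>

// Illustrative stand-in for one pipeline stage; not the LLVM InstrStage type.
struct ToyStage {
  unsigned Cycles;      // cycles the stage occupies its units
  unsigned NextCycles;  // cycles until the next stage may start
};

unsigned scoreboardDepth(const std::vector<std::vector<ToyStage> > &Itineraries) {
  unsigned Depth = 1;
  for (unsigned i = 0, e = Itineraries.size(); i != e; ++i) {
    unsigned CurCycle = 0, ItinDepth = 0;
    for (unsigned s = 0, se = Itineraries[i].size(); s != se; ++s) {
      unsigned StageDepth = CurCycle + Itineraries[i][s].Cycles;
      if (ItinDepth < StageDepth) ItinDepth = StageDepth;
      CurCycle += Itineraries[i][s].NextCycles;   // stages may overlap or leave gaps
    }
    while (ItinDepth > Depth)                     // round up to a power of two
      Depth *= 2;
  }
  return Depth;
}
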
diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt
index 799988a4c862..15932c03a190 100644
--- a/lib/CodeGen/SelectionDAG/CMakeLists.txt
+++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt
@@ -21,5 +21,3 @@ add_llvm_library(LLVMSelectionDAG
TargetLowering.cpp
TargetSelectionDAGInfo.cpp
)
-
-target_link_libraries (LLVMSelectionDAG LLVMAnalysis LLVMAsmPrinter LLVMCodeGen)
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index c9c4d91e9736..90356021f602 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -25,7 +25,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
@@ -43,6 +42,7 @@ STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
+STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
namespace {
static cl::opt<bool>
@@ -185,7 +185,7 @@ namespace {
SDValue visitANY_EXTEND(SDNode *N);
SDValue visitSIGN_EXTEND_INREG(SDNode *N);
SDValue visitTRUNCATE(SDNode *N);
- SDValue visitBIT_CONVERT(SDNode *N);
+ SDValue visitBITCAST(SDNode *N);
SDValue visitBUILD_PAIR(SDNode *N);
SDValue visitFADD(SDNode *N);
SDValue visitFSUB(SDNode *N);
@@ -229,12 +229,13 @@ namespace {
SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
unsigned HiOp);
SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
- SDValue ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *, EVT);
+ SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
SDValue BuildSDIV(SDNode *N);
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
SDValue ReduceLoadOpStoreWidth(SDNode *N);
+ SDValue TransformFPLoadStorePair(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -248,16 +249,19 @@ namespace {
bool isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const;
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const;
/// FindAliasInfo - Extracts the relevant alias information from the memory
/// node. Returns true if the operand was a load.
bool FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
const Value *&SrcValue, int &SrcValueOffset,
- unsigned &SrcValueAlignment) const;
+ unsigned &SrcValueAlignment,
+ const MDNode *&TBAAInfo) const;
/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
/// looking for a better chain (aliasing node.)
@@ -270,15 +274,15 @@ namespace {
/// Run - runs the dag combiner on all nodes in the work list
void Run(CombineLevel AtLevel);
-
+
SelectionDAG &getDAG() const { return DAG; }
-
+
/// getShiftAmountTy - Returns a type large enough to hold any valid
/// shift amount - before type legalization these can be huge.
EVT getShiftAmountTy() {
return LegalTypes ? TLI.getShiftAmountTy() : TLI.getPointerTy();
}
-
+
/// isTypeLegal - This method returns true if we are running before type
/// legalization or if the specified VT is legal.
bool isTypeLegal(const EVT &VT) {
@@ -631,7 +635,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
// Replace the old value with the new one.
++NodesCombined;
- DEBUG(dbgs() << "\nReplacing.2 ";
+ DEBUG(dbgs() << "\nReplacing.2 ";
TLO.Old.getNode()->dump(&DAG);
dbgs() << "\nWith: ";
TLO.New.getNode()->dump(&DAG);
@@ -666,12 +670,13 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
Replace = true;
- return DAG.getExtLoad(ExtType, PVT, dl,
+ return DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
@@ -691,7 +696,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
unsigned ExtOpc =
Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
return DAG.getNode(ExtOpc, dl, PVT, Op);
- }
+ }
}
if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
@@ -889,11 +894,12 @@ bool DAGCombiner::PromoteLoad(SDValue Op) {
LoadSDNode *LD = cast<LoadSDNode>(N);
EVT MemVT = LD->getMemoryVT();
ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
- ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD)
+ ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
+ : ISD::EXTLOAD)
: LD->getExtensionType();
- SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl,
+ SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
LD->getChain(), LD->getBasePtr(),
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo(),
MemVT, LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
@@ -975,7 +981,7 @@ void DAGCombiner::Run(CombineLevel AtLevel) {
RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
"Node was deleted but visit returned new node!");
- DEBUG(dbgs() << "\nReplacing.3 ";
+ DEBUG(dbgs() << "\nReplacing.3 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
RV.getNode()->dump(&DAG);
@@ -1054,7 +1060,7 @@ SDValue DAGCombiner::visit(SDNode *N) {
case ISD::ANY_EXTEND: return visitANY_EXTEND(N);
case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N);
case ISD::TRUNCATE: return visitTRUNCATE(N);
- case ISD::BIT_CONVERT: return visitBIT_CONVERT(N);
+ case ISD::BITCAST: return visitBITCAST(N);
case ISD::BUILD_PAIR: return visitBUILD_PAIR(N);
case ISD::FADD: return visitFADD(N);
case ISD::FSUB: return visitFSUB(N);
@@ -1225,7 +1231,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
}
}
}
-
+
SDValue Result;
// If we've change things around then replace token factor.
@@ -1424,6 +1430,29 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
N0.getOperand(0).getOperand(1),
N0.getOperand(1)));
+ if (N1.getOpcode() == ISD::AND) {
+ SDValue AndOp0 = N1.getOperand(0);
+ ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
+ unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
+ unsigned DestBits = VT.getScalarType().getSizeInBits();
+
+ // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
+ // and similar xforms where the inner op is either ~0 or 0.
+ if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
+ DebugLoc DL = N->getDebugLoc();
+ return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
+ }
+ }
+
+ // add (sext i1), X -> sub X, (zext i1)
+ if (N0.getOpcode() == ISD::SIGN_EXTEND &&
+ N0.getOperand(0).getValueType() == MVT::i1 &&
+ !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
+ DebugLoc DL = N->getDebugLoc();
+ SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
+ return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
+ }
+
return SDValue();
}
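
The "add (sext i1), X -> sub X, (zext i1)" fold added above rests on a small arithmetic identity: sign-extending an i1 yields 0 or -1, so adding it is the same as subtracting the zero-extended bit. A quick standalone check of that identity:

#include <cassert>
#include <cstdint>

int32_t addSext(int32_t X, bool b) { return X + (b ? -1 : 0); }  // add (sext i1 b), X
int32_t subZext(int32_t X, bool b) { return X - (b ? 1 : 0); }   // sub X, (zext i1 b)

int main() {
  const int32_t Values[] = { -7, 0, 42 };
  for (int i = 0; i != 3; ++i) {
    assert(addSext(Values[i], false) == subZext(Values[i], false));
    assert(addSext(Values[i], true)  == subZext(Values[i], true));
  }
  return 0;
}
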
@@ -1438,7 +1467,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
if (N->hasNUsesOfValue(0, 1))
return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, N0),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// canonicalize constant to RHS.
if (N0C && !N1C)
@@ -1447,7 +1476,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc x, 0) -> x + no carry out
if (N1C && N1C->isNullValue())
return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
@@ -1464,7 +1493,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
(LHSZero & (~RHSZero & Mask)) == (~RHSZero & Mask))
return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1),
DAG.getNode(ISD::CARRY_FALSE,
- N->getDebugLoc(), MVT::Flag));
+ N->getDebugLoc(), MVT::Glue));
}
return SDValue();
@@ -1489,6 +1518,22 @@ SDValue DAGCombiner::visitADDE(SDNode *N) {
return SDValue();
}
+// Since it may not be valid to emit a fold to zero for vector initializers,
+// check if we can before folding.
+static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT,
+ SelectionDAG &DAG, bool LegalOperations) {
+ if (!VT.isVector()) {
+ return DAG.getConstant(0, VT);
+ } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) {
+ // Produce a vector of zeros.
+ SDValue El = DAG.getConstant(0, VT.getVectorElementType());
+ std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
+ &Ops[0], Ops.size());
+ }
+ return SDValue();
+}
+
SDValue DAGCombiner::visitSUB(SDNode *N) {
SDValue N0 = N->getOperand(0);
SDValue N1 = N->getOperand(1);
@@ -1503,8 +1548,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
}
// fold (sub x, x) -> 0
+ // FIXME: Refactor this and xor and other similar operations together.
if (N0 == N1)
- return DAG.getConstant(0, N->getValueType(0));
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// fold (sub c1, c2) -> c1-c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
@@ -1515,6 +1561,9 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
// Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
if (N0C && N0C->isAllOnesValue())
return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0);
+ // fold A-(A-B) -> B
+ if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
+ return N1.getOperand(1);
// fold (A+B)-A -> B
if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
return N0.getOperand(1);
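
The subtraction folds above, together with the shared tryFoldToZero helper, lean on identities that hold unconditionally in wrap-around integer arithmetic; a quick standalone check:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t A = 0x12345678u, B = 0x9abcdef0u, X = 0xdeadbeefu;
  assert(~0u - X == (X ^ ~0u));   // (sub -1, x)  -> (xor x, -1)
  assert(A - (A - B) == B);       // A - (A - B)  -> B
  assert((A + B) - A == B);       // (A + B) - A  -> B
  return 0;
}
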
@@ -1897,6 +1946,7 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhs x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1910,6 +1960,22 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhs to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
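
The widening transform above computes the high half of a multiply by sign-extending both operands to a type twice as wide, multiplying, shifting right by the original width, and truncating. A standalone sketch for i32 (mulhs32 is an illustrative name; an arithmetic right shift of signed values is assumed, as on the targets this fires for):

#include <cassert>
#include <cstdint>

int32_t mulhs32(int32_t a, int32_t b) {
  int64_t Wide = (int64_t)a * (int64_t)b;   // sext, sext, wide MUL
  return (int32_t)(Wide >> 32);             // shift by the original width, trunc
}

int main() {
  assert(mulhs32(1 << 30, 4) == 1);   // product 2^32: high half is 1
  assert(mulhs32(-1, 1) == -1);       // product -1: high half is all ones
  return 0;
}
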
@@ -1918,6 +1984,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
SDValue N1 = N->getOperand(1);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
// fold (mulhu x, 0) -> 0
if (N1C && N1C->isNullValue())
@@ -1929,6 +1996,22 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) {
if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
return DAG.getConstant(0, VT);
+ // If the type twice as wide is legal, transform the mulhu to a wider multiply
+ // plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
+ N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
+ N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
+ N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
+ }
+ }
+
return SDValue();
}
@@ -1992,6 +2075,29 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the smul_lohi to a wider
+  // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
@@ -1999,6 +2105,29 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
if (Res.getNode()) return Res;
+ EVT VT = N->getValueType(0);
+ DebugLoc DL = N->getDebugLoc();
+
+  // If the type twice as wide is legal, transform the umul_lohi to a wider
+  // multiply plus a shift.
+ if (VT.isSimple() && !VT.isVector()) {
+ MVT Simple = VT.getSimpleVT();
+ unsigned SimpleSize = Simple.getSizeInBits();
+ EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
+ if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
+ SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
+ SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
+ Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
+ // Compute the high part as N1.
+ Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
+ DAG.getConstant(SimpleSize, getShiftAmountTy()));
+ Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
+ // Compute the low part as N0.
+ Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
+ return CombineTo(N, Lo, Hi);
+ }
+ }
+
return SDValue();
}
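
The same widening idea gives both result halves at once for the LOHI nodes; in unsigned form for i32 (umul_lohi32 is an illustrative name, not an LLVM API):

#include <cassert>
#include <cstdint>

void umul_lohi32(uint32_t a, uint32_t b, uint32_t &Lo, uint32_t &Hi) {
  uint64_t Wide = (uint64_t)a * b;   // zext, zext, wide MUL
  Lo = (uint32_t)Wide;               // low half: trunc
  Hi = (uint32_t)(Wide >> 32);       // high half: srl by 32, then trunc
}

int main() {
  uint32_t Lo, Hi;
  umul_lohi32(0xFFFFFFFFu, 0xFFFFFFFFu, Lo, Hi);
  assert(Hi == 0xFFFFFFFEu && Lo == 0x00000001u);  // (2^32-1)^2 = 0xFFFFFFFE00000001
  return 0;
}
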
@@ -2116,7 +2245,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N0Op0 = N0.getOperand(0);
APInt Mask = ~N1C->getAPIntValue();
- Mask.trunc(N0Op0.getValueSizeInBits());
+ Mask = Mask.trunc(N0Op0.getValueSizeInBits());
if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(),
N0.getValueType(), N0Op0);
@@ -2198,10 +2327,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2221,10 +2349,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
BitWidth - MemVT.getScalarType().getSizeInBits())) &&
((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
@@ -2253,18 +2381,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
if (ExtVT == LoadedVT &&
(!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) {
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
-
- SDValue NewLoad =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+
+ SDValue NewLoad =
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
AddToWorkList(N);
CombineTo(LN0, NewLoad, NewLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
-
+
// Do not change the width of a volatile load.
// Do not generate loads of non-round integer types since these can
// be expensive (and would be wrong if the type is not byte sized).
@@ -2288,12 +2416,12 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
}
AddToWorkList(NewPtr.getNode());
-
+
EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
SDValue Load =
- DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(),
+ DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy,
LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo(),
ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
Alignment);
AddToWorkList(N);
@@ -2722,17 +2850,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) {
N01C->getAPIntValue(), VT));
}
// fold (xor x, x) -> 0
- if (N0 == N1) {
- if (!VT.isVector()) {
- return DAG.getConstant(0, VT);
- } else if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)){
- // Produce a vector of zeros.
- SDValue El = DAG.getConstant(0, VT.getVectorElementType());
- std::vector<SDValue> Ops(VT.getVectorNumElements(), El);
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
- }
+ if (N0 == N1)
+ return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations);
// Simplify: xor (op x...), (op y...) -> (op (xor x, y))
if (N0.getOpcode() == N1.getOpcode()) {
@@ -2810,7 +2929,8 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
LHS->getOperand(1), N->getOperand(1));
// Create the new shift.
- SDValue NewShift = DAG.getNode(N->getOpcode(), LHS->getOperand(0).getDebugLoc(),
+ SDValue NewShift = DAG.getNode(N->getOpcode(),
+ LHS->getOperand(0).getDebugLoc(),
VT, LHS->getOperand(0), N->getOperand(1));
// Create the new binop.
@@ -2850,7 +2970,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT,
DAG.getNode(ISD::TRUNCATE,
@@ -2868,11 +2988,37 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
+
+ // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
+ // For this to be valid, the second form must not preserve any of the bits
+ // that are shifted out by the inner shift in the first form. This means
+ // the outer shift size must be >= the number of bits added by the ext.
+ // As a corollary, we don't care what kind of ext it is.
+ if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
+ N0.getOpcode() == ISD::ANY_EXTEND ||
+ N0.getOpcode() == ISD::SIGN_EXTEND) &&
+ N0.getOperand(0).getOpcode() == ISD::SHL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+ if (c2 >= OpSizeInBits - InnerShiftSize) {
+ if (c1 + c2 >= OpSizeInBits)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT,
+ DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT,
+ N0.getOperand(0)->getOperand(0)),
+ DAG.getConstant(c1 + c2, N1.getValueType()));
+ }
+ }
+
// fold (shl (srl x, c1), c2) -> (shl (and x, (shl -1, c1)), (sub c2, c1)) or
// (srl (and x, (shl -1, c1)), (sub c1, c2))
if (N1C && N0.getOpcode() == ISD::SRL &&
@@ -2973,7 +3119,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(),
+ OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -3006,7 +3153,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3017,6 +3164,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+  // fold (sra (trunc (srl/sra x, c1)), c2) -> (trunc (sra x, c1+c2))
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy());
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
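
The sra-of-truncated-shift fold added above merges the two shifts when the inner shift amount equals the number of bits the truncate drops. A standalone check for the i64 -> i32 case (c1 == 32), assuming the usual arithmetic behaviour of >> on signed values:

#include <cassert>
#include <cstdint>

// foldBefore and foldAfter mirror the two DAG shapes for i64 -> i32, c1 == 32.
int32_t foldBefore(int64_t x, unsigned c2) {
  int32_t t = (int32_t)(x >> 32);   // trunc (sr x, 32): keep bits [63:32]
  return t >> c2;                   // sra by c2 on the narrow value
}
int32_t foldAfter(int64_t x, unsigned c2) {
  return (int32_t)(x >> (32 + c2)); // trunc (sra x, 32 + c2)
}

int main() {
  const int64_t Vals[] = { 0x7fffabcd12345678LL, -0x0123456789abcdefLL };
  const unsigned Shifts[] = { 1, 7, 20 };
  for (int i = 0; i != 2; ++i)
    for (int j = 0; j != 3; ++j)
      assert(foldBefore(Vals[i], Shifts[j]) == foldAfter(Vals[i], Shifts[j]));
  return 0;
}
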
@@ -3065,12 +3235,33 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
uint64_t c2 = N1C->getZExtValue();
- if (c1 + c2 > OpSizeInBits)
+ if (c1 + c2 >= OpSizeInBits)
return DAG.getConstant(0, VT);
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(c1 + c2, N1.getValueType()));
}
-
+
+ // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
+ if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
+ N0.getOperand(0).getOpcode() == ISD::SRL &&
+ isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
+ uint64_t c1 =
+ cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
+ uint64_t c2 = N1C->getZExtValue();
+ EVT InnerShiftVT = N0.getOperand(0).getValueType();
+ EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
+ uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
+    // This is only valid if OpSizeInBits + c1 == the size of the inner shift.
+ if (c1 + OpSizeInBits == InnerShiftSize) {
+ if (c1 + c2 >= InnerShiftSize)
+ return DAG.getConstant(0, VT);
+ return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT,
+ DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT,
+ N0.getOperand(0)->getOperand(0),
+ DAG.getConstant(c1 + c2, ShiftCountVT)));
+ }
+ }
+
// fold (srl (shl x, c), c) -> (and x, cst2)
if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
N0.getValueSizeInBits() <= 64) {
@@ -3078,7 +3269,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(~0ULL >> ShAmt, VT));
}
-
+
// fold (srl (anyextend x), c) -> (anyextend (srl x, c))
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
@@ -3147,7 +3338,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC = TruncC.trunc(TruncVT.getSizeInBits());
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -3182,7 +3373,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and %a, 2
// %c = setcc eq %b, 0
@@ -3422,7 +3613,7 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
if (BothLiveOut)
// Both unextended and extended values are live out. There had better be
- // good a reason for the transformation.
+ // a good reason for the transformation.
return ExtendNodes.size();
}
return true;
@@ -3503,10 +3694,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3547,10 +3737,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3611,7 +3801,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
N0.getOperand(0), N0.getOperand(1),
cast<CondCodeSDNode>(N0.getOperand(2))->get()),
NegOne, DAG.getConstant(0, VT));
- }
+ }
// fold (sext x) -> (zext x) if the sign bit is known zero.
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
@@ -3652,6 +3842,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
// fold (zext (truncate x)) -> (and x, mask)
if (N0.getOpcode() == ISD::TRUNCATE &&
(!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {
+
+ // fold (zext (truncate (load x))) -> (zext (smaller load x))
+ // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
+ SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
+ if (NarrowLoad.getNode()) {
+ SDNode* oye = N0.getNode()->getOperand(0).getNode();
+ if (NarrowLoad.getNode() != N0.getNode()) {
+ CombineTo(N0.getNode(), NarrowLoad);
+ // CombineTo deleted the truncate, if needed, but not what's under it.
+ AddToWorkList(oye);
+ }
+ return SDValue(N, 0); // Return N so it doesn't get rechecked!
+ }
+
SDValue Op = N0.getOperand(0);
if (Op.getValueType().bitsLT(VT)) {
Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op);
@@ -3677,7 +3881,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3692,10 +3896,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3736,10 +3939,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
EVT MemVT = LN0->getMemoryVT();
if ((!LegalOperations && !LN0->isVolatile()) ||
TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
- SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -3805,21 +4008,27 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
isa<ConstantSDNode>(N0.getOperand(1)) &&
N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
N0.hasOneUse()) {
+ SDValue ShAmt = N0.getOperand(1);
+ unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
if (N0.getOpcode() == ISD::SHL) {
+ SDValue InnerZExt = N0.getOperand(0);
// If the original shl may be shifting out bits, do not perform this
// transformation.
- unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
- unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
- N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
- if (ShAmt > KnownZeroBits)
+ unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
+ InnerZExt.getOperand(0).getValueType().getSizeInBits();
+ if (ShAmtVal > KnownZeroBits)
return SDValue();
}
- DebugLoc dl = N->getDebugLoc();
- return DAG.getNode(N0.getOpcode(), dl, VT,
- DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
- DAG.getNode(ISD::ZERO_EXTEND, dl,
- N0.getOperand(1).getValueType(),
- N0.getOperand(1)));
+
+ DebugLoc DL = N->getDebugLoc();
+
+ // Ensure that the shift amount is wide enough for the shifted value.
+ if (VT.getSizeInBits() >= 256)
+ ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
+
+ return DAG.getNode(N0.getOpcode(), DL, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
+ ShAmt);
}
return SDValue();
@@ -3879,7 +4088,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X);
}
APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
- Mask.zext(VT.getSizeInBits());
+ Mask = Mask.zext(VT.getSizeInBits());
return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
X, DAG.getConstant(Mask, VT));
}
@@ -3894,10 +4103,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -3938,11 +4146,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
N0.hasOneUse()) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
EVT MemVT = LN0->getMemoryVT();
- SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT,
- N->getDebugLoc(),
- LN0->getChain(), LN0->getBasePtr(),
- LN0->getSrcValue(),
- LN0->getSrcValueOffset(), MemVT,
+ SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(),
+ VT, LN0->getChain(), LN0->getBasePtr(),
+ LN0->getPointerInfo(), MemVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4053,11 +4259,8 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if (Opc == ISD::SIGN_EXTEND_INREG) {
ExtType = ISD::SEXTLOAD;
ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
- if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT))
- return SDValue();
} else if (Opc == ISD::SRL) {
- // Annother special-case: SRL is basically zero-extending a narrower
- // value.
+ // Another special-case: SRL is basically zero-extending a narrower value.
ExtType = ISD::ZEXTLOAD;
N0 = SDValue(N, 0);
ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
@@ -4065,10 +4268,18 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
ExtVT = EVT::getIntegerVT(*DAG.getContext(),
VT.getSizeInBits() - N01->getZExtValue());
}
+ if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
+ return SDValue();
unsigned EVTBits = ExtVT.getSizeInBits();
+
+ // Do not generate loads of non-round integer types since these can
+ // be expensive (and would be wrong if the type is not byte sized).
+ if (!ExtVT.isRound())
+ return SDValue();
+
unsigned ShAmt = 0;
- if (N0.getOpcode() == ISD::SRL && N0.hasOneUse() && ExtVT.isRound()) {
+ if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
ShAmt = N01->getZExtValue();
// Is the shift amount a multiple of size of VT?
@@ -4078,52 +4289,88 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
return SDValue();
}
+
+ // At this point, we must have a load or else we can't do the transform.
+ if (!isa<LoadSDNode>(N0)) return SDValue();
+
+ // If the shift amount is larger than the input type then we're not
+ // accessing any of the loaded bytes. If the load was a zextload/extload
+ // then the result of the shift+trunc is zero/undef (handled elsewhere).
+ // If the load was a sextload then the result is a splat of the sign bit
+ // of the extended byte. This is not worth optimizing for.
+ if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
+ return SDValue();
}
}
- // Do not generate loads of non-round integer types since these can
- // be expensive (and would be wrong if the type is not byte sized).
- if (isa<LoadSDNode>(N0) && N0.hasOneUse() && ExtVT.isRound() &&
- cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() >= EVTBits &&
- // Do not change the width of a volatile load.
- !cast<LoadSDNode>(N0)->isVolatile()) {
- LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- EVT PtrType = N0.getOperand(1).getValueType();
-
- // For big endian targets, we need to adjust the offset to the pointer to
- // load the correct bytes.
- if (TLI.isBigEndian()) {
- unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
- unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
- ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
- }
-
- uint64_t PtrOff = ShAmt / 8;
- unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
- SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
- PtrType, LN0->getBasePtr(),
- DAG.getConstant(PtrOff, PtrType));
- AddToWorkList(NewPtr.getNode());
-
- SDValue Load = (ExtType == ISD::NON_EXTLOAD)
- ? DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- LN0->isVolatile(), LN0->isNonTemporal(), NewAlign)
- : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff,
- ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
- NewAlign);
-
- // Replace the old load's chain with the new load's chain.
- WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
- &DeadNodes);
+ // If the load is shifted left (and the result isn't shifted back right),
+ // we can fold the truncate through the shift.
+ unsigned ShLeftAmt = 0;
+ if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
+ ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
+ if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
+ ShLeftAmt = N01->getZExtValue();
+ N0 = N0.getOperand(0);
+ }
+ }
+
+ // If we haven't found a load, we can't narrow it. Don't transform one with
+  // multiple uses, since this would require adding a new load.
+ if (!isa<LoadSDNode>(N0) || !N0.hasOneUse() ||
+ // Don't change the width of a volatile load.
+ cast<LoadSDNode>(N0)->isVolatile())
+ return SDValue();
+
+ // Verify that we are actually reducing a load width here.
+ if (cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits() < EVTBits)
+ return SDValue();
+
+ LoadSDNode *LN0 = cast<LoadSDNode>(N0);
+ EVT PtrType = N0.getOperand(1).getValueType();
+
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian()) {
+ unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
+ unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
+ ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
+ }
+
+ uint64_t PtrOff = ShAmt / 8;
+ unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+ PtrType, LN0->getBasePtr(),
+ DAG.getConstant(PtrOff, PtrType));
+ AddToWorkList(NewPtr.getNode());
+
+ SDValue Load;
+ if (ExtType == ISD::NON_EXTLOAD)
+ Load = DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ LN0->isVolatile(), LN0->isNonTemporal(), NewAlign);
+ else
+ Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+ LN0->getPointerInfo().getWithOffset(PtrOff),
+ ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
+ NewAlign);
+
+ // Replace the old load's chain with the new load's chain.
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1),
+ &DeadNodes);
- // Return the new loaded value.
- return Load;
+ // Shift the result left, if we've swallowed a left shift.
+ SDValue Result = Load;
+ if (ShLeftAmt != 0) {
+ EVT ShImmTy = getShiftAmountTy();
+ if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
+ ShImmTy = VT;
+ Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+ Result, DAG.getConstant(ShLeftAmt, ShImmTy));
}
- return SDValue();
+ // Return the new loaded value.
+ return Result;
}
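
The restructured ReduceLoadWidth above narrows the load instead of shifting a wide one: it loads ExtVT's bytes at byte offset ShAmt/8, and on big-endian targets mirrors that offset via ShAmt = LVTStoreBits - EVTStoreBits - ShAmt. A small host-level sketch of why the byte offsets line up; the buffer, widths and names are illustrative only:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  // Store a 32-bit value and re-load a 16-bit field of it, the way the
  // narrowed load does for (trunc (srl (load x), 16)).
  uint32_t Wide = 0xAABBCCDDu;
  unsigned char Buf[4];
  std::memcpy(Buf, &Wide, 4);

  unsigned ShAmt = 16;                       // bits shifted off by the SRL
  bool BigEndian = (Buf[0] == 0xAA);         // detect the host's byte order
  unsigned PtrOff = BigEndian ? (32 - 16 - ShAmt) / 8   // the BE adjustment
                              : ShAmt / 8;               // LE: ShAmt / 8
  uint16_t Narrow;
  std::memcpy(&Narrow, Buf + PtrOff, 2);     // the narrowed, offset load
  assert(Narrow == (uint16_t)(Wide >> ShAmt));
  return 0;
}
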
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
@@ -4196,10 +4443,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4213,10 +4460,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(), EVT,
+ LN0->getBasePtr(), LN0->getPointerInfo(),
+ EVT,
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
CombineTo(N, ExtLoad);
@@ -4295,7 +4542,9 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
- if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
+ if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
+ LD1->getPointerInfo().getAddrSpace() !=
+ LD2->getPointerInfo().getAddrSpace())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
@@ -4313,14 +4562,14 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (NewAlign <= Align &&
(!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(),
- LD1->getBasePtr(), LD1->getSrcValue(),
- LD1->getSrcValueOffset(), false, false, Align);
+ LD1->getBasePtr(), LD1->getPointerInfo(),
+ false, false, Align);
}
return SDValue();
}
-SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
+SDValue DAGCombiner::visitBITCAST(SDNode *N) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
@@ -4344,12 +4593,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
assert(!DestEltVT.isVector() &&
"Element type of vector ValueType must not be vector!");
if (isSimple)
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(N0.getNode(), DestEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
}
// If the input is a constant, let getNode fold it.
if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
- SDValue Res = DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT, N0);
+ SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0);
if (Res.getNode() != N) {
if (!LegalOperations ||
TLI.isOperationLegal(Res.getNode()->getOpcode(), VT))
@@ -4365,8 +4614,8 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
}
// (conv (conv x, t1), t2) -> (conv x, t2)
- if (N0.getOpcode() == ISD::BIT_CONVERT)
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), VT,
+ if (N0.getOpcode() == ISD::BITCAST)
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT,
N0.getOperand(0));
// fold (conv (load x)) -> (load (conv*)x)
@@ -4382,13 +4631,12 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
if (Align <= OrigAlign) {
SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(),
- LN0->getBasePtr(),
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
LN0->isVolatile(), LN0->isNonTemporal(),
OrigAlign);
AddToWorkList(N);
CombineTo(N0.getNode(),
- DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
N0.getValueType(), Load),
Load.getValue(1));
return Load;
@@ -4400,7 +4648,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
// This often reduces constant pool loads.
if ((N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FABS) &&
N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector()) {
- SDValue NewConv = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(), VT,
+ SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT,
N0.getOperand(0));
AddToWorkList(NewConv.getNode());
@@ -4423,7 +4671,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
if (isTypeLegal(IntXVT)) {
- SDValue X = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
IntXVT, N0.getOperand(1));
AddToWorkList(X.getNode());
@@ -4448,7 +4696,7 @@ SDValue DAGCombiner::visitBIT_CONVERT(SDNode *N) {
X, DAG.getConstant(SignBit, VT));
AddToWorkList(X.getNode());
- SDValue Cst = DAG.getNode(ISD::BIT_CONVERT, N0.getDebugLoc(),
+ SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(),
VT, N0.getOperand(0));
Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT,
Cst, DAG.getConstant(~SignBit, VT));
@@ -4473,11 +4721,11 @@ SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
return CombineConsecutiveLoads(N, VT);
}
-/// ConstantFoldBIT_CONVERTofBUILD_VECTOR - We know that BV is a build_vector
+/// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector
/// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the
/// destination element value type.
SDValue DAGCombiner::
-ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
+ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
// If this is already the right type, we're done.
@@ -4495,10 +4743,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// Due to the FP element handling below calling this routine recursively,
// we can end up with a scalar-to-vector node here.
if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
- DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
+ DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, BV->getOperand(0)));
-
+
SmallVector<SDValue, 8> Ops;
for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
SDValue Op = BV->getOperand(i);
@@ -4506,7 +4754,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// are promoted and implicitly truncated. Make that explicit here.
if (Op.getValueType() != SrcEltVT)
Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op);
- Ops.push_back(DAG.getNode(ISD::BIT_CONVERT, BV->getDebugLoc(),
+ Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(),
DstEltVT, Op));
AddToWorkList(Ops.back().getNode());
}
@@ -4522,7 +4770,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
// same sizes.
assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
- BV = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, IntVT).getNode();
+ BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
SrcEltVT = IntVT;
}
@@ -4531,10 +4779,10 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (DstEltVT.isFloatingPoint()) {
assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
- SDNode *Tmp = ConstantFoldBIT_CONVERTofBUILD_VECTOR(BV, TmpVT).getNode();
+ SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
// Next, convert to FP elements of the same size.
- return ConstantFoldBIT_CONVERTofBUILD_VECTOR(Tmp, DstEltVT);
+ return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
}
// Okay, we know the src/dst types are both integers of differing types.
@@ -4556,7 +4804,7 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
if (Op.getOpcode() == ISD::UNDEF) continue;
EltIsUndef = false;
- NewBits |= APInt(cast<ConstantSDNode>(Op)->getAPIntValue()).
+ NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
zextOrTrunc(SrcBitSize).zext(DstBitSize);
}
@@ -4586,13 +4834,13 @@ ConstantFoldBIT_CONVERTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
continue;
}
- APInt OpVal = APInt(cast<ConstantSDNode>(BV->getOperand(i))->
- getAPIntValue()).zextOrTrunc(SrcBitSize);
+ APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
+ getAPIntValue().zextOrTrunc(SrcBitSize);
for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
- APInt ThisVal = APInt(OpVal).trunc(DstBitSize);
+ APInt ThisVal = OpVal.trunc(DstBitSize);
Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
- if (isS2V && i == 0 && j == 0 && APInt(ThisVal).zext(SrcBitSize) == OpVal)
+ if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
// Simply turn this into a SCALAR_TO_VECTOR of the new type.
return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT,
Ops[0]);
@@ -4984,10 +5232,9 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
- SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(),
+ SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
- LN0->getBasePtr(), LN0->getSrcValue(),
- LN0->getSrcValueOffset(),
+ LN0->getBasePtr(), LN0->getPointerInfo(),
N0.getValueType(),
LN0->isVolatile(), LN0->isNonTemporal(),
LN0->getAlignment());
@@ -5011,7 +5258,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
// Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT &&
+ if (N0.getOpcode() == ISD::BITCAST &&
!VT.isVector() &&
N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger()) {
@@ -5021,7 +5268,7 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) {
Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
VT, Int);
}
}
@@ -5047,7 +5294,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
// Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading
// constant pool values.
- if (N0.getOpcode() == ISD::BIT_CONVERT && N0.getNode()->hasOneUse() &&
+ if (N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() &&
N0.getOperand(0).getValueType().isInteger() &&
!N0.getOperand(0).getValueType().isVector()) {
SDValue Int = N0.getOperand(0);
@@ -5056,7 +5303,7 @@ SDValue DAGCombiner::visitFABS(SDNode *N) {
Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int,
DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT));
AddToWorkList(Int.getNode());
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Int);
}
}
@@ -5084,14 +5331,17 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
N1.getOperand(0), N1.getOperand(1), N2);
}
- SDNode *Trunc = 0;
- if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) {
- // Look past truncate.
- Trunc = N1.getNode();
- N1 = N1.getOperand(0);
- }
+ if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
+ ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
+ (N1.getOperand(0).hasOneUse() &&
+ N1.getOperand(0).getOpcode() == ISD::SRL))) {
+ SDNode *Trunc = 0;
+ if (N1.getOpcode() == ISD::TRUNCATE) {
+      // Look past the truncate.
+ Trunc = N1.getNode();
+ N1 = N1.getOperand(0);
+ }
- if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) {
// Match this pattern so that we can generate simpler code:
//
// %a = ...
@@ -5100,7 +5350,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
// brcond i32 %c ...
//
// into
- //
+ //
// %a = ...
// %b = and i32 %a, 2
// %c = setcc eq %b, 0
@@ -5146,8 +5396,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
}
}
+
+ if (Trunc)
+ // Restore N1 if the above transformation doesn't match.
+ N1 = N->getOperand(1);
}
-
+
// Transform br(xor(x, y)) -> br(x != y)
// Transform br(xor(xor(x,y), 1)) -> br (x == y)
if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
@@ -5181,9 +5435,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal = true;
}
- SDValue NodeToReplace = Trunc ? SDValue(Trunc, 0) : N1;
-
- EVT SetCCVT = NodeToReplace.getValueType();
+ EVT SetCCVT = N1.getValueType();
if (LegalTypes)
SetCCVT = TLI.getSetCCResultType(SetCCVT);
SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
@@ -5192,9 +5444,9 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
Equal ? ISD::SETEQ : ISD::SETNE);
// Replace the uses of XOR with SETCC
WorkListRemover DeadNodes(*this);
- DAG.ReplaceAllUsesOfValueWith(NodeToReplace, SetCC, &DeadNodes);
- removeFromWorkList(NodeToReplace.getNode());
- DAG.DeleteNode(NodeToReplace.getNode());
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
MVT::Other, Chain, SetCC, N2);
}
@@ -5568,10 +5820,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
- return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- N->getDebugLoc(),
- Chain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
+ LD->getValueType(0),
+ Chain, Ptr, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), Align);
}
}
@@ -5587,15 +5839,13 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
    // Replace the chain to avoid dependency.
if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(),
- BetterChain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
} else {
- ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0),
- LD->getDebugLoc(),
- BetterChain, Ptr, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(),
+ LD->getValueType(0),
+ BetterChain, Ptr, LD->getPointerInfo(),
LD->getMemoryVT(),
LD->isVolatile(),
LD->isNonTemporal(),
@@ -5605,10 +5855,10 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Create token factor to keep old chain connected.
SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
MVT::Other, Chain, ReplLoad.getValue(1));
-
+
// Make sure the new and old chains are cleaned up.
AddToWorkList(Token.getNode());
-
+
// Replace uses with load result and token factor. Don't add users
// to work list.
return CombineTo(N, ReplLoad.getValue(0), Token, false);
@@ -5628,17 +5878,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
std::pair<unsigned, unsigned> Result(0, 0);
-
+
// Check for the structure we're looking for.
if (V->getOpcode() != ISD::AND ||
!isa<ConstantSDNode>(V->getOperand(1)) ||
!ISD::isNormalLoad(V->getOperand(0).getNode()))
return Result;
-
+
// Check the chain and pointer.
LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer.
-
+
// The store should be chained directly to the load or be an operand of a
// tokenfactor.
if (LD == Chain.getNode())
@@ -5654,7 +5904,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
}
if (!isOk) return Result;
}
-
+
// This only handles simple types.
if (V.getValueType() != MVT::i16 &&
V.getValueType() != MVT::i32 &&
@@ -5670,7 +5920,7 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
if (NotMaskTZ & 7) return Result; // Must be multiple of a byte.
if (NotMaskLZ == 64) return Result; // All zero mask.
-
+
// See if we have a continuous run of bits. If so, we have 0*1+0*
if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64)
return Result;
@@ -5678,19 +5928,19 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
// Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
if (V.getValueType() != MVT::i64 && NotMaskLZ)
NotMaskLZ -= 64-V.getValueSizeInBits();
-
+
unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
switch (MaskedBytes) {
- case 1:
- case 2:
+ case 1:
+ case 2:
case 4: break;
default: return Result; // All one mask, or 5-byte mask.
}
-
+
// Verify that the first bit starts at a multiple of mask so that the access
// is aligned the same as the access width.
if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
-
+
Result.first = MaskedBytes;
Result.second = NotMaskTZ/8;
return Result;
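For intuition (not part of the patch; the helper below is a hypothetical restatement of the analysis above), the mask check accepts exactly those AND masks whose complement is a single byte-aligned run of ones:

    #include <cstdint>
    #include <utility>

    // Sketch of the analysis above for an i32 mask: returns {masked bytes,
    // byte offset} when ~Mask is one contiguous, byte-aligned run of ones of
    // width 1, 2 or 4 bytes, and {0, 0} otherwise. GCC/Clang builtins assumed.
    std::pair<unsigned, unsigned> maskedByteRange(uint32_t Mask) {
      uint32_t NotMask = ~Mask;
      if (NotMask == 0) return {0, 0};            // all-ones mask, nothing masked out
      unsigned TZ = __builtin_ctz(NotMask);
      unsigned LZ = __builtin_clz(NotMask);
      if (TZ % 8) return {0, 0};                  // run must start on a byte boundary
      uint32_t Run = NotMask >> TZ;
      if (Run & (Run + 1)) return {0, 0};         // not a single 0*1+0* run
      unsigned MaskedBytes = (32 - LZ - TZ) / 8;
      if (MaskedBytes != 1 && MaskedBytes != 2 && MaskedBytes != 4) return {0, 0};
      if ((TZ / 8) % MaskedBytes) return {0, 0};  // access would be misaligned
      return {MaskedBytes, TZ / 8};
    }
    // e.g. maskedByteRange(0xFFFF00FF) == {1, 1}: one masked byte at offset 1.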
@@ -5707,20 +5957,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
unsigned NumBytes = MaskInfo.first;
unsigned ByteShift = MaskInfo.second;
SelectionDAG &DAG = DC->getDAG();
-
+
// Check to see if IVal is all zeros in the part being masked in by the 'or'
// that uses this. If not, this is not a replacement.
APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
ByteShift*8, (ByteShift+NumBytes)*8);
if (!DAG.MaskedValueIsZero(IVal, Mask)) return 0;
-
+
// Check that it is legal on the target to do this. It is legal if the new
// VT we're shrinking to (i8/i16/i32) is legal or we're still before type
// legalization.
MVT VT = MVT::getIntegerVT(NumBytes*8);
if (!DC->isTypeLegal(VT))
return 0;
-
+
// Okay, we can do this! Replace the 'St' store with a store of IVal that is
// shifted by ByteShift and truncated down to NumBytes.
if (ByteShift)
@@ -5735,20 +5985,20 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
StOffset = ByteShift;
else
StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
-
+
SDValue Ptr = St->getBasePtr();
if (StOffset) {
Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
NewAlign = MinAlign(NewAlign, StOffset);
}
-
+
// Truncate down to the new size.
IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
-
+
++OpsNarrowed;
- return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
- St->getSrcValue(), St->getSrcValueOffset()+StOffset,
+ return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+ St->getPointerInfo().getWithOffset(StOffset),
false, false, NewAlign).getNode();
}
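The byte offset at which the shrunken store lands depends only on endianness; a minimal sketch (hypothetical helper, not part of the patch):

    // Offset of the narrow store inside the original wide store value,
    // mirroring the little/big-endian handling above.
    unsigned narrowStoreOffset(unsigned WideStoreBytes, unsigned ByteShift,
                               unsigned NumBytes, bool LittleEndian) {
      return LittleEndian ? ByteShift
                          : WideStoreBytes - ByteShift - NumBytes;
    }

Continuing the 0xFFFF00FF example above, the single masked byte with ByteShift 1 is stored at offset 1 on a little-endian target and at offset 4 - 1 - 1 = 2 on a big-endian one for an i32.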
@@ -5771,7 +6021,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
unsigned Opc = Value.getOpcode();
-
+
// If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
// is a byte mask indicating a consecutive number of bytes, check to see if
// Y is known to provide just those bytes. If so, we try to replace the
@@ -5784,7 +6034,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
Value.getOperand(1), ST,this))
return SDValue(NewST, 0);
-
+
// Or is commutative, so try swapping X and Y.
MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
if (MaskedLoad.first)
@@ -5792,7 +6042,7 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
Value.getOperand(0), ST,this))
return SDValue(NewST, 0);
}
-
+
if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
Value.getOperand(1).getOpcode() != ISD::Constant)
return SDValue();
@@ -5801,7 +6051,9 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
Chain == SDValue(N0.getNode(), 1)) {
LoadSDNode *LD = cast<LoadSDNode>(N0);
- if (LD->getBasePtr() != Ptr)
+ if (LD->getBasePtr() != Ptr ||
+ LD->getPointerInfo().getAddrSpace() !=
+ ST->getPointerInfo().getAddrSpace())
return SDValue();
// Find the type to narrow it the load / op / store to.
@@ -5850,14 +6102,14 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
DAG.getConstant(PtrOff, Ptr.getValueType()));
SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
LD->getChain(), NewPtr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->getPointerInfo().getWithOffset(PtrOff),
LD->isVolatile(), LD->isNonTemporal(),
NewAlign);
SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
DAG.getConstant(NewImm, NewVT));
SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
NewVal, NewPtr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo().getWithOffset(PtrOff),
false, false, NewAlign);
AddToWorkList(NewPtr.getNode());
@@ -5874,6 +6126,63 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
return SDValue();
}
+/// TransformFPLoadStorePair - For a given floating point load / store pair,
+/// if the load value isn't used by any other operations, then consider
+/// transforming the pair to integer load / store operations if the target
+/// deems the transformation profitable.
+SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
+ Value.hasOneUse() &&
+ Chain == SDValue(Value.getNode(), 1)) {
+ LoadSDNode *LD = cast<LoadSDNode>(Value);
+ EVT VT = LD->getMemoryVT();
+ if (!VT.isFloatingPoint() ||
+ VT != ST->getMemoryVT() ||
+ LD->isNonTemporal() ||
+ ST->isNonTemporal() ||
+ LD->getPointerInfo().getAddrSpace() != 0 ||
+ ST->getPointerInfo().getAddrSpace() != 0)
+ return SDValue();
+
+ EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
+ if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
+ !TLI.isOperationLegal(ISD::STORE, IntVT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
+ !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
+ return SDValue();
+
+ unsigned LDAlign = LD->getAlignment();
+ unsigned STAlign = ST->getAlignment();
+ const Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlign = TLI.getTargetData()->getABITypeAlignment(IntVTTy);
+ if (LDAlign < ABIAlign || STAlign < ABIAlign)
+ return SDValue();
+
+ SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+ LD->getChain(), LD->getBasePtr(),
+ LD->getPointerInfo(),
+ false, false, LDAlign);
+
+ SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+ NewLD, ST->getBasePtr(),
+ ST->getPointerInfo(),
+ false, false, STAlign);
+
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewST.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++LdStFP2Int;
+ return NewST;
+ }
+
+ return SDValue();
+}
+
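A concrete picture of what the new TransformFPLoadStorePair combine buys (purely illustrative C++, not part of the patch): a float copied through memory is moved as its integer bit pattern, so no FP register is touched.

    #include <cstdint>
    #include <cstring>

    // The FP load/store pair on the left becomes the integer pair on the right.
    void copyFloatThroughFP(const float *Src, float *Dst) { *Dst = *Src; }
    void copyFloatAsInt(const float *Src, float *Dst) {
      uint32_t Bits;
      std::memcpy(&Bits, Src, sizeof(Bits));   // integer load of the bit pattern
      std::memcpy(Dst, &Bits, sizeof(Bits));   // integer store
    }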
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5882,7 +6191,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// If this is a store of a bit convert, store the input value if the
// resultant store does not need a higher alignment than the original.
- if (Value.getOpcode() == ISD::BIT_CONVERT && !ST->isTruncatingStore() &&
+ if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
ST->isUnindexed()) {
unsigned OrigAlign = ST->getAlignment();
EVT SVT = Value.getOperand(0).getValueType();
@@ -5892,8 +6201,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
((!LegalOperations && !ST->isVolatile()) ||
TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), OrigAlign);
}
@@ -5917,8 +6225,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
bitcastToAPInt().getZExtValue(), MVT::i32);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
break;
@@ -5929,8 +6236,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
getZExtValue(), MVT::i64);
return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->isVolatile(),
+ Ptr, ST->getPointerInfo(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else if (!ST->isVolatile() &&
TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
@@ -5942,23 +6248,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(),
+ Ptr, ST->getPointerInfo(),
isVolatile, isNonTemporal,
ST->getAlignment());
Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
DAG.getConstant(4, Ptr.getValueType()));
- SVOffset += 4;
Alignment = MinAlign(Alignment, 4U);
SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
- Ptr, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
+ Ptr, ST->getPointerInfo().getWithOffset(4),
+ isVolatile, isNonTemporal,
Alignment);
return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
St0, St1);
@@ -5974,12 +6277,17 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(), Align);
}
}
+  // Try transforming a pair of floating point load / store ops to integer
+ // load / store ops.
+ SDValue NewST = TransformFPLoadStorePair(N);
+ if (NewST.getNode())
+ return NewST;
+
if (CombinerAA) {
// Walk up chain skipping non-aliasing memory nodes.
SDValue BetterChain = FindBetterChain(N, Chain);
@@ -5991,12 +6299,12 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Replace the chain to avoid dependency.
if (ST->isTruncatingStore()) {
ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(),ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
} else {
ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6030,17 +6338,16 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
AddToWorkList(Value.getNode());
if (Shorter.getNode())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
// Otherwise, see if we can simplify the operation with
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
- APInt::getLowBitsSet(
- Value.getValueType().getScalarType().getSizeInBits(),
- ST->getMemoryVT().getScalarType().getSizeInBits())))
+ APInt::getLowBitsSet(
+ Value.getValueType().getScalarType().getSizeInBits(),
+ ST->getMemoryVT().getScalarType().getSizeInBits())))
return SDValue(N, 0);
}
@@ -6064,8 +6371,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
ST->getMemoryVT())) {
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
- Ptr, ST->getSrcValue(),
- ST->getSrcValueOffset(), ST->getMemoryVT(),
+ Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -6082,6 +6388,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (InVal.getOpcode() == ISD::UNDEF)
return InVec;
+ EVT VT = InVec.getValueType();
+
+ // If we can't generate a legal BUILD_VECTOR, exit
+ if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
+ return SDValue();
+
// If the invec is a BUILD_VECTOR and if EltNo is a constant, build a new
// vector with the inserted element.
if (InVec.getOpcode() == ISD::BUILD_VECTOR && isa<ConstantSDNode>(EltNo)) {
@@ -6091,13 +6403,12 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
- // If the invec is an UNDEF and if EltNo is a constant, create a new
+ // If the invec is an UNDEF and if EltNo is a constant, create a new
// BUILD_VECTOR with undef elements and the inserted element.
- if (!LegalOperations && InVec.getOpcode() == ISD::UNDEF &&
+ if (InVec.getOpcode() == ISD::UNDEF &&
isa<ConstantSDNode>(EltNo)) {
- EVT VT = InVec.getValueType();
EVT EltVT = VT.getVectorElementType();
unsigned NElts = VT.getVectorNumElements();
SmallVector<SDValue, 8> Ops(NElts, DAG.getUNDEF(EltVT));
@@ -6106,7 +6417,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
if (Elt < Ops.size())
Ops[Elt] = InVal;
return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
- InVec.getValueType(), &Ops[0], Ops.size());
+ VT, &Ops[0], Ops.size());
}
return SDValue();
}
@@ -6138,14 +6449,14 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
SDValue EltNo = N->getOperand(1);
if (isa<ConstantSDNode>(EltNo)) {
- unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
+ int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
bool NewLoad = false;
bool BCNumEltsChanged = false;
EVT VT = InVec.getValueType();
EVT ExtVT = VT.getVectorElementType();
EVT LVT = ExtVT;
- if (InVec.getOpcode() == ISD::BIT_CONVERT) {
+ if (InVec.getOpcode() == ISD::BITCAST) {
EVT BCVT = InVec.getOperand(0).getValueType();
if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
return SDValue();
@@ -6176,10 +6487,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
// Select the input vector, guarding against out of range extract vector.
unsigned NumElems = VT.getVectorNumElements();
- int Idx = (Elt > NumElems) ? -1 : SVN->getMaskElt(Elt);
+ int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
- if (InVec.getOpcode() == ISD::BIT_CONVERT)
+ if (InVec.getOpcode() == ISD::BITCAST)
InVec = InVec.getOperand(0);
if (ISD::isNormalLoad(InVec.getNode())) {
LN0 = cast<LoadSDNode>(InVec);
@@ -6190,12 +6501,17 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
if (!LN0 || !LN0->hasOneUse() || LN0->isVolatile())
return SDValue();
+ // If Idx was -1 above, Elt is going to be -1, so just return undef.
+ if (Elt == -1)
+ return DAG.getUNDEF(LN0->getBasePtr().getValueType());
+
unsigned Align = LN0->getAlignment();
if (NewLoad) {
// Check the resultant load doesn't need a higher alignment than the
// original load.
unsigned NewAlign =
- TLI.getTargetData()->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
+ TLI.getTargetData()
+ ->getABITypeAlignment(LVT.getTypeForEVT(*DAG.getContext()));
if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, LVT))
return SDValue();
@@ -6204,8 +6520,10 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
SDValue NewPtr = LN0->getBasePtr();
+ unsigned PtrOff = 0;
+
if (Elt) {
- unsigned PtrOff = LVT.getSizeInBits() * Elt / 8;
+ PtrOff = LVT.getSizeInBits() * Elt / 8;
EVT PtrType = NewPtr.getValueType();
if (TLI.isBigEndian())
PtrOff = VT.getSizeInBits() / 8 - PtrOff;
@@ -6214,7 +6532,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
}
return DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr,
- LN0->getSrcValue(), LN0->getSrcValueOffset(),
+ LN0->getPointerInfo().getWithOffset(PtrOff),
LN0->isVolatile(), LN0->isNonTemporal(), Align);
}
@@ -6280,7 +6598,7 @@ SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
if (ExtIndex > VT.getVectorNumElements())
return SDValue();
-
+
Mask.push_back(ExtIndex);
continue;
}
@@ -6328,15 +6646,16 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
// FIXME: implement canonicalizations from DAG.getVectorShuffle()
- // If it is a splat, check if the argument vector is a build_vector with
- // all scalar elements the same.
- if (cast<ShuffleVectorSDNode>(N)->isSplat()) {
+ // If it is a splat, check if the argument vector is another splat or a
+ // build_vector with all scalar elements the same.
+ ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
+ if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
SDNode *V = N0.getNode();
// If this is a bit convert that changes the element type of the vector but
// not the number of vector elements, look through it. Be careful not to
// look though conversions that change things like v4f32 to v2f64.
- if (V->getOpcode() == ISD::BIT_CONVERT) {
+ if (V->getOpcode() == ISD::BITCAST) {
SDValue ConvInput = V->getOperand(0);
if (ConvInput.getValueType().isVector() &&
ConvInput.getValueType().getVectorNumElements() == NumElts)
@@ -6344,30 +6663,28 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
}
if (V->getOpcode() == ISD::BUILD_VECTOR) {
- unsigned NumElems = V->getNumOperands();
- unsigned BaseIdx = cast<ShuffleVectorSDNode>(N)->getSplatIndex();
- if (NumElems > BaseIdx) {
- SDValue Base;
- bool AllSame = true;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
- Base = V->getOperand(i);
- break;
- }
+ assert(V->getNumOperands() == NumElts &&
+ "BUILD_VECTOR has wrong number of operands");
+ SDValue Base;
+ bool AllSame = true;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
+ Base = V->getOperand(i);
+ break;
}
- // Splat of <u, u, u, u>, return <u, u, u, u>
- if (!Base.getNode())
- return N0;
- for (unsigned i = 0; i != NumElems; ++i) {
- if (V->getOperand(i) != Base) {
- AllSame = false;
- break;
- }
+ }
+ // Splat of <u, u, u, u>, return <u, u, u, u>
+ if (!Base.getNode())
+ return N0;
+ for (unsigned i = 0; i != NumElts; ++i) {
+ if (V->getOperand(i) != Base) {
+ AllSame = false;
+ break;
}
- // Splat of <x, x, x, x>, return <x, x, x, x>
- if (AllSame)
- return N0;
}
+ // Splat of <x, x, x, x>, return <x, x, x, x>
+ if (AllSame)
+ return N0;
}
}
return SDValue();
@@ -6436,7 +6753,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
if (N->getOpcode() == ISD::AND) {
- if (RHS.getOpcode() == ISD::BIT_CONVERT)
+ if (RHS.getOpcode() == ISD::BITCAST)
RHS = RHS.getOperand(0);
if (RHS.getOpcode() == ISD::BUILD_VECTOR) {
SmallVector<int, 8> Indices;
@@ -6464,9 +6781,9 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
DAG.getConstant(0, EltVT));
SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
RVT, &ZeroOps[0], ZeroOps.size());
- LHS = DAG.getNode(ISD::BIT_CONVERT, dl, RVT, LHS);
+ LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Shuf);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
}
}
@@ -6480,10 +6797,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
// things. Simplifying them may result in a loss of legality.
if (LegalOperations) return SDValue();
- EVT VT = N->getValueType(0);
- assert(VT.isVector() && "SimplifyVBinOp only works on vectors!");
+ assert(N->getValueType(0).isVector() &&
+ "SimplifyVBinOp only works on vectors!");
- EVT EltType = VT.getVectorElementType();
SDValue LHS = N->getOperand(0);
SDValue RHS = N->getOperand(1);
SDValue Shuffle = XformToShuffleWithZero(N);
@@ -6516,14 +6832,10 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
break;
}
- // If the vector element type is not legal, the BUILD_VECTOR operands
- // are promoted and implicitly truncated. Make that explicit here.
- if (LHSOp.getValueType() != EltType)
- LHSOp = DAG.getNode(ISD::TRUNCATE, LHS.getDebugLoc(), EltType, LHSOp);
- if (RHSOp.getValueType() != EltType)
- RHSOp = DAG.getNode(ISD::TRUNCATE, RHS.getDebugLoc(), EltType, RHSOp);
-
- SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), EltType,
+ EVT VT = LHSOp.getValueType();
+ assert(RHSOp.getValueType() == VT &&
+ "SimplifyVBinOp with different BUILD_VECTOR element types");
+ SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT,
LHSOp, RHSOp);
if (FoldOp.getOpcode() != ISD::UNDEF &&
FoldOp.getOpcode() != ISD::Constant &&
@@ -6533,11 +6845,9 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
AddToWorkList(FoldOp.getNode());
}
- if (Ops.size() == LHS.getNumOperands()) {
- EVT VT = LHS.getValueType();
- return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT,
- &Ops[0], Ops.size());
- }
+ if (Ops.size() == LHS.getNumOperands())
+ return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ LHS.getValueType(), &Ops[0], Ops.size());
}
return SDValue();
@@ -6580,103 +6890,101 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0,
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
SDValue RHS) {
+ // Cannot simplify select with vector condition
+ if (TheSelect->getOperand(0).getValueType().isVector()) return false;
+
// If this is a select from two identical things, try to pull the operation
// through the select.
- if (LHS.getOpcode() == RHS.getOpcode() && LHS.hasOneUse() && RHS.hasOneUse()){
- // If this is a load and the token chain is identical, replace the select
- // of two loads with a load through a select of the address to load from.
- // This triggers in things like "select bool X, 10.0, 123.0" after the FP
- // constants have been dropped into the constant pool.
- if (LHS.getOpcode() == ISD::LOAD &&
+ if (LHS.getOpcode() != RHS.getOpcode() ||
+ !LHS.hasOneUse() || !RHS.hasOneUse())
+ return false;
+
+ // If this is a load and the token chain is identical, replace the select
+ // of two loads with a load through a select of the address to load from.
+ // This triggers in things like "select bool X, 10.0, 123.0" after the FP
+ // constants have been dropped into the constant pool.
+ if (LHS.getOpcode() == ISD::LOAD) {
+ LoadSDNode *LLD = cast<LoadSDNode>(LHS);
+ LoadSDNode *RLD = cast<LoadSDNode>(RHS);
+
+ // Token chains must be identical.
+ if (LHS.getOperand(0) != RHS.getOperand(0) ||
// Do not let this transformation reduce the number of volatile loads.
- !cast<LoadSDNode>(LHS)->isVolatile() &&
- !cast<LoadSDNode>(RHS)->isVolatile() &&
- // Token chains must be identical.
- LHS.getOperand(0) == RHS.getOperand(0)) {
- LoadSDNode *LLD = cast<LoadSDNode>(LHS);
- LoadSDNode *RLD = cast<LoadSDNode>(RHS);
-
- // If this is an EXTLOAD, the VT's must match.
- if (LLD->getMemoryVT() == RLD->getMemoryVT()) {
+ LLD->isVolatile() || RLD->isVolatile() ||
+ // If this is an EXTLOAD, the VT's must match.
+ LLD->getMemoryVT() != RLD->getMemoryVT() ||
+ // If this is an EXTLOAD, the kind of extension must match.
+ (LLD->getExtensionType() != RLD->getExtensionType() &&
+ // The only exception is if one of the extensions is anyext.
+ LLD->getExtensionType() != ISD::EXTLOAD &&
+ RLD->getExtensionType() != ISD::EXTLOAD) ||
// FIXME: this discards src value information. This is
// over-conservative. It would be beneficial to be able to remember
// both potential memory locations. Since we are discarding
// src value info, don't do the transformation if the memory
// locations are not in the default address space.
- unsigned LLDAddrSpace = 0, RLDAddrSpace = 0;
- if (const Value *LLDVal = LLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(LLDVal->getType()))
- LLDAddrSpace = PT->getAddressSpace();
- }
- if (const Value *RLDVal = RLD->getMemOperand()->getValue()) {
- if (const PointerType *PT = dyn_cast<PointerType>(RLDVal->getType()))
- RLDAddrSpace = PT->getAddressSpace();
- }
- SDValue Addr;
- if (LLDAddrSpace == 0 && RLDAddrSpace == 0) {
- if (TheSelect->getOpcode() == ISD::SELECT) {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- !LLD->isPredecessorOf(TheSelect->getOperand(0).getNode())) &&
- (!RLD->hasAnyUseOfValue(1) ||
- !RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()))) {
- Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0), LLD->getBasePtr(),
- RLD->getBasePtr());
- }
- } else {
- // Check that the condition doesn't reach either load. If so, folding
- // this will induce a cycle into the DAG.
- if ((!LLD->hasAnyUseOfValue(1) ||
- (!LLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !LLD->isPredecessorOf(TheSelect->getOperand(1).getNode()))) &&
- (!RLD->hasAnyUseOfValue(1) ||
- (!RLD->isPredecessorOf(TheSelect->getOperand(0).getNode()) &&
- !RLD->isPredecessorOf(TheSelect->getOperand(1).getNode())))) {
- Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
- LLD->getBasePtr().getValueType(),
- TheSelect->getOperand(0),
- TheSelect->getOperand(1),
- LLD->getBasePtr(), RLD->getBasePtr(),
- TheSelect->getOperand(4));
- }
- }
- }
-
- if (Addr.getNode()) {
- SDValue Load;
- if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
- Load = DAG.getLoad(TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(),
- Addr, 0, 0,
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- } else {
- Load = DAG.getExtLoad(LLD->getExtensionType(),
- TheSelect->getValueType(0),
- TheSelect->getDebugLoc(),
- LLD->getChain(), Addr, 0, 0,
- LLD->getMemoryVT(),
- LLD->isVolatile(),
- LLD->isNonTemporal(),
- LLD->getAlignment());
- }
+ LLD->getPointerInfo().getAddrSpace() != 0 ||
+ RLD->getPointerInfo().getAddrSpace() != 0)
+ return false;
- // Users of the select now use the result of the load.
- CombineTo(TheSelect, Load);
+ // Check that the select condition doesn't reach either load. If so,
+ // folding this will induce a cycle into the DAG. If not, this is safe to
+ // xform, so create a select of the addresses.
+ SDValue Addr;
+ if (TheSelect->getOpcode() == ISD::SELECT) {
+ SDNode *CondNode = TheSelect->getOperand(0).getNode();
+ if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
+ (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
+ return false;
+ Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0), LLD->getBasePtr(),
+ RLD->getBasePtr());
+ } else { // Otherwise SELECT_CC
+ SDNode *CondLHS = TheSelect->getOperand(0).getNode();
+ SDNode *CondRHS = TheSelect->getOperand(1).getNode();
+
+    if ((LLD->hasAnyUseOfValue(1) &&
+         (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
+        (RLD->hasAnyUseOfValue(1) &&
+         (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
+ return false;
- // Users of the old loads now use the new load's chain. We know the
- // old-load value is dead now.
- CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
- CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
- return true;
- }
- }
- }
+ Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(),
+ LLD->getBasePtr().getValueType(),
+ TheSelect->getOperand(0),
+ TheSelect->getOperand(1),
+ LLD->getBasePtr(), RLD->getBasePtr(),
+ TheSelect->getOperand(4));
+ }
+
+ SDValue Load;
+ if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
+ Load = DAG.getLoad(TheSelect->getValueType(0),
+ TheSelect->getDebugLoc(),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->isVolatile(), LLD->isNonTemporal(),
+ LLD->getAlignment());
+ } else {
+ Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
+ RLD->getExtensionType() : LLD->getExtensionType(),
+ TheSelect->getDebugLoc(),
+ TheSelect->getValueType(0),
+ // FIXME: Discards pointer info.
+ LLD->getChain(), Addr, MachinePointerInfo(),
+ LLD->getMemoryVT(), LLD->isVolatile(),
+ LLD->isNonTemporal(), LLD->getAlignment());
+ }
+
+ // Users of the select now use the result of the load.
+ CombineTo(TheSelect, Load);
+
+ // Users of the old loads now use the new load's chain. We know the
+ // old-load value is dead now.
+ CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
+ CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
+ return true;
}
return false;
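In source-level terms, the rewritten SimplifySelectOps folds a select of two loads into a single load of a selected address (illustrative sketch, not part of the patch):

    // cond ? *P : *Q  ==>  *(cond ? P : Q)
    // Legal here because both loads hang off the same chain and neither is
    // volatile, mirroring the guards above.
    int selectOfLoads(bool Cond, const int *P, const int *Q) {
      return *(Cond ? P : Q);
    }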
@@ -6689,7 +6997,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
ISD::CondCode CC, bool NotExtCompare) {
// (x ? y : y) -> y.
if (N2 == N3) return N2;
-
+
EVT VT = N2.getValueType();
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
@@ -6725,7 +7033,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
return DAG.getNode(ISD::FABS, DL, VT, N3);
}
}
-
+
// Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
// where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
// in it. This is a win when the constant is not otherwise available because
@@ -6748,7 +7056,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
};
const Type *FPTy = Elts[0]->getType();
const TargetData &TD = *TLI.getTargetData();
-
+
// Create a ConstantArray of the two constants.
Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts, 2);
SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(),
@@ -6760,7 +7068,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
SDValue Zero = DAG.getIntPtrConstant(0);
unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
SDValue One = DAG.getIntPtrConstant(EltSize);
-
+
SDValue Cond = DAG.getSetCC(DL,
TLI.getSetCCResultType(N0.getValueType()),
N0, N1, CC);
@@ -6769,11 +7077,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
CstOffset);
return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false,
+ MachinePointerInfo::getConstantPool(), false,
false, Alignment);
}
- }
+ }
// Check to see if we can perform the "gzip trick", transforming
// (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
@@ -6818,6 +7126,35 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
}
}
+ // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
+  // where y has a single bit set.
+  // In plain terms, we can turn the SELECT_CC into an AND
+ // when the condition can be materialized as an all-ones register. Any
+ // single bit-test can be materialized as an all-ones register with
+ // shift-left and shift-right-arith.
+ if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
+ N0->getValueType(0) == VT &&
+ N1C && N1C->isNullValue() &&
+ N2C && N2C->isNullValue()) {
+ SDValue AndLHS = N0->getOperand(0);
+ ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
+ if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
+ // Shift the tested bit over the sign bit.
+ APInt AndMask = ConstAndRHS->getAPIntValue();
+ SDValue ShlAmt =
+ DAG.getConstant(AndMask.countLeadingZeros(), getShiftAmountTy());
+ SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+
+ // Now arithmetic right shift it all the way over, so the result is either
+ // all-ones, or zero.
+ SDValue ShrAmt =
+ DAG.getConstant(AndMask.getBitWidth()-1, getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+
+ return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
+ }
+ }
+
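The new fold can be read as a branchless bit test; a hypothetical C++ equivalent (not part of the patch) for a 32-bit value with the tested bit at position BitPos (BitPos < 32):

    #include <cstdint>

    // ((X & (1u << BitPos)) == 0) ? 0 : A, computed with shifts and an AND:
    // move the tested bit into the sign position, then arithmetic-shift it
    // across the whole word to get all-ones or zero.
    uint32_t bitTestSelect(uint32_t X, unsigned BitPos, uint32_t A) {
      int32_t SignBit = int32_t(X << (31 - BitPos)); // tested bit -> bit 31
      int32_t Mask = SignBit >> 31;                  // all-ones if bit was set
      return uint32_t(Mask) & A;
    }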
// fold select C, 16, 0 -> shl C, 4
if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() &&
TLI.getBooleanContents() == TargetLowering::ZeroOrOneBooleanContent) {
@@ -6971,7 +7308,8 @@ SDValue DAGCombiner::BuildUDIV(SDNode *N) {
}
/// FindBaseOffset - Return true if base is a frame index, which is known not
-// to alias with anything but itself. Provides base object and offset as results.
+// to alias with anything but itself. Provides base object and offset as
+// results.
static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
const GlobalValue *&GV, void *&CV) {
// Assume it is a primitive operation.
@@ -6984,7 +7322,7 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
Offset += C->getZExtValue();
}
}
-
+
// Return the underlying GlobalValue, and update the Offset. Return false
// for GlobalAddressSDNode since the same GlobalAddress may be represented
// by multiple nodes with different offsets.
@@ -7012,9 +7350,11 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
const Value *SrcValue1, int SrcValueOffset1,
unsigned SrcValueAlign1,
+ const MDNode *TBAAInfo1,
SDValue Ptr2, int64_t Size2,
const Value *SrcValue2, int SrcValueOffset2,
- unsigned SrcValueAlign2) const {
+ unsigned SrcValueAlign2,
+ const MDNode *TBAAInfo2) const {
// If they are the same then they must be aliases.
if (Ptr1 == Ptr2) return true;
@@ -7030,8 +7370,19 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
- // If we know what the bases are, and they aren't identical, then we know they
- // cannot alias.
+ // It is possible for different frame indices to alias each other, mostly
+ // when tail call optimization reuses return address slots for arguments.
+ // To catch this case, look up the actual index of frame indices to compute
+ // the real alias relationship.
+ if (isFrameIndex1 && isFrameIndex2) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
+ Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
+ return !((Offset1 + Size1) <= Offset2 || (Offset2 + Size2) <= Offset1);
+ }
+
+ // Otherwise, if we know what the bases are, and they aren't identical, then
+ // we know they cannot alias.
if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
return false;
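The overlap test used for the frame-index case (and for matching bases generally) is half-open interval intersection; as a standalone sketch (not part of the patch):

    #include <cstdint>

    // Accesses [Off1, Off1+Size1) and [Off2, Off2+Size2) may alias exactly
    // when the two ranges intersect.
    bool accessesOverlap(int64_t Off1, int64_t Size1, int64_t Off2, int64_t Size2) {
      return !((Off1 + Size1) <= Off2 || (Off2 + Size2) <= Off1);
    }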
@@ -7044,20 +7395,21 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
(Size1 == Size2) && (SrcValueAlign1 > Size1)) {
int64_t OffAlign1 = SrcValueOffset1 % SrcValueAlign1;
int64_t OffAlign2 = SrcValueOffset2 % SrcValueAlign1;
-
+
// There is no overlap between these relatively aligned accesses of similar
// size, return no alias.
if ((OffAlign1 + Size1) <= OffAlign2 || (OffAlign2 + Size2) <= OffAlign1)
return false;
}
-
+
if (CombinerGlobalAA) {
// Use alias analysis information.
int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
int64_t Overlap2 = Size2 + SrcValueOffset2 - MinOffset;
AliasAnalysis::AliasResult AAResult =
- AA.alias(SrcValue1, Overlap1, SrcValue2, Overlap2);
+ AA.alias(AliasAnalysis::Location(SrcValue1, Overlap1, TBAAInfo1),
+ AliasAnalysis::Location(SrcValue2, Overlap2, TBAAInfo2));
if (AAResult == AliasAnalysis::NoAlias)
return false;
}
@@ -7070,15 +7422,17 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
/// node. Returns true if the operand was a load.
bool DAGCombiner::FindAliasInfo(SDNode *N,
SDValue &Ptr, int64_t &Size,
- const Value *&SrcValue,
+ const Value *&SrcValue,
int &SrcValueOffset,
- unsigned &SrcValueAlign) const {
+ unsigned &SrcValueAlign,
+ const MDNode *&TBAAInfo) const {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
Ptr = LD->getBasePtr();
Size = LD->getMemoryVT().getSizeInBits() >> 3;
SrcValue = LD->getSrcValue();
SrcValueOffset = LD->getSrcValueOffset();
SrcValueAlign = LD->getOriginalAlignment();
+ TBAAInfo = LD->getTBAAInfo();
return true;
} else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
Ptr = ST->getBasePtr();
@@ -7086,6 +7440,7 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
SrcValue = ST->getSrcValue();
SrcValueOffset = ST->getSrcValueOffset();
SrcValueAlign = ST->getOriginalAlignment();
+ TBAAInfo = ST->getTBAAInfo();
} else {
llvm_unreachable("FindAliasInfo expected a memory operand");
}
@@ -7106,26 +7461,27 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *SrcValue;
int SrcValueOffset;
unsigned SrcValueAlign;
- bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
- SrcValueAlign);
+ const MDNode *SrcTBAAInfo;
+ bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
+ SrcValueAlign, SrcTBAAInfo);
// Starting off.
Chains.push_back(OriginalChain);
unsigned Depth = 0;
-
+
// Look at each chain and determine if it is an alias. If so, add it to the
// aliases list. If not, then continue up the chain looking for the next
// candidate.
while (!Chains.empty()) {
SDValue Chain = Chains.back();
Chains.pop_back();
-
- // For TokenFactor nodes, look at each operand and only continue up the
- // chain until we find two aliases. If we've seen two aliases, assume we'll
+
+ // For TokenFactor nodes, look at each operand and only continue up the
+ // chain until we find two aliases. If we've seen two aliases, assume we'll
// find more and revert to original chain since the xform is unlikely to be
// profitable.
- //
- // FIXME: The depth check could be made to return the last non-aliasing
+ //
+ // FIXME: The depth check could be made to return the last non-aliasing
// chain we found before we hit a tokenfactor rather than the original
// chain.
if (Depth > 6 || Aliases.size() == 2) {
@@ -7151,15 +7507,18 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
const Value *OpSrcValue;
int OpSrcValueOffset;
unsigned OpSrcValueAlign;
+ const MDNode *OpSrcTBAAInfo;
bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign);
+ OpSrcValueAlign,
+ OpSrcTBAAInfo);
// If chain is alias then stop here.
if (!(IsLoad && IsOpLoad) &&
isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
+ SrcTBAAInfo,
OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
- OpSrcValueAlign)) {
+ OpSrcValueAlign, OpSrcTBAAInfo)) {
Aliases.push_back(Chain);
} else {
// Look further up the chain.
@@ -7206,9 +7565,9 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
// If a single operand then chain to it. We don't need to revisit it.
return Aliases[0];
}
-
+
// Construct a custom tailored token factor.
- return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+ return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
&Aliases[0], Aliases.size());
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index a4eed71e65c0..490b857b0e9c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -55,6 +55,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/Debug.h"
using namespace llvm;
/// startNewBlock - Set the current block to which generated machine
@@ -197,12 +198,12 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) {
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
TII.get(TargetOpcode::IMPLICIT_DEF), Reg);
}
-
+
// If target-independent code couldn't handle the value, give target-specific
// code a try.
if (!Reg && isa<Constant>(V))
Reg = TargetMaterializeConstant(cast<Constant>(V));
-
+
// Don't cache constant materializations in the general ValueMap.
// To do so would require tracking what uses they dominate.
if (Reg != 0) {
@@ -234,7 +235,7 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) {
LocalValueMap[I] = Reg;
return Reg;
}
-
+
unsigned &AssignedReg = FuncInfo.ValueMap[I];
if (AssignedReg == 0)
// Use the new register.
@@ -414,7 +415,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
// If this is a constant subscript, handle it quickly.
if (const ConstantInt *CI = dyn_cast<ConstantInt>(Idx)) {
if (CI->isZero()) continue;
- uint64_t Offs =
+ uint64_t Offs =
TD.getTypeAllocSize(Ty)*cast<ConstantInt>(CI)->getSExtValue();
N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT);
if (N == 0)
@@ -423,7 +424,7 @@ bool FastISel::SelectGetElementPtr(const User *I) {
NIsKill = true;
continue;
}
-
+
// N = N + Idx * ElementSize;
uint64_t ElementSize = TD.getTypeAllocSize(Ty);
std::pair<unsigned, bool> Pair = getRegForGEPIndex(Idx);
@@ -467,16 +468,28 @@ bool FastISel::SelectCall(const User *I) {
return true;
const Value *Address = DI->getAddress();
- if (!Address)
+ if (!Address || isa<UndefValue>(Address) || isa<AllocaInst>(Address))
return true;
- if (isa<UndefValue>(Address))
- return true;
- const AllocaInst *AI = dyn_cast<AllocaInst>(Address);
- // Don't handle byval struct arguments or VLAs, for example.
- if (!AI)
- // Building the map above is target independent. Generating DBG_VALUE
- // inline is target dependent; do this now.
- (void)TargetSelectInstruction(cast<Instruction>(I));
+
+ unsigned Reg = 0;
+ unsigned Offset = 0;
+ if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+ if (Arg->hasByValAttr()) {
+ // Byval arguments' frame index is recorded during argument lowering.
+ // Use this info directly.
+ Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ if (Offset)
+ Reg = TRI.getFrameRegister(*FuncInfo.MF);
+ }
+ }
+ if (!Reg)
+ Reg = getRegForValue(Address);
+
+ if (Reg)
+ BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(Reg, RegState::Debug).addImm(Offset)
+ .addMetadata(DI->getVariable());
return true;
}
case Intrinsic::dbg_value: {
@@ -505,11 +518,8 @@ bool FastISel::SelectCall(const User *I) {
} else {
// We can't yet handle anything else here because it would require
// generating code, thus altering codegen because of debug info.
- // Insert an undef so we can see what we dropped.
- BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
- .addReg(0U).addImm(DI->getOffset())
- .addMetadata(DI->getVariable());
- }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
+ }
return true;
}
case Intrinsic::eh_exception: {
@@ -582,12 +592,12 @@ bool FastISel::SelectCall(const User *I) {
bool FastISel::SelectCast(const User *I, unsigned Opcode) {
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple())
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
// Check if the destination type is legal. Or as a special case,
// it may be i1 if we're doing a truncate because that's
// easy and somewhat common.
@@ -629,7 +639,7 @@ bool FastISel::SelectCast(const User *I, unsigned Opcode) {
InputReg, InputRegIsKill);
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -644,23 +654,23 @@ bool FastISel::SelectBitCast(const User *I) {
return true;
}
- // Bitcasts of other values become reg-reg copies or BIT_CONVERT operators.
+ // Bitcasts of other values become reg-reg copies or BITCAST operators.
EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType());
EVT DstVT = TLI.getValueType(I->getType());
-
+
if (SrcVT == MVT::Other || !SrcVT.isSimple() ||
DstVT == MVT::Other || !DstVT.isSimple() ||
!TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT))
// Unhandled type. Halt "fast" selection and bail.
return false;
-
+
unsigned Op0 = getRegForValue(I->getOperand(0));
if (Op0 == 0)
// Unhandled operand. Halt "fast" selection and bail.
return false;
bool Op0IsKill = hasTrivialKill(I->getOperand(0));
-
+
// First, try to perform the bitcast by inserting a reg-reg copy.
unsigned ResultReg = 0;
if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) {
@@ -673,15 +683,15 @@ bool FastISel::SelectBitCast(const User *I) {
ResultReg).addReg(Op0);
}
}
-
- // If the reg-reg copy failed, select a BIT_CONVERT opcode.
+
+ // If the reg-reg copy failed, select a BITCAST opcode.
if (!ResultReg)
ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(),
- ISD::BIT_CONVERT, Op0, Op0IsKill);
-
+ ISD::BITCAST, Op0, Op0IsKill);
+
if (!ResultReg)
return false;
-
+
UpdateValueMap(I, ResultReg);
return true;
}
@@ -753,7 +763,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
unsigned IntReg = FastEmit_r(VT.getSimpleVT(), IntVT.getSimpleVT(),
- ISD::BIT_CONVERT, OpReg, OpRegIsKill);
+ ISD::BITCAST, OpReg, OpRegIsKill);
if (IntReg == 0)
return false;
@@ -765,7 +775,7 @@ FastISel::SelectFNeg(const User *I) {
return false;
ResultReg = FastEmit_r(IntVT.getSimpleVT(), VT.getSimpleVT(),
- ISD::BIT_CONVERT, IntResultReg, /*Kill=*/true);
+ ISD::BITCAST, IntResultReg, /*Kill=*/true);
if (ResultReg == 0)
return false;
@@ -845,10 +855,10 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) {
// Dynamic-sized alloca is not handled yet.
return false;
-
+
case Instruction::Call:
return SelectCall(I);
-
+
case Instruction::BitCast:
return SelectBitCast(I);
@@ -911,7 +921,7 @@ unsigned FastISel::FastEmit_r(MVT, MVT,
return 0;
}
-unsigned FastISel::FastEmit_rr(MVT, MVT,
+unsigned FastISel::FastEmit_rr(MVT, MVT,
unsigned,
unsigned /*Op0*/, bool /*Op0IsKill*/,
unsigned /*Op1*/, bool /*Op1IsKill*/) {
@@ -1139,7 +1149,7 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode,
uint64_t Imm) {
unsigned ResultReg = createResultReg(RC);
const TargetInstrDesc &II = TII.get(MachineInstOpcode);
-
+
if (II.getNumDefs() >= 1)
BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm);
else {
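Editorial sketch (not part of the imported patch): the SelectCall change above lowers llvm.dbg.declare in one of two ways instead of deferring to the target hook: for a byval argument whose stack slot was recorded during argument lowering, the variable is described as the frame register plus that recorded value; otherwise the declared address is materialized into a virtual register. The decision is sketched below with invented helpers (ByValFrameSlot, describeDeclare, FrameReg), which only mirror the shape of the calls shown in the diff.

  #include <cassert>
  #include <map>

  static const unsigned FrameReg = 6;            // stand-in for TRI.getFrameRegister(MF)

  struct VarLoc { unsigned Reg; unsigned Offset; };

  static std::map<int, unsigned> ByValFrameSlot; // arg number -> recorded slot value

  static VarLoc describeDeclare(int ArgNo, unsigned AddressVReg) {
    std::map<int, unsigned>::const_iterator It = ByValFrameSlot.find(ArgNo);
    if (It != ByValFrameSlot.end())
      return VarLoc{FrameReg, It->second};       // byval: known frame location
    return VarLoc{AddressVReg, 0};               // otherwise: vreg holding the address
  }

  int main() {
    ByValFrameSlot[0] = 16;                      // arg 0 passed byval, slot recorded earlier
    VarLoc ByVal = describeDeclare(0, /*AddressVReg=*/0);
    assert(ByVal.Reg == FrameReg && ByVal.Offset == 16);
    VarLoc Other = describeDeclare(1, /*AddressVReg=*/1025);
    assert(Other.Reg == 1025 && Other.Offset == 0);
    return 0;
  }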
diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index 5ef6404ee5d6..98582ba99f14 100644
--- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -29,7 +29,6 @@
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 61c2a90e7edc..e309defba20f 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -31,11 +31,11 @@
using namespace llvm;
/// CountResults - The results of target nodes have register or immediate
-/// operands first, then an optional chain, and optional flag operands (which do
+/// operands first, then an optional chain, and optional glue operands (which do
/// not go into the resulting MachineInstr).
unsigned InstrEmitter::CountResults(SDNode *Node) {
unsigned N = Node->getNumValues();
- while (N && Node->getValueType(N - 1) == MVT::Flag)
+ while (N && Node->getValueType(N - 1) == MVT::Glue)
--N;
if (N && Node->getValueType(N - 1) == MVT::Other)
--N; // Skip over chain result.
@@ -43,12 +43,12 @@ unsigned InstrEmitter::CountResults(SDNode *Node) {
}
/// CountOperands - The inputs to target nodes have any actual inputs first,
-/// followed by an optional chain operand, then an optional flag operand.
+/// followed by an optional chain operand, then an optional glue operand.
/// Compute the number of actual operands that will go into the resulting
/// MachineInstr.
unsigned InstrEmitter::CountOperands(SDNode *Node) {
unsigned N = Node->getNumOperands();
- while (N && Node->getOperand(N - 1).getValueType() == MVT::Flag)
+ while (N && Node->getOperand(N - 1).getValueType() == MVT::Glue)
--N;
if (N && Node->getOperand(N - 1).getValueType() == MVT::Other)
--N; // Ignore chain if it exists.
@@ -67,7 +67,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, SrcReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
return;
}
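Editorial sketch (not part of the imported patch): the repeated change from the self-assignment 'isNew = isNew' to '(void)isNew' in this file keeps the variable referenced when NDEBUG compiles the assert away, without relying on a self-assignment that newer compilers warn about. A stand-alone illustration of the idiom:

  #include <cassert>
  #include <map>

  int main() {
    std::map<int, int> M;
    bool Inserted = M.insert(std::make_pair(1, 2)).second;
    (void)Inserted; // referenced even in NDEBUG builds, so no unused-variable warning
    assert(Inserted && "key was already present");
    return 0;
  }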
@@ -96,7 +96,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (Op.getNode() != Node || Op.getResNo() != ResNo)
continue;
EVT VT = Node->getValueType(Op.getResNo());
- if (VT == MVT::Other || VT == MVT::Flag)
+ if (VT == MVT::Other || VT == MVT::Glue)
continue;
Match = false;
if (User->isMachineOpcode()) {
@@ -150,7 +150,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -224,7 +224,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI,
if (IsClone)
VRBaseMap.erase(Op);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
}
@@ -264,8 +264,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op,
DenseMap<SDValue, unsigned> &VRBaseMap,
bool IsDebug, bool IsClone, bool IsCloned) {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
// Get/emit the operand.
unsigned VReg = getVR(Op, VRBaseMap);
assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?");
@@ -377,8 +377,8 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op,
BA->getTargetFlags()));
} else {
assert(Op.getValueType() != MVT::Other &&
- Op.getValueType() != MVT::Flag &&
- "Chain and flag operands should occur at end of operand list!");
+ Op.getValueType() != MVT::Glue &&
+ "Chain and glue operands should occur at end of operand list!");
AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap,
IsDebug, IsClone, IsCloned);
}
@@ -428,31 +428,47 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
// Figure out the register class to create for the destreg.
unsigned VReg = getVR(Node->getOperand(0), VRBaseMap);
- const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
- const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
- assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
-
- // Figure out the register class to create for the destreg.
- // Note that if we're going to directly use an existing register,
- // it must be precisely the required class, and not a subclass
- // thereof.
- if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
- // Create the reg
- assert(SRC && "Couldn't find source register class");
- VRBase = MRI->createVirtualRegister(SRC);
- }
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ unsigned SrcReg, DstReg, DefSubIdx;
+ if (DefMI &&
+ TII->isCoalescableExtInstr(*DefMI, SrcReg, DstReg, DefSubIdx) &&
+ SubIdx == DefSubIdx) {
+ // Optimize these:
+ // r1025 = s/zext r1024, 4
+ // r1026 = extract_subreg r1025, 4
+ // to a copy
+ // r1026 = copy r1024
+ const TargetRegisterClass *TRC = MRI->getRegClass(SrcReg);
+ VRBase = MRI->createVirtualRegister(TRC);
+ BuildMI(*MBB, InsertPos, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase).addReg(SrcReg);
+ } else {
+ const TargetRegisterClass *TRC = MRI->getRegClass(VReg);
+ const TargetRegisterClass *SRC = TRC->getSubRegisterRegClass(SubIdx);
+ assert(SRC && "Invalid subregister index in EXTRACT_SUBREG");
+
+ // Figure out the register class to create for the destreg.
+ // Note that if we're going to directly use an existing register,
+ // it must be precisely the required class, and not a subclass
+ // thereof.
+ if (VRBase == 0 || SRC != MRI->getRegClass(VRBase)) {
+ // Create the reg
+ assert(SRC && "Couldn't find source register class");
+ VRBase = MRI->createVirtualRegister(SRC);
+ }
- // Create the extract_subreg machine instruction.
- MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
- TII->get(TargetOpcode::COPY), VRBase);
+ // Create the extract_subreg machine instruction.
+ MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
+ TII->get(TargetOpcode::COPY), VRBase);
- // Add source, and subreg index
- AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
- IsClone, IsCloned);
- assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) &&
- "Cannot yet extract from physregs");
- MI->getOperand(1).setSubReg(SubIdx);
- MBB->insert(InsertPos, MI);
+ // Add source, and subreg index
+ AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false,
+ IsClone, IsCloned);
+ assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg())&&
+ "Cannot yet extract from physregs");
+ MI->getOperand(1).setSubReg(SubIdx);
+ MBB->insert(InsertPos, MI);
+ }
} else if (Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG) {
SDValue N0 = Node->getOperand(0);
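Editorial sketch (not part of the imported patch): the EmitSubregNode hunk above recognizes an EXTRACT_SUBREG whose operand is a coalescable sign/zero extension of the same sub-register and emits a plain COPY of the original register instead (the "r1025 = s/zext r1024 ... r1026 = copy r1024" pattern named in the comment). At the value level this works because the low part of an extension is just the original value, as the stand-alone check below shows:

  #include <cassert>
  #include <cstdint>

  int main() {
    int32_t X = -42;
    int64_t Extended = static_cast<int64_t>(X);        // r1025 = sext r1024
    int32_t LowHalf  = static_cast<int32_t>(Extended); // r1026 = extract low subreg of r1025
    assert(LowHalf == X); // identical to the original, so a COPY of r1024 suffices
    return 0;
  }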
@@ -496,7 +512,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, VRBase)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -518,7 +534,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -543,9 +559,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
const TargetRegisterClass *TRC = MRI->getRegClass(SubReg);
const TargetRegisterClass *SRC =
TRI->getMatchingSuperRegClass(RC, TRC, SubIdx);
- if (!SRC)
- llvm_unreachable("Invalid subregister index in REG_SEQUENCE");
- if (SRC != RC) {
+ if (SRC && SRC != RC) {
MRI->setRegClass(NewVReg, SRC);
RC = SRC;
}
@@ -557,7 +571,7 @@ void InstrEmitter::EmitRegSequence(SDNode *Node,
MBB->insert(InsertPos, MI);
SDValue Op(Node, 0);
bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second;
- isNew = isNew; // Silence compiler warning.
+ (void)isNew; // Silence compiler warning.
assert(isNew && "Node emitted out of order - early");
}
@@ -673,10 +687,10 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// The MachineInstr constructor adds implicit-def operands. Scan through
// these to determine which are dead.
if (MI->getNumOperands() != 0 &&
- Node->getValueType(Node->getNumValues()-1) == MVT::Flag) {
+ Node->getValueType(Node->getNumValues()-1) == MVT::Glue) {
// First, collect all used registers.
SmallVector<unsigned, 8> UsedRegs;
- for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser())
+ for (SDNode *F = Node->getGluedUser(); F; F = F->getGluedUser())
if (F->getOpcode() == ISD::CopyFromReg)
UsedRegs.push_back(cast<RegisterSDNode>(F->getOperand(1))->getReg());
else {
@@ -689,7 +703,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i)
if (RegisterSDNode *R = dyn_cast<RegisterSDNode>(F->getOperand(i))) {
unsigned Reg = R->getReg();
- if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg))
UsedRegs.push_back(Reg);
}
}
@@ -721,20 +735,7 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
// hook knows where in the block to insert the replacement code.
MBB->insert(InsertPos, MI);
- if (II.usesCustomInsertionHook()) {
- // Insert this instruction into the basic block using a target
- // specific inserter which may returns a new basic block.
- bool AtEnd = InsertPos == MBB->end();
- MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB);
- if (NewMBB != MBB) {
- if (AtEnd)
- InsertPos = NewMBB->end();
- MBB = NewMBB;
- }
- return;
- }
-
- // Additional results must be an physical register def.
+ // Additional results must be physical register defs.
if (HasPhysRegOuts) {
for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
@@ -742,17 +743,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
EmitCopyFromReg(Node, i, IsClone, IsCloned, Reg, VRBaseMap);
// If there are no uses, mark the register as dead now, so that
// MachineLICM/Sink can see that it's dead. Don't do this if the
- // node has a Flag value, for the benefit of targets still using
- // Flag for values in physregs.
- else if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // node has a Glue value, for the benefit of targets still using
+ // Glue for values in physregs.
+ else if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
MI->addRegisterDead(Reg, TRI);
}
}
// If the instruction has implicit defs and the node doesn't, mark the
- // implicit def as dead. If the node has any flag outputs, we don't do this
- // because we don't know what implicit defs are being used by flagged nodes.
- if (Node->getValueType(Node->getNumValues()-1) != MVT::Flag)
+ // implicit def as dead. If the node has any glue outputs, we don't do this
+ // because we don't know what implicit defs are being used by glued nodes.
+ if (Node->getValueType(Node->getNumValues()-1) != MVT::Glue)
if (const unsigned *IDList = II.getImplicitDefs()) {
for (unsigned i = NumResults, e = II.getNumDefs()+II.getNumImplicitDefs();
i != e; ++i)
@@ -808,8 +809,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
case ISD::INLINEASM: {
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
// Create the inline asm machine instruction.
MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(),
@@ -820,11 +821,11 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned,
const char *AsmStr = cast<ExternalSymbolSDNode>(AsmStrV)->getSymbol();
MI->addOperand(MachineOperand::CreateES(AsmStr));
- // Add the isAlignStack bit.
- int64_t isAlignStack =
- cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_IsAlignStack))->
+ // Add the HasSideEffect and isAlignStack bits.
+ int64_t ExtraInfo =
+ cast<ConstantSDNode>(Node->getOperand(InlineAsm::Op_ExtraInfo))->
getZExtValue();
- MI->addOperand(MachineOperand::CreateImm(isAlignStack));
+ MI->addOperand(MachineOperand::CreateImm(ExtraInfo));
// Add all of the operand registers to the instruction.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
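Editorial sketch (not part of the imported patch): the INLINEASM hunk above replaces the single is-align-stack immediate with an ExtraInfo operand that packs several boolean facts (has-side-effects, is-align-stack) into one immediate as bit flags. The packing is sketched below; the particular bit values are illustrative, the authoritative assignments are the Extra_* constants in InlineAsm.h.

  #include <cassert>
  #include <cstdint>

  enum : uint64_t { kHasSideEffects = 1, kIsAlignStack = 2 }; // illustrative bits

  int main() {
    bool HasSideEffects = true, IsAlignStack = false;
    uint64_t ExtraInfo = 0;
    if (HasSideEffects) ExtraInfo |= kHasSideEffects;
    if (IsAlignStack)   ExtraInfo |= kIsAlignStack;
    assert((ExtraInfo & kHasSideEffects) != 0); // recovered later when emitting the asm
    assert((ExtraInfo & kIsAlignStack) == 0);
    return 0;
  }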
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2981cd3f1cab..49c862ce3e0b 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -11,14 +11,15 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
-#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,11 +66,6 @@ class SelectionDAGLegalize {
/// against each other, including inserted libcalls.
SDValue LastCALLSEQ_END;
- /// IsLegalizingCall - This member is used *only* for purposes of providing
- /// helpful assertions that a libcall isn't created while another call is
- /// being legalized (which could lead to non-serialized call sequences).
- bool IsLegalizingCall;
-
enum LegalizeAction {
Legal, // The target natively supports this operation.
Promote, // This operation should be executed in a larger type.
@@ -91,6 +87,9 @@ class SelectionDAGLegalize {
// If someone requests legalization of the new node, return itself.
if (From != To)
LegalizedNodes.insert(std::make_pair(To, To));
+
+ // Transfer SDDbgValues.
+ DAG.TransferDbgValues(From, To);
}
public:
@@ -172,6 +171,7 @@ private:
SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
+ SDValue ExpandInsertToVectorThroughStack(SDValue Op);
SDValue ExpandVectorBuildThroughStack(SDNode* Node);
std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
@@ -224,7 +224,6 @@ SelectionDAGLegalize::SelectionDAGLegalize(SelectionDAG &dag,
void SelectionDAGLegalize::LegalizeDAG() {
LastCALLSEQ_END = DAG.getEntryNode();
- IsLegalizingCall = false;
// The legalize process is inherently a bottom-up recursive process (users
// legalize their uses before themselves). Given infinite stack space, we
@@ -251,9 +250,16 @@ void SelectionDAGLegalize::LegalizeDAG() {
/// FindCallEndFromCallStart - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_END node that terminates the call sequence.
-static SDNode *FindCallEndFromCallStart(SDNode *Node) {
- if (Node->getOpcode() == ISD::CALLSEQ_END)
- return Node;
+static SDNode *FindCallEndFromCallStart(SDNode *Node, int depth = 0) {
+ // Nested CALLSEQ_START/END constructs aren't yet legal,
+ // but we can DTRT and handle them correctly here.
+ if (Node->getOpcode() == ISD::CALLSEQ_START)
+ depth++;
+ else if (Node->getOpcode() == ISD::CALLSEQ_END) {
+ depth--;
+ if (depth == 0)
+ return Node;
+ }
if (Node->use_empty())
return 0; // No CallSeqEnd
@@ -283,7 +289,7 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i)
if (User->getOperand(i) == TheChain)
- if (SDNode *Result = FindCallEndFromCallStart(User))
+ if (SDNode *Result = FindCallEndFromCallStart(User, depth))
return Result;
}
return 0;
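Editorial sketch (not part of the imported patch): FindCallEndFromCallStart above now carries a depth counter, so a nested CALLSEQ_START/CALLSEQ_END pair met while walking the chain is skipped rather than mistaken for the end of the outer call sequence (FindCallStartFromCallEnd in the next hunk does the same in the opposite direction). The bookkeeping on a plain marker string, as a stand-alone sketch:

  #include <cassert>
  #include <string>

  // Scan forward from a start marker ('S'), counting nested pairs, until the
  // matching end marker ('E') is reached.
  static size_t findMatchingEnd(const std::string &Seq, size_t StartIdx) {
    assert(Seq[StartIdx] == 'S' && "expected a start marker");
    int Depth = 0;
    for (size_t i = StartIdx; i < Seq.size(); ++i) {
      if (Seq[i] == 'S')
        ++Depth;                        // entering a (possibly nested) sequence
      else if (Seq[i] == 'E' && --Depth == 0)
        return i;                       // this end closes the sequence we started in
    }
    return std::string::npos;           // malformed input
  }

  int main() {
    std::string S("S.SE..E");           // indices 0..6
    assert(findMatchingEnd(S, 0) == 6); // the outer start matches the last end
    assert(findMatchingEnd(S, 2) == 3); // the nested pair matches itself
    return 0;
  }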
@@ -292,12 +298,26 @@ static SDNode *FindCallEndFromCallStart(SDNode *Node) {
/// FindCallStartFromCallEnd - Given a chained node that is part of a call
/// sequence, find the CALLSEQ_START node that initiates the call sequence.
static SDNode *FindCallStartFromCallEnd(SDNode *Node) {
+ int nested = 0;
assert(Node && "Didn't find callseq_start for a call??");
- if (Node->getOpcode() == ISD::CALLSEQ_START) return Node;
-
- assert(Node->getOperand(0).getValueType() == MVT::Other &&
- "Node doesn't have a token chain argument!");
- return FindCallStartFromCallEnd(Node->getOperand(0).getNode());
+ while (Node->getOpcode() != ISD::CALLSEQ_START || nested) {
+ Node = Node->getOperand(0).getNode();
+ assert(Node->getOperand(0).getValueType() == MVT::Other &&
+ "Node doesn't have a token chain argument!");
+ switch (Node->getOpcode()) {
+ default:
+ break;
+ case ISD::CALLSEQ_START:
+ if (!nested)
+ return Node;
+ nested--;
+ break;
+ case ISD::CALLSEQ_END:
+ nested++;
+ break;
+ }
+ }
+ return 0;
}
/// LegalizeAllNodesNotLeadingTo - Recursively walk the uses of N, looking to
@@ -377,12 +397,12 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP,
SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
if (Extend)
- return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl,
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, OrigVT,
DAG.getEntryNode(),
- CPIdx, PseudoSourceValue::getConstantPool(),
- 0, VT, false, false, Alignment);
+ CPIdx, MachinePointerInfo::getConstantPool(),
+ VT, false, false, Alignment);
return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0, false, false,
+ MachinePointerInfo::getConstantPool(), false, false,
Alignment);
}
@@ -395,7 +415,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
SDValue Val = ST->getValue();
EVT VT = Val.getValueType();
int Alignment = ST->getAlignment();
- int SVOffset = ST->getSrcValueOffset();
DebugLoc dl = ST->getDebugLoc();
if (ST->getMemoryVT().isFloatingPoint() ||
ST->getMemoryVT().isVector()) {
@@ -404,10 +423,9 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Expand to a bitconvert of the value to the integer type of the
// same size, then a (misaligned) int store.
// FIXME: Does not handle truncating floating point stores!
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, intVT, Val);
- return DAG.getStore(Chain, dl, Result, Ptr, ST->getSrcValue(),
- SVOffset, ST->isVolatile(), ST->isNonTemporal(),
- Alignment);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, intVT, Val);
+ return DAG.getStore(Chain, dl, Result, Ptr, ST->getPointerInfo(),
+ ST->isVolatile(), ST->isNonTemporal(), Alignment);
} else {
// Do a (aligned) store to a stack slot, then copy from the stack slot
// to the final destination using (unaligned) integer loads and stores.
@@ -425,8 +443,8 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Perform the original store, only redirected to the stack slot.
SDValue Store = DAG.getTruncStore(Chain, dl,
- Val, StackPtr, NULL, 0, StoredVT,
- false, false, 0);
+ Val, StackPtr, MachinePointerInfo(),
+ StoredVT, false, false, 0);
SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
SmallVector<SDValue, 8> Stores;
unsigned Offset = 0;
@@ -434,11 +452,12 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Do all but one copies using the full register width.
for (unsigned i = 1; i < NumRegs; i++) {
// Load one integer register's worth from the stack slot.
- SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr, NULL, 0,
+ SDValue Load = DAG.getLoad(RegVT, dl, Store, StackPtr,
+ MachinePointerInfo(),
false, false, 0);
// Store it to the final location. Remember the store.
Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo().getWithOffset(Offset),
ST->isVolatile(), ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
// Increment the pointers.
@@ -455,11 +474,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
8 * (StoredBytes - Offset));
// Load from the stack slot.
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr,
- NULL, 0, MemVT, false, false, 0);
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr,
+ MachinePointerInfo(),
+ MemVT, false, false, 0);
Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr,
- ST->getSrcValue(), SVOffset + Offset,
+ ST->getPointerInfo()
+ .getWithOffset(Offset),
MemVT, ST->isVolatile(),
ST->isNonTemporal(),
MinAlign(ST->getAlignment(), Offset)));
@@ -484,13 +505,13 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
// Store the two parts
SDValue Store1, Store2;
Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
- ST->getSrcValue(), SVOffset, NewStoredVT,
+ ST->getPointerInfo(), NewStoredVT,
ST->isVolatile(), ST->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
Alignment = MinAlign(Alignment, IncrementSize);
Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
- ST->getSrcValue(), SVOffset + IncrementSize,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
Alignment);
@@ -501,7 +522,6 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
static
SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
const TargetLowering &TLI) {
- int SVOffset = LD->getSrcValueOffset();
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
EVT VT = LD->getValueType(0);
@@ -512,74 +532,75 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
if (TLI.isTypeLegal(intVT)) {
// Expand to a (misaligned) integer load of the same size,
// then bitconvert to floating point or vector.
- SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, LD->isVolatile(),
+ SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
+ LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
- SDValue Result = DAG.getNode(ISD::BIT_CONVERT, dl, LoadedVT, newLoad);
+ SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
if (VT.isFloatingPoint() && LoadedVT != VT)
Result = DAG.getNode(ISD::FP_EXTEND, dl, VT, Result);
SDValue Ops[] = { Result, Chain };
return DAG.getMergeValues(Ops, 2, dl);
- } else {
- // Copy the value to a (aligned) stack slot using (unaligned) integer
- // loads and stores, then do a (aligned) load from the stack slot.
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
- unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
- unsigned RegBytes = RegVT.getSizeInBits() / 8;
- unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
-
- // Make sure the stack slot is also aligned for the register type.
- SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
-
- SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
- SmallVector<SDValue, 8> Stores;
- SDValue StackPtr = StackBase;
- unsigned Offset = 0;
-
- // Do all but one copies using the full register width.
- for (unsigned i = 1; i < NumRegs; i++) {
- // Load one integer register's worth from the original location.
- SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + Offset, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
- // Follow the load with a store to the stack slot. Remember the store.
- Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, false, false, 0));
- // Increment the pointers.
- Offset += RegBytes;
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
- StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
- Increment);
- }
+ }
- // The last copy may be partial. Do an extending load.
- EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
- 8 * (LoadedBytes - Offset));
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr,
- LD->getSrcValue(), SVOffset + Offset,
- MemVT, LD->isVolatile(),
- LD->isNonTemporal(),
- MinAlign(LD->getAlignment(), Offset));
+ // Copy the value to a (aligned) stack slot using (unaligned) integer
+ // loads and stores, then do a (aligned) load from the stack slot.
+ EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT);
+ unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8;
+ unsigned RegBytes = RegVT.getSizeInBits() / 8;
+ unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes;
+
+ // Make sure the stack slot is also aligned for the register type.
+ SDValue StackBase = DAG.CreateStackTemporary(LoadedVT, RegVT);
+
+ SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+ SmallVector<SDValue, 8> Stores;
+ SDValue StackPtr = StackBase;
+ unsigned Offset = 0;
+
+ // Do all but one copies using the full register width.
+ for (unsigned i = 1; i < NumRegs; i++) {
+ // Load one integer register's worth from the original location.
+ SDValue Load = DAG.getLoad(RegVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ LD->isVolatile(), LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
// Follow the load with a store to the stack slot. Remember the store.
- // On big-endian machines this requires a truncating store to ensure
- // that the bits end up in the right place.
- Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
- NULL, 0, MemVT, false, false, 0));
-
- // The order of the stores doesn't matter - say it with a TokenFactor.
- SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
- Stores.size());
-
- // Finally, perform the original load only redirected to the stack slot.
- Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase,
- NULL, 0, LoadedVT, false, false, 0);
-
- // Callers expect a MERGE_VALUES node.
- SDValue Ops[] = { Load, TF };
- return DAG.getMergeValues(Ops, 2, dl);
+ Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), false, false, 0));
+ // Increment the pointers.
+ Offset += RegBytes;
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
+ StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr,
+ Increment);
}
+
+ // The last copy may be partial. Do an extending load.
+ EVT MemVT = EVT::getIntegerVT(*DAG.getContext(),
+ 8 * (LoadedBytes - Offset));
+ SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ MemVT, LD->isVolatile(),
+ LD->isNonTemporal(),
+ MinAlign(LD->getAlignment(), Offset));
+ // Follow the load with a store to the stack slot. Remember the store.
+ // On big-endian machines this requires a truncating store to ensure
+ // that the bits end up in the right place.
+ Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, StackPtr,
+ MachinePointerInfo(), MemVT,
+ false, false, 0));
+
+ // The order of the stores doesn't matter - say it with a TokenFactor.
+ SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
+ Stores.size());
+
+ // Finally, perform the original load only redirected to the stack slot.
+ Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase,
+ MachinePointerInfo(), LoadedVT, false, false, 0);
+
+ // Callers expect a MERGE_VALUES node.
+ SDValue Ops[] = { Load, TF };
+ return DAG.getMergeValues(Ops, 2, dl);
}
assert(LoadedVT.isInteger() && !LoadedVT.isVector() &&
"Unaligned load of unsupported type.");
@@ -602,22 +623,24 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
// Load the value in two parts
SDValue Lo, Hi;
if (TLI.isLittleEndian()) {
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
} else {
- Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset, NewLoadedVT, LD->isVolatile(),
+ Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getConstant(IncrementSize, TLI.getPointerTy()));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(),
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ NewLoadedVT, LD->isVolatile(),
LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
}
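Editorial sketch (not part of the imported patch): ExpandUnalignedLoad above rebuilds a misaligned integer load from two narrower loads at Ptr and Ptr + IncrementSize; the surrounding code then shifts the high part into place and ORs the halves back together. A host-side illustration of that recombination for a 32-bit value read as two 16-bit halves, assuming a little-endian layout:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  static uint32_t loadUnaligned32(const unsigned char *P) {
    uint16_t Lo, Hi;
    std::memcpy(&Lo, P, sizeof(Lo));      // bytes 0-1 (the "Lo" load)
    std::memcpy(&Hi, P + 2, sizeof(Hi));  // bytes 2-3 (the "Hi" load at Ptr + 2)
    return static_cast<uint32_t>(Lo) | (static_cast<uint32_t>(Hi) << 16);
  }

  int main() {
    unsigned char Buf[5] = {0, 0x78, 0x56, 0x34, 0x12};
    assert(loadUnaligned32(Buf + 1) == 0x12345678u); // value starts at an odd address
    return 0;
  }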
@@ -660,7 +683,7 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
// Store the vector.
SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Tmp1, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
// Truncate or zero extend offset to target pointer type.
@@ -671,13 +694,11 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
Tmp3 = DAG.getNode(ISD::MUL, dl, IdxVT, Tmp3,DAG.getConstant(EltSize, IdxVT));
SDValue StackPtr2 = DAG.getNode(ISD::ADD, dl, IdxVT, Tmp3, StackPtr);
// Store the scalar value.
- Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2,
- PseudoSourceValue::getFixedStack(SPFI), 0, EltVT,
+ Ch = DAG.getTruncStore(Ch, dl, Tmp2, StackPtr2, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the updated vector.
return DAG.getLoad(VT, dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
- false, false, 0);
+ MachinePointerInfo::getFixedStack(SPFI), false, false, 0);
}
@@ -719,7 +740,6 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
SDValue Tmp1 = ST->getChain();
SDValue Tmp2 = ST->getBasePtr();
SDValue Tmp3;
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -730,29 +750,34 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().
bitcastToAPInt().zextOrTrunc(32),
MVT::i32);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (CFP->getValueType(0) == MVT::f64) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (CFP->getValueType(0) == MVT::f64) {
// If this target supports 64-bit registers, do a single 64-bit store.
if (getTypeAction(MVT::i64) == Legal) {
Tmp3 = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
zextOrTrunc(64), MVT::i64);
- return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
- } else if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
+ return DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
+ }
+
+ if (getTypeAction(MVT::i32) == Legal && !ST->isVolatile()) {
// Otherwise, if the target supports 32-bit registers, use 2 32-bit
// stores. If the target supports neither 32- nor 64-bits, this
// xform is certainly not worth it.
const APInt &IntVal =CFP->getValueAPF().bitcastToAPInt();
- SDValue Lo = DAG.getConstant(APInt(IntVal).trunc(32), MVT::i32);
+ SDValue Lo = DAG.getConstant(IntVal.trunc(32), MVT::i32);
SDValue Hi = DAG.getConstant(IntVal.lshr(32).trunc(32), MVT::i32);
if (TLI.isBigEndian()) std::swap(Lo, Hi);
- Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal, Alignment);
+ Lo = DAG.getStore(Tmp1, dl, Lo, Tmp2, ST->getPointerInfo(), isVolatile,
+ isNonTemporal, Alignment);
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(4));
- Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(), SVOffset+4,
+ Hi = DAG.getStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(4),
isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
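Editorial sketch (not part of the imported patch): OptimizeFloatStore above stores an f64 constant through its integer bit pattern, using a single i64 store when 64-bit registers are legal and otherwise two i32 stores of the low and high halves (swapped on big-endian targets). The split itself, on the host, assuming IEEE-754 binary64 doubles:

  #include <cassert>
  #include <cstdint>
  #include <cstring>

  int main() {
    double D = 1.0;
    uint64_t Bits;
    static_assert(sizeof(double) == sizeof(uint64_t), "assumes 64-bit doubles");
    std::memcpy(&Bits, &D, sizeof(Bits));            // bitcast, not a conversion
    uint32_t Lo = static_cast<uint32_t>(Bits);       // stored at the base address
    uint32_t Hi = static_cast<uint32_t>(Bits >> 32); // stored at base + 4
    assert(Lo == 0u && Hi == 0x3ff00000u);           // bit pattern of 1.0
    return 0;
  }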
@@ -792,7 +817,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
bool isCustom = false;
// Figure out the correct action; the way to query this varies by opcode
- TargetLowering::LegalizeAction Action;
+ TargetLowering::LegalizeAction Action = TargetLowering::Legal;
bool SimpleFinishLegalizing = true;
switch (Node->getOpcode()) {
case ISD::INTRINSIC_W_CHAIN:
@@ -860,6 +885,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
case ISD::FRAME_TO_ARGS_OFFSET:
case ISD::EH_SJLJ_SETJMP:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
// These operations lie about being legal: when they claim to be legal,
// they should actually be expanded.
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0));
@@ -996,6 +1022,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
}
break;
case ISD::CALLSEQ_START: {
+ static int depth = 0;
SDNode *CallEnd = FindCallEndFromCallStart(Node);
// Recursively Legalize all of the inputs of the call end that do not lead
@@ -1013,7 +1040,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Merge in the last call to ensure that this call starts after the last
// call ended.
- if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) {
+ if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken && depth == 0) {
Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
Tmp1, LastCALLSEQ_END);
Tmp1 = LegalizeOp(Tmp1);
@@ -1036,14 +1063,18 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// sequence have been legalized, legalize the call itself. During this
// process, no libcalls can/will be inserted, guaranteeing that no calls
// can overlap.
- assert(!IsLegalizingCall && "Inconsistent sequentialization of calls!");
+
+ SDValue Saved_LastCALLSEQ_END = LastCALLSEQ_END ;
// Note that we are selecting this call!
LastCALLSEQ_END = SDValue(CallEnd, 0);
- IsLegalizingCall = true;
+ depth++;
// Legalize the call, starting from the CALLSEQ_END.
LegalizeOp(LastCALLSEQ_END);
- assert(!IsLegalizingCall && "CALLSEQ_END should have cleared this!");
+ depth--;
+ assert(depth >= 0 && "Un-matched CALLSEQ_START?");
+ if (depth > 0)
+ LastCALLSEQ_END = Saved_LastCALLSEQ_END;
return Result;
}
case ISD::CALLSEQ_END:
@@ -1062,7 +1093,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain.
// Do not try to legalize the target-specific arguments (#1+), except for
// an optional flag input.
- if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Flag){
+ if (Node->getOperand(Node->getNumOperands()-1).getValueType() != MVT::Glue){
if (Tmp1 != Node->getOperand(0)) {
SmallVector<SDValue, 8> Ops(Node->op_begin(), Node->op_end());
Ops[0] = Tmp1;
@@ -1082,10 +1113,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
Result.getResNo());
}
}
- assert(IsLegalizingCall && "Call sequence imbalance between start/end?");
// This finishes up call legalization.
- IsLegalizingCall = false;
-
// If the CALLSEQ_END node has a flag, remember that we legalized it.
AddLegalizedOperand(SDValue(Node, 0), Result.getValue(0));
if (Node->getNumValues() == 2)
@@ -1136,11 +1164,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Change base type to a different vector type.
EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT);
- Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
+ Tmp1 = DAG.getLoad(NVT, dl, Tmp1, Tmp2, LD->getPointerInfo(),
LD->isVolatile(), LD->isNonTemporal(),
LD->getAlignment());
- Tmp3 = LegalizeOp(DAG.getNode(ISD::BIT_CONVERT, dl, VT, Tmp1));
+ Tmp3 = LegalizeOp(DAG.getNode(ISD::BITCAST, dl, VT, Tmp1));
Tmp4 = LegalizeOp(Tmp1.getValue(1));
break;
}
@@ -1150,227 +1177,224 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
AddLegalizedOperand(SDValue(Node, 0), Tmp3);
AddLegalizedOperand(SDValue(Node, 1), Tmp4);
return Op.getResNo() ? Tmp4 : Tmp3;
- } else {
- EVT SrcVT = LD->getMemoryVT();
- unsigned SrcWidth = SrcVT.getSizeInBits();
- int SVOffset = LD->getSrcValueOffset();
- unsigned Alignment = LD->getAlignment();
- bool isVolatile = LD->isVolatile();
- bool isNonTemporal = LD->isNonTemporal();
-
- if (SrcWidth != SrcVT.getStoreSizeInBits() &&
- // Some targets pretend to have an i1 loading operation, and actually
- // load an i8. This trick is correct for ZEXTLOAD because the top 7
- // bits are guaranteed to be zero; it helps the optimizers understand
- // that these bits are zero. It is also useful for EXTLOAD, since it
- // tells the optimizers that those bits are undefined. It would be
- // nice to have an effective generic way of getting these benefits...
- // Until such a way is found, don't insist on promoting i1 here.
- (SrcVT != MVT::i1 ||
- TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
- // Promote to a byte-sized load if not loading an integral number of
- // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
- unsigned NewWidth = SrcVT.getStoreSizeInBits();
- EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
- SDValue Ch;
-
- // The extra bits are guaranteed to be zero, since we stored them that
- // way. A zext load from NVT thus automatically gives zext from SrcVT.
-
- ISD::LoadExtType NewExtType =
- ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
-
- Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(), SVOffset,
- NVT, isVolatile, isNonTemporal, Alignment);
-
- Ch = Result.getValue(1); // The chain.
-
- if (ExtType == ISD::SEXTLOAD)
- // Having the top bits zero doesn't help when sign extending.
- Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
- // All the top bits are guaranteed to be zero - inform the optimizers.
- Result = DAG.getNode(ISD::AssertZext, dl,
- Result.getValueType(), Result,
- DAG.getValueType(SrcVT));
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else if (SrcWidth & (SrcWidth - 1)) {
- // If not loading a power-of-2 number of bits, expand as two loads.
- assert(!SrcVT.isVector() && "Unsupported extload!");
- unsigned RoundWidth = 1 << Log2_32(SrcWidth);
- assert(RoundWidth < SrcWidth);
- unsigned ExtraWidth = SrcWidth - RoundWidth;
- assert(ExtraWidth < RoundWidth);
- assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
- "Load size not an integral number of bytes!");
- EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
- EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
- SDValue Lo, Hi, Ch;
- unsigned IncrementSize;
+ }
- if (TLI.isLittleEndian()) {
- // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
- // Load the bottom RoundWidth bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+ EVT SrcVT = LD->getMemoryVT();
+ unsigned SrcWidth = SrcVT.getSizeInBits();
+ unsigned Alignment = LD->getAlignment();
+ bool isVolatile = LD->isVolatile();
+ bool isNonTemporal = LD->isNonTemporal();
+
+ if (SrcWidth != SrcVT.getStoreSizeInBits() &&
+ // Some targets pretend to have an i1 loading operation, and actually
+ // load an i8. This trick is correct for ZEXTLOAD because the top 7
+ // bits are guaranteed to be zero; it helps the optimizers understand
+ // that these bits are zero. It is also useful for EXTLOAD, since it
+ // tells the optimizers that those bits are undefined. It would be
+ // nice to have an effective generic way of getting these benefits...
+ // Until such a way is found, don't insist on promoting i1 here.
+ (SrcVT != MVT::i1 ||
+ TLI.getLoadExtAction(ExtType, MVT::i1) == TargetLowering::Promote)) {
+ // Promote to a byte-sized load if not loading an integral number of
+ // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
+ unsigned NewWidth = SrcVT.getStoreSizeInBits();
+ EVT NVT = EVT::getIntegerVT(*DAG.getContext(), NewWidth);
+ SDValue Ch;
+
+ // The extra bits are guaranteed to be zero, since we stored them that
+ // way. A zext load from NVT thus automatically gives zext from SrcVT.
+
+ ISD::LoadExtType NewExtType =
+ ExtType == ISD::ZEXTLOAD ? ISD::ZEXTLOAD : ISD::EXTLOAD;
+
+ Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
+
+ Ch = Result.getValue(1); // The chain.
+
+ if (ExtType == ISD::SEXTLOAD)
+ // Having the top bits zero doesn't help when sign extending.
+ Result = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else if (ExtType == ISD::ZEXTLOAD || NVT == Result.getValueType())
+ // All the top bits are guaranteed to be zero - inform the optimizers.
+ Result = DAG.getNode(ISD::AssertZext, dl,
+ Result.getValueType(), Result,
+ DAG.getValueType(SrcVT));
+
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else if (SrcWidth & (SrcWidth - 1)) {
+ // If not loading a power-of-2 number of bits, expand as two loads.
+ assert(!SrcVT.isVector() && "Unsupported extload!");
+ unsigned RoundWidth = 1 << Log2_32(SrcWidth);
+ assert(RoundWidth < SrcWidth);
+ unsigned ExtraWidth = SrcWidth - RoundWidth;
+ assert(ExtraWidth < RoundWidth);
+ assert(!(RoundWidth % 8) && !(ExtraWidth % 8) &&
+ "Load size not an integral number of bytes!");
+ EVT RoundVT = EVT::getIntegerVT(*DAG.getContext(), RoundWidth);
+ EVT ExtraVT = EVT::getIntegerVT(*DAG.getContext(), ExtraWidth);
+ SDValue Lo, Hi, Ch;
+ unsigned IncrementSize;
+
+ if (TLI.isLittleEndian()) {
+ // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16)
+ // Load the bottom RoundWidth bits.
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ } else {
+ // Big endian - avoid unaligned loads.
+ // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
+ // Load the top RoundWidth bits.
+ Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo(), RoundVT, isVolatile,
+ isNonTemporal, Alignment);
+
+ // Load the remaining ExtraWidth bits.
+ IncrementSize = RoundWidth / 8;
+ Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
+ DAG.getIntPtrConstant(IncrementSize));
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
+ dl, Node->getValueType(0), Tmp1, Tmp2,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+
+ // Build a factor node to remember that this load is independent of
+ // the other one.
+ Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
+ Hi.getValue(1));
+
+ // Move the top bits to the right place.
+ Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
+ DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
+
+ // Join the hi and lo parts.
+ Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ }
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
+ Tmp1 = LegalizeOp(Result);
+ Tmp2 = LegalizeOp(Ch);
+ } else {
+ switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
+ default: assert(0 && "This action is not supported yet!");
+ case TargetLowering::Custom:
+ isCustom = true;
+ // FALLTHROUGH
+ case TargetLowering::Legal:
+ Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
+ Tmp1, Tmp2, LD->getOffset()),
+ Result.getResNo());
+ Tmp1 = Result.getValue(0);
+ Tmp2 = Result.getValue(1);
+
+ if (isCustom) {
+ Tmp3 = TLI.LowerOperation(Result, DAG);
+ if (Tmp3.getNode()) {
+ Tmp1 = LegalizeOp(Tmp3);
+ Tmp2 = LegalizeOp(Tmp3.getValue(1));
+ }
} else {
- // Big endian - avoid unaligned loads.
- // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8
- // Load the top RoundWidth bits.
- Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset, RoundVT, isVolatile,
- isNonTemporal, Alignment);
-
- // Load the remaining ExtraWidth bits.
- IncrementSize = RoundWidth / 8;
- Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
- DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD,
- Node->getValueType(0), dl, Tmp1, Tmp2,
- LD->getSrcValue(), SVOffset + IncrementSize,
- ExtraVT, isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
-
- // Build a factor node to remember that this load is independent of
- // the other one.
- Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1),
- Hi.getValue(1));
-
- // Move the top bits to the right place.
- Hi = DAG.getNode(ISD::SHL, dl, Hi.getValueType(), Hi,
- DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
-
- // Join the hi and lo parts.
- Result = DAG.getNode(ISD::OR, dl, Node->getValueType(0), Lo, Hi);
- }
-
- Tmp1 = LegalizeOp(Result);
- Tmp2 = LegalizeOp(Ch);
- } else {
- switch (TLI.getLoadExtAction(ExtType, SrcVT)) {
- default: assert(0 && "This action is not supported yet!");
- case TargetLowering::Custom:
- isCustom = true;
- // FALLTHROUGH
- case TargetLowering::Legal:
- Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
- Tmp1, Tmp2, LD->getOffset()),
- Result.getResNo());
- Tmp1 = Result.getValue(0);
- Tmp2 = Result.getValue(1);
-
- if (isCustom) {
- Tmp3 = TLI.LowerOperation(Result, DAG);
- if (Tmp3.getNode()) {
- Tmp1 = LegalizeOp(Tmp3);
- Tmp2 = LegalizeOp(Tmp3.getValue(1));
- }
- } else {
- // If this is an unaligned load and the target doesn't support it,
- // expand it.
- if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
- const Type *Ty =
- LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
- unsigned ABIAlignment =
- TLI.getTargetData()->getABITypeAlignment(Ty);
- if (LD->getAlignment() < ABIAlignment){
- Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
- DAG, TLI);
- Tmp1 = Result.getOperand(0);
- Tmp2 = Result.getOperand(1);
- Tmp1 = LegalizeOp(Tmp1);
- Tmp2 = LegalizeOp(Tmp2);
- }
+ // If this is an unaligned load and the target doesn't support it,
+ // expand it.
+ if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) {
+ const Type *Ty =
+ LD->getMemoryVT().getTypeForEVT(*DAG.getContext());
+ unsigned ABIAlignment =
+ TLI.getTargetData()->getABITypeAlignment(Ty);
+ if (LD->getAlignment() < ABIAlignment){
+ Result = ExpandUnalignedLoad(cast<LoadSDNode>(Result.getNode()),
+ DAG, TLI);
+ Tmp1 = Result.getOperand(0);
+ Tmp2 = Result.getOperand(1);
+ Tmp1 = LegalizeOp(Tmp1);
+ Tmp2 = LegalizeOp(Tmp2);
}
}
- break;
- case TargetLowering::Expand:
- if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
- SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(),
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- unsigned ExtendOp;
- switch (ExtType) {
- case ISD::EXTLOAD:
- ExtendOp = (SrcVT.isFloatingPoint() ?
- ISD::FP_EXTEND : ISD::ANY_EXTEND);
- break;
- case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
- case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
- default: llvm_unreachable("Unexpected extend load type!");
- }
- Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
- Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Load.getValue(1));
+ }
+ break;
+ case TargetLowering::Expand:
+ if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && isTypeLegal(SrcVT)) {
+ SDValue Load = DAG.getLoad(SrcVT, dl, Tmp1, Tmp2,
+ LD->getPointerInfo(),
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ unsigned ExtendOp;
+ switch (ExtType) {
+ case ISD::EXTLOAD:
+ ExtendOp = (SrcVT.isFloatingPoint() ?
+ ISD::FP_EXTEND : ISD::ANY_EXTEND);
break;
+ case ISD::SEXTLOAD: ExtendOp = ISD::SIGN_EXTEND; break;
+ case ISD::ZEXTLOAD: ExtendOp = ISD::ZERO_EXTEND; break;
+ default: llvm_unreachable("Unexpected extend load type!");
}
- // FIXME: This does not work for vectors on most targets. Sign- and
- // zero-extend operations are currently folded into extending loads,
- // whether they are legal or not, and then we end up here without any
- // support for legalizing them.
- assert(ExtType != ISD::EXTLOAD &&
- "EXTLOAD should always be supported!");
- // Turn the unsupported load into an EXTLOAD followed by an explicit
- // zero/sign extend inreg.
- Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl,
- Tmp1, Tmp2, LD->getSrcValue(),
- LD->getSrcValueOffset(), SrcVT,
- LD->isVolatile(), LD->isNonTemporal(),
- LD->getAlignment());
- SDValue ValRes;
- if (ExtType == ISD::SEXTLOAD)
- ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
- Result.getValueType(),
- Result, DAG.getValueType(SrcVT));
- else
- ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT);
- Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
- Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ Result = DAG.getNode(ExtendOp, dl, Node->getValueType(0), Load);
+ Tmp1 = LegalizeOp(Result); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Load.getValue(1));
break;
}
+ // FIXME: This does not work for vectors on most targets. Sign- and
+ // zero-extend operations are currently folded into extending loads,
+ // whether they are legal or not, and then we end up here without any
+ // support for legalizing them.
+ assert(ExtType != ISD::EXTLOAD &&
+ "EXTLOAD should always be supported!");
+ // Turn the unsupported load into an EXTLOAD followed by an explicit
+ // zero/sign extend inreg.
+ Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
+ Tmp1, Tmp2, LD->getPointerInfo(), SrcVT,
+ LD->isVolatile(), LD->isNonTemporal(),
+ LD->getAlignment());
+ SDValue ValRes;
+ if (ExtType == ISD::SEXTLOAD)
+ ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
+ Result.getValueType(),
+ Result, DAG.getValueType(SrcVT));
+ else
+ ValRes = DAG.getZeroExtendInReg(Result, dl, SrcVT.getScalarType());
+ Tmp1 = LegalizeOp(ValRes); // Relegalize new nodes.
+ Tmp2 = LegalizeOp(Result.getValue(1)); // Relegalize new nodes.
+ break;
}
-
- // Since loads produce two values, make sure to remember that we legalized
- // both of them.
- AddLegalizedOperand(SDValue(Node, 0), Tmp1);
- AddLegalizedOperand(SDValue(Node, 1), Tmp2);
- return Op.getResNo() ? Tmp2 : Tmp1;
}
+
+ // Since loads produce two values, make sure to remember that we legalized
+ // both of them.
+ AddLegalizedOperand(SDValue(Node, 0), Tmp1);
+ AddLegalizedOperand(SDValue(Node, 1), Tmp2);
+ return Op.getResNo() ? Tmp2 : Tmp1;
}
case ISD::STORE: {
StoreSDNode *ST = cast<StoreSDNode>(Node);
Tmp1 = LegalizeOp(ST->getChain()); // Legalize the chain.
Tmp2 = LegalizeOp(ST->getBasePtr()); // Legalize the pointer.
- int SVOffset = ST->getSrcValueOffset();
unsigned Alignment = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -1408,10 +1432,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
break;
case TargetLowering::Promote:
assert(VT.isVector() && "Unknown legal promote case!");
- Tmp3 = DAG.getNode(ISD::BIT_CONVERT, dl,
+ Tmp3 = DAG.getNode(ISD::BITCAST, dl,
TLI.getTypeToPromoteTo(ISD::STORE, VT), Tmp3);
Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2,
- ST->getSrcValue(), SVOffset, isVolatile,
+ ST->getPointerInfo(), isVolatile,
isNonTemporal, Alignment);
break;
}
@@ -1430,9 +1454,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
EVT NVT = EVT::getIntegerVT(*DAG.getContext(),
StVT.getStoreSizeInBits());
Tmp3 = DAG.getZeroExtendInReg(Tmp3, dl, StVT);
- Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, NVT, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ NVT, isVolatile, isNonTemporal, Alignment);
} else if (StWidth & (StWidth - 1)) {
// If not storing a power-of-2 number of bits, expand as two stores.
assert(!StVT.isVector() && "Unsupported truncstore!");
@@ -1450,8 +1473,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
if (TLI.isLittleEndian()) {
// TRUNCSTORE:i24 X -> TRUNCSTORE:i16 X, TRUNCSTORE@+2:i8 (srl X, 16)
// Store the bottom RoundWidth bits.
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ RoundVT,
isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
@@ -1460,9 +1483,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
DAG.getIntPtrConstant(IncrementSize));
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(RoundWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
} else {
// Big endian - avoid unaligned stores.
@@ -1470,17 +1493,16 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// Store the top RoundWidth bits.
Hi = DAG.getNode(ISD::SRL, dl, Tmp3.getValueType(), Tmp3,
DAG.getConstant(ExtraWidth, TLI.getShiftAmountTy()));
- Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getSrcValue(),
- SVOffset, RoundVT, isVolatile, isNonTemporal,
- Alignment);
+ Hi = DAG.getTruncStore(Tmp1, dl, Hi, Tmp2, ST->getPointerInfo(),
+ RoundVT, isVolatile, isNonTemporal, Alignment);
// Store the remaining ExtraWidth bits.
IncrementSize = RoundWidth / 8;
Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2,
DAG.getIntPtrConstant(IncrementSize));
- Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset + IncrementSize, ExtraVT, isVolatile,
- isNonTemporal,
+ Lo = DAG.getTruncStore(Tmp1, dl, Tmp3, Tmp2,
+ ST->getPointerInfo().getWithOffset(IncrementSize),
+ ExtraVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
}
@@ -1514,9 +1536,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) {
// TRUNCSTORE:i16 i32 -> STORE i16
assert(isTypeLegal(StVT) && "Do not know how to expand this store!");
Tmp3 = DAG.getNode(ISD::TRUNCATE, dl, StVT, Tmp3);
- Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getSrcValue(),
- SVOffset, isVolatile, isNonTemporal,
- Alignment);
+ Result = DAG.getStore(Tmp1, dl, Tmp3, Tmp2, ST->getPointerInfo(),
+ isVolatile, isNonTemporal, Alignment);
break;
}
}
@@ -1543,8 +1564,8 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
DebugLoc dl = Op.getDebugLoc();
// Store the value to a temporary stack slot, then LOAD the returned part.
SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
- SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Add the offset to the index.
unsigned EltSize =
@@ -1560,12 +1581,56 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);
if (Op.getValueType().isVector())
- return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0,
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr,MachinePointerInfo(),
false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr,
+ MachinePointerInfo(),
+ Vec.getValueType().getVectorElementType(),
+ false, false, 0);
+}
+
+SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
+ assert(Op.getValueType().isVector() && "Non-vector insert subvector!");
+
+ SDValue Vec = Op.getOperand(0);
+ SDValue Part = Op.getOperand(1);
+ SDValue Idx = Op.getOperand(2);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // Store the value to a temporary stack slot, then LOAD the returned part.
+
+ SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
+ int FI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
+
+ // First store the whole vector.
+ SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, PtrInfo,
+ false, false, 0);
+
+ // Then store the inserted part.
+
+ // Add the offset to the index.
+ unsigned EltSize =
+ Vec.getValueType().getVectorElementType().getSizeInBits()/8;
+
+ Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
+ DAG.getConstant(EltSize, Idx.getValueType()));
+
+ if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
+ Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
else
- return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr,
- NULL, 0, Vec.getValueType().getVectorElementType(),
- false, false, 0);
+ Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+
+ SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
+ StackPtr);
+
+ // Store the subvector.
+ Ch = DAG.getStore(DAG.getEntryNode(), dl, Part, SubStackPtr,
+ MachinePointerInfo(), false, false, 0);
+
+ // Finally, load the updated vector.
+ return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, PtrInfo,
+ false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
@@ -1578,7 +1643,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(FI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
// Emit a store of each element to the stack slot.
SmallVector<SDValue, 8> Stores;
@@ -1597,11 +1662,13 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// element type, only store the bits necessary.
if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
EltVT, false, false, 0));
} else
Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl,
- Node->getOperand(i), Idx, SV, Offset,
+ Node->getOperand(i), Idx,
+ PtrInfo.getWithOffset(Offset),
false, false, 0));
}
@@ -1613,7 +1680,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
StoreChain = DAG.getEntryNode();
// Result is a load from the stack slot.
- return DAG.getLoad(VT, dl, StoreChain, FIPtr, SV, 0, false, false, 0);
+ return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo, false, false, 0);
}
SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
@@ -1628,7 +1695,7 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
EVT IVT = EVT::getIntegerVT(*DAG.getContext(), FloatVT.getSizeInBits());
if (isTypeLegal(IVT)) {
// Convert to an integer with the same sign bit.
- SignBit = DAG.getNode(ISD::BIT_CONVERT, dl, IVT, Tmp2);
+ SignBit = DAG.getNode(ISD::BITCAST, dl, IVT, Tmp2);
} else {
// Store the float to memory, then load the sign part out as an integer.
MVT LoadTy = TLI.getPointerTy();
@@ -1636,12 +1703,13 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
SDValue StackPtr = DAG.CreateStackTemporary(FloatVT, LoadTy);
// Then store the float to it.
SDValue Ch =
- DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, NULL, 0,
+ DAG.getStore(DAG.getEntryNode(), dl, Tmp2, StackPtr, MachinePointerInfo(),
false, false, 0);
if (TLI.isBigEndian()) {
assert(FloatVT.isByteSized() && "Unsupported floating point type!");
// Load out a legal integer with the same sign bit as the float.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, StackPtr, MachinePointerInfo(),
+ false, false, 0);
} else { // Little endian
SDValue LoadPtr = StackPtr;
// The float may be wider than the integer we are going to load. Advance
@@ -1651,7 +1719,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
LoadPtr, DAG.getIntPtrConstant(ByteOffset));
// Load a legal integer containing the sign bit.
- SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, NULL, 0, false, false, 0);
+ SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
+ false, false, 0);
// Move the sign bit to the top bit of the loaded integer.
unsigned BitShift = LoadTy.getSizeInBits() -
(FloatVT.getSizeInBits() - 8 * ByteOffset);
@@ -1694,7 +1763,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
Chain = SP.getValue(1);
unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align > StackAlign)
SP = DAG.getNode(ISD::AND, dl, VT, SP,
DAG.getConstant(-(uint64_t)Align, VT));
@@ -1768,7 +1837,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
FrameIndexSDNode *StackPtrFI = cast<FrameIndexSDNode>(FIPtr);
int SPFI = StackPtrFI->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
unsigned SrcSize = SrcOp.getValueType().getSizeInBits();
unsigned SlotSize = SlotVT.getSizeInBits();
@@ -1782,21 +1851,21 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
if (SrcSize > SlotSize)
Store = DAG.getTruncStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, SlotVT, false, false, SrcAlign);
+ PtrInfo, SlotVT, false, false, SrcAlign);
else {
assert(SrcSize == SlotSize && "Invalid store");
Store = DAG.getStore(DAG.getEntryNode(), dl, SrcOp, FIPtr,
- SV, 0, false, false, SrcAlign);
+ PtrInfo, false, false, SrcAlign);
}
// Result is a load from the stack slot.
if (SlotSize == DestSize)
- return DAG.getLoad(DestVT, dl, Store, FIPtr, SV, 0, false, false,
- DestAlign);
+ return DAG.getLoad(DestVT, dl, Store, FIPtr, PtrInfo,
+ false, false, DestAlign);
assert(SlotSize < DestSize && "Unknown extension!");
- return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT,
- false, false, DestAlign);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr,
+ PtrInfo, SlotVT, false, false, DestAlign);
}
SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
@@ -1810,11 +1879,11 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
SDValue Ch = DAG.getTruncStore(DAG.getEntryNode(), dl, Node->getOperand(0),
StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
Node->getValueType(0).getVectorElementType(),
false, false, 0);
return DAG.getLoad(Node->getValueType(0), dl, Ch, StackPtr,
- PseudoSourceValue::getFixedStack(SPFI), 0,
+ MachinePointerInfo::getFixedStack(SPFI),
false, false, 0);
}
@@ -1888,7 +1957,7 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
SDValue CPIdx = DAG.getConstantPool(CP, TLI.getPointerTy());
unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
return DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
}
@@ -1924,7 +1993,6 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
// and leave the Hi part unset.
SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
// The input chain to this libcall is the entry node of the function.
// Legalizing the call will automatically add the previous call to the
// dependence.
@@ -1945,12 +2013,20 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
// Splice the libcall in wherever FindInputOutputChains tells us to.
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+
+ // isTailCall may be true since the callee does not reference caller stack
+ // frame. Check if it's in the right position.
+ bool isTailCall = isInTailCallPosition(DAG, Node, TLI);
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), isTailCall,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
+ if (!CallInfo.second.getNode())
+ // It's a tailcall, return the chain (which is the DAG root).
+ return DAG.getRoot();
+
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
// was added by LowerCallTo (guaranteeing proper serialization of calls).
@@ -1964,7 +2040,6 @@ std::pair<SDValue, SDValue>
SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
SDNode *Node,
bool isSigned) {
- assert(!IsLegalizingCall && "Cannot overlap legalization of calls!");
SDValue InChain = Node->getOperand(0);
TargetLowering::ArgListTy Args;
@@ -1985,7 +2060,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
std::pair<SDValue, SDValue> CallInfo =
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
- 0, TLI.getLibcallCallingConv(LC), false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
Callee, Args, DAG, Node->getDebugLoc());
@@ -2064,16 +2139,17 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
}
// store the lo of the constructed double - based on integer input
SDValue Store1 = DAG.getStore(DAG.getEntryNode(), dl,
- Op0Mapped, Lo, NULL, 0,
+ Op0Mapped, Lo, MachinePointerInfo(),
false, false, 0);
// initial hi portion of constructed double
SDValue InitialHi = DAG.getConstant(0x43300000u, MVT::i32);
// store the hi of the constructed double - biased exponent
- SDValue Store2=DAG.getStore(Store1, dl, InitialHi, Hi, NULL, 0,
- false, false, 0);
+ SDValue Store2 = DAG.getStore(Store1, dl, InitialHi, Hi,
+ MachinePointerInfo(),
+ false, false, 0);
// load the constructed double
- SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot, NULL, 0,
- false, false, 0);
+ SDValue Load = DAG.getLoad(MVT::f64, dl, Store2, StackSlot,
+ MachinePointerInfo(), false, false, 0);
// FP constant to bias correct the final result
SDValue Bias = DAG.getConstantFP(isSigned ?
BitsToDouble(0x4330000080000000ULL) :
@@ -2116,17 +2192,40 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
DAG.getConstant(32, MVT::i64));
SDValue LoOr = DAG.getNode(ISD::OR, dl, MVT::i64, Lo, TwoP52);
SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84);
- SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr);
- SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr);
+ SDValue LoFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, LoOr);
+ SDValue HiFlt = DAG.getNode(ISD::BITCAST, dl, MVT::f64, HiOr);
SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt,
TwoP84PlusTwoP52);
return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub);
}
- // Implementation of unsigned i64 to f32. This implementation has the
- // advantage of performing rounding correctly.
+ // Implementation of unsigned i64 to f32.
// TODO: Generalize this for use with other types.
if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) {
+ // For unsigned conversions, convert them to signed conversions using the
+ // algorithm from the x86_64 __floatundidf in compiler_rt.
+ if (!isSigned) {
+ SDValue Fast = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Op0);
+
+ SDValue ShiftConst = DAG.getConstant(1, TLI.getShiftAmountTy());
+ SDValue Shr = DAG.getNode(ISD::SRL, dl, MVT::i64, Op0, ShiftConst);
+ SDValue AndConst = DAG.getConstant(1, MVT::i64);
+ SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, AndConst);
+ SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, Shr);
+
+ SDValue SignCvt = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Or);
+ SDValue Slow = DAG.getNode(ISD::FADD, dl, MVT::f32, SignCvt, SignCvt);
+
+ // TODO: This really should be implemented using a branch rather than a
+ // select. We happen to get lucky and machinesink does the right
+ // thing most of the time. This would be a good candidate for a
+    //       pseudo-op, or, even better, for whole-function isel.
+ SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+ Op0, DAG.getConstant(0, MVT::i64), ISD::SETLT);
+ return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+ }
+
+ // Otherwise, implement the fully general conversion.
EVT SHVT = TLI.getShiftAmountTy();
SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
@@ -2140,7 +2239,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
Op0, DAG.getConstant(UINT64_C(0x0020000000000000), MVT::i64),
- ISD::SETUGE);
+ ISD::SETUGE);
SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
@@ -2155,7 +2254,6 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue Fadd = DAG.getNode(ISD::FADD, dl, MVT::f64, Fmul, Fcvt2);
return DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, Fadd,
DAG.getIntPtrConstant(0));
-
}
SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);
@@ -2189,13 +2287,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
SDValue FudgeInReg;
if (DestVT == MVT::f32)
FudgeInReg = DAG.getLoad(MVT::f32, dl, DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
false, false, Alignment);
else {
FudgeInReg =
- LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl,
+ LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT,
DAG.getEntryNode(), CPIdx,
- PseudoSourceValue::getConstantPool(), 0,
+ MachinePointerInfo::getConstantPool(),
MVT::f32, false, false, Alignment));
}
@@ -2332,6 +2430,18 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
}
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+// FIXME: Move this helper to a common place.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
/// ExpandBitCount - Expand the specified bitcount instruction into operations.
///
SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
@@ -2339,26 +2449,45 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
switch (Opc) {
default: assert(0 && "Cannot expand this yet!");
case ISD::CTPOP: {
- static const uint64_t mask[6] = {
- 0x5555555555555555ULL, 0x3333333333333333ULL,
- 0x0F0F0F0F0F0F0F0FULL, 0x00FF00FF00FF00FFULL,
- 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL
- };
EVT VT = Op.getValueType();
EVT ShVT = TLI.getShiftAmountTy();
- unsigned len = VT.getSizeInBits();
- for (unsigned i = 0; (1U << i) <= (len / 2); ++i) {
- //x = (x & mask[i][len/8]) + (x >> (1 << i) & mask[i][len/8])
- unsigned EltSize = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : len;
- SDValue Tmp2 = DAG.getConstant(APInt(EltSize, mask[i]), VT);
- SDValue Tmp3 = DAG.getConstant(1ULL << i, ShVT);
- Op = DAG.getNode(ISD::ADD, dl, VT,
- DAG.getNode(ISD::AND, dl, VT, Op, Tmp2),
- DAG.getNode(ISD::AND, dl, VT,
- DAG.getNode(ISD::SRL, dl, VT, Op, Tmp3),
- Tmp2));
- }
+ unsigned Len = VT.getSizeInBits();
+
+ assert(VT.isInteger() && Len <= 128 && Len % 8 == 0 &&
+ "CTPOP not implemented for this type.");
+
+ // This is the "best" algorithm from
+ // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+
+ SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT);
+ SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT);
+ SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT);
+ SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT);
+
+ // v = v - ((v >> 1) & 0x55555555...)
+ Op = DAG.getNode(ISD::SUB, dl, VT, Op,
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(1, ShVT)),
+ Mask55));
+ // v = (v & 0x33333333...) + ((v >> 2) & 0x33333333...)
+ Op = DAG.getNode(ISD::ADD, dl, VT,
+ DAG.getNode(ISD::AND, dl, VT, Op, Mask33),
+ DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(2, ShVT)),
+ Mask33));
+ // v = (v + (v >> 4)) & 0x0F0F0F0F...
+ Op = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::ADD, dl, VT, Op,
+ DAG.getNode(ISD::SRL, dl, VT, Op,
+ DAG.getConstant(4, ShVT))),
+ Mask0F);
+ // v = (v * 0x01010101...) >> (Len - 8)
+ Op = DAG.getNode(ISD::SRL, dl, VT,
+ DAG.getNode(ISD::MUL, dl, VT, Op, Mask01),
+ DAG.getConstant(Len - 8, ShVT));
+
return Op;
}
case ISD::CTLZ: {
@@ -2516,9 +2645,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::PREFETCH:
case ISD::VAEND:
case ISD::EH_SJLJ_LONGJMP:
+ case ISD::EH_SJLJ_DISPATCHSETUP:
+ // If the target didn't expand these, there's nothing to do, so just
+ // preserve the chain and be done.
Results.push_back(Node->getOperand(0));
break;
case ISD::EH_SJLJ_SETJMP:
+ // If the target didn't expand this, just return 'zero' and preserve the
+ // chain.
Results.push_back(DAG.getConstant(0, MVT::i32));
Results.push_back(Node->getOperand(0));
break;
@@ -2527,7 +2661,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__sync_synchronize",
TLI.getPointerTy()),
@@ -2538,7 +2673,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// By default, atomic intrinsics are marked Legal and lowered. Targets
// which don't support them directly, however, may want libcalls, in which
// case they mark them Expand, and we get here.
- // FIXME: Unimplemented for now. Add libcalls.
case ISD::ATOMIC_SWAP:
case ISD::ATOMIC_LOAD_ADD:
case ISD::ATOMIC_LOAD_SUB:
@@ -2578,7 +2712,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TargetLowering::ArgListTy Args;
std::pair<SDValue, SDValue> CallResult =
TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()),
- false, false, false, false, 0, CallingConv::C, false,
+ false, false, false, false, 0, CallingConv::C,
+ /*isTailCall=*/false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
Args, DAG, dl);
@@ -2586,7 +2721,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
break;
}
case ISD::FP_ROUND:
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
Tmp1 = EmitStackConvert(Node->getOperand(0), Node->getValueType(0),
Node->getValueType(0), dl);
Results.push_back(Tmp1);
@@ -2637,8 +2772,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue True, False;
EVT VT = Node->getOperand(0).getValueType();
EVT NVT = Node->getValueType(0);
- const uint64_t zero[] = {0, 0};
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
APInt x = APInt::getSignBit(NVT.getSizeInBits());
(void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
Tmp1 = DAG.getConstantFP(apf, VT);
@@ -2662,8 +2796,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp2 = Node->getOperand(1);
unsigned Align = Node->getConstantOperandVal(3);
- SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0,
- false, false, 0);
+ SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
SDValue VAList = VAListLoad;
if (Align > TLI.getMinStackArgumentAlignment()) {
@@ -2674,7 +2808,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
TLI.getPointerTy()));
VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
- DAG.getConstant(-Align,
+ DAG.getConstant(-(int64_t)Align,
TLI.getPointerTy()));
}
@@ -2684,10 +2818,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
TLI.getPointerTy()));
// Store the incremented VAList to the legalized pointer
- Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0,
- false, false, 0);
+ Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
+ MachinePointerInfo(V), false, false, 0);
// Load the actual argument out of the pointer VAList
- Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0,
+ Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, MachinePointerInfo(),
false, false, 0));
Results.push_back(Results[0].getValue(1));
break;
@@ -2698,16 +2832,17 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
const Value *VD = cast<SrcValueSDNode>(Node->getOperand(3))->getValue();
const Value *VS = cast<SrcValueSDNode>(Node->getOperand(4))->getValue();
Tmp1 = DAG.getLoad(TLI.getPointerTy(), dl, Node->getOperand(0),
- Node->getOperand(2), VS, 0, false, false, 0);
- Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1), VD, 0,
- false, false, 0);
+ Node->getOperand(2), MachinePointerInfo(VS),
+ false, false, 0);
+ Tmp1 = DAG.getStore(Tmp1.getValue(1), dl, Tmp1, Node->getOperand(1),
+ MachinePointerInfo(VD), false, false, 0);
Results.push_back(Tmp1);
break;
}
case ISD::EXTRACT_VECTOR_ELT:
if (Node->getOperand(0).getValueType().getVectorNumElements() == 1)
// This must be an access of the only element. Return it.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0),
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, Node->getValueType(0),
Node->getOperand(0));
else
Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0));
@@ -2716,6 +2851,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
case ISD::EXTRACT_SUBVECTOR:
Results.push_back(ExpandExtractFromVectorThroughStack(SDValue(Node, 0)));
break;
+ case ISD::INSERT_SUBVECTOR:
+ Results.push_back(ExpandInsertToVectorThroughStack(SDValue(Node, 0)));
+ break;
case ISD::CONCAT_VECTORS: {
Results.push_back(ExpandVectorBuildThroughStack(Node));
break;
@@ -3094,14 +3232,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
BottomHalf = DAG.getNode(Ops[isSigned][1], dl, DAG.getVTList(VT, VT), LHS,
RHS);
TopHalf = BottomHalf.getValue(1);
- } else {
- // FIXME: We should be able to fall back to a libcall with an illegal
- // type in some cases.
- // Also, we can fall back to a division in some cases, but that's a big
- // performance hit in the general case.
- assert(TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
- VT.getSizeInBits() * 2)) &&
- "Don't know how to expand this operation yet!");
+ } else if (TLI.isTypeLegal(EVT::getIntegerVT(*DAG.getContext(),
+ VT.getSizeInBits() * 2))) {
EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
@@ -3110,6 +3242,30 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
DAG.getIntPtrConstant(0));
TopHalf = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, VT, Tmp1,
DAG.getIntPtrConstant(1));
+ } else {
+ // We can fall back to a libcall with an illegal type for the MUL if we
+ // have a libcall big enough.
+ // Also, we can fall back to a division in some cases, but that's a big
+ // performance hit in the general case.
+ EVT WideVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() * 2);
+ RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL;
+ if (WideVT == MVT::i16)
+ LC = RTLIB::MUL_I16;
+ else if (WideVT == MVT::i32)
+ LC = RTLIB::MUL_I32;
+ else if (WideVT == MVT::i64)
+ LC = RTLIB::MUL_I64;
+ else if (WideVT == MVT::i128)
+ LC = RTLIB::MUL_I128;
+ assert(LC != RTLIB::UNKNOWN_LIBCALL && "Cannot expand this operation!");
+ LHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, LHS);
+ RHS = DAG.getNode(Ops[isSigned][2], dl, WideVT, RHS);
+
+ SDValue Ret = ExpandLibCall(LC, Node, isSigned);
+ BottomHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, Ret);
+ TopHalf = DAG.getNode(ISD::SRL, dl, Ret.getValueType(), Ret,
+ DAG.getConstant(VT.getSizeInBits(), TLI.getPointerTy()));
+ TopHalf = DAG.getNode(ISD::TRUNCATE, dl, VT, TopHalf);
}
if (isSigned) {
Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1, TLI.getShiftAmountTy());
@@ -3165,8 +3321,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
- SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr,
- PseudoSourceValue::getJumpTable(), 0, MemVT,
+ SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
+ MachinePointerInfo::getJumpTable(), MemVT,
false, false, 0);
Addr = LD;
if (TM.getRelocationModel() == Reloc::PIC_) {
@@ -3329,8 +3485,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::XOR: {
unsigned ExtOp, TruncOp;
if (OVT.isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else {
assert(OVT.isInteger() && "Cannot promote logic operation");
ExtOp = ISD::ANY_EXTEND;
@@ -3347,8 +3503,8 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
case ISD::SELECT: {
unsigned ExtOp, TruncOp;
if (Node->getValueType(0).isVector()) {
- ExtOp = ISD::BIT_CONVERT;
- TruncOp = ISD::BIT_CONVERT;
+ ExtOp = ISD::BITCAST;
+ TruncOp = ISD::BITCAST;
} else if (Node->getValueType(0).isInteger()) {
ExtOp = ISD::ANY_EXTEND;
TruncOp = ISD::TRUNCATE;
@@ -3375,12 +3531,12 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
cast<ShuffleVectorSDNode>(Node)->getMask(Mask);
// Cast the two input vectors.
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(0));
- Tmp2 = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Node->getOperand(1));
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(0));
+ Tmp2 = DAG.getNode(ISD::BITCAST, dl, NVT, Node->getOperand(1));
// Convert the shuffle mask to the right # elements.
Tmp1 = ShuffleWithNarrowerEltType(NVT, OVT, dl, Tmp1, Tmp2, Mask);
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, OVT, Tmp1);
+ Tmp1 = DAG.getNode(ISD::BITCAST, dl, OVT, Tmp1);
Results.push_back(Tmp1);
break;
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
index 650ee5a0721c..27752123aac4 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp
@@ -55,7 +55,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to soften the result of this operator!");
- case ISD::BIT_CONVERT: R = SoftenFloatRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break;
case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break;
case ISD::ConstantFP:
R = SoftenFloatRes_ConstantFP(cast<ConstantFPSDNode>(N));
@@ -102,7 +102,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) {
SetSoftenedFloat(SDValue(N, ResNo), R);
}
-SDValue DAGTypeLegalizer::SoftenFloatRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) {
return BitConvertToInteger(N->getOperand(0));
}
@@ -133,8 +133,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) {
unsigned Size = NVT.getSizeInBits();
// Mask = ~(1 << (Size-1))
- SDValue Mask = DAG.getConstant(APInt::getAllOnesValue(Size).clear(Size-1),
- NVT);
+ APInt API = APInt::getAllOnesValue(Size);
+ API.clearBit(Size-1);
+ SDValue Mask = DAG.getConstant(API, NVT);
SDValue Op = GetSoftenedFloat(N->getOperand(0));
return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask);
}
@@ -455,7 +456,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
if (L->getExtensionType() == ISD::NON_EXTLOAD) {
NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(),
NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(), NVT,
+ L->getPointerInfo(), NVT,
L->isVolatile(), L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
@@ -466,8 +467,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) {
// Do a non-extending load followed by FP_EXTEND.
NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD,
L->getMemoryVT(), dl, L->getChain(),
- L->getBasePtr(), L->getOffset(),
- L->getSrcValue(), L->getSrcValueOffset(),
+ L->getBasePtr(), L->getOffset(), L->getPointerInfo(),
L->getMemoryVT(), L->isVolatile(),
L->isNonTemporal(), L->getAlignment());
// Legalized the chain result - switch anything that used the old chain to
@@ -558,7 +558,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to soften this operator's operand!");
- case ISD::BIT_CONVERT: Res = SoftenFloatOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SoftenFloatOp_BITCAST(N); break;
case ISD::BR_CC: Res = SoftenFloatOp_BR_CC(N); break;
case ISD::FP_ROUND: Res = SoftenFloatOp_FP_ROUND(N); break;
case ISD::FP_TO_SINT: Res = SoftenFloatOp_FP_TO_SINT(N); break;
@@ -670,8 +670,8 @@ void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
}
}
-SDValue DAGTypeLegalizer::SoftenFloatOp_BIT_CONVERT(SDNode *N) {
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) {
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
GetSoftenedFloat(N->getOperand(0)));
}
@@ -780,7 +780,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) {
Val = GetSoftenedFloat(Val);
return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(),
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->isVolatile(), ST->isNonTemporal(),
ST->getAlignment());
}
@@ -816,7 +816,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT: SplitRes_SELECT(N, Lo, Hi); break;
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -1110,9 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo,
assert(NVT.isByteSized() && "Expanded type not byte sized!");
assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?");
- Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr,
- LD->getSrcValue(), LD->getSrcValueOffset(),
- LD->getMemoryVT(), LD->isVolatile(),
+ Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr,
+ LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
// Remember the chain.
@@ -1222,7 +1221,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -1421,7 +1420,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) {
GetExpandedOp(ST->getValue(), Lo, Hi);
return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr,
- ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->getPointerInfo(),
ST->getMemoryVT(), ST->isVolatile(),
ST->isNonTemporal(), ST->getAlignment());
}
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index f8c589071921..f0752df80f12 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -49,7 +49,7 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
llvm_unreachable("Do not know how to promote this operator!");
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
@@ -143,7 +143,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) {
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(),
N->getChain(), N->getBasePtr(),
- Op2, N->getSrcValue(), N->getAlignment());
+ Op2, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
@@ -155,14 +155,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) {
SDValue Op3 = GetPromotedInteger(N->getOperand(3));
SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(),
N->getMemoryVT(), N->getChain(), N->getBasePtr(),
- Op2, Op3, N->getSrcValue(), N->getAlignment());
+ Op2, Op3, N->getMemOperand());
// Legalized the chain result - switch anything that used the old chain to
// use the new one.
ReplaceValueWith(SDValue(N, 1), Res.getValue(1));
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT);
@@ -179,8 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
case PromoteInteger:
if (NOutVT.bitsEq(NInVT))
// The input promotes to the same size. Convert the promoted value.
- return DAG.getNode(ISD::BIT_CONVERT, dl,
- NOutVT, GetPromotedInteger(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, GetPromotedInteger(InOp));
break;
case SoftenFloat:
// Promote the integer operand by hand.
@@ -193,7 +192,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
BitConvertToInteger(GetScalarizedVector(InOp)));
case SplitVector: {
- // For example, i32 = BIT_CONVERT v2i16 on alpha. Convert the split
+ // For example, i32 = BITCAST v2i16 on alpha. Convert the split
// pieces of the input into integers and reassemble in the final type.
SDValue Lo, Hi;
GetSplitVector(N->getOperand(0), Lo, Hi);
@@ -207,12 +206,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BIT_CONVERT(SDNode *N) {
EVT::getIntegerVT(*DAG.getContext(),
NOutVT.getSizeInBits()),
JoinIntegers(Lo, Hi));
- return DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, NOutVT, InOp);
}
case WidenVector:
if (OutVT.bitsEq(NInVT))
// The input is widened to the same size. Convert to the widened value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, OutVT, GetWidenedVector(InOp));
+ return DAG.getNode(ISD::BITCAST, dl, OutVT, GetWidenedVector(InOp));
}
return DAG.getNode(ISD::ANY_EXTEND, dl, NOutVT,
@@ -293,7 +292,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
APInt TopBit(NVT.getSizeInBits(), 0);
- TopBit.set(OVT.getSizeInBits());
+ TopBit.setBit(OVT.getSizeInBits());
Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, NVT));
return DAG.getNode(ISD::CTTZ, dl, NVT, Op);
}
@@ -371,8 +370,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) {
ISD::LoadExtType ExtType =
ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType();
DebugLoc dl = N->getDebugLoc();
- SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(),
+ N->getPointerInfo(),
N->getMemoryVT(), N->isVolatile(),
N->isNonTemporal(), N->getAlignment());
@@ -549,6 +548,48 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) {
return Res;
}
+SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
+ // Promote the overflow bit trivially.
+ if (ResNo == 1)
+ return PromoteIntRes_Overflow(N);
+
+ SDValue LHS = N->getOperand(0), RHS = N->getOperand(1);
+ DebugLoc DL = N->getDebugLoc();
+ EVT SmallVT = LHS.getValueType();
+
+ // To determine if the result overflowed in a larger type, we extend the input
+ // to the larger type, do the multiply, then check the high bits of the result
+ // to see if the overflow happened.
+ if (N->getOpcode() == ISD::SMULO) {
+ LHS = SExtPromotedInteger(LHS);
+ RHS = SExtPromotedInteger(RHS);
+ } else {
+ LHS = ZExtPromotedInteger(LHS);
+ RHS = ZExtPromotedInteger(RHS);
+ }
+ SDValue Mul = DAG.getNode(ISD::MUL, DL, LHS.getValueType(), LHS, RHS);
+
+ // Overflow occurred iff the high part of the result does not zero/sign-extend
+ // the low part.
+ SDValue Overflow;
+ if (N->getOpcode() == ISD::UMULO) {
+ // Unsigned overflow occurred iff the high part is non-zero.
+ SDValue Hi = DAG.getNode(ISD::SRL, DL, Mul.getValueType(), Mul,
+ DAG.getIntPtrConstant(SmallVT.getSizeInBits()));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), Hi,
+ DAG.getConstant(0, Hi.getValueType()), ISD::SETNE);
+ } else {
+ // Signed overflow occurred iff the high part does not sign extend the low.
+ SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Mul.getValueType(),
+ Mul, DAG.getValueType(SmallVT));
+ Overflow = DAG.getSetCC(DL, N->getValueType(1), SExt, Mul, ISD::SETNE);
+ }
+
+ // Use the calculated overflow everywhere.
+ ReplaceValueWith(SDValue(N, 1), Overflow);
+ return Mul;
+}
+
SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) {
// Zero extend the input.
SDValue LHS = ZExtPromotedInteger(N->getOperand(0));
@@ -602,11 +643,6 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
return Res;
}
-SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) {
- assert(ResNo == 1 && "Only boolean result promotion currently supported!");
- return PromoteIntRes_Overflow(N);
-}
-
//===----------------------------------------------------------------------===//
// Integer Operand Promotion
//===----------------------------------------------------------------------===//
@@ -631,7 +667,7 @@ bool DAGTypeLegalizer::PromoteIntegerOperand(SDNode *N, unsigned OpNo) {
llvm_unreachable("Do not know how to promote this operator's operand!");
case ISD::ANY_EXTEND: Res = PromoteIntOp_ANY_EXTEND(N); break;
- case ISD::BIT_CONVERT: Res = PromoteIntOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = PromoteIntOp_BITCAST(N); break;
case ISD::BR_CC: Res = PromoteIntOp_BR_CC(N, OpNo); break;
case ISD::BRCOND: Res = PromoteIntOp_BRCOND(N, OpNo); break;
case ISD::BUILD_PAIR: Res = PromoteIntOp_BUILD_PAIR(N); break;
@@ -713,7 +749,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) {
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op);
}
-SDValue DAGTypeLegalizer::PromoteIntOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::PromoteIntOp_BITCAST(SDNode *N) {
// This should only occur in unusual situations like bitcasting to an
// x86_fp80, so just turn it into a store+load
return CreateStackStoreLoad(N->getOperand(0), N->getValueType(0));
@@ -889,7 +925,6 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) {
SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!");
SDValue Ch = N->getChain(), Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -898,8 +933,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){
SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value.
// Truncate the value and store the result.
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getSrcValue(),
- SVOffset, N->getMemoryVT(),
+ return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(),
+ N->getMemoryVT(),
isVolatile, isNonTemporal, Alignment);
}
@@ -951,7 +986,7 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: ExpandRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: ExpandRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_PAIR: ExpandRes_BUILD_PAIR(N, Lo, Hi); break;
case ISD::EXTRACT_ELEMENT: ExpandRes_EXTRACT_ELEMENT(N, Lo, Hi); break;
case ISD::EXTRACT_VECTOR_ELT: ExpandRes_EXTRACT_VECTOR_ELT(N, Lo, Hi); break;
@@ -978,6 +1013,23 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::UREM: ExpandIntRes_UREM(N, Lo, Hi); break;
case ISD::ZERO_EXTEND: ExpandIntRes_ZERO_EXTEND(N, Lo, Hi); break;
+ case ISD::ATOMIC_LOAD_ADD:
+ case ISD::ATOMIC_LOAD_SUB:
+ case ISD::ATOMIC_LOAD_AND:
+ case ISD::ATOMIC_LOAD_OR:
+ case ISD::ATOMIC_LOAD_XOR:
+ case ISD::ATOMIC_LOAD_NAND:
+ case ISD::ATOMIC_LOAD_MIN:
+ case ISD::ATOMIC_LOAD_MAX:
+ case ISD::ATOMIC_LOAD_UMIN:
+ case ISD::ATOMIC_LOAD_UMAX:
+ case ISD::ATOMIC_SWAP: {
+ std::pair<SDValue, SDValue> Tmp = ExpandAtomic(N);
+ SplitInteger(Tmp.first, Lo, Hi);
+ ReplaceValueWith(SDValue(N, 1), Tmp.second);
+ break;
+ }
+
case ISD::AND:
case ISD::OR:
case ISD::XOR: ExpandIntRes_Logical(N, Lo, Hi); break;
@@ -999,6 +1051,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break;
case ISD::UADDO:
case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break;
+ case ISD::UMULO:
+ case ISD::SMULO: ExpandIntRes_UMULSMULO(N, Lo, Hi); break;
}
// If Lo/Hi is null, the sub-method took care of registering results etc.
@@ -1006,11 +1060,98 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
SetExpandedInteger(SDValue(N, ResNo), Lo, Hi);
}
+/// Lower an atomic node to the appropriate builtin call.
+std::pair <SDValue, SDValue> DAGTypeLegalizer::ExpandAtomic(SDNode *Node) {
+ unsigned Opc = Node->getOpcode();
+ MVT VT = cast<AtomicSDNode>(Node)->getMemoryVT().getSimpleVT();
+ RTLIB::Libcall LC;
+
+ switch (Opc) {
+ default:
+ llvm_unreachable("Unhandled atomic intrinsic Expand!");
+ break;
+ case ISD::ATOMIC_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+ }
+ break;
+ case ISD::ATOMIC_CMP_SWAP:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_ADD:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_SUB:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_AND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_OR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_XOR:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+ }
+ break;
+ case ISD::ATOMIC_LOAD_NAND:
+ switch (VT.SimpleTy) {
+ default: llvm_unreachable("Unexpected value type for atomic!");
+ case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break;
+ case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
+ case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
+ case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+ }
+ break;
+ }
+
+ return ExpandChainLibCall(LC, Node, false);
+}
+
/// ExpandShiftByConstant - N is a shift by a value that needs to be expanded,
/// and the shift amount is a constant 'Amt'. Expand the operation.
void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi) {
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
// Expand the incoming operand to be shifted, so that we have its parts
SDValue InL, InH;
GetExpandedInteger(N->getOperand(0), InL, InH);
@@ -1025,8 +1166,8 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
Lo = DAG.getConstant(0, NVT);
- Hi = DAG.getNode(ISD::SHL, dl,
- NVT, InL, DAG.getConstant(Amt-NVTBits,ShTy));
+ Hi = DAG.getNode(ISD::SHL, DL,
+ NVT, InL, DAG.getConstant(Amt-NVTBits, ShTy));
} else if (Amt == NVTBits) {
Lo = DAG.getConstant(0, NVT);
Hi = InL;
@@ -1034,17 +1175,17 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
TLI.isOperationLegalOrCustom(ISD::ADDC,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT))) {
// Emit this X << 1 as X+X.
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
SDValue LoOps[2] = { InL, InL };
- Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
+ Lo = DAG.getNode(ISD::ADDC, DL, VTList, LoOps, 2);
SDValue HiOps[3] = { InH, InH, Lo.getValue(1) };
- Hi = DAG.getNode(ISD::ADDE, dl, VTList, HiOps, 3);
+ Hi = DAG.getNode(ISD::ADDE, DL, VTList, HiOps, 3);
} else {
- Lo = DAG.getNode(ISD::SHL, dl, NVT, InL, DAG.getConstant(Amt, ShTy));
- Hi = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SHL, DL, NVT, InL, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(NVTBits-Amt, ShTy)));
}
return;
@@ -1055,43 +1196,43 @@ void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt,
Lo = DAG.getConstant(0, NVT);
Hi = DAG.getConstant(0, NVT);
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRL, dl,
+ Lo = DAG.getNode(ISD::SRL, DL,
NVT, InH, DAG.getConstant(Amt-NVTBits,ShTy));
Hi = DAG.getConstant(0, NVT);
} else if (Amt == NVTBits) {
Lo = InH;
Hi = DAG.getConstant(0, NVT);
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRL, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRL, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
return;
}
assert(N->getOpcode() == ISD::SRA && "Unknown shift!");
if (Amt > VTBits) {
- Hi = Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt > NVTBits) {
- Lo = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Lo = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(Amt-NVTBits, ShTy));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else if (Amt == NVTBits) {
Lo = InH;
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH,
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH,
DAG.getConstant(NVTBits-1, ShTy));
} else {
- Lo = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL,
+ Lo = DAG.getNode(ISD::OR, DL, NVT,
+ DAG.getNode(ISD::SRL, DL, NVT, InL,
DAG.getConstant(Amt, ShTy)),
- DAG.getNode(ISD::SHL, dl, NVT, InH,
+ DAG.getNode(ISD::SHL, DL, NVT, InH,
DAG.getConstant(NVTBits-Amt, ShTy)));
- Hi = DAG.getNode(ISD::SRA, dl, NVT, InH, DAG.getConstant(Amt, ShTy));
+ Hi = DAG.getNode(ISD::SRA, DL, NVT, InH, DAG.getConstant(Amt, ShTy));
}
}
@@ -1269,7 +1410,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
// Do not generate ADDC/ADDE or SUBC/SUBE if the target does not support
// them. TODO: Teach operation legalization how to expand unsupported
// ADDC/ADDE/SUBC/SUBE. The problem is that these operations generate
- // a carry of type MVT::Flag, but there doesn't seem to be any way to
+ // a carry of type MVT::Glue, but there doesn't seem to be any way to
// generate a value of this type in the expanded code sequence.
bool hasCarry =
TLI.isOperationLegalOrCustom(N->getOpcode() == ISD::ADD ?
@@ -1277,7 +1418,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
TLI.getTypeToExpandTo(*DAG.getContext(), NVT));
if (hasCarry) {
- SDVTList VTList = DAG.getVTList(NVT, MVT::Flag);
+ SDVTList VTList = DAG.getVTList(NVT, MVT::Glue);
if (N->getOpcode() == ISD::ADD) {
Lo = DAG.getNode(ISD::ADDC, dl, VTList, LoOps, 2);
HiOps[2] = Lo.getValue(1);
@@ -1287,31 +1428,32 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N,
HiOps[2] = Lo.getValue(1);
Hi = DAG.getNode(ISD::SUBE, dl, VTList, HiOps, 3);
}
+ return;
+ }
+
+ if (N->getOpcode() == ISD::ADD) {
+ Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
+ SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
+ ISD::SETULT);
+ SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
+ ISD::SETULT);
+ SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
+ DAG.getConstant(1, NVT), Carry1);
+ Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
} else {
- if (N->getOpcode() == ISD::ADD) {
- Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2);
- SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0],
- ISD::SETULT);
- SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1],
- ISD::SETULT);
- SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2,
- DAG.getConstant(1, NVT), Carry1);
- Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2);
- } else {
- Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
- Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
- SDValue Cmp =
- DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
- LoOps[0], LoOps[1], ISD::SETULT);
- SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
- DAG.getConstant(1, NVT),
- DAG.getConstant(0, NVT));
- Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
- }
+ Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2);
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2);
+ SDValue Cmp =
+ DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()),
+ LoOps[0], LoOps[1], ISD::SETULT);
+ SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp,
+ DAG.getConstant(1, NVT),
+ DAG.getConstant(0, NVT));
+ Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow);
}
}
@@ -1322,7 +1464,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[2] = { LHSL, RHSL };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1348,7 +1490,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N,
DebugLoc dl = N->getDebugLoc();
GetExpandedInteger(N->getOperand(0), LHSL, LHSH);
GetExpandedInteger(N->getOperand(1), RHSL, RHSH);
- SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Flag);
+ SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue);
SDValue LoOps[3] = { LHSL, RHSL, N->getOperand(2) };
SDValue HiOps[3] = { LHSH, RHSH };
@@ -1437,7 +1579,7 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned NBitWidth = NVT.getSizeInBits();
const APInt &Cst = cast<ConstantSDNode>(N)->getAPIntValue();
- Lo = DAG.getConstant(APInt(Cst).trunc(NBitWidth), NVT);
+ Lo = DAG.getConstant(Cst.trunc(NBitWidth), NVT);
Hi = DAG.getConstant(Cst.lshr(NBitWidth).trunc(NBitWidth), NVT);
}
@@ -1524,7 +1666,6 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
ISD::LoadExtType ExtType = N->getExtensionType();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -1535,7 +1676,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
if (N->getMemoryVT().bitsLE(NVT)) {
EVT MemVT = N->getMemoryVT();
- Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
MemVT, isVolatile, isNonTemporal, Alignment);
// Remember the chain.
@@ -1557,7 +1698,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
}
} else if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
- Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -1568,8 +1709,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize), NEVT,
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1586,7 +1727,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
unsigned ExcessBits = (EBytes - IncrementSize)*8;
// Load both the high bits and maybe some of the low bits.
- Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(),
EVT::getIntegerVT(*DAG.getContext(),
MemVT.getSizeInBits() - ExcessBits),
isVolatile, isNonTemporal, Alignment);
@@ -1595,8 +1736,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N,
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
// Load the rest of the low bits.
- Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
+ Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -1987,6 +2128,31 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N,
ReplaceValueWith(SDValue(N, 1), Ofl);
}
+void DAGTypeLegalizer::ExpandIntRes_UMULSMULO(SDNode *N,
+ SDValue &Lo, SDValue &Hi) {
+ SDValue LHS = N->getOperand(0);
+ SDValue RHS = N->getOperand(1);
+ DebugLoc dl = N->getDebugLoc();
+ EVT VT = N->getValueType(0);
+ EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits() / 2);
+ // Expand the result by simply replacing it with the equivalent
+ // non-overflow-checking operation.
+ SDValue Ret = DAG.getNode(ISD::MUL, dl, LHS.getValueType(), LHS, RHS);
+ SplitInteger(Ret, Lo, Hi);
+
+ // Now calculate overflow.
+ SDValue Ofl;
+ if (N->getOpcode() == ISD::UMULO)
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi,
+ DAG.getConstant(0, VT), ISD::SETNE);
+ else {
+ SDValue Tmp = DAG.getConstant(VT.getSizeInBits() - 1, HalfVT);
+ Tmp = DAG.getNode(ISD::SRA, dl, HalfVT, Lo, Tmp);
+ Ofl = DAG.getSetCC(dl, N->getValueType(1), Hi, Tmp, ISD::SETNE);
+ }
+ ReplaceValueWith(SDValue(N, 1), Ofl);
+}
+
void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N,
SDValue &Lo, SDValue &Hi) {
EVT VT = N->getValueType(0);
@@ -2078,7 +2244,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to expand this operator's operand!");
- case ISD::BIT_CONVERT: Res = ExpandOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = ExpandOp_BITCAST(N); break;
case ISD::BR_CC: Res = ExpandIntOp_BR_CC(N); break;
case ISD::BUILD_VECTOR: Res = ExpandOp_BUILD_VECTOR(N); break;
case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break;
@@ -2308,7 +2474,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
unsigned Alignment = N->getAlignment();
bool isVolatile = N->isVolatile();
bool isNonTemporal = N->isNonTemporal();
@@ -2319,14 +2484,16 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
if (N->getMemoryVT().bitsLE(NVT)) {
GetExpandedInteger(N->getValue(), Lo, Hi);
- return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
N->getMemoryVT(), isVolatile, isNonTemporal,
Alignment);
- } else if (TLI.isLittleEndian()) {
+ }
+
+ if (TLI.isLittleEndian()) {
// Little-endian - low bits are at low addresses.
GetExpandedInteger(N->getValue(), Lo, Hi);
- Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
unsigned ExcessBits =
@@ -2337,50 +2504,49 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = NVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize, NEVT,
- isVolatile, isNonTemporal,
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ NEVT, isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- } else {
- // Big-endian - high bits are at low addresses. Favor aligned stores at
- // the cost of some bit-fiddling.
- GetExpandedInteger(N->getValue(), Lo, Hi);
-
- EVT ExtVT = N->getMemoryVT();
- unsigned EBytes = ExtVT.getStoreSize();
- unsigned IncrementSize = NVT.getSizeInBits()/8;
- unsigned ExcessBits = (EBytes - IncrementSize)*8;
- EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
- ExtVT.getSizeInBits() - ExcessBits);
+ }
- if (ExcessBits < NVT.getSizeInBits()) {
- // Transfer high bits from the top of Lo to the bottom of Hi.
- Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
- DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
- TLI.getPointerTy()));
- Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
- DAG.getNode(ISD::SRL, dl, NVT, Lo,
- DAG.getConstant(ExcessBits,
- TLI.getPointerTy())));
- }
+ // Big-endian - high bits are at low addresses. Favor aligned stores at
+ // the cost of some bit-fiddling.
+ GetExpandedInteger(N->getValue(), Lo, Hi);
+
+ EVT ExtVT = N->getMemoryVT();
+ unsigned EBytes = ExtVT.getStoreSize();
+ unsigned IncrementSize = NVT.getSizeInBits()/8;
+ unsigned ExcessBits = (EBytes - IncrementSize)*8;
+ EVT HiVT = EVT::getIntegerVT(*DAG.getContext(),
+ ExtVT.getSizeInBits() - ExcessBits);
+
+ if (ExcessBits < NVT.getSizeInBits()) {
+ // Transfer high bits from the top of Lo to the bottom of Hi.
+ Hi = DAG.getNode(ISD::SHL, dl, NVT, Hi,
+ DAG.getConstant(NVT.getSizeInBits() - ExcessBits,
+ TLI.getPointerTy()));
+ Hi = DAG.getNode(ISD::OR, dl, NVT, Hi,
+ DAG.getNode(ISD::SRL, dl, NVT, Lo,
+ DAG.getConstant(ExcessBits,
+ TLI.getPointerTy())));
+ }
- // Store both the high bits and maybe some of the low bits.
- Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getSrcValue(),
- SVOffset, HiVT, isVolatile, isNonTemporal,
- Alignment);
+ // Store both the high bits and maybe some of the low bits.
+ Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(),
+ HiVT, isVolatile, isNonTemporal, Alignment);
- // Increment the pointer to the other half.
- Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
- DAG.getIntPtrConstant(IncrementSize));
- // Store the lowest ExcessBits bits in the second half.
- Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getSrcValue(),
- SVOffset+IncrementSize,
- EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
- isVolatile, isNonTemporal,
- MinAlign(Alignment, IncrementSize));
- return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
- }
+ // Increment the pointer to the other half.
+ Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
+ DAG.getIntPtrConstant(IncrementSize));
+ // Store the lowest ExcessBits bits in the second half.
+ Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
+ EVT::getIntegerVT(*DAG.getContext(), ExcessBits),
+ isVolatile, isNonTemporal,
+ MinAlign(Alignment, IncrementSize));
+ return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
}
SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) {
@@ -2460,8 +2626,10 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) {
// Load the value out, extending it from f32 to the destination float type.
// FIXME: Avoid the extend by constructing the right constant pool?
- SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(),
- FudgePtr, NULL, 0, MVT::f32,
+ SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(),
+ FudgePtr,
+ MachinePointerInfo::getConstantPool(),
+ MVT::f32,
false, false, Alignment);
return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge);
}
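For readers following the ExpandIntRes_ADDSUB restructuring above: when the target cannot produce an ADDC/ADDE (or SUBC/SUBE) pair, the expanded sequence derives the carry and borrow from unsigned compares of the low halves. The standalone C++ sketch below mirrors that arithmetic on plain 32-bit halves of a 64-bit value; it is only an illustration of the idea, not the SelectionDAG code, and the helper names (addParts, subParts) are invented for the example.

#include <cassert>
#include <cstdint>

// Split a 64-bit addition into two 32-bit halves, deriving the carry from
// unsigned compares, the same way the expanded sequence does when the target
// has no ADDC/ADDE.
static void addParts(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH,
                     uint32_t &Lo, uint32_t &Hi) {
  Lo = AL + BL;                                   // low half, may wrap
  Hi = AH + BH;                                   // high half, carry not yet applied
  uint32_t Carry = (Lo < AL || Lo < BL) ? 1 : 0;  // wrap happened iff result < an operand
  Hi += Carry;
}

// Subtraction derives the borrow from one unsigned compare of the original
// low operands, matching the SUB path of the expansion.
static void subParts(uint32_t AL, uint32_t AH, uint32_t BL, uint32_t BH,
                     uint32_t &Lo, uint32_t &Hi) {
  Lo = AL - BL;
  Hi = AH - BH;
  uint32_t Borrow = (AL < BL) ? 1 : 0;
  Hi -= Borrow;
}

int main() {
  uint64_t A = 0x00000001FFFFFFFFULL, B = 3;
  uint32_t Lo, Hi;
  addParts(uint32_t(A), uint32_t(A >> 32), uint32_t(B), uint32_t(B >> 32), Lo, Hi);
  assert(((uint64_t(Hi) << 32) | Lo) == A + B);
  subParts(uint32_t(A), uint32_t(A >> 32), uint32_t(B), uint32_t(B >> 32), Lo, Hi);
  assert(((uint64_t(Hi) << 32) | Lo) == A - B);
  return 0;
}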
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 6e56c98e9b56..cedda7e7075a 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -714,6 +714,11 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) {
if (M->getNodeId() == Processed)
RemapValue(NewVal);
DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL);
+ // OldVal may be a target of the ReplacedValues map which was marked
+ // NewNode to force reanalysis because it was updated. Ensure that
+ // anything that ReplacedValues mapped to OldVal will now be mapped
+ // all the way to NewVal.
+ ReplacedValues[OldVal] = NewVal;
}
// The original node continues to exist in the DAG, marked NewNode.
}
@@ -858,7 +863,7 @@ void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) {
/// BitConvertToInteger - Convert to an integer of the same size.
SDValue DAGTypeLegalizer::BitConvertToInteger(SDValue Op) {
unsigned BitWidth = Op.getValueType().getSizeInBits();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getIntegerVT(*DAG.getContext(), BitWidth), Op);
}
@@ -869,7 +874,7 @@ SDValue DAGTypeLegalizer::BitConvertVectorToIntegerVector(SDValue Op) {
unsigned EltWidth = Op.getValueType().getVectorElementType().getSizeInBits();
EVT EltNVT = EVT::getIntegerVT(*DAG.getContext(), EltWidth);
unsigned NumElts = Op.getValueType().getVectorNumElements();
- return DAG.getNode(ISD::BIT_CONVERT, Op.getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, Op.getDebugLoc(),
EVT::getVectorVT(*DAG.getContext(), EltNVT, NumElts), Op);
}
@@ -880,10 +885,11 @@ SDValue DAGTypeLegalizer::CreateStackStoreLoad(SDValue Op,
// the source and destination types.
SDValue StackPtr = DAG.CreateStackTemporary(Op.getValueType(), DestVT);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Result is a load from the stack slot.
- return DAG.getLoad(DestVT, dl, Store, StackPtr, NULL, 0, false, false, 0);
+ return DAG.getLoad(DestVT, dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, 0);
}
/// CustomLowerNode - Replace the node's results with custom code provided
@@ -1049,6 +1055,39 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
return CallInfo.first;
}
+// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to
+// ExpandLibCall except that the first operand is the in-chain.
+std::pair<SDValue, SDValue>
+DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node,
+ bool isSigned) {
+ SDValue InChain = Node->getOperand(0);
+
+ TargetLowering::ArgListTy Args;
+ TargetLowering::ArgListEntry Entry;
+ for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) {
+ EVT ArgVT = Node->getOperand(i).getValueType();
+ const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
+ Entry.Node = Node->getOperand(i);
+ Entry.Ty = ArgTy;
+ Entry.isSExt = isSigned;
+ Entry.isZExt = !isSigned;
+ Args.push_back(Entry);
+ }
+ SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
+ TLI.getPointerTy());
+
+ // Splice the libcall in wherever FindInputOutputChains tells us to.
+ const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext());
+ std::pair<SDValue, SDValue> CallInfo =
+ TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
+ 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
+ /*isReturnValueUsed=*/true,
+ Callee, Args, DAG, Node->getDebugLoc());
+
+ return CallInfo;
+}
+
/// PromoteTargetBoolean - Promote the given target boolean to a target boolean
/// of the given type. A target boolean is an integer value, not necessarily of
/// type i1, the bits of which conform to getBooleanContents.
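The CreateStackStoreLoad hunk above keeps the same shape (store the value to a stack temporary, then load it back at the destination type) and only switches the addressing to MachinePointerInfo. As a rough scalar analogue of that store/load round trip, assuming nothing about the DAG API, a bit-reinterpretation through a temporary buffer looks like this:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Reinterpret the bits of a value of type From as a value of type To by
// storing it to a temporary and loading it back -- the scalar analogue of a
// stack store followed by a load of the destination type.
template <typename To, typename From>
static To stackStoreLoad(From V) {
  static_assert(sizeof(To) == sizeof(From), "types must be the same size");
  unsigned char Slot[sizeof(From)];        // stands in for the stack temporary
  std::memcpy(Slot, &V, sizeof(From));     // the "store"
  To Result;
  std::memcpy(&Result, Slot, sizeof(To));  // the "load"
  return Result;
}

int main() {
  float F = 1.0f;
  std::printf("bits of 1.0f: 0x%08x\n", stackStoreLoad<uint32_t>(F));
  return 0;
}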
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index d56029208e61..3f81bbbe4061 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -99,7 +99,7 @@ private:
return SoftenFloat;
return ExpandFloat;
}
-
+
if (VT.getVectorNumElements() == 1)
return ScalarizeVector;
return SplitVector;
@@ -192,6 +192,10 @@ private:
SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
const SDValue *Ops, unsigned NumOps, bool isSigned,
DebugLoc dl);
+ std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
+ SDNode *Node, bool isSigned);
+ std::pair<SDValue, SDValue> ExpandAtomic(SDNode *Node);
+
SDValue PromoteTargetBoolean(SDValue Bool, EVT VT);
void ReplaceValueWith(SDValue From, SDValue To);
void SplitInteger(SDValue Op, SDValue &Lo, SDValue &Hi);
@@ -244,7 +248,7 @@ private:
SDValue PromoteIntRes_AssertZext(SDNode *N);
SDValue PromoteIntRes_Atomic1(AtomicSDNode *N);
SDValue PromoteIntRes_Atomic2(AtomicSDNode *N);
- SDValue PromoteIntRes_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntRes_BITCAST(SDNode *N);
SDValue PromoteIntRes_BSWAP(SDNode *N);
SDValue PromoteIntRes_BUILD_PAIR(SDNode *N);
SDValue PromoteIntRes_Constant(SDNode *N);
@@ -278,7 +282,7 @@ private:
// Integer Operand Promotion.
bool PromoteIntegerOperand(SDNode *N, unsigned OperandNo);
SDValue PromoteIntOp_ANY_EXTEND(SDNode *N);
- SDValue PromoteIntOp_BIT_CONVERT(SDNode *N);
+ SDValue PromoteIntOp_BITCAST(SDNode *N);
SDValue PromoteIntOp_BUILD_PAIR(SDNode *N);
SDValue PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo);
SDValue PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo);
@@ -344,6 +348,7 @@ private:
void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandIntRes_UMULSMULO (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandShiftByConstant(SDNode *N, unsigned Amt,
SDValue &Lo, SDValue &Hi);
@@ -352,7 +357,7 @@ private:
// Integer Operand Expansion.
bool ExpandIntegerOperand(SDNode *N, unsigned OperandNo);
- SDValue ExpandIntOp_BIT_CONVERT(SDNode *N);
+ SDValue ExpandIntOp_BITCAST(SDNode *N);
SDValue ExpandIntOp_BR_CC(SDNode *N);
SDValue ExpandIntOp_BUILD_VECTOR(SDNode *N);
SDValue ExpandIntOp_EXTRACT_ELEMENT(SDNode *N);
@@ -387,7 +392,7 @@ private:
// Result Float to Integer Conversion.
void SoftenFloatResult(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatRes_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatRes_BITCAST(SDNode *N);
SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N);
SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N);
SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N);
@@ -426,7 +431,7 @@ private:
// Operand Float to Integer Conversion.
bool SoftenFloatOperand(SDNode *N, unsigned OpNo);
- SDValue SoftenFloatOp_BIT_CONVERT(SDNode *N);
+ SDValue SoftenFloatOp_BITCAST(SDNode *N);
SDValue SoftenFloatOp_BR_CC(SDNode *N);
SDValue SoftenFloatOp_FP_ROUND(SDNode *N);
SDValue SoftenFloatOp_FP_TO_SINT(SDNode *N);
@@ -515,7 +520,7 @@ private:
SDValue ScalarizeVecRes_UnaryOp(SDNode *N);
SDValue ScalarizeVecRes_InregOp(SDNode *N);
- SDValue ScalarizeVecRes_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecRes_BITCAST(SDNode *N);
SDValue ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N);
SDValue ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N);
SDValue ScalarizeVecRes_FPOWI(SDNode *N);
@@ -532,7 +537,7 @@ private:
// Vector Operand Scalarization: <1 x ty> -> ty.
bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo);
- SDValue ScalarizeVecOp_BIT_CONVERT(SDNode *N);
+ SDValue ScalarizeVecOp_BITCAST(SDNode *N);
SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N);
SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo);
@@ -557,7 +562,7 @@ private:
void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi);
- void SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_PAIR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -577,11 +582,12 @@ private:
bool SplitVectorOperand(SDNode *N, unsigned OpNo);
SDValue SplitVecOp_UnaryOp(SDNode *N);
- SDValue SplitVecOp_BIT_CONVERT(SDNode *N);
+ SDValue SplitVecOp_BITCAST(SDNode *N);
SDValue SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N);
SDValue SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue SplitVecOp_CONCAT_VECTORS(SDNode *N);
+ SDValue SplitVecOp_FP_ROUND(SDNode *N);
//===--------------------------------------------------------------------===//
// Vector Widening Support: LegalizeVectorTypes.cpp
@@ -603,7 +609,7 @@ private:
// Widen Vector Result Promotion.
void WidenVectorResult(SDNode *N, unsigned ResNo);
- SDValue WidenVecRes_BIT_CONVERT(SDNode* N);
+ SDValue WidenVecRes_BITCAST(SDNode* N);
SDValue WidenVecRes_BUILD_VECTOR(SDNode* N);
SDValue WidenVecRes_CONCAT_VECTORS(SDNode* N);
SDValue WidenVecRes_CONVERT_RNDSAT(SDNode* N);
@@ -628,7 +634,7 @@ private:
// Widen Vector Operand.
bool WidenVectorOperand(SDNode *N, unsigned ResNo);
- SDValue WidenVecOp_BIT_CONVERT(SDNode *N);
+ SDValue WidenVecOp_BITCAST(SDNode *N);
SDValue WidenVecOp_CONCAT_VECTORS(SDNode *N);
SDValue WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N);
SDValue WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N);
@@ -721,7 +727,7 @@ private:
}
// Generic Result Expansion.
- void ExpandRes_BIT_CONVERT (SDNode *N, SDValue &Lo, SDValue &Hi);
+ void ExpandRes_BITCAST (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_BUILD_PAIR (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_ELEMENT (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
@@ -729,7 +735,7 @@ private:
void ExpandRes_VAARG (SDNode *N, SDValue &Lo, SDValue &Hi);
// Generic Operand Expansion.
- SDValue ExpandOp_BIT_CONVERT (SDNode *N);
+ SDValue ExpandOp_BITCAST (SDNode *N);
SDValue ExpandOp_BUILD_VECTOR (SDNode *N);
SDValue ExpandOp_EXTRACT_ELEMENT (SDNode *N);
SDValue ExpandOp_INSERT_VECTOR_ELT(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
index 9c2b1d9ed73d..a75ae87f3cbe 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp
@@ -32,8 +32,7 @@ using namespace llvm;
// little/big-endian machines, followed by the Hi/Lo part. This means that
// they cannot be used as is on vectors, for which Lo is always stored first.
-void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) {
EVT OutVT = N->getValueType(0);
EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT);
SDValue InOp = N->getOperand(0);
@@ -50,31 +49,31 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
case SoftenFloat:
// Convert the integer operand instead.
SplitInteger(GetSoftenedFloat(InOp), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ExpandInteger:
case ExpandFloat:
// Convert the expanded pieces of the input.
GetExpandedOp(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case SplitVector:
GetSplitVector(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case ScalarizeVector:
// Convert the element instead.
SplitInteger(BitConvertToInteger(GetScalarizedVector(InOp)), Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
case WidenVector: {
- assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BIT_CONVERT");
+ assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST");
InOp = GetWidenedVector(InOp);
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
InVT.getVectorNumElements()/2);
@@ -84,19 +83,19 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, NOutVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi);
return;
}
}
if (InVT.isVector() && OutVT.isInteger()) {
- // Handle cases like i64 = BIT_CONVERT v1i64 on x86, where the operand
+ // Handle cases like i64 = BITCAST v1i64 on x86, where the operand
// is legal but the result is not.
EVT NVT = EVT::getVectorVT(*DAG.getContext(), NOutVT, 2);
if (isTypeLegal(NVT)) {
- SDValue CastInOp = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, InOp);
+ SDValue CastInOp = DAG.getNode(ISD::BITCAST, dl, NVT, InOp);
Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
DAG.getIntPtrConstant(0));
Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NOutVT, CastInOp,
@@ -119,14 +118,14 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
getTypeForEVT(*DAG.getContext()));
SDValue StackPtr = DAG.CreateStackTemporary(InVT, Alignment);
int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
+ MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SPFI);
// Emit a store to the stack slot.
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, SV, 0,
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, InOp, StackPtr, PtrInfo,
false, false, 0);
// Load the first half from the stack slot.
- Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, 0, false, false, 0);
+ Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, 0);
// Increment the pointer to the other half.
unsigned IncrementSize = NOutVT.getSizeInBits() / 8;
@@ -134,7 +133,8 @@ void DAGTypeLegalizer::ExpandRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the second half from the stack slot.
- Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, SV, IncrementSize, false,
+ Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr,
+ PtrInfo.getWithOffset(IncrementSize), false,
false, MinAlign(Alignment, IncrementSize));
// Handle endianness of the load.
@@ -172,7 +172,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT OldVT = N->getValueType(0);
EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
EVT::getVectorVT(*DAG.getContext(),
NewVT, 2*OldElts),
OldVec);
@@ -204,22 +204,21 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo,
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0));
SDValue Chain = LD->getChain();
SDValue Ptr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Alignment = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
assert(NVT.isByteSized() && "Expanded type not byte sized!");
- Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset,
+ Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
// Increment the pointer to the other half.
unsigned IncrementSize = NVT.getSizeInBits() / 8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getSrcValue(),
- SVOffset+IncrementSize,
+ Hi = DAG.getLoad(NVT, dl, Chain, Ptr,
+ LD->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
@@ -262,14 +261,14 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
// Generic Operand Expansion.
//===--------------------------------------------------------------------===//
-SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0).isVector()) {
// An illegal expanding type is being converted to a legal vector type.
// Make a two element vector out of the expanded parts and convert that
// instead, but only if the new vector type is legal (otherwise there
// is no point, and it might create expansion loops). For example, on
- // x86 this turns v1i64 = BIT_CONVERT i64 into v1i64 = BIT_CONVERT v2i32.
+ // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32.
EVT OVT = N->getOperand(0).getValueType();
EVT NVT = EVT::getVectorVT(*DAG.getContext(),
TLI.getTypeToTransformTo(*DAG.getContext(), OVT),
@@ -283,7 +282,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BIT_CONVERT(SDNode *N) {
std::swap(Parts[0], Parts[1]);
SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2);
- return DAG.getNode(ISD::BIT_CONVERT, dl, N->getValueType(0), Vec);
+ return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec);
}
}
@@ -322,7 +321,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) {
&NewElts[0], NewElts.size());
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_EXTRACT_ELEMENT(SDNode *N) {
@@ -347,7 +346,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
// Bitconvert to a vector of twice the length with elements of the expanded
// type, insert the expanded vector elements, and then convert back.
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewEVT, NumElts*2);
- SDValue NewVec = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue NewVec = DAG.getNode(ISD::BITCAST, dl,
NewVecVT, N->getOperand(0));
SDValue Lo, Hi;
@@ -363,7 +362,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) {
NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx);
// Convert the new vector to the old vector type.
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, VecVT, NewVec);
}
SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) {
@@ -390,7 +389,6 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
St->getValue().getValueType());
SDValue Chain = St->getChain();
SDValue Ptr = St->getBasePtr();
- int SVOffset = St->getSrcValueOffset();
unsigned Alignment = St->getAlignment();
bool isVolatile = St->isVolatile();
bool isNonTemporal = St->isNonTemporal();
@@ -404,14 +402,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(),
isVolatile, isNonTemporal, Alignment);
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!");
- Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getSrcValue(),
- SVOffset + IncrementSize,
+ Hi = DAG.getStore(Chain, dl, Hi, Ptr,
+ St->getPointerInfo().getWithOffset(IncrementSize),
isVolatile, isNonTemporal,
MinAlign(Alignment, IncrementSize));
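The pointer-offset pattern used throughout this file's changes (getPointerInfo().getWithOffset(IncrementSize), with MinAlign for the second access) has a simple scalar analogue. The sketch below uses invented names and plain memory operations, not the SelectionDAG API: it splits one wide store into two half-width stores at BasePtr and BasePtr + IncrementSize, swapping the halves for a big-endian layout as ExpandOp_NormalStore does.

#include <cassert>
#include <cstdint>
#include <cstring>
#include <utility>

// Expand a 64-bit store into two 32-bit stores. The second store is addressed
// at BasePtr + IncrementSize; on a big-endian target the high half is stored
// first, i.e. at the lower address.
static void expandedStore64(unsigned char *BasePtr, uint64_t Value,
                            bool BigEndian) {
  uint32_t Lo = uint32_t(Value);
  uint32_t Hi = uint32_t(Value >> 32);
  if (BigEndian)
    std::swap(Lo, Hi);                              // high bits at the low address
  const unsigned IncrementSize = sizeof(uint32_t);  // NVT size in bytes
  std::memcpy(BasePtr, &Lo, sizeof(Lo));                   // first half
  std::memcpy(BasePtr + IncrementSize, &Hi, sizeof(Hi));   // second half, offset pointer
}

int main() {
  unsigned char Buf[8];
  uint64_t V = 0x1122334455667788ULL;
  expandedStore64(Buf, V, /*BigEndian=*/false);
  uint32_t Lo, Hi;
  std::memcpy(&Lo, Buf, 4);
  std::memcpy(&Hi, Buf + 4, 4);
  assert(Lo == uint32_t(V) && Hi == uint32_t(V >> 32)); // halves landed where expected
  return 0;
}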
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 621c08724210..167dbe0377b3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -241,14 +241,14 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) {
for (unsigned j = 0; j != Op.getNumOperands(); ++j) {
if (Op.getOperand(j).getValueType().isVector())
- Operands[j] = DAG.getNode(ISD::BIT_CONVERT, dl, NVT, Op.getOperand(j));
+ Operands[j] = DAG.getNode(ISD::BITCAST, dl, NVT, Op.getOperand(j));
else
Operands[j] = Op.getOperand(j);
}
Op = DAG.getNode(Op.getOpcode(), dl, NVT, &Operands[0], Operands.size());
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT, Op);
+ return DAG.getNode(ISD::BITCAST, dl, VT, Op);
}
SDValue VectorLegalizer::ExpandFNEG(SDValue Op) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 93bc2d04928e..182f8fcbfbf3 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -46,7 +46,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to scalarize the result of this operator!");
- case ISD::BIT_CONVERT: R = ScalarizeVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: R = ScalarizeVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: R = N->getOperand(0); break;
case ISD::CONVERT_RNDSAT: R = ScalarizeVecRes_CONVERT_RNDSAT(N); break;
case ISD::EXTRACT_SUBVECTOR: R = ScalarizeVecRes_EXTRACT_SUBVECTOR(N); break;
@@ -122,9 +122,9 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) {
LHS.getValueType(), LHS, RHS);
}
-SDValue DAGTypeLegalizer::ScalarizeVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) {
EVT NewVT = N->getValueType(0).getVectorElementType();
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
NewVT, N->getOperand(0));
}
@@ -171,7 +171,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) {
N->getDebugLoc(),
N->getChain(), N->getBasePtr(),
DAG.getUNDEF(N->getBasePtr().getValueType()),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
@@ -296,8 +296,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
dbgs() << "\n";
#endif
llvm_unreachable("Do not know how to scalarize this operator's operand!");
- case ISD::BIT_CONVERT:
- Res = ScalarizeVecOp_BIT_CONVERT(N);
+ case ISD::BITCAST:
+ Res = ScalarizeVecOp_BITCAST(N);
break;
case ISD::CONCAT_VECTORS:
Res = ScalarizeVecOp_CONCAT_VECTORS(N);
@@ -326,11 +326,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) {
return false;
}
-/// ScalarizeVecOp_BIT_CONVERT - If the value to convert is a vector that needs
+/// ScalarizeVecOp_BITCAST - If the value to convert is a vector that needs
/// to be scalarized, it must be <1 x ty>. Convert the element instead.
-SDValue DAGTypeLegalizer::ScalarizeVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) {
SDValue Elt = GetScalarizedVector(N->getOperand(0));
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(),
N->getValueType(0), Elt);
}
@@ -365,14 +365,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){
if (N->isTruncatingStore())
return DAG.getTruncStore(N->getChain(), dl,
GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(),
- N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->getMemoryVT().getVectorElementType(),
N->isVolatile(), N->isNonTemporal(),
N->getAlignment());
return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)),
- N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(),
+ N->getBasePtr(), N->getPointerInfo(),
N->isVolatile(), N->isNonTemporal(),
N->getOriginalAlignment());
}
@@ -407,7 +406,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SELECT_CC: SplitRes_SELECT_CC(N, Lo, Hi); break;
case ISD::UNDEF: SplitRes_UNDEF(N, Lo, Hi); break;
- case ISD::BIT_CONVERT: SplitVecRes_BIT_CONVERT(N, Lo, Hi); break;
+ case ISD::BITCAST: SplitVecRes_BITCAST(N, Lo, Hi); break;
case ISD::BUILD_VECTOR: SplitVecRes_BUILD_VECTOR(N, Lo, Hi); break;
case ISD::CONCAT_VECTORS: SplitVecRes_CONCAT_VECTORS(N, Lo, Hi); break;
case ISD::CONVERT_RNDSAT: SplitVecRes_CONVERT_RNDSAT(N, Lo, Hi); break;
@@ -497,8 +496,8 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi);
}
-void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
- SDValue &Hi) {
+void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
// We know the result is a vector. The input may be either a vector or a
// scalar value.
EVT LoVT, HiVT;
@@ -526,8 +525,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
GetExpandedOp(InOp, Lo, Hi);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
break;
@@ -535,8 +534,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
// If the input is a vector that needs to be split, convert each split
// piece of the input now.
GetSplitVector(InOp, Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
return;
}
@@ -550,8 +549,8 @@ void DAGTypeLegalizer::SplitVecRes_BIT_CONVERT(SDNode *N, SDValue &Lo,
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- Lo = DAG.getNode(ISD::BIT_CONVERT, dl, LoVT, Lo);
- Hi = DAG.getNode(ISD::BIT_CONVERT, dl, HiVT, Hi);
+ Lo = DAG.getNode(ISD::BITCAST, dl, LoVT, Lo);
+ Hi = DAG.getNode(ISD::BITCAST, dl, HiVT, Hi);
}
void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo,
@@ -626,9 +625,9 @@ void DAGTypeLegalizer::SplitVecRes_CONVERT_RNDSAT(SDNode *N, SDValue &Lo,
EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(),
LoVT.getVectorNumElements());
VLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(0));
+ DAG.getIntPtrConstant(0));
VHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp,
- DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
+ DAG.getIntPtrConstant(InNVT.getVectorNumElements()));
break;
}
}
@@ -646,16 +645,15 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo,
SDValue &Hi) {
SDValue Vec = N->getOperand(0);
SDValue Idx = N->getOperand(1);
- EVT IdxVT = Idx.getValueType();
DebugLoc dl = N->getDebugLoc();
EVT LoVT, HiVT;
GetSplitDestVTs(N->getValueType(0), LoVT, HiVT);
Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx);
- Idx = DAG.getNode(ISD::ADD, dl, IdxVT, Idx,
- DAG.getConstant(LoVT.getVectorNumElements(), IdxVT));
- Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, Idx);
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec,
+ DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements()));
}
void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo,
@@ -705,8 +703,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
EVT VecVT = Vec.getValueType();
EVT EltVT = VecVT.getVectorElementType();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, NULL, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Store the new element. This may be larger than the vector element type,
// so use a truncating store.
@@ -714,11 +712,11 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
const Type *VecType = VecVT.getTypeForEVT(*DAG.getContext());
unsigned Alignment =
TLI.getTargetData()->getPrefTypeAlignment(VecType);
- Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, NULL, 0, EltVT,
+ Store = DAG.getTruncStore(Store, dl, Elt, EltPtr, MachinePointerInfo(), EltVT,
false, false, 0);
// Load the Lo part from the stack slot.
- Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, NULL, 0,
+ Lo = DAG.getLoad(Lo.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
false, false, 0);
// Increment the pointer to the other part.
@@ -727,8 +725,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo,
DAG.getIntPtrConstant(IncrementSize));
// Load the Hi part from the stack slot.
- Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, NULL, 0, false,
- false, MinAlign(Alignment, IncrementSize));
+ Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(),
+ false, false, MinAlign(Alignment, IncrementSize));
}
void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
@@ -751,8 +749,6 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue Ch = LD->getChain();
SDValue Ptr = LD->getBasePtr();
SDValue Offset = DAG.getUNDEF(Ptr.getValueType());
- const Value *SV = LD->getSrcValue();
- int SVOffset = LD->getSrcValueOffset();
EVT MemoryVT = LD->getMemoryVT();
unsigned Alignment = LD->getOriginalAlignment();
bool isVolatile = LD->isVolatile();
@@ -762,14 +758,15 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT);
Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal,
+ Alignment);
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset,
- SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment);
+ LD->getPointerInfo().getWithOffset(IncrementSize),
+ HiMemVT, isVolatile, isNonTemporal, Alignment);
// Build a factor node to remember that this load is independent of the
// other one.
@@ -980,10 +977,11 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
#endif
llvm_unreachable("Do not know how to split this operator's operand!");
- case ISD::BIT_CONVERT: Res = SplitVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = SplitVecOp_BITCAST(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = SplitVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT:Res = SplitVecOp_EXTRACT_VECTOR_ELT(N); break;
case ISD::CONCAT_VECTORS: Res = SplitVecOp_CONCAT_VECTORS(N); break;
+ case ISD::FP_ROUND: Res = SplitVecOp_FP_ROUND(N); break;
case ISD::STORE:
Res = SplitVecOp_STORE(cast<StoreSDNode>(N), OpNo);
break;
@@ -995,6 +993,8 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) {
case ISD::FP_TO_UINT:
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP:
+ case ISD::FP_EXTEND:
+ case ISD::FTRUNC:
case ISD::TRUNCATE:
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
@@ -1036,8 +1036,8 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) {
return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
}
-SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
- // For example, i64 = BIT_CONVERT v4i16 on alpha. Typically the vector will
+SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) {
+ // For example, i64 = BITCAST v4i16 on alpha. Typically the vector will
// end up being split all the way down to individual components. Convert the
// split pieces into integers and reassemble.
SDValue Lo, Hi;
@@ -1048,13 +1048,12 @@ SDValue DAGTypeLegalizer::SplitVecOp_BIT_CONVERT(SDNode *N) {
if (TLI.isBigEndian())
std::swap(Lo, Hi);
- return DAG.getNode(ISD::BIT_CONVERT, N->getDebugLoc(), N->getValueType(0),
+ return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0),
JoinIntegers(Lo, Hi));
}
SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) {
- // We know that the extracted result type is legal. For now, assume the index
- // is a constant.
+ // We know that the extracted result type is legal.
EVT SubVT = N->getValueType(0);
SDValue Idx = N->getOperand(1);
DebugLoc dl = N->getDebugLoc();
@@ -1099,15 +1098,13 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) {
EVT EltVT = VecVT.getVectorElementType();
DebugLoc dl = N->getDebugLoc();
SDValue StackPtr = DAG.CreateStackTemporary(VecVT);
- int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
- const Value *SV = PseudoSourceValue::getFixedStack(SPFI);
- SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, SV, 0,
- false, false, 0);
+ SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
+ MachinePointerInfo(), false, false, 0);
// Load back the required element.
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx);
- return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr,
- SV, 0, EltVT, false, false, 0);
+ return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr,
+ MachinePointerInfo(), EltVT, false, false, 0);
}
SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
@@ -1118,7 +1115,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
bool isTruncating = N->isTruncatingStore();
SDValue Ch = N->getChain();
SDValue Ptr = N->getBasePtr();
- int SVOffset = N->getSrcValueOffset();
EVT MemoryVT = N->getMemoryVT();
unsigned Alignment = N->getOriginalAlignment();
bool isVol = N->isVolatile();
@@ -1132,22 +1128,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
if (isTruncating)
- Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
LoMemVT, isVol, isNT, Alignment);
else
- Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getSrcValue(), SVOffset,
+ Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(),
isVol, isNT, Alignment);
// Increment the pointer to the other half.
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
DAG.getIntPtrConstant(IncrementSize));
- SVOffset += IncrementSize;
if (isTruncating)
- Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
HiMemVT, isVol, isNT, Alignment);
else
- Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getSrcValue(), SVOffset,
+ Hi = DAG.getStore(Ch, DL, Hi, Ptr,
+ N->getPointerInfo().getWithOffset(IncrementSize),
isVol, isNT, Alignment);
return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
@@ -1155,7 +1152,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
DebugLoc DL = N->getDebugLoc();
-
+
// The input operands all must have the same type, and we know the result
// type is valid. Convert this to a buildvector which extracts all the
// input elements.
@@ -1172,11 +1169,29 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) {
}
}
-
+
return DAG.getNode(ISD::BUILD_VECTOR, DL, N->getValueType(0),
&Elts[0], Elts.size());
}
+SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) {
+ // The result has a legal vector type, but the input needs splitting.
+ EVT ResVT = N->getValueType(0);
+ SDValue Lo, Hi;
+ DebugLoc DL = N->getDebugLoc();
+ GetSplitVector(N->getOperand(0), Lo, Hi);
+ EVT InVT = Lo.getValueType();
+
+ EVT OutVT = EVT::getVectorVT(*DAG.getContext(), ResVT.getVectorElementType(),
+ InVT.getVectorNumElements());
+
+ Lo = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Lo, N->getOperand(1));
+ Hi = DAG.getNode(ISD::FP_ROUND, DL, OutVT, Hi, N->getOperand(1));
+
+ return DAG.getNode(ISD::CONCAT_VECTORS, DL, ResVT, Lo, Hi);
+}
+
+
//===----------------------------------------------------------------------===//
// Result Vector Widening
@@ -1201,7 +1216,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen the result of this operator!");
- case ISD::BIT_CONVERT: Res = WidenVecRes_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecRes_BITCAST(N); break;
case ISD::BUILD_VECTOR: Res = WidenVecRes_BUILD_VECTOR(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecRes_CONCAT_VECTORS(N); break;
case ISD::CONVERT_RNDSAT: Res = WidenVecRes_CONVERT_RNDSAT(N); break;
@@ -1297,7 +1312,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
EVT WidenEltVT = WidenVT.getVectorElementType();
EVT VT = WidenVT;
unsigned NumElts = VT.getVectorNumElements();
- while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) {
+ while (!TLI.isTypeLegal(VT) && NumElts != 1) {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
}
@@ -1308,11 +1323,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue InOp2 = GetWidenedVector(N->getOperand(1));
return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2);
}
-
+
// No legal vector version so unroll the vector operation and then widen.
if (NumElts == 1)
return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements());
-
+
// Since the operation can trap, apply operation on the original vector.
EVT MaxVT = VT;
SDValue InOp1 = GetWidenedVector(N->getOperand(0));
@@ -1323,7 +1338,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
unsigned ConcatEnd = 0; // Current ConcatOps index.
int Idx = 0; // Current Idx into input vectors.
- // NumElts := greatest synthesizable vector size (at most WidenVT)
+ // NumElts := greatest legal vector size (at most WidenVT)
// while (orig. vector has unhandled elements) {
// take munches of size NumElts from the beginning and add to ConcatOps
// NumElts := next smaller supported vector size or 1
@@ -1341,13 +1356,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NumElts = NumElts / 2;
VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts);
- } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1);
+ } while (!TLI.isTypeLegal(VT) && NumElts != 1);
if (NumElts == 1) {
for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) {
- SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp1, DAG.getIntPtrConstant(Idx));
- SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
+ SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT,
InOp2, DAG.getIntPtrConstant(Idx));
ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT,
EOp1, EOp2);
@@ -1378,7 +1393,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
do {
NextSize *= 2;
NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize);
- } while (!TLI.isTypeSynthesizable(NextVT));
+ } while (!TLI.isTypeLegal(NextVT));
if (!VT.isVector()) {
// Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT
@@ -1415,7 +1430,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
if (VT == WidenVT)
return ConcatOps[0];
}
-
+
// add undefs of size MaxVT until ConcatOps grows to length of WidenVT
unsigned NumOps = WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements();
if (NumOps != ConcatEnd ) {
@@ -1428,7 +1443,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) {
SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue InOp = N->getOperand(0);
- DebugLoc dl = N->getDebugLoc();
+ DebugLoc DL = N->getDebugLoc();
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
@@ -1444,11 +1459,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
InOp = GetWidenedVector(N->getOperand(0));
InVT = InOp.getValueType();
InVTNumElts = InVT.getVectorNumElements();
- if (InVTNumElts == WidenNumElts)
- return DAG.getNode(Opcode, dl, WidenVT, InOp);
+ if (InVTNumElts == WidenNumElts) {
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InOp);
+ return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1));
+ }
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1462,16 +1480,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
SDValue UndefVal = DAG.getUNDEF(InVT);
for (unsigned i = 1; i != NumConcat; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::CONCAT_VECTORS, dl, InWidenVT,
- &Ops[0], NumConcat));
+ SDValue InVec = DAG.getNode(ISD::CONCAT_VECTORS, DL, InWidenVT,
+ &Ops[0], NumConcat);
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVec);
+ return DAG.getNode(Opcode, DL, WidenVT, InVec, N->getOperand(1));
}
if (InVTNumElts % WidenNumElts == 0) {
+ SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT,
+ InOp, DAG.getIntPtrConstant(0));
// Extract the input and convert the shortened input vector.
- return DAG.getNode(Opcode, dl, WidenVT,
- DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT,
- InOp, DAG.getIntPtrConstant(0)));
+ if (N->getNumOperands() == 1)
+ return DAG.getNode(Opcode, DL, WidenVT, InVal);
+ return DAG.getNode(Opcode, DL, WidenVT, InVal, N->getOperand(1));
}
}
@@ -1480,16 +1502,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
EVT EltVT = WidenVT.getVectorElementType();
unsigned MinElts = std::min(InVTNumElts, WidenNumElts);
unsigned i;
- for (i=0; i < MinElts; ++i)
- Ops[i] = DAG.getNode(Opcode, dl, EltVT,
- DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp,
- DAG.getIntPtrConstant(i)));
+ for (i=0; i < MinElts; ++i) {
+ SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp,
+ DAG.getIntPtrConstant(i));
+ if (N->getNumOperands() == 1)
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val);
+ else
+ Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val, N->getOperand(1));
+ }
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
Ops[i] = UndefVal;
- return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts);
+ return DAG.getNode(ISD::BUILD_VECTOR, DL, WidenVT, &Ops[0], WidenNumElts);
}
SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) {
@@ -1536,7 +1562,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) {
WidenVT, WidenLHS, DAG.getValueType(ExtVT));
}
-SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) {
SDValue InOp = N->getOperand(0);
EVT InVT = InOp.getValueType();
EVT VT = N->getValueType(0);
@@ -1555,7 +1581,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InOp = GetPromotedInteger(InOp);
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
case SoftenFloat:
case ExpandInteger:
@@ -1570,13 +1596,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
InVT = InOp.getValueType();
if (WidenVT.bitsEq(InVT))
// The input widens to the same size. Convert to the widen value.
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, InOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, InOp);
break;
}
unsigned WidenSize = WidenVT.getSizeInBits();
unsigned InSize = InVT.getSizeInBits();
- if (WidenSize % InSize == 0) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (WidenSize % InSize == 0 && InVT != MVT::x86mmx) {
// Determine new input vector type. The new input vector type will use
// the same element type (if it's a vector) or use the input type as a
// vector. It is the same size as the type to widen to.
@@ -1590,7 +1617,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts);
}
- if (TLI.isTypeSynthesizable(NewInVT)) {
+ if (TLI.isTypeLegal(NewInVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1609,7 +1636,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) {
else
NewVec = DAG.getNode(ISD::BUILD_VECTOR, dl,
NewInVT, &Ops[0], NewNumElts);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, NewVec);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, NewVec);
}
}
@@ -1730,7 +1757,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) {
SatOp, CvtCode);
}
- if (TLI.isTypeSynthesizable(InWidenVT)) {
+ if (TLI.isTypeLegal(InWidenVT)) {
// Because the result and the input are different vector types, widening
// the result could create a legal type but widening the input might make
// it an illegal type that might lead to repeatedly splitting the input
@@ -1794,39 +1821,25 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) {
EVT InVT = InOp.getValueType();
- ConstantSDNode *CIdx = dyn_cast<ConstantSDNode>(Idx);
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- // Check if we can just return the input vector after widening.
- if (IdxVal == 0 && InVT == WidenVT)
- return InOp;
-
- // Check if we can extract from the vector.
- unsigned InNumElts = InVT.getVectorNumElements();
- if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
- return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
- }
+ // Check if we can just return the input vector after widening.
+ uint64_t IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
+ if (IdxVal == 0 && InVT == WidenVT)
+ return InOp;
+
+ // Check if we can extract from the vector.
+ unsigned InNumElts = InVT.getVectorNumElements();
+ if (IdxVal % WidenNumElts == 0 && IdxVal + WidenNumElts < InNumElts)
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, WidenVT, InOp, Idx);
// We could try widening the input to the right length but for now, extract
// the original elements, fill the rest with undefs and build a vector.
SmallVector<SDValue, 16> Ops(WidenNumElts);
EVT EltVT = VT.getVectorElementType();
- EVT IdxVT = Idx.getValueType();
unsigned NumElts = VT.getVectorNumElements();
unsigned i;
- if (CIdx) {
- unsigned IdxVal = CIdx->getZExtValue();
- for (i=0; i < NumElts; ++i)
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
- DAG.getConstant(IdxVal+i, IdxVT));
- } else {
- Ops[0] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, Idx);
- for (i=1; i < NumElts; ++i) {
- SDValue NewIdx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
- DAG.getConstant(i, IdxVT));
- Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, NewIdx);
- }
- }
+ for (i=0; i < NumElts; ++i)
+ Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp,
+ DAG.getIntPtrConstant(IdxVal+i));
SDValue UndefVal = DAG.getUNDEF(EltVT);
for (; i < WidenNumElts; ++i)
@@ -1985,7 +1998,7 @@ bool DAGTypeLegalizer::WidenVectorOperand(SDNode *N, unsigned ResNo) {
#endif
llvm_unreachable("Do not know how to widen this operator's operand!");
- case ISD::BIT_CONVERT: Res = WidenVecOp_BIT_CONVERT(N); break;
+ case ISD::BITCAST: Res = WidenVecOp_BITCAST(N); break;
case ISD::CONCAT_VECTORS: Res = WidenVecOp_CONCAT_VECTORS(N); break;
case ISD::EXTRACT_SUBVECTOR: Res = WidenVecOp_EXTRACT_SUBVECTOR(N); break;
case ISD::EXTRACT_VECTOR_ELT: Res = WidenVecOp_EXTRACT_VECTOR_ELT(N); break;
@@ -2044,7 +2057,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) {
return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts);
}
-SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
+SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) {
EVT VT = N->getValueType(0);
SDValue InOp = GetWidenedVector(N->getOperand(0));
EVT InWidenVT = InOp.getValueType();
@@ -2053,11 +2066,12 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) {
// Check if we can convert between two legal vector types and extract.
unsigned InWidenSize = InWidenVT.getSizeInBits();
unsigned Size = VT.getSizeInBits();
- if (InWidenSize % Size == 0 && !VT.isVector()) {
+ // x86mmx is not an acceptable vector element type, so don't try.
+ if (InWidenSize % Size == 0 && !VT.isVector() && VT != MVT::x86mmx) {
unsigned NewNumElts = InWidenSize / Size;
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts);
- if (TLI.isTypeSynthesizable(NewVT)) {
- SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp);
+ if (TLI.isTypeLegal(NewVT)) {
+ SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp);
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp,
DAG.getIntPtrConstant(0));
}
@@ -2146,7 +2160,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
if (Width == WidenEltWidth)
return RetVT;
- // See if there is larger legal integer than the element type to load/store
+ // See if there is a larger legal integer than the element type to load/store
unsigned VT;
for (VT = (unsigned)MVT::LAST_INTEGER_VALUETYPE;
VT >= (unsigned)MVT::FIRST_INTEGER_VALUETYPE; --VT) {
@@ -2154,7 +2168,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
unsigned MemVTWidth = MemVT.getSizeInBits();
if (MemVT.getSizeInBits() <= WidenEltWidth)
break;
- if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
+ if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
RetVT = MemVT;
@@ -2168,7 +2182,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI,
VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) {
EVT MemVT = (MVT::SimpleValueType) VT;
unsigned MemVTWidth = MemVT.getSizeInBits();
- if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
+ if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() &&
(WidenWidth % MemVTWidth) == 0 &&
(MemVTWidth <= Width ||
(Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
@@ -2201,7 +2215,7 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
if (NewLdTy != LdTy) {
NumElts = Width / NewLdTy.getSizeInBits();
NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewLdTy, NumElts);
- VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, VecOp);
+ VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, VecOp);
// Readjust the insert position within the vector based on the new load type
Idx = Idx * LdTy.getSizeInBits() / NewLdTy.getSizeInBits();
LdTy = NewLdTy;
@@ -2209,11 +2223,11 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy,
VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i],
DAG.getIntPtrConstant(Idx++));
}
- return DAG.getNode(ISD::BIT_CONVERT, dl, VecTy, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp);
}
-SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
- LoadSDNode * LD) {
+SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain,
+ LoadSDNode *LD) {
// The strategy assumes that we can efficiently load powers of two widths.
// The routine chops the vector into the largest vector loads with the same
// element type or scalar loads, and then recombines them into the widened vector
@@ -2228,11 +2242,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
int LdWidth = LdVT.getSizeInBits();
int WidthDiff = WidenWidth - LdWidth; // Difference
@@ -2241,7 +2253,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
// Find the vector type that can load from.
EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff);
int NewVTWidth = NewVT.getSizeInBits();
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV, SVOffset,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
isVolatile, isNonTemporal, Align);
LdChain.push_back(LdOp.getValue(1));
@@ -2251,7 +2263,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
unsigned NumElts = WidenWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
SDValue VecOp = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NewVecVT, LdOp);
- return DAG.getNode(ISD::BIT_CONVERT, dl, WidenVT, VecOp);
+ return DAG.getNode(ISD::BITCAST, dl, WidenVT, VecOp);
}
if (NewVT == WidenVT)
return LdOp;
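A sketch of the new memory-operand style used throughout this hunk, assuming a LoadSDNode *LD and the DAG/dl members of the surrounding function: the old SrcValue/SVOffset pair is replaced by MachinePointerInfo, with per-chunk offsets derived via getWithOffset().

    // Sketch only: load one chunk of a widened vector load at byte offset Offset.
    static SDValue LoadChunk(SelectionDAG &DAG, DebugLoc dl, LoadSDNode *LD,
                             SDValue Ptr, EVT ChunkVT, unsigned Offset) {
      return DAG.getLoad(ChunkVT, dl, LD->getChain(), Ptr,
                         LD->getPointerInfo().getWithOffset(Offset),
                         LD->isVolatile(), LD->isNonTemporal(),
                         MinAlign(LD->getAlignment(), Offset));
    }

The store hunks later in this file follow the same pattern with ST->getPointerInfo().getWithOffset(Offset).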
@@ -2286,8 +2298,9 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
NewVTWidth = NewVT.getSizeInBits();
}
- SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, SV,
- SVOffset+Offset, isVolatile,
+ SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr,
+ LD->getPointerInfo().getWithOffset(Offset),
+ isVolatile,
isNonTemporal, MinAlign(Align, Increment));
LdChain.push_back(LdOp.getValue(1));
LdOps.push_back(LdOp);
@@ -2300,7 +2313,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16>& LdChain,
if (!LdOps[0].getValueType().isVector())
// All the loads are scalar loads.
return BuildVectorFromScalar(DAG, WidenVT, LdOps, 0, End);
-
+
// If the load contains vectors, build the vector using concat vector.
// All of the vector loads are powers of 2, and the scalar loads can be
// combined to make a power-of-2 vector.
@@ -2362,11 +2375,9 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
// Load information
SDValue Chain = LD->getChain();
SDValue BasePtr = LD->getBasePtr();
- int SVOffset = LD->getSrcValueOffset();
unsigned Align = LD->getAlignment();
bool isVolatile = LD->isVolatile();
bool isNonTemporal = LD->isNonTemporal();
- const Value *SV = LD->getSrcValue();
EVT EltVT = WidenVT.getVectorElementType();
EVT LdEltVT = LdVT.getVectorElementType();
@@ -2376,16 +2387,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain,
unsigned WidenNumElts = WidenVT.getVectorNumElements();
SmallVector<SDValue, 16> Ops(WidenNumElts);
unsigned Increment = LdEltVT.getSizeInBits() / 8;
- Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset,
+ Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr,
+ LD->getPointerInfo(),
LdEltVT, isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[0].getValue(1));
unsigned i = 0, Offset = Increment;
for (i=1; i < NumElts; ++i, Offset += Increment) {
SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
BasePtr, DAG.getIntPtrConstant(Offset));
- Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV,
- SVOffset + Offset, LdEltVT, isVolatile,
- isNonTemporal, Align);
+ Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr,
+ LD->getPointerInfo().getWithOffset(Offset), LdEltVT,
+ isVolatile, isNonTemporal, Align);
LdChain.push_back(Ops[i].getValue(1));
}
@@ -2405,8 +2417,6 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// element type or scalar stores.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
@@ -2433,9 +2443,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp,
DAG.getIntPtrConstant(Idx));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal,
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
@@ -2447,15 +2457,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain,
// Cast the vector to the scalar type we can store
unsigned NumElts = ValWidth / NewVTWidth;
EVT NewVecVT = EVT::getVectorVT(*DAG.getContext(), NewVT, NumElts);
- SDValue VecOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVecVT, ValOp);
+ SDValue VecOp = DAG.getNode(ISD::BITCAST, dl, NewVecVT, ValOp);
// Readjust index position based on new vector type
Idx = Idx * ValEltWidth / NewVTWidth;
do {
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp,
DAG.getIntPtrConstant(Idx++));
- StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset + Offset, isVolatile,
- isNonTemporal, MinAlign(Align, Offset)));
+ StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ isVolatile, isNonTemporal,
+ MinAlign(Align, Offset)));
StWidth -= NewVTWidth;
Offset += Increment;
BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
@@ -2474,14 +2485,12 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
// and then store it. Instead, we extract each element and then store it.
SDValue Chain = ST->getChain();
SDValue BasePtr = ST->getBasePtr();
- const Value *SV = ST->getSrcValue();
- int SVOffset = ST->getSrcValueOffset();
unsigned Align = ST->getAlignment();
bool isVolatile = ST->isVolatile();
bool isNonTemporal = ST->isNonTemporal();
SDValue ValOp = GetWidenedVector(ST->getValue());
DebugLoc dl = ST->getDebugLoc();
-
+
EVT StVT = ST->getMemoryVT();
EVT ValVT = ValOp.getValueType();
@@ -2499,8 +2508,8 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
unsigned NumElts = StVT.getVectorNumElements();
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, SV,
- SVOffset, StEltVT,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr,
+ ST->getPointerInfo(), StEltVT,
isVolatile, isNonTemporal, Align));
unsigned Offset = Increment;
for (unsigned i=1; i < NumElts; ++i, Offset += Increment) {
@@ -2508,9 +2517,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain,
BasePtr, DAG.getIntPtrConstant(Offset));
SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp,
DAG.getIntPtrConstant(0));
- StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, SV,
- SVOffset + Offset, StEltVT,
- isVolatile, isNonTemporal,
+ StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr,
+ ST->getPointerInfo().getWithOffset(Offset),
+ StEltVT, isVolatile, isNonTemporal,
MinAlign(Align, Offset)));
}
}
diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
index ac2d33884b26..2dcb22957325 100644
--- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
+++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h
@@ -16,7 +16,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/DebugLoc.h"
-#include "llvm/System/DataTypes.h"
+#include "llvm/Support/DataTypes.h"
namespace llvm {
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
index fae27294e364..e3da2084529a 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp
@@ -205,7 +205,7 @@ void ScheduleDAGFast::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
+ if (SU->getNode()->getGluedNode())
return NULL;
SDNode *N = SU->getNode();
@@ -216,7 +216,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -224,7 +224,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -476,12 +476,12 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU,
}
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
index 56f5ded50083..430283d5eff9 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGList.cpp
@@ -40,7 +40,7 @@ STATISTIC(NumStalls, "Number of pipeline stalls");
static RegisterScheduler
tdListDAGScheduler("list-td", "Top-down list scheduler",
createTDListDAGScheduler);
-
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGList - The actual list scheduler implementation. This supports
@@ -51,7 +51,7 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
///
SchedulingPriorityQueue *AvailableQueue;
-
+
/// PendingQueue - This contains all of the instructions whose operands have
/// been issued, but their results are not ready yet (due to the latency of
/// the operation). Once the operands become available, the instruction is
@@ -63,11 +63,12 @@ private:
public:
ScheduleDAGList(MachineFunction &mf,
- SchedulingPriorityQueue *availqueue,
- ScheduleHazardRecognizer *HR)
- : ScheduleDAGSDNodes(mf),
- AvailableQueue(availqueue), HazardRec(HR) {
- }
+ SchedulingPriorityQueue *availqueue)
+ : ScheduleDAGSDNodes(mf), AvailableQueue(availqueue) {
+
+ const TargetMachine &tm = mf.getTarget();
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGList() {
delete HazardRec;
@@ -87,14 +88,14 @@ private:
/// Schedule - Schedule the DAG using list scheduling.
void ScheduleDAGList::Schedule() {
DEBUG(dbgs() << "********** List Scheduling **********\n");
-
+
// Build the scheduling graph.
BuildSchedGraph(NULL);
AvailableQueue->initNodes(SUnits);
-
+
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -118,7 +119,7 @@ void ScheduleDAGList::ReleaseSucc(SUnit *SU, const SDep &D) {
--SuccSU->NumPredsLeft;
SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency());
-
+
// If all the node's predecessors are scheduled, this node is ready
// to be scheduled. Ignore the special ExitSU node.
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
@@ -142,7 +143,7 @@ void ScheduleDAGList::ReleaseSuccessors(SUnit *SU) {
void ScheduleDAGList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
-
+
Sequence.push_back(SU);
assert(CurCycle >= SU->getDepth() && "Node scheduled above its depth!");
SU->setDepthToAtLeast(CurCycle);
@@ -168,7 +169,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
std::vector<SUnit*> NotReady;
@@ -187,7 +188,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
assert(PendingQueue[i]->getDepth() > CurCycle && "Negative latency?");
}
}
-
+
// If there are no instructions available, don't try to issue anything, and
// don't advance the hazard recognizer.
if (AvailableQueue->empty()) {
@@ -196,24 +197,24 @@ void ScheduleDAGList::ListScheduleTopDown() {
}
SUnit *FoundSUnit = 0;
-
+
bool HasNoopHazards = false;
while (!AvailableQueue->empty()) {
SUnit *CurSUnit = AvailableQueue->pop();
-
+
ScheduleHazardRecognizer::HazardType HT =
- HazardRec->getHazardType(CurSUnit);
+ HazardRec->getHazardType(CurSUnit, 0/*no stalls*/);
if (HT == ScheduleHazardRecognizer::NoHazard) {
FoundSUnit = CurSUnit;
break;
}
-
+
// Remember if this is a noop hazard.
HasNoopHazards |= HT == ScheduleHazardRecognizer::NoopHazard;
-
+
NotReady.push_back(CurSUnit);
}
-
+
// Add the nodes that aren't ready back onto the available list.
if (!NotReady.empty()) {
AvailableQueue->push_all(NotReady);
@@ -228,7 +229,7 @@ void ScheduleDAGList::ListScheduleTopDown() {
// If this is a pseudo-op node, we don't want to increment the current
// cycle.
if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops!
- ++CurCycle;
+ ++CurCycle;
} else if (!HasNoopHazards) {
// Otherwise, we have a pipeline stall, but no other problem, just advance
// the current cycle and try again.
@@ -257,12 +258,8 @@ void ScheduleDAGList::ListScheduleTopDown() {
// Public Constructor Functions
//===----------------------------------------------------------------------===//
-/// createTDListDAGScheduler - This creates a top-down list scheduler with a
-/// new hazard recognizer. This scheduler takes ownership of the hazard
-/// recognizer and deletes it when done.
+/// createTDListDAGScheduler - This creates a top-down list scheduler.
ScheduleDAGSDNodes *
llvm::createTDListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
- return new ScheduleDAGList(*IS->MF,
- new LatencyPriorityQueue(),
- IS->CreateTargetHazardRecognizer());
+ return new ScheduleDAGList(*IS->MF, new LatencyPriorityQueue());
}
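With this change the list scheduler owns its hazard recognizer and obtains it from the target via TargetInstrInfo::CreateTargetHazardRecognizer, as the constructor above shows. A hypothetical target-side override might look like the sketch below; the class name is illustrative and the exact signature is inferred from the call sites in this patch.

    // Hypothetical target hook (names illustrative, not from this patch): supply a
    // recognizer, or fall back to the default no-op ScheduleHazardRecognizer when
    // there is nothing to model.
    ScheduleHazardRecognizer *
    MyTargetInstrInfo::CreateTargetHazardRecognizer(const TargetMachine *TM,
                                                    const ScheduleDAG *DAG) const {
      return new ScheduleHazardRecognizer();
    }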
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
index 4c3e4e3b0768..0b548b277f4c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -20,6 +20,7 @@
#include "llvm/InlineAsm.h"
#include "llvm/CodeGen/SchedulerRegistry.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
@@ -65,6 +66,10 @@ static RegisterScheduler
"which tries to balance ILP and register pressure",
createILPListDAGScheduler);
+static cl::opt<bool> DisableSchedCycles(
+ "disable-sched-cycles", cl::Hidden, cl::init(false),
+ cl::desc("Disable cycle-level precision during preRA scheduling"));
+
namespace {
//===----------------------------------------------------------------------===//
/// ScheduleDAGRRList - The actual register reduction list scheduler
@@ -83,31 +88,56 @@ private:
/// AvailableQueue - The priority queue to use for the available SUnits.
SchedulingPriorityQueue *AvailableQueue;
+ /// PendingQueue - This contains all of the instructions whose operands have
+ /// been issued, but their results are not ready yet (due to the latency of
+ /// the operation). Once the operands become available, the instruction is
+ /// added to the AvailableQueue.
+ std::vector<SUnit*> PendingQueue;
+
+ /// HazardRec - The hazard recognizer to use.
+ ScheduleHazardRecognizer *HazardRec;
+
+ /// CurCycle - The current scheduler state corresponds to this cycle.
+ unsigned CurCycle;
+
+ /// MinAvailableCycle - Cycle of the soonest available instruction.
+ unsigned MinAvailableCycle;
+
/// LiveRegDefs - A set of physical registers and their definition
/// that are "live". These nodes must be scheduled before any other nodes that
/// modifies the registers can be scheduled.
unsigned NumLiveRegs;
std::vector<SUnit*> LiveRegDefs;
- std::vector<unsigned> LiveRegCycles;
+ std::vector<SUnit*> LiveRegGens;
/// Topo - A topological ordering for SUnits which permits fast IsReachable
/// and similar queries.
ScheduleDAGTopologicalSort Topo;
public:
- ScheduleDAGRRList(MachineFunction &mf,
- bool isbottomup, bool needlatency,
- SchedulingPriorityQueue *availqueue)
- : ScheduleDAGSDNodes(mf), isBottomUp(isbottomup), NeedLatency(needlatency),
- AvailableQueue(availqueue), Topo(SUnits) {
- }
+ ScheduleDAGRRList(MachineFunction &mf, bool needlatency,
+ SchedulingPriorityQueue *availqueue,
+ CodeGenOpt::Level OptLevel)
+ : ScheduleDAGSDNodes(mf), isBottomUp(availqueue->isBottomUp()),
+ NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0),
+ Topo(SUnits) {
+
+ const TargetMachine &tm = mf.getTarget();
+ if (DisableSchedCycles || !NeedLatency)
+ HazardRec = new ScheduleHazardRecognizer();
+ else
+ HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this);
+ }
~ScheduleDAGRRList() {
+ delete HazardRec;
delete AvailableQueue;
}
void Schedule();
+ ScheduleHazardRecognizer *getHazardRec() { return HazardRec; }
+
/// IsReachable - Checks if SU is reachable from TargetSU.
bool IsReachable(const SUnit *SU, const SUnit *TargetSU) {
return Topo.IsReachable(SU, TargetSU);
@@ -136,24 +166,37 @@ public:
}
private:
+ bool isReady(SUnit *SU) {
+ return DisableSchedCycles || !AvailableQueue->hasReadyFilter() ||
+ AvailableQueue->isReady(SU);
+ }
+
void ReleasePred(SUnit *SU, const SDep *PredEdge);
- void ReleasePredecessors(SUnit *SU, unsigned CurCycle);
+ void ReleasePredecessors(SUnit *SU);
void ReleaseSucc(SUnit *SU, const SDep *SuccEdge);
void ReleaseSuccessors(SUnit *SU);
+ void ReleasePending();
+ void AdvanceToCycle(unsigned NextCycle);
+ void AdvancePastStalls(SUnit *SU);
+ void EmitNode(SUnit *SU);
+ void ScheduleNodeBottomUp(SUnit*);
void CapturePred(SDep *PredEdge);
- void ScheduleNodeBottomUp(SUnit*, unsigned);
- void ScheduleNodeTopDown(SUnit*, unsigned);
void UnscheduleNodeBottomUp(SUnit*);
- void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
+ void RestoreHazardCheckerBottomUp();
+ void BacktrackBottomUp(SUnit*, SUnit*);
SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*,
const TargetRegisterClass*,
SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
- void ListScheduleTopDown();
+
+ SUnit *PickNodeToScheduleBottomUp();
void ListScheduleBottomUp();
+ void ScheduleNodeTopDown(SUnit*);
+ void ListScheduleTopDown();
+
/// CreateNewSUnit - Creates a new SUnit and returns a pointer to it.
/// Updates the topological ordering if required.
@@ -190,11 +233,13 @@ private:
void ScheduleDAGRRList::Schedule() {
DEBUG(dbgs()
<< "********** List Scheduling BB#" << BB->getNumber()
- << " **********\n");
+ << " '" << BB->getName() << "' **********\n");
+ CurCycle = 0;
+ MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX;
NumLiveRegs = 0;
- LiveRegDefs.resize(TRI->getNumRegs(), NULL);
- LiveRegCycles.resize(TRI->getNumRegs(), 0);
+ LiveRegDefs.resize(TRI->getNumRegs(), NULL);
+ LiveRegGens.resize(TRI->getNumRegs(), NULL);
// Build the scheduling graph.
BuildSchedGraph(NULL);
@@ -204,13 +249,15 @@ void ScheduleDAGRRList::Schedule() {
Topo.InitDAGTopologicalSorting();
AvailableQueue->initNodes(SUnits);
-
+
+ HazardRec->Reset();
+
// Execute the actual scheduling loop Top-Down or Bottom-Up as appropriate.
if (isBottomUp)
ListScheduleBottomUp();
else
ListScheduleTopDown();
-
+
AvailableQueue->releaseState();
}
@@ -243,33 +290,197 @@ void ScheduleDAGRRList::ReleasePred(SUnit *SU, const SDep *PredEdge) {
// to be scheduled. Ignore the special EntrySU node.
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU) {
PredSU->isAvailable = true;
- AvailableQueue->push(PredSU);
+
+ unsigned Height = PredSU->getHeight();
+ if (Height < MinAvailableCycle)
+ MinAvailableCycle = Height;
+
+ if (isReady(SU)) {
+ AvailableQueue->push(PredSU);
+ }
+ // CapturePred and others may have left the node in the pending queue, avoid
+ // adding it twice.
+ else if (!PredSU->isPending) {
+ PredSU->isPending = true;
+ PendingQueue.push_back(PredSU);
+ }
}
}
-void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU, unsigned CurCycle) {
+/// Call ReleasePred for each predecessor, then update register live def/gen.
+/// Always update LiveRegDefs for a register dependence even if the current SU
+/// also defines the register. This effectively creates one large live range
+/// across a sequence of two-address nodes. This is important because the
+/// entire chain must be scheduled together. Example:
+///
+/// flags = (3) add
+/// flags = (2) addc flags
+/// flags = (1) addc flags
+///
+/// results in
+///
+/// LiveRegDefs[flags] = 3
+/// LiveRegGens[flags] = 1
+///
+/// If (2) addc is unscheduled, then (1) addc must also be unscheduled to avoid
+/// interference on flags.
+void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) {
// Bottom up: release predecessors
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
ReleasePred(SU, &*I);
if (I->isAssignedRegDep()) {
// This is a physical register dependency and it's impossible or
- // expensive to copy the register. Make sure nothing that can
+ // expensive to copy the register. Make sure nothing that can
// clobber the register is scheduled between the predecessor and
// this node.
- if (!LiveRegDefs[I->getReg()]) {
+ SUnit *RegDef = LiveRegDefs[I->getReg()]; (void)RegDef;
+ assert((!RegDef || RegDef == SU || RegDef == I->getSUnit()) &&
+ "interference on register dependence");
+ LiveRegDefs[I->getReg()] = I->getSUnit();
+ if (!LiveRegGens[I->getReg()]) {
++NumLiveRegs;
- LiveRegDefs[I->getReg()] = I->getSUnit();
- LiveRegCycles[I->getReg()] = CurCycle;
+ LiveRegGens[I->getReg()] = SU;
}
}
}
}
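A self-contained illustration of the def/gen bookkeeping described in the comment above, using made-up SUnit numbers for the addc chain; it models only the map updates and is not LLVM API.

    // Standalone model (hypothetical values): releasing the predecessors of (1)
    // and then of (2) leaves one live range spanning the whole flag-carrying chain.
    #include <cassert>
    int main() {
      int LiveRegDefs[8] = {0}, LiveRegGens[8] = {0};
      const int flags = 1;
      // ReleasePredecessors(SU 1): the assigned reg dep's def is SU 2.
      LiveRegDefs[flags] = 2; if (!LiveRegGens[flags]) LiveRegGens[flags] = 1;
      // ReleasePredecessors(SU 2): the def moves up to SU 3; the gen stays at SU 1.
      LiveRegDefs[flags] = 3; if (!LiveRegGens[flags]) LiveRegGens[flags] = 2;
      assert(LiveRegDefs[flags] == 3 && LiveRegGens[flags] == 1);
      return 0;
    }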
+/// Check to see if any of the pending instructions are ready to issue. If
+/// so, add them to the available queue.
+void ScheduleDAGRRList::ReleasePending() {
+ if (DisableSchedCycles) {
+ assert(PendingQueue.empty() && "pending instrs not allowed in this mode");
+ return;
+ }
+
+ // If the available queue is empty, it is safe to reset MinAvailableCycle.
+ if (AvailableQueue->empty())
+ MinAvailableCycle = UINT_MAX;
+
+ // Check to see if any of the pending instructions are ready to issue. If
+ // so, add them to the available queue.
+ for (unsigned i = 0, e = PendingQueue.size(); i != e; ++i) {
+ unsigned ReadyCycle =
+ isBottomUp ? PendingQueue[i]->getHeight() : PendingQueue[i]->getDepth();
+ if (ReadyCycle < MinAvailableCycle)
+ MinAvailableCycle = ReadyCycle;
+
+ if (PendingQueue[i]->isAvailable) {
+ if (!isReady(PendingQueue[i]))
+ continue;
+ AvailableQueue->push(PendingQueue[i]);
+ }
+ PendingQueue[i]->isPending = false;
+ PendingQueue[i] = PendingQueue.back();
+ PendingQueue.pop_back();
+ --i; --e;
+ }
+}
+
+/// Advance the scheduler state to the specified cycle.
+void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) {
+ if (NextCycle <= CurCycle)
+ return;
+
+ AvailableQueue->setCurCycle(NextCycle);
+ if (!HazardRec->isEnabled()) {
+ // Bypass lots of virtual calls in case of long latency.
+ CurCycle = NextCycle;
+ }
+ else {
+ for (; CurCycle != NextCycle; ++CurCycle) {
+ if (isBottomUp)
+ HazardRec->RecedeCycle();
+ else
+ HazardRec->AdvanceCycle();
+ }
+ }
+ // FIXME: Instead of visiting the pending Q each time, set a dirty flag on the
+ // available Q to release pending nodes at least once before popping.
+ ReleasePending();
+}
+
+/// Move the scheduler state forward until the specified node's dependents are
+/// ready and can be scheduled with no resource conflicts.
+void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
+ if (DisableSchedCycles)
+ return;
+
+ unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
+
+ // Bump CurCycle to account for latency. We assume the latency of other
+ // available instructions may be hidden by the stall (not a full pipe stall).
+ // This updates the hazard recognizer's cycle before reserving resources for
+ // this instruction.
+ AdvanceToCycle(ReadyCycle);
+
+ // Calls are scheduled in their preceding cycle, so don't conflict with
+ // hazards from instructions after the call. EmitNode will reset the
+ // scoreboard state before emitting the call.
+ if (isBottomUp && SU->isCall)
+ return;
+
+ // FIXME: For resource conflicts in very long non-pipelined stages, we
+ // should probably skip ahead here to avoid useless scoreboard checks.
+ int Stalls = 0;
+ while (true) {
+ ScheduleHazardRecognizer::HazardType HT =
+ HazardRec->getHazardType(SU, isBottomUp ? -Stalls : Stalls);
+
+ if (HT == ScheduleHazardRecognizer::NoHazard)
+ break;
+
+ ++Stalls;
+ }
+ AdvanceToCycle(CurCycle + Stalls);
+}
+
+/// Record this SUnit in the HazardRecognizer.
+/// Does not update CurCycle.
+void ScheduleDAGRRList::EmitNode(SUnit *SU) {
+ if (!HazardRec->isEnabled())
+ return;
+
+ // Check for phys reg copy.
+ if (!SU->getNode())
+ return;
+
+ switch (SU->getNode()->getOpcode()) {
+ default:
+ assert(SU->getNode()->isMachineOpcode() &&
+ "This target-independent node should not be scheduled.");
+ break;
+ case ISD::MERGE_VALUES:
+ case ISD::TokenFactor:
+ case ISD::CopyToReg:
+ case ISD::CopyFromReg:
+ case ISD::EH_LABEL:
+ // Noops don't affect the scoreboard state. Copies are likely to be
+ // removed.
+ return;
+ case ISD::INLINEASM:
+ // For inline asm, clear the pipeline state.
+ HazardRec->Reset();
+ return;
+ }
+ if (isBottomUp && SU->isCall) {
+ // Calls are scheduled with their preceding instructions. For bottom-up
+ // scheduling, clear the pipeline state before emitting.
+ HazardRec->Reset();
+ }
+
+ HazardRec->EmitInstruction(SU);
+
+ if (!isBottomUp && SU->isCall) {
+ HazardRec->Reset();
+ }
+}
+
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
/// count of its predecessors. If a predecessor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -278,36 +489,51 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
#endif
- // FIXME: Handle noop hazard.
+ // FIXME: Do not modify node height. It may interfere with
+ // backtracking. Instead add a "ready cycle" to SUnit. Before scheduling the
+ // node, its ready cycle can aid heuristics, and after scheduling it can
+ // indicate the scheduled cycle.
SU->setHeightToAtLeast(CurCycle);
+
+ // Reserve resources for the scheduled instruction.
+ EmitNode(SU);
+
Sequence.push_back(SU);
AvailableQueue->ScheduledNode(SU);
- ReleasePredecessors(SU, CurCycle);
+ // Update liveness of predecessors before successors to avoid treating a
+ // two-address node as a live range def.
+ ReleasePredecessors(SU);
// Release all the implicit physical register defs that are live.
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (I->isAssignedRegDep()) {
- if (LiveRegCycles[I->getReg()] == I->getSUnit()->getHeight()) {
- assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
- assert(LiveRegDefs[I->getReg()] == SU &&
- "Physical register dependency violated?");
- --NumLiveRegs;
- LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
- }
+ // LiveRegDefs[I->getReg()] != SU when SU is a two-address node.
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] == SU) {
+ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
+ --NumLiveRegs;
+ LiveRegDefs[I->getReg()] = NULL;
+ LiveRegGens[I->getReg()] = NULL;
}
}
SU->isScheduled = true;
+
+ // Conditions under which the scheduler should eagerly advance the cycle:
+ // (1) No available instructions
+ // (2) All pipelines full, so available instructions must have hazards.
+ //
+ // If HazardRec is disabled, count each inst as one cycle.
+ if (!HazardRec->isEnabled() || HazardRec->atIssueLimit()
+ || AvailableQueue->empty())
+ AdvanceToCycle(CurCycle + 1);
}
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
/// unscheduled, increase the succ left count of its predecessors. Remove
/// them from AvailableQueue if necessary.
-void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
+void ScheduleDAGRRList::CapturePred(SDep *PredEdge) {
SUnit *PredSU = PredEdge->getSUnit();
if (PredSU->isAvailable) {
PredSU->isAvailable = false;
@@ -328,59 +554,98 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
CapturePred(&*I);
- if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){
+ if (I->isAssignedRegDep() && SU == LiveRegGens[I->getReg()]){
assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!");
assert(LiveRegDefs[I->getReg()] == I->getSUnit() &&
"Physical register dependency violated?");
--NumLiveRegs;
LiveRegDefs[I->getReg()] = NULL;
- LiveRegCycles[I->getReg()] = 0;
+ LiveRegGens[I->getReg()] = NULL;
}
}
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
if (I->isAssignedRegDep()) {
+ // This becomes the nearest def. Note that an earlier def may still be
+ // pending if this is a two-address node.
+ LiveRegDefs[I->getReg()] = SU;
if (!LiveRegDefs[I->getReg()]) {
- LiveRegDefs[I->getReg()] = SU;
++NumLiveRegs;
}
- if (I->getSUnit()->getHeight() < LiveRegCycles[I->getReg()])
- LiveRegCycles[I->getReg()] = I->getSUnit()->getHeight();
+ if (LiveRegGens[I->getReg()] == NULL ||
+ I->getSUnit()->getHeight() < LiveRegGens[I->getReg()]->getHeight())
+ LiveRegGens[I->getReg()] = I->getSUnit();
}
}
+ if (SU->getHeight() < MinAvailableCycle)
+ MinAvailableCycle = SU->getHeight();
SU->setHeightDirty();
SU->isScheduled = false;
SU->isAvailable = true;
- AvailableQueue->push(SU);
+ if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) {
+ // Don't make available until backtracking is complete.
+ SU->isPending = true;
+ PendingQueue.push_back(SU);
+ }
+ else {
+ AvailableQueue->push(SU);
+ }
AvailableQueue->UnscheduledNode(SU);
}
+/// After backtracking, the hazard checker needs to be restored to a state
+/// corresponding to the current cycle.
+void ScheduleDAGRRList::RestoreHazardCheckerBottomUp() {
+ HazardRec->Reset();
+
+ unsigned LookAhead = std::min((unsigned)Sequence.size(),
+ HazardRec->getMaxLookAhead());
+ if (LookAhead == 0)
+ return;
+
+ std::vector<SUnit*>::const_iterator I = (Sequence.end() - LookAhead);
+ unsigned HazardCycle = (*I)->getHeight();
+ for (std::vector<SUnit*>::const_iterator E = Sequence.end(); I != E; ++I) {
+ SUnit *SU = *I;
+ for (; SU->getHeight() > HazardCycle; ++HazardCycle) {
+ HazardRec->RecedeCycle();
+ }
+ EmitNode(SU);
+ }
+}
+
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
/// BTCycle in order to schedule a specific node.
-void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, unsigned BtCycle,
- unsigned &CurCycle) {
- SUnit *OldSU = NULL;
- while (CurCycle > BtCycle) {
- OldSU = Sequence.back();
+void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) {
+ SUnit *OldSU = Sequence.back();
+ while (true) {
Sequence.pop_back();
if (SU->isSucc(OldSU))
// Don't try to remove SU from AvailableQueue.
SU->isAvailable = false;
+ // FIXME: use ready cycle instead of height
+ CurCycle = OldSU->getHeight();
UnscheduleNodeBottomUp(OldSU);
- --CurCycle;
AvailableQueue->setCurCycle(CurCycle);
+ if (OldSU == BtSU)
+ break;
+ OldSU = Sequence.back();
}
assert(!SU->isSucc(OldSU) && "Something is wrong!");
+ RestoreHazardCheckerBottomUp();
+
+ ReleasePending();
+
++NumBacktracks;
}
static bool isOperandOf(const SUnit *SU, SDNode *N) {
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (SUNode->isOperandOf(N))
return true;
}
@@ -390,18 +655,18 @@ static bool isOperandOf(const SUnit *SU, SDNode *N) {
/// CopyAndMoveSuccessors - Clone the specified node and move its scheduled
/// successors to the newly created node.
SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
- if (SU->getNode()->getFlaggedNode())
- return NULL;
-
SDNode *N = SU->getNode();
if (!N)
return NULL;
+ if (SU->getNode()->getGluedNode())
+ return NULL;
+
SUnit *NewSU;
bool TryUnfold = false;
for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
else if (VT == MVT::Other)
TryUnfold = true;
@@ -409,7 +674,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
const SDValue &Op = N->getOperand(i);
EVT VT = Op.getNode()->getValueType(Op.getResNo());
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return NULL;
}
@@ -441,13 +706,15 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
} else {
LoadSU = CreateNewSUnit(LoadNode);
LoadNode->setNodeId(LoadSU->NodeNum);
+
+ InitNumRegDefsLeft(LoadSU);
ComputeLatency(LoadSU);
}
SUnit *NewSU = CreateNewSUnit(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NewSU->NodeNum);
-
+
const TargetInstrDesc &TID = TII->get(N->getMachineOpcode());
for (unsigned i = 0; i != TID.getNumOperands(); ++i) {
if (TID.getOperandConstraint(i, TOI::TIED_TO) != -1) {
@@ -457,6 +724,8 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
}
if (TID.isCommutable())
NewSU->isCommutable = true;
+
+ InitNumRegDefsLeft(NewSU);
ComputeLatency(NewSU);
// Record all the edges to and from the old SU, by category.
@@ -507,6 +776,10 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
RemovePred(SuccDep, D);
D.setSUnit(NewSU);
AddPred(SuccDep, D);
+ // Balance register pressure.
+ if (AvailableQueue->tracksRegPressure() && SuccDep->isScheduled
+ && !D.isCtrl() && NewSU->NumRegDefsLeft > 0)
+ --NewSU->NumRegDefsLeft;
}
for (unsigned i = 0, e = ChainSuccs.size(); i != e; ++i) {
SDep D = ChainSuccs[i];
@@ -517,7 +790,7 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
D.setSUnit(LoadSU);
AddPred(SuccDep, D);
}
- }
+ }
// Add a data dependency to reflect that NewSU reads the value defined
// by LoadSU.
@@ -633,52 +906,52 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg,
/// CheckForLiveRegDef - Update the live register vector if the specified
/// register def of the specified SUnit clobbers any "live" registers.
-static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg,
+static void CheckForLiveRegDef(SUnit *SU, unsigned Reg,
std::vector<SUnit*> &LiveRegDefs,
SmallSet<unsigned, 4> &RegAdded,
SmallVector<unsigned, 4> &LRegs,
const TargetRegisterInfo *TRI) {
- bool Added = false;
- if (LiveRegDefs[Reg] && LiveRegDefs[Reg] != SU) {
- if (RegAdded.insert(Reg)) {
+ for (const unsigned *AliasI = TRI->getOverlaps(Reg); *AliasI; ++AliasI) {
+
+ // Check if Reg is live.
+ if (!LiveRegDefs[Reg]) continue;
+
+ // Allow multiple uses of the same def.
+ if (LiveRegDefs[Reg] == SU) continue;
+
+ // Add Reg to the set of interfering live regs.
+ if (RegAdded.insert(Reg))
LRegs.push_back(Reg);
- Added = true;
- }
}
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias)
- if (LiveRegDefs[*Alias] && LiveRegDefs[*Alias] != SU) {
- if (RegAdded.insert(*Alias)) {
- LRegs.push_back(*Alias);
- Added = true;
- }
- }
- return Added;
}
/// DelayForLiveRegsBottomUp - Returns true if it is necessary to delay
/// scheduling of the given node to satisfy live physical register dependencies.
/// If the specific node is the last one that's available to schedule, do
/// whatever is necessary (i.e. backtracking or cloning) to make it possible.
-bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
- SmallVector<unsigned, 4> &LRegs){
+bool ScheduleDAGRRList::
+DelayForLiveRegsBottomUp(SUnit *SU, SmallVector<unsigned, 4> &LRegs) {
if (NumLiveRegs == 0)
return false;
SmallSet<unsigned, 4> RegAdded;
// If this node would clobber any "live" register, then it's not ready.
+ //
+ // If SU is the currently live definition of the same register that it uses,
+ // then we are free to schedule it.
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (I->isAssignedRegDep())
+ if (I->isAssignedRegDep() && LiveRegDefs[I->getReg()] != SU)
CheckForLiveRegDef(I->getSUnit(), I->getReg(), LiveRegDefs,
RegAdded, LRegs, TRI);
}
- for (SDNode *Node = SU->getNode(); Node; Node = Node->getFlaggedNode()) {
+ for (SDNode *Node = SU->getNode(); Node; Node = Node->getGluedNode()) {
if (Node->getOpcode() == ISD::INLINEASM) {
// Inline asm can clobber physical defs.
unsigned NumOps = Node->getNumOperands();
- if (Node->getOperand(NumOps-1).getValueType() == MVT::Flag)
- --NumOps; // Ignore the flag operand.
+ if (Node->getOperand(NumOps-1).getValueType() == MVT::Glue)
+ --NumOps; // Ignore the glue operand.
for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
unsigned Flags =
@@ -708,17 +981,151 @@ bool ScheduleDAGRRList::DelayForLiveRegsBottomUp(SUnit *SU,
for (const unsigned *Reg = TID.ImplicitDefs; *Reg; ++Reg)
CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI);
}
+
return !LRegs.empty();
}
+/// Return a node that can be scheduled in this cycle. Requirements:
+/// (1) Ready: latency has been satisfied
+/// (2) No Hazards: resources are available
+/// (3) No Interferences: may unschedule to break register interferences.
+SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() {
+ SmallVector<SUnit*, 4> Interferences;
+ DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
+
+ SUnit *CurSU = AvailableQueue->pop();
+ while (CurSU) {
+ SmallVector<unsigned, 4> LRegs;
+ if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
+ break;
+ LRegsMap.insert(std::make_pair(CurSU, LRegs));
+
+ CurSU->isPending = true; // This SU is not in AvailableQueue right now.
+ Interferences.push_back(CurSU);
+ CurSU = AvailableQueue->pop();
+ }
+ if (CurSU) {
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ assert(Interferences[i]->isAvailable && "must still be available");
+ AvailableQueue->push(Interferences[i]);
+ }
+ return CurSU;
+ }
+
+ // All candidates are delayed due to live physical reg dependencies.
+ // Try backtracking, code duplication, or inserting cross class copies
+ // to resolve it.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ SUnit *TrySU = Interferences[i];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+
+ // Try unscheduling up to the point where it's safe to schedule
+ // this node.
+ SUnit *BtSU = NULL;
+ unsigned LiveCycle = UINT_MAX;
+ for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
+ unsigned Reg = LRegs[j];
+ if (LiveRegGens[Reg]->getHeight() < LiveCycle) {
+ BtSU = LiveRegGens[Reg];
+ LiveCycle = BtSU->getHeight();
+ }
+ }
+ if (!WillCreateCycle(TrySU, BtSU)) {
+ BacktrackBottomUp(TrySU, BtSU);
+
+ // Force the current node to be scheduled before the node that
+ // requires the physical reg dep.
+ if (BtSU->isAvailable) {
+ BtSU->isAvailable = false;
+ if (!BtSU->isPending)
+ AvailableQueue->remove(BtSU);
+ }
+ AddPred(TrySU, SDep(BtSU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false, /*isArtificial=*/true));
+
+ // If one or more successors has been unscheduled, then the current
+ // node is no longer available. Schedule a successor that's now
+ // available instead.
+ if (!TrySU->isAvailable) {
+ CurSU = AvailableQueue->pop();
+ }
+ else {
+ CurSU = TrySU;
+ TrySU->isPending = false;
+ Interferences.erase(Interferences.begin()+i);
+ }
+ break;
+ }
+ }
+
+ if (!CurSU) {
+ // Can't backtrack. If it's too expensive to copy the value, then try to
+ // duplicate the nodes that produce these "too expensive to copy"
+ // values to break the dependency. In case even that doesn't work,
+ // insert cross class copies.
+ // If it's not too expensive, i.e. cost != -1, issue copies.
+ SUnit *TrySU = Interferences[0];
+ SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
+ assert(LRegs.size() == 1 && "Can't handle this yet!");
+ unsigned Reg = LRegs[0];
+ SUnit *LRDef = LiveRegDefs[Reg];
+ EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
+ const TargetRegisterClass *RC =
+ TRI->getMinimalPhysRegClass(Reg, VT);
+ const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
+
+ // If the cross copy register class is null, then it must be possible to copy
+ // the value directly. Do not try to duplicate the def.
+ SUnit *NewDef = 0;
+ if (DestRC)
+ NewDef = CopyAndMoveSuccessors(LRDef);
+ else
+ DestRC = RC;
+ if (!NewDef) {
+ // Issue copies, these can be expensive cross register class copies.
+ SmallVector<SUnit*, 2> Copies;
+ InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
+ DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
+ << " to SU #" << Copies.front()->NodeNum << "\n");
+ AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ NewDef = Copies.back();
+ }
+
+ DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
+ << " to SU #" << TrySU->NodeNum << "\n");
+ LiveRegDefs[Reg] = NewDef;
+ AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
+ /*Reg=*/0, /*isNormalMemory=*/false,
+ /*isMustAlias=*/false,
+ /*isArtificial=*/true));
+ TrySU->isAvailable = false;
+ CurSU = NewDef;
+ }
+
+ assert(CurSU && "Unable to resolve live physical register dependencies!");
+
+ // Add the nodes that aren't ready back onto the available list.
+ for (unsigned i = 0, e = Interferences.size(); i != e; ++i) {
+ Interferences[i]->isPending = false;
+ // May no longer be available due to backtracking.
+ if (Interferences[i]->isAvailable) {
+ AvailableQueue->push(Interferences[i]);
+ }
+ }
+ return CurSU;
+}
/// ListScheduleBottomUp - The main loop of list scheduling for bottom-up
/// schedulers.
void ScheduleDAGRRList::ListScheduleBottomUp() {
- unsigned CurCycle = 0;
-
// Release any predecessors of the special Exit node.
- ReleasePredecessors(&ExitSU, CurCycle);
+ ReleasePredecessors(&ExitSU);
// Add root to Available queue.
if (!SUnits.empty()) {
@@ -730,135 +1137,29 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
- SmallVector<SUnit*, 4> NotReady;
- DenseMap<SUnit*, SmallVector<unsigned, 4> > LRegsMap;
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
- bool Delayed = false;
- LRegsMap.clear();
- SUnit *CurSU = AvailableQueue->pop();
- while (CurSU) {
- SmallVector<unsigned, 4> LRegs;
- if (!DelayForLiveRegsBottomUp(CurSU, LRegs))
- break;
- Delayed = true;
- LRegsMap.insert(std::make_pair(CurSU, LRegs));
+ DEBUG(dbgs() << "\n*** Examining Available\n";
+ AvailableQueue->dump(this));
- CurSU->isPending = true; // This SU is not in AvailableQueue right now.
- NotReady.push_back(CurSU);
- CurSU = AvailableQueue->pop();
- }
+ // Pick the best node to schedule taking all constraints into
+ // consideration.
+ SUnit *SU = PickNodeToScheduleBottomUp();
- // All candidates are delayed due to live physical reg dependencies.
- // Try backtracking, code duplication, or inserting cross class copies
- // to resolve it.
- if (Delayed && !CurSU) {
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- SUnit *TrySU = NotReady[i];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
-
- // Try unscheduling up to the point where it's safe to schedule
- // this node.
- unsigned LiveCycle = CurCycle;
- for (unsigned j = 0, ee = LRegs.size(); j != ee; ++j) {
- unsigned Reg = LRegs[j];
- unsigned LCycle = LiveRegCycles[Reg];
- LiveCycle = std::min(LiveCycle, LCycle);
- }
- SUnit *OldSU = Sequence[LiveCycle];
- if (!WillCreateCycle(TrySU, OldSU)) {
- BacktrackBottomUp(TrySU, LiveCycle, CurCycle);
- // Force the current node to be scheduled before the node that
- // requires the physical reg dep.
- if (OldSU->isAvailable) {
- OldSU->isAvailable = false;
- AvailableQueue->remove(OldSU);
- }
- AddPred(TrySU, SDep(OldSU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false, /*isArtificial=*/true));
- // If one or more successors has been unscheduled, then the current
- // node is no longer avaialable. Schedule a successor that's now
- // available instead.
- if (!TrySU->isAvailable)
- CurSU = AvailableQueue->pop();
- else {
- CurSU = TrySU;
- TrySU->isPending = false;
- NotReady.erase(NotReady.begin()+i);
- }
- break;
- }
- }
+ AdvancePastStalls(SU);
- if (!CurSU) {
- // Can't backtrack. If it's too expensive to copy the value, then try
- // duplicate the nodes that produces these "too expensive to copy"
- // values to break the dependency. In case even that doesn't work,
- // insert cross class copies.
- // If it's not too expensive, i.e. cost != -1, issue copies.
- SUnit *TrySU = NotReady[0];
- SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
- assert(LRegs.size() == 1 && "Can't handle this yet!");
- unsigned Reg = LRegs[0];
- SUnit *LRDef = LiveRegDefs[Reg];
- EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
- const TargetRegisterClass *RC =
- TRI->getMinimalPhysRegClass(Reg, VT);
- const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
-
- // If cross copy register class is null, then it must be possible copy
- // the value directly. Do not try duplicate the def.
- SUnit *NewDef = 0;
- if (DestRC)
- NewDef = CopyAndMoveSuccessors(LRDef);
- else
- DestRC = RC;
- if (!NewDef) {
- // Issue copies, these can be expensive cross register class copies.
- SmallVector<SUnit*, 2> Copies;
- InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
- DEBUG(dbgs() << " Adding an edge from SU #" << TrySU->NodeNum
- << " to SU #" << Copies.front()->NodeNum << "\n");
- AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- NewDef = Copies.back();
- }
+ ScheduleNodeBottomUp(SU);
- DEBUG(dbgs() << " Adding an edge from SU #" << NewDef->NodeNum
- << " to SU #" << TrySU->NodeNum << "\n");
- LiveRegDefs[Reg] = NewDef;
- AddPred(NewDef, SDep(TrySU, SDep::Order, /*Latency=*/1,
- /*Reg=*/0, /*isNormalMemory=*/false,
- /*isMustAlias=*/false,
- /*isArtificial=*/true));
- TrySU->isAvailable = false;
- CurSU = NewDef;
- }
-
- assert(CurSU && "Unable to resolve live physical register dependencies!");
- }
-
- // Add the nodes that aren't ready back onto the available list.
- for (unsigned i = 0, e = NotReady.size(); i != e; ++i) {
- NotReady[i]->isPending = false;
- // May no longer be available due to backtracking.
- if (NotReady[i]->isAvailable)
- AvailableQueue->push(NotReady[i]);
+ while (AvailableQueue->empty() && !PendingQueue.empty()) {
+ // Advance the cycle to free resources. Skip ahead to the next ready SU.
+ assert(MinAvailableCycle < UINT_MAX && "MinAvailableCycle uninitialized");
+ AdvanceToCycle(std::max(CurCycle + 1, MinAvailableCycle));
}
- NotReady.clear();
-
- if (CurSU)
- ScheduleNodeBottomUp(CurSU, CurCycle);
- ++CurCycle;
- AvailableQueue->setCurCycle(CurCycle);
}
// Reverse the order if it is bottom up.
std::reverse(Sequence.begin(), Sequence.end());
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -905,7 +1206,7 @@ void ScheduleDAGRRList::ReleaseSuccessors(SUnit *SU) {
/// ScheduleNodeTopDown - Add the node to the schedule. Decrement the pending
/// count of its successors. If a successor pending count is zero, add it to
/// the Available queue.
-void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
+void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU) {
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
DEBUG(SU->dump(this));
@@ -921,7 +1222,6 @@ void ScheduleDAGRRList::ScheduleNodeTopDown(SUnit *SU, unsigned CurCycle) {
/// ListScheduleTopDown - The main loop of list scheduling for top-down
/// schedulers.
void ScheduleDAGRRList::ListScheduleTopDown() {
- unsigned CurCycle = 0;
AvailableQueue->setCurCycle(CurCycle);
// Release any successors of the special Entry node.
@@ -935,19 +1235,19 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
SUnits[i].isAvailable = true;
}
}
-
+
// While Available queue is not empty, grab the node with the highest
// priority. If it is not ready put it back. Schedule the node.
Sequence.reserve(SUnits.size());
while (!AvailableQueue->empty()) {
SUnit *CurSU = AvailableQueue->pop();
-
+
if (CurSU)
- ScheduleNodeTopDown(CurSU, CurCycle);
+ ScheduleNodeTopDown(CurSU);
++CurCycle;
AvailableQueue->setCurCycle(CurCycle);
}
-
+
#ifndef NDEBUG
VerifySchedule(isBottomUp);
#endif
@@ -955,70 +1255,288 @@ void ScheduleDAGRRList::ListScheduleTopDown() {
//===----------------------------------------------------------------------===//
-// RegReductionPriorityQueue Implementation
+// RegReductionPriorityQueue Definition
//===----------------------------------------------------------------------===//
//
// This is a SchedulingPriorityQueue that schedules using Sethi Ullman numbers
// to reduce register pressure.
-//
+//
namespace {
- template<class SF>
- class RegReductionPriorityQueue;
-
- /// bu_ls_rr_sort - Priority function for bottom up register pressure
- // reduction scheduler.
- struct bu_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<bu_ls_rr_sort> *SPQ;
- bu_ls_rr_sort(RegReductionPriorityQueue<bu_ls_rr_sort> *spq) : SPQ(spq) {}
- bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+class RegReductionPQBase;
+
+struct queue_sort : public std::binary_function<SUnit*, SUnit*, bool> {
+ bool isReady(SUnit* SU, unsigned CurCycle) const { return true; }
+};
+
+/// bu_ls_rr_sort - Priority function for bottom up register pressure
+// reduction scheduler.
+struct bu_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // td_ls_rr_sort - Priority function for top down register pressure reduction
- // scheduler.
- struct td_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<td_ls_rr_sort> *SPQ;
- td_ls_rr_sort(RegReductionPriorityQueue<td_ls_rr_sort> *spq) : SPQ(spq) {}
- td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ bu_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ bu_ls_rr_sort(const bu_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// td_ls_rr_sort - Priority function for top down register pressure reduction
+// scheduler.
+struct td_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = false,
+ HasReadyFilter = false
};
- // src_ls_rr_sort - Priority function for source order scheduler.
- struct src_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<src_ls_rr_sort> *SPQ;
- src_ls_rr_sort(RegReductionPriorityQueue<src_ls_rr_sort> *spq)
- : SPQ(spq) {}
- src_ls_rr_sort(const src_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
-
- bool operator()(const SUnit* left, const SUnit* right) const;
+ RegReductionPQBase *SPQ;
+ td_ls_rr_sort(RegReductionPQBase *spq) : SPQ(spq) {}
+ td_ls_rr_sort(const td_ls_rr_sort &RHS) : SPQ(RHS.SPQ) {}
+
+ bool operator()(const SUnit* left, const SUnit* right) const;
+};
+
+// src_ls_rr_sort - Priority function for source order scheduler.
+struct src_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = false
};
- // hybrid_ls_rr_sort - Priority function for hybrid scheduler.
- struct hybrid_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<hybrid_ls_rr_sort> *SPQ;
- hybrid_ls_rr_sort(RegReductionPriorityQueue<hybrid_ls_rr_sort> *spq)
- : SPQ(spq) {}
- hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ src_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ src_ls_rr_sort(const src_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
- bool operator()(const SUnit* left, const SUnit* right) const;
+// hybrid_ls_rr_sort - Priority function for hybrid scheduler.
+struct hybrid_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
- // ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
- // scheduler.
- struct ilp_ls_rr_sort : public std::binary_function<SUnit*, SUnit*, bool> {
- RegReductionPriorityQueue<ilp_ls_rr_sort> *SPQ;
- ilp_ls_rr_sort(RegReductionPriorityQueue<ilp_ls_rr_sort> *spq)
- : SPQ(spq) {}
- ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
- : SPQ(RHS.SPQ) {}
+ RegReductionPQBase *SPQ;
+ hybrid_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ hybrid_ls_rr_sort(const hybrid_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
- bool operator()(const SUnit* left, const SUnit* right) const;
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+// ilp_ls_rr_sort - Priority function for ILP (instruction level parallelism)
+// scheduler.
+struct ilp_ls_rr_sort : public queue_sort {
+ enum {
+ IsBottomUp = true,
+ HasReadyFilter = true
};
-} // end anonymous namespace
+
+ RegReductionPQBase *SPQ;
+ ilp_ls_rr_sort(RegReductionPQBase *spq)
+ : SPQ(spq) {}
+ ilp_ls_rr_sort(const ilp_ls_rr_sort &RHS)
+ : SPQ(RHS.SPQ) {}
+
+ bool isReady(SUnit *SU, unsigned CurCycle) const;
+
+ bool operator()(SUnit* left, SUnit* right) const;
+};
+
+class RegReductionPQBase : public SchedulingPriorityQueue {
+protected:
+ std::vector<SUnit*> Queue;
+ unsigned CurQueueId;
+ bool TracksRegPressure;
+
+ // SUnits - The SUnits for the current graph.
+ std::vector<SUnit> *SUnits;
+
+ MachineFunction &MF;
+ const TargetInstrInfo *TII;
+ const TargetRegisterInfo *TRI;
+ const TargetLowering *TLI;
+ ScheduleDAGRRList *scheduleDAG;
+
+ // SethiUllmanNumbers - The SethiUllman number for each node.
+ std::vector<unsigned> SethiUllmanNumbers;
+
+ /// RegPressure - Tracking current reg pressure per register class.
+ ///
+ std::vector<unsigned> RegPressure;
+
+ /// RegLimit - Tracking the number of allocatable registers per register
+ /// class.
+ std::vector<unsigned> RegLimit;
+
+public:
+ RegReductionPQBase(MachineFunction &mf,
+ bool hasReadyFilter,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : SchedulingPriorityQueue(hasReadyFilter),
+ CurQueueId(0), TracksRegPressure(tracksrp),
+ MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
+ if (TracksRegPressure) {
+ unsigned NumRC = TRI->getNumRegClasses();
+ RegLimit.resize(NumRC);
+ RegPressure.resize(NumRC);
+ std::fill(RegLimit.begin(), RegLimit.end(), 0);
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I)
+ RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
+ }
+ }
+
+ void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
+ scheduleDAG = scheduleDag;
+ }
+
+ ScheduleHazardRecognizer* getHazardRec() {
+ return scheduleDAG->getHazardRec();
+ }
+
+ void initNodes(std::vector<SUnit> &sunits);
+
+ void addNode(const SUnit *SU);
+
+ void updateNode(const SUnit *SU);
+
+ void releaseState() {
+ SUnits = 0;
+ SethiUllmanNumbers.clear();
+ std::fill(RegPressure.begin(), RegPressure.end(), 0);
+ }
+
+ unsigned getNodePriority(const SUnit *SU) const;
+
+ unsigned getNodeOrdering(const SUnit *SU) const {
+ return scheduleDAG->DAG->GetOrdering(SU->getNode());
+ }
+
+ bool empty() const { return Queue.empty(); }
+
+ void push(SUnit *U) {
+ assert(!U->NodeQueueId && "Node in the queue already");
+ U->NodeQueueId = ++CurQueueId;
+ Queue.push_back(U);
+ }
+
+ void remove(SUnit *SU) {
+ assert(!Queue.empty() && "Queue is empty!");
+ assert(SU->NodeQueueId != 0 && "Not in queue!");
+ std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
+ SU);
+ if (I != prior(Queue.end()))
+ std::swap(*I, Queue.back());
+ Queue.pop_back();
+ SU->NodeQueueId = 0;
+ }
+
+ bool tracksRegPressure() const { return TracksRegPressure; }
+
+ void dumpRegPressure() const;
+
+ bool HighRegPressure(const SUnit *SU) const;
+
+ bool MayReduceRegPressure(SUnit *SU);
+
+ void ScheduledNode(SUnit *SU);
+
+ void UnscheduledNode(SUnit *SU);
+
+protected:
+ bool canClobber(const SUnit *SU, const SUnit *Op);
+ void AddPseudoTwoAddrDeps();
+ void PrescheduleNodesWithMultipleUses();
+ void CalculateSethiUllmanNumbers();
+};
+
+template<class SF>
+class RegReductionPriorityQueue : public RegReductionPQBase {
+ static SUnit *popFromQueue(std::vector<SUnit*> &Q, SF &Picker) {
+ std::vector<SUnit *>::iterator Best = Q.begin();
+ for (std::vector<SUnit *>::iterator I = llvm::next(Q.begin()),
+ E = Q.end(); I != E; ++I)
+ if (Picker(*Best, *I))
+ Best = I;
+ SUnit *V = *Best;
+ if (Best != prior(Q.end()))
+ std::swap(*Best, Q.back());
+ Q.pop_back();
+ return V;
+ }
+
+ SF Picker;
+
+public:
+ RegReductionPriorityQueue(MachineFunction &mf,
+ bool tracksrp,
+ const TargetInstrInfo *tii,
+ const TargetRegisterInfo *tri,
+ const TargetLowering *tli)
+ : RegReductionPQBase(mf, SF::HasReadyFilter, tracksrp, tii, tri, tli),
+ Picker(this) {}
+
+ bool isBottomUp() const { return SF::IsBottomUp; }
+
+ bool isReady(SUnit *U) const {
+ return Picker.HasReadyFilter && Picker.isReady(U, getCurCycle());
+ }
+
+ SUnit *pop() {
+ if (Queue.empty()) return NULL;
+
+ SUnit *V = popFromQueue(Queue, Picker);
+ V->NodeQueueId = 0;
+ return V;
+ }
+
+ void dump(ScheduleDAG *DAG) const {
+ // Emulate pop() without clobbering NodeQueueIds.
+ std::vector<SUnit*> DumpQueue = Queue;
+ SF DumpPicker = Picker;
+ while (!DumpQueue.empty()) {
+ SUnit *SU = popFromQueue(DumpQueue, DumpPicker);
+ if (isBottomUp())
+ dbgs() << "Height " << SU->getHeight() << ": ";
+ else
+ dbgs() << "Depth " << SU->getDepth() << ": ";
+ SU->dump(DAG);
+ }
+ }
+};
+
+typedef RegReductionPriorityQueue<bu_ls_rr_sort>
+BURegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<td_ls_rr_sort>
+TDRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<src_ls_rr_sort>
+SrcRegReductionPriorityQueue;
+
+typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
+HybridBURRPriorityQueue;
+
+typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
+ILPBURRPriorityQueue;
+} // end anonymous namespace
+
+//===----------------------------------------------------------------------===//
+// Static Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
/// CalcNodeSethiUllmanNumber - Compute Sethi Ullman number.
/// Smaller number is the higher priority.
@@ -1045,413 +1563,283 @@ CalcNodeSethiUllmanNumber(const SUnit *SU, std::vector<unsigned> &SUNumbers) {
if (SethiUllmanNumber == 0)
SethiUllmanNumber = 1;
-
+
return SethiUllmanNumber;
}
-namespace {
- template<class SF>
- class RegReductionPriorityQueue : public SchedulingPriorityQueue {
- std::vector<SUnit*> Queue;
- SF Picker;
- unsigned CurQueueId;
- bool TracksRegPressure;
-
- protected:
- // SUnits - The SUnits for the current graph.
- std::vector<SUnit> *SUnits;
-
- MachineFunction &MF;
- const TargetInstrInfo *TII;
- const TargetRegisterInfo *TRI;
- const TargetLowering *TLI;
- ScheduleDAGRRList *scheduleDAG;
-
- // SethiUllmanNumbers - The SethiUllman number for each node.
- std::vector<unsigned> SethiUllmanNumbers;
-
- /// RegPressure - Tracking current reg pressure per register class.
- ///
- std::vector<unsigned> RegPressure;
-
- /// RegLimit - Tracking the number of allocatable registers per register
- /// class.
- std::vector<unsigned> RegLimit;
-
- public:
- RegReductionPriorityQueue(MachineFunction &mf,
- bool tracksrp,
- const TargetInstrInfo *tii,
- const TargetRegisterInfo *tri,
- const TargetLowering *tli)
- : Picker(this), CurQueueId(0), TracksRegPressure(tracksrp),
- MF(mf), TII(tii), TRI(tri), TLI(tli), scheduleDAG(NULL) {
- if (TracksRegPressure) {
- unsigned NumRC = TRI->getNumRegClasses();
- RegLimit.resize(NumRC);
- RegPressure.resize(NumRC);
- std::fill(RegLimit.begin(), RegLimit.end(), 0);
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I)
- RegLimit[(*I)->getID()] = tli->getRegPressureLimit(*I, MF);
- }
- }
-
- void initNodes(std::vector<SUnit> &sunits) {
- SUnits = &sunits;
- // Add pseudo dependency edges for two-address nodes.
- AddPseudoTwoAddrDeps();
- // Reroute edges to nodes with multiple uses.
- PrescheduleNodesWithMultipleUses();
- // Calculate node priorities.
- CalculateSethiUllmanNumbers();
- }
-
- void addNode(const SUnit *SU) {
- unsigned SUSize = SethiUllmanNumbers.size();
- if (SUnits->size() > SUSize)
- SethiUllmanNumbers.resize(SUSize*2, 0);
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
-
- void updateNode(const SUnit *SU) {
- SethiUllmanNumbers[SU->NodeNum] = 0;
- CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
- }
+/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
+/// scheduling units.
+void RegReductionPQBase::CalculateSethiUllmanNumbers() {
+ SethiUllmanNumbers.assign(SUnits->size(), 0);
- void releaseState() {
- SUnits = 0;
- SethiUllmanNumbers.clear();
- std::fill(RegPressure.begin(), RegPressure.end(), 0);
- }
+ for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
+ CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
+}
- unsigned getNodePriority(const SUnit *SU) const {
- assert(SU->NodeNum < SethiUllmanNumbers.size());
- unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
- if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
- // CopyToReg should be close to its uses to facilitate coalescing and
- // avoid spilling.
- return 0;
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::INSERT_SUBREG)
- // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
- // close to their uses to facilitate coalescing.
- return 0;
- if (SU->NumSuccs == 0 && SU->NumPreds != 0)
- // If SU does not have a register use, i.e. it doesn't produce a value
- // that would be consumed (e.g. store), then it terminates a chain of
- // computation. Give it a large SethiUllman number so it will be
- // scheduled right before its predecessors that it doesn't lengthen
- // their live ranges.
- return 0xffff;
- if (SU->NumPreds == 0 && SU->NumSuccs != 0)
- // If SU does not have a register def, schedule it close to its uses
- // because it does not lengthen any live ranges.
- return 0;
- return SethiUllmanNumbers[SU->NodeNum];
- }
+void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
+ SUnits = &sunits;
+ // Add pseudo dependency edges for two-address nodes.
+ AddPseudoTwoAddrDeps();
+ // Reroute edges to nodes with multiple uses.
+ if (!TracksRegPressure)
+ PrescheduleNodesWithMultipleUses();
+ // Calculate node priorities.
+ CalculateSethiUllmanNumbers();
+}
- unsigned getNodeOrdering(const SUnit *SU) const {
- return scheduleDAG->DAG->GetOrdering(SU->getNode());
- }
+void RegReductionPQBase::addNode(const SUnit *SU) {
+ unsigned SUSize = SethiUllmanNumbers.size();
+ if (SUnits->size() > SUSize)
+ SethiUllmanNumbers.resize(SUSize*2, 0);
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- bool empty() const { return Queue.empty(); }
-
- void push(SUnit *U) {
- assert(!U->NodeQueueId && "Node in the queue already");
- U->NodeQueueId = ++CurQueueId;
- Queue.push_back(U);
- }
+void RegReductionPQBase::updateNode(const SUnit *SU) {
+ SethiUllmanNumbers[SU->NodeNum] = 0;
+ CalcNodeSethiUllmanNumber(SU, SethiUllmanNumbers);
+}
- SUnit *pop() {
- if (empty()) return NULL;
- std::vector<SUnit *>::iterator Best = Queue.begin();
- for (std::vector<SUnit *>::iterator I = llvm::next(Queue.begin()),
- E = Queue.end(); I != E; ++I)
- if (Picker(*Best, *I))
- Best = I;
- SUnit *V = *Best;
- if (Best != prior(Queue.end()))
- std::swap(*Best, Queue.back());
- Queue.pop_back();
- V->NodeQueueId = 0;
- return V;
- }
+// Lower priority means schedule further down. For bottom-up scheduling, lower
+// priority SUs are scheduled before higher priority SUs.
+unsigned RegReductionPQBase::getNodePriority(const SUnit *SU) const {
+ assert(SU->NodeNum < SethiUllmanNumbers.size());
+ unsigned Opc = SU->getNode() ? SU->getNode()->getOpcode() : 0;
+ if (Opc == ISD::TokenFactor || Opc == ISD::CopyToReg)
+ // CopyToReg should be close to its uses to facilitate coalescing and
+ // avoid spilling.
+ return 0;
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::INSERT_SUBREG)
+ // EXTRACT_SUBREG, INSERT_SUBREG, and SUBREG_TO_REG nodes should be
+ // close to their uses to facilitate coalescing.
+ return 0;
+ if (SU->NumSuccs == 0 && SU->NumPreds != 0)
+ // If SU does not have a register use, i.e. it doesn't produce a value
+ // that would be consumed (e.g. store), then it terminates a chain of
+ // computation. Give it a large SethiUllman number so it will be
+ // scheduled right before its predecessors and does not lengthen
+ // their live ranges.
+ return 0xffff;
+ if (SU->NumPreds == 0 && SU->NumSuccs != 0)
+ // If SU does not have a register def, schedule it close to its uses
+ // because it does not lengthen any live ranges.
+ return 0;
+ return SethiUllmanNumbers[SU->NodeNum];
+}
- void remove(SUnit *SU) {
- assert(!Queue.empty() && "Queue is empty!");
- assert(SU->NodeQueueId != 0 && "Not in queue!");
- std::vector<SUnit *>::iterator I = std::find(Queue.begin(), Queue.end(),
- SU);
- if (I != prior(Queue.end()))
- std::swap(*I, Queue.back());
- Queue.pop_back();
- SU->NodeQueueId = 0;
- }
+//===----------------------------------------------------------------------===//
+// Register Pressure Tracking
+//===----------------------------------------------------------------------===//
- bool HighRegPressure(const SUnit *SU) const {
- if (!TLI)
- return false;
+void RegReductionPQBase::dumpRegPressure() const {
+ for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
+ E = TRI->regclass_end(); I != E; ++I) {
+ const TargetRegisterClass *RC = *I;
+ unsigned Id = RC->getID();
+ unsigned RP = RegPressure[Id];
+ if (!RP) continue;
+ DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
+ << '\n');
+ }
+}
- for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] >= RegLimit[RCId])
- return true; // Reg pressure already high.
- unsigned Cost = TLI->getRepRegClassCostFor(VT);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- // Check if this increases register pressure of the specific register
- // class to the point where it would cause spills.
- if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
- return true;
- }
- }
+bool RegReductionPQBase::HighRegPressure(const SUnit *SU) const {
+ if (!TLI)
+ return false;
- return false;
+ for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance()) {
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ unsigned Cost = TLI->getRepRegClassCostFor(VT);
+ if ((RegPressure[RCId] + Cost) >= RegLimit[RCId])
+ return true;
}
+ }
+ return false;
+}
- void ScheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) {
+ const SDNode *N = SU->getNode();
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- }
+ if (!N->isMachineOpcode() || !SU->NumSuccs)
+ return false;
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = N->getValueType(i);
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = N->getValueType(i);
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] >= RegLimit[RCId])
+ return true;
+ }
+ return false;
+}
+
+void RegReductionPQBase::ScheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
- dumpRegPressure();
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumRegDefsLeft is zero when enough uses of this node have been scheduled
+ // to cover the number of registers defined (they are all live).
+ if (PredSU->NumRegDefsLeft == 0) {
+ continue;
+ }
+ // FIXME: The ScheduleDAG currently loses information about which of a
+ // node's values is consumed by each dependence. Consequently, if the node
+ // defines multiple register classes, we don't know which to pressurize
+ // here. Instead the following loop consumes the register defs in an
+ // arbitrary order. At least it handles the common case of clustered loads
+ // to the same class. For precise liveness, each SDep needs to indicate the
+ // result number. But that tightly couples the ScheduleDAG with the
+ // SelectionDAG making updates tricky. A simpler hack would be to attach a
+ // value type or register class to SDep.
+ //
+ // The most important aspect of register tracking is balancing the increase
+ // here with the reduction further below. Note that this SU may use multiple
+ // defs in PredSU. They can't be determined here, but we've already
+ // compensated by reducing NumRegDefsLeft in PredSU during
+ // ScheduleDAGSDNodes::AddSchedEdges.
+ --PredSU->NumRegDefsLeft;
+ unsigned SkipRegDefs = PredSU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ break;
}
+ }
- void UnscheduledNode(SUnit *SU) {
- if (!TracksRegPressure)
- return;
-
- const SDNode *N = SU->getNode();
- if (!N->isMachineOpcode()) {
- if (N->getOpcode() != ISD::CopyToReg)
- return;
- } else {
- unsigned Opc = N->getMachineOpcode();
- if (Opc == TargetOpcode::EXTRACT_SUBREG ||
- Opc == TargetOpcode::INSERT_SUBREG ||
- Opc == TargetOpcode::SUBREG_TO_REG ||
- Opc == TargetOpcode::REG_SEQUENCE ||
- Opc == TargetOpcode::IMPLICIT_DEF)
- return;
- }
+ // We should have this assert, but there may be dead SDNodes that never
+ // materialize as SUnits, so they don't appear to generate liveness.
+ //assert(SU->NumRegDefsLeft == 0 && "not all regdefs have scheduled uses");
+ int SkipRegDefs = (int)SU->NumRegDefsLeft;
+ for (ScheduleDAGSDNodes::RegDefIter RegDefPos(SU, scheduleDAG);
+ RegDefPos.IsValid(); RegDefPos.Advance(), --SkipRegDefs) {
+ if (SkipRegDefs > 0)
+ continue;
+ EVT VT = RegDefPos.GetValue();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) {
+ // Register pressure tracking is imprecise. This can happen. But we try
+ // hard not to let it happen because it likely results in poor scheduling.
+ DEBUG(dbgs() << " SU(" << SU->NodeNum << ") has too many regdefs\n");
+ RegPressure[RCId] = 0;
+ }
+ else {
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
+ }
+ }
+ dumpRegPressure();
+}
- for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
- I != E; ++I) {
- if (I->isCtrl())
- continue;
- SUnit *PredSU = I->getSUnit();
- if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
- continue;
- const SDNode *PN = PredSU->getNode();
- if (!PN->isMachineOpcode()) {
- if (PN->getOpcode() == ISD::CopyFromReg) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
- continue;
- }
- unsigned POpc = PN->getMachineOpcode();
- if (POpc == TargetOpcode::IMPLICIT_DEF)
- continue;
- if (POpc == TargetOpcode::EXTRACT_SUBREG) {
- EVT VT = PN->getOperand(0).getValueType();
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- } else if (POpc == TargetOpcode::INSERT_SUBREG ||
- POpc == TargetOpcode::SUBREG_TO_REG) {
- EVT VT = PN->getValueType(0);
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- continue;
- }
- unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
- for (unsigned i = 0; i != NumDefs; ++i) {
- EVT VT = PN->getValueType(i);
- if (!PN->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
- // Register pressure tracking is imprecise. This can happen.
- RegPressure[RCId] = 0;
- else
- RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
- }
- }
+void RegReductionPQBase::UnscheduledNode(SUnit *SU) {
+ if (!TracksRegPressure)
+ return;
+
+ const SDNode *N = SU->getNode();
+ if (!N->isMachineOpcode()) {
+ if (N->getOpcode() != ISD::CopyToReg)
+ return;
+ } else {
+ unsigned Opc = N->getMachineOpcode();
+ if (Opc == TargetOpcode::EXTRACT_SUBREG ||
+ Opc == TargetOpcode::INSERT_SUBREG ||
+ Opc == TargetOpcode::SUBREG_TO_REG ||
+ Opc == TargetOpcode::REG_SEQUENCE ||
+ Opc == TargetOpcode::IMPLICIT_DEF)
+ return;
+ }
- // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
- // may transfer data dependencies to CopyToReg.
- if (SU->NumSuccs && N->isMachineOpcode()) {
- unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
- for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
- EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
- continue;
- if (!N->hasAnyUseOfValue(i))
- continue;
- unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
- RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
- }
+ for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl())
+ continue;
+ SUnit *PredSU = I->getSUnit();
+ // NumSuccsLeft counts all deps. Don't compare it with NumSuccs which only
+ // counts data deps.
+ if (PredSU->NumSuccsLeft != PredSU->Succs.size())
+ continue;
+ const SDNode *PN = PredSU->getNode();
+ if (!PN->isMachineOpcode()) {
+ if (PN->getOpcode() == ISD::CopyFromReg) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
-
- dumpRegPressure();
+ continue;
}
-
- void setScheduleDAG(ScheduleDAGRRList *scheduleDag) {
- scheduleDAG = scheduleDag;
+ unsigned POpc = PN->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF)
+ continue;
+ if (POpc == TargetOpcode::EXTRACT_SUBREG) {
+ EVT VT = PN->getOperand(0).getValueType();
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
+ } else if (POpc == TargetOpcode::INSERT_SUBREG ||
+ POpc == TargetOpcode::SUBREG_TO_REG) {
+ EVT VT = PN->getValueType(0);
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ continue;
}
-
- void dumpRegPressure() const {
- for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
- E = TRI->regclass_end(); I != E; ++I) {
- const TargetRegisterClass *RC = *I;
- unsigned Id = RC->getID();
- unsigned RP = RegPressure[Id];
- if (!RP) continue;
- DEBUG(dbgs() << RC->getName() << ": " << RP << " / " << RegLimit[Id]
- << '\n');
- }
+ unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
+ for (unsigned i = 0; i != NumDefs; ++i) {
+ EVT VT = PN->getValueType(i);
+ if (!PN->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
+ // Register pressure tracking is imprecise. This can happen.
+ RegPressure[RCId] = 0;
+ else
+ RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
}
+ }
- protected:
- bool canClobber(const SUnit *SU, const SUnit *Op);
- void AddPseudoTwoAddrDeps();
- void PrescheduleNodesWithMultipleUses();
- void CalculateSethiUllmanNumbers();
- };
-
- typedef RegReductionPriorityQueue<bu_ls_rr_sort>
- BURegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<td_ls_rr_sort>
- TDRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<src_ls_rr_sort>
- SrcRegReductionPriorityQueue;
-
- typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
- HybridBURRPriorityQueue;
+ // Check for isMachineOpcode() as PrescheduleNodesWithMultipleUses()
+ // may transfer data dependencies to CopyToReg.
+ if (SU->NumSuccs && N->isMachineOpcode()) {
+ unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
+ for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
+ EVT VT = N->getValueType(i);
+ if (VT == MVT::Glue || VT == MVT::Other)
+ continue;
+ if (!N->hasAnyUseOfValue(i))
+ continue;
+ unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
+ RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
+ }
+ }
- typedef RegReductionPriorityQueue<ilp_ls_rr_sort>
- ILPBURRPriorityQueue;
+ dumpRegPressure();
}
+//===----------------------------------------------------------------------===//
+// Dynamic Node Priority for Register Pressure Reduction
+//===----------------------------------------------------------------------===//
+
/// closestSucc - Returns the scheduled cycle of the successor which is
/// closest to the current cycle.
static unsigned closestSucc(const SUnit *SU) {
@@ -1483,9 +1871,123 @@ static unsigned calcMaxScratches(const SUnit *SU) {
return Scratches;
}
-template <typename RRSort>
-static bool BURRSort(const SUnit *left, const SUnit *right,
- const RegReductionPriorityQueue<RRSort> *SPQ) {
+/// hasOnlyLiveOutUses - Return true if SU has a single value successor that is a
+/// CopyToReg to a virtual register. This SU def is probably a liveout and
+/// it has no other use. It should be scheduled closer to the terminator.
+static bool hasOnlyLiveOutUses(const SUnit *SU) {
+ bool RetVal = false;
+ for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue;
+ const SUnit *SuccSU = I->getSUnit();
+ if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) {
+ unsigned Reg =
+ cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
+ RetVal = true;
+ continue;
+ }
+ }
+ return false;
+ }
+ return RetVal;
+}
+
+/// UnitsSharePred - Return true if the two scheduling units share a common
+/// data predecessor.
+static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
+ SmallSet<const SUnit*, 4> Preds;
+ for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ Preds.insert(I->getSUnit());
+ }
+ for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
+ I != E; ++I) {
+ if (I->isCtrl()) continue; // ignore chain preds
+ if (Preds.count(I->getSUnit()))
+ return true;
+ }
+ return false;
+}
+
+// Check for either a dependence (latency) or resource (hazard) stall.
+//
+// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
+static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
+ if ((int)SPQ->getCurCycle() < Height) return true;
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return true;
+ return false;
+}
+
+// Return -1 if left has higher priority, 1 if right has higher priority.
+// Return 0 if latency-based priority is equivalent.
+static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
+ RegReductionPQBase *SPQ) {
+ // If the two nodes share an operand and one of them has a single
+ // use that is a live out copy, favor the one that is live out. Otherwise
+ // it will be difficult to eliminate the copy if the instruction is a
+ // loop induction variable update. e.g.
+ // BB:
+ // sub r1, r3, #1
+ // str r0, [r2, r3]
+ // mov r3, r1
+ // cmp
+ // bne BB
+ bool SharePred = UnitsSharePred(left, right);
+ // FIXME: Only adjust if BB is a loop back edge.
+ // FIXME: What's the cost of a copy?
+ int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
+ int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
+ int LHeight = (int)left->getHeight() - LBonus;
+ int RHeight = (int)right->getHeight() - RBonus;
+
+ bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
+ BUHasStall(left, LHeight, SPQ);
+ bool RStall = (!checkPref || right->SchedulingPref == Sched::Latency) &&
+ BUHasStall(right, RHeight, SPQ);
+
+ // If scheduling one of the nodes will cause a pipeline stall, delay it.
+ // If scheduling both will cause stalls, sort them according to their
+ // height.
+ if (LStall) {
+ if (!RStall)
+ return 1;
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ } else if (RStall)
+ return -1;
+
+ // If either node is scheduling for latency, sort them by height/depth
+ // and latency.
+ if (!checkPref || (left->SchedulingPref == Sched::Latency ||
+ right->SchedulingPref == Sched::Latency)) {
+ if (DisableSchedCycles) {
+ if (LHeight != RHeight)
+ return LHeight > RHeight ? 1 : -1;
+ }
+ else {
+ // If neither instruction stalls (!LStall && !RStall) then
+ // its height is already covered so only its depth matters. We also reach
+ // this if both stall but have the same height.
+ unsigned LDepth = left->getDepth();
+ unsigned RDepth = right->getDepth();
+ if (LDepth != RDepth) {
+ DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
+ << ") depth " << LDepth << " vs SU (" << right->NodeNum
+ << ") depth " << RDepth << "\n");
+ return LDepth < RDepth ? 1 : -1;
+ }
+ }
+ if (left->Latency != right->Latency)
+ return left->Latency > right->Latency ? 1 : -1;
+ }
+ return 0;
+}
+
+static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
unsigned LPriority = SPQ->getNodePriority(left);
unsigned RPriority = SPQ->getNodePriority(right);
if (LPriority != RPriority)
@@ -1519,24 +2021,31 @@ static bool BURRSort(const SUnit *left, const SUnit *right,
if (LScratch != RScratch)
return LScratch > RScratch;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
-
- if (left->getDepth() != right->getDepth())
- return left->getDepth() < right->getDepth();
+ if (!DisableSchedCycles) {
+ int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
+ }
+ else {
+ if (left->getHeight() != right->getHeight())
+ return left->getHeight() > right->getHeight();
- assert(left->NodeQueueId && right->NodeQueueId &&
+ if (left->getDepth() != right->getDepth())
+ return left->getDepth() < right->getDepth();
+ }
+
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
// Bottom up
-bool bu_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool bu_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
return BURRSort(left, right, SPQ);
}
// Source order, otherwise bottom up.
-bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
+bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
unsigned LOrder = SPQ->getNodeOrdering(left);
unsigned ROrder = SPQ->getNodeOrdering(right);
@@ -1548,49 +2057,69 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
return BURRSort(left, right, SPQ);
}
-bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
+// If the time between now and when the instruction will be ready can cover
+// the spill code, then avoid adding it to the ready queue. This gives long
+// stalls highest priority and allows hoisting across calls. It should also
+// speed up processing the available queue.
+bool hybrid_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ static const unsigned ReadyDelay = 3;
+
+ if (SPQ->MayReduceRegPressure(SU)) return true;
+
+ if (SU->getHeight() > (CurCycle + ReadyDelay)) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, -ReadyDelay)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return true;
+}
+
+// Return true if right should be scheduled with higher priority than left.
+bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
// register pressure reduction.
- if (LHigh && !RHigh)
+ if (LHigh && !RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
+ << right->NodeNum << ")\n");
return true;
- else if (!LHigh && RHigh)
+ }
+ else if (!LHigh && RHigh) {
+ DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
+ << left->NodeNum << ")\n");
return false;
+ }
else if (!LHigh && !RHigh) {
- // Low register pressure situation, schedule for latency if possible.
- bool LStall = left->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < left->getHeight();
- bool RStall = right->SchedulingPref == Sched::Latency &&
- SPQ->getCurCycle() < right->getHeight();
- // If scheduling one of the node will cause a pipeline stall, delay it.
- // If scheduling either one of the node will cause a pipeline stall, sort
- // them according to their height.
- // If neither will cause a pipeline stall, try to reduce register pressure.
- if (LStall) {
- if (!RStall)
- return true;
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- } else if (RStall)
- return false;
-
- // If either node is scheduling for latency, sort them by height and latency
- // first.
- if (left->SchedulingPref == Sched::Latency ||
- right->SchedulingPref == Sched::Latency) {
- if (left->getHeight() != right->getHeight())
- return left->getHeight() > right->getHeight();
- if (left->Latency != right->Latency)
- return left->Latency > right->Latency;
- }
+ int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
+ if (result != 0)
+ return result > 0;
}
-
return BURRSort(left, right, SPQ);
}
-bool ilp_ls_rr_sort::operator()(const SUnit *left,
- const SUnit *right) const {
+// Schedule as many instructions in each cycle as possible. So don't make an
+// instruction available unless it is ready in the current cycle.
+bool ilp_ls_rr_sort::isReady(SUnit *SU, unsigned CurCycle) const {
+ if (SU->getHeight() > CurCycle) return false;
+
+ if (SPQ->getHazardRec()->getHazardType(SU, 0)
+ != ScheduleHazardRecognizer::NoHazard)
+ return false;
+
+ return SU->getHeight() <= CurCycle;
+}
+
+bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+ if (left->isCall || right->isCall)
+ // No way to compute latency of calls.
+ return BURRSort(left, right, SPQ);
+
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
// Avoid causing spills. If register pressure is high, schedule for
@@ -1611,9 +2140,11 @@ bool ilp_ls_rr_sort::operator()(const SUnit *left,
return BURRSort(left, right, SPQ);
}
-template<class SF>
-bool
-RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
+//===----------------------------------------------------------------------===//
+// Preschedule for Register Pressure
+//===----------------------------------------------------------------------===//
+
+bool RegReductionPQBase::canClobber(const SUnit *SU, const SUnit *Op) {
if (SU->isTwoAddress) {
unsigned Opc = SU->getNode()->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -1631,19 +2162,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) {
return false;
}
-/// hasCopyToRegUse - Return true if SU has a value successor that is a
-/// CopyToReg node.
-static bool hasCopyToRegUse(const SUnit *SU) {
- for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
- I != E; ++I) {
- if (I->isCtrl()) continue;
- const SUnit *SuccSU = I->getSUnit();
- if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg)
- return true;
- }
- return false;
-}
-
/// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's
/// physical register defs.
static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
@@ -1654,7 +2172,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
const unsigned *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs();
assert(ImpDefs && "Caller should check hasPhysRegDefs");
for (const SDNode *SUNode = SU->getNode(); SUNode;
- SUNode = SUNode->getFlaggedNode()) {
+ SUNode = SUNode->getGluedNode()) {
if (!SUNode->isMachineOpcode())
continue;
const unsigned *SUImpDefs =
@@ -1663,7 +2181,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
return false;
for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) {
EVT VT = N->getValueType(i);
- if (VT == MVT::Flag || VT == MVT::Other)
+ if (VT == MVT::Glue || VT == MVT::Other)
continue;
if (!N->hasAnyUseOfValue(i))
continue;
@@ -1709,8 +2227,7 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU,
/// after N, which shortens the U->N live range, reducing
/// register pressure.
///
-template<class SF>
-void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
+void RegReductionPQBase::PrescheduleNodesWithMultipleUses() {
// Visit all the nodes in topological order, working top-down.
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
@@ -1748,7 +2265,7 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
if (PredSU->NumSuccs == 1)
continue;
// Avoid prescheduling to copies from virtual registers, which don't behave
- // like other nodes from the perspective of scheduling // heuristics.
+ // like other nodes from the perspective of scheduling heuristics.
if (SDNode *N = SU->getNode())
if (N->getOpcode() == ISD::CopyFromReg &&
TargetRegisterInfo::isVirtualRegister
@@ -1802,17 +2319,17 @@ void RegReductionPriorityQueue<SF>::PrescheduleNodesWithMultipleUses() {
/// one that has a CopyToReg use (more likely to be a loop induction update).
/// If both are two-address, but one is commutable while the other is not
/// commutable, favor the one that's not commutable.
-template<class SF>
-void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
+void RegReductionPQBase::AddPseudoTwoAddrDeps() {
for (unsigned i = 0, e = SUnits->size(); i != e; ++i) {
SUnit *SU = &(*SUnits)[i];
if (!SU->isTwoAddress)
continue;
SDNode *Node = SU->getNode();
- if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode())
+ if (!Node || !Node->isMachineOpcode() || SU->getNode()->getGluedNode())
continue;
+ bool isLiveOut = hasOnlyLiveOutUses(SU);
unsigned Opc = Node->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
unsigned NumRes = TID.getNumDefs();
@@ -1862,7 +2379,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
SuccOpc == TargetOpcode::SUBREG_TO_REG)
continue;
if ((!canClobber(SuccSU, DUSU) ||
- (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) ||
+ (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) ||
(!SU->isCommutable && SuccSU->isCommutable)) &&
!scheduleDAG->IsReachable(SuccSU, SU)) {
DEBUG(dbgs() << " Adding a pseudo-two-addr edge from SU #"
@@ -1877,20 +2394,10 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() {
}
}
-/// CalculateSethiUllmanNumbers - Calculate Sethi-Ullman numbers of all
-/// scheduling units.
-template<class SF>
-void RegReductionPriorityQueue<SF>::CalculateSethiUllmanNumbers() {
- SethiUllmanNumbers.assign(SUnits->size(), 0);
-
- for (unsigned i = 0, e = SUnits->size(); i != e; ++i)
- CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
-}
-
/// LimitedSumOfUnscheduledPredsOfSuccs - Compute the sum of the unscheduled
/// predecessors of the successors of the SUnit SU. Stop when the provided
/// limit is exceeded.
-static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
+static unsigned LimitedSumOfUnscheduledPredsOfSuccs(const SUnit *SU,
unsigned Limit) {
unsigned Sum = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
@@ -1942,7 +2449,7 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
if (left->NumSuccsLeft != right->NumSuccsLeft)
return left->NumSuccsLeft > right->NumSuccsLeft;
- assert(left->NodeQueueId && right->NodeQueueId &&
+ assert(left->NodeQueueId && right->NodeQueueId &&
"NodeQueueId cannot be zero");
return (left->NodeQueueId > right->NodeQueueId);
}
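
The static node priority these comparators fall back on is derived from Sethi-Ullman numbers. Purely as a reference point, here is a small, self-contained sketch of the textbook Sethi-Ullman recursion on a toy node type; the Node struct and field names are invented for illustration, and CalcNodeSethiUllmanNumber in this file computes a simplified variant rather than this exact formula.

#include <algorithm>
#include <vector>

// Toy DAG node; Preds are the operands this node consumes.
struct Node {
  std::vector<Node*> Preds;
  unsigned SUNumber = 0;          // 0 means "not yet computed"
};

// Textbook Sethi-Ullman numbering: visit operands from most to least
// expensive; each additional operand evaluated while earlier results are
// still live needs one more register.
unsigned calcSethiUllman(Node *N) {
  if (N->SUNumber)
    return N->SUNumber;
  if (N->Preds.empty())
    return N->SUNumber = 1;

  std::vector<unsigned> OpNums;
  for (Node *Op : N->Preds)
    OpNums.push_back(calcSethiUllman(Op));
  std::sort(OpNums.rbegin(), OpNums.rend());   // most expensive first

  unsigned Num = 1;
  for (unsigned i = 0, e = OpNums.size(); i != e; ++i)
    Num = std::max(Num, OpNums[i] + i);        // i earlier results still live
  return N->SUNumber = Num;
}
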
@@ -1952,68 +2459,74 @@ bool td_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
//===----------------------------------------------------------------------===//
llvm::ScheduleDAGSDNodes *
-llvm::createBURRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createBURRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
BURegReductionPriorityQueue *PQ =
new BURegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createTDRRListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
TDRegReductionPriorityQueue *PQ =
new TDRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createSourceListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
-
+
SrcRegReductionPriorityQueue *PQ =
new SrcRegReductionPriorityQueue(*IS->MF, false, TII, TRI, 0);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, false, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createHybridListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
HybridBURRPriorityQueue *PQ =
new HybridBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
llvm::ScheduleDAGSDNodes *
-llvm::createILPListDAGScheduler(SelectionDAGISel *IS, CodeGenOpt::Level) {
+llvm::createILPListDAGScheduler(SelectionDAGISel *IS,
+ CodeGenOpt::Level OptLevel) {
const TargetMachine &TM = IS->TM;
const TargetInstrInfo *TII = TM.getInstrInfo();
const TargetRegisterInfo *TRI = TM.getRegisterInfo();
const TargetLowering *TLI = &IS->getTargetLowering();
-
+
ILPBURRPriorityQueue *PQ =
new ILPBURRPriorityQueue(*IS->MF, true, TII, TRI, TLI);
- ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, true, PQ);
+ ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, true, PQ, OptLevel);
PQ->setScheduleDAG(SD);
- return SD;
+ return SD;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index f1bf82ab145a..477c1ffe65d3 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -34,8 +34,8 @@ using namespace llvm;
STATISTIC(LoadsClustered, "Number of loads clustered together");
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
- : ScheduleDAG(mf) {
-}
+ : ScheduleDAG(mf),
+ InstrItins(mf.getTarget().getInstrItineraryData()) {}
/// Run - perform scheduling.
///
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
SUnit *SU = NewSUnit(Old->getNode());
SU->OrigNode = Old->OrigNode;
SU->Latency = Old->Latency;
+ SU->isCall = Old->isCall;
SU->isTwoAddress = Old->isTwoAddress;
SU->isCommutable = Old->isCommutable;
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
@@ -85,7 +86,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
/// a specified operand is a physical register dependency. If so, returns the
/// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
- const TargetRegisterInfo *TRI,
+ const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII,
unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
@@ -108,29 +109,28 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
}
}
-static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
- SelectionDAG *DAG) {
+static void AddGlue(SDNode *N, SDValue Glue, bool AddGlue, SelectionDAG *DAG) {
SmallVector<EVT, 4> VTs;
- SDNode *FlagDestNode = Flag.getNode();
+ SDNode *GlueDestNode = Glue.getNode();
- // Don't add a flag from a node to itself.
- if (FlagDestNode == N) return;
+ // Don't add glue from a node to itself.
+ if (GlueDestNode == N) return;
- // Don't add a flag to something which already has a flag.
- if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return;
+ // Don't add glue to something which already has glue.
+ if (N->getValueType(N->getNumValues() - 1) == MVT::Glue) return;
for (unsigned I = 0, E = N->getNumValues(); I != E; ++I)
VTs.push_back(N->getValueType(I));
- if (AddFlag)
- VTs.push_back(MVT::Flag);
+ if (AddGlue)
+ VTs.push_back(MVT::Glue);
SmallVector<SDValue, 4> Ops;
for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I)
Ops.push_back(N->getOperand(I));
- if (FlagDestNode)
- Ops.push_back(Flag);
+ if (GlueDestNode)
+ Ops.push_back(Glue);
SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size());
MachineSDNode::mmo_iterator Begin = 0, End = 0;
@@ -149,9 +149,9 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag,
MN->setMemRefs(Begin, End);
}
-/// ClusterNeighboringLoads - Force nearby loads together by "flagging" them.
+/// ClusterNeighboringLoads - Force nearby loads together by "gluing" them.
/// This function finds loads of the same base and different offsets. If the
-/// offsets are not far apart (target specific), it add MVT::Flag inputs and
+/// offsets are not far apart (target specific), it adds MVT::Glue inputs and
/// outputs to ensure they are scheduled together and in order. This
/// optimization may benefit some targets by improving cache locality.
void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
@@ -213,20 +213,20 @@ void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) {
if (NumLoads == 0)
return;
- // Cluster loads by adding MVT::Flag outputs and inputs. This also
+ // Cluster loads by adding MVT::Glue outputs and inputs. This also
// ensures they are scheduled in order of increasing addresses.
SDNode *Lead = Loads[0];
- AddFlags(Lead, SDValue(0, 0), true, DAG);
+ AddGlue(Lead, SDValue(0, 0), true, DAG);
- SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1);
+ SDValue InGlue = SDValue(Lead, Lead->getNumValues() - 1);
for (unsigned I = 1, E = Loads.size(); I != E; ++I) {
- bool OutFlag = I < E - 1;
+ bool OutGlue = I < E - 1;
SDNode *Load = Loads[I];
- AddFlags(Load, InFlag, OutFlag, DAG);
+ AddGlue(Load, InGlue, OutGlue, DAG);
- if (OutFlag)
- InFlag = SDValue(Load, Load->getNumValues() - 1);
+ if (OutGlue)
+ InGlue = SDValue(Load, Load->getNumValues() - 1);
++LoadsClustered;
}
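
In ClusterNeighboringLoads the decision of which loads belong together is separate from the gluing itself (AddGlue above). A minimal sketch of that grouping step — bucket loads by base, drop singletons, and order each bucket by increasing offset — using stand-in types; the real code walks SDNode operands and asks the target whether the offsets are close enough to be worth gluing:

#include <algorithm>
#include <map>
#include <vector>

// Illustrative stand-ins; the real code works on SDNodes and target hooks.
struct LoadInfo {
  unsigned NodeId;
  unsigned BaseReg;   // loads sharing a base are clustering candidates
  int Offset;
};

// Group loads by base register and order each group by increasing offset.
std::vector<std::vector<LoadInfo>> clusterLoads(const std::vector<LoadInfo> &Loads) {
  std::map<unsigned, std::vector<LoadInfo>> ByBase;
  for (const LoadInfo &L : Loads)
    ByBase[L.BaseReg].push_back(L);

  std::vector<std::vector<LoadInfo>> Clusters;
  for (auto &Entry : ByBase) {
    if (Entry.second.size() < 2)
      continue;                       // nothing to glue together
    std::sort(Entry.second.begin(), Entry.second.end(),
              [](const LoadInfo &A, const LoadInfo &B) {
                return A.Offset < B.Offset;
              });
    Clusters.push_back(Entry.second);
  }
  return Clusters;
}
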
@@ -266,68 +266,75 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// FIXME: Multiply by 2 because we may clone nodes during scheduling.
// This is a temporary workaround.
SUnits.reserve(NumNodes * 2);
-
+
// Add all nodes in depth first order.
SmallVector<SDNode*, 64> Worklist;
SmallPtrSet<SDNode*, 64> Visited;
Worklist.push_back(DAG->getRoot().getNode());
Visited.insert(DAG->getRoot().getNode());
-
+
while (!Worklist.empty()) {
SDNode *NI = Worklist.pop_back_val();
-
+
// Add all operands to the worklist unless they've already been added.
for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
if (Visited.insert(NI->getOperand(i).getNode()))
Worklist.push_back(NI->getOperand(i).getNode());
-
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
-
+
// If this node has already been processed, stop now.
if (NI->getNodeId() != -1) continue;
-
+
SUnit *NodeSUnit = NewSUnit(NI);
-
- // See if anything is flagged to this node, if so, add them to flagged
- // nodes. Nodes can have at most one flag input and one flag output. Flags
- // are required to be the last operand and result of a node.
-
- // Scan up to find flagged preds.
+
+ // See if anything is glued to this node, if so, add them to glued
+ // nodes. Nodes can have at most one glue input and one glue output. Glue
+ // is required to be the last operand and result of a node.
+
+ // Scan up to find glued preds.
SDNode *N = NI;
while (N->getNumOperands() &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag) {
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue) {
N = N->getOperand(N->getNumOperands()-1).getNode();
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
}
-
- // Scan down to find any flagged succs.
+
+ // Scan down to find any glued succs.
N = NI;
- while (N->getValueType(N->getNumValues()-1) == MVT::Flag) {
- SDValue FlagVal(N, N->getNumValues()-1);
-
- // There are either zero or one users of the Flag result.
- bool HasFlagUse = false;
- for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
+ while (N->getValueType(N->getNumValues()-1) == MVT::Glue) {
+ SDValue GlueVal(N, N->getNumValues()-1);
+
+ // There are either zero or one users of the Glue result.
+ bool HasGlueUse = false;
+ for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end();
UI != E; ++UI)
- if (FlagVal.isOperandOf(*UI)) {
- HasFlagUse = true;
+ if (GlueVal.isOperandOf(*UI)) {
+ HasGlueUse = true;
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
N = *UI;
+ if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
+ NodeSUnit->isCall = true;
break;
}
- if (!HasFlagUse) break;
+ if (!HasGlueUse) break;
}
-
- // If there are flag operands involved, N is now the bottom-most node
- // of the sequence of nodes that are flagged together.
+
+ // If there are glue operands involved, N is now the bottom-most node
+ // of the sequence of nodes that are glued together.
// Update the SUnit.
NodeSUnit->setNode(N);
assert(N->getNodeId() == -1 && "Node already inserted!");
N->setNodeId(NodeSUnit->NodeNum);
+ // Compute NumRegDefsLeft. This must be done before AddSchedEdges.
+ InitNumRegDefsLeft(NodeSUnit);
+
// Assign the Latency field of NodeSUnit using target-provided information.
ComputeLatency(NodeSUnit);
}
@@ -343,7 +350,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
for (unsigned su = 0, e = SUnits.size(); su != e; ++su) {
SUnit *SU = &SUnits[su];
SDNode *MainNode = SU->getNode();
-
+
if (MainNode->isMachineOpcode()) {
unsigned Opc = MainNode->getMachineOpcode();
const TargetInstrDesc &TID = TII->get(Opc);
@@ -356,9 +363,9 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (TID.isCommutable())
SU->isCommutable = true;
}
-
+
// Find all predecessors and successors of the group.
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) {
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode()) {
if (N->isMachineOpcode() &&
TII->get(N->getMachineOpcode()).getImplicitDefs()) {
SU->hasPhysRegClobbers = true;
@@ -368,7 +375,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (NumUsed > TII->get(N->getMachineOpcode()).getNumDefs())
SU->hasPhysRegDefs = true;
}
-
+
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
SDNode *OpN = N->getOperand(i).getNode();
if (isPassiveNode(OpN)) continue; // Not scheduled.
@@ -377,7 +384,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
if (OpSU == SU) continue; // In the same group.
EVT OpVT = N->getOperand(i).getValueType();
- assert(OpVT != MVT::Flag && "Flagged nodes should be in same sunit!");
+ assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0;
@@ -403,7 +410,13 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep));
}
- SU->addPred(dep);
+ if (!SU->addPred(dep) && !dep.isCtrl() && OpSU->NumRegDefsLeft > 0) {
+ // Multiple register uses are combined in the same SUnit. For example,
+ // we could have a set of glued nodes with all their defs consumed by
+ // another set of glued nodes. Register pressure tracking sees this as
+ // a single use, so to keep pressure balanced we reduce the defs.
+ --OpSU->NumRegDefsLeft;
+ }
}
}
}
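
The decrement of NumRegDefsLeft above keeps register-pressure bookkeeping balanced when a second data edge to the same predecessor folds into one. A tiny standalone model of that bookkeeping with plain structs (not the LLVM SUnit class):

    #include <cassert>
    #include <set>

    struct SUnitModel {
      int Id;
      unsigned NumRegDefsLeft;
      std::set<int> Preds;  // predecessor ids already recorded
    };

    // Returns true if the edge was new (mirrors the return value of addPred).
    static bool addPred(SUnitModel &SU, const SUnitModel &Op) {
      return SU.Preds.insert(Op.Id).second;
    }

    int main() {
      SUnitModel Def{1, 2, {}}, Use{2, 0, {}};
      for (int I = 0; I < 2; ++I) {
        bool IsCtrl = false;
        if (!addPred(Use, Def) && !IsCtrl && Def.NumRegDefsLeft > 0)
          --Def.NumRegDefsLeft;  // duplicate register use: drop one outstanding def
      }
      assert(Def.NumRegDefsLeft == 1);
      return 0;
    }
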
@@ -412,7 +425,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
-/// flagged together nodes with a single SUnit.
+/// glued together nodes with a single SUnit.
void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
// Cluster certain nodes which should be scheduled together.
ClusterNodes();
@@ -422,6 +435,69 @@ void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) {
AddSchedEdges();
}
+// Initialize NodeNumDefs for the current Node's opcode.
+void ScheduleDAGSDNodes::RegDefIter::InitNodeNumDefs() {
+ if (!Node->isMachineOpcode()) {
+ if (Node->getOpcode() == ISD::CopyFromReg)
+ NodeNumDefs = 1;
+ else
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned POpc = Node->getMachineOpcode();
+ if (POpc == TargetOpcode::IMPLICIT_DEF) {
+ // No register need be allocated for this.
+ NodeNumDefs = 0;
+ return;
+ }
+ unsigned NRegDefs = SchedDAG->TII->get(Node->getMachineOpcode()).getNumDefs();
+ // Some instructions define regs that are not represented in the selection DAG
+ // (e.g. unused flags). See tMOVi8. Make sure we don't access past NumValues.
+ NodeNumDefs = std::min(Node->getNumValues(), NRegDefs);
+ DefIdx = 0;
+}
+
+// Construct a RegDefIter for this SUnit and find the first valid value.
+ScheduleDAGSDNodes::RegDefIter::RegDefIter(const SUnit *SU,
+ const ScheduleDAGSDNodes *SD)
+ : SchedDAG(SD), Node(SU->getNode()), DefIdx(0), NodeNumDefs(0) {
+ InitNodeNumDefs();
+ Advance();
+}
+
+// Advance to the next valid value defined by the SUnit.
+void ScheduleDAGSDNodes::RegDefIter::Advance() {
+ for (;Node;) { // Visit all glued nodes.
+ for (;DefIdx < NodeNumDefs; ++DefIdx) {
+ if (!Node->hasAnyUseOfValue(DefIdx))
+ continue;
+ if (Node->isMachineOpcode() &&
+ Node->getMachineOpcode() == TargetOpcode::EXTRACT_SUBREG) {
+ // Propagate the incoming (full-register) type. I doubt it's needed.
+ ValueType = Node->getOperand(0).getValueType();
+ }
+ else {
+ ValueType = Node->getValueType(DefIdx);
+ }
+ ++DefIdx;
+ return; // Found a normal regdef.
+ }
+ Node = Node->getGluedNode();
+ if (Node == NULL) {
+ return; // No values left to visit.
+ }
+ InitNodeNumDefs();
+ }
+}
+
+void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
+ assert(SU->NumRegDefsLeft == 0 && "expect a new node");
+ for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
+ assert(SU->NumRegDefsLeft < USHRT_MAX && "overflow is ok but unexpected");
+ ++SU->NumRegDefsLeft;
+ }
+}
+
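
RegDefIter walks an SUnit's node and every node glued below it, yielding only the result values that actually have uses; InitNumRegDefsLeft simply counts those. A self-contained model of that walk, with plain structs standing in for the LLVM classes:

    #include <cassert>
    #include <vector>

    struct NodeModel {
      std::vector<bool> ValueUsed;  // one flag per result value
      const NodeModel *Glued;       // next node in the glued chain, or nullptr
    };

    static unsigned countRegDefsLeft(const NodeModel *N) {
      unsigned Count = 0;
      for (; N; N = N->Glued)        // visit all glued nodes
        for (bool Used : N->ValueUsed)
          if (Used)                  // skip dead result values
            ++Count;
      return Count;
    }

    int main() {
      NodeModel Inner{{true, false}, nullptr};  // 2 results, 1 used
      NodeModel Outer{{true}, &Inner};          // 1 result, used
      assert(countRegDefsLeft(&Outer) == 2);
      return 0;
    }
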
void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
// Check to see if the scheduler cares about latencies.
if (ForceUnitLatencies()) {
@@ -429,20 +505,17 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
return;
}
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty()) {
+ if (!InstrItins || InstrItins->isEmpty()) {
SU->Latency = 1;
return;
}
-
+
// Compute the latency for the node. We use the sum of the latencies for
- // all nodes flagged together into this SUnit.
+ // all nodes glued together into this SUnit.
SU->Latency = 0;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- if (N->isMachineOpcode()) {
- SU->Latency += InstrItins.
- getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
- }
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ if (N->isMachineOpcode())
+ SU->Latency += TII->getInstrLatency(InstrItins, N);
}
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
@@ -451,32 +524,25 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
if (ForceUnitLatencies())
return;
- const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
- if (InstrItins.isEmpty())
- return;
-
if (dep.getKind() != SDep::Data)
return;
unsigned DefIdx = Use->getOperand(OpIdx).getResNo();
- if (Def->isMachineOpcode()) {
- const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
- if (DefIdx >= II.getNumDefs())
- return;
- int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
- if (DefCycle < 0)
- return;
- int UseCycle = 1;
- if (Use->isMachineOpcode()) {
- const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
- UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
- }
- if (UseCycle >= 0) {
- int Latency = DefCycle - UseCycle + 1;
- if (Latency >= 0)
- dep.setLatency(Latency);
- }
+ if (Use->isMachineOpcode())
+ // Adjust the use operand index by num of defs.
+ OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs();
+ int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx);
+ if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg &&
+ !BB->succ_empty()) {
+ unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg();
+ if (TargetRegisterInfo::isVirtualRegister(Reg))
+ // This copy is a liveout value. It is likely coalesced, so reduce the
+ // latency so as not to penalize the def.
+ // FIXME: need target specific adjustment here?
+ Latency = (Latency > 1) ? Latency - 1 : 1;
}
+ if (Latency >= 0)
+ dep.setLatency(Latency);
}
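
The adjustment above assumes a copy feeding a live-out virtual register is likely to be coalesced away, so the defining node should not be charged the full latency. A rough standalone model of that rule:

    #include <algorithm>
    #include <cassert>

    static int operandLatency(int DefLatency, bool UseIsLiveOutCopy) {
      int Latency = DefLatency;
      if (Latency > 1 && UseIsLiveOutCopy)
        Latency = std::max(Latency - 1, 1);  // discount one cycle, never below one
      return Latency;
    }

    int main() {
      assert(operandLatency(3, true) == 2);   // live-out copy: reduced latency
      assert(operandLatency(3, false) == 3);  // ordinary use keeps full latency
      assert(operandLatency(1, true) == 1);   // never drops below one cycle
      return 0;
    }
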
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
@@ -487,14 +553,14 @@ void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
SU->getNode()->dump(DAG);
dbgs() << "\n";
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
dbgs() << " ";
- FlaggedNodes.back()->dump(DAG);
+ GluedNodes.back()->dump(DAG);
dbgs() << "\n";
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
}
@@ -507,37 +573,25 @@ namespace {
};
}
-// ProcessSourceNode - Process nodes with source order numbers. These are added
-// to a vector which EmitSchedule uses to determine how to insert dbg_value
-// instructions in the right order.
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
- InstrEmitter &Emitter,
- DenseMap<SDValue, unsigned> &VRBaseMap,
+/// ProcessSDDbgValues - Process SDDbgValues associated with this node.
+static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
- SmallSet<unsigned, 8> &Seen) {
- unsigned Order = DAG->GetOrdering(N);
- if (!Order || !Seen.insert(Order))
- return;
-
- MachineBasicBlock *BB = Emitter.getBlock();
- if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
- // Did not insert any instruction.
- Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
- return;
- }
-
- Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ unsigned Order) {
if (!N->getHasDebugValue())
return;
+
// Opportunistically insert immediate dbg_value uses, i.e. those with source
// order number right after the N.
+ MachineBasicBlock *BB = Emitter.getBlock();
MachineBasicBlock::iterator InsertPos = Emitter.getInsertPos();
SmallVector<SDDbgValue*,2> &DVs = DAG->GetDbgValues(N);
for (unsigned i = 0, e = DVs.size(); i != e; ++i) {
if (DVs[i]->isInvalidated())
continue;
unsigned DVOrder = DVs[i]->getOrder();
- if (DVOrder == ++Order) {
+ if (!Order || DVOrder == ++Order) {
MachineInstr *DbgMI = Emitter.EmitDbgValue(DVs[i], VRBaseMap);
if (DbgMI) {
Orders.push_back(std::make_pair(DVOrder, DbgMI));
@@ -548,6 +602,33 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
}
}
+// ProcessSourceNode - Process nodes with source order numbers. These are added
+// to a vector which EmitSchedule uses to determine how to insert dbg_value
+// instructions in the right order.
+static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG,
+ InstrEmitter &Emitter,
+ DenseMap<SDValue, unsigned> &VRBaseMap,
+ SmallVector<std::pair<unsigned, MachineInstr*>, 32> &Orders,
+ SmallSet<unsigned, 8> &Seen) {
+ unsigned Order = DAG->GetOrdering(N);
+ if (!Order || !Seen.insert(Order)) {
+ // Process any valid SDDbgValues even if the node does not have an order
+ // assigned.
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, 0);
+ return;
+ }
+
+ MachineBasicBlock *BB = Emitter.getBlock();
+ if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) {
+ // Did not insert any instruction.
+ Orders.push_back(std::make_pair(Order, (MachineInstr*)0));
+ return;
+ }
+
+ Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos())));
+ ProcessSDDbgValues(N, DAG, Emitter, Orders, VRBaseMap, Order);
+}
+
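
ProcessSDDbgValues emits a dbg_value right after its node either when the node carries no source order at all (Order == 0) or when the dbg_value's order immediately follows the node's. A small model of that placement test, using plain data rather than the LLVM emitter:

    #include <cassert>
    #include <vector>

    static std::vector<unsigned> immediateDbgValues(unsigned Order,
                                                    std::vector<unsigned> DVOrders) {
      std::vector<unsigned> Emitted;
      for (unsigned DVOrder : DVOrders)
        if (!Order || DVOrder == ++Order)  // mirrors the test in ProcessSDDbgValues
          Emitted.push_back(DVOrder);
      return Emitted;
    }

    int main() {
      // Node at order 10; dbg_values at 11 and 12 trail it directly.
      assert(immediateDbgValues(10, {11, 12}).size() == 2);
      // A node with no order still gets its dbg_values emitted.
      assert(immediateDbgValues(0, {5, 7}).size() == 2);
      return 0;
    }
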
/// EmitSchedule - Emit the machine code in scheduled order.
MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
@@ -578,25 +659,25 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
}
// For pre-regalloc scheduling, create instructions corresponding to the
- // SDNode and any flagged SDNodes and append them to the block.
+ // SDNode and any glued SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap);
continue;
}
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode()->getFlaggedNode(); N;
- N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
- SDNode *N = FlaggedNodes.back();
- Emitter.EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, SU->isCloned,
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode()->getGluedNode(); N;
+ N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
+ SDNode *N = GluedNodes.back();
+ Emitter.EmitNode(GluedNodes.back(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
// Remember the source order of the inserted instruction.
if (HasDbg)
ProcessSourceNode(N, DAG, Emitter, VRBaseMap, Orders, Seen);
- FlaggedNodes.pop_back();
+ GluedNodes.pop_back();
}
Emitter.EmitNode(SU->getNode(), SU->OrigNode != SU, SU->isCloned,
VRBaseMap);
@@ -625,16 +706,8 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// Insert all SDDbgValue's whose order(s) are before "Order".
if (!MI)
continue;
-#ifndef NDEBUG
- unsigned LastDIOrder = 0;
-#endif
for (; DI != DE &&
(*DI)->getOrder() >= LastOrder && (*DI)->getOrder() < Order; ++DI) {
-#ifndef NDEBUG
- assert((*DI)->getOrder() >= LastDIOrder &&
- "SDDbgValue nodes must be in source order!");
- LastDIOrder = (*DI)->getOrder();
-#endif
if ((*DI)->isInvalidated())
continue;
MachineInstr *DbgMI = Emitter.EmitDbgValue(*DI, VRBaseMap);
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
index 842fc8c72703..cc7310e4ca42 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h
@@ -20,13 +20,13 @@
namespace llvm {
/// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs.
- ///
+ ///
/// Edges between SUnits are initially based on edges in the SelectionDAG,
/// and additional edges can be added by the schedulers as heuristics.
/// SDNodes such as Constants, Registers, and a few others that are not
/// interesting to schedulers are not allocated SUnits.
///
- /// SDNodes with MVT::Flag operands are grouped along with the flagged
+ /// SDNodes with MVT::Glue operands are grouped along with the glued
/// nodes into a single SUnit so that they are scheduled together.
///
/// SDNode-based scheduling graphs do not use SDep::Anti or SDep::Output
@@ -36,6 +36,7 @@ namespace llvm {
class ScheduleDAGSDNodes : public ScheduleDAG {
public:
SelectionDAG *DAG; // DAG of the current basic block
+ const InstrItineraryData *InstrItins;
explicit ScheduleDAGSDNodes(MachineFunction &mf);
@@ -72,13 +73,17 @@ namespace llvm {
/// predecessors / successors info nor the temporary scheduling states.
///
SUnit *Clone(SUnit *N);
-
+
/// BuildSchedGraph - Build the SUnit graph from the selection dag that we
/// are input. This SUnit graph is similar to the SelectionDAG, but
/// excludes nodes that aren't interesting to scheduling, and represents
/// flagged together nodes with a single SUnit.
virtual void BuildSchedGraph(AliasAnalysis *AA);
+ /// InitNumRegDefsLeft - Determine the # of regs defined by this node.
+ ///
+ void InitNumRegDefsLeft(SUnit *SU);
+
/// ComputeLatency - Compute node latency.
///
virtual void ComputeLatency(SUnit *SU);
@@ -105,6 +110,30 @@ namespace llvm {
virtual void getCustomGraphFeatures(GraphWriter<ScheduleDAG*> &GW) const;
+ /// RegDefIter - In place iteration over the values defined by an
+ /// SUnit. This does not need copies of the iterator or any other STLisms.
+ /// The iterator creates itself, rather than being provided by the SchedDAG.
+ class RegDefIter {
+ const ScheduleDAGSDNodes *SchedDAG;
+ const SDNode *Node;
+ unsigned DefIdx;
+ unsigned NodeNumDefs;
+ EVT ValueType;
+ public:
+ RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD);
+
+ bool IsValid() const { return Node != NULL; }
+
+ EVT GetValue() const {
+ assert(IsValid() && "bad iterator");
+ return ValueType;
+ }
+
+ void Advance();
+ private:
+ void InitNodeNumDefs();
+ };
+
private:
/// ClusterNeighboringLoads - Cluster loads from "near" addresses into
/// combined SUnits.
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index ad06ebda5b00..2fb2f2d8aa1e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -31,7 +31,6 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSelectionDAGInfo.h"
#include "llvm/Target/TargetOptions.h"
@@ -44,7 +43,7 @@
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/System/Mutex.h"
+#include "llvm/Support/Mutex.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
@@ -111,7 +110,7 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT,
/// BUILD_VECTOR where all of the elements are ~0 or undef.
bool ISD::isBuildVectorAllOnes(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -152,7 +151,7 @@ bool ISD::isBuildVectorAllOnes(const SDNode *N) {
/// BUILD_VECTOR where all of the elements are 0 or undef.
bool ISD::isBuildVectorAllZeros(const SDNode *N) {
// Look through a bit convert.
- if (N->getOpcode() == ISD::BIT_CONVERT)
+ if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0).getNode();
if (N->getOpcode() != ISD::BUILD_VECTOR) return false;
@@ -199,6 +198,8 @@ bool ISD::isScalarToVector(const SDNode *N) {
if (N->getOperand(0).getOpcode() == ISD::UNDEF)
return false;
unsigned NumElems = N->getNumOperands();
+ if (NumElems == 1)
+ return false;
for (unsigned i = 1; i < NumElems; ++i) {
SDValue V = N->getOperand(i);
if (V.getOpcode() != ISD::UNDEF)
@@ -489,7 +490,7 @@ encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
/// doNotCSE - Return true if CSE should not be performed for this node.
static bool doNotCSE(SDNode *N) {
- if (N->getValueType(0) == MVT::Flag)
+ if (N->getValueType(0) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
switch (N->getOpcode()) {
@@ -501,7 +502,7 @@ static bool doNotCSE(SDNode *N) {
// Check that remaining values produced are not flags.
for (unsigned i = 1, e = N->getNumValues(); i != e; ++i)
- if (N->getValueType(i) == MVT::Flag)
+ if (N->getValueType(i) == MVT::Glue)
return true; // Never CSE anything that produces a flag.
return false;
@@ -609,9 +610,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
bool Erased = false;
switch (N->getOpcode()) {
- case ISD::EntryToken:
- llvm_unreachable("EntryToken should not be in CSEMaps!");
- return false;
case ISD::HANDLENODE: return false; // noop.
case ISD::CONDCODE:
assert(CondCodeNodes[cast<CondCodeSDNode>(N)->get()] &&
@@ -641,6 +639,8 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
}
default:
// Remove it from the CSE Map.
+ assert(N->getOpcode() != ISD::DELETED_NODE && "DELETED_NODE in CSEMap!");
+ assert(N->getOpcode() != ISD::EntryToken && "EntryToken in CSEMap!");
Erased = CSEMap.RemoveNode(N);
break;
}
@@ -648,7 +648,7 @@ bool SelectionDAG::RemoveNodeFromCSEMaps(SDNode *N) {
// Verify that the node was actually in one of the CSE maps, unless it has a
// flag result (which cannot be CSE'd) or is one of the special cases that are
// not subject to CSE.
- if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Flag &&
+ if (!Erased && N->getValueType(N->getNumValues()-1) != MVT::Glue &&
!N->isMachineOpcode() && !doNotCSE(N)) {
N->dump(this);
dbgs() << "\n";
@@ -743,8 +743,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
return Node;
}
-/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
-void SelectionDAG::VerifyNode(SDNode *N) {
+#ifndef NDEBUG
+/// VerifyNodeCommon - Sanity check the given node. Aborts if it is invalid.
+static void VerifyNodeCommon(SDNode *N) {
switch (N->getOpcode()) {
default:
break;
@@ -778,6 +779,44 @@ void SelectionDAG::VerifyNode(SDNode *N) {
}
}
+/// VerifySDNode - Sanity check the given SDNode. Aborts if it is invalid.
+static void VerifySDNode(SDNode *N) {
+ // The SDNode allocators cannot be used to allocate nodes with fields that are
+ // not present in an SDNode!
+ assert(!isa<MemSDNode>(N) && "Bad MemSDNode!");
+ assert(!isa<ShuffleVectorSDNode>(N) && "Bad ShuffleVectorSDNode!");
+ assert(!isa<ConstantSDNode>(N) && "Bad ConstantSDNode!");
+ assert(!isa<ConstantFPSDNode>(N) && "Bad ConstantFPSDNode!");
+ assert(!isa<GlobalAddressSDNode>(N) && "Bad GlobalAddressSDNode!");
+ assert(!isa<FrameIndexSDNode>(N) && "Bad FrameIndexSDNode!");
+ assert(!isa<JumpTableSDNode>(N) && "Bad JumpTableSDNode!");
+ assert(!isa<ConstantPoolSDNode>(N) && "Bad ConstantPoolSDNode!");
+ assert(!isa<BasicBlockSDNode>(N) && "Bad BasicBlockSDNode!");
+ assert(!isa<SrcValueSDNode>(N) && "Bad SrcValueSDNode!");
+ assert(!isa<MDNodeSDNode>(N) && "Bad MDNodeSDNode!");
+ assert(!isa<RegisterSDNode>(N) && "Bad RegisterSDNode!");
+ assert(!isa<BlockAddressSDNode>(N) && "Bad BlockAddressSDNode!");
+ assert(!isa<EHLabelSDNode>(N) && "Bad EHLabelSDNode!");
+ assert(!isa<ExternalSymbolSDNode>(N) && "Bad ExternalSymbolSDNode!");
+ assert(!isa<CondCodeSDNode>(N) && "Bad CondCodeSDNode!");
+ assert(!isa<CvtRndSatSDNode>(N) && "Bad CvtRndSatSDNode!");
+ assert(!isa<VTSDNode>(N) && "Bad VTSDNode!");
+ assert(!isa<MachineSDNode>(N) && "Bad MachineSDNode!");
+
+ VerifyNodeCommon(N);
+}
+
+/// VerifyMachineNode - Sanity check the given MachineNode. Aborts if it is
+/// invalid.
+static void VerifyMachineNode(SDNode *N) {
+ // The MachineNode allocators cannot be used to allocate nodes with fields
+ // that are not present in a MachineNode!
+ // Currently there are no such nodes.
+
+ VerifyNodeCommon(N);
+}
+#endif // NDEBUG
+
/// getEVTAlignment - Compute the default alignment value for the
/// given type.
///
@@ -1315,7 +1354,7 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) {
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1365,11 +1404,11 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::MDNODE_SDNODE, getVTList(MVT::Other), 0, 0);
ID.AddPointer(MD);
-
+
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
-
+
SDNode *N = new (NodeAllocator) MDNodeSDNode(MD);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1613,7 +1652,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// Also compute a conserative estimate for high known-0 bits.
// More trickiness is possible, but this is sufficient for the
// interesting case of alignment computation.
- KnownOne.clear();
+ KnownOne.clearAllBits();
unsigned TrailZ = KnownZero.countTrailingOnes() +
KnownZero2.countTrailingOnes();
unsigned LeadZ = std::max(KnownZero.countLeadingOnes() +
@@ -1636,8 +1675,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned LeadZ = KnownZero2.countLeadingOnes();
- KnownOne2.clear();
- KnownZero2.clear();
+ KnownOne2.clearAllBits();
+ KnownZero2.clearAllBits();
ComputeMaskedBits(Op.getOperand(1),
AllOnes, KnownZero2, KnownOne2, Depth+1);
unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros();
@@ -1765,7 +1804,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign extended bits are demanded, we know that the sign
// bit is demanded.
- InSignBit.zext(BitWidth);
+ InSignBit = InSignBit.zext(BitWidth);
if (NewBits.getBoolValue())
InputDemandedBits |= InSignBit;
@@ -1792,7 +1831,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::CTPOP: {
unsigned LowBits = Log2_32(BitWidth)+1;
KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits);
- KnownOne.clear();
+ KnownOne.clearAllBits();
return;
}
case ISD::LOAD: {
@@ -1808,13 +1847,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
return;
}
@@ -1823,16 +1861,15 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
- APInt InMask = Mask;
- InMask.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
// If any of the sign extended bits are demanded, we know that the sign
// bit is demanded. Temporarily set this bit in the mask for our callee.
if (NewBits.getBoolValue())
InMask |= InSignBit;
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
// Note if the sign bit is known to be zero or one.
@@ -1844,13 +1881,12 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
// If the sign bit wasn't actually demanded by our caller, we don't
// want it set in the KnownZero and KnownOne result values. Reset the
// mask and reapply it to the result values.
- InMask = Mask;
- InMask.trunc(InBits);
+ InMask = Mask.trunc(InBits);
KnownZero &= InMask;
KnownOne &= InMask;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
// If the sign bit is known zero or one, the top bits match.
if (SignBitKnownZero)
@@ -1862,26 +1898,24 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.trunc(InBits);
- KnownZero.trunc(InBits);
- KnownOne.trunc(InBits);
+ APInt InMask = Mask.trunc(InBits);
+ KnownZero = KnownZero.trunc(InBits);
+ KnownOne = KnownOne.trunc(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
return;
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
unsigned InBits = InVT.getScalarType().getSizeInBits();
- APInt InMask = Mask;
- InMask.zext(InBits);
- KnownZero.zext(InBits);
- KnownOne.zext(InBits);
+ APInt InMask = Mask.zext(InBits);
+ KnownZero = KnownZero.zext(InBits);
+ KnownOne = KnownOne.zext(InBits);
ComputeMaskedBits(Op.getOperand(0), InMask, KnownZero, KnownOne, Depth+1);
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
break;
}
case ISD::AssertZext: {
@@ -1921,7 +1955,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
}
// fall through
- case ISD::ADD: {
+ case ISD::ADD:
+ case ISD::ADDE: {
// Output known-0 bits are known if clear or set in both the low clear bits
// common to both LHS & RHS. For example, 8+(X<<3) is known to have the
// low 3 bits clear.
@@ -1936,7 +1971,17 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
KnownZeroOut = std::min(KnownZeroOut,
KnownZero2.countTrailingOnes());
- KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ if (Op.getOpcode() == ISD::ADD) {
+ KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut);
+ return;
+ }
+
+ // With ADDE, a carry bit may be added in, so we can only use this
+ // information if we know (at least) that the low two bits are clear. We
+ // then return to the caller that the low bit is unknown but that other bits
+ // are known zero.
+ if (KnownZeroOut >= 2) // ADDE
+ KnownZero |= APInt::getBitsSet(BitWidth, 1, KnownZeroOut);
return;
}
case ISD::SREM:
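
A concrete instance of the ADDE reasoning above: when both addends have their two low bits clear, the unknown carry-in can only land in bit 0 of the result, so bit 1 (and every bit the trailing-zero count covers above it) stays known zero. A quick numeric check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t A = 0x1234 & ~3u;  // low two bits clear
      uint32_t B = 0x0FF0 & ~3u;  // low two bits clear
      for (uint32_t Carry = 0; Carry <= 1; ++Carry) {
        uint32_t Sum = A + B + Carry;
        assert(((Sum >> 1) & 1u) == 0);  // bit 1 remains known zero
        assert((Sum & 1u) == Carry);     // bit 0 is simply the carry-in
      }
      return 0;
    }
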
@@ -1991,10 +2036,19 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
uint32_t Leaders = std::max(KnownZero.countLeadingOnes(),
KnownZero2.countLeadingOnes());
- KnownOne.clear();
+ KnownOne.clearAllBits();
KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask;
return;
}
+ case ISD::FrameIndex:
+ case ISD::TargetFrameIndex:
+ if (unsigned Align = InferPtrAlignment(Op)) {
+ // The low bits are known zero if the pointer is aligned.
+ KnownZero = APInt::getLowBitsSet(BitWidth, Log2_32(Align));
+ return;
+ }
+ break;
+
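
The new FrameIndex case relies only on the basic fact that a pointer aligned to a power-of-two Align has log2(Align) low bits equal to zero; for example, a 16-byte-aligned slot always ends in four zero bits. A small check of that fact:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t Align = 16;       // assume a 16-byte-aligned stack slot
      const unsigned LowZeroBits = 4;  // log2(16)
      for (uint64_t Addr = 0; Addr < 256; Addr += Align)
        assert((Addr & ((1u << LowZeroBits) - 1)) == 0);
      return 0;
    }
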
default:
// Allow the target to implement this method for its nodes.
if (Op.getOpcode() >= ISD::BUILTIN_OP_END) {
@@ -2234,6 +2288,25 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
return std::max(FirstAnswer, std::min(VTBits, Mask.countLeadingZeros()));
}
+/// isBaseWithConstantOffset - Return true if the specified operand is an
+/// ISD::ADD with a ConstantSDNode on the right-hand side, or if it is an
+/// ISD::OR with a ConstantSDNode that is guaranteed to have the same
+/// semantics as an ADD. This handles the equivalence:
+/// X|Cst == X+Cst iff X&Cst = 0.
+bool SelectionDAG::isBaseWithConstantOffset(SDValue Op) const {
+ if ((Op.getOpcode() != ISD::ADD && Op.getOpcode() != ISD::OR) ||
+ !isa<ConstantSDNode>(Op.getOperand(1)))
+ return false;
+
+ if (Op.getOpcode() == ISD::OR &&
+ !MaskedValueIsZero(Op.getOperand(0),
+ cast<ConstantSDNode>(Op.getOperand(1))->getAPIntValue()))
+ return false;
+
+ return true;
+}
+
+
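
The helper relies on the identity X | Cst == X + Cst whenever X & Cst == 0: when the set bits do not overlap, the OR cannot generate carries. A quick numeric check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t X = 0xABCD0000;    // low 16 bits are zero
      uint32_t Cst = 0x00001234;  // fits entirely in those zero bits
      assert((X | Cst) == (X + Cst));

      uint32_t Y = 0xABCD0001;    // bit 0 overlaps C2's bit 0, so not equivalent
      uint32_t C2 = 0x00000001;
      assert((Y | C2) != (Y + C2));
      return 0;
    }
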
bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
// If we're told that NaNs won't happen, assume they won't.
if (NoNaNsFPMath)
@@ -2295,7 +2368,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2308,23 +2381,22 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
switch (Opcode) {
default: break;
case ISD::SIGN_EXTEND:
- return getConstant(APInt(Val).sextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.sextOrTrunc(VT.getSizeInBits()), VT);
case ISD::ANY_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::TRUNCATE:
- return getConstant(APInt(Val).zextOrTrunc(VT.getSizeInBits()), VT);
+ return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT);
case ISD::UINT_TO_FP:
case ISD::SINT_TO_FP: {
- const uint64_t zero[] = {0, 0};
// No compile time operations on ppcf128.
if (VT == MVT::ppcf128) break;
- APFloat apf = APFloat(APInt(VT.getSizeInBits(), 2, zero));
+ APFloat apf(APInt::getNullValue(VT.getSizeInBits()));
(void)apf.convertFromAPInt(Val,
Opcode==ISD::SINT_TO_FP,
APFloat::rmNearestTiesToEven);
return getConstantFP(apf, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::f32 && C->getValueType(0) == MVT::i32)
return getConstantFP(Val.bitsToFloat(), VT);
else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64)
@@ -2375,7 +2447,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
APInt api(VT.getSizeInBits(), 2, x);
return getConstant(api, VT);
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
if (VT == MVT::i32 && C->getValueType(0) == MVT::f32)
return getConstant((uint32_t)V.bitcastToAPInt().getZExtValue(), VT);
else if (VT == MVT::i64 && C->getValueType(0) == MVT::f64)
@@ -2477,13 +2549,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
return Operand.getNode()->getOperand(0);
}
break;
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
// Basic sanity checking.
assert(VT.getSizeInBits() == Operand.getValueType().getSizeInBits()
- && "Cannot BIT_CONVERT between types of different sizes!");
+ && "Cannot BITCAST between types of different sizes!");
if (VT == Operand.getValueType()) return Operand; // noop conversion.
- if (OpOpcode == ISD::BIT_CONVERT) // bitconv(bitconv(x)) -> bitconv(x)
- return getNode(ISD::BIT_CONVERT, DL, VT, Operand.getOperand(0));
+ if (OpOpcode == ISD::BITCAST) // bitconv(bitconv(x)) -> bitconv(x)
+ return getNode(ISD::BITCAST, DL, VT, Operand.getOperand(0));
if (OpOpcode == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -2519,7 +2591,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) { // Don't CSE flag producing nodes
+ if (VT != MVT::Glue) { // Don't CSE flag producing nodes
FoldingSetNodeID ID;
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
@@ -2535,7 +2607,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -2676,6 +2748,13 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
"Shift operators return type must be the same as their first arg");
assert(VT.isInteger() && N2.getValueType().isInteger() &&
"Shifts only work on integers");
+ // Verify that the shift amount VT is big enough to hold valid shift
+ // amounts. This catches things like trying to shift an i1024 value by an
+ // i8, which is easy to fall into in generic code that uses
+ // TLI.getShiftAmount().
+ assert(N2.getValueType().getSizeInBits() >=
+ Log2_32_Ceil(N1.getValueType().getSizeInBits()) &&
+ "Invalid use of small shift amount with oversized value!");
// Always fold shifts of i1 values so the code generator doesn't need to
// handle them. Since we know the size of the shift has to be less than the
@@ -2820,11 +2899,30 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
return getConstant(ShiftedVal.trunc(ElementSize), VT);
}
break;
- case ISD::EXTRACT_SUBVECTOR:
- if (N1.getValueType() == VT) // Trivial extraction.
- return N1;
+ case ISD::EXTRACT_SUBVECTOR: {
+ SDValue Index = N2;
+ if (VT.isSimple() && N1.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ "Extract subvector VTs must be a vectors!");
+ assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() &&
+ "Extract subvector VTs must have the same element type!");
+ assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Extract subvector must be from larger vector to smaller vector!");
+
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((VT.getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= N1.getValueType().getVectorNumElements())
+ && "Extract subvector overflow!");
+ }
+
+ // Trivial extraction.
+ if (VT.getSimpleVT() == N1.getValueType().getSimpleVT())
+ return N1;
+ }
break;
}
+ }
if (N1C) {
if (N2C) {
@@ -2961,7 +3059,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize this node if possible.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
@@ -2977,7 +3075,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3019,7 +3117,31 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
case ISD::VECTOR_SHUFFLE:
llvm_unreachable("should use getVectorShuffle constructor!");
break;
- case ISD::BIT_CONVERT:
+ case ISD::INSERT_SUBVECTOR: {
+ SDValue Index = N3;
+ if (VT.isSimple() && N1.getValueType().isSimple()
+ && N2.getValueType().isSimple()) {
+ assert(VT.isVector() && N1.getValueType().isVector() &&
+ N2.getValueType().isVector() &&
+ "Insert subvector VTs must be a vectors");
+ assert(VT == N1.getValueType() &&
+ "Dest and insert subvector source types must match!");
+ assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() &&
+ "Insert subvector must be from smaller vector to larger vector!");
+ if (isa<ConstantSDNode>(Index.getNode())) {
+ assert((N2.getValueType().getVectorNumElements() +
+ cast<ConstantSDNode>(Index.getNode())->getZExtValue()
+ <= VT.getVectorNumElements())
+ && "Insert subvector overflow!");
+ }
+
+ // Trivial insertion.
+ if (VT.getSimpleVT() == N2.getValueType().getSimpleVT())
+ return N2;
+ }
+ break;
+ }
+ case ISD::BITCAST:
// Fold bit_convert nodes from a type to themselves.
if (N1.getValueType() == VT)
return N1;
@@ -3029,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
// Memoize node if it doesn't produce a flag.
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
SDValue Ops[] = { N1, N2, N3 };
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
@@ -3045,7 +3167,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -3087,6 +3209,17 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
&ArgChains[0], ArgChains.size());
}
+/// SplatByte - Distribute ByteVal over NumBits bits.
+static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) {
+ APInt Val = APInt(NumBits, ByteVal);
+ unsigned Shift = 8;
+ for (unsigned i = NumBits; i > 8; i >>= 1) {
+ Val = (Val << Shift) | Val;
+ Shift <<= 1;
+ }
+ return Val;
+}
+
/// getMemsetValue - Vectorized representation of the memset value
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
@@ -3095,27 +3228,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
- APInt Val = APInt(NumBits, C->getZExtValue() & 255);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Val = (Val << Shift) | Val;
- Shift <<= 1;
- }
+ APInt Val = SplatByte(NumBits, C->getZExtValue() & 255);
if (VT.isInteger())
return DAG.getConstant(Val, VT);
return DAG.getConstantFP(APFloat(Val), VT);
}
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value);
- unsigned Shift = 8;
- for (unsigned i = NumBits; i > 8; i >>= 1) {
- Value = DAG.getNode(ISD::OR, dl, VT,
- DAG.getNode(ISD::SHL, dl, VT, Value,
- DAG.getConstant(Shift,
- TLI.getShiftAmountTy())),
- Value);
- Shift <<= 1;
+ if (NumBits > 8) {
+ // Use a multiplication with 0x010101... to extend the input to the
+ // required length.
+ APInt Magic = SplatByte(NumBits, 0x01);
+ Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT));
}
return Value;
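
For a constant byte the splat is built by the shift/OR doubling in SplatByte; for a runtime value the same pattern is now obtained by multiplying the zero-extended byte with 0x0101...01. A standalone check that the two strategies agree for a 32-bit value:

    #include <cassert>
    #include <cstdint>

    static uint32_t splatByDoubling(uint8_t Byte) {
      uint32_t Val = Byte;
      for (unsigned Bits = 32, Shift = 8; Bits > 8; Bits >>= 1, Shift <<= 1)
        Val = (Val << Shift) | Val;  // same doubling scheme as SplatByte
      return Val;
    }

    int main() {
      for (unsigned B = 0; B < 256; ++B) {
        uint32_t ByMul = static_cast<uint32_t>(B) * 0x01010101u;  // splat by multiply
        assert(splatByDoubling(static_cast<uint8_t>(B)) == ByMul);
        assert(ByMul == (B | (B << 8) | (B << 16) | (B << 24)));
      }
      return 0;
    }
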
@@ -3131,13 +3255,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG,
if (Str.empty()) {
if (VT.isInteger())
return DAG.getConstant(0, VT);
- else if (VT.getSimpleVT().SimpleTy == MVT::f32 ||
- VT.getSimpleVT().SimpleTy == MVT::f64)
+ else if (VT == MVT::f32 || VT == MVT::f64)
return DAG.getConstantFP(0.0, VT);
else if (VT.isVector()) {
unsigned NumElts = VT.getVectorNumElements();
MVT EltVT = (VT.getVectorElementType() == MVT::f32) ? MVT::i32 : MVT::i64;
- return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ return DAG.getNode(ISD::BITCAST, dl, VT,
DAG.getConstant(0, EVT::getVectorVT(*DAG.getContext(),
EltVT, NumElts)));
} else
@@ -3234,15 +3357,6 @@ static bool FindOptimalMemOpLowering(std::vector<EVT> &MemOps,
if (VT.bitsGT(LVT))
VT = LVT;
}
-
- // If we're optimizing for size, and there is a limit, bump the maximum number
- // of operations inserted down to 4. This is a wild guess that approximates
- // the size of a call to memcpy or memset (3 arguments + call).
- if (Limit != ~0U) {
- const Function *F = DAG.getMachineFunction().getFunction();
- if (F->hasFnAttr(Attribute::OptimizeForSize))
- Limit = 4;
- }
unsigned NumMemOps = 0;
while (Size != 0) {
@@ -3276,18 +3390,22 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memcpy of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
// Expand memcpy to a series of load and store ops if the size operand falls
// below a certain threshold.
+ // TODO: In the AlwaysInline case, if the size is big then generate a loop
+ // rather than a potentially humongous number of loads and stores.
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
@@ -3297,8 +3415,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
std::string Str;
bool CopyFromStr = isMemSrcFromString(Src, Str);
bool isZeroStr = CopyFromStr && Str.empty();
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy();
-
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(OptSize);
+
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
(isZeroStr ? 0 : SrcAlign),
@@ -3334,7 +3452,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = getMemsetStringVal(VT, dl, DAG, TLI, Str, SrcOff);
Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol,
+ false, Align);
} else {
// The type might not be legal for the target. This should only happen
// if the type is smaller than a legal type, as on PPC, so the right
@@ -3343,14 +3462,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
// FIXME does the case above also need this?
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
assert(NVT.bitsGE(VT));
- Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain,
+ Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, VT, isVol, false,
+ SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false,
MinAlign(SrcAlign, SrcOff));
Store = DAG.getTruncStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, VT, isVol, false,
- Align);
+ DstPtrInfo.getWithOffset(DstOff), VT, isVol,
+ false, Align);
}
OutChains.push_back(Store);
SrcOff += VTSize;
@@ -3366,8 +3485,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Turn a memmove of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3377,14 +3496,16 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
unsigned SrcAlign = DAG.InferPtrAlignment(Src);
if (Align > SrcAlign)
SrcAlign = Align;
- unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove();
+ unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize);
if (!FindOptimalMemOpLowering(MemOps, Limit, Size,
(DstAlignCanChange ? 0 : Align),
@@ -3414,7 +3535,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Value = DAG.getLoad(VT, dl, Chain,
getMemBasePlusOffset(Src, SrcOff, DAG),
- SrcSV, SrcSVOff + SrcOff, isVol, false, SrcAlign);
+ SrcPtrInfo.getWithOffset(SrcOff), isVol,
+ false, SrcAlign);
LoadValues.push_back(Value);
LoadChains.push_back(Value.getValue(1));
SrcOff += VTSize;
@@ -3429,7 +3551,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl,
Store = DAG.getStore(Chain, dl, LoadValues[i],
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, Align);
+ DstPtrInfo.getWithOffset(DstOff), isVol, false, Align);
OutChains.push_back(Store);
DstOff += VTSize;
}
@@ -3442,7 +3564,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, uint64_t Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Turn a memset of undef to nop.
if (Src.getOpcode() == ISD::UNDEF)
return Chain;
@@ -3452,13 +3574,15 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
std::vector<EVT> MemOps;
bool DstAlignCanChange = false;
- MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool OptSize = MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize);
FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Dst);
if (FI && !MFI->isFixedObjectIndex(FI->getIndex()))
DstAlignCanChange = true;
bool NonScalarIntSafe =
isa<ConstantSDNode>(Src) && cast<ConstantSDNode>(Src)->isNullValue();
- if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(),
+ if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize),
Size, (DstAlignCanChange ? 0 : Align), 0,
NonScalarIntSafe, false, DAG, TLI))
return SDValue();
@@ -3477,15 +3601,34 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SmallVector<SDValue, 8> OutChains;
uint64_t DstOff = 0;
unsigned NumMemOps = MemOps.size();
+
+ // Find the largest store and generate the bit pattern for it.
+ EVT LargestVT = MemOps[0];
+ for (unsigned i = 1; i < NumMemOps; i++)
+ if (MemOps[i].bitsGT(LargestVT))
+ LargestVT = MemOps[i];
+ SDValue MemSetValue = getMemsetValue(Src, LargestVT, DAG, dl);
+
for (unsigned i = 0; i < NumMemOps; i++) {
EVT VT = MemOps[i];
- unsigned VTSize = VT.getSizeInBits() / 8;
- SDValue Value = getMemsetValue(Src, VT, DAG, dl);
+
+ // If this store is smaller than the largest store, see whether we can get
+ // the smaller value for free with a truncate.
+ SDValue Value = MemSetValue;
+ if (VT.bitsLT(LargestVT)) {
+ if (!LargestVT.isVector() && !VT.isVector() &&
+ TLI.isTruncateFree(LargestVT, VT))
+ Value = DAG.getNode(ISD::TRUNCATE, dl, VT, MemSetValue);
+ else
+ Value = getMemsetValue(Src, VT, DAG, dl);
+ }
+ assert(Value.getValueType() == VT && "Value with wrong type.");
SDValue Store = DAG.getStore(Chain, dl, Value,
getMemBasePlusOffset(Dst, DstOff, DAG),
- DstSV, DstSVOff + DstOff, isVol, false, 0);
+ DstPtrInfo.getWithOffset(DstOff),
+ isVol, false, Align);
OutChains.push_back(Store);
- DstOff += VTSize;
+ DstOff += VT.getSizeInBits() / 8;
}
return DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
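
The loop above computes the splat once for the widest store type and, when the target reports the truncate as free, derives the narrower store values from it instead of rebuilding them. A tiny sketch of that reuse with plain integers standing in for the DAG value types:

    #include <cassert>
    #include <cstdint>

    static uint64_t splat(uint8_t Byte, unsigned Bits) {
      uint64_t Val = 0;
      for (unsigned I = 0; I < Bits; I += 8)
        Val |= static_cast<uint64_t>(Byte) << I;  // replicate the byte
      return Val;
    }

    int main() {
      const uint8_t Byte = 0xAB;
      uint64_t Largest = splat(Byte, 64);               // built once for the i64 stores
      uint32_t Narrow = static_cast<uint32_t>(Largest); // "free" truncate for i32 stores
      assert(Narrow == splat(Byte, 32));
      return 0;
    }
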
@@ -3495,8 +3638,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl,
SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol, bool AlwaysInline,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memcpy to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3508,7 +3651,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result = getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(),Align,
- isVol, false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ isVol, false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3518,7 +3661,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
TSI.EmitTargetCodeForMemcpy(*this, dl, Chain, Dst, Src, Size, Align,
isVol, AlwaysInline,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3528,7 +3671,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
assert(ConstantSize && "AlwaysInline requires a constant size!");
return getMemcpyLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- true, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ true, DstPtrInfo, SrcPtrInfo);
}
// FIXME: If the memcpy is volatile (isVol), lowering it to a plain libc
@@ -3559,8 +3702,8 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff,
- const Value *SrcSV, uint64_t SrcSVOff) {
+ MachinePointerInfo DstPtrInfo,
+ MachinePointerInfo SrcPtrInfo) {
// Check to see if we should lower the memmove to loads and stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3573,7 +3716,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemmoveLoadsAndStores(*this, dl, Chain, Dst, Src,
ConstantSize->getZExtValue(), Align, isVol,
- false, DstSV, DstSVOff, SrcSV, SrcSVOff);
+ false, DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
}
@@ -3582,7 +3725,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemmove(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff, SrcSV, SrcSVOff);
+ DstPtrInfo, SrcPtrInfo);
if (Result.getNode())
return Result;
@@ -3611,7 +3754,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVol,
- const Value *DstSV, uint64_t DstSVOff) {
+ MachinePointerInfo DstPtrInfo) {
// Check to see if we should lower the memset to stores first.
// For cases within the target-specified limits, this is the best choice.
@@ -3623,7 +3766,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
SDValue Result =
getMemsetStores(*this, dl, Chain, Dst, Src, ConstantSize->getZExtValue(),
- Align, isVol, DstSV, DstSVOff);
+ Align, isVol, DstPtrInfo);
if (Result.getNode())
return Result;
@@ -3633,11 +3776,11 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
// code. If the target chooses to do this, this is the next best.
SDValue Result =
TSI.EmitTargetCodeForMemset(*this, dl, Chain, Dst, Src, Size, Align, isVol,
- DstSV, DstSVOff);
+ DstPtrInfo);
if (Result.getNode())
return Result;
- // Emit a library call.
+ // Emit a library call.
const Type *IntPtrTy = TLI.getTargetData()->getIntPtrType(*getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
@@ -3669,19 +3812,12 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
}
SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
- SDValue Chain,
- SDValue Ptr, SDValue Cmp,
- SDValue Swp, const Value* PtrVal,
+ SDValue Chain, SDValue Ptr, SDValue Cmp,
+ SDValue Swp, MachinePointerInfo PtrInfo,
unsigned Alignment) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3689,8 +3825,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Cmp, Swp, MMO);
}
@@ -3729,12 +3864,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(MemVT);
- // Check if the memory reference references a frame index
- if (!PtrVal)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- PtrVal = PseudoSourceValue::getFixedStack(FI->getIndex());
-
MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore;
@@ -3742,7 +3871,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PtrVal, Flags, 0,
+ MF.getMachineMemOperand(MachinePointerInfo(PtrVal), Flags,
MemVT.getStoreSize(), Alignment);
return getAtomic(Opcode, dl, MemVT, Chain, Ptr, Val, MMO);
@@ -3785,7 +3914,6 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
}
/// getMergeValues - Create a MERGE_VALUES node from the given operands.
-/// Allowed to return something different (and simpler) if Simplify is true.
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps,
DebugLoc dl) {
if (NumOps == 1)
@@ -3803,18 +3931,18 @@ SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl,
const EVT *VTs, unsigned NumVTs,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
return getMemIntrinsicNode(Opcode, dl, makeVTList(VTs, NumVTs), Ops, NumOps,
- MemVT, srcValue, SVOff, Align, Vol,
+ MemVT, PtrInfo, Align, Vol,
ReadMem, WriteMem);
}
SDValue
SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
const SDValue *Ops, unsigned NumOps,
- EVT MemVT, const Value *srcValue, int SVOff,
+ EVT MemVT, MachinePointerInfo PtrInfo,
unsigned Align, bool Vol,
bool ReadMem, bool WriteMem) {
if (Align == 0) // Ensure that codegen never sees alignment 0
@@ -3829,8 +3957,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
if (Vol)
Flags |= MachineMemOperand::MOVolatile;
MachineMemOperand *MMO =
- MF.getMachineMemOperand(srcValue, Flags, SVOff,
- MemVT.getStoreSize(), Align);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Align);
return getMemIntrinsicNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
@@ -3841,13 +3968,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
EVT MemVT, MachineMemOperand *MMO) {
assert((Opcode == ISD::INTRINSIC_VOID ||
Opcode == ISD::INTRINSIC_W_CHAIN ||
+ Opcode == ISD::PREFETCH ||
(Opcode <= INT_MAX &&
(int)Opcode >= ISD::FIRST_TARGET_MEMORY_OPCODE)) &&
"Opcode is not a memory-accessing opcode!");
// Memoize the node unless it returns a flag.
MemIntrinsicSDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -3867,36 +3995,70 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
return SDValue(N, 0);
}
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, int64_t Offset = 0) {
+ // If this is FI+Offset, we can model it.
+ if (const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr))
+ return MachinePointerInfo::getFixedStack(FI->getIndex(), Offset);
+
+ // If this is (FI+Offset1)+Offset2, we can model it.
+ if (Ptr.getOpcode() != ISD::ADD ||
+ !isa<ConstantSDNode>(Ptr.getOperand(1)) ||
+ !isa<FrameIndexSDNode>(Ptr.getOperand(0)))
+ return MachinePointerInfo();
+
+ int FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ return MachinePointerInfo::getFixedStack(FI, Offset+
+ cast<ConstantSDNode>(Ptr.getOperand(1))->getSExtValue());
+}
+
+/// InferPointerInfo - If the specified ptr/offset is a frame index, infer a
+/// MachinePointerInfo record from it. This is particularly useful because the
+/// code generator has many cases where it doesn't bother passing in a
+/// MachinePointerInfo to getLoad or getStore when it has "FI+Cst".
+static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) {
+ // If the 'Offset' value isn't a constant, we can't handle this.
+ if (ConstantSDNode *OffsetNode = dyn_cast<ConstantSDNode>(OffsetOp))
+ return InferPointerInfo(Ptr, OffsetNode->getSExtValue());
+ if (OffsetOp.getOpcode() == ISD::UNDEF)
+ return InferPointerInfo(Ptr);
+ return MachinePointerInfo();
+}
+
+
SDValue
SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset,
- const Value *SV, int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(VT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOLoad;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ // If we don't have a PtrInfo, infer the trivial frame index case to simplify
+ // clients.
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr, Offset);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- MemVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, MemVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO);
}
SDValue
-SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
+SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
EVT VT, DebugLoc dl, SDValue Chain,
SDValue Ptr, SDValue Offset, EVT MemVT,
MachineMemOperand *MMO) {
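The hunks above replace the old (const Value*, int offset) pairs with MachinePointerInfo and add InferPointerInfo, which recovers a fixed-stack location when a caller passes no pointer info but the address is FI or FI+Cst. Below is a minimal, self-contained sketch of that inference; the struct and function names are hypothetical stand-ins for the SDNode and MachinePointerInfo types used by the real code.

// Standalone analogue (hypothetical types, not the LLVM API) of the
// InferPointerInfo logic above: recover "fixed stack slot FI at offset Off"
// from a pointer that is a frame index or a frame index plus a constant,
// and give up otherwise.
#include <cstdint>
#include <cstdio>

struct PtrExpr {
  bool IsFrameIndex;   // base is a fixed stack slot
  int  FrameIndex;
  bool HasConstAdd;    // an add of the frame index and a constant
  int64_t Addend;
};

struct PtrInfo {
  bool Known;          // false corresponds to a default MachinePointerInfo()
  int  FrameIndex;
  int64_t Offset;
};

static PtrInfo inferPointerInfo(const PtrExpr &P, int64_t Offset = 0) {
  if (!P.IsFrameIndex)
    return {false, 0, 0};                     // unknown location
  if (P.HasConstAdd)
    Offset += P.Addend;                       // (FI + Cst) plus extra offset
  return {true, P.FrameIndex, Offset};
}

int main() {
  PtrExpr FIPlus8 = {true, 3, true, 8};
  PtrInfo PI = inferPointerInfo(FIPlus8);
  std::printf("known=%d fi#%d offset=%lld\n", PI.Known, PI.FrameIndex,
              (long long)PI.Offset);
  return 0;
}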
@@ -3943,25 +4105,26 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType,
SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl,
SDValue Chain, SDValue Ptr,
- const Value *SV, int SVOffset,
+ MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, VT, isVolatile, isNonTemporal, Alignment, TBAAInfo);
}
-SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl,
+SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT,
SDValue Chain, SDValue Ptr,
- const Value *SV,
- int SVOffset, EVT MemVT,
+ MachinePointerInfo PtrInfo, EVT MemVT,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
SDValue Undef = getUNDEF(Ptr.getValueType());
return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef,
- SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment);
+ PtrInfo, MemVT, isVolatile, isNonTemporal, Alignment,
+ TBAAInfo);
}
+
SDValue
SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
SDValue Offset, ISD::MemIndexedMode AM) {
@@ -3969,33 +4132,32 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base,
assert(LD->getOffset().getOpcode() == ISD::UNDEF &&
"Load is already a indexed load!");
return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl,
- LD->getChain(), Base, Offset, LD->getSrcValue(),
- LD->getSrcValueOffset(), LD->getMemoryVT(),
+ LD->getChain(), Base, Offset, LD->getPointerInfo(),
+ LD->getMemoryVT(),
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment());
}
SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV, int SVOffset,
+ SDValue Ptr, MachinePointerInfo PtrInfo,
bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ unsigned Alignment, const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(Val.getValueType());
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset,
- Val.getValueType().getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags,
+ Val.getValueType().getStoreSize(), Alignment,
+ TBAAInfo);
return getStore(Chain, dl, Val, Ptr, MMO);
}
@@ -4024,27 +4186,26 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
}
SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
- SDValue Ptr, const Value *SV,
- int SVOffset, EVT SVT,
- bool isVolatile, bool isNonTemporal,
- unsigned Alignment) {
+ SDValue Ptr, MachinePointerInfo PtrInfo,
+ EVT SVT,bool isVolatile, bool isNonTemporal,
+ unsigned Alignment,
+ const MDNode *TBAAInfo) {
if (Alignment == 0) // Ensure that codegen never sees alignment 0
Alignment = getEVTAlignment(SVT);
- // Check if the memory reference references a frame index
- if (!SV)
- if (const FrameIndexSDNode *FI =
- dyn_cast<const FrameIndexSDNode>(Ptr.getNode()))
- SV = PseudoSourceValue::getFixedStack(FI->getIndex());
-
- MachineFunction &MF = getMachineFunction();
unsigned Flags = MachineMemOperand::MOStore;
if (isVolatile)
Flags |= MachineMemOperand::MOVolatile;
if (isNonTemporal)
Flags |= MachineMemOperand::MONonTemporal;
+
+ if (PtrInfo.V == 0)
+ PtrInfo = InferPointerInfo(Ptr);
+
+ MachineFunction &MF = getMachineFunction();
MachineMemOperand *MMO =
- MF.getMachineMemOperand(SV, Flags, SVOffset, SVT.getStoreSize(), Alignment);
+ MF.getMachineMemOperand(PtrInfo, Flags, SVT.getStoreSize(), Alignment,
+ TBAAInfo);
return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO);
}
@@ -4170,7 +4331,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
SDNode *N;
SDVTList VTs = getVTList(VT);
- if (VT != MVT::Flag) {
+ if (VT != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
void *IP = 0;
@@ -4186,7 +4347,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4236,7 +4397,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
// Memoize the node unless it returns a flag.
SDNode *N;
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -4268,7 +4429,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
}
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifySDNode(N);
#endif
return SDValue(N, 0);
}
@@ -4645,7 +4806,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
unsigned NumOps) {
// If an identical node already exists, use it.
void *IP = 0;
- if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
+ if (VTs.VTs[VTs.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
@@ -4845,9 +5006,9 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl,
MachineSDNode *
SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
const SDValue *Ops, unsigned NumOps) {
- bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Flag;
+ bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue;
MachineSDNode *N;
- void *IP;
+ void *IP = 0;
if (DoCSE) {
FoldingSetNodeID ID;
@@ -4876,7 +5037,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
AllNodes.push_back(N);
#ifndef NDEBUG
- VerifyNode(N);
+ VerifyMachineNode(N);
#endif
return N;
}
@@ -4907,7 +5068,7 @@ SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT,
/// else return NULL.
SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
const SDValue *Ops, unsigned NumOps) {
- if (VTList.VTs[VTList.NumVTs-1] != MVT::Flag) {
+ if (VTList.VTs[VTList.NumVTs-1] != MVT::Glue) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
@@ -5340,6 +5501,29 @@ void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) {
SD->setHasDebugValue(true);
}
+/// TransferDbgValues - Transfer SDDbgValues.
+void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) {
+ if (From == To || !From.getNode()->getHasDebugValue())
+ return;
+ SDNode *FromNode = From.getNode();
+ SDNode *ToNode = To.getNode();
+ SmallVector<SDDbgValue *, 2> &DVs = GetDbgValues(FromNode);
+ SmallVector<SDDbgValue *, 2> ClonedDVs;
+ for (SmallVector<SDDbgValue *, 2>::iterator I = DVs.begin(), E = DVs.end();
+ I != E; ++I) {
+ SDDbgValue *Dbg = *I;
+ if (Dbg->getKind() == SDDbgValue::SDNODE) {
+ SDDbgValue *Clone = getDbgValue(Dbg->getMDPtr(), ToNode, To.getResNo(),
+ Dbg->getOffset(), Dbg->getDebugLoc(),
+ Dbg->getOrder());
+ ClonedDVs.push_back(Clone);
+ }
+ }
+ for (SmallVector<SDDbgValue *, 2>::iterator I = ClonedDVs.begin(),
+ E = ClonedDVs.end(); I != E; ++I)
+ AddDbgValue(*I, ToNode, false);
+}
+
//===----------------------------------------------------------------------===//
// SDNode Class
//===----------------------------------------------------------------------===//
@@ -5367,7 +5551,7 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
}
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
- const SDValue *Ops, unsigned NumOps, EVT memvt,
+ const SDValue *Ops, unsigned NumOps, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
@@ -5386,7 +5570,7 @@ void SDNode::Profile(FoldingSetNodeID &ID) const {
namespace {
struct EVTArray {
std::vector<EVT> VTs;
-
+
EVTArray() {
VTs.reserve(MVT::LAST_VALUETYPE);
for (unsigned i = 0; i < MVT::LAST_VALUETYPE; ++i)
@@ -5406,7 +5590,7 @@ const EVT *SDNode::getValueTypeList(EVT VT) {
sys::SmartScopedLock<true> Lock(*VTMutex);
return &(*EVTs->insert(VT).first);
} else {
- assert(VT.getSimpleVT().SimpleTy < MVT::LAST_VALUETYPE &&
+ assert(VT.getSimpleVT() < MVT::LAST_VALUETYPE &&
"Value type out of range!");
return &SimpleVTArray->VTs[VT.getSimpleVT().SimpleTy];
}
@@ -5478,9 +5662,9 @@ bool SDNode::isOperandOf(SDNode *N) const {
/// reachesChainWithoutSideEffects - Return true if this operand (which must
/// be a chain) reaches the specified operand without crossing any
-/// side-effecting instructions. In practice, this looks through token
-/// factors and non-volatile loads. In order to remain efficient, this only
-/// looks a couple of nodes in, it does not do an exhaustive search.
+/// side-effecting instructions on any chain path. In practice, this looks
+/// through token factors and non-volatile loads. In order to remain efficient,
+/// this only looks a couple of nodes in, it does not do an exhaustive search.
bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
unsigned Depth) const {
if (*this == Dest) return true;
@@ -5490,12 +5674,12 @@ bool SDValue::reachesChainWithoutSideEffects(SDValue Dest,
if (Depth == 0) return false;
// If this is a token factor, all inputs to the TF happen in parallel. If any
- // of the operands of the TF reach dest, then we can do the xform.
+ // of the operands of the TF does not reach dest, then we cannot do the xform.
if (getOpcode() == ISD::TokenFactor) {
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
- if (getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
- return true;
- return false;
+ if (!getOperand(i).reachesChainWithoutSideEffects(Dest, Depth-1))
+ return false;
+ return true;
}
// Loads don't have side effects, look through them.
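The hunk above tightens reachesChainWithoutSideEffects: for a TokenFactor, the transform is now only allowed if every chain input reaches Dest, where previously a single reaching input was enough. A minimal sketch of the corrected rule, assuming a toy node type rather than the real SDNode:

// Self-contained sketch (hypothetical node type, not SelectionDAG) of the
// all-paths TokenFactor rule above.
#include <vector>

enum Kind { TokenFactor, NonVolatileLoad, SideEffect, Entry };

struct Node {
  Kind K;
  std::vector<const Node*> Chains;   // chain operands
};

static bool reachesWithoutSideEffects(const Node *N, const Node *Dest,
                                      unsigned Depth = 6) {
  if (N == Dest) return true;
  if (Depth == 0) return false;
  if (N->K == TokenFactor) {
    for (const Node *Op : N->Chains)          // all paths, not any path
      if (!reachesWithoutSideEffects(Op, Dest, Depth - 1))
        return false;
    return true;
  }
  if (N->K == NonVolatileLoad && !N->Chains.empty())   // look through loads
    return reachesWithoutSideEffects(N->Chains[0], Dest, Depth - 1);
  return false;                               // may have side effects
}

int main() {
  Node Root{Entry, {}};
  Node Store{SideEffect, {&Root}};
  Node Load{NonVolatileLoad, {&Root}};
  Node TF{TokenFactor, {&Load, &Store}};
  // One path crosses a side-effecting node, so the whole TokenFactor fails.
  return reachesWithoutSideEffects(&TF, &Root) ? 1 : 0;
}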
@@ -5600,6 +5784,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::EH_RETURN: return "EH_RETURN";
case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP";
case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP";
+ case ISD::EH_SJLJ_DISPATCHSETUP: return "EH_SJLJ_DISPATCHSETUP";
case ISD::ConstantPool: return "ConstantPool";
case ISD::ExternalSymbol: return "ExternalSymbol";
case ISD::BlockAddress: return "BlockAddress";
@@ -5690,6 +5875,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::INSERT_VECTOR_ELT: return "insert_vector_elt";
case ISD::EXTRACT_VECTOR_ELT: return "extract_vector_elt";
case ISD::CONCAT_VECTORS: return "concat_vectors";
+ case ISD::INSERT_SUBVECTOR: return "insert_subvector";
case ISD::EXTRACT_SUBVECTOR: return "extract_subvector";
case ISD::SCALAR_TO_VECTOR: return "scalar_to_vector";
case ISD::VECTOR_SHUFFLE: return "vector_shuffle";
@@ -5723,7 +5909,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::UINT_TO_FP: return "uint_to_fp";
case ISD::FP_TO_SINT: return "fp_to_sint";
case ISD::FP_TO_UINT: return "fp_to_uint";
- case ISD::BIT_CONVERT: return "bit_convert";
+ case ISD::BITCAST: return "bit_convert";
case ISD::FP16_TO_FP32: return "fp16_to_fp32";
case ISD::FP32_TO_FP16: return "fp32_to_fp16";
@@ -5935,12 +6121,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
OS << LBB->getName() << " ";
OS << (const void*)BBDN->getBasicBlock() << ">";
} else if (const RegisterSDNode *R = dyn_cast<RegisterSDNode>(this)) {
- if (G && R->getReg() &&
- TargetRegisterInfo::isPhysicalRegister(R->getReg())) {
- OS << " %" << G->getTarget().getRegisterInfo()->getName(R->getReg());
- } else {
- OS << " %reg" << R->getReg();
- }
+ OS << ' ' << PrintReg(R->getReg(), G ? G->getTarget().getRegisterInfo() :0);
} else if (const ExternalSymbolSDNode *ES =
dyn_cast<ExternalSymbolSDNode>(this)) {
OS << "'" << ES->getSymbol() << "'";
@@ -5986,7 +6167,7 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
const char *AM = getIndexedModeName(ST->getAddressingMode());
if (*AM)
OS << ", " << AM;
-
+
OS << ">";
} else if (const MemSDNode* M = dyn_cast<MemSDNode>(this)) {
OS << "<" << *M->getMemOperand() << ">";
@@ -6037,7 +6218,7 @@ void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
const SelectionDAG *G, unsigned depth,
- unsigned indent)
+ unsigned indent)
{
if (depth == 0)
return;
@@ -6058,7 +6239,7 @@ static void printrWithDepthHelper(raw_ostream &OS, const SDNode *N,
void SDNode::printrWithDepth(raw_ostream &OS, const SelectionDAG *G,
unsigned depth) const {
printrWithDepthHelper(OS, this, G, depth, 0);
-}
+}
void SDNode::printrFull(raw_ostream &OS, const SelectionDAG *G) const {
// Don't print impossibly deep things.
@@ -6072,7 +6253,7 @@ void SDNode::dumprWithDepth(const SelectionDAG *G, unsigned depth) const {
void SDNode::dumprFull(const SelectionDAG *G) const {
// Don't print impossibly deep things.
dumprWithDepth(G, 100);
-}
+}
static void DumpNodes(const SDNode *N, unsigned indent, const SelectionDAG *G) {
for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
@@ -6156,10 +6337,10 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
/// is loading from.
-bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
unsigned Bytes, int Dist) const {
if (LD->getChain() != Base->getChain())
return false;
@@ -6180,11 +6361,11 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
if (FS != BFS || FS != (int)Bytes) return false;
return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
}
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
+
+ // Handle X+C
+ if (isBaseWithConstantOffset(Loc) && Loc.getOperand(0) == BaseLoc &&
+ cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue() == Dist*Bytes)
+ return true;
const GlobalValue *GV1 = NULL;
const GlobalValue *GV2 = NULL;
@@ -6225,15 +6406,14 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
int64_t FrameOffset = 0;
if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ } else if (isBaseWithConstantOffset(Ptr) &&
isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ // Handle FI+Cst
FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
FrameOffset = Ptr.getConstantOperandVal(1);
}
if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
FrameOffset);
@@ -6354,7 +6534,7 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
if (OpVal.getOpcode() == ISD::UNDEF)
SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
- SplatValue |= APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
+ SplatValue |= CN->getAPIntValue().zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos;
else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal))
SplatValue |= CN->getValueAPF().bitcastToAPInt().zextOrTrunc(sz) <<BitPos;
@@ -6369,10 +6549,10 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
while (sz > 8) {
unsigned HalfSize = sz / 2;
- APInt HighValue = APInt(SplatValue).lshr(HalfSize).trunc(HalfSize);
- APInt LowValue = APInt(SplatValue).trunc(HalfSize);
- APInt HighUndef = APInt(SplatUndef).lshr(HalfSize).trunc(HalfSize);
- APInt LowUndef = APInt(SplatUndef).trunc(HalfSize);
+ APInt HighValue = SplatValue.lshr(HalfSize).trunc(HalfSize);
+ APInt LowValue = SplatValue.trunc(HalfSize);
+ APInt HighUndef = SplatUndef.lshr(HalfSize).trunc(HalfSize);
+ APInt LowUndef = SplatUndef.trunc(HalfSize);
// If the two halves do not match (ignoring undef bits), stop here.
if ((HighValue & ~LowUndef) != (LowValue & ~HighUndef) ||
@@ -6412,7 +6592,7 @@ static void checkForCyclesHelper(const SDNode *N,
// If this node has already been checked, don't check it again.
if (Checked.count(N))
return;
-
+
// If a node has already been visited on this depth-first walk, reject it as
// a cycle.
if (!Visited.insert(N)) {
@@ -6421,10 +6601,10 @@ static void checkForCyclesHelper(const SDNode *N,
errs() << "Detected cycle in SelectionDAG\n";
abort();
}
-
+
for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
-
+
Checked.insert(N);
Visited.erase(N);
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index e65744592c8b..452f5614b7bf 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -15,6 +15,7 @@
#include "SDNodeDbgValue.h"
#include "SelectionDAGBuilder.h"
#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -43,9 +44,8 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/Analysis/DebugInfo.h"
-#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
-#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetIntrinsicInfo.h"
#include "llvm/Target/TargetLowering.h"
@@ -70,10 +70,28 @@ LimitFPPrecision("limit-float-precision",
cl::location(LimitFloatPrecision),
cl::init(0));
+// Limit the width of DAG chains. This is important in general to prevent
+// DAG-based analysis from blowing up. For example, alias analysis and
+// load clustering may not complete in reasonable time. It is difficult to
+// recognize and avoid this situation within each individual analysis, and
+// future analyses are likely to have the same behavior. Limiting DAG width is
+// the safe approach, and will be especially important with global DAGs.
+//
+// MaxParallelChains default is arbitrarily high to avoid affecting
+// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
+// sequence over this should have been converted to llvm.memcpy by the
+// frontend. It is easy to induce this behavior with .ll code such as:
+// %buffer = alloca [4096 x i8]
+// %data = load [4096 x i8]* %argPtr
+// store [4096 x i8] %data, [4096 x i8]* %buffer
+static cl::opt<unsigned>
+MaxParallelChains("dag-chain-limit", cl::desc("Max parallel isel dag chains"),
+ cl::init(64), cl::Hidden);
+
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
const SDValue *Parts, unsigned NumParts,
EVT PartVT, EVT ValueVT);
-
+
/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger then ValueVT then AssertOp can be used to specify whether the extra
@@ -85,7 +103,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
if (ValueVT.isVector())
return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT);
-
+
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
@@ -112,8 +130,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, HalfVT, Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
}
if (TLI.isBigEndian())
@@ -145,8 +163,8 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) &&
"Unexpected split");
SDValue Lo, Hi;
- Lo = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[0]);
- Hi = DAG.getNode(ISD::BIT_CONVERT, DL, EVT(MVT::f64), Parts[1]);
+ Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
+ Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
if (TLI.isBigEndian())
std::swap(Lo, Hi);
Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
@@ -188,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL,
}
if (PartVT.getSizeInBits() == ValueVT.getSizeInBits())
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
llvm_unreachable("Unknown mismatch!");
return SDValue();
@@ -206,7 +224,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(NumParts > 0 && "No parts to assemble!");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDValue Val = Parts[0];
-
+
// Handle a multi-element vector.
if (NumParts > 1) {
EVT IntermediateVT, RegisterVT;
@@ -219,7 +237,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
assert(RegisterVT == Parts[0].getValueType() &&
"Part type doesn't match part!");
-
+
// Assemble the parts into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
if (NumIntermediates == NumParts) {
@@ -238,20 +256,20 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
PartVT, IntermediateVT);
}
-
+
// Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
// intermediate operands.
Val = DAG.getNode(IntermediateVT.isVector() ?
ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR, DL,
ValueVT, &Ops[0], NumIntermediates);
}
-
+
// There is now one part, held in Val. Correct it to match ValueVT.
PartVT = Val.getValueType();
-
+
if (PartVT == ValueVT)
return Val;
-
+
if (PartVT.isVector()) {
// If the element type of the source/dest vectors are the same, but the
// parts vector has more elements than the value vector, then we have a
@@ -262,12 +280,12 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
"Cannot narrow, it would be a lossy transformation");
return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
DAG.getIntPtrConstant(0));
- }
-
+ }
+
// Vector/Vector bitcast.
- return DAG.getNode(ISD::BIT_CONVERT, DL, ValueVT, Val);
+ return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
}
-
+
assert(ValueVT.getVectorElementType() == PartVT &&
ValueVT.getVectorNumElements() == 1 &&
"Only trivial scalar-to-vector conversions should get here!");
@@ -280,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL,
static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT);
-
+
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
@@ -289,11 +307,11 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
EVT ValueVT = Val.getValueType();
-
+
// Handle the vector case separately.
if (ValueVT.isVector())
return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT);
-
+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
unsigned PartBits = PartVT.getSizeInBits();
unsigned OrigNumParts = NumParts;
@@ -316,14 +334,14 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
} else {
assert(PartVT.isInteger() && ValueVT.isInteger() &&
- "Unknown mismatch!");
+ "Unknown mismatch!");
ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
}
} else if (PartBits == ValueVT.getSizeInBits()) {
// Different types of the same size.
assert(NumParts == 1 && PartVT != ValueVT);
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
// If the parts cover less bits than value has, truncate the value.
assert(PartVT.isInteger() && ValueVT.isInteger() &&
@@ -366,7 +384,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
// The number of parts is a power of 2. Repeatedly bisect the value using
// EXTRACT_ELEMENT.
- Parts[0] = DAG.getNode(ISD::BIT_CONVERT, DL,
+ Parts[0] = DAG.getNode(ISD::BITCAST, DL,
EVT::getIntegerVT(*DAG.getContext(),
ValueVT.getSizeInBits()),
Val);
@@ -384,8 +402,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL,
ThisVT, Part0, DAG.getIntPtrConstant(0));
if (ThisBits == PartBits && ThisVT != PartVT) {
- Part0 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part0);
- Part1 = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Part1);
+ Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
+ Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
}
}
}
@@ -403,13 +421,13 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
EVT ValueVT = Val.getValueType();
assert(ValueVT.isVector() && "Not a vector");
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
-
+
if (NumParts == 1) {
if (PartVT == ValueVT) {
// Nothing to do.
} else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
// Bitconvert vector->vector case.
- Val = DAG.getNode(ISD::BIT_CONVERT, DL, PartVT, Val);
+ Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
} else if (PartVT.isVector() &&
PartVT.getVectorElementType() == ValueVT.getVectorElementType()&&
PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
@@ -420,7 +438,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
ElementVT, Val, DAG.getIntPtrConstant(i)));
-
+
for (unsigned i = ValueVT.getVectorNumElements(),
e = PartVT.getVectorNumElements(); i != e; ++i)
Ops.push_back(DAG.getUNDEF(ElementVT));
@@ -428,7 +446,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::BUILD_VECTOR, DL, PartVT, &Ops[0], Ops.size());
// FIXME: Use CONCAT for 2x -> 4x.
-
+
//SDValue UndefElts = DAG.getUNDEF(VectorTy);
//Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
} else {
@@ -439,11 +457,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
PartVT, Val, DAG.getIntPtrConstant(0));
}
-
+
Parts[0] = Val;
return;
}
-
+
// Handle a multi-element vector.
EVT IntermediateVT, RegisterVT;
unsigned NumIntermediates;
@@ -451,11 +469,11 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
IntermediateVT,
NumIntermediates, RegisterVT);
unsigned NumElements = ValueVT.getVectorNumElements();
-
+
assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
NumParts = NumRegs; // Silence a compiler warning.
assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
-
+
// Split the vector into intermediate operands.
SmallVector<SDValue, 8> Ops(NumIntermediates);
for (unsigned i = 0; i != NumIntermediates; ++i) {
@@ -467,7 +485,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL,
Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
IntermediateVT, Val, DAG.getIntPtrConstant(i));
}
-
+
// Split the intermediate operands into legal parts.
if (NumParts == NumIntermediates) {
// If the register was not expanded, promote or copy the value,
@@ -618,48 +636,49 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
}
Chain = P.getValue(1);
+ Parts[i] = P;
// If the source register was virtual and if we know something about it,
// add an assert node.
- if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) &&
- RegisterVT.isInteger() && !RegisterVT.isVector()) {
- unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister;
- if (FuncInfo.LiveOutRegInfo.size() > SlotNo) {
- const FunctionLoweringInfo::LiveOutInfo &LOI =
- FuncInfo.LiveOutRegInfo[SlotNo];
-
- unsigned RegSize = RegisterVT.getSizeInBits();
- unsigned NumSignBits = LOI.NumSignBits;
- unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
-
- // FIXME: We capture more information than the dag can represent. For
- // now, just use the tightest assertzext/assertsext possible.
- bool isSExt = true;
- EVT FromVT(MVT::Other);
- if (NumSignBits == RegSize)
- isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
- else if (NumZeroBits >= RegSize-1)
- isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
- else if (NumSignBits > RegSize-8)
- isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
- else if (NumZeroBits >= RegSize-8)
- isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
- else if (NumSignBits > RegSize-16)
- isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
- else if (NumZeroBits >= RegSize-16)
- isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
- else if (NumSignBits > RegSize-32)
- isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
- else if (NumZeroBits >= RegSize-32)
- isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
-
- if (FromVT != MVT::Other)
- P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
- RegisterVT, P, DAG.getValueType(FromVT));
- }
- }
+ if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
+ !RegisterVT.isInteger() || RegisterVT.isVector() ||
+ !FuncInfo.LiveOutRegInfo.inBounds(Regs[Part+i]))
+ continue;
+
+ const FunctionLoweringInfo::LiveOutInfo &LOI =
+ FuncInfo.LiveOutRegInfo[Regs[Part+i]];
+
+ unsigned RegSize = RegisterVT.getSizeInBits();
+ unsigned NumSignBits = LOI.NumSignBits;
+ unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes();
+
+ // FIXME: We capture more information than the dag can represent. For
+ // now, just use the tightest assertzext/assertsext possible.
+ bool isSExt = true;
+ EVT FromVT(MVT::Other);
+ if (NumSignBits == RegSize)
+ isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1
+ else if (NumZeroBits >= RegSize-1)
+ isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1
+ else if (NumSignBits > RegSize-8)
+ isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8
+ else if (NumZeroBits >= RegSize-8)
+ isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8
+ else if (NumSignBits > RegSize-16)
+ isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16
+ else if (NumZeroBits >= RegSize-16)
+ isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16
+ else if (NumSignBits > RegSize-32)
+ isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32
+ else if (NumZeroBits >= RegSize-32)
+ isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32
+ else
+ continue;
- Parts[i] = P;
+ // Add an assertion node.
+ assert(FromVT != MVT::Other);
+ Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
+ RegisterVT, P, DAG.getValueType(FromVT));
}
Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
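The restructured code above still picks the tightest AssertSext/AssertZext it can justify from the live-out register's known sign and zero bits, but bails out early with continue instead of nesting. A small standalone sketch of the width-selection rule, with plain integers standing in for EVTs (the names are illustrative, not the LLVM API):

// Given how many sign bits or known-zero bits a register carries, pick the
// narrowest source width an AssertSext/AssertZext could assert from
// (0 means no assertion is possible).
#include <cstdio>

static unsigned pickAssertWidth(unsigned RegSize, unsigned NumSignBits,
                                unsigned NumZeroBits, bool &IsSExt) {
  IsSExt = true;
  if (NumSignBits == RegSize)                  return 1;   // assert sext i1
  if (NumZeroBits >= RegSize - 1)  { IsSExt = false; return 1;  }
  if (NumSignBits >  RegSize - 8)              return 8;
  if (NumZeroBits >= RegSize - 8)  { IsSExt = false; return 8;  }
  if (NumSignBits >  RegSize - 16)             return 16;
  if (NumZeroBits >= RegSize - 16) { IsSExt = false; return 16; }
  if (NumSignBits >  RegSize - 32)             return 32;
  if (NumZeroBits >= RegSize - 32) { IsSExt = false; return 32; }
  return 0;                                    // fall through: no assert
}

int main() {
  bool SExt;
  unsigned W = pickAssertWidth(64, 40, 0, SExt); // 40 sign bits in an i64
  std::printf("assert%s from i%u\n", SExt ? "sext" : "zext", W);
  return 0;
}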
@@ -889,11 +908,8 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
Val.getResNo(), Offset, dl, DbgSDNodeOrder);
DAG.AddDbgValue(SDV, Val.getNode(), false);
}
- } else {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
- }
+ } else
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
DanglingDebugInfoMap[V] = DanglingDebugInfo();
}
}
@@ -913,7 +929,9 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
unsigned InReg = It->second;
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain,NULL);
+ resolveDanglingDebugInfo(V, N);
+ return N;
}
// Otherwise create a new SDValue and remember it.
@@ -1088,7 +1106,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
Chains[i] =
DAG.getStore(Chain, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + i),
- Add, NULL, Offsets[i], false, false, 0);
+ // FIXME: better loc info would be nice.
+ Add, MachinePointerInfo(), false, false, 0);
}
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
@@ -1347,7 +1366,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases){
if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
return false;
}
-
+
return true;
}
@@ -1383,6 +1402,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// If this is a series of conditions that are or'd or and'd together, emit
// this as a sequence of branches instead of setcc's with and/or operations.
+ // As long as jumps are not expensive, this should improve performance.
// For example, instead of something like:
// cmp A, B
// C = seteq
@@ -1397,7 +1417,8 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) {
// jle foo
//
if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
- if (BOp->hasOneUse() &&
+ if (!TLI.isJumpExpensive() &&
+ BOp->hasOneUse() &&
(BOp->getOpcode() == Instruction::And ||
BOp->getOpcode() == Instruction::Or)) {
FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
@@ -1502,10 +1523,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
MVT::Other, getControlRoot(), Cond,
DAG.getBasicBlock(CB.TrueBB));
- // Insert the false branch.
- if (CB.FalseBB != NextBlock)
- BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
- DAG.getBasicBlock(CB.FalseBB));
+ // Insert the false branch. Do this even if it's a fall through branch,
+ // this makes it easier to do DAG optimizations which require inverting
+ // the branch condition.
+ BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
+ DAG.getBasicBlock(CB.FalseBB));
DAG.setRoot(BrCond);
}
@@ -1592,12 +1614,28 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
Sub, DAG.getConstant(B.Range, VT),
ISD::SETUGT);
- SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(),
- TLI.getPointerTy());
+ // Determine the type of the test operands.
+ bool UsePtrType = false;
+ if (!TLI.isTypeLegal(VT))
+ UsePtrType = true;
+ else {
+ for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
+ if ((uint64_t)((int64_t)B.Cases[i].Mask >> VT.getSizeInBits()) + 1 >= 2) {
+ // Switch table case ranges are encoded into a series of masks.
+ // Just use pointer type, it's guaranteed to fit.
+ UsePtrType = true;
+ break;
+ }
+ }
+ if (UsePtrType) {
+ VT = TLI.getPointerTy();
+ Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+ }
- B.Reg = FuncInfo.CreateReg(TLI.getPointerTy());
+ B.RegVT = VT;
+ B.Reg = FuncInfo.CreateReg(VT);
SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
- B.Reg, ShiftOp);
+ B.Reg, Sub);
// Set NextBlock to be the MBB immediately after the current one, if any.
// This is used to avoid emitting unnecessary branches to the next block.
@@ -1623,36 +1661,34 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
}
/// visitBitTestCase - this function produces one "bit test"
-void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB,
+void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB) {
- SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg,
- TLI.getPointerTy());
+ EVT VT = BB.RegVT;
+ SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+ Reg, VT);
SDValue Cmp;
if (CountPopulation_64(B.Mask) == 1) {
// Testing for a single bit; just compare the shift count with what it
// would need to be to shift a 1 bit in that position.
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(ShiftOp.getValueType()),
+ TLI.getSetCCResultType(VT),
ShiftOp,
- DAG.getConstant(CountTrailingZeros_64(B.Mask),
- TLI.getPointerTy()),
+ DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
ISD::SETEQ);
} else {
// Make desired shift
- SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(),
- TLI.getPointerTy(),
- DAG.getConstant(1, TLI.getPointerTy()),
- ShiftOp);
+ SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT,
+ DAG.getConstant(1, VT), ShiftOp);
// Emit bit tests and jumps
SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(),
- TLI.getPointerTy(), SwitchVal,
- DAG.getConstant(B.Mask, TLI.getPointerTy()));
+ VT, SwitchVal, DAG.getConstant(B.Mask, VT));
Cmp = DAG.getSetCC(getCurDebugLoc(),
- TLI.getSetCCResultType(AndOp.getValueType()),
- AndOp, DAG.getConstant(0, TLI.getPointerTy()),
+ TLI.getSetCCResultType(VT),
+ AndOp, DAG.getConstant(0, VT),
ISD::SETNE);
}
@@ -1732,10 +1768,56 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR,
if (++BBI != FuncInfo.MF->end())
NextBlock = BBI;
- // TODO: If any two of the cases has the same destination, and if one value
+ // If any two of the cases has the same destination, and if one value
// is the same as the other, but has one bit unset that the other has set,
// use bit manipulation to do two compares at once. For example:
// "if (X == 6 || X == 4)" -> "if ((X|2) == 6)"
+ // TODO: This could be extended to merge any 2 cases in switches with 3 cases.
+ // TODO: Handle cases where CR.CaseBB != SwitchBB.
+ if (Size == 2 && CR.CaseBB == SwitchBB) {
+ Case &Small = *CR.Range.first;
+ Case &Big = *(CR.Range.second-1);
+
+ if (Small.Low == Small.High && Big.Low == Big.High && Small.BB == Big.BB) {
+ const APInt& SmallValue = cast<ConstantInt>(Small.Low)->getValue();
+ const APInt& BigValue = cast<ConstantInt>(Big.Low)->getValue();
+
+ // Check that there is only one bit different.
+ if (BigValue.countPopulation() == SmallValue.countPopulation() + 1 &&
+ (SmallValue | BigValue) == BigValue) {
+ // Isolate the common bit.
+ APInt CommonBit = BigValue & ~SmallValue;
+ assert((SmallValue | CommonBit) == BigValue &&
+ CommonBit.countPopulation() == 1 && "Not a common bit?");
+
+ SDValue CondLHS = getValue(SV);
+ EVT VT = CondLHS.getValueType();
+ DebugLoc DL = getCurDebugLoc();
+
+ SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS,
+ DAG.getConstant(CommonBit, VT));
+ SDValue Cond = DAG.getSetCC(DL, MVT::i1,
+ Or, DAG.getConstant(BigValue, VT),
+ ISD::SETEQ);
+
+ // Update successor info.
+ SwitchBB->addSuccessor(Small.BB);
+ SwitchBB->addSuccessor(Default);
+
+ // Insert the true branch.
+ SDValue BrCond = DAG.getNode(ISD::BRCOND, DL, MVT::Other,
+ getControlRoot(), Cond,
+ DAG.getBasicBlock(Small.BB));
+
+ // Insert the false branch.
+ BrCond = DAG.getNode(ISD::BR, DL, MVT::Other, BrCond,
+ DAG.getBasicBlock(Default));
+
+ DAG.setRoot(BrCond);
+ return true;
+ }
+ }
+ }
// Rearrange the case blocks so that the last one falls through if possible.
if (NextBlock && Default != NextBlock && BackCase.BB != NextBlock) {
@@ -1800,9 +1882,8 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) {
}
static APInt ComputeRange(const APInt &First, const APInt &Last) {
- APInt LastExt(Last), FirstExt(First);
uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1;
- LastExt.sext(BitWidth); FirstExt.sext(BitWidth);
+ APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth);
return (LastExt - FirstExt + 1ULL);
}
@@ -2151,7 +2232,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR,
}
BitTestBlock BTB(lowBound, cmpRange, SV,
- -1U, (CR.CaseBB == SwitchBB),
+ -1U, MVT::Other, (CR.CaseBB == SwitchBB),
CR.CaseBB, Default, BTC);
if (CR.CaseBB == SwitchBB)
@@ -2180,7 +2261,8 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
if (Cases.size() >= 2)
// Must recompute end() each iteration because it may be
// invalidated by erase if we hold on to it
- for (CaseItr I = Cases.begin(), J = ++(Cases.begin()); J != Cases.end(); ) {
+ for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin());
+ J != Cases.end(); ) {
const APInt& nextValue = cast<ConstantInt>(J->Low)->getValue();
const APInt& currentValue = cast<ConstantInt>(I->High)->getValue();
MachineBasicBlock* nextBB = J->BB;
@@ -2205,6 +2287,19 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases,
return numCmps;
}
+void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
+ MachineBasicBlock *Last) {
+ // Update JTCases.
+ for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
+ if (JTCases[i].first.HeaderBB == First)
+ JTCases[i].first.HeaderBB = Last;
+
+ // Update BitTestCases.
+ for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
+ if (BitTestCases[i].Parent == First)
+ BitTestCases[i].Parent = Last;
+}
+
void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) {
MachineBasicBlock *SwitchMBB = FuncInfo.MBB;
@@ -2292,30 +2387,14 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(const User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
- if (Ty->isVectorTy()) {
- if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
- const VectorType *DestTy = cast<VectorType>(I.getType());
- const Type *ElTy = DestTy->getElementType();
- unsigned VL = DestTy->getNumElements();
- std::vector<Constant*> NZ(VL, ConstantFP::getNegativeZero(ElTy));
- Constant *CNZ = ConstantVector::get(&NZ[0], NZ.size());
- if (CV == CNZ) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
- }
+ if (isa<Constant>(I.getOperand(0)) &&
+ I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
+ SDValue Op2 = getValue(I.getOperand(1));
+ setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
+ Op2.getValueType(), Op2));
+ return;
}
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(I.getOperand(0)))
- if (CFP->isExactlyValue(ConstantFP::getNegativeZero(Ty)->getValueAPF())) {
- SDValue Op2 = getValue(I.getOperand(1));
- setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(),
- Op2.getValueType(), Op2));
- return;
- }
-
visitBinary(I, ISD::FSUB);
}
@@ -2329,31 +2408,29 @@ void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!I.getType()->isVectorTy() &&
- Op2.getValueType() != TLI.getShiftAmountTy()) {
+
+ MVT ShiftTy = TLI.getShiftAmountTy();
+
+ // Coerce the shift amount to the right type if we can.
+ if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
+ unsigned ShiftSize = ShiftTy.getSizeInBits();
+ unsigned Op2Size = Op2.getValueType().getSizeInBits();
+ DebugLoc DL = getCurDebugLoc();
+
// If the operand is smaller than the shift count type, promote it.
- EVT PTy = TLI.getPointerTy();
- EVT STy = TLI.getShiftAmountTy();
- if (STy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
+ if (ShiftSize > Op2Size)
+ Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
+
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
- else if (STy.getSizeInBits() >=
- Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getShiftAmountTy(), Op2);
- // Otherwise we'll need to temporarily settle for some other
- // convenient type; type legalization will make adjustments as
- // needed.
- else if (PTy.bitsLT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
- else if (PTy.bitsGT(Op2.getValueType()))
- Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
- TLI.getPointerTy(), Op2);
+ else if (ShiftSize >= Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
+ Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
+ // Otherwise we'll need to temporarily settle for some other convenient
+ // type. Type legalization will make adjustments once the shiftee is split.
+ else
+ Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
}
setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(),
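The rewritten shift lowering above coerces the shift amount to the target's shift type: zero-extend when the operand is narrower, truncate early when the shift type can represent any shift count for the operand's width, and otherwise settle for i32 until type legalization. A width-only sketch of that decision (hypothetical helper, not the DAG nodes the real code builds):

#include <cstdio>

enum Action { ZExtToShiftTy, TruncToShiftTy, ZExtOrTruncToI32 };

static Action coerceShiftAmount(unsigned ShiftSize, unsigned Op2Size) {
  if (ShiftSize > Op2Size)
    return ZExtToShiftTy;                    // operand narrower: promote it
  unsigned Log2Ceil = 0;                     // ceil(log2(Op2Size))
  while ((1u << Log2Ceil) < Op2Size) ++Log2Ceil;
  if (ShiftSize >= Log2Ceil)
    return TruncToShiftTy;                   // shift type holds any count
  return ZExtOrTruncToI32;                   // temporary type; fixed up later
}

int main() {
  std::printf("%d\n", coerceShiftAmount(8, 64)); // i8 shift type, i64 amount
  return 0;
}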
@@ -2499,9 +2576,9 @@ void SelectionDAGBuilder::visitBitCast(const User &I) {
EVT DestVT = TLI.getValueType(I.getType());
// BitCast assures us that source and destination are the same size so this is
- // either a BIT_CONVERT or a no-op.
+ // either a BITCAST or a no-op.
if (DestVT != N.getValueType())
- setValue(&I, DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
DestVT, N)); // convert types.
else
setValue(&I, N); // noop cast.
@@ -2650,7 +2727,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) {
} else {
StartIdx[Input] = (MinRange[Input]/MaskNumElts)*MaskNumElts;
if (MaxRange[Input] - StartIdx[Input] < (int)MaskNumElts &&
- StartIdx[Input] + MaskNumElts < SrcNumElts)
+ StartIdx[Input] + MaskNumElts <= SrcNumElts)
RangeUse[Input] = 1; // Extract from a multiple of the mask length.
}
}
@@ -2726,8 +2803,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) {
bool IntoUndef = isa<UndefValue>(Op0);
bool FromUndef = isa<UndefValue>(Op1);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> AggValueVTs;
ComputeValueVTs(TLI, AggTy, AggValueVTs);
@@ -2765,8 +2841,7 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) {
const Type *ValTy = I.getType();
bool OutOfUndef = isa<UndefValue>(Op0);
- unsigned LinearIndex = ComputeLinearIndex(TLI, AggTy,
- I.idx_begin(), I.idx_end());
+ unsigned LinearIndex = ComputeLinearIndex(AggTy, I.idx_begin(), I.idx_end());
SmallVector<EVT, 4> ValValueVTs;
ComputeValueVTs(TLI, ValTy, ValValueVTs);
@@ -2884,7 +2959,7 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
// Handle alignment. If the requested alignment is less than or equal to
// the stack alignment, ignore it. If the size is greater than or equal to
// the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
- unsigned StackAlign = TM.getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
if (Align <= StackAlign)
Align = 0;
@@ -2920,6 +2995,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
SmallVector<EVT, 4> ValueVTs;
SmallVector<uint64_t, 4> Offsets;
@@ -2930,10 +3006,11 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
SDValue Root;
bool ConstantMemory = false;
- if (I.isVolatile())
+ if (I.isVolatile() || NumValues > MaxParallelChains)
// Serialize volatile loads with other side effects.
Root = getRoot();
- else if (AA->pointsToConstantMemory(SV)) {
+ else if (AA->pointsToConstantMemory(
+ AliasAnalysis::Location(SV, AA->getTypeStoreSize(Ty), TBAAInfo))) {
// Do not serialize (non-volatile) loads of constant memory with anything.
Root = DAG.getEntryNode();
ConstantMemory = true;
@@ -2943,23 +3020,38 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
}
SmallVector<SDValue, 4> Values(NumValues);
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
- for (unsigned i = 0; i != NumValues; ++i) {
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // Serializing loads here may result in excessive register pressure, and
+ // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
+ // could recover a bit by hoisting nodes upward in the chain by recognizing
+ // they are side-effect free or do not alias. The optimizer should really
+ // avoid this case by converting large object/array copies to llvm.memcpy
+ // (MaxParallelChains should always remain as failsafe).
+ if (ChainI == MaxParallelChains) {
+ assert(PendingLoads.empty() && "PendingLoads must be serialized first");
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(),
PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root,
- A, SV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
+ A, MachinePointerInfo(SV, Offsets[i]), isVolatile,
+ isNonTemporal, Alignment, TBAAInfo);
Values[i] = L;
- Chains[i] = L.getValue(1);
+ Chains[ChainI] = L.getValue(1);
}
if (!ConstantMemory) {
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues);
+ MVT::Other, &Chains[0], ChainI);
if (isVolatile)
DAG.setRoot(Chain);
else
@@ -2989,23 +3081,37 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) {
SDValue Ptr = getValue(PtrV);
SDValue Root = getRoot();
- SmallVector<SDValue, 4> Chains(NumValues);
+ SmallVector<SDValue, 4> Chains(std::min(unsigned(MaxParallelChains),
+ NumValues));
EVT PtrVT = Ptr.getValueType();
bool isVolatile = I.isVolatile();
bool isNonTemporal = I.getMetadata("nontemporal") != 0;
unsigned Alignment = I.getAlignment();
-
- for (unsigned i = 0; i != NumValues; ++i) {
+ const MDNode *TBAAInfo = I.getMetadata(LLVMContext::MD_tbaa);
+
+ unsigned ChainI = 0;
+ for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
+ // See visitLoad comments.
+ if (ChainI == MaxParallelChains) {
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ Root = Chain;
+ ChainI = 0;
+ }
SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr,
DAG.getConstant(Offsets[i], PtrVT));
- Chains[i] = DAG.getStore(Root, getCurDebugLoc(),
- SDValue(Src.getNode(), Src.getResNo() + i),
- Add, PtrV, Offsets[i], isVolatile,
- isNonTemporal, Alignment);
- }
-
- DAG.setRoot(DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
- MVT::Other, &Chains[0], NumValues));
+ SDValue St = DAG.getStore(Root, getCurDebugLoc(),
+ SDValue(Src.getNode(), Src.getResNo() + i),
+ Add, MachinePointerInfo(PtrV, Offsets[i]),
+ isVolatile, isNonTemporal, Alignment, TBAAInfo);
+ Chains[ChainI] = St;
+ }
+
+ SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], ChainI);
+ ++SDNodeOrder;
+ AssignOrderingToNode(StoreNode.getNode());
+ DAG.setRoot(StoreNode);
}
/// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
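visitLoad and visitStore above now cap the number of parallel chains: after every MaxParallelChains per-element operations, the pending chains are folded into a TokenFactor that becomes the new root, so no single TokenFactor grows without bound. A self-contained analogue of that batching loop, with hypothetical names in place of the DAG calls:

#include <algorithm>
#include <cstdio>
#include <vector>

static const unsigned MaxParallelChains = 64;

int main() {
  unsigned NumValues = 150;                      // e.g. elements of an aggregate
  std::vector<unsigned> Chains(std::min(MaxParallelChains, NumValues));
  unsigned Joins = 0, ChainI = 0;
  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
    if (ChainI == MaxParallelChains) {           // emit a TokenFactor, restart
      ++Joins;
      ChainI = 0;
    }
    Chains[ChainI] = i;                          // stand-in for the load/store chain
  }
  ++Joins;                                       // final TokenFactor over ChainI chains
  std::printf("joined %u chains into %u token factors (last width %u)\n",
              NumValues, Joins, ChainI);
  return 0;
}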
@@ -3031,7 +3137,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
// Add the intrinsic ID as an integer operand if it's not a target intrinsic.
- if (!IsTgtIntrinsic)
+ if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
+ Info.opc == ISD::INTRINSIC_W_CHAIN)
Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy()));
// Add all operands of the call to the operand list.
@@ -3062,7 +3169,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
// This is target intrinsic that touches memory
Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(),
VTs, &Ops[0], Ops.size(),
- Info.memVT, Info.ptrVal, Info.offset,
+ Info.memVT,
+ MachinePointerInfo(Info.ptrVal, Info.offset),
Info.align, Info.vol,
Info.readMem, Info.writeMem);
} else if (!HasChain) {
@@ -3087,7 +3195,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
if (!I.getType()->isVoidTy()) {
if (const VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
EVT VT = TLI.getValueType(PTy);
- Result = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(), VT, Result);
+ Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result);
}
setValue(&I, Result);
@@ -3106,7 +3214,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
DAG.getConstant(0x3f800000, MVT::i32));
- return DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t2);
+ return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
}
/// GetExponent - Get the exponent:
@@ -3205,13 +3313,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t5);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5);
// Add the exponent into the result in integer domain.
SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t6);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
//
@@ -3231,13 +3339,13 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,MVT::i32, t7);
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7);
// Add the exponent into the result in integer domain.
SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t8);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
//
@@ -3269,14 +3377,14 @@ SelectionDAGBuilder::visitExp(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue TwoToFracPartOfX = DAG.getNode(ISD::BIT_CONVERT, dl,
+ SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,
MVT::i32, t13);
// Add the exponent into the result in integer domain.
SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32,
TwoToFracPartOfX, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, t14);
+ result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14);
}
} else {
// No special expansion.
@@ -3298,7 +3406,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3408,7 +3516,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Get the exponent.
SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
@@ -3517,7 +3625,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) {
if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 &&
LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
SDValue Op = getValue(I.getArgOperand(0));
- SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
+ SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
@@ -3645,11 +3753,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3670,11 +3778,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3706,11 +3814,11 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3778,11 +3886,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
getF32Constant(DAG, 0x3f7f5e7e));
- SDValue t6 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t5);
+ SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) {
// For floating-point precision of 12:
@@ -3803,11 +3911,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
getF32Constant(DAG, 0x3f7ff8fd));
- SDValue t8 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t7);
+ SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
} else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18
// For floating-point precision of 18:
@@ -3839,11 +3947,11 @@ SelectionDAGBuilder::visitPow(const CallInst &I) {
SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
getF32Constant(DAG, 0x3f800000));
- SDValue t14 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, t13);
+ SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13);
SDValue TwoToFractionalPartOfX =
DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX);
- result = DAG.getNode(ISD::BIT_CONVERT, dl,
+ result = DAG.getNode(ISD::BITCAST, dl,
MVT::f32, TwoToFractionalPartOfX);
}
} else {
@@ -3915,13 +4023,16 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS,
/// At the end of instruction selection, they will be inserted to the entry BB.
bool
SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
- int64_t Offset,
+ int64_t Offset,
const SDValue &N) {
const Argument *Arg = dyn_cast<Argument>(V);
if (!Arg)
return false;
MachineFunction &MF = DAG.getMachineFunction();
+ const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
+
// Ignore inlined function arguments here.
DIVariable DV(Variable);
if (DV.isInlinedFnArgument(MF.getFunction()))
@@ -3935,14 +4046,16 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
if (Arg->hasByValAttr()) {
// Byval arguments' frame index is recorded during argument lowering.
// Use this info directly.
- const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo();
Reg = TRI->getFrameRegister(MF);
Offset = FuncInfo.getByValArgumentFrameIndex(Arg);
+ // If the byval argument offset is not recorded then ignore this.
+ if (!Offset)
+ Reg = 0;
}
if (N.getNode() && N.getOpcode() == ISD::CopyFromReg) {
Reg = cast<RegisterSDNode>(N.getOperand(1))->getReg();
- if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
+ if (TargetRegisterInfo::isVirtualRegister(Reg)) {
MachineRegisterInfo &RegInfo = MF.getRegInfo();
unsigned PR = RegInfo.getLiveInPhysReg(Reg);
if (PR)
@@ -3951,13 +4064,25 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
if (!Reg) {
+ // Check if the ValueMap has a register number.
DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
- if (VMI == FuncInfo.ValueMap.end())
- return false;
- Reg = VMI->second;
+ if (VMI != FuncInfo.ValueMap.end())
+ Reg = VMI->second;
}
- const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo();
+ if (!Reg && N.getNode()) {
+ // Check if a frame index is available.
+ if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
+ if (FrameIndexSDNode *FINode =
+ dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode())) {
+ Reg = TRI->getFrameRegister(MF);
+ Offset = FINode->getIndex();
+ }
+ }
+
+ if (!Reg)
+ return false;
+
MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(),
TII->get(TargetOpcode::DBG_VALUE))
.addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable);
@@ -3966,9 +4091,11 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable,
}
// VisualStudio defines setjmp as _setjmp
-#if defined(_MSC_VER) && defined(setjmp)
-#define setjmp_undefined_for_visual_studio
-#undef setjmp
+#if defined(_MSC_VER) && defined(setjmp) && \
+ !defined(setjmp_undefined_for_msvc)
+# pragma push_macro("setjmp")
+# undef setjmp
+# define setjmp_undefined_for_msvc
#endif
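The change above stops permanently #undef-ing setjmp under MSVC and instead records it with push_macro so the macro can be restored later in the file. A minimal standalone illustration of that idiom, using a made-up MIN macro rather than the real setjmp handling:

#define MIN(a, b) ((a) < (b) ? (a) : (b))

#pragma push_macro("MIN")
#undef MIN
int MIN = 42;              // MIN is a plain identifier while the macro is suppressed
#pragma pop_macro("MIN")

int use_macro_again(int x) { return MIN(x, 7); }  // the macro definition is back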
/// visitIntrinsicCall - Lower the call to the specified intrinsic function. If
@@ -4013,7 +4140,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::memset: {
@@ -4028,7 +4156,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0));
+ MachinePointerInfo(I.getArgOperand(0))));
return 0;
}
case Intrinsic::memmove: {
@@ -4044,22 +4172,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDValue Op3 = getValue(I.getArgOperand(2));
unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
-
- // If the source and destination are known to not be aliases, we can
- // lower memmove as memcpy.
- uint64_t Size = -1ULL;
- if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op3))
- Size = C->getZExtValue();
- if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) ==
- AliasAnalysis::NoAlias) {
- DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- false, I.getArgOperand(0), 0,
- I.getArgOperand(1), 0));
- return 0;
- }
-
DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol,
- I.getArgOperand(0), 0, I.getArgOperand(1), 0));
+ MachinePointerInfo(I.getArgOperand(0)),
+ MachinePointerInfo(I.getArgOperand(1))));
return 0;
}
case Intrinsic::dbg_declare: {
@@ -4078,10 +4193,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Check if address has undef value.
if (isa<UndefValue>(Address) ||
(Address->use_empty() && !isa<Argument>(Address))) {
- SDDbgValue*SDV =
- DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
}
@@ -4092,7 +4204,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
SDDbgValue *SDV;
if (N.getNode()) {
// Parameters are handled specially.
- bool isParameter =
+ bool isParameter =
DIVariable(Variable).getTag() == dwarf::DW_TAG_arg_variable;
if (const BitCastInst *BCI = dyn_cast<BitCastInst>(Address))
Address = BCI->getOperand(0);
@@ -4104,25 +4216,40 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Byval parameter. We have a frame index at this point.
SDV = DAG.getDbgValue(Variable, FINode->getIndex(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet. This might be a
// parameter of a callee function that got inlined, for example.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
} else if (AI)
SDV = DAG.getDbgValue(Variable, N.getNode(), N.getResNo(),
0, dl, SDNodeOrder);
- else
+ else {
// Can't do anything with other non-AI cases yet.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
return 0;
+ }
DAG.AddDbgValue(SDV, N.getNode(), isParameter);
} else {
- // If Address is an arugment then try to emits its dbg value using
- // virtual register info from the FuncInfo.ValueMap. Otherwise add undef
- // to help track missing debug info.
+ // If Address is an argument then try to emit its dbg value using
+ // virtual register info from the FuncInfo.ValueMap.
if (!EmitFuncArgumentDbgValue(Address, Variable, 0, N)) {
- SDV = DAG.getDbgValue(Variable, UndefValue::get(Address->getType()),
- 0, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ // If the variable is pinned by an alloca in a dominating bb then
+ // use StaticAllocaMap.
+ if (const AllocaInst *AI = dyn_cast<AllocaInst>(Address)) {
+ if (AI->getParent() != DI.getParent()) {
+ DenseMap<const AllocaInst*, int>::iterator SI =
+ FuncInfo.StaticAllocaMap.find(AI);
+ if (SI != FuncInfo.StaticAllocaMap.end()) {
+ SDV = DAG.getDbgValue(Variable, SI->second,
+ 0, dl, SDNodeOrder);
+ DAG.AddDbgValue(SDV, 0, false);
+ return 0;
+ }
+ }
+ }
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
return 0;
@@ -4160,17 +4287,15 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
N.getResNo(), Offset, dl, SDNodeOrder);
DAG.AddDbgValue(SDV, N.getNode(), false);
}
- } else if (isa<PHINode>(V) && !V->use_empty() ) {
+ } else if (!V->use_empty() ) {
// Do not call getValue(V) yet, as we don't want to generate code.
// Remember it for later.
DanglingDebugInfo DDI(&DI, dl, SDNodeOrder);
DanglingDebugInfoMap[V] = DDI;
} else {
// We may expand this to cover more cases. One case where we have no
- // data available is an unreferenced parameter; we need this fallback.
- SDV = DAG.getDbgValue(Variable, UndefValue::get(V->getType()),
- Offset, dl, SDNodeOrder);
- DAG.AddDbgValue(SDV, 0, false);
+ // data available is an unreferenced parameter.
+ DEBUG(dbgs() << "Dropping debug info for " << DI);
}
}
@@ -4186,7 +4311,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-
+
MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
if (!DI.getDebugLoc().isUnknown() && MMI.hasDebugInfo())
MMI.setVariableDbgInfo(Variable, FI, DI.getDebugLoc());
@@ -4282,11 +4407,75 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
}
case Intrinsic::eh_sjlj_longjmp: {
DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other,
- getRoot(),
- getValue(I.getArgOperand(0))));
+ getRoot(), getValue(I.getArgOperand(0))));
+ return 0;
+ }
+ case Intrinsic::eh_sjlj_dispatch_setup: {
+ DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_DISPATCHSETUP, dl, MVT::Other,
+ getRoot(), getValue(I.getArgOperand(0))));
return 0;
}
+ case Intrinsic::x86_mmx_pslli_w:
+ case Intrinsic::x86_mmx_pslli_d:
+ case Intrinsic::x86_mmx_pslli_q:
+ case Intrinsic::x86_mmx_psrli_w:
+ case Intrinsic::x86_mmx_psrli_d:
+ case Intrinsic::x86_mmx_psrli_q:
+ case Intrinsic::x86_mmx_psrai_w:
+ case Intrinsic::x86_mmx_psrai_d: {
+ SDValue ShAmt = getValue(I.getArgOperand(1));
+ if (isa<ConstantSDNode>(ShAmt)) {
+ visitTargetIntrinsic(I, Intrinsic);
+ return 0;
+ }
+ unsigned NewIntrinsic = 0;
+ EVT ShAmtVT = MVT::v2i32;
+ switch (Intrinsic) {
+ case Intrinsic::x86_mmx_pslli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_w;
+ break;
+ case Intrinsic::x86_mmx_pslli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_d;
+ break;
+ case Intrinsic::x86_mmx_pslli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psll_q;
+ break;
+ case Intrinsic::x86_mmx_psrli_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_w;
+ break;
+ case Intrinsic::x86_mmx_psrli_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_d;
+ break;
+ case Intrinsic::x86_mmx_psrli_q:
+ NewIntrinsic = Intrinsic::x86_mmx_psrl_q;
+ break;
+ case Intrinsic::x86_mmx_psrai_w:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_w;
+ break;
+ case Intrinsic::x86_mmx_psrai_d:
+ NewIntrinsic = Intrinsic::x86_mmx_psra_d;
+ break;
+ default: llvm_unreachable("Impossible intrinsic"); // Can't reach here.
+ }
+
+ // The vector shift intrinsics with scalars use 32-bit shift amounts, but
+ // the SSE2/MMX shift instructions read 64 bits. Set the upper 32 bits
+ // to be zero.
+ // We must do this early because v2i32 is not a legal type.
+ DebugLoc dl = getCurDebugLoc();
+ SDValue ShOps[2];
+ ShOps[0] = ShAmt;
+ ShOps[1] = DAG.getConstant(0, MVT::i32);
+ ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2);
+ EVT DestVT = TLI.getValueType(I.getType());
+ ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt);
+ Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
+ DAG.getConstant(NewIntrinsic, MVT::i32),
+ getValue(I.getArgOperand(0)), ShAmt);
+ setValue(&I, Res);
+ return 0;
+ }
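The MMX case added above rewrites the scalar shift-count intrinsics into their vector forms because the hardware reads a 64-bit count: the 32-bit amount is placed in the low lane of a v2i32 with a zero upper lane, then bitcast to the destination type. A standalone sketch of the value-level effect (plain C++, with a hypothetical widenShiftAmount helper, not SelectionDAG calls):

#include <cstdint>

// Place a 32-bit shift count in the low 32 bits of a 64-bit operand and zero
// the upper half, mirroring the BUILD_VECTOR {ShAmt, 0} plus BITCAST above.
std::uint64_t widenShiftAmount(std::uint32_t ShAmt) {
  std::uint32_t Lanes[2] = {ShAmt, 0u};
  return static_cast<std::uint64_t>(Lanes[0]) |
         (static_cast<std::uint64_t>(Lanes[1]) << 32);
}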
case Intrinsic::convertff:
case Intrinsic::convertfsi:
case Intrinsic::convertfui:
@@ -4430,8 +4619,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
// Store the stack protector onto the stack.
Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN,
- PseudoSourceValue::getFixedStack(FI),
- 0, true, false, 0);
+ MachinePointerInfo::getFixedStack(FI),
+ true, false, 0);
setValue(&I, Res);
DAG.setRoot(Res);
return 0;
@@ -4510,14 +4699,22 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
case Intrinsic::prefetch: {
SDValue Ops[4];
+ unsigned rw = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
Ops[0] = getRoot();
Ops[1] = getValue(I.getArgOperand(0));
Ops[2] = getValue(I.getArgOperand(1));
Ops[3] = getValue(I.getArgOperand(2));
- DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4));
+ DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl,
+ DAG.getVTList(MVT::Other),
+ &Ops[0], 4,
+ EVT::getIntegerVT(*Context, 8),
+ MachinePointerInfo(I.getArgOperand(0)),
+ 0, /* align */
+ false, /* volatile */
+ rw==0, /* read */
+ rw==1)); /* write */
return 0;
}
-
case Intrinsic::memory_barrier: {
SDValue Ops[6];
Ops[0] = getRoot();
@@ -4536,7 +4733,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
getValue(I.getArgOperand(0)),
getValue(I.getArgOperand(1)),
getValue(I.getArgOperand(2)),
- I.getArgOperand(0));
+ MachinePointerInfo(I.getArgOperand(0)));
setValue(&I, L);
DAG.setRoot(L.getValue(1));
return 0;
@@ -4599,6 +4796,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
FTy->isVarArg(), Outs, FTy->getContext());
SDValue DemoteStackSlot;
+ int DemoteStackIdx = -100;
if (!CanLowerReturn) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
@@ -4606,10 +4804,10 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
FTy->getReturnType());
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
- DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy());
Entry.Node = DemoteStackSlot;
Entry.Ty = StackSlotPtrType;
Entry.isSExt = false;
@@ -4703,7 +4901,9 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
DemoteStackSlot,
DAG.getConstant(Offsets[i], PtrVT));
SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second,
- Add, NULL, Offsets[i], false, false, 1);
+ Add,
+ MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]),
+ false, false, 1);
Values[i] = L;
Chains[i] = L.getValue(1);
}
@@ -4711,7 +4911,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
PendingLoads.push_back(Chain);
-
+
// Collect the legal value parts into potentially illegal values
// that correspond to the original function's return values.
SmallVector<EVT, 4> RetTys;
@@ -4724,7 +4924,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee,
EVT VT = RetTys[I];
EVT RegisterVT = TLI.getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
-
+
SDValue ReturnValue =
getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
@@ -4806,7 +5006,7 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT,
SDValue Ptr = Builder.getValue(PtrVal);
SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root,
- Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/,
+ Ptr, MachinePointerInfo(PtrVal),
false /*volatile*/,
false /*nontemporal*/, 1 /* align=1 */);
@@ -4902,7 +5102,25 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
visitInlineAsm(&I);
return;
}
-
+
+ // See if any floating point values are being passed to this function. This is
+ // used to emit an undefined reference to fltused on Windows.
+ const FunctionType *FT =
+ cast<FunctionType>(I.getCalledValue()->getType()->getContainedType(0));
+ MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI();
+ if (FT->isVarArg() &&
+ !MMI.callsExternalVAFunctionWithFloatingPointArguments()) {
+ for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
+ const Type* T = I.getArgOperand(i)->getType();
+ for (po_iterator<const Type*> i = po_begin(T), e = po_end(T);
+ i != e; ++i) {
+ if (!i->isFloatingPointTy()) continue;
+ MMI.setCallsExternalVAFunctionWithFloatingPointArguments(true);
+ break;
+ }
+ }
+ }
+
const char *RenameFn = 0;
if (Function *F = I.getCalledFunction()) {
if (F->isDeclaration()) {
@@ -4980,7 +5198,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
}
}
}
-
+
SDValue Callee;
if (!RenameFn)
Callee = getValue(I.getCalledValue());
@@ -5008,7 +5226,7 @@ public:
/// contains the set of register corresponding to the operand.
RegsForValue AssignedRegs;
- explicit SDISelAsmOperandInfo(const InlineAsm::ConstraintInfo &info)
+ explicit SDISelAsmOperandInfo(const TargetLowering::AsmOperandInfo &info)
: TargetLowering::AsmOperandInfo(info), CallOperand(0,0) {
}
@@ -5083,6 +5301,8 @@ private:
}
};
+typedef SmallVector<SDISelAsmOperandInfo,16> SDISelAsmOperandInfoVector;
+
} // end llvm namespace.
/// isAllocatableRegister - If the specified register is safe to allocate,
@@ -5192,7 +5412,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// vector types).
EVT RegVT = *PhysReg.second->vt_begin();
if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) {
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
} else if (RegVT.isInteger() && OpInfo.ConstraintVT.isFloatingPoint()) {
@@ -5202,7 +5422,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo,
// machine.
RegVT = EVT::getIntegerVT(Context,
OpInfo.ConstraintVT.getSizeInBits());
- OpInfo.CallOperand = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
RegVT, OpInfo.CallOperand);
OpInfo.ConstraintVT = RegVT;
}
@@ -5320,30 +5540,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
/// ConstraintOperands - Information about all of the constraints.
- std::vector<SDISelAsmOperandInfo> ConstraintOperands;
+ SDISelAsmOperandInfoVector ConstraintOperands;
std::set<unsigned> OutputRegs, InputRegs;
- // Do a prepass over the constraints, canonicalizing them, and building up the
- // ConstraintOperands list.
- std::vector<InlineAsm::ConstraintInfo>
- ConstraintInfos = IA->ParseConstraints();
-
- bool hasMemory = hasInlineAsmMemConstraint(ConstraintInfos, TLI);
-
- SDValue Chain, Flag;
-
- // We won't need to flush pending loads if this asm doesn't touch
- // memory and is nonvolatile.
- if (hasMemory || IA->hasSideEffects())
- Chain = getRoot();
- else
- Chain = DAG.getRoot();
+ TargetLowering::AsmOperandInfoVector TargetConstraints = TLI.ParseConstraints(CS);
+ bool hasMemory = false;
unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
unsigned ResNo = 0; // ResNo - The result number of the next output.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
- ConstraintOperands.push_back(SDISelAsmOperandInfo(ConstraintInfos[i]));
+ for (unsigned i = 0, e = TargetConstraints.size(); i != e; ++i) {
+ ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i]));
SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back();
EVT OpVT = MVT::Other;
@@ -5380,9 +5587,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// If this is an input or an indirect output, process the call argument.
// BasicBlocks are labels, currently appearing only in asm's.
if (OpInfo.CallOperandVal) {
- // Strip bitcasts, if any. This mostly comes up for functions.
- OpInfo.CallOperandVal = OpInfo.CallOperandVal->stripPointerCasts();
-
if (const BasicBlock *BB = dyn_cast<BasicBlock>(OpInfo.CallOperandVal)) {
OpInfo.CallOperand = DAG.getBasicBlock(FuncInfo.MBBMap[BB]);
} else {
@@ -5393,11 +5597,33 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
OpInfo.ConstraintVT = OpVT;
+
+ // Indirect operand accesses access memory.
+ if (OpInfo.isIndirect)
+ hasMemory = true;
+ else {
+ for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) {
+ TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[j]);
+ if (CType == TargetLowering::C_Memory) {
+ hasMemory = true;
+ break;
+ }
+ }
+ }
}
+ SDValue Chain, Flag;
+
+ // We won't need to flush pending loads if this asm doesn't touch
+ // memory and is nonvolatile.
+ if (hasMemory || IA->hasSideEffects())
+ Chain = getRoot();
+ else
+ Chain = DAG.getRoot();
+
// Second pass over the constraints: compute which constraint option to use
// and assign registers to constraints that want a specific physreg.
- for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
SDISelAsmOperandInfo &OpInfo = ConstraintOperands[i];
// If this is an output operand with a matching input operand, look up the
@@ -5406,7 +5632,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// error.
if (OpInfo.hasMatchingInput()) {
SDISelAsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
-
+
if (OpInfo.ConstraintVT != Input.ConstraintVT) {
if ((OpInfo.ConstraintVT.isInteger() !=
Input.ConstraintVT.isInteger()) ||
@@ -5427,7 +5653,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// need to provide an address for the memory input.
if (OpInfo.ConstraintType == TargetLowering::C_Memory &&
!OpInfo.isIndirect) {
- assert(OpInfo.Type == InlineAsm::isInput &&
+ assert((OpInfo.isMultipleAlternative || (OpInfo.Type == InlineAsm::isInput)) &&
"Can only indirectify direct input operands!");
// Memory operands really want the address of the value. If we don't have
@@ -5451,7 +5677,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
- OpInfo.CallOperand, StackSlot, NULL, 0,
+ OpInfo.CallOperand, StackSlot,
+ MachinePointerInfo::getFixedStack(SSFI),
false, false, 0);
OpInfo.CallOperand = StackSlot;
}
@@ -5469,8 +5696,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
GetRegistersForValue(OpInfo, OutputRegs, InputRegs);
}
- ConstraintInfos.clear();
-
// Second pass - Loop over all of the operands, assigning virtual or physregs
// to register class operands.
for (unsigned i = 0, e = ConstraintOperands.size(); i != e; ++i) {
@@ -5495,9 +5720,14 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc");
AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc));
- // Remember the AlignStack bit as operand 3.
- AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0,
- MVT::i1));
+ // Remember the HasSideEffect and AlignStack bits as operand 3.
+ unsigned ExtraInfo = 0;
+ if (IA->hasSideEffects())
+ ExtraInfo |= InlineAsm::Extra_HasSideEffects;
+ if (IA->isAlignStack())
+ ExtraInfo |= InlineAsm::Extra_IsAlignStack;
+ AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo,
+ TLI.getPointerTy()));
// Loop over all of the inputs, copying the operand values into the
// appropriate registers and processing the output regs.
@@ -5588,7 +5818,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
" don't know how to handle tied "
"indirect register inputs");
}
-
+
RegsForValue MatchedRegs;
MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType());
EVT RegVT = AsmNodeOperands[CurOp+1].getValueType();
@@ -5607,7 +5837,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
DAG, AsmNodeOperands);
break;
}
-
+
assert(InlineAsm::isMemKind(OpFlag) && "Unknown matching constraint!");
assert(InlineAsm::getNumOperandRegisters(OpFlag) == 1 &&
"Unexpected number of operands");
@@ -5622,8 +5852,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
}
// Treat indirect 'X' constraint as memory.
- if (OpInfo.ConstraintType == TargetLowering::C_Other &&
- OpInfo.isIndirect)
+ if (OpInfo.ConstraintType == TargetLowering::C_Other &&
+ OpInfo.isIndirect)
OpInfo.ConstraintType = TargetLowering::C_Memory;
if (OpInfo.ConstraintType == TargetLowering::C_Other) {
@@ -5642,7 +5872,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end());
break;
}
-
+
if (OpInfo.ConstraintType == TargetLowering::C_Memory) {
assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!");
assert(InOperandVal.getValueType() == TLI.getPointerTy() &&
@@ -5693,7 +5923,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
if (Flag.getNode()) AsmNodeOperands.push_back(Flag);
Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(),
- DAG.getVTList(MVT::Other, MVT::Flag),
+ DAG.getVTList(MVT::Other, MVT::Glue),
&AsmNodeOperands[0], AsmNodeOperands.size());
Flag = Chain.getValue(1);
@@ -5713,7 +5943,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
// not have the same VT as was expected. Convert it to the right type
// with bit_convert.
if (ResultType != Val.getValueType() && Val.getValueType().isVector()) {
- Val = DAG.getNode(ISD::BIT_CONVERT, getCurDebugLoc(),
+ Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(),
ResultType, Val);
} else if (ResultType != Val.getValueType() &&
@@ -5751,7 +5981,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) {
SDValue Val = DAG.getStore(Chain, getCurDebugLoc(),
StoresToEmit[i].first,
getValue(StoresToEmit[i].second),
- StoresToEmit[i].second, 0,
+ MachinePointerInfo(StoresToEmit[i].second),
false, false, 0);
OutChains.push_back(Val);
}
@@ -5888,7 +6118,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
for (unsigned i = 0; i != NumRegs; ++i) {
ISD::InputArg MyFlags;
- MyFlags.VT = RegisterVT;
+ MyFlags.VT = RegisterVT.getSimpleVT();
MyFlags.Used = isReturnValueUsed;
if (RetSExt)
MyFlags.Flags.setSExt();
@@ -5924,7 +6154,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
DEBUG(for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerCall emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerCall emitted a value with the wrong type!");
});
@@ -6085,7 +6315,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
assert(InVals[i].getNode() &&
"LowerFormalArguments emitted a null value!");
- assert(Ins[i].VT == InVals[i].getValueType() &&
+ assert(EVT(Ins[i].VT) == InVals[i].getValueType() &&
"LowerFormalArguments emitted a value with the wrong type!");
}
});
@@ -6154,7 +6384,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) {
// Note down frame index for byval arguments.
if (I->hasByValAttr() && !ArgValues.empty())
- if (FrameIndexSDNode *FI =
+ if (FrameIndexSDNode *FI =
dyn_cast<FrameIndexSDNode>(ArgValues[0].getNode()))
FuncInfo->setByValArgumentFrameIndex(I, FI->getIndex());
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 5f400e9c83ac..a1a70c394a51 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -258,15 +258,16 @@ private:
struct BitTestBlock {
BitTestBlock(APInt F, APInt R, const Value* SV,
- unsigned Rg, bool E,
+ unsigned Rg, EVT RgVT, bool E,
MachineBasicBlock* P, MachineBasicBlock* D,
const BitTestInfo& C):
- First(F), Range(R), SValue(SV), Reg(Rg), Emitted(E),
+ First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E),
Parent(P), Default(D), Cases(C) { }
APInt First;
APInt Range;
const Value *SValue;
unsigned Reg;
+ EVT RegVT;
bool Emitted;
MachineBasicBlock *Parent;
MachineBasicBlock *Default;
@@ -347,7 +348,7 @@ public:
SDValue getControlRoot();
DebugLoc getCurDebugLoc() const { return CurDebugLoc; }
-
+ void setCurDebugLoc(DebugLoc dl){ CurDebugLoc = dl; }
unsigned getSDNodeOrder() const { return SDNodeOrder; }
void CopyValueToVirtualRegister(const Value *V, unsigned Reg);
@@ -398,6 +399,10 @@ public:
void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall,
MachineBasicBlock *LandingPad = NULL);
+ /// UpdateSplitBlock - When an MBB was split during scheduling, update the
+ /// references that need to refer to the last resulting block.
+ void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last);
+
private:
// Terminator instructions.
void visitRet(const ReturnInst &I);
@@ -431,7 +436,8 @@ public:
void visitSwitchCase(CaseBlock &CB,
MachineBasicBlock *SwitchBB);
void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB);
- void visitBitTestCase(MachineBasicBlock* NextMBB,
+ void visitBitTestCase(BitTestBlock &BB,
+ MachineBasicBlock* NextMBB,
unsigned Reg,
BitTestCase &B,
MachineBasicBlock *SwitchBB);
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index 66cb5ceb09e5..62ebc81ef86e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
@@ -53,8 +54,17 @@
using namespace llvm;
STATISTIC(NumFastIselFailures, "Number of instructions fast isel failed on");
+STATISTIC(NumFastIselBlocks, "Number of blocks selected entirely by fast isel");
+STATISTIC(NumDAGBlocks, "Number of blocks selected using DAG");
STATISTIC(NumDAGIselRetries,"Number of times dag isel has to try another path");
+#ifndef NDEBUG
+STATISTIC(NumBBWithOutOfOrderLineInfo,
+ "Number of blocks with out of order line number info");
+STATISTIC(NumMBBWithOutOfOrderLineInfo,
+ "Number of machine blocks with out of order line number info");
+#endif
+
static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
@@ -170,15 +180,18 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// SelectionDAGISel code
//===----------------------------------------------------------------------===//
-SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) :
+SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm,
+ CodeGenOpt::Level OL) :
MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()),
FuncInfo(new FunctionLoweringInfo(TLI)),
CurDAG(new SelectionDAG(tm)),
SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)),
GFI(),
OptLevel(OL),
- DAGSize(0)
-{}
+ DAGSize(0) {
+ initializeGCModuleInfoPass(*PassRegistry::getPassRegistry());
+ initializeAliasAnalysisAnalysisGroup(*PassRegistry::getPassRegistry());
+ }
SelectionDAGISel::~SelectionDAGISel() {
delete SDB;
@@ -202,6 +215,7 @@ void SelectionDAGISel::getAnalysisUsage(AnalysisUsage &AU) const {
static bool FunctionCallsSetJmp(const Function *F) {
const Module *M = F->getParent();
static const char *ReturnsTwiceFns[] = {
+ "_setjmp",
"setjmp",
"sigsetjmp",
"setjmp_syscall",
@@ -227,6 +241,44 @@ static bool FunctionCallsSetJmp(const Function *F) {
#undef NUM_RETURNS_TWICE_FNS
}
+/// SplitCriticalSideEffectEdges - Look for critical edges carrying a PHI value
+/// that may trap. In this case we have to split the edge so that the path
+/// through the predecessor block that doesn't go to the phi block doesn't
+/// execute the possibly trapping instruction.
+///
+/// This is required for correctness, so it must be done even at -O0.
+///
+static void SplitCriticalSideEffectEdges(Function &Fn, Pass *SDISel) {
+ // Loop over blocks with phi nodes.
+ for (Function::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) {
+ PHINode *PN = dyn_cast<PHINode>(BB->begin());
+ if (PN == 0) continue;
+
+ ReprocessBlock:
+ // For each block with a PHI node, check to see if any of the input values
+ // are potentially trapping constant expressions. Constant expressions are
+ // the only potentially trapping value that can occur as the argument to a
+ // PHI.
+ for (BasicBlock::iterator I = BB->begin(); (PN = dyn_cast<PHINode>(I)); ++I)
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ ConstantExpr *CE = dyn_cast<ConstantExpr>(PN->getIncomingValue(i));
+ if (CE == 0 || !CE->canTrap()) continue;
+
+ // The only case we have to worry about is when the edge is critical.
+ // Since this block has a PHI Node, we assume it has multiple input
+ // edges: check to see if the pred has multiple successors.
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ if (Pred->getTerminator()->getNumSuccessors() == 1)
+ continue;
+
+ // Okay, we have to split this edge.
+ SplitCriticalEdge(Pred->getTerminator(),
+ GetSuccessorNumber(Pred, BB), SDISel, true);
+ goto ReprocessBlock;
+ }
+ }
+}
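The function added above splits the edge out of Pred whenever Pred has more than one successor, so a potentially trapping constant expression feeding a PHI is only evaluated on the path that actually reaches the PHI block. For reference, a generic sketch of the usual critical-edge test, using a hypothetical Block type rather than LLVM's BasicBlock:

#include <vector>

struct Block {
  std::vector<Block *> Succs;
  std::vector<Block *> Preds;
};

// An edge Pred -> BB is critical when Pred has several successors and BB has
// several predecessors; code placed on it needs its own block via splitting.
bool isCriticalEdge(const Block *Pred, const Block *BB) {
  return Pred->Succs.size() > 1 && BB->Preds.size() > 1;
}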
+
bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
// Do some sanity-checking on the command-line options.
assert((!EnableFastISelVerbose || EnableFastISel) &&
@@ -245,6 +297,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n");
+ SplitCriticalSideEffectEdges(const_cast<Function&>(Fn), this);
+
CurDAG->init(*MF);
FuncInfo->set(Fn, *MF);
SDB->init(GFI, *AA);
@@ -261,7 +315,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (!FuncInfo->ArgDbgValues.empty())
for (MachineRegisterInfo::livein_iterator LI = RegInfo->livein_begin(),
E = RegInfo->livein_end(); LI != E; ++LI)
- if (LI->second)
+ if (LI->second)
LiveInMap.insert(std::make_pair(LI->first, LI->second));
// Insert DBG_VALUE instructions for function arguments to the entry block.
@@ -282,14 +336,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
if (LDI != LiveInMap.end()) {
MachineInstr *Def = RegInfo->getVRegDef(LDI->second);
MachineBasicBlock::iterator InsertPos = Def;
- const MDNode *Variable =
+ const MDNode *Variable =
MI->getOperand(MI->getNumOperands()-1).getMetadata();
unsigned Offset = MI->getOperand(1).getImm();
// Def is never a terminator here, so it is ok to increment InsertPos.
- BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
+ BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(),
TII.get(TargetOpcode::DBG_VALUE))
.addReg(LDI->second, RegState::Debug)
.addImm(Offset).addMetadata(Variable);
+
+ // If this vreg is directly copied into an exported register then
+ // that COPY instructions also need DBG_VALUE, if it is the only
+ // user of LDI->second.
+ MachineInstr *CopyUseMI = NULL;
+ for (MachineRegisterInfo::use_iterator
+ UI = RegInfo->use_begin(LDI->second);
+ MachineInstr *UseMI = UI.skipInstruction();) {
+ if (UseMI->isDebugValue()) continue;
+ if (UseMI->isCopy() && !CopyUseMI && UseMI->getParent() == EntryMBB) {
+ CopyUseMI = UseMI; continue;
+ }
+ // Otherwise this is another use or second copy use.
+ CopyUseMI = NULL; break;
+ }
+ if (CopyUseMI) {
+ MachineInstr *NewMI =
+ BuildMI(*MF, CopyUseMI->getDebugLoc(),
+ TII.get(TargetOpcode::DBG_VALUE))
+ .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug)
+ .addImm(Offset).addMetadata(Variable);
+ EntryMBB->insertAfter(CopyUseMI, NewMI);
+ }
}
}
@@ -303,10 +380,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
II = MBB->begin(), IE = MBB->end(); II != IE; ++II) {
const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode());
- // Operand 1 of an inline asm instruction indicates whether the asm
- // needs stack or not.
- if ((II->isInlineAsm() && II->getOperand(1).getImm()) ||
- (TID.isCall() && !TID.isReturn())) {
+ if ((TID.isCall() && !TID.isReturn()) ||
+ II->isStackAligningInlineAsm()) {
MFI->setHasCalls(true);
goto done;
}
@@ -362,6 +437,7 @@ SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin,
// Final step, emit the lowered DAG as machine code.
CodeGenAndEmitDAG();
+ return;
}
void SelectionDAGISel::ComputeLiveOutVRegInfo() {
@@ -406,9 +482,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() {
// Only install this information if it tells us something.
if (NumSignBits != 1 || KnownZero != 0 || KnownOne != 0) {
- DestReg -= TargetRegisterInfo::FirstVirtualRegister;
- if (DestReg >= FuncInfo->LiveOutRegInfo.size())
- FuncInfo->LiveOutRegInfo.resize(DestReg+1);
+ FuncInfo->LiveOutRegInfo.grow(DestReg);
FunctionLoweringInfo::LiveOutInfo &LOI =
FuncInfo->LiveOutRegInfo[DestReg];
LOI.NumSignBits = NumSignBits;
@@ -541,13 +615,19 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Emit machine code to BB. This can change 'BB' to the last block being
// inserted into.
+ MachineBasicBlock *FirstMBB = FuncInfo->MBB, *LastMBB;
{
NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled);
- FuncInfo->MBB = Scheduler->EmitSchedule();
+ LastMBB = FuncInfo->MBB = Scheduler->EmitSchedule();
FuncInfo->InsertPt = Scheduler->InsertPos;
}
+ // If the block was split, make sure we update any references that are used to
+ // update PHI nodes later on.
+ if (FirstMBB != LastMBB)
+ SDB->UpdateSplitBlock(FirstMBB, LastMBB);
+
// Free the scheduler state.
{
NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName,
@@ -563,19 +643,19 @@ void SelectionDAGISel::DoInstructionSelection() {
DEBUG(errs() << "===== Instruction selection begins:\n");
PreprocessISelDAG();
-
+
// Select target instructions for the DAG.
{
// Number all nodes with a topological order and set DAGSize.
DAGSize = CurDAG->AssignTopologicalOrder();
-
+
// Create a dummy node (which is not added to allnodes), that adds
// a reference to the root node, preventing it from being deleted,
// and tracking any changes of the root.
HandleSDNode Dummy(CurDAG->getRoot());
ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
++ISelPosition;
-
+
// The AllNodes list is now topological-sorted. Visit the
// nodes by starting at the end of the list (the root of the
// graph) and preceding back toward the beginning (the entry
@@ -587,19 +667,19 @@ void SelectionDAGISel::DoInstructionSelection() {
// makes it theoretically possible to disable the DAGCombiner.
if (Node->use_empty())
continue;
-
+
SDNode *ResNode = Select(Node);
-
+
// FIXME: This is pretty gross. 'Select' should be changed to not return
// anything at all and this code should be nuked with a tactical strike.
-
+
// If node should not be replaced, continue with the next one.
if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
continue;
// Replace node.
if (ResNode)
ReplaceUses(Node, ResNode);
-
+
// If after the replacement this node is not used any more,
// remove this dead node.
if (Node->use_empty()) { // Don't delete EntryToken, etc.
@@ -607,9 +687,9 @@ void SelectionDAGISel::DoInstructionSelection() {
CurDAG->RemoveDeadNode(Node, &ISU);
}
}
-
+
CurDAG->setRoot(Dummy.getValue());
- }
+ }
DEBUG(errs() << "===== Instruction selection ends:\n");
@@ -661,6 +741,90 @@ void SelectionDAGISel::PrepareEHLandingPad() {
}
}
+
+
+
+bool SelectionDAGISel::TryToFoldFastISelLoad(const LoadInst *LI,
+ FastISel *FastIS) {
+ // Don't try to fold volatile loads. Target has to deal with alignment
+ // constraints.
+ if (LI->isVolatile()) return false;
+
+ // Figure out which vreg this is going into.
+ unsigned LoadReg = FastIS->getRegForValue(LI);
+ assert(LoadReg && "Load isn't already assigned a vreg? ");
+
+ // Check to see what the uses of this vreg are. If it has no uses, or more
+ // than one use (at the machine instr level) then we can't fold it.
+ MachineRegisterInfo::reg_iterator RI = RegInfo->reg_begin(LoadReg);
+ if (RI == RegInfo->reg_end())
+ return false;
+
+ // See if there is exactly one use of the vreg. If there are multiple uses,
+ // then the instruction got lowered to multiple machine instructions or the
+ // use of the loaded value ended up being multiple operands of the result;
+ // in either case, we can't fold this.
+ MachineRegisterInfo::reg_iterator PostRI = RI; ++PostRI;
+ if (PostRI != RegInfo->reg_end())
+ return false;
+
+ assert(RI.getOperand().isUse() &&
+ "The only use of the vreg must be a use, we haven't emitted the def!");
+
+ MachineInstr *User = &*RI;
+
+ // Set the insertion point properly. Folding the load can cause generation of
+ // other random instructions (like sign extends) for addressing modes; make
+ // sure they get inserted in a logical place before the new instruction.
+ FuncInfo->InsertPt = User;
+ FuncInfo->MBB = User->getParent();
+
+ // Ask the target to try folding the load.
+ return FastIS->TryToFoldLoad(User, RI.getOperandNo(), LI);
+}
+
+#ifndef NDEBUG
+/// CheckLineNumbers - Check if basic block instructions follow source order
+/// or not.
+static void CheckLineNumbers(const BasicBlock *BB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (BasicBlock::const_iterator BI = BB->begin(),
+ BE = BB->end(); BI != BE; ++BI) {
+ const DebugLoc DL = BI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+
+/// CheckLineNumbers - Check if machine basic block instructions follow source
+/// order or not.
+static void CheckLineNumbers(const MachineBasicBlock *MBB) {
+ unsigned Line = 0;
+ unsigned Col = 0;
+ for (MachineBasicBlock::const_iterator MBI = MBB->begin(),
+ MBE = MBB->end(); MBI != MBE; ++MBI) {
+ const DebugLoc DL = MBI->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ unsigned L = DL.getLine();
+ unsigned C = DL.getCol();
+ if (L < Line || (L == Line && C < Col)) {
+ ++NumMBBWithOutOfOrderLineInfo;
+ return;
+ }
+ Line = L;
+ Col = C;
+ }
+}
+#endif
+
void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Initialize the Fast-ISel state, if needed.
FastISel *FastIS = 0;
@@ -670,6 +834,9 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Iterate over all basic blocks in the function.
for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
const BasicBlock *LLVMBB = &*I;
+#ifndef NDEBUG
+ CheckLineNumbers(LLVMBB);
+#endif
FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB];
FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI();
@@ -682,10 +849,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
// Setup an EH landing-pad block.
if (FuncInfo->MBB->isLandingPad())
PrepareEHLandingPad();
-
+
// Lower any arguments needed in this block if this is the entry block.
- if (LLVMBB == &Fn.getEntryBlock())
+ if (LLVMBB == &Fn.getEntryBlock()) {
+ for (BasicBlock::const_iterator DBI = LLVMBB->begin(), DBE = LLVMBB->end();
+ DBI != DBE; ++DBI) {
+ if (const DbgInfoIntrinsic *DI = dyn_cast<DbgInfoIntrinsic>(DBI)) {
+ const DebugLoc DL = DI->getDebugLoc();
+ SDB->setCurDebugLoc(DL);
+ break;
+ }
+ }
LowerArguments(LLVMBB);
+ }
// Before doing SelectionDAG ISel, see if FastISel has been requested.
if (FastIS) {
@@ -723,8 +899,19 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
// Try to select the instruction with FastISel.
- if (FastIS->SelectInstruction(Inst))
+ if (FastIS->SelectInstruction(Inst)) {
+ // If fast isel succeeded, check to see if there is a single-use
+ // non-volatile load right before the selected instruction, and see if
+ // the load is used by the instruction. If so, try to fold it.
+ const Instruction *BeforeInst = 0;
+ if (Inst != Begin)
+ BeforeInst = llvm::prior(llvm::prior(BI));
+ if (BeforeInst && isa<LoadInst>(BeforeInst) &&
+ BeforeInst->hasOneUse() && *BeforeInst->use_begin() == Inst &&
+ TryToFoldFastISelLoad(cast<LoadInst>(BeforeInst), FastIS))
+ --BI; // If we succeeded, don't re-select the load.
continue;
+ }
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(Inst)) {
@@ -771,6 +958,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
FastIS->recomputeInsertPt();
}
+ if (Begin != BI)
+ ++NumDAGBlocks;
+ else
+ ++NumFastIselBlocks;
+
// Run SelectionDAG instruction selection on the remainder of the block
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
@@ -782,6 +974,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) {
}
delete FastIS;
+#ifndef NDEBUG
+ for (MachineFunction::const_iterator MBI = MF->begin(), MBE = MF->end();
+ MBI != MBE; ++MBI)
+ CheckLineNumbers(MBI);
+#endif
}
void
@@ -831,12 +1028,14 @@ SelectionDAGISel::FinishBasicBlock() {
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Emit the code
if (j+1 != ej)
- SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Cases[j+1].ThisBB,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
else
- SDB->visitBitTestCase(SDB->BitTestCases[i].Default,
+ SDB->visitBitTestCase(SDB->BitTestCases[i],
+ SDB->BitTestCases[i].Default,
SDB->BitTestCases[i].Reg,
SDB->BitTestCases[i].Cases[j],
FuncInfo->MBB);
@@ -951,7 +1150,7 @@ SelectionDAGISel::FinishBasicBlock() {
// additional DAGs necessary.
for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) {
// Set the current basic block to the mbb we wish to insert the code into
- MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
+ FuncInfo->MBB = SDB->SwitchCases[i].ThisBB;
FuncInfo->InsertPt = FuncInfo->MBB->end();
// Determine the unique successors.
@@ -960,13 +1159,15 @@ SelectionDAGISel::FinishBasicBlock() {
if (SDB->SwitchCases[i].TrueBB != SDB->SwitchCases[i].FalseBB)
Succs.push_back(SDB->SwitchCases[i].FalseBB);
- // Emit the code. Note that this could result in ThisBB being split, so
- // we need to check for updates.
+ // Emit the code. Note that this could result in FuncInfo->MBB being split.
SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB);
CurDAG->setRoot(SDB->getRoot());
SDB->clear();
CodeGenAndEmitDAG();
- ThisBB = FuncInfo->MBB;
+
+ // Remember the last block, now that any splitting is done, for use in
+ // populating PHI nodes in successors.
+ MachineBasicBlock *ThisBB = FuncInfo->MBB;
// Handle any PHI nodes in successors of this chunk, as if we were coming
// from the original BB before switch expansion. Note that PHI nodes can
@@ -1016,10 +1217,6 @@ ScheduleDAGSDNodes *SelectionDAGISel::CreateScheduler() {
return Ctor(this, OptLevel);
}
-ScheduleHazardRecognizer *SelectionDAGISel::CreateTargetHazardRecognizer() {
- return new ScheduleHazardRecognizer();
-}
-
//===----------------------------------------------------------------------===//
// Helper functions used by the generated instruction selector.
//===----------------------------------------------------------------------===//
@@ -1099,11 +1296,11 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0
Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1
Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc
- Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3
+ Ops.push_back(InOps[InlineAsm::Op_ExtraInfo]); // 3 (SideEffect, AlignStack)
unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size();
- if (InOps[e-1].getValueType() == MVT::Flag)
- --e; // Don't process a flag operand if it is here.
+ if (InOps[e-1].getValueType() == MVT::Glue)
+ --e; // Don't process a glue operand if it is here.
while (i != e) {
unsigned Flags = cast<ConstantSDNode>(InOps[i])->getZExtValue();
@@ -1130,15 +1327,15 @@ SelectInlineAsmMemoryOperands(std::vector<SDValue> &Ops) {
}
}
- // Add the flag input back if present.
+ // Add the glue input back if present.
if (e != InOps.size())
Ops.push_back(InOps.back());
}
-/// findFlagUse - Return use of EVT::Flag value produced by the specified
+/// findGlueUse - Return use of MVT::Glue value produced by the specified
/// SDNode.
///
-static SDNode *findFlagUse(SDNode *N) {
+static SDNode *findGlueUse(SDNode *N) {
unsigned FlagResNo = N->getNumValues()-1;
for (SDNode::use_iterator I = N->use_begin(), E = N->use_end(); I != E; ++I) {
SDUse &Use = I.getUse();
@@ -1160,11 +1357,11 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// never find it.
//
// The Use may be -1 (unassigned) if it is a newly allocated node. This can
- // happen because we scan down to newly selected nodes in the case of flag
+ // happen because we scan down to newly selected nodes in the case of glue
// uses.
if ((Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1))
return false;
-
+
// Don't revisit nodes if we already scanned it and didn't fail, we know we
// won't fail if we scan it again.
if (!Visited.insert(Use))
@@ -1174,7 +1371,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
// Ignore chain uses, they are validated by HandleMergeInputChains.
if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
continue;
-
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1221,8 +1418,8 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
//
// * indicates nodes to be folded together.
//
- // If Root produces a flag, then it gets (even more) interesting. Since it
- // will be "glued" together with its flag use in the scheduler, we need to
+ // If Root produces glue, then it gets (even more) interesting. Since it
+ // will be "glued" together with its glue use in the scheduler, we need to
// check if it might reach N.
//
// [N*] //
@@ -1240,30 +1437,30 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
// ^ / //
// f / //
// | / //
- // [FU] //
+ // [GU] //
//
- // If FU (flag use) indirectly reaches N (the load), and Root folds N
- // (call it Fold), then X is a predecessor of FU and a successor of
- // Fold. But since Fold and FU are flagged together, this will create
+ // If GU (glue use) indirectly reaches N (the load), and Root folds N
+ // (call it Fold), then X is a predecessor of GU and a successor of
+ // Fold. But since Fold and GU are glued together, this will create
// a cycle in the scheduling graph.
- // If the node has flags, walk down the graph to the "lowest" node in the
- // flagged set.
+ // If the node has glue, walk down the graph to the "lowest" node in the
+ // glued set.
EVT VT = Root->getValueType(Root->getNumValues()-1);
- while (VT == MVT::Flag) {
- SDNode *FU = findFlagUse(Root);
- if (FU == NULL)
+ while (VT == MVT::Glue) {
+ SDNode *GU = findGlueUse(Root);
+ if (GU == NULL)
break;
- Root = FU;
+ Root = GU;
VT = Root->getValueType(Root->getNumValues()-1);
-
- // If our query node has a flag result with a use, we've walked up it. If
+
+ // If our query node has a glue result with a use, we've walked up it. If
// the user (which has already been selected) has a chain or indirectly uses
// the chain, our WalkChainUsers predicate will not consider it. Because of
// this, we cannot ignore chains in this predicate.
IgnoreChains = false;
}
-
+
SmallPtrSet<SDNode*, 16> Visited;
return !findNonImmUse(Root, N.getNode(), U, Root, Visited, IgnoreChains);
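The check above is plain reachability: folding N into Root is only legal if no other user of N can reach Root through operand edges, since gluing Root to such a user would close a cycle in the scheduling graph. A minimal standalone sketch of that test over an ordinary adjacency list (not the SDNode API; the node numbering just mirrors the N/X/Root diagram in the comment above):

#include <cstdio>
#include <unordered_set>
#include <vector>

// Operands[u] lists the nodes that u consumes, like an SDNode's operands.
using Graph = std::vector<std::vector<int>>;

// Returns true if Def is reachable from Use through a node other than the
// immediate use being folded, i.e. folding would create a cycle.
static bool FindNonImmUse(const Graph &Operands, int Use, int Def, int ImmedUse,
                          std::unordered_set<int> &Visited) {
  if (!Visited.insert(Use).second)
    return false;                       // already scanned this node
  for (int Op : Operands[Use]) {
    if (Op == Def) {
      if (Use == ImmedUse)
        continue;                       // the fold edge itself is fine
      return true;                      // Def reachable via a side path
    }
    if (FindNonImmUse(Operands, Op, Def, ImmedUse, Visited))
      return true;
  }
  return false;
}

int main() {
  // 0 = N (the load), 1 = X, 2 = Root; Root uses N and X, X uses N.
  Graph Operands = {{}, {0}, {0, 1}};
  std::unordered_set<int> Visited;
  bool WouldCycle = FindNonImmUse(Operands, /*Use=*/2, /*Def=*/0,
                                  /*ImmedUse=*/2, Visited);
  std::printf("folding creates a cycle: %s\n", WouldCycle ? "yes" : "no"); // yes
}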
@@ -1272,10 +1469,10 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
std::vector<SDValue> Ops(N->op_begin(), N->op_end());
SelectInlineAsmMemoryOperands(Ops);
-
+
std::vector<EVT> VTs;
VTs.push_back(MVT::Other);
- VTs.push_back(MVT::Flag);
+ VTs.push_back(MVT::Glue);
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
New->setNodeId(-1);
@@ -1287,11 +1484,11 @@ SDNode *SelectionDAGISel::Select_UNDEF(SDNode *N) {
}
/// GetVBR - decode a vbr encoding whose top bit is set.
-ALWAYS_INLINE static uint64_t
+LLVM_ATTRIBUTE_ALWAYS_INLINE static uint64_t
GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
assert(Val >= 128 && "Not a VBR");
Val &= 127; // Remove first vbr bit.
-
+
unsigned Shift = 7;
uint64_t NextBits;
do {
@@ -1299,25 +1496,25 @@ GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
Val |= (NextBits&127) << Shift;
Shift += 7;
} while (NextBits & 128);
-
+
return Val;
}
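GetVBR above finishes decoding a variable-bit-rate value whose first byte has already been read and found to have its top bit set: every byte contributes 7 payload bits, and the top bit says whether another byte follows. A self-contained sketch of the same scheme, including the first-byte handling, using a made-up two-byte table rather than a real matcher table:

#include <cstdint>
#include <cstdio>

// Decode one VBR-encoded value starting at Table[Idx]; each byte carries
// 7 payload bits, and the top bit says "more bytes follow".
static uint64_t DecodeVBR(const unsigned char *Table, unsigned &Idx) {
  uint64_t Val = Table[Idx++];
  if (Val < 128)
    return Val;                 // single-byte value, no continuation
  Val &= 127;
  unsigned Shift = 7;
  uint64_t Next;
  do {
    Next = Table[Idx++];
    Val |= (Next & 127) << Shift;
    Shift += 7;
  } while (Next & 128);
  return Val;
}

int main() {
  // 300 = 0b1'0010'1100: low 7 bits are 0x2C (emitted with the continuation
  // bit set, giving 0xAC), and the remaining bits are 0x02.
  const unsigned char Table[] = {0xAC, 0x02};
  unsigned Idx = 0;
  std::printf("%llu\n", (unsigned long long)DecodeVBR(Table, Idx)); // prints 300
}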
-/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
-/// interior flag and chain results to use the new flag and chain results.
+/// UpdateChainsAndGlue - When a match is complete, this method updates uses of
+/// interior glue and chain results to use the new glue and chain results.
void SelectionDAGISel::
-UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
- const SmallVectorImpl<SDNode*> &ChainNodesMatched,
- SDValue InputFlag,
- const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
- bool isMorphNodeTo) {
+UpdateChainsAndGlue(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputGlue,
+ const SmallVectorImpl<SDNode*> &GlueResultNodesMatched,
+ bool isMorphNodeTo) {
SmallVector<SDNode*, 4> NowDeadNodes;
-
+
ISelUpdater ISU(ISelPosition);
// Now that all the normal results are replaced, we replace the chain and
- // flag results if present.
+ // glue results if present.
if (!ChainNodesMatched.empty()) {
assert(InputChain.getNode() != 0 &&
"Matched input chains but didn't produce a chain");
@@ -1325,55 +1522,55 @@ UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
// Replace all the chain results with the final chain we ended up with.
for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
SDNode *ChainNode = ChainNodesMatched[i];
-
+
// If this node was already deleted, don't look at it.
if (ChainNode->getOpcode() == ISD::DELETED_NODE)
continue;
-
+
// Don't replace the results of the root node if we're doing a
// MorphNodeTo.
if (ChainNode == NodeToMatch && isMorphNodeTo)
continue;
-
+
SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
- if (ChainVal.getValueType() == MVT::Flag)
+ if (ChainVal.getValueType() == MVT::Glue)
ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
-
+
// If the node became dead and we haven't already seen it, delete it.
if (ChainNode->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), ChainNode))
NowDeadNodes.push_back(ChainNode);
}
}
-
- // If the result produces a flag, update any flag results in the matched
- // pattern with the flag result.
- if (InputFlag.getNode() != 0) {
+
+ // If the result produces glue, update any glue results in the matched
+ // pattern with the glue result.
+ if (InputGlue.getNode() != 0) {
// Handle any interior nodes explicitly marked.
- for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
- SDNode *FRN = FlagResultNodesMatched[i];
-
+ for (unsigned i = 0, e = GlueResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = GlueResultNodesMatched[i];
+
// If this node was already deleted, don't look at it.
if (FRN->getOpcode() == ISD::DELETED_NODE)
continue;
-
- assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
- "Doesn't have a flag result");
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Glue &&
+ "Doesn't have a glue result");
CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
- InputFlag, &ISU);
-
+ InputGlue, &ISU);
+
// If the node became dead and we haven't already seen it, delete it.
if (FRN->use_empty() &&
!std::count(NowDeadNodes.begin(), NowDeadNodes.end(), FRN))
NowDeadNodes.push_back(FRN);
}
}
-
+
if (!NowDeadNodes.empty())
CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
-
+
DEBUG(errs() << "ISEL: Match complete!\n");
}
@@ -1392,17 +1589,17 @@ enum ChainResult {
///
/// The walk we do here is guaranteed to be small because we quickly get down to
/// already selected nodes "below" us.
-static ChainResult
+static ChainResult
WalkChainUsers(SDNode *ChainedNode,
SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
ChainResult Result = CR_Simple;
-
+
for (SDNode::use_iterator UI = ChainedNode->use_begin(),
E = ChainedNode->use_end(); UI != E; ++UI) {
// Make sure the use is of the chain, not some other value we produce.
if (UI.getUse().getValueType() != MVT::Other) continue;
-
+
SDNode *User = *UI;
// If we see an already-selected machine node, then we've gone beyond the
@@ -1411,7 +1608,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (User->isMachineOpcode() ||
User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
continue;
-
+
if (User->getOpcode() == ISD::CopyToReg ||
User->getOpcode() == ISD::CopyFromReg ||
User->getOpcode() == ISD::INLINEASM ||
@@ -1437,7 +1634,7 @@ WalkChainUsers(SDNode *ChainedNode,
if (!std::count(ChainedNodesInPattern.begin(),
ChainedNodesInPattern.end(), User))
return CR_InducesCycle;
-
+
// Otherwise we found a node that is part of our pattern. For example in:
// x = load ptr
// y = x+4
@@ -1449,7 +1646,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
// If we found a TokenFactor, there are two cases to consider: first if the
// TokenFactor is just hanging "below" the pattern we're matching (i.e. no
// uses of the TF are in our pattern) we just want to ignore it. Second,
@@ -1486,7 +1683,7 @@ WalkChainUsers(SDNode *ChainedNode,
case CR_LeadsToInteriorNode:
break; // Otherwise, keep processing.
}
-
+
// Okay, we know we're in the interesting interior case. The TokenFactor
// is now going to be considered part of the pattern so that we rewrite its
// uses (it may have uses that are not part of the pattern) with the
@@ -1497,7 +1694,7 @@ WalkChainUsers(SDNode *ChainedNode,
InteriorChainedNodes.push_back(User);
continue;
}
-
+
return Result;
}
@@ -1519,7 +1716,7 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InteriorChainedNodes) == CR_InducesCycle)
return SDValue(); // Would induce a cycle.
}
-
+
// Okay, we have walked all the matched nodes and collected TokenFactor nodes
// that we are interested in. Form our input TokenFactor node.
SmallVector<SDValue, 3> InputChains;
@@ -1530,14 +1727,14 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
if (N->getOpcode() != ISD::TokenFactor) {
if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
continue;
-
+
// Otherwise, add the input chain.
SDValue InChain = ChainNodesMatched[i]->getOperand(0);
assert(InChain.getValueType() == MVT::Other && "Not a chain");
InputChains.push_back(InChain);
continue;
}
-
+
// If we have a token factor, we want to add all inputs of the token factor
// that are not part of the pattern we're matching.
for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
@@ -1546,13 +1743,13 @@ HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
InputChains.push_back(N->getOperand(op));
}
}
-
+
SDValue Res;
if (InputChains.size() == 1)
return InputChains[0];
return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
MVT::Other, &InputChains[0], InputChains.size());
-}
+}
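Stripped of the DAG machinery, the merging step above does this: for each matched node, keep its incoming chain only when that chain comes from outside the pattern, then reuse a single survivor directly or join several with a TokenFactor. A rough standalone sketch under those assumptions (plain ints stand in for nodes, MakeTokenFactor is a placeholder for the CurDAG->getNode(ISD::TokenFactor, ...) call, and the WalkChainUsers cycle analysis and TokenFactor flattening are omitted):

#include <optional>
#include <unordered_set>
#include <vector>

struct Node { int Id; std::vector<int> ChainInputs; };

// Placeholder for building a TokenFactor-like join of several chains.
static int MakeTokenFactor(const std::vector<int> &Chains) {
  return 1000 + (int)Chains.size();     // illustrative stand-in only
}

// Returns the merged input chain for a matched pattern, if it has one.
static std::optional<int> MergeInputChains(const std::vector<Node> &Matched) {
  std::unordered_set<int> InPattern;
  for (const Node &N : Matched)
    InPattern.insert(N.Id);

  std::vector<int> External;
  for (const Node &N : Matched)
    for (int C : N.ChainInputs)
      if (!InPattern.count(C))          // skip intra-pattern chain references
        External.push_back(C);

  if (External.empty())
    return std::nullopt;
  if (External.size() == 1)
    return External[0];                 // single input chain, use it directly
  return MakeTokenFactor(External);     // otherwise tie them together
}

int main() {
  // A load (id 1) whose chain comes from outside (id 7) feeds an add (id 2).
  std::vector<Node> Matched = {{1, {7}}, {2, {1}}};
  return MergeInputChains(Matched) == 7 ? 0 : 1;   // the external chain 7 survives
}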
/// MorphNode - Handle morphing a node in place for the selector.
SDNode *SelectionDAGISel::
@@ -1560,15 +1757,15 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
// It is possible we're using MorphNodeTo to replace a node with no
// normal results with one that has a normal result (or we could be
- // adding a chain) and the input could have flags and chains as well.
+ // adding a chain) and the input could have glue and chains as well.
// In this case we need to shift the operands down.
// FIXME: This is a horrible hack and broken in obscure cases, no worse
// than the old isel though.
- int OldFlagResultNo = -1, OldChainResultNo = -1;
+ int OldGlueResultNo = -1, OldChainResultNo = -1;
unsigned NTMNumResults = Node->getNumValues();
- if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
- OldFlagResultNo = NTMNumResults-1;
+ if (Node->getValueType(NTMNumResults-1) == MVT::Glue) {
+ OldGlueResultNo = NTMNumResults-1;
if (NTMNumResults != 1 &&
Node->getValueType(NTMNumResults-2) == MVT::Other)
OldChainResultNo = NTMNumResults-2;
@@ -1589,54 +1786,55 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
}
unsigned ResNumResults = Res->getNumValues();
- // Move the flag if needed.
- if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
- (unsigned)OldFlagResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ // Move the glue if needed.
+ if ((EmitNodeInfo & OPFL_GlueOutput) && OldGlueResultNo != -1 &&
+ (unsigned)OldGlueResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldGlueResultNo),
SDValue(Res, ResNumResults-1));
- if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ if ((EmitNodeInfo & OPFL_GlueOutput) != 0)
--ResNumResults;
// Move the chain reference if needed.
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
(unsigned)OldChainResultNo != ResNumResults-1)
- CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
SDValue(Res, ResNumResults-1));
// Otherwise, no replacement happened because the node already exists. Replace
// Uses of the old node with the new one.
if (Res != Node)
CurDAG->ReplaceAllUsesWith(Node, Res);
-
+
return Res;
}
/// CheckSame - Implements OP_CheckSame.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
- SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ SDValue N,
+ const SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
// Accept if it is exactly the same as a previously recorded node.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- return N == RecordedNodes[RecNo];
+ return N == RecordedNodes[RecNo].first;
}
-
+
/// CheckPatternPredicate - Implements OP_CheckPatternPredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel) {
return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
}
/// CheckNodePredicate - Implements OP_CheckNodePredicate.
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SelectionDAGISel &SDISel, SDNode *N) {
return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDNode *N) {
uint16_t Opc = MatcherTable[MatcherIndex++];
@@ -1644,17 +1842,17 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
return N->getOpcode() == Opc;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (N.getValueType() == VT) return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI,
unsigned ChildNo) {
@@ -1664,57 +1862,57 @@ CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
return cast<CondCodeSDNode>(N)->get() ==
(ISD::CondCode)MatcherTable[MatcherIndex++];
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, const TargetLowering &TLI) {
MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (cast<VTSDNode>(N)->getVT() == VT)
return true;
-
+
// Handle the case when VT is iPTR.
return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
return C != 0 && C->getSExtValue() == Val;
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::AND) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
}
-ALWAYS_INLINE static bool
+LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
SDValue N, SelectionDAGISel &SDISel) {
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
-
+
if (N->getOpcode() != ISD::OR) return false;
-
+
ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
}
@@ -1724,11 +1922,11 @@ CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
/// fail, set Result=true and return anything. If the current predicate is
/// known to pass, set Result=false and return the MatcherIndex to continue
/// with. If the current predicate is unknown, set Result=false and return the
-/// MatcherIndex to continue with.
+/// MatcherIndex to continue with.
static unsigned IsPredicateKnownToFail(const unsigned char *Table,
unsigned Index, SDValue N,
bool &Result, SelectionDAGISel &SDISel,
- SmallVectorImpl<SDValue> &RecordedNodes){
+ SmallVectorImpl<std::pair<SDValue, SDNode*> > &RecordedNodes) {
switch (Table[Index++]) {
default:
Result = false;
@@ -1782,21 +1980,21 @@ namespace {
struct MatchScope {
/// FailIndex - If this match fails, this is the index to continue with.
unsigned FailIndex;
-
+
/// NodeStack - The node stack when the scope was formed.
SmallVector<SDValue, 4> NodeStack;
-
+
/// NumRecordedNodes - The number of recorded nodes when the scope was formed.
unsigned NumRecordedNodes;
-
+
/// NumMatchedMemRefs - The number of matched memref entries.
unsigned NumMatchedMemRefs;
-
- /// InputChain/InputFlag - The current chain/flag
- SDValue InputChain, InputFlag;
+
+ /// InputChain/InputGlue - The current chain/glue
+ SDValue InputChain, InputGlue;
/// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
- bool HasChainNodesMatched, HasFlagResultNodesMatched;
+ bool HasChainNodesMatched, HasGlueResultNodesMatched;
};
}
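MatchScope is the backtracking record for the table-driven interpreter that follows: entering a scope saves how much state exists (recorded nodes, node stack, chain/glue) plus the index of the next alternative, and a failed predicate rewinds to exactly that snapshot before trying it. A toy standalone matcher showing the same save/rewind discipline (the opcodes and table bytes below are invented for illustration and unrelated to the generated matcher tables):

#include <cstdio>
#include <vector>

// Toy opcodes for a table-driven matcher with backtracking scopes.
enum Op : unsigned char { OP_Scope, OP_Record, OP_CheckEq, OP_Done, OP_Fail };

struct Scope { unsigned FailIndex; size_t NumRecorded; };

static bool Match(const std::vector<unsigned char> &Table, int Input,
                  std::vector<int> &Recorded) {
  std::vector<Scope> Scopes;
  unsigned Idx = 0;
  while (true) {
    switch (Table[Idx++]) {
    case OP_Scope: {                    // next byte: size of this alternative
      unsigned Skip = Table[Idx++];
      Scopes.push_back({Idx + Skip, Recorded.size()});
      continue;
    }
    case OP_Record:
      Recorded.push_back(Input);
      continue;
    case OP_CheckEq:                    // next byte: value Input must equal
      if (Input == Table[Idx++]) continue;
      break;                            // predicate failed -> backtrack
    case OP_Done:
      return true;
    case OP_Fail:
      break;
    }
    if (Scopes.empty()) return false;   // nothing left to try
    Recorded.resize(Scopes.back().NumRecorded);  // rewind recorded state
    Idx = Scopes.back().FailIndex;               // resume at the alternative
    Scopes.pop_back();
  }
}

int main() {
  // Try "Input == 7" first; if that fails, fall back to "record it and accept".
  std::vector<unsigned char> Table = {
      OP_Scope, 3, OP_CheckEq, 7, OP_Done,   // alternative 1 (3 bytes long)
      OP_Record, OP_Done};                   // alternative 2
  std::vector<int> Recorded;
  bool Ok = Match(Table, 5, Recorded);
  std::printf("matched=%d recorded=%zu\n", Ok, Recorded.size()); // matched=1 recorded=1
}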
@@ -1838,7 +2036,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
}
-
+
assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
// Set up the node stack with NodeToMatch as the only node on the stack.
@@ -1849,37 +2047,38 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// MatchScopes - Scopes used when matching, if a match failure happens, this
// indicates where to continue checking.
SmallVector<MatchScope, 8> MatchScopes;
-
+
// RecordedNodes - This is the set of nodes that have been recorded by the
- // state machine.
- SmallVector<SDValue, 8> RecordedNodes;
-
+ // state machine. The second value is the parent of the node, or null if the
+ // root is recorded.
+ SmallVector<std::pair<SDValue, SDNode*>, 8> RecordedNodes;
+
// MatchedMemRefs - This is the set of MemRef's we've seen in the input
// pattern.
SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
-
- // These are the current input chain and flag for use when generating nodes.
+
+ // These are the current input chain and glue for use when generating nodes.
// Various Emit operations change these. For example, emitting a copytoreg
// uses and updates these.
- SDValue InputChain, InputFlag;
-
+ SDValue InputChain, InputGlue;
+
// ChainNodesMatched - If a pattern matches nodes that have input/output
// chains, the OPC_EmitMergeInputChains operation is emitted which indicates
// which ones they are. The result is captured into this list so that we can
// update the chain results when the pattern is complete.
SmallVector<SDNode*, 3> ChainNodesMatched;
- SmallVector<SDNode*, 3> FlagResultNodesMatched;
-
+ SmallVector<SDNode*, 3> GlueResultNodesMatched;
+
DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
NodeToMatch->dump(CurDAG);
errs() << '\n');
-
+
// Determine where to start the interpreter. Normally we start at opcode #0,
// but if the state machine starts with an OPC_SwitchOpcode, then we
// accelerate the first lookup (which is guaranteed to be hot) with the
// OpcodeOffset table.
unsigned MatcherIndex = 0;
-
+
if (!OpcodeOffset.empty()) {
// Already computed the OpcodeOffset table, just index into it.
if (N.getOpcode() < OpcodeOffset.size())
@@ -1911,7 +2110,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (N.getOpcode() < OpcodeOffset.size())
MatcherIndex = OpcodeOffset[N.getOpcode()];
}
-
+
while (1) {
assert(MatcherIndex < TableSize && "Invalid index");
#ifndef NDEBUG
@@ -1926,7 +2125,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// determine immediately that the first check (or first several) will
// immediately fail, don't even bother pushing a scope for them.
unsigned FailIndex;
-
+
while (1) {
unsigned NumToSkip = MatcherTable[MatcherIndex++];
if (NumToSkip & 128)
@@ -1936,12 +2135,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FailIndex = 0;
break;
}
-
+
FailIndex = MatcherIndex+NumToSkip;
-
+
unsigned MatcherIndexOfPredicate = MatcherIndex;
(void)MatcherIndexOfPredicate; // silence warning.
-
+
// If we can't evaluate this predicate without pushing a scope (e.g. if
// it is a 'MoveParent') or if the predicate succeeds on this node, we
// push the scope and evaluate the full predicate chain.
@@ -1950,20 +2149,20 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
Result, *this, RecordedNodes);
if (!Result)
break;
-
+
DEBUG(errs() << " Skipped scope entry (due to false predicate) at "
<< "index " << MatcherIndexOfPredicate
<< ", continuing at " << FailIndex << "\n");
++NumDAGIselRetries;
-
+
// Otherwise, we know that this case of the Scope is guaranteed to fail,
// move to the next case.
MatcherIndex = FailIndex;
}
-
+
// If the whole scope failed to match, bail.
if (FailIndex == 0) break;
-
+
// Push a MatchScope which indicates where to go if the first child fails
// to match.
MatchScope NewEntry;
@@ -1972,17 +2171,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NewEntry.NumRecordedNodes = RecordedNodes.size();
NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
NewEntry.InputChain = InputChain;
- NewEntry.InputFlag = InputFlag;
+ NewEntry.InputGlue = InputGlue;
NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
- NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ NewEntry.HasGlueResultNodesMatched = !GlueResultNodesMatched.empty();
MatchScopes.push_back(NewEntry);
continue;
}
- case OPC_RecordNode:
+ case OPC_RecordNode: {
// Remember this node, it may end up being an operand in the pattern.
- RecordedNodes.push_back(N);
+ SDNode *Parent = 0;
+ if (NodeStack.size() > 1)
+ Parent = NodeStack[NodeStack.size()-2].getNode();
+ RecordedNodes.push_back(std::make_pair(N, Parent));
continue;
-
+ }
+
case OPC_RecordChild0: case OPC_RecordChild1:
case OPC_RecordChild2: case OPC_RecordChild3:
case OPC_RecordChild4: case OPC_RecordChild5:
@@ -1991,20 +2194,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (ChildNo >= N.getNumOperands())
break; // Match fails if out of range child #.
- RecordedNodes.push_back(N->getOperand(ChildNo));
+ RecordedNodes.push_back(std::make_pair(N->getOperand(ChildNo),
+ N.getNode()));
continue;
}
case OPC_RecordMemRef:
MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
continue;
-
- case OPC_CaptureFlagInput:
- // If the current node has an input flag, capture it in InputFlag.
+
+ case OPC_CaptureGlueInput:
+ // If the current node has an input glue, capture it in InputGlue.
if (N->getNumOperands() != 0 &&
- N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
- InputFlag = N->getOperand(N->getNumOperands()-1);
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Glue)
+ InputGlue = N->getOperand(N->getNumOperands()-1);
continue;
-
+
case OPC_MoveChild: {
unsigned ChildNo = MatcherTable[MatcherIndex++];
if (ChildNo >= N.getNumOperands())
@@ -2013,14 +2217,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeStack.push_back(N);
continue;
}
-
+
case OPC_MoveParent:
// Pop the current node off the NodeStack.
NodeStack.pop_back();
assert(!NodeStack.empty() && "Node stack imbalance!");
- N = NodeStack.back();
+ N = NodeStack.back();
continue;
-
+
case OPC_CheckSame:
if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
continue;
@@ -2036,7 +2240,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned CPNum = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckComplexPat");
- if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo], CPNum,
+ if (!CheckComplexPattern(NodeToMatch, RecordedNodes[RecNo].second,
+ RecordedNodes[RecNo].first, CPNum,
RecordedNodes))
break;
continue;
@@ -2044,11 +2249,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOpcode:
if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
continue;
-
+
case OPC_CheckType:
if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
continue;
-
+
case OPC_SwitchOpcode: {
unsigned CurNodeOpcode = N.getOpcode();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
@@ -2066,22 +2271,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// If the opcode matches, then we will execute this case.
if (CurNodeOpcode == Opc)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
<< " to " << MatcherIndex << "\n");
continue;
}
-
+
case OPC_SwitchType: {
- MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ MVT CurNodeVT = N.getValueType().getSimpleVT();
unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
unsigned CaseSize;
while (1) {
@@ -2090,23 +2295,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (CaseSize & 128)
CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
if (CaseSize == 0) break;
-
- MVT::SimpleValueType CaseVT =
- (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+
+ MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
if (CaseVT == MVT::iPTR)
- CaseVT = TLI.getPointerTy().SimpleTy;
-
+ CaseVT = TLI.getPointerTy();
+
// If the VT matches, then we will execute this case.
if (CurNodeVT == CaseVT)
break;
-
+
// Otherwise, skip over this case.
MatcherIndex += CaseSize;
}
-
+
// If no cases matched, bail out.
if (CaseSize == 0) break;
-
+
// Otherwise, execute the case we found.
DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
<< "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
@@ -2135,7 +2339,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
case OPC_CheckOrImm:
if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
continue;
-
+
case OPC_CheckFoldableChainNode: {
assert(NodeStack.size() != 1 && "No parent node");
// Verify that all intermediate nodes between the root and this one have
@@ -2156,7 +2360,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
NodeToMatch, OptLevel,
true/*We validate our own chains*/))
break;
-
+
continue;
}
case OPC_EmitInteger: {
@@ -2165,22 +2369,24 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
int64_t Val = MatcherTable[MatcherIndex++];
if (Val & 128)
Val = GetVBR(Val, MatcherTable, MatcherIndex);
- RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getTargetConstant(Val, VT), (SDNode*)0));
continue;
}
case OPC_EmitRegister: {
MVT::SimpleValueType VT =
(MVT::SimpleValueType)MatcherTable[MatcherIndex++];
unsigned RegNo = MatcherTable[MatcherIndex++];
- RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ RecordedNodes.push_back(std::pair<SDValue, SDNode*>(
+ CurDAG->getRegister(RegNo, VT), (SDNode*)0));
continue;
}
-
+
case OPC_EmitConvertToTarget: {
// Convert from IMM/FPIMM to target version.
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Imm = RecordedNodes[RecNo];
+ SDValue Imm = RecordedNodes[RecNo].first;
if (Imm->getOpcode() == ISD::Constant) {
int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
@@ -2189,11 +2395,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
}
-
- RecordedNodes.push_back(Imm);
+
+ RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second));
continue;
}
-
+
case OPC_EmitMergeInputChains1_0: // OPC_EmitMergeInputChains, 1, 0
case OPC_EmitMergeInputChains1_1: { // OPC_EmitMergeInputChains, 1, 1
// These are space-optimized forms of OPC_EmitMergeInputChains.
@@ -2201,28 +2407,28 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
"EmitMergeInputChains should be the first chain producing node");
assert(ChainNodesMatched.empty() &&
"Should only have one EmitMergeInputChains per match");
-
+
// Read all of the chained nodes.
unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1;
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
-
+
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitMergeInputChains: {
assert(InputChain.getNode() == 0 &&
"EmitMergeInputChains should be the first chain producing node");
@@ -2242,54 +2448,55 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
for (unsigned i = 0; i != NumChains; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
-
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
+
// FIXME: What if other value results of the node have uses not matched
// by this pattern?
if (ChainNodesMatched.back() != NodeToMatch &&
- !RecordedNodes[RecNo].hasOneUse()) {
+ !RecordedNodes[RecNo].first.hasOneUse()) {
ChainNodesMatched.clear();
break;
}
}
-
+
// If the inner loop broke out, the match fails.
if (ChainNodesMatched.empty())
break;
// Merge the input chains if they are not intra-pattern references.
InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
-
+
if (InputChain.getNode() == 0)
break; // Failed to merge.
continue;
}
-
+
case OPC_EmitCopyToReg: {
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
unsigned DestPhysReg = MatcherTable[MatcherIndex++];
-
+
if (InputChain.getNode() == 0)
InputChain = CurDAG->getEntryNode();
-
+
InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
- DestPhysReg, RecordedNodes[RecNo],
- InputFlag);
-
- InputFlag = InputChain.getValue(1);
+ DestPhysReg, RecordedNodes[RecNo].first,
+ InputGlue);
+
+ InputGlue = InputChain.getValue(1);
continue;
}
-
+
case OPC_EmitNodeXForm: {
unsigned XFormNo = MatcherTable[MatcherIndex++];
unsigned RecNo = MatcherTable[MatcherIndex++];
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(Res, (SDNode*) 0));
continue;
}
-
+
case OPC_EmitNode:
case OPC_MorphNodeTo: {
uint16_t TargetOpc = MatcherTable[MatcherIndex++];
@@ -2304,12 +2511,12 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
VTs.push_back(VT);
}
-
+
if (EmitNodeInfo & OPFL_Chain)
VTs.push_back(MVT::Other);
- if (EmitNodeInfo & OPFL_FlagOutput)
- VTs.push_back(MVT::Flag);
-
+ if (EmitNodeInfo & OPFL_GlueOutput)
+ VTs.push_back(MVT::Glue);
+
// This is hot code, so optimize the two most common cases of 1 and 2
// results.
SDVTList VTList;
@@ -2327,11 +2534,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-
+
assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
- Ops.push_back(RecordedNodes[RecNo]);
+ Ops.push_back(RecordedNodes[RecNo].first);
}
-
+
// If there are variadic operands to add, handle them now.
if (EmitNodeInfo & OPFL_VariadicInfo) {
// Determine the start index to copy from.
@@ -2339,22 +2546,22 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
"Invalid variadic node");
- // Copy all of the variadic operands, not including a potential flag
+ // Copy all of the variadic operands, not including a potential glue
// input.
for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
i != e; ++i) {
SDValue V = NodeToMatch->getOperand(i);
- if (V.getValueType() == MVT::Flag) break;
+ if (V.getValueType() == MVT::Glue) break;
Ops.push_back(V);
}
}
-
- // If this has chain/flag inputs, add them.
+
+ // If this has chain/glue inputs, add them.
if (EmitNodeInfo & OPFL_Chain)
Ops.push_back(InputChain);
- if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
- Ops.push_back(InputFlag);
-
+ if ((EmitNodeInfo & OPFL_GlueInput) && InputGlue.getNode() != 0)
+ Ops.push_back(InputGlue);
+
// Create the node.
SDNode *Res = 0;
if (Opcode != OPC_MorphNodeTo) {
@@ -2362,28 +2569,29 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
// add the results to the RecordedNodes list.
Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
VTList, Ops.data(), Ops.size());
-
- // Add all the non-flag/non-chain results to the RecordedNodes list.
+
+ // Add all the non-glue/non-chain results to the RecordedNodes list.
for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
- if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
- RecordedNodes.push_back(SDValue(Res, i));
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Glue) break;
+ RecordedNodes.push_back(std::pair<SDValue,SDNode*>(SDValue(Res, i),
+ (SDNode*) 0));
}
-
+
} else {
Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
EmitNodeInfo);
}
-
- // If the node had chain/flag results, update our notion of the current
- // chain and flag.
- if (EmitNodeInfo & OPFL_FlagOutput) {
- InputFlag = SDValue(Res, VTs.size()-1);
+
+ // If the node had chain/glue results, update our notion of the current
+ // chain and glue.
+ if (EmitNodeInfo & OPFL_GlueOutput) {
+ InputGlue = SDValue(Res, VTs.size()-1);
if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-2);
} else if (EmitNodeInfo & OPFL_Chain)
InputChain = SDValue(Res, VTs.size()-1);
- // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // If the OPFL_MemRefs glue is set on this node, slap all of the
// accumulated memrefs onto it.
//
// FIXME: This is vastly incorrect for patterns with multiple outputs
@@ -2396,37 +2604,37 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
cast<MachineSDNode>(Res)
->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
}
-
+
DEBUG(errs() << " "
<< (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
<< " node: "; Res->dump(CurDAG); errs() << "\n");
-
+
// If this was a MorphNodeTo then we're completely done!
if (Opcode == OPC_MorphNodeTo) {
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, true);
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, true);
return Res;
}
-
+
continue;
}
-
- case OPC_MarkFlagResults: {
+
+ case OPC_MarkGlueResults: {
unsigned NumNodes = MatcherTable[MatcherIndex++];
-
- // Read and remember all the flag-result nodes.
+
+ // Read and remember all the glue-result nodes.
for (unsigned i = 0; i != NumNodes; ++i) {
unsigned RecNo = MatcherTable[MatcherIndex++];
if (RecNo & 128)
RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
- FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
}
continue;
}
-
+
case OPC_CompleteMatch: {
// The match has been completed, and any new nodes (if any) have been
// created. Patch up references to the matched dag to use the newly
@@ -2437,13 +2645,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
unsigned ResSlot = MatcherTable[MatcherIndex++];
if (ResSlot & 128)
ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-
+
assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
- SDValue Res = RecordedNodes[ResSlot];
-
+ SDValue Res = RecordedNodes[ResSlot].first;
+
assert(i < NodeToMatch->getNumValues() &&
NodeToMatch->getValueType(i) != MVT::Other &&
- NodeToMatch->getValueType(i) != MVT::Flag &&
+ NodeToMatch->getValueType(i) != MVT::Glue &&
"Invalid number of results to complete!");
assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
NodeToMatch->getValueType(i) == MVT::iPTR ||
@@ -2454,24 +2662,23 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
}
- // If the root node defines a flag, add it to the flag nodes to update
- // list.
- if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
- FlagResultNodesMatched.push_back(NodeToMatch);
-
- // Update chain and flag uses.
- UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
- InputFlag, FlagResultNodesMatched, false);
-
+ // If the root node defines glue, add it to the glue nodes to update list.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Glue)
+ GlueResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and glue uses.
+ UpdateChainsAndGlue(NodeToMatch, InputChain, ChainNodesMatched,
+ InputGlue, GlueResultNodesMatched, false);
+
assert(NodeToMatch->use_empty() &&
"Didn't replace all uses of the node?");
-
+
// FIXME: We just return here, which interacts correctly with SelectRoot
// above. We should fix this to not return an SDNode* anymore.
return 0;
}
}
-
+
// If the code reached this point, then the match failed. See if there is
// another child to try in the current 'Scope', otherwise pop it until we
// find a case to check.
@@ -2494,15 +2701,15 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
MatcherIndex = LastScope.FailIndex;
-
+
DEBUG(errs() << " Continuing at " << MatcherIndex << "\n");
-
+
InputChain = LastScope.InputChain;
- InputFlag = LastScope.InputFlag;
+ InputGlue = LastScope.InputGlue;
if (!LastScope.HasChainNodesMatched)
ChainNodesMatched.clear();
- if (!LastScope.HasFlagResultNodesMatched)
- FlagResultNodesMatched.clear();
+ if (!LastScope.HasGlueResultNodesMatched)
+ GlueResultNodesMatched.clear();
// Check to see what the offset is at the new MatcherIndex. If it is zero
// we have reached the end of this scope, otherwise we have another child
@@ -2517,21 +2724,21 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
LastScope.FailIndex = MatcherIndex+NumToSkip;
break;
}
-
+
// End of this scope, pop it and try the next child in the containing
// scope.
MatchScopes.pop_back();
}
}
}
-
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
std::string msg;
raw_string_ostream Msg(msg);
- Msg << "Cannot yet select: ";
-
+ Msg << "Cannot select: ";
+
if (N->getOpcode() != ISD::INTRINSIC_W_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_WO_CHAIN &&
N->getOpcode() != ISD::INTRINSIC_VOID) {
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index 8313de5e32bb..76eb9453561e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -93,7 +93,7 @@ namespace llvm {
static std::string getEdgeAttributes(const void *Node, EdgeIter EI) {
SDValue Op = EI.getNode()->getOperand(EI.getOperand());
EVT VT = Op.getValueType();
- if (VT == MVT::Flag)
+ if (VT == MVT::Glue)
return "color=red,style=bold";
else if (VT == MVT::Other)
return "color=blue,style=dashed";
@@ -273,14 +273,14 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const {
raw_string_ostream O(s);
O << "SU(" << SU->NodeNum << "): ";
if (SU->getNode()) {
- SmallVector<SDNode *, 4> FlaggedNodes;
- for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
- FlaggedNodes.push_back(N);
- while (!FlaggedNodes.empty()) {
+ SmallVector<SDNode *, 4> GluedNodes;
+ for (SDNode *N = SU->getNode(); N; N = N->getGluedNode())
+ GluedNodes.push_back(N);
+ while (!GluedNodes.empty()) {
O << DOTGraphTraits<SelectionDAG*>
- ::getSimpleNodeLabel(FlaggedNodes.back(), DAG);
- FlaggedNodes.pop_back();
- if (!FlaggedNodes.empty())
+ ::getSimpleNodeLabel(GluedNodes.back(), DAG);
+ GluedNodes.pop_back();
+ if (!GluedNodes.empty())
O << "\n ";
}
} else {
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index b74f600cfa2d..691390e2a0e4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -28,6 +28,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <cctype>
using namespace llvm;
namespace llvm {
@@ -530,7 +531,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand);
setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand);
}
-
+
// These operations default to expand.
setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
@@ -538,8 +539,8 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Most targets ignore the @llvm.prefetch intrinsic.
setOperationAction(ISD::PREFETCH, MVT::Other, Expand);
-
- // ConstantFP nodes default to expand. Targets can either change this to
+
+ // ConstantFP nodes default to expand. Targets can either change this to
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
// to optimize expansions for certain constants.
setOperationAction(ISD::ConstantFP, MVT::f32, Expand);
@@ -560,18 +561,21 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
// Default ISD::TRAP to expand (which turns it into abort).
setOperationAction(ISD::TRAP, MVT::Other, Expand);
-
+
IsLittleEndian = TD->isLittleEndian();
ShiftAmountTy = PointerTy = MVT::getIntegerVT(8*TD->getPointerSize());
memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*));
memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray));
maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8;
+ maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize
+ = maxStoresPerMemmoveOptSize = 4;
benefitFromCodePlacementOpt = false;
UseUnderscoreSetJmp = false;
UseUnderscoreLongJmp = false;
SelectIsExpensive = false;
IntDivIsCheap = false;
Pow2DivIsCheap = false;
+ JumpIsExpensive = false;
StackPointerRegisterToSaveRestore = 0;
ExceptionPointerRegister = 0;
ExceptionSelectorRegister = 0;
@@ -617,16 +621,16 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
// Figure out the right, legal destination reg to copy into.
unsigned NumElts = VT.getVectorNumElements();
MVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) {
@@ -635,7 +639,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
}
NumIntermediates = NumVectorRegs;
-
+
MVT NewVT = MVT::getVectorVT(EltTy, NumElts);
if (!TLI->isTypeLegal(NewVT))
NewVT = EltTy;
@@ -645,7 +649,7 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT,
RegisterVT = DestVT;
if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
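As a concrete illustration of the loop above: if the only legal i32 vectors are 4 elements or fewer, an <8 x i32> halves once into two <4 x i32> intermediates, while a non-power-of-2 <7 x i32> falls back to one register per element. A standalone sketch of just that arithmetic (isLegalVector is a hard-coded stand-in for TLI->isTypeLegal, and the final promotion of the intermediate type to a register type is not shown):

#include <cstdio>

// Stand-in for TLI->isTypeLegal: pretend only <=4-element i32 vectors are legal.
static bool isLegalVector(unsigned NumElts) { return NumElts <= 4; }

// Mirrors the halving loop: split an N-element vector until each piece is legal.
static unsigned BreakdownVector(unsigned NumElts, unsigned &NumIntermediates,
                                unsigned &EltsPerPiece) {
  unsigned NumVectorRegs = 1;
  // Non-power-of-2 vectors fall back to one register per element.
  if (NumElts & (NumElts - 1)) {
    NumVectorRegs = NumElts;
    NumElts = 1;
  }
  while (NumElts > 1 && !isLegalVector(NumElts)) {
    NumElts >>= 1;
    NumVectorRegs <<= 1;
  }
  NumIntermediates = NumVectorRegs;
  EltsPerPiece = NumElts;
  return NumVectorRegs;
}

int main() {
  unsigned Pieces, Elts;
  unsigned Regs = BreakdownVector(8, Pieces, Elts);
  std::printf("<8 x i32> -> %u x <%u x i32> in %u regs\n", Pieces, Elts, Regs);
  Regs = BreakdownVector(7, Pieces, Elts);          // non-power-of-2 case
  std::printf("<7 x i32> -> %u x <%u x i32> in %u regs\n", Pieces, Elts, Regs);
}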
@@ -750,7 +754,7 @@ void TargetLowering::computeRegisterProperties() {
RegisterTypeForVT[MVT::ppcf128] = MVT::f64;
TransformToType[MVT::ppcf128] = MVT::f64;
ValueTypeActions.setTypeAction(MVT::ppcf128, Expand);
- }
+ }
// Decide how to handle f64. If the target does not have native f64 support,
// expand it to i64 and we will be generating soft float library calls.
@@ -776,13 +780,13 @@ void TargetLowering::computeRegisterProperties() {
ValueTypeActions.setTypeAction(MVT::f32, Expand);
}
}
-
+
// Loop over all of the vector value types to see which need transformations.
for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE;
i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) {
MVT VT = (MVT::SimpleValueType)i;
if (isTypeLegal(VT)) continue;
-
+
// Determine if there is a legal wider type. If so, we should promote to
// that wider vector type.
EVT EltVT = VT.getVectorElementType();
@@ -792,8 +796,8 @@ void TargetLowering::computeRegisterProperties() {
for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) {
EVT SVT = (MVT::SimpleValueType)nVT;
if (SVT.getVectorElementType() == EltVT &&
- SVT.getVectorNumElements() > NElts &&
- isTypeSynthesizable(SVT)) {
+ SVT.getVectorNumElements() > NElts &&
+ isTypeLegal(SVT)) {
TransformToType[i] = SVT;
RegisterTypeForVT[i] = SVT;
NumRegistersForVT[i] = 1;
@@ -804,7 +808,7 @@ void TargetLowering::computeRegisterProperties() {
}
if (IsLegalWiderType) continue;
}
-
+
MVT IntermediateVT;
EVT RegisterVT;
unsigned NumIntermediates;
@@ -812,7 +816,7 @@ void TargetLowering::computeRegisterProperties() {
getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates,
RegisterVT, this);
RegisterTypeForVT[i] = RegisterVT;
-
+
EVT NVT = VT.getPow2VectorType();
if (NVT == VT) {
// Type is already a power of 2. The default action is to split.
@@ -865,7 +869,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
unsigned &NumIntermediates,
EVT &RegisterVT) const {
unsigned NumElts = VT.getVectorNumElements();
-
+
// If there is a wider vector type with the same element type as this one,
// we should widen to that legal vector type. This handles things like
// <2 x float> -> <4 x float>.
@@ -877,19 +881,19 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
return 1;
}
}
-
+
// Figure out the right, legal destination reg to copy into.
EVT EltTy = VT.getVectorElementType();
-
+
unsigned NumVectorRegs = 1;
-
- // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
+
+ // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we
// could break down into LHS/RHS like LegalizeDAG does.
if (!isPowerOf2_32(NumElts)) {
NumVectorRegs = NumElts;
NumElts = 1;
}
-
+
// Divide the input until we get to a supported size. This will always
// end with a scalar if the target doesn't support vectors.
while (NumElts > 1 && !isTypeLegal(
@@ -899,7 +903,7 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
}
NumIntermediates = NumVectorRegs;
-
+
EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts);
if (!isTypeLegal(NewVT))
NewVT = EltTy;
@@ -909,13 +913,13 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
RegisterVT = DestVT;
if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16.
return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits());
-
+
// Otherwise, promotion or legal types use the same number of registers as
// the vector decimated to the appropriate level.
return NumVectorRegs;
}
-/// Get the EVTs and ArgFlags collections that represent the legalized return
+/// Get the EVTs and ArgFlags collections that represent the legalized return
/// type of the given function. This does not require a DAG or a return value,
/// and is suitable for use before any DAGs for the function are constructed.
/// TODO: Move this out of TargetLowering.cpp.
@@ -988,11 +992,11 @@ unsigned TargetLowering::getJumpTableEncoding() const {
// In non-pic modes, just use the address of a block.
if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
return MachineJumpTableInfo::EK_BlockAddress;
-
+
// In PIC mode, if the target supports a GPRel32 directive, use it.
if (getTargetMachine().getMCAsmInfo()->getGPRel32Directive() != 0)
return MachineJumpTableInfo::EK_GPRel32BlockAddress;
-
+
// Otherwise, use a label difference.
return MachineJumpTableInfo::EK_LabelDifference32;
}
@@ -1036,11 +1040,11 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
// Optimization Methods
//===----------------------------------------------------------------------===//
-/// ShrinkDemandedConstant - Check to see if the specified operand of the
+/// ShrinkDemandedConstant - Check to see if the specified operand of the
/// specified instruction is a constant integer. If so, check to see if there
/// are any bits set in the constant that are not demanded. If so, shrink the
/// constant and return true.
-bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
+bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
const APInt &Demanded) {
DebugLoc dl = Op.getDebugLoc();
@@ -1062,7 +1066,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
EVT VT = Op.getValueType();
SDValue New = DAG.getNode(Op.getOpcode(), dl, VT, Op.getOperand(0),
DAG.getConstant(Demanded &
- C->getAPIntValue(),
+ C->getAPIntValue(),
VT));
return CombineTo(Op, New);
}
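The test in ShrinkDemandedConstant is simply "does the constant have set bits outside the demanded mask"; if so, the constant is masked down and the node rebuilt with the smaller value. A plain-integer sketch of that check (no DAG involved; the OR example mirrors the opcodes handled above, and the follow-up folding of a zero constant is left to later combines):

#include <cstdint>
#include <cstdio>

// If C has bits set that the user never looks at, return the smaller constant.
static bool shrinkDemandedConstant(uint64_t C, uint64_t Demanded, uint64_t &NewC) {
  if ((~Demanded & C) == 0)
    return false;           // already minimal for these demanded bits
  NewC = C & Demanded;
  return true;
}

int main() {
  // x | 0xFF00, but only the low byte of the result is ever used:
  uint64_t NewC;
  if (shrinkDemandedConstant(0xFF00, /*Demanded=*/0x00FF, NewC))
    std::printf("0xFF00 shrinks to 0x%llX\n", (unsigned long long)NewC); // 0x0
}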
@@ -1139,9 +1143,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero = KnownOne = APInt(BitWidth, 0);
// Other users may use these bits.
- if (!Op.getNode()->hasOneUse()) {
+ if (!Op.getNode()->hasOneUse()) {
if (Depth != 0) {
- // If not at the root, Just compute the KnownZero/KnownOne bits to
+ // If not at the root, just compute the KnownZero/KnownOne bits to
// simplify things downstream.
TLO.DAG.ComputeMaskedBits(Op, DemandedMask, KnownZero, KnownOne, Depth);
return false;
@@ -1149,7 +1153,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// If this is the root being simplified, allow it to have multiple uses,
// just set the NewMask to all bits.
NewMask = APInt::getAllOnesValue(BitWidth);
- } else if (DemandedMask == 0) {
+ } else if (DemandedMask == 0) {
// Not demanding any bits from Op.
if (Op.getOpcode() != ISD::UNDEF)
return TLO.CombineTo(Op, TLO.DAG.getUNDEF(Op.getValueType()));
@@ -1172,8 +1176,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// the RHS.
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
APInt LHSZero, LHSOne;
+ // Do not increment Depth here; that can cause an infinite loop.
TLO.DAG.ComputeMaskedBits(Op.getOperand(0), NewMask,
- LHSZero, LHSOne, Depth+1);
+ LHSZero, LHSOne, Depth);
// If the LHS already has zeros where RHSC does, this and is dead.
if ((LHSZero & NewMask) == (~RHSC->getAPIntValue() & NewMask))
return TLO.CombineTo(Op, Op.getOperand(0));
@@ -1182,16 +1187,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (TLO.ShrinkDemandedConstant(Op, ~LHSZero & NewMask))
return true;
}
-
+
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownZero & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known one on one side, return the other.
// These bits cannot contribute to the result of the 'and'.
if ((NewMask & ~KnownZero2 & KnownOne) == (~KnownZero2 & NewMask))
@@ -1214,15 +1219,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= KnownZero2;
break;
case ISD::OR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), ~KnownOne & NewMask,
KnownZero2, KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'or'.
if ((NewMask & ~KnownOne2 & KnownZero) == (~KnownOne2 & NewMask))
@@ -1248,15 +1253,15 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne |= KnownOne2;
break;
case ISD::XOR:
- if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If all of the demanded bits are known zero on one side, return the other.
// These bits cannot contribute to the result of the 'xor'.
if ((KnownZero & NewMask) == NewMask)
@@ -1274,12 +1279,12 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::OR, dl, Op.getValueType(),
Op.getOperand(0),
Op.getOperand(1)));
-
+
// Output known-0 bits are known if clear or set in both the LHS & RHS.
KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2);
// Output known-1 are known to be set if set in only one of the LHS, RHS.
KnownOneOut = (KnownZero & KnownOne2) | (KnownOne & KnownZero2);
-
+
// If all of the demanded bits on one side are known, and all of the set
// bits on that side are also known to be set on the other side, turn this
// into an AND, as we know the bits will be cleared.
@@ -1288,11 +1293,11 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if ((KnownOne & KnownOne2) == KnownOne) {
EVT VT = Op.getValueType();
SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::AND, dl, VT,
Op.getOperand(0), ANDC));
}
}
-
+
// If the RHS is a constant, see if we can simplify it.
// for XOR, we prefer to force bits to 1 if they will make a -1.
// if we can't force bits, try to shrink constant
@@ -1317,37 +1322,37 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownOne = KnownOneOut;
break;
case ISD::SELECT:
- if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(1), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
break;
case ISD::SELECT_CC:
- if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
+ if (SimplifyDemandedBits(Op.getOperand(3), NewMask, KnownZero,
KnownOne, TLO, Depth+1))
return true;
if (SimplifyDemandedBits(Op.getOperand(2), NewMask, KnownZero2,
KnownOne2, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
// If the operands are constants, see if we can simplify them.
if (TLO.ShrinkDemandedConstant(Op, NewMask))
return true;
-
+
// Only known if known in both the LHS and RHS.
KnownOne &= KnownOne2;
KnownZero &= KnownZero2;
@@ -1373,16 +1378,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SRL;
- }
-
- SDValue NewSA =
+ }
+
+ SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
EVT VT = Op.getValueType();
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
if (SimplifyDemandedBits(InOp, NewMask.lshr(ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
@@ -1421,7 +1426,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
unsigned ShAmt = SA->getZExtValue();
unsigned VTSize = VT.getSizeInBits();
SDValue InOp = Op.getOperand(0);
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1438,20 +1443,20 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (Diff < 0) {
Diff = -Diff;
Opc = ISD::SHL;
- }
-
+ }
+
SDValue NewSA =
TLO.DAG.getConstant(Diff, Op.getOperand(1).getValueType());
return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, dl, VT,
InOp.getOperand(0), NewSA));
}
- }
-
+ }
+
// Compute the new bits that are at the top now.
if (SimplifyDemandedBits(InOp, (NewMask << ShAmt),
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
@@ -1472,7 +1477,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
unsigned ShAmt = SA->getZExtValue();
-
+
// If the shift count is an invalid immediate, don't do anything.
if (ShAmt >= BitWidth)
break;
@@ -1484,21 +1489,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
KnownZero = KnownZero.lshr(ShAmt);
KnownOne = KnownOne.lshr(ShAmt);
-
+
// Handle the sign bit, adjusted to where it is now in the mask.
APInt SignBit = APInt::getSignBit(BitWidth).lshr(ShAmt);
-
+
// If the input sign bit is known to be zero, or if none of the top bits
// are demanded, turn this into an unsigned shift right.
if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
- return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+ return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
Op.getOperand(0),
Op.getOperand(1)));
} else if (KnownOne.intersects(SignBit)) { // New bits are known one.
@@ -1509,23 +1514,23 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SIGN_EXTEND_INREG: {
EVT EVT = cast<VTSDNode>(Op.getOperand(1))->getVT();
- // Sign extension. Compute the demanded bits in the result that are not
+ // Sign extension. Compute the demanded bits in the result that are not
// present in the input.
APInt NewBits =
APInt::getHighBitsSet(BitWidth,
BitWidth - EVT.getScalarType().getSizeInBits());
-
+
// If none of the extended bits are demanded, eliminate the sextinreg.
if ((NewBits & NewMask) == 0)
return TLO.CombineTo(Op, Op.getOperand(0));
- APInt InSignBit = APInt::getSignBit(EVT.getScalarType().getSizeInBits());
- InSignBit.zext(BitWidth);
+ APInt InSignBit =
+ APInt::getSignBit(EVT.getScalarType().getSizeInBits()).zext(BitWidth);
APInt InputDemandedBits =
APInt::getLowBitsSet(BitWidth,
EVT.getScalarType().getSizeInBits()) &
NewMask;
-
+
// Since the sign extended bits are demanded, we know that the sign
// bit is demanded.
InputDemandedBits |= InSignBit;
@@ -1533,16 +1538,16 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), InputDemandedBits,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
// If the sign bit of the input is known set or clear, then we know the
// top bits of the result.
-
+
// If the input sign bit is known zero, convert this into a zero extension.
if (KnownZero.intersects(InSignBit))
- return TLO.CombineTo(Op,
+ return TLO.CombineTo(Op,
TLO.DAG.getZeroExtendInReg(Op.getOperand(0),dl,EVT));
-
+
if (KnownOne.intersects(InSignBit)) { // Input sign bit known set
KnownOne |= NewBits;
KnownZero &= ~NewBits;
@@ -1555,23 +1560,22 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ZERO_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
-
+ APInt InMask = NewMask.trunc(OperandBitWidth);
+
// If none of the top bits are demanded, convert this into an any_extend.
APInt NewBits =
APInt::getHighBitsSet(BitWidth, BitWidth - OperandBitWidth) & NewMask;
if (!NewBits.intersects(NewMask))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
KnownZero |= NewBits;
break;
}
@@ -1581,31 +1585,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
APInt InMask = APInt::getLowBitsSet(BitWidth, InBits);
APInt InSignBit = APInt::getBitsSet(BitWidth, InBits - 1, InBits);
APInt NewBits = ~InMask & NewMask;
-
+
// If none of the top bits are demanded, convert this into an any_extend.
if (NewBits == 0)
return TLO.CombineTo(Op,TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
Op.getValueType(),
Op.getOperand(0)));
-
+
// Since some of the sign extended bits are demanded, we know that the sign
// bit is demanded.
APInt InDemandedBits = InMask & NewMask;
InDemandedBits |= InSignBit;
- InDemandedBits.trunc(InBits);
-
- if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
+ InDemandedBits = InDemandedBits.trunc(InBits);
+
+ if (SimplifyDemandedBits(Op.getOperand(0), InDemandedBits, KnownZero,
KnownOne, TLO, Depth+1))
return true;
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
-
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
+
// If the sign bit is known zero, convert this to a zero extend.
if (KnownZero.intersects(InSignBit))
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::ZERO_EXTEND, dl,
- Op.getValueType(),
+ Op.getValueType(),
Op.getOperand(0)));
-
+
// If the sign bit is known one, the top bits match.
if (KnownOne.intersects(InSignBit)) {
KnownOne |= NewBits;
@@ -1619,14 +1623,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::ANY_EXTEND: {
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt InMask = NewMask;
- InMask.trunc(OperandBitWidth);
+ APInt InMask = NewMask.trunc(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), InMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
- KnownZero.zext(BitWidth);
- KnownOne.zext(BitWidth);
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ KnownZero = KnownZero.zext(BitWidth);
+ KnownOne = KnownOne.zext(BitWidth);
break;
}
case ISD::TRUNCATE: {
@@ -1634,14 +1637,13 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// zero/one bits live out.
unsigned OperandBitWidth =
Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
- APInt TruncMask = NewMask;
- TruncMask.zext(OperandBitWidth);
+ APInt TruncMask = NewMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- KnownZero.trunc(BitWidth);
- KnownOne.trunc(BitWidth);
-
+ KnownZero = KnownZero.trunc(BitWidth);
+ KnownOne = KnownOne.trunc(BitWidth);
+
// If the input is only used by this truncate, see if we can shrink it based
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
@@ -1661,25 +1663,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
break;
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
- HighBits = HighBits.lshr(ShAmt->getZExtValue());
- HighBits.trunc(BitWidth);
+ HighBits = HighBits.lshr(ShAmt->getZExtValue()).trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
- Op.getValueType(),
+ Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
- NewTrunc,
+ NewTrunc,
In.getOperand(1)));
}
break;
}
}
-
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
break;
}
case ISD::AssertZext: {
@@ -1689,7 +1690,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
if (SimplifyDemandedBits(Op.getOperand(0), NewMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
- assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+ assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
EVT VT = cast<VTSDNode>(Op.getOperand(1))->getVT();
APInt InMask = APInt::getLowBitsSet(BitWidth,
@@ -1697,7 +1698,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
KnownZero |= ~InMask & NewMask;
break;
}
- case ISD::BIT_CONVERT:
+ case ISD::BITCAST:
#if 0
// If this is an FP->Int bitcast and if the sign bit is the only thing that
// is demanded, turn this into a FGETSIGN.
@@ -1709,7 +1710,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
isOperationLegal(ISD::FGETSIGN, Op.getValueType())) {
// Make a FGETSIGN + SHL to move the sign bit into the appropriate
// place. We expect the SHL to be eliminated by other optimizations.
- SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
+ SDValue Sign = TLO.DAG.getNode(ISD::FGETSIGN, Op.getValueType(),
Op.getOperand(0));
unsigned ShVal = Op.getValueType().getSizeInBits()-1;
SDValue ShAmt = TLO.DAG.getConstant(ShVal, getShiftAmountTy());
@@ -1742,21 +1743,21 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TLO.DAG.ComputeMaskedBits(Op, NewMask, KnownZero, KnownOne, Depth);
break;
}
-
+
// If we know the value of all of the demanded bits, return this as a
// constant.
if ((NewMask & (KnownZero|KnownOne)) == NewMask)
return TLO.CombineTo(Op, TLO.DAG.getConstant(KnownOne, Op.getValueType()));
-
+
return false;
}
-/// computeMaskedBitsForTargetNode - Determine which of the bits specified
-/// in Mask are known to be either zero or one and return them in the
+/// computeMaskedBitsForTargetNode - Determine which of the bits specified
+/// in Mask are known to be either zero or one and return them in the
/// KnownZero/KnownOne bitsets.
-void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
+void TargetLowering::computeMaskedBitsForTargetNode(const SDValue Op,
const APInt &Mask,
- APInt &KnownZero,
+ APInt &KnownZero,
APInt &KnownOne,
const SelectionDAG &DAG,
unsigned Depth) const {
@@ -1817,7 +1818,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
(KnownOne.countPopulation() == 1);
}
-/// SimplifySetCC - Try to simplify a setcc built with the specified operands
+/// SimplifySetCC - Try to simplify a setcc built with the specified operands
/// and cc. If it is unable to simplify it, return a null SDValue.
SDValue
TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
@@ -1869,6 +1870,30 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
+ SDValue CTPOP = N0;
+ // Look through truncs that don't change the value of a ctpop.
+ if (N0.hasOneUse() && N0.getOpcode() == ISD::TRUNCATE)
+ CTPOP = N0.getOperand(0);
+
+ if (CTPOP.hasOneUse() && CTPOP.getOpcode() == ISD::CTPOP &&
+ (N0 == CTPOP || N0.getValueType().getSizeInBits() >
+ Log2_32_Ceil(CTPOP.getValueType().getSizeInBits()))) {
+ EVT CTVT = CTPOP.getValueType();
+ SDValue CTOp = CTPOP.getOperand(0);
+
+ // (ctpop x) u< 2 -> (x & x-1) == 0
+ // (ctpop x) u> 1 -> (x & x-1) != 0
+ if ((Cond == ISD::SETULT && C1 == 2) || (Cond == ISD::SETUGT && C1 == 1)){
+ SDValue Sub = DAG.getNode(ISD::SUB, dl, CTVT, CTOp,
+ DAG.getConstant(1, CTVT));
+ SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Sub);
+ ISD::CondCode CC = Cond == ISD::SETULT ? ISD::SETEQ : ISD::SETNE;
+ return DAG.getSetCC(dl, VT, And, DAG.getConstant(0, CTVT), CC);
+ }
+
+ // TODO: (ctpop x) == 1 -> x && (x & x-1) == 0 iff ctpop is illegal.
+ }
+
// If the LHS is '(and load, const)', the RHS is 0,
// the test is for equality or unsigned, and all 1 bits of the const are
// in the same partial word, see if we can shorten the load.
@@ -1884,7 +1909,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!Lod->isVolatile() && Lod->isUnindexed()) {
unsigned origWidth = N0.getValueType().getSizeInBits();
unsigned maskWidth = origWidth;
- // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
+ // We can narrow (e.g.) 16-bit extending loads on 32-bit target to
// 8 bits, but have to be careful...
if (Lod->getExtensionType() != ISD::NON_EXTLOAD)
origWidth = Lod->getMemoryVT().getSizeInBits();
@@ -1916,10 +1941,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
DAG.getConstant(bestOffset, PtrType));
unsigned NewAlign = MinAlign(Lod->getAlignment(), bestOffset);
SDValue NewLoad = DAG.getLoad(newVT, dl, Lod->getChain(), Ptr,
- Lod->getSrcValue(),
- Lod->getSrcValueOffset() + bestOffset,
+ Lod->getPointerInfo().getWithOffset(bestOffset),
false, false, NewAlign);
- return DAG.getSetCC(dl, VT,
+ return DAG.getSetCC(dl, VT,
DAG.getNode(ISD::AND, dl, newVT, NewLoad,
DAG.getConstant(bestMask.trunc(bestWidth),
newVT)),
@@ -1969,7 +1993,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
(isOperationLegal(ISD::SETCC, newVT) &&
getCondCodeAction(Cond, newVT)==Legal))
return DAG.getSetCC(dl, VT, N0.getOperand(0),
- DAG.getConstant(APInt(C1).trunc(InSize), newVT),
+ DAG.getConstant(C1.trunc(InSize), newVT),
Cond);
break;
}
@@ -1987,7 +2011,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// the sign extension, it is impossible for both sides to be equal.
if (C1.getMinSignedBits() > ExtSrcTyBits)
return DAG.getConstant(Cond == ISD::SETNE, VT);
-
+
SDValue ZextOp;
EVT Op0Ty = N0.getOperand(0).getValueType();
if (Op0Ty == ExtSrcTy) {
@@ -2000,10 +2024,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(ZextOp.getNode());
// Otherwise, make this a use of a zext.
- return DAG.getSetCC(dl, VT, ZextOp,
+ return DAG.getSetCC(dl, VT, ZextOp,
DAG.getConstant(C1 & APInt::getLowBitsSet(
ExtDstTyBits,
- ExtSrcTyBits),
+ ExtSrcTyBits),
ExtDstTy),
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
@@ -2013,16 +2037,16 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
- CC = ISD::getSetCCInverse(CC,
+ CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -2038,7 +2062,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (N0.getOpcode() == ISD::XOR)
Val = N0.getOperand(0);
else {
- assert(N0.getOpcode() == ISD::AND &&
+ assert(N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR);
// ((X^1)&1)^1 -> X & 1
Val = DAG.getNode(ISD::AND, dl, N0.getValueType(),
@@ -2082,7 +2106,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
}
}
}
-
+
APInt MinVal, MaxVal;
unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
if (ISD::isSignedIntSetCC(Cond)) {
@@ -2097,7 +2121,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
if (C1 == MinVal) return DAG.getConstant(1, VT); // X >= MIN --> true
// X >= C0 --> X > (C0-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1-1, N1.getValueType()),
(Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT);
}
@@ -2105,7 +2129,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
if (C1 == MaxVal) return DAG.getConstant(1, VT); // X <= MAX --> true
// X <= C0 --> X < (C0+1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(C1+1, N1.getValueType()),
(Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT);
}
@@ -2128,12 +2152,12 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// If we have setult X, 1, turn it into seteq X, 0
if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal+1)
- return DAG.getSetCC(dl, VT, N0,
- DAG.getConstant(MinVal, N0.getValueType()),
+ return DAG.getSetCC(dl, VT, N0,
+ DAG.getConstant(MinVal, N0.getValueType()),
ISD::SETEQ);
// If we have setugt X, Max-1, turn it into seteq X, Max
else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1)
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(MaxVal, N0.getValueType()),
ISD::SETEQ);
@@ -2141,9 +2165,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
// by changing cc.
// SETUGT X, SINTMAX -> SETLT X, 0
- if (Cond == ISD::SETUGT &&
+ if (Cond == ISD::SETUGT &&
C1 == APInt::getSignedMaxValue(OperandBitSize))
- return DAG.getSetCC(dl, VT, N0,
+ return DAG.getSetCC(dl, VT, N0,
DAG.getConstant(0, N1.getValueType()),
ISD::SETLT);
@@ -2203,7 +2227,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
return DAG.getUNDEF(VT);
}
}
-
+
// Otherwise, we know the RHS is not a NaN. Simplify the node to drop the
// constant if knowing that the operand is non-nan is enough. We prefer to
// have SETO(x,x) instead of SETO(x, 0.0) because this avoids having to
@@ -2278,14 +2302,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
if (DAG.isCommutativeBinOp(N0.getOpcode())) {
// If X op Y == Y op X, try other combinations.
if (N0.getOperand(0) == N1.getOperand(1))
- return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
+ return DAG.getSetCC(dl, VT, N0.getOperand(1), N1.getOperand(0),
Cond);
if (N0.getOperand(1) == N1.getOperand(0))
- return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
+ return DAG.getSetCC(dl, VT, N0.getOperand(0), N1.getOperand(1),
Cond);
}
}
-
+
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(N1)) {
if (ConstantSDNode *LHSR = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
// Turn (X+C1) == C2 --> X == C2-C1
@@ -2295,7 +2319,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
LHSR->getAPIntValue(),
N0.getValueType()), Cond);
}
-
+
// Turn (X^C1) == C2 into X == C1^C2 iff X&~C1 = 0.
if (N0.getOpcode() == ISD::XOR)
// If we know that all of the inverted bits are zero, don't bother
@@ -2308,7 +2332,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
-
+
// Turn (C1-X) == C2 --> X == C1-C2
if (ConstantSDNode *SUBC = dyn_cast<ConstantSDNode>(N0.getOperand(0))) {
if (N0.getOpcode() == ISD::SUB && N0.getNode()->hasOneUse()) {
@@ -2319,7 +2343,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getValueType()),
Cond);
}
- }
+ }
}
// Simplify (X+Z) == X --> Z == 0
@@ -2334,7 +2358,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!");
// (Z-X) == X --> Z == X<<1
SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(),
- N1,
+ N1,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2356,7 +2380,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
} else if (N1.getNode()->hasOneUse()) {
assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!");
// X == (Z-X) --> X<<1 == Z
- SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
+ SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0,
DAG.getConstant(1, getShiftAmountTy()));
if (!DCI.isCalledByLegalizer())
DCI.AddToWorklist(SH.getNode());
@@ -2443,7 +2467,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
/// isGAPlusOffset - Returns true (and the GlobalValue and the offset) if the
/// node is a GlobalAddress + offset.
-bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
+bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue *&GA,
int64_t &Offset) const {
if (isa<GlobalAddressSDNode>(N)) {
GlobalAddressSDNode *GASD = cast<GlobalAddressSDNode>(N);
@@ -2469,6 +2493,7 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, const GlobalValue* &GA,
}
}
}
+
return false;
}
@@ -2497,7 +2522,10 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
return C_Memory;
case 'i': // Simple Integer or Relocatable Constant
case 'n': // Simple Integer
+ case 'E': // Floating Point Constant
+ case 'F': // Floating Point Constant
case 's': // Relocatable Constant
+ case 'p': // Address.
case 'X': // Allow ANY value.
case 'I': // Target registers.
case 'J':
@@ -2507,11 +2535,13 @@ TargetLowering::getConstraintType(const std::string &Constraint) const {
case 'N':
case 'O':
case 'P':
+ case '<':
+ case '>':
return C_Other;
}
}
-
- if (Constraint.size() > 1 && Constraint[0] == '{' &&
+
+ if (Constraint.size() > 1 && Constraint[0] == '{' &&
Constraint[Constraint.size()-1] == '}')
return C_Register;
return C_Unknown;
@@ -2550,7 +2580,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
// is possible and fine if either GV or C are missing.
ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
-
+
// If we have "(add GV, C)", pull out GV/C
if (Op.getOpcode() == ISD::ADD) {
C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
@@ -2562,14 +2592,14 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
if (C == 0 || GA == 0)
C = 0, GA = 0;
}
-
+
// If we find a valid operand, map to the TargetXXX version so that the
// value itself doesn't get selected.
if (GA) { // Either &GV or &GV+C
if (ConstraintLetter != 'n') {
int64_t Offs = GA->getOffset();
if (C) Offs += C->getZExtValue();
- Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
+ Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
C ? C->getDebugLoc() : DebugLoc(),
Op.getValueType(), Offs));
return;
@@ -2613,8 +2643,8 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(),
E = RI->regclass_end(); RCI != E; ++RCI) {
const TargetRegisterClass *RC = *RCI;
-
- // If none of the value types for this register class are valid, we
+
+ // If none of the value types for this register class are valid, we
// can't use it. For example, 64-bit reg classes on 32-bit targets.
bool isLegal = false;
for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end();
@@ -2624,16 +2654,16 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
break;
}
}
-
+
if (!isLegal) continue;
-
- for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
+
+ for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
-
+
return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
}
@@ -2655,6 +2685,186 @@ unsigned TargetLowering::AsmOperandInfo::getMatchedOperand() const {
}
+/// ParseConstraints - Split up the constraint string from the inline
+/// assembly value into the specific constraints and their prefixes,
+/// and also tie in the associated operand values.
+/// If this returns an empty vector, and if the constraint string itself
+/// isn't empty, there was an error parsing.
+TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
+ ImmutableCallSite CS) const {
+ /// ConstraintOperands - Information about all of the constraints.
+ AsmOperandInfoVector ConstraintOperands;
+ const InlineAsm *IA = cast<InlineAsm>(CS.getCalledValue());
+ unsigned maCount = 0; // Largest number of multiple alternative constraints.
+
+ // Do a prepass over the constraints, canonicalizing them, and building up the
+ // ConstraintOperands list.
+ InlineAsm::ConstraintInfoVector
+ ConstraintInfos = IA->ParseConstraints();
+
+ unsigned ArgNo = 0; // ArgNo - The argument of the CallInst.
+ unsigned ResNo = 0; // ResNo - The result number of the next output.
+
+ for (unsigned i = 0, e = ConstraintInfos.size(); i != e; ++i) {
+ ConstraintOperands.push_back(AsmOperandInfo(ConstraintInfos[i]));
+ AsmOperandInfo &OpInfo = ConstraintOperands.back();
+
+ // Update multiple alternative constraint count.
+ if (OpInfo.multipleAlternatives.size() > maCount)
+ maCount = OpInfo.multipleAlternatives.size();
+
+ OpInfo.ConstraintVT = MVT::Other;
+
+ // Compute the value type for each operand.
+ switch (OpInfo.Type) {
+ case InlineAsm::isOutput:
+ // Indirect outputs just consume an argument.
+ if (OpInfo.isIndirect) {
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ }
+
+ // The return value of the call is this value. As such, there is no
+ // corresponding argument.
+ assert(!CS.getType()->isVoidTy() &&
+ "Bad inline asm!");
+ if (const StructType *STy = dyn_cast<StructType>(CS.getType())) {
+ OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo));
+ } else {
+ assert(ResNo == 0 && "Asm only has one result!");
+ OpInfo.ConstraintVT = getValueType(CS.getType());
+ }
+ ++ResNo;
+ break;
+ case InlineAsm::isInput:
+ OpInfo.CallOperandVal = const_cast<Value *>(CS.getArgument(ArgNo++));
+ break;
+ case InlineAsm::isClobber:
+ // Nothing to do.
+ break;
+ }
+
+ if (OpInfo.CallOperandVal) {
+ const llvm::Type *OpTy = OpInfo.CallOperandVal->getType();
+ if (OpInfo.isIndirect) {
+ const llvm::PointerType *PtrTy = dyn_cast<PointerType>(OpTy);
+ if (!PtrTy)
+ report_fatal_error("Indirect operand for inline asm not a pointer!");
+ OpTy = PtrTy->getElementType();
+ }
+ // If OpTy is not a single value, it may be a struct/union that we
+ // can tile with integers.
+ if (!OpTy->isSingleValueType() && OpTy->isSized()) {
+ unsigned BitSize = TD->getTypeSizeInBits(OpTy);
+ switch (BitSize) {
+ default: break;
+ case 1:
+ case 8:
+ case 16:
+ case 32:
+ case 64:
+ case 128:
+ OpInfo.ConstraintVT =
+ EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true);
+ break;
+ }
+ } else if (dyn_cast<PointerType>(OpTy)) {
+ OpInfo.ConstraintVT = MVT::getIntegerVT(8*TD->getPointerSize());
+ } else {
+ OpInfo.ConstraintVT = EVT::getEVT(OpTy, true);
+ }
+ }
+ }
+
+ // If we have multiple alternative constraints, select the best alternative.
+ if (ConstraintInfos.size()) {
+ if (maCount) {
+ unsigned bestMAIndex = 0;
+ int bestWeight = -1;
+ // weight: -1 = invalid match, and 0 = so-so match to 5 = good match.
+ int weight = -1;
+ unsigned maIndex;
+ // Compute the sums of the weights for each alternative, keeping track
+ // of the best (highest weight) one so far.
+ for (maIndex = 0; maIndex < maCount; ++maIndex) {
+ int weightSum = 0;
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+ if (OpInfo.Type == InlineAsm::isClobber)
+ continue;
+
+ // If this is an output operand with a matching input operand,
+ // look up the matching input. If their types mismatch, e.g. one
+ // is an integer, the other is floating point, or their sizes are
+ // different, flag it as an maCantMatch.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ weightSum = -1; // Can't match.
+ break;
+ }
+ }
+ }
+ weight = getMultipleConstraintMatchWeight(OpInfo, maIndex);
+ if (weight == -1) {
+ weightSum = -1;
+ break;
+ }
+ weightSum += weight;
+ }
+ // Update best.
+ if (weightSum > bestWeight) {
+ bestWeight = weightSum;
+ bestMAIndex = maIndex;
+ }
+ }
+
+ // Now select chosen alternative in each constraint.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& cInfo = ConstraintOperands[cIndex];
+ if (cInfo.Type == InlineAsm::isClobber)
+ continue;
+ cInfo.selectAlternative(bestMAIndex);
+ }
+ }
+ }
+
+ // Check and hook up tied operands, choose constraint code to use.
+ for (unsigned cIndex = 0, eIndex = ConstraintOperands.size();
+ cIndex != eIndex; ++cIndex) {
+ AsmOperandInfo& OpInfo = ConstraintOperands[cIndex];
+
+ // If this is an output operand with a matching input operand, look up the
+ // matching input. If their types mismatch, e.g. one is an integer, the
+ // other is floating point, or their sizes are different, flag it as an
+ // error.
+ if (OpInfo.hasMatchingInput()) {
+ AsmOperandInfo &Input = ConstraintOperands[OpInfo.MatchingInput];
+
+ if (OpInfo.ConstraintVT != Input.ConstraintVT) {
+ if ((OpInfo.ConstraintVT.isInteger() !=
+ Input.ConstraintVT.isInteger()) ||
+ (OpInfo.ConstraintVT.getSizeInBits() !=
+ Input.ConstraintVT.getSizeInBits())) {
+ report_fatal_error("Unsupported asm: input constraint"
+ " with a matching output constraint of"
+ " incompatible type!");
+ }
+ }
+
+ }
+ }
+
+ return ConstraintOperands;
+}
+
+
/// getConstraintGenerality - Return an integer indicating how general CT
/// is.
static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
@@ -2672,6 +2882,79 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) {
}
}
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getMultipleConstraintMatchWeight(
+ AsmOperandInfo &info, int maIndex) const {
+ InlineAsm::ConstraintCodeVector *rCodes;
+ if (maIndex >= (int)info.multipleAlternatives.size())
+ rCodes = &info.Codes;
+ else
+ rCodes = &info.multipleAlternatives[maIndex].Codes;
+ ConstraintWeight BestWeight = CW_Invalid;
+
+ // Loop over the options, keeping track of the most general one.
+ for (unsigned i = 0, e = rCodes->size(); i != e; ++i) {
+ ConstraintWeight weight =
+ getSingleConstraintMatchWeight(info, (*rCodes)[i].c_str());
+ if (weight > BestWeight)
+ BestWeight = weight;
+ }
+
+ return BestWeight;
+}
+
+/// Examine constraint type and operand type and determine a weight value.
+/// This object must already have been set up with the operand type
+/// and the current alternative constraint selected.
+TargetLowering::ConstraintWeight
+ TargetLowering::getSingleConstraintMatchWeight(
+ AsmOperandInfo &info, const char *constraint) const {
+ ConstraintWeight weight = CW_Invalid;
+ Value *CallOperandVal = info.CallOperandVal;
+ // If we don't have a value, we can't do a match,
+ // but allow it at the lowest weight.
+ if (CallOperandVal == NULL)
+ return CW_Default;
+ // Look at the constraint type.
+ switch (*constraint) {
+ case 'i': // immediate integer.
+ case 'n': // immediate integer with a known value.
+ if (isa<ConstantInt>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 's': // non-explicit intregal immediate.
+ if (isa<GlobalValue>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case 'E': // immediate float if host format.
+ case 'F': // immediate float.
+ if (isa<ConstantFP>(CallOperandVal))
+ weight = CW_Constant;
+ break;
+ case '<': // memory operand with autodecrement.
+ case '>': // memory operand with autoincrement.
+ case 'm': // memory operand.
+ case 'o': // offsettable memory operand
+ case 'V': // non-offsettable memory operand
+ weight = CW_Memory;
+ break;
+ case 'r': // general register.
+ case 'g': // general register, memory operand or immediate integer.
+ // note: Clang converts "g" to "imr".
+ if (CallOperandVal->getType()->isIntegerTy())
+ weight = CW_Register;
+ break;
+ case 'X': // any operand.
+ default:
+ weight = CW_Default;
+ break;
+ }
+ return weight;
+}
+
/// ChooseConstraint - If there are multiple different constraints that we
/// could pick for this operand (e.g. "imr") try to pick the 'best' one.
/// This is somewhat tricky: constraints fall into four classes:
@@ -2721,12 +3004,12 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
break;
}
}
-
+
// Things with matching constraints can only be registers, per gcc
// documentation. This mainly affects "g" constraints.
if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput())
continue;
-
+
// This constraint letter is more general than the previous one, use it.
int Generality = getConstraintGenerality(CType);
if (Generality > BestGenerality) {
@@ -2735,7 +3018,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
BestGenerality = Generality;
}
}
-
+
OpInfo.ConstraintCode = OpInfo.Codes[BestIdx];
OpInfo.ConstraintType = BestType;
}
@@ -2744,10 +3027,10 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo,
/// type to use for the specific AsmOperandInfo, setting
/// OpInfo.ConstraintCode and OpInfo.ConstraintType.
void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
- SDValue Op,
+ SDValue Op,
SelectionDAG *DAG) const {
assert(!OpInfo.Codes.empty() && "Must have at least one constraint");
-
+
// Single-letter constraints ('r') are very common.
if (OpInfo.Codes.size() == 1) {
OpInfo.ConstraintCode = OpInfo.Codes[0];
@@ -2755,7 +3038,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
} else {
ChooseConstraint(OpInfo, *this, Op, DAG);
}
-
+
// 'X' matches anything.
if (OpInfo.ConstraintCode == "X" && OpInfo.CallOperandVal) {
// Labels and constants are handled elsewhere ('X' is the only thing
@@ -2766,7 +3049,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
OpInfo.CallOperandVal = v;
return;
}
-
+
// Otherwise, try to resolve it to something we know about by looking at
// the actual operand type.
if (const char *Repl = LowerXConstraint(OpInfo.ConstraintVT)) {
@@ -2782,7 +3065,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
/// isLegalAddressingMode - Return true if the addressing mode represented
/// by AM is legal for this target, for a load/store of the specified type.
-bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
+bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
// The default implementation of this implements a conservative RISCy, r+r and
// r+i addr mode.
@@ -2790,12 +3073,12 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allows a sign-extended 16-bit immediate field.
if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
return false;
-
+
// No global is ever allowed as a base.
if (AM.BaseGV)
return false;
-
- // Only support r+r,
+
+ // Only support r+r,
switch (AM.Scale) {
case 0: // "r+i" or just "i", depending on HasBaseReg.
break;
@@ -2810,7 +3093,7 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
// Allow 2*r as r+r.
break;
}
-
+
return true;
}
@@ -2818,19 +3101,19 @@ bool TargetLowering::isLegalAddressingMode(const AddrMode &AM,
/// return a DAG expression to select that will generate the same value by
/// multiplying by a magic number. See:
/// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html>
-SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
+SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
std::vector<SDNode*>* Created) const {
EVT VT = N->getValueType(0);
DebugLoc dl= N->getDebugLoc();
-
+
// Check to see if we can do this.
// FIXME: We should be more aggressive here.
if (!isTypeLegal(VT))
return SDValue();
-
+
APInt d = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue();
APInt::ms magics = d.magic();
-
+
// Multiply the numerator (operand 0) by the magic value
// FIXME: We should support doing a MUL in a wider type
SDValue Q;
@@ -2844,7 +3127,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
else
return SDValue(); // No mulhs or equvialent
// If d > 0 and m < 0, add the numerator
- if (d.isStrictlyPositive() && magics.m.isNegative()) {
+ if (d.isStrictlyPositive() && magics.m.isNegative()) {
Q = DAG.getNode(ISD::ADD, dl, VT, Q, N->getOperand(0));
if (Created)
Created->push_back(Q.getNode());
@@ -2857,7 +3140,7 @@ SDValue TargetLowering::BuildSDIV(SDNode *N, SelectionDAG &DAG,
}
// Shift right algebraic if shift value is nonzero
if (magics.s > 0) {
- Q = DAG.getNode(ISD::SRA, dl, VT, Q,
+ Q = DAG.getNode(ISD::SRA, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
if (Created)
Created->push_back(Q.getNode());
@@ -2908,20 +3191,20 @@ SDValue TargetLowering::BuildUDIV(SDNode *N, SelectionDAG &DAG,
if (magics.a == 0) {
assert(magics.s < N1C->getAPIntValue().getBitWidth() &&
"We shouldn't generate an undefined shift!");
- return DAG.getNode(ISD::SRL, dl, VT, Q,
+ return DAG.getNode(ISD::SRL, dl, VT, Q,
DAG.getConstant(magics.s, getShiftAmountTy()));
} else {
SDValue NPQ = DAG.getNode(ISD::SUB, dl, VT, N->getOperand(0), Q);
if (Created)
Created->push_back(NPQ.getNode());
- NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ NPQ = DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(1, getShiftAmountTy()));
if (Created)
Created->push_back(NPQ.getNode());
NPQ = DAG.getNode(ISD::ADD, dl, VT, NPQ, Q);
if (Created)
Created->push_back(NPQ.getNode());
- return DAG.getNode(ISD::SRL, dl, VT, NPQ,
+ return DAG.getNode(ISD::SRL, dl, VT, NPQ,
DAG.getConstant(magics.s-1, getShiftAmountTy()));
}
}
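
The SimplifySetCC hunk above adds a combine for comparing ctpop(x) against small constants. As a standalone sketch (illustrative names, not patch code), the underlying bit trick of clearing the lowest set bit with x & (x - 1) can be checked directly:

// Clearing the lowest set bit leaves zero iff at most one bit was set, so
//   popcount(x) < 2  <=>  (x & (x - 1)) == 0     // matches (ctpop x) u< 2
//   popcount(x) > 1  <=>  (x & (x - 1)) != 0     // matches (ctpop x) u> 1
#include <cassert>
#include <cstdint>

static bool popcountLessThanTwo(uint32_t X) {
  return (X & (X - 1)) == 0;
}

int main() {
  for (uint32_t X : {0u, 1u, 2u, 3u, 6u, 0x80000000u, 0x80000001u})
    assert(popcountLessThanTwo(X) == (__builtin_popcount(X) < 2));
  return 0;
}
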
diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp
index aeaa38b56433..7b5bca495206 100644
--- a/lib/CodeGen/ShrinkWrapping.cpp
+++ b/lib/CodeGen/ShrinkWrapping.cpp
@@ -226,7 +226,7 @@ bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
// AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
CSRegSet prevAnticIn = AnticIn[MBB];
AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
- if (prevAnticIn |= AnticIn[MBB])
+ if (prevAnticIn != AnticIn[MBB])
changed = true;
return changed;
}
@@ -264,7 +264,7 @@ bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
// AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
CSRegSet prevAvailOut = AvailOut[MBB];
AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
- if (prevAvailOut |= AvailOut[MBB])
+ if (prevAvailOut != AvailOut[MBB])
changed = true;
return changed;
}
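
The two ShrinkWrapping.cpp hunks above change the dataflow convergence test from an assigning OR to an inequality. Whatever CSRegSet's operator|= actually returns, the general pitfall the fix avoids can be shown with a plain bitset (standalone illustration, not patch code):

// A fixpoint loop should ask "did the recomputed set differ from the saved
// copy?".  The OR-assign form mutates the saved copy and answers a different
// question (here: "is the union non-empty?"), so convergence is misreported.
#include <bitset>
#include <cassert>

int main() {
  std::bitset<8> AnticIn("0011");
  std::bitset<8> prevAnticIn = AnticIn;   // saved before recomputation
  // ... assume AnticIn is recomputed here and comes out unchanged ...
  bool buggyChanged = (prevAnticIn |= AnticIn).any();  // true: spurious change
  bool fixedChanged = (prevAnticIn != AnticIn);        // false: converged
  assert(buggyChanged && !fixedChanged);
  return 0;
}
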
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b29ea19835bc..2843c1a5b6d8 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -15,6 +15,7 @@
#define DEBUG_TYPE "regcoalescing"
#include "SimpleRegisterCoalescing.h"
#include "VirtRegMap.h"
+#include "LiveDebugVariables.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/Value.h"
#include "llvm/Analysis/AliasAnalysis.h"
@@ -64,9 +65,25 @@ DisablePhysicalJoin("disable-physical-join",
cl::desc("Avoid coalescing physical register copies"),
cl::init(false), cl::Hidden);
-INITIALIZE_AG_PASS(SimpleRegisterCoalescing, RegisterCoalescer,
+static cl::opt<bool>
+VerifyCoalescing("verify-coalescing",
+ cl::desc("Verify machine instrs before and after register coalescing"),
+ cl::Hidden);
+
+INITIALIZE_AG_PASS_BEGIN(SimpleRegisterCoalescing, RegisterCoalescer,
"simple-register-coalescing", "Simple Register Coalescing",
- false, false, true);
+ false, false, true)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination)
+INITIALIZE_PASS_DEPENDENCY(PHIElimination)
+INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_AG_PASS_END(SimpleRegisterCoalescing, RegisterCoalescer,
+ "simple-register-coalescing", "Simple Register Coalescing",
+ false, false, true)
char &llvm::SimpleRegisterCoalescingID = SimpleRegisterCoalescing::ID;
@@ -75,14 +92,14 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<AliasAnalysis>();
AU.addRequired<LiveIntervals>();
AU.addPreserved<LiveIntervals>();
+ AU.addRequired<LiveDebugVariables>();
+ AU.addPreserved<LiveDebugVariables>();
AU.addPreserved<SlotIndexes>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
AU.addPreservedID(MachineDominatorsID);
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(StrongPHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
AU.addPreservedID(TwoAddressInstructionPassID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -124,7 +141,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
// Get the location that B is defined at. Two options: either this value has
// an unknown definition point or it is defined at CopyIdx. If unknown, we
// can't process it.
- if (!BValNo->getCopy()) return false;
+ if (!BValNo->isDefByCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
@@ -218,7 +235,7 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP,
continue;
LiveInterval &SRLI = li_->getInterval(*SR);
SRLI.addRange(LiveRange(FillerStart, FillerEnd,
- SRLI.getNextValue(FillerStart, 0, true,
+ SRLI.getNextValue(FillerStart, 0,
li_->getVNInfoAllocator())));
}
}
@@ -266,9 +283,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
if (BI->valno == BValNo)
continue;
- // When BValNo is null, we're looking for a dummy clobber-value for a subreg.
- if (!BValNo && !BI->valno->isDefAccurate() && !BI->valno->getCopy())
- continue;
if (BI->start <= AI->start && BI->end > AI->start)
return true;
if (BI->start > AI->start && BI->start < AI->end)
@@ -278,16 +292,6 @@ bool SimpleRegisterCoalescing::HasOtherReachingDefs(LiveInterval &IntA,
return false;
}
-static void
-TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
- for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands();
- i != e; ++i) {
- MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isImplicit())
- NewMI->addOperand(MO);
- }
-}
-
/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
@@ -324,8 +328,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
if (!li_->hasInterval(CP.getDstReg()))
return false;
- SlotIndex CopyIdx =
- li_->getInstructionIndex(CopyMI).getDefIndex();
+ SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex();
LiveInterval &IntA =
li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
@@ -334,27 +337,19 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// BValNo is a value number in B that is defined by a copy from A. 'B3' in
// the example above.
- LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
- if (BLR == IntB.end()) return false;
- VNInfo *BValNo = BLR->valno;
+ VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
+ if (!BValNo || !BValNo->isDefByCopy())
+ return false;
- // Get the location that B is defined at. Two options: either this value has
- // an unknown definition point or it is defined at CopyIdx. If unknown, we
- // can't process it.
- if (!BValNo->getCopy()) return false;
assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");
// AValNo is the value number in A that defines the copy, A3 in the example.
- LiveInterval::iterator ALR =
- IntA.FindLiveRangeContaining(CopyIdx.getUseIndex()); //
+ VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getUseIndex());
+ assert(AValNo && "COPY source not live");
- assert(ALR != IntA.end() && "Live range not found!");
- VNInfo *AValNo = ALR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
- // tested?
- if (AValNo->isPHIDef() || !AValNo->isDefAccurate() ||
- AValNo->isUnused() || AValNo->hasPHIKill())
+ // the optimization.
+ if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def);
if (!DefMI)
@@ -411,7 +406,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
return false;
}
- DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << *DefMI);
+ DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t'
+ << *DefMI);
// At this point we have decided that it is legal to do this
// transformation. Start by commuting the instruction.
@@ -427,10 +423,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
NewMI->getOperand(OpIdx).setIsKill();
- bool BHasPHIKill = BValNo->hasPHIKill();
- SmallVector<VNInfo*, 4> BDeadValNos;
- std::map<SlotIndex, SlotIndex> BExtend;
-
// If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
// A = or A, B
// ...
@@ -439,9 +431,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
// C = A<kill>
// ...
// = B
- bool Extended = BLR->end > ALR->end && ALR->end != ALR->start;
- if (Extended)
- BExtend[ALR->end] = BLR->end;
// Update uses of IntA of the specific Val# with IntB.
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg),
@@ -467,52 +456,24 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
UseMO.setReg(NewReg);
if (UseMI == CopyMI)
continue;
- if (UseMO.isKill()) {
- if (Extended)
- UseMO.setIsKill(false);
- }
if (!UseMI->isCopy())
continue;
if (UseMI->getOperand(0).getReg() != IntB.reg ||
UseMI->getOperand(0).getSubReg())
continue;
- // This copy will become a noop. If it's defining a new val#,
- // remove that val# as well. However this live range is being
- // extended to the end of the existing live range defined by the copy.
+ // This copy will become a noop. If it's defining a new val#, merge it into
+ // BValNo.
SlotIndex DefIdx = UseIdx.getDefIndex();
- const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx);
- if (!DLR)
+ VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
+ if (!DVNI)
continue;
- BHasPHIKill |= DLR->valno->hasPHIKill();
- assert(DLR->valno->def == DefIdx);
- BDeadValNos.push_back(DLR->valno);
- BExtend[DLR->start] = DLR->end;
+ DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
+ assert(DVNI->def == DefIdx);
+ BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
JoinedCopies.insert(UseMI);
}
- // We need to insert a new liverange: [ALR.start, LastUse). It may be we can
- // simply extend BLR if CopyMI doesn't end the range.
- DEBUG({
- dbgs() << "Extending: ";
- IntB.print(dbgs(), tri_);
- });
-
- // Remove val#'s defined by copies that will be coalesced away.
- for (unsigned i = 0, e = BDeadValNos.size(); i != e; ++i) {
- VNInfo *DeadVNI = BDeadValNos[i];
- if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) {
- for (const unsigned *AS = tri_->getAliasSet(IntB.reg); *AS; ++AS) {
- if (!li_->hasInterval(*AS))
- continue;
- LiveInterval &ASLI = li_->getInterval(*AS);
- if (const LiveRange *ASLR = ASLI.getLiveRangeContaining(DeadVNI->def))
- ASLI.removeValNo(ASLR->valno);
- }
- }
- IntB.removeValNo(BDeadValNos[i]);
- }
-
// Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
// is updated.
VNInfo *ValNo = BValNo;
@@ -521,30 +482,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP,
for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
AI != AE; ++AI) {
if (AI->valno != AValNo) continue;
- SlotIndex End = AI->end;
- std::map<SlotIndex, SlotIndex>::iterator
- EI = BExtend.find(End);
- if (EI != BExtend.end())
- End = EI->second;
- IntB.addRange(LiveRange(AI->start, End, ValNo));
+ IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
}
- ValNo->setHasPHIKill(BHasPHIKill);
-
- DEBUG({
- dbgs() << " result = ";
- IntB.print(dbgs(), tri_);
- dbgs() << "\nShortening: ";
- IntA.print(dbgs(), tri_);
- });
+ DEBUG(dbgs() << "\t\textended: " << IntB << '\n');
IntA.removeValNo(AValNo);
-
- DEBUG({
- dbgs() << " result = ";
- IntA.print(dbgs(), tri_);
- dbgs() << '\n';
- });
-
+ DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
++numCommutes;
return true;
}
@@ -644,6 +587,7 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx,
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerialize the definition.
bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
+ bool preserveSrcInt,
unsigned DstReg,
unsigned DstSubIdx,
MachineInstr *CopyMI) {
@@ -652,12 +596,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
assert(SrcLR != SrcInt.end() && "Live range not found!");
VNInfo *ValNo = SrcLR->valno;
// If other defs can reach uses of this def, then it's not safe to perform
- // the optimization. FIXME: Do isPHIDef and isDefAccurate both need to be
- // tested?
- if (ValNo->isPHIDef() || !ValNo->isDefAccurate() ||
- ValNo->isUnused() || ValNo->hasPHIKill())
+ // the optimization.
+ if (ValNo->isPHIDef() || ValNo->isUnused() || ValNo->hasPHIKill())
return false;
MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def);
+ if (!DefMI)
+ return false;
assert(DefMI && "Defining instruction disappeared");
const TargetInstrDesc &TID = DefMI->getDesc();
if (!TID.isAsCheapAsAMove())
@@ -681,8 +625,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
return false;
}
- // If destination register has a sub-register index on it, make sure it mtches
- // the instruction register class.
+ // If destination register has a sub-register index on it, make sure it
+ // matches the instruction register class.
if (DstSubIdx) {
const TargetInstrDesc &TID = DefMI->getDesc();
if (TID.getNumDefs() != 1)
@@ -699,30 +643,12 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
RemoveCopyFlag(DstReg, CopyMI);
- // If copy kills the source register, find the last use and propagate
- // kill.
- bool checkForDeadDef = false;
MachineBasicBlock *MBB = CopyMI->getParent();
- if (SrcLR->end == CopyIdx.getDefIndex())
- if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) {
- checkForDeadDef = true;
- }
-
MachineBasicBlock::iterator MII =
llvm::next(MachineBasicBlock::iterator(CopyMI));
tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_);
MachineInstr *NewMI = prior(MII);
- if (checkForDeadDef) {
- // PR4090 fix: Trim interval failed because there was no use of the
- // source interval in this MBB. If the def is in this MBB too then we
- // should mark it dead:
- if (DefMI->getParent() == MBB) {
- DefMI->addRegisterDead(SrcInt.reg, tri_);
- SrcLR->end = SrcLR->start.getNextSlot();
- }
- }
-
// CopyMI may have implicit operands, transfer them over to the newly
// rematerialized instruction. And update implicit def interval valnos.
for (unsigned i = CopyMI->getDesc().getNumOperands(),
@@ -734,13 +660,18 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
RemoveCopyFlag(MO.getReg(), CopyMI);
}
- TransferImplicitOps(CopyMI, NewMI);
+ NewMI->copyImplicitOps(CopyMI);
li_->ReplaceMachineInstrInMaps(CopyMI, NewMI);
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
+
+ // The source interval can become smaller because we removed a use.
+ if (preserveSrcInt)
+ li_->shrinkToUses(&SrcInt);
+
return true;
}
@@ -756,6 +687,9 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
unsigned DstReg = CP.getDstReg();
unsigned SubIdx = CP.getSubIdx();
+ // Update LiveDebugVariables.
+ ldv_->renameRegister(SrcReg, DstReg, SubIdx);
+
for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg);
MachineInstr *UseMI = I.skipInstruction();) {
// A PhysReg copy that won't be coalesced can perhaps be rematerialized
@@ -768,7 +702,7 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) {
UseMI->getOperand(0).getReg() != SrcReg &&
UseMI->getOperand(0).getReg() != DstReg &&
!JoinedCopies.count(UseMI) &&
- ReMaterializeTrivialDef(li_->getInterval(SrcReg),
+ ReMaterializeTrivialDef(li_->getInterval(SrcReg), false,
UseMI->getOperand(0).getReg(), 0, UseMI))
continue;
}
@@ -874,7 +808,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
if (li_->hasInterval(DstReg)) {
LiveInterval &LI = li_->getInterval(DstReg);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->getCopy() == CopyMI)
+ if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
}
if (!TargetRegisterInfo::isPhysicalRegister(DstReg))
@@ -884,7 +818,7 @@ void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg,
continue;
LiveInterval &LI = li_->getInterval(*AS);
if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx))
- if (LR->valno->getCopy() == CopyMI)
+ if (LR->valno->def == DefIdx)
LR->valno->setCopy(0);
}
}
@@ -1044,23 +978,19 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false;
}
- DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg());
+ DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), tri_));
// Enforce policies.
if (CP.isPhys()) {
- DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n");
+ DEBUG(dbgs() <<" with physreg " << PrintReg(CP.getDstReg(), tri_) << "\n");
// Only coalesce to allocatable physreg.
if (!li_->isAllocatable(CP.getDstReg())) {
DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n");
return false; // Not coalescable.
}
} else {
- DEBUG({
- dbgs() << " with reg%" << CP.getDstReg();
- if (CP.getSubIdx())
- dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx());
- dbgs() << " to " << CP.getNewRC()->getName() << "\n";
- });
+ DEBUG(dbgs() << " with " << PrintReg(CP.getDstReg(), tri_, CP.getSubIdx())
+ << " to " << CP.getNewRC()->getName() << "\n");
// Avoid constraining virtual register regclass too much.
if (CP.isCrossClass()) {
@@ -1114,7 +1044,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// Before giving up coalescing, if definition of source is defined by
// trivial computation, try rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI))
+ ReMaterializeTrivialDef(JoinVInt, true, CP.getDstReg(), 0, CopyMI))
return true;
++numAborts;
@@ -1134,7 +1064,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
// If definition of source is defined by trivial computation, try
// rematerializing it.
if (!CP.isFlipped() &&
- ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()),
+ ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), true,
CP.getDstReg(), 0, CopyMI))
return true;
@@ -1317,7 +1247,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
continue;
// Never join with a register that has EarlyClobber redefs.
@@ -1341,7 +1271,7 @@ bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) {
for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end();
i != e; ++i) {
VNInfo *VNI = *i;
- if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy?
+ if (VNI->isUnused() || !VNI->isDefByCopy()) // Src not defined by a copy?
continue;
// Never join with a register that has EarlyClobber redefs.
@@ -1495,9 +1425,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
std::vector<CopyRec> &TryAgain) {
DEBUG(dbgs() << MBB->getName() << ":\n");
- std::vector<CopyRec> VirtCopies;
- std::vector<CopyRec> PhysCopies;
- std::vector<CopyRec> ImpDefCopies;
+ SmallVector<CopyRec, 8> VirtCopies;
+ SmallVector<CopyRec, 8> PhysCopies;
+ SmallVector<CopyRec, 8> ImpDefCopies;
for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
MII != E;) {
MachineInstr *Inst = MII++;
@@ -1690,6 +1620,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
tri_ = tm_->getRegisterInfo();
tii_ = tm_->getInstrInfo();
li_ = &getAnalysis<LiveIntervals>();
+ ldv_ = &getAnalysis<LiveDebugVariables>();
AA = &getAnalysis<AliasAnalysis>();
loopInfo = &getAnalysis<MachineLoopInfo>();
@@ -1697,6 +1628,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
<< "********** Function: "
<< ((Value*)mf_->getFunction())->getName() << '\n');
+ if (VerifyCoalescing)
+ mf_->verify(this, "Before register coalescing");
+
for (TargetRegisterInfo::regclass_iterator I = tri_->regclass_begin(),
E = tri_->regclass_end(); I != E; ++I)
allocatableRCRegs_.insert(std::make_pair(*I,
@@ -1739,9 +1673,11 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
DoDelete = false;
if (MI->allDefsAreDead()) {
- LiveInterval &li = li_->getInterval(SrcReg);
- if (!ShortenDeadCopySrcLiveRange(li, MI))
- ShortenDeadCopyLiveRange(li, MI);
+ if (li_->hasInterval(SrcReg)) {
+ LiveInterval &li = li_->getInterval(SrcReg);
+ if (!ShortenDeadCopySrcLiveRange(li, MI))
+ ShortenDeadCopyLiveRange(li, MI);
+ }
DoDelete = true;
}
if (!DoDelete) {
@@ -1821,13 +1757,26 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
if (!MO.isReg() || !MO.isKill()) continue;
unsigned reg = MO.getReg();
if (!reg || !li_->hasInterval(reg)) continue;
- if (!li_->getInterval(reg).killedAt(DefIdx))
+ if (!li_->getInterval(reg).killedAt(DefIdx)) {
MO.setIsKill(false);
+ continue;
+ }
+ // When leaving a kill flag on a physreg, check if any subregs should
+ // remain alive.
+ if (!TargetRegisterInfo::isPhysicalRegister(reg))
+ continue;
+ for (const unsigned *SR = tri_->getSubRegisters(reg);
+ unsigned S = *SR; ++SR)
+ if (li_->hasInterval(S) && li_->getInterval(S).liveAt(DefIdx))
+ MI->addRegisterDefined(S, tri_);
}
}
}
DEBUG(dump());
+ DEBUG(ldv_->dump());
+ if (VerifyCoalescing)
+ mf_->verify(this, "After register coalescing");
return true;
}
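As a reading aid: the debug-output hunks above replace hand-rolled "%reg"/"%" prefixes with the PrintReg helper. A rough, non-authoritative illustration of the intent, with made-up register numbers and names:

// Hypothetical before/after of the coalescer debug output:
//   old:  Considering merging %reg1024 with physreg %EAX
//   new:  Considering merging %vreg1024 with %EAX
// When a sub-register index is passed, PrintReg(Reg, tri_, CP.getSubIdx())
// might print something like "%vreg1027:sub_16bit".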
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 855bdb98b36c..56703dfa2ddd 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -21,7 +21,7 @@
namespace llvm {
class SimpleRegisterCoalescing;
- class LiveVariables;
+ class LiveDebugVariables;
class TargetRegisterInfo;
class TargetInstrInfo;
class VirtRegMap;
@@ -44,6 +44,7 @@ namespace llvm {
const TargetRegisterInfo* tri_;
const TargetInstrInfo* tii_;
LiveIntervals *li_;
+ LiveDebugVariables *ldv_;
const MachineLoopInfo* loopInfo;
AliasAnalysis *AA;
@@ -63,7 +64,9 @@ namespace llvm {
public:
static char ID; // Pass identification, replacement for typeid
- SimpleRegisterCoalescing() : MachineFunctionPass(ID) {}
+ SimpleRegisterCoalescing() : MachineFunctionPass(ID) {
+ initializeSimpleRegisterCoalescingPass(*PassRegistry::getPassRegistry());
+ }
struct InstrSlots {
enum {
@@ -140,8 +143,10 @@ namespace llvm {
/// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
- bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
- unsigned DstSubIdx, MachineInstr *CopyMI);
+ /// If PreserveSrcInt is true, make sure SrcInt is valid after the call.
+ bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt,
+ unsigned DstReg, unsigned DstSubIdx,
+ MachineInstr *CopyMI);
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp
index b637980f885c..13e1454fa5f3 100644
--- a/lib/CodeGen/SjLjEHPrepare.cpp
+++ b/lib/CodeGen/SjLjEHPrepare.cpp
@@ -21,15 +21,14 @@
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
-#include "llvm/CodeGen/Passes.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/SmallVector.h"
-#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include <set>
using namespace llvm;
STATISTIC(NumInvokes, "Number of invokes replaced");
@@ -53,6 +52,7 @@ namespace {
Constant *SelectorFn;
Constant *ExceptionFn;
Constant *CallSiteFn;
+ Constant *DispatchSetupFn;
Value *CallSite;
public:
@@ -116,6 +116,8 @@ bool SjLjEHPass::doInitialization(Module &M) {
SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector);
ExceptionFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_exception);
CallSiteFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_callsite);
+ DispatchSetupFn
+ = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_dispatch_setup);
PersonalityFn = 0;
return true;
@@ -317,8 +319,12 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
Unwinds.push_back(UI);
}
}
- // If we don't have any invokes or unwinds, there's nothing to do.
- if (Unwinds.empty() && Invokes.empty()) return false;
+
+ NumInvokes += Invokes.size();
+ NumUnwinds += Unwinds.size();
+
+ // If we don't have any invokes, there's nothing to do.
+ if (Invokes.empty()) return false;
// Find the eh.selector.*, eh.exception and alloca calls.
//
@@ -332,6 +338,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
SmallVector<CallInst*,16> EH_Selectors;
SmallVector<CallInst*,16> EH_Exceptions;
SmallVector<Instruction*,16> JmpbufUpdatePoints;
+
// Note: Skip the entry block since there's nothing there that interests
// us. eh.selector and eh.exception shouldn't ever be there, and we
// want to disregard any allocas that are there.
@@ -351,228 +358,231 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) {
}
}
}
+
// If we don't have any eh.selector calls, we can't determine the personality
// function. Without a personality function, we can't process exceptions.
if (!PersonalityFn) return false;
- NumInvokes += Invokes.size();
- NumUnwinds += Unwinds.size();
+ // We have invokes, so we need to add register/unregister calls to get this
+ // function onto the global unwind stack.
+ //
+ // First thing we need to do is scan the whole function for values that are
+ // live across unwind edges. Each value that is live across an unwind edge we
+ // spill into a stack location, guaranteeing that there is nothing live across
+ // the unwind edge. This process also splits all critical edges coming out of
+ // invokes.
+ splitLiveRangesAcrossInvokes(Invokes);
+
+ BasicBlock *EntryBB = F.begin();
+ // Create an alloca for the incoming jump buffer ptr and the new jump buffer
+ // that needs to be restored on all exits from the function. This is an
+ // alloca because the value needs to be added to the global context list.
+ unsigned Align = 4; // FIXME: Should be a TLI check?
+ AllocaInst *FunctionContext =
+ new AllocaInst(FunctionContextTy, 0, Align,
+ "fcn_context", F.begin()->begin());
+
+ Value *Idxs[2];
+ const Type *Int32Ty = Type::getInt32Ty(F.getContext());
+ Value *Zero = ConstantInt::get(Int32Ty, 0);
+ // We need to also keep around a reference to the call_site field
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "call_site",
+ EntryBB->getTerminator());
+
+ // The exception selector comes back in context->data[1]
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "fc_data",
+ EntryBB->getTerminator());
+ Idxs[1] = ConstantInt::get(Int32Ty, 1);
+ Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exc_selector_gep",
+ EntryBB->getTerminator());
+ // The exception value comes back in context->data[0]
+ Idxs[1] = Zero;
+ Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
+ "exception_gep",
+ EntryBB->getTerminator());
+
+ // The result of the eh.selector call will be replaced with a reference to
+ // the selector value returned in the function context. We leave the selector
+ // itself so the EH analysis later can use it.
+ for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
+ CallInst *I = EH_Selectors[i];
+ Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
+ I->replaceAllUsesWith(SelectorVal);
+ }
- if (!Invokes.empty()) {
- // We have invokes, so we need to add register/unregister calls to get
- // this function onto the global unwind stack.
- //
- // First thing we need to do is scan the whole function for values that are
- // live across unwind edges. Each value that is live across an unwind edge
- // we spill into a stack location, guaranteeing that there is nothing live
- // across the unwind edge. This process also splits all critical edges
- // coming out of invoke's.
- splitLiveRangesAcrossInvokes(Invokes);
-
- BasicBlock *EntryBB = F.begin();
- // Create an alloca for the incoming jump buffer ptr and the new jump buffer
- // that needs to be restored on all exits from the function. This is an
- // alloca because the value needs to be added to the global context list.
- unsigned Align = 4; // FIXME: Should be a TLI check?
- AllocaInst *FunctionContext =
- new AllocaInst(FunctionContextTy, 0, Align,
- "fcn_context", F.begin()->begin());
-
- Value *Idxs[2];
- const Type *Int32Ty = Type::getInt32Ty(F.getContext());
- Value *Zero = ConstantInt::get(Int32Ty, 0);
- // We need to also keep around a reference to the call_site field
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- CallSite = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "call_site",
- EntryBB->getTerminator());
-
- // The exception selector comes back in context->data[1]
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *FCData = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "fc_data",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 1);
- Value *SelectorAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
- "exc_selector_gep",
- EntryBB->getTerminator());
- // The exception value comes back in context->data[0]
- Idxs[1] = Zero;
- Value *ExceptionAddr = GetElementPtrInst::Create(FCData, Idxs, Idxs+2,
- "exception_gep",
- EntryBB->getTerminator());
-
- // The result of the eh.selector call will be replaced with a
- // a reference to the selector value returned in the function
- // context. We leave the selector itself so the EH analysis later
- // can use it.
- for (int i = 0, e = EH_Selectors.size(); i < e; ++i) {
- CallInst *I = EH_Selectors[i];
- Value *SelectorVal = new LoadInst(SelectorAddr, "select_val", true, I);
- I->replaceAllUsesWith(SelectorVal);
- }
- // eh.exception calls are replaced with references to the proper
- // location in the context. Unlike eh.selector, the eh.exception
- // calls are removed entirely.
- for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
- CallInst *I = EH_Exceptions[i];
- // Possible for there to be duplicates, so check to make sure
- // the instruction hasn't already been removed.
- if (!I->getParent()) continue;
- Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
- const Type *Ty = Type::getInt8PtrTy(F.getContext());
- Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
-
- I->replaceAllUsesWith(Val);
- I->eraseFromParent();
- }
+ // eh.exception calls are replaced with references to the proper location in
+ // the context. Unlike eh.selector, the eh.exception calls are removed
+ // entirely.
+ for (int i = 0, e = EH_Exceptions.size(); i < e; ++i) {
+ CallInst *I = EH_Exceptions[i];
+ // Possible for there to be duplicates, so check to make sure the
+ // instruction hasn't already been removed.
+ if (!I->getParent()) continue;
+ Value *Val = new LoadInst(ExceptionAddr, "exception", true, I);
+ const Type *Ty = Type::getInt8PtrTy(F.getContext());
+ Val = CastInst::Create(Instruction::IntToPtr, Val, Ty, "", I);
+
+ I->replaceAllUsesWith(Val);
+ I->eraseFromParent();
+ }
- // The entry block changes to have the eh.sjlj.setjmp, with a conditional
- // branch to a dispatch block for non-zero returns. If we return normally,
- // we're not handling an exception and just register the function context
- // and continue.
-
- // Create the dispatch block. The dispatch block is basically a big switch
- // statement that goes to all of the invoke landing pads.
- BasicBlock *DispatchBlock =
- BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
-
- // Insert a load in the Catch block, and a switch on its value. By default,
- // we go to a block that just does an unwind (which is the correct action
- // for a standard call).
- BasicBlock *UnwindBlock =
- BasicBlock::Create(F.getContext(), "unwindbb", &F);
- Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
-
- Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
- DispatchBlock);
- SwitchInst *DispatchSwitch =
- SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
- DispatchBlock);
- // Split the entry block to insert the conditional branch for the setjmp.
- BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
- "eh.sjlj.setjmp.cont");
-
- // Populate the Function Context
- // 1. LSDA address
- // 2. Personality function address
- // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
-
- // LSDA address
- Idxs[0] = Zero;
- Idxs[1] = ConstantInt::get(Int32Ty, 4);
- Value *LSDAFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
- EntryBB->getTerminator());
- Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
- EntryBB->getTerminator());
- new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
-
- Idxs[1] = ConstantInt::get(Int32Ty, 3);
- Value *PersonalityFieldPtr =
- GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "lsda_gep",
+ // The entry block changes to have the eh.sjlj.setjmp, with a conditional
+ // branch to a dispatch block for non-zero returns. If we return normally,
+ // we're not handling an exception and just register the function context and
+ // continue.
+
+ // Create the dispatch block. The dispatch block is basically a big switch
+ // statement that goes to all of the invoke landing pads.
+ BasicBlock *DispatchBlock =
+ BasicBlock::Create(F.getContext(), "eh.sjlj.setjmp.catch", &F);
+
+ // Add a call to dispatch_setup at the start of the dispatch block. This is
+ // expanded to any target-specific setup that needs to be done.
+ Value *SetupArg =
+ CastInst::Create(Instruction::BitCast, FunctionContext,
+ Type::getInt8PtrTy(F.getContext()), "",
+ DispatchBlock);
+ CallInst::Create(DispatchSetupFn, SetupArg, "", DispatchBlock);
+
+ // Insert a load of the callsite in the dispatch block, and a switch on its
+ // value. By default, we go to a block that just does an unwind (which is the
+ // correct action for a standard call).
+ BasicBlock *UnwindBlock =
+ BasicBlock::Create(F.getContext(), "unwindbb", &F);
+ Unwinds.push_back(new UnwindInst(F.getContext(), UnwindBlock));
+
+ Value *DispatchLoad = new LoadInst(CallSite, "invoke.num", true,
+ DispatchBlock);
+ SwitchInst *DispatchSwitch =
+ SwitchInst::Create(DispatchLoad, UnwindBlock, Invokes.size(),
+ DispatchBlock);
+ // Split the entry block to insert the conditional branch for the setjmp.
+ BasicBlock *ContBlock = EntryBB->splitBasicBlock(EntryBB->getTerminator(),
+ "eh.sjlj.setjmp.cont");
+
+ // Populate the Function Context
+ // 1. LSDA address
+ // 2. Personality function address
+ // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp)
+
+ // LSDA address
+ Idxs[0] = Zero;
+ Idxs[1] = ConstantInt::get(Int32Ty, 4);
+ Value *LSDAFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ Value *LSDA = CallInst::Create(LSDAAddrFn, "lsda_addr",
+ EntryBB->getTerminator());
+ new StoreInst(LSDA, LSDAFieldPtr, true, EntryBB->getTerminator());
+
+ Idxs[1] = ConstantInt::get(Int32Ty, 3);
+ Value *PersonalityFieldPtr =
+ GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "lsda_gep",
+ EntryBB->getTerminator());
+ new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
+ EntryBB->getTerminator());
+
+ // Save the frame pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 5);
+ Value *JBufPtr
+ = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
+ "jbuf_gep",
EntryBB->getTerminator());
- new StoreInst(PersonalityFn, PersonalityFieldPtr, true,
- EntryBB->getTerminator());
-
- // Save the frame pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 5);
- Value *JBufPtr
- = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2,
- "jbuf_gep",
- EntryBB->getTerminator());
- Idxs[1] = ConstantInt::get(Int32Ty, 0);
- Value *FramePtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ Idxs[1] = ConstantInt::get(Int32Ty, 0);
+ Value *FramePtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep",
+ EntryBB->getTerminator());
+
+ Value *Val = CallInst::Create(FrameAddrFn,
+ ConstantInt::get(Int32Ty, 0),
+ "fp",
EntryBB->getTerminator());
+ new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
+
+ // Save the stack pointer.
+ Idxs[1] = ConstantInt::get(Int32Ty, 2);
+ Value *StackPtr =
+ GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
+ EntryBB->getTerminator());
+
+ Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
+ new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
+
+ // Call the setjmp intrinsic. It fills in the rest of the jmpbuf.
+ Value *SetjmpArg =
+ CastInst::Create(Instruction::BitCast, JBufPtr,
+ Type::getInt8PtrTy(F.getContext()), "",
+ EntryBB->getTerminator());
+ Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
+ "dispatch",
+ EntryBB->getTerminator());
+ // Check the return value of the setjmp. Non-zero goes to dispatcher.
+ Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
+ ICmpInst::ICMP_EQ, DispatchVal, Zero,
+ "notunwind");
+ // Nuke the uncond branch.
+ EntryBB->getTerminator()->eraseFromParent();
+
+ // Put in a new condbranch in its place.
+ BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
+
+ // Register the function context and make sure it's known to not throw
+ CallInst *Register =
+ CallInst::Create(RegisterFn, FunctionContext, "",
+ ContBlock->getTerminator());
+ Register->setDoesNotThrow();
+
+ // At this point, we are all set up, update the invoke instructions to mark
+ // their call_site values, and fill in the dispatch switch accordingly.
+ for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
+ markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
+
+ // Mark call instructions that aren't nounwind as no-action (call_site ==
+ // -1). Skip the entry block, as prior to then, no function context has been
+ // created for this function and any unexpected exceptions thrown will go
+ // directly to the caller's context, which is what we want anyway, so no need
+ // to do anything here.
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
+ for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
+ if (CallInst *CI = dyn_cast<CallInst>(I)) {
+ // Ignore calls to the EH builtins (eh.selector, eh.exception)
+ Constant *Callee = CI->getCalledFunction();
+ if (Callee != SelectorFn && Callee != ExceptionFn
+ && !CI->doesNotThrow())
+ insertCallSiteStore(CI, -1, CallSite);
+ }
+ }
- Value *Val = CallInst::Create(FrameAddrFn,
- ConstantInt::get(Int32Ty, 0),
- "fp",
- EntryBB->getTerminator());
- new StoreInst(Val, FramePtr, true, EntryBB->getTerminator());
-
- // Save the stack pointer.
- Idxs[1] = ConstantInt::get(Int32Ty, 2);
- Value *StackPtr =
- GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep",
- EntryBB->getTerminator());
-
- Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator());
- new StoreInst(Val, StackPtr, true, EntryBB->getTerminator());
-
- // Call the setjmp instrinsic. It fills in the rest of the jmpbuf.
- Value *SetjmpArg =
- CastInst::Create(Instruction::BitCast, JBufPtr,
- Type::getInt8PtrTy(F.getContext()), "",
- EntryBB->getTerminator());
- Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg,
- "dispatch",
- EntryBB->getTerminator());
- // check the return value of the setjmp. non-zero goes to dispatcher.
- Value *IsNormal = new ICmpInst(EntryBB->getTerminator(),
- ICmpInst::ICMP_EQ, DispatchVal, Zero,
- "notunwind");
- // Nuke the uncond branch.
- EntryBB->getTerminator()->eraseFromParent();
-
- // Put in a new condbranch in its place.
- BranchInst::Create(ContBlock, DispatchBlock, IsNormal, EntryBB);
-
- // Register the function context and make sure it's known to not throw
- CallInst *Register =
- CallInst::Create(RegisterFn, FunctionContext, "",
- ContBlock->getTerminator());
- Register->setDoesNotThrow();
-
- // At this point, we are all set up, update the invoke instructions
- // to mark their call_site values, and fill in the dispatch switch
- // accordingly.
- for (unsigned i = 0, e = Invokes.size(); i != e; ++i)
- markInvokeCallSite(Invokes[i], i+1, CallSite, DispatchSwitch);
-
- // Mark call instructions that aren't nounwind as no-action
- // (call_site == -1). Skip the entry block, as prior to then, no function
- // context has been created for this function and any unexpected exceptions
- // thrown will go directly to the caller's context, which is what we want
- // anyway, so no need to do anything here.
- for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) {
- for (BasicBlock::iterator I = BB->begin(), end = BB->end(); I != end; ++I)
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- // Ignore calls to the EH builtins (eh.selector, eh.exception)
- Constant *Callee = CI->getCalledFunction();
- if (Callee != SelectorFn && Callee != ExceptionFn
- && !CI->doesNotThrow())
- insertCallSiteStore(CI, -1, CallSite);
- }
- }
-
- // Replace all unwinds with a branch to the unwind handler.
- // ??? Should this ever happen with sjlj exceptions?
- for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
- BranchInst::Create(UnwindBlock, Unwinds[i]);
- Unwinds[i]->eraseFromParent();
- }
-
- // Following any allocas not in the entry block, update the saved SP
- // in the jmpbuf to the new value.
- for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
- Instruction *AI = JmpbufUpdatePoints[i];
- Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
- StackAddr->insertAfter(AI);
- Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
- StoreStackAddr->insertAfter(StackAddr);
- }
+ // Replace all unwinds with a branch to the unwind handler.
+ // ??? Should this ever happen with sjlj exceptions?
+ for (unsigned i = 0, e = Unwinds.size(); i != e; ++i) {
+ BranchInst::Create(UnwindBlock, Unwinds[i]);
+ Unwinds[i]->eraseFromParent();
+ }
- // Finally, for any returns from this function, if this function contains an
- // invoke, add a call to unregister the function context.
- for (unsigned i = 0, e = Returns.size(); i != e; ++i)
- CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+ // Following any allocas not in the entry block, update the saved SP in the
+ // jmpbuf to the new value.
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) {
+ Instruction *AI = JmpbufUpdatePoints[i];
+ Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp");
+ StackAddr->insertAfter(AI);
+ Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true);
+ StoreStackAddr->insertAfter(StackAddr);
}
+ // Finally, for any returns from this function, if this function contains an
+ // invoke, add a call to unregister the function context.
+ for (unsigned i = 0, e = Returns.size(); i != e; ++i)
+ CallInst::Create(UnregisterFn, FunctionContext, "", Returns[i]);
+
return true;
}
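As a reading aid, a rough sketch of the function-context layout implied by the constant GEP indices used in insertSjLjEHSupport above. The struct and field names are invented for illustration only; the authoritative type is the FunctionContextTy this pass builds elsewhere, and the array sizes shown are assumptions:

// Hypothetical layout implied by the GEP indices above:
struct SjLjFunctionContext {
  void *field0;        // index 0: not referenced by this pass
  int   call_site;     // index 1: "call_site" GEP, invoke number or -1
  void *data[2];       // index 2: data[0] = exception value, data[1] = selector
  void *personality;   // index 3: personality function pointer
  void *lsda;          // index 4: LSDA address
  void *jbuf[5];       // index 5: jbuf[0] = FP, jbuf[2] = SP, rest filled by setjmp
};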
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index 1bc148f160bc..6e3fa90e4341 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -41,7 +41,7 @@ namespace {
char SlotIndexes::ID = 0;
INITIALIZE_PASS(SlotIndexes, "slotindexes",
- "Slot index numbering", false, false);
+ "Slot index numbering", false, false)
IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
return &*IndexListEntryEmptyKey;
@@ -61,7 +61,6 @@ void SlotIndexes::releaseMemory() {
mi2iMap.clear();
mbb2IdxMap.clear();
idx2MBBMap.clear();
- terminatorGaps.clear();
clearList();
}
@@ -112,13 +111,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
if (mi->isDebugValue())
continue;
- if (miItr == mbb->getFirstTerminator()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
-
// Insert a store index for the instr.
push_back(createEntry(mi, index));
@@ -135,15 +127,12 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
index += (Slots + 1) * SlotIndex::NUM;
}
- if (mbb->getFirstTerminator() == mbb->end()) {
- push_back(createEntry(0, index));
- terminatorGaps.insert(
- std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += SlotIndex::NUM;
- }
+ // We insert two blank instructions between basic blocks.
+ // One to represent live-out registers and one to represent live-ins.
+ push_back(createEntry(0, index));
+ index += SlotIndex::NUM;
- // One blank instruction at the end.
- push_back(createEntry(0, index));
+ push_back(createEntry(0, index));
SlotIndex blockEndIndex(back(), SlotIndex::LOAD);
mbb2IdxMap.insert(
@@ -169,6 +158,7 @@ void SlotIndexes::renumberIndexes() {
// resulting numbering will match what would have been generated by the
// pass during the initial numbering of the function if the new instructions
// had been present.
+ DEBUG(dbgs() << "\n*** Renumbering SlotIndexes ***\n");
functionSize = 0;
unsigned index = 0;
@@ -179,7 +169,7 @@ void SlotIndexes::renumberIndexes() {
curEntry->setIndex(index);
if (curEntry->getInstr() == 0) {
- // MBB start entry or terminator gap. Just step index by 1.
+ // MBB start entry. Just step index by 1.
index += SlotIndex::NUM;
}
else {
@@ -214,11 +204,10 @@ void SlotIndexes::dump() const {
// Print a SlotIndex to a raw_ostream.
void SlotIndex::print(raw_ostream &os) const {
- os << entry().getIndex();
- if (isPHI())
- os << "*";
+ if (isValid())
+ os << entry().getIndex() << "LudS"[getSlot()];
else
- os << "LudS"[getSlot()];
+ os << "invalid";
}
// Dump a SlotIndex to stderr.
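The new SlotIndex::print above encodes the slot as a single letter via "LudS"[getSlot()]. Assuming the conventional load/use/def/store slot order, sample output would look like the following (index values are made up for illustration):

//   index 20, def slot    ->  "20d"
//   index 36, load slot   ->  "36L"
//   index 36, store slot  ->  "36S"
//   invalid SlotIndex     ->  "invalid"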
diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp
new file mode 100644
index 000000000000..9c0bf1629a14
--- /dev/null
+++ b/lib/CodeGen/SpillPlacement.cpp
@@ -0,0 +1,330 @@
+//===-- SpillPlacement.cpp - Optimal Spill Code Placement -----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the spill code placement analysis.
+//
+// Each edge bundle corresponds to a node in a Hopfield network. Constraints on
+// basic blocks are weighted by the block frequency and added to become the node
+// bias.
+//
+// Transparent basic blocks have the variable live through, but don't care if it
+// is spilled or in a register. These blocks become connections in the Hopfield
+// network, again weighted by block frequency.
+//
+// The Hopfield network minimizes (possibly locally) its energy function:
+//
+// E = -sum_n V_n * ( B_n + sum_{n, m linked by b} V_m * F_b )
+//
+// The energy function represents the expected spill code execution frequency,
+// or the cost of spilling. This is a Lyapunov function which never increases
+// when a node is updated. It is guaranteed to converge to a local minimum.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "spillplacement"
+#include "SpillPlacement.h"
+#include "llvm/CodeGen/EdgeBundles.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Format.h"
+
+using namespace llvm;
+
+char SpillPlacement::ID = 0;
+INITIALIZE_PASS_BEGIN(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+INITIALIZE_PASS_DEPENDENCY(EdgeBundles)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(SpillPlacement, "spill-code-placement",
+ "Spill Code Placement Analysis", true, true)
+
+char &llvm::SpillPlacementID = SpillPlacement::ID;
+
+void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesAll();
+ AU.addRequiredTransitive<EdgeBundles>();
+ AU.addRequiredTransitive<MachineLoopInfo>();
+ MachineFunctionPass::getAnalysisUsage(AU);
+}
+
+/// Node - Each edge bundle corresponds to a Hopfield node.
+///
+/// The node contains precomputed frequency data that only depends on the CFG,
+/// but Bias and Links are computed each time placeSpills is called.
+///
+/// The node Value is positive when the variable should be in a register. The
+/// value can change when linked nodes change, but convergence is very fast
+/// because all weights are positive.
+///
+struct SpillPlacement::Node {
+ /// Frequency - Total block frequency feeding into[0] or out of[1] the bundle.
+ /// Ideally, these two numbers should be identical, but inaccuracies in the
+ /// block frequency estimates mean that we need to normalize ingoing and
+ /// outgoing frequencies separately so they are commensurate.
+ float Frequency[2];
+
+ /// Bias - Normalized contributions from non-transparent blocks.
+ /// A bundle connected to a MustSpill block has a huge negative bias,
+ /// otherwise it is a number in the range [-2;2].
+ float Bias;
+
+ /// Value - Output value of this node computed from the Bias and links.
+ /// This is always in the range [-1;1]. A positive number means the variable
+ /// should go in a register through this bundle.
+ float Value;
+
+ typedef SmallVector<std::pair<float, unsigned>, 4> LinkVector;
+
+ /// Links - (Weight, BundleNo) for all transparent blocks connecting to other
+ /// bundles. The weights are all positive and add up to at most 2, weights
+ /// from ingoing and outgoing nodes separately add up to at most 1. The weight
+ /// sum can be less than 2 when the variable is not live into / out of some
+ /// connected basic blocks.
+ LinkVector Links;
+
+ /// preferReg - Return true when this node prefers to be in a register.
+ bool preferReg() const {
+ // Undecided nodes (Value==0) go on the stack.
+ return Value > 0;
+ }
+
+ /// mustSpill - Return True if this node is so biased that it must spill.
+ bool mustSpill() const {
+ // Actually, we must spill if Bias < sum(weights).
+ // It may be worth it to compute the weight sum here?
+ return Bias < -2.0f;
+ }
+
+ /// Node - Create a blank Node.
+ Node() {
+ Frequency[0] = Frequency[1] = 0;
+ }
+
+ /// clear - Reset per-query data, but preserve frequencies that only depend on
+ /// the CFG.
+ void clear() {
+ Bias = Value = 0;
+ Links.clear();
+ }
+
+ /// addLink - Add a link to bundle b with weight w.
+ /// out=0 for an ingoing link, and 1 for an outgoing link.
+ void addLink(unsigned b, float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w /= Frequency[out];
+
+ // There can be multiple links to the same bundle, add them up.
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ if (I->second == b) {
+ I->first += w;
+ return;
+ }
+ // This must be the first link to b.
+ Links.push_back(std::make_pair(w, b));
+ }
+
+ /// addBias - Bias this node from an ingoing[0] or outgoing[1] link.
+ void addBias(float w, bool out) {
+ // Normalize w relative to all connected blocks from that direction.
+ w /= Frequency[out];
+ Bias += w;
+ }
+
+ /// update - Recompute Value from Bias and Links. Return true when node
+ /// preference changes.
+ bool update(const Node nodes[]) {
+ // Compute the weighted sum of inputs.
+ float Sum = Bias;
+ for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I)
+ Sum += I->first * nodes[I->second].Value;
+
+ // The weighted sum is going to be in the range [-2;2]. Ideally, we should
+ // simply set Value = sign(Sum), but we will add a dead zone around 0 for
+ // two reasons:
+ // 1. It avoids arbitrary bias when all links are 0 as is possible during
+ // initial iterations.
+ // 2. It helps tame rounding errors when the links nominally sum to 0.
+ const float Thres = 1e-4f;
+ bool Before = preferReg();
+ if (Sum < -Thres)
+ Value = -1;
+ else if (Sum > Thres)
+ Value = 1;
+ else
+ Value = 0;
+ return Before != preferReg();
+ }
+};
+
+bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) {
+ MF = &mf;
+ bundles = &getAnalysis<EdgeBundles>();
+ loops = &getAnalysis<MachineLoopInfo>();
+
+ assert(!nodes && "Leaking node array");
+ nodes = new Node[bundles->getNumBundles()];
+
+ // Compute total ingoing and outgoing block frequencies for all bundles.
+ for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) {
+ float Freq = getBlockFrequency(I);
+ unsigned Num = I->getNumber();
+ nodes[bundles->getBundle(Num, 1)].Frequency[0] += Freq;
+ nodes[bundles->getBundle(Num, 0)].Frequency[1] += Freq;
+ }
+
+ // We never change the function.
+ return false;
+}
+
+void SpillPlacement::releaseMemory() {
+ delete[] nodes;
+ nodes = 0;
+}
+
+/// activate - mark node n as active if it wasn't already.
+void SpillPlacement::activate(unsigned n) {
+ if (ActiveNodes->test(n))
+ return;
+ ActiveNodes->set(n);
+ nodes[n].clear();
+}
+
+
+/// prepareNodes - Compute node biases and weights from a set of constraints.
+/// Set a bit in NodeMask for each active node.
+void SpillPlacement::
+prepareNodes(const SmallVectorImpl<BlockConstraint> &LiveBlocks) {
+ for (SmallVectorImpl<BlockConstraint>::const_iterator I = LiveBlocks.begin(),
+ E = LiveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = MF->getBlockNumbered(I->Number);
+ float Freq = getBlockFrequency(MBB);
+
+ // Is this a transparent block? Link ingoing and outgoing bundles.
+ if (I->Entry == DontCare && I->Exit == DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ unsigned ob = bundles->getBundle(I->Number, 1);
+
+ // Ignore self-loops.
+ if (ib == ob)
+ continue;
+ activate(ib);
+ activate(ob);
+ nodes[ib].addLink(ob, Freq, 1);
+ nodes[ob].addLink(ib, Freq, 0);
+ continue;
+ }
+
+ // This block is not transparent, but it can still add bias.
+ const float Bias[] = {
+ 0, // DontCare,
+ 1, // PrefReg,
+ -1, // PrefSpill
+ -HUGE_VALF // MustSpill
+ };
+
+ // Live-in to block?
+ if (I->Entry != DontCare) {
+ unsigned ib = bundles->getBundle(I->Number, 0);
+ activate(ib);
+ nodes[ib].addBias(Freq * Bias[I->Entry], 1);
+ }
+
+ // Live-out from block?
+ if (I->Exit != DontCare) {
+ unsigned ob = bundles->getBundle(I->Number, 1);
+ activate(ob);
+ nodes[ob].addBias(Freq * Bias[I->Exit], 0);
+ }
+ }
+}
+
+/// iterate - Repeatedly update the Hopfield nodes until stability or the
+/// maximum number of iterations is reached.
+/// @param Linked - Numbers of linked nodes that need updating.
+void SpillPlacement::iterate(const SmallVectorImpl<unsigned> &Linked) {
+ if (Linked.empty())
+ return;
+
+ // Run up to 10 iterations. The edge bundle numbering is closely related to
+ // basic block numbering, so there is a strong tendency towards chains of
+ // linked nodes with sequential numbers. By scanning the linked nodes
+ // backwards and forwards, we make it very likely that a single node can
+ // affect the entire network in a single iteration. That means very fast
+ // convergence, usually in a single iteration.
+ for (unsigned iteration = 0; iteration != 10; ++iteration) {
+ // Scan backwards, skipping the last node which was just updated.
+ bool Changed = false;
+ for (SmallVectorImpl<unsigned>::const_reverse_iterator I =
+ llvm::next(Linked.rbegin()), E = Linked.rend(); I != E; ++I) {
+ unsigned n = *I;
+ bool C = nodes[n].update(nodes);
+ Changed |= C;
+ }
+ if (!Changed)
+ return;
+
+ // Scan forwards, skipping the first node which was just updated.
+ Changed = false;
+ for (SmallVectorImpl<unsigned>::const_iterator I =
+ llvm::next(Linked.begin()), E = Linked.end(); I != E; ++I) {
+ unsigned n = *I;
+ bool C = nodes[n].update(nodes);
+ Changed |= C;
+ }
+ if (!Changed)
+ return;
+ }
+}
+
+bool
+SpillPlacement::placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+ BitVector &RegBundles) {
+ // Reuse RegBundles as our ActiveNodes vector.
+ ActiveNodes = &RegBundles;
+ ActiveNodes->clear();
+ ActiveNodes->resize(bundles->getNumBundles());
+
+ // Compute active nodes, links and biases.
+ prepareNodes(LiveBlocks);
+
+ // Update all active nodes, and find the ones that are actually linked to
+ // something so their value may change when iterating.
+ SmallVector<unsigned, 8> Linked;
+ for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n)) {
+ nodes[n].update(nodes);
+ // A node that must spill, or a node without any links, is not going to
+ // change its value ever again, so exclude it from iterations.
+ if (!nodes[n].Links.empty() && !nodes[n].mustSpill())
+ Linked.push_back(n);
+ }
+
+ // Iterate the network to convergence.
+ iterate(Linked);
+
+ // Write preferences back to RegBundles.
+ bool Perfect = true;
+ for (int n = RegBundles.find_first(); n>=0; n = RegBundles.find_next(n))
+ if (!nodes[n].preferReg()) {
+ RegBundles.reset(n);
+ Perfect = false;
+ }
+ return Perfect;
+}
+
+/// getBlockFrequency - Return our best estimate of the block frequency which is
+/// the expected number of block executions per function invocation.
+float SpillPlacement::getBlockFrequency(const MachineBasicBlock *MBB) {
+ // Use the unnormalized spill weight for real block frequencies.
+ return LiveIntervals::getSpillWeight(true, false, loops->getLoopDepth(MBB));
+}
+
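Written out as math, the asynchronous update implemented by Node::update above is, in the notation of the file header (a sketch, assuming symmetric link weights w_{nm} between linked bundles):

\[
V_n \leftarrow \operatorname{sign}_\epsilon\!\Big( B_n + \sum_{m} w_{nm} V_m \Big),
\qquad
\operatorname{sign}_\epsilon(x) =
\begin{cases}
+1 & x > \epsilon \\
0 & |x| \le \epsilon \\
-1 & x < -\epsilon
\end{cases},
\qquad \epsilon = 10^{-4}.
\]

Here B_n is the node bias and the w_{nm} are the normalized weights from Links. Per the file header, each such update cannot increase the energy E, which is why iterate() settles quickly, usually within one backward/forward sweep.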
diff --git a/lib/CodeGen/SpillPlacement.h b/lib/CodeGen/SpillPlacement.h
new file mode 100644
index 000000000000..ef2d516cdce7
--- /dev/null
+++ b/lib/CodeGen/SpillPlacement.h
@@ -0,0 +1,108 @@
+//===-- SpillPlacement.h - Optimal Spill Code Placement --------*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This analysis computes the optimal spill code placement between basic blocks.
+//
+// The runOnMachineFunction() method only precomputes some profiling information
+// about the CFG. The real work is done by placeSpills() which is called by the
+// register allocator.
+//
+// Given a variable that is live across multiple basic blocks, and given
+// constraints on the basic blocks where the variable is live, determine which
+// edge bundles should have the variable in a register and which edge bundles
+// should have the variable in a stack slot.
+//
+// The returned bit vector can be used to place optimal spill code at basic
+// block entries and exits. Spill code placement inside a basic block is not
+// considered.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_SPILLPLACEMENT_H
+#define LLVM_CODEGEN_SPILLPLACEMENT_H
+
+#include "llvm/CodeGen/MachineFunctionPass.h"
+
+namespace llvm {
+
+class BitVector;
+class EdgeBundles;
+class MachineBasicBlock;
+class MachineLoopInfo;
+template <typename> class SmallVectorImpl;
+
+class SpillPlacement : public MachineFunctionPass {
+ struct Node;
+ const MachineFunction *MF;
+ const EdgeBundles *bundles;
+ const MachineLoopInfo *loops;
+ Node *nodes;
+
+ // Nodes that are active in the current computation. Owned by the placeSpills
+ // caller.
+ BitVector *ActiveNodes;
+
+public:
+ static char ID; // Pass identification, replacement for typeid.
+
+ SpillPlacement() : MachineFunctionPass(ID), nodes(0) {}
+ ~SpillPlacement() { releaseMemory(); }
+
+ /// BorderConstraint - A basic block has separate constraints for entry and
+ /// exit.
+ enum BorderConstraint {
+ DontCare, ///< Block doesn't care / variable not live.
+ PrefReg, ///< Block entry/exit prefers a register.
+ PrefSpill, ///< Block entry/exit prefers a stack slot.
+ MustSpill ///< A register is impossible, variable must be spilled.
+ };
+
+ /// BlockConstraint - Entry and exit constraints for a basic block.
+ struct BlockConstraint {
+ unsigned Number; ///< Basic block number (from MBB::getNumber()).
+ BorderConstraint Entry : 8; ///< Constraint on block entry.
+ BorderConstraint Exit : 8; ///< Constraint on block exit.
+ };
+
+ /// placeSpills - Compute the optimal spill code placement given the
+ /// constraints. No MustSpill constraints will be violated, and the smallest
+ /// possible number of PrefX constraints will be violated, weighted by
+ /// expected execution frequencies.
+ /// @param LiveBlocks Constraints for blocks that have the variable live in or
+ /// live out. DontCare/DontCare means the variable is live
+ /// through the block. DontCare/X means the variable is live
+ /// out, but not live in.
+ /// @param RegBundles Bit vector to receive the edge bundles where the
+ /// variable should be kept in a register. Each bit
+ /// corresponds to an edge bundle, a set bit means the
+ /// variable should be kept in a register through the
+ /// bundle. A clear bit means the variable should be
+ /// spilled.
+ /// @return True if a perfect solution was found, allowing the variable to be
+ /// in a register through all relevant bundles.
+ bool placeSpills(const SmallVectorImpl<BlockConstraint> &LiveBlocks,
+ BitVector &RegBundles);
+
+ /// getBlockFrequency - Return the estimated block execution frequency per
+ /// function invocation.
+ float getBlockFrequency(const MachineBasicBlock*);
+
+private:
+ virtual bool runOnMachineFunction(MachineFunction&);
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ virtual void releaseMemory();
+
+ void activate(unsigned);
+ void prepareNodes(const SmallVectorImpl<BlockConstraint>&);
+ void iterate(const SmallVectorImpl<unsigned>&);
+};
+
+} // end namespace llvm
+
+#endif
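A minimal sketch of how a register allocation pass might call this analysis. The surrounding pass boilerplate and the single-block constraint are hypothetical; only the SpillPlacement API shown matches the header above:

// Hypothetical caller inside a MachineFunctionPass that requires SpillPlacement.
SpillPlacement &SP = getAnalysis<SpillPlacement>();

SmallVector<SpillPlacement::BlockConstraint, 8> LiveBlocks;
SpillPlacement::BlockConstraint BC;
BC.Number = MBB->getNumber();         // block where the candidate vreg is live
BC.Entry = SpillPlacement::PrefReg;   // live-in use wants a register
BC.Exit  = SpillPlacement::DontCare;  // not live out of this block
LiveBlocks.push_back(BC);

BitVector RegBundles;
bool Perfect = SP.placeSpills(LiveBlocks, RegBundles);
// Set bits in RegBundles name edge bundles where the variable should stay in a
// register; clear bits mean spill code belongs on those edges. Perfect is true
// when the variable can stay in a register through all relevant bundles.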
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 59d5ab33c994..fd385824aff9 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -12,6 +12,7 @@
#include "Spiller.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -28,7 +29,7 @@
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard, splitting, inline_ };
+ enum SpillerName { trivial, standard, inline_ };
}
static cl::opt<SpillerName>
@@ -37,7 +38,6 @@ spillerOpt("spiller",
cl::Prefix,
cl::values(clEnumVal(trivial, "trivial spiller"),
clEnumVal(standard, "default spiller"),
- clEnumVal(splitting, "splitting spiller"),
clEnumValN(inline_, "inline", "inline spiller"),
clEnumValEnd),
cl::init(standard));
@@ -80,7 +80,7 @@ protected:
assert(li->weight != HUGE_VALF &&
"Attempting to spill already spilled value.");
- assert(!li->isStackSlot() &&
+ assert(!TargetRegisterInfo::isStackSlot(li->reg) &&
"Trying to spill a stack slot.");
DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n");
@@ -144,7 +144,7 @@ protected:
vrm->addSpillSlotUse(ss, loadInstr);
SlotIndex endIndex = loadIndex.getNextIndex();
VNInfo *loadVNI =
- newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(loadIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI));
}
@@ -158,7 +158,7 @@ protected:
vrm->addSpillSlotUse(ss, storeInstr);
SlotIndex beginIndex = storeIndex.getPrevIndex();
VNInfo *storeVNI =
- newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator());
+ newLI->getNextValue(beginIndex, 0, lis->getVNInfoAllocator());
newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI));
}
@@ -182,7 +182,7 @@ public:
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &) {
+ const SmallVectorImpl<LiveInterval*> &) {
// Ignore spillIs - we don't use it.
trivialSpillEverywhere(li, newIntervals);
}
@@ -195,315 +195,42 @@ namespace {
/// Falls back on LiveIntervals::addIntervalsForSpills.
class StandardSpiller : public Spiller {
protected:
+ MachineFunction *mf;
LiveIntervals *lis;
+ LiveStacks *lss;
MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
StandardSpiller(MachineFunctionPass &pass, MachineFunction &mf,
VirtRegMap &vrm)
- : lis(&pass.getAnalysis<LiveIntervals>()),
+ : mf(&mf),
+ lis(&pass.getAnalysis<LiveIntervals>()),
+ lss(&pass.getAnalysis<LiveStacks>()),
loopInfo(pass.getAnalysisIfAvailable<MachineLoopInfo>()),
vrm(&vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
+ const SmallVectorImpl<LiveInterval*> &spillIs) {
std::vector<LiveInterval*> added =
lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
newIntervals.insert(newIntervals.end(), added.begin(), added.end());
- }
-};
-
-} // end anonymous namespace
-
-namespace {
-
-/// When a call to spill is placed this spiller will first try to break the
-/// interval up into its component values (one new interval per value).
-/// If this fails, or if a call is placed to spill a previously split interval
-/// then the spiller falls back on the standard spilling mechanism.
-class SplittingSpiller : public StandardSpiller {
-public:
- SplittingSpiller(MachineFunctionPass &pass, MachineFunction &mf,
- VirtRegMap &vrm)
- : StandardSpiller(pass, mf, vrm) {
- mri = &mf.getRegInfo();
- tii = mf.getTarget().getInstrInfo();
- tri = mf.getTarget().getRegisterInfo();
- }
- void spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) {
- if (worthTryingToSplit(li))
- tryVNISplit(li);
- else
- StandardSpiller::spill(li, newIntervals, spillIs);
+ // Update LiveStacks.
+ int SS = vrm->getStackSlot(li->reg);
+ if (SS == VirtRegMap::NO_STACK_SLOT)
+ return;
+ const TargetRegisterClass *RC = mf->getRegInfo().getRegClass(li->reg);
+ LiveInterval &SI = lss->getOrCreateInterval(SS, RC);
+ if (!SI.hasAtLeastOneValue())
+ SI.getNextValue(SlotIndex(), 0, lss->getVNInfoAllocator());
+ SI.MergeRangesInAsValue(*li, SI.getValNumInfo(0));
}
-
-private:
-
- MachineRegisterInfo *mri;
- const TargetInstrInfo *tii;
- const TargetRegisterInfo *tri;
- DenseSet<LiveInterval*> alreadySplit;
-
- bool worthTryingToSplit(LiveInterval *li) const {
- return (!alreadySplit.count(li) && li->getNumValNums() > 1);
- }
-
- /// Try to break a LiveInterval into its component values.
- std::vector<LiveInterval*> tryVNISplit(LiveInterval *li) {
-
- DEBUG(dbgs() << "Trying VNI split of %reg" << *li << "\n");
-
- std::vector<LiveInterval*> added;
- SmallVector<VNInfo*, 4> vnis;
-
- std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
-
- for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
- vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
- VNInfo *vni = *vniItr;
-
- // Skip unused VNIs.
- if (vni->isUnused())
- continue;
-
- DEBUG(dbgs() << " Extracted Val #" << vni->id << " as ");
- LiveInterval *splitInterval = extractVNI(li, vni);
-
- if (splitInterval != 0) {
- DEBUG(dbgs() << *splitInterval << "\n");
- added.push_back(splitInterval);
- alreadySplit.insert(splitInterval);
- } else {
- DEBUG(dbgs() << "0\n");
- }
- }
-
- DEBUG(dbgs() << "Original LI: " << *li << "\n");
-
- // If there original interval still contains some live ranges
- // add it to added and alreadySplit.
- if (!li->empty()) {
- added.push_back(li);
- alreadySplit.insert(li);
- }
-
- return added;
- }
-
- /// Extract the given value number from the interval.
- LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
- assert(vni->isDefAccurate() || vni->isPHIDef());
-
- // Create a new vreg and live interval, copy VNI ranges over.
- const TargetRegisterClass *trc = mri->getRegClass(li->reg);
- unsigned newVReg = mri->createVirtualRegister(trc);
- vrm->grow();
- LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
- VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
-
- // Start by copying all live ranges in the VN to the new interval.
- for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
- rItr != rEnd; ++rItr) {
- if (rItr->valno == vni) {
- newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
- }
- }
-
- // Erase the old VNI & ranges.
- li->removeValNo(vni);
-
- // Collect all current uses of the register belonging to the given VNI.
- // We'll use this to rename the register after we've dealt with the def.
- std::set<MachineInstr*> uses;
- for (MachineRegisterInfo::use_iterator
- useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
- useItr != useEnd; ++useItr) {
- uses.insert(&*useItr);
- }
-
- // Process the def instruction for this VNI.
- if (newVNI->isPHIDef()) {
- // Insert a copy at the start of the MBB. The range proceeding the
- // copy will be attached to the original LiveInterval.
- MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
- MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
- 0, false, lis->getVNInfoAllocator());
- phiDefVNI->setIsPHIDef(true);
- li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
- LiveRange *oldPHIDefRange =
- newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
-
- // If the old phi def starts in the middle of the range chop it up.
- if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
- LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
- oldPHIDefRange->valno);
- oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
- newLI->addRange(oldPHIDefRange2);
- } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
- // Otherwise if it's at the start of the range just trim it.
- oldPHIDefRange->start = copyIdx.getDefIndex();
- } else {
- assert(false && "PHI def range doesn't cover PHI def?");
- }
-
- newVNI->def = copyIdx.getDefIndex();
- newVNI->setCopy(copyMI);
- newVNI->setIsPHIDef(false); // not a PHI def anymore.
- newVNI->setIsDefAccurate(true);
- } else {
- // non-PHI def. Rename the def. If it's two-addr that means renaming the
- // use and inserting a new copy too.
- MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
- // We'll rename this now, so we can remove it from uses.
- uses.erase(defInst);
- unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
- bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
- twoAddrUseIsUndef = false;
-
- for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
- MachineOperand &mo = defInst->getOperand(i);
- if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
- mo.setReg(newVReg);
- if (isTwoAddr && mo.isUse() && mo.isUndef())
- twoAddrUseIsUndef = true;
- }
- }
-
- SlotIndex defIdx = lis->getInstructionIndex(defInst);
- newVNI->def = defIdx.getDefIndex();
-
- if (isTwoAddr && !twoAddrUseIsUndef) {
- MachineBasicBlock *defMBB = defInst->getParent();
- MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
- LiveRange *origUseRange =
- li->getLiveRangeContaining(newVNI->def.getUseIndex());
- origUseRange->end = copyIdx.getDefIndex();
- VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
- true, lis->getVNInfoAllocator());
- LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- for (std::set<MachineInstr*>::iterator
- usesItr = uses.begin(), usesEnd = uses.end();
- usesItr != usesEnd; ++usesItr) {
- MachineInstr *useInst = *usesItr;
- SlotIndex useIdx = lis->getInstructionIndex(useInst);
- LiveRange *useRange =
- newLI->getLiveRangeContaining(useIdx.getUseIndex());
-
- // If this use doesn't belong to the new interval skip it.
- if (useRange == 0)
- continue;
-
- // This use doesn't belong to the VNI, skip it.
- if (useRange->valno != newVNI)
- continue;
-
- // Check if this instr is two address.
- unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
- bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
-
- // Rename uses (and defs for two-address instrs).
- for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
- MachineOperand &mo = useInst->getOperand(i);
- if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
- (mo.getReg() == li->reg)) {
- mo.setReg(newVReg);
- }
- }
-
- // If this is a two address instruction we've got some extra work to do.
- if (isTwoAddress) {
- // We modified the def operand, so we need to copy back to the original
- // reg.
- MachineBasicBlock *useMBB = useInst->getParent();
- MachineBasicBlock::iterator useItr(useInst);
- MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(),
- tii->get(TargetOpcode::COPY), newVReg)
- .addReg(li->reg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Change the old two-address defined range & vni to start at
- // (and be defined by) the copy.
- LiveRange *origDefRange =
- li->getLiveRangeContaining(useIdx.getDefIndex());
- origDefRange->start = copyIdx.getDefIndex();
- origDefRange->valno->def = copyIdx.getDefIndex();
- origDefRange->valno->setCopy(copyMI);
-
- // Insert a new range & vni for the two-address-to-copy value. This
- // will be attached to the new live interval.
- VNInfo *copyVNI =
- newLI->getNextValue(useIdx.getDefIndex(), 0, true,
- lis->getVNInfoAllocator());
- LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
- newLI->addRange(copyRange);
- }
- }
-
- // Iterate over any PHI kills - we'll need to insert new copies for them.
- for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end();
- LRI != LRE; ++LRI) {
- if (LRI->valno != newVNI || LRI->end.isPHI())
- continue;
- SlotIndex killIdx = LRI->end;
- MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
- MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(),
- DebugLoc(), tii->get(TargetOpcode::COPY),
- li->reg)
- .addReg(newVReg, RegState::Kill);
- SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
-
- // Save the current end. We may need it to add a new range if the
- // current range runs of the end of the MBB.
- SlotIndex newKillRangeEnd = LRI->end;
- LRI->end = copyIdx.getDefIndex();
-
- if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) {
- assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) &&
- "PHI kill range doesn't reach kill-block end. Not sane.");
- newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB),
- newKillRangeEnd, newVNI));
- }
-
- VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
- copyMI, true,
- lis->getVNInfoAllocator());
- newKillVNI->setHasPHIKill(true);
- li->addRange(LiveRange(copyIdx.getDefIndex(),
- lis->getMBBEndIdx(killMBB),
- newKillVNI));
- }
- newVNI->setHasPHIKill(false);
-
- return newLI;
- }
-
};
} // end anonymous namespace
-
-namespace llvm {
-Spiller *createInlineSpiller(MachineFunctionPass &pass,
- MachineFunction &mf,
- VirtRegMap &vrm);
-}
-
llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm) {
@@ -511,7 +238,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunctionPass &pass,
default: assert(0 && "unknown spiller");
case trivial: return new TrivialSpiller(pass, mf, vrm);
case standard: return new StandardSpiller(pass, mf, vrm);
- case splitting: return new SplittingSpiller(pass, mf, vrm);
case inline_: return createInlineSpiller(pass, mf, vrm);
}
}
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index 59bc0ec6ae70..f017583494ed 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -10,14 +10,13 @@
#ifndef LLVM_CODEGEN_SPILLER_H
#define LLVM_CODEGEN_SPILLER_H
-#include "llvm/ADT/SmallVector.h"
-
namespace llvm {
class LiveInterval;
class MachineFunction;
class MachineFunctionPass;
class SlotIndex;
+ template <typename T> class SmallVectorImpl;
class VirtRegMap;
/// Spiller interface.
@@ -37,7 +36,7 @@ namespace llvm {
/// @param newIntervals The newly created intervals will be appended here.
virtual void spill(LiveInterval *li,
SmallVectorImpl<LiveInterval*> &newIntervals,
- SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+ const SmallVectorImpl<LiveInterval*> &spillIs) = 0;
};
@@ -45,6 +44,13 @@ namespace llvm {
Spiller* createSpiller(MachineFunctionPass &pass,
MachineFunction &mf,
VirtRegMap &vrm);
+
+ /// Create and return a spiller that will insert spill code directly instead
+ /// of deferring through VirtRegMap.
+ Spiller *createInlineSpiller(MachineFunctionPass &pass,
+ MachineFunction &mf,
+ VirtRegMap &vrm);
+
}
#endif
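// Illustrative sketch of a hypothetical caller of the factory above, using the
// const-qualified spill() signature from this header. The names Pass, MF, VRM,
// LI, NewIntervals and SpillIs are made-up locals, not taken from the sources.
//
//   SmallVector<LiveInterval*, 4> NewIntervals;
//   SmallVector<LiveInterval*, 8> SpillIs;        // intervals already being spilled
//   Spiller *S = createSpiller(Pass, MF, VRM);    // or createInlineSpiller(...)
//   S->spill(&LI, NewIntervals, SpillIs);         // replacement ranges are appended
//   delete S;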
diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp
index 29474f0d5512..5663936bf3aa 100644
--- a/lib/CodeGen/SplitKit.cpp
+++ b/lib/CodeGen/SplitKit.cpp
@@ -12,13 +12,14 @@
//
//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "splitter"
+#define DEBUG_TYPE "regalloc"
#include "SplitKit.h"
+#include "LiveRangeEdit.h"
#include "VirtRegMap.h"
#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
@@ -36,371 +37,231 @@ AllowSplit("spiller-splits-edges",
// Split Analysis
//===----------------------------------------------------------------------===//
-SplitAnalysis::SplitAnalysis(const MachineFunction &mf,
+SplitAnalysis::SplitAnalysis(const VirtRegMap &vrm,
const LiveIntervals &lis,
const MachineLoopInfo &mli)
- : mf_(mf),
- lis_(lis),
- loops_(mli),
- tii_(*mf.getTarget().getInstrInfo()),
- curli_(0) {}
+ : MF(vrm.getMachineFunction()),
+ VRM(vrm),
+ LIS(lis),
+ Loops(mli),
+ TII(*MF.getTarget().getInstrInfo()),
+ CurLI(0) {}
void SplitAnalysis::clear() {
- usingInstrs_.clear();
- usingBlocks_.clear();
- usingLoops_.clear();
- curli_ = 0;
+ UseSlots.clear();
+ UsingInstrs.clear();
+ UsingBlocks.clear();
+ LiveBlocks.clear();
+ CurLI = 0;
}
bool SplitAnalysis::canAnalyzeBranch(const MachineBasicBlock *MBB) {
MachineBasicBlock *T, *F;
SmallVector<MachineOperand, 4> Cond;
- return !tii_.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
+ return !TII.AnalyzeBranch(const_cast<MachineBasicBlock&>(*MBB), T, F, Cond);
}
-/// analyzeUses - Count instructions, basic blocks, and loops using curli.
+/// analyzeUses - Count instructions and basic blocks using CurLI.
void SplitAnalysis::analyzeUses() {
- const MachineRegisterInfo &MRI = mf_.getRegInfo();
- for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(curli_->reg);
- MachineInstr *MI = I.skipInstruction();) {
- if (MI->isDebugValue() || !usingInstrs_.insert(MI))
+ const MachineRegisterInfo &MRI = MF.getRegInfo();
+ for (MachineRegisterInfo::reg_iterator I = MRI.reg_begin(CurLI->reg),
+ E = MRI.reg_end(); I != E; ++I) {
+ MachineOperand &MO = I.getOperand();
+ if (MO.isUse() && MO.isUndef())
continue;
- MachineBasicBlock *MBB = MI->getParent();
- if (usingBlocks_[MBB]++)
+ MachineInstr *MI = MO.getParent();
+ if (MI->isDebugValue() || !UsingInstrs.insert(MI))
continue;
- if (MachineLoop *Loop = loops_.getLoopFor(MBB))
- usingLoops_[Loop]++;
+ UseSlots.push_back(LIS.getInstructionIndex(MI).getDefIndex());
+ MachineBasicBlock *MBB = MI->getParent();
+ UsingBlocks[MBB]++;
}
+ array_pod_sort(UseSlots.begin(), UseSlots.end());
+ calcLiveBlockInfo();
DEBUG(dbgs() << " counted "
- << usingInstrs_.size() << " instrs, "
- << usingBlocks_.size() << " blocks, "
- << usingLoops_.size() << " loops.\n");
+ << UsingInstrs.size() << " instrs, "
+ << UsingBlocks.size() << " blocks.\n");
}
-/// removeUse - Update statistics by noting that MI no longer uses curli.
-void SplitAnalysis::removeUse(const MachineInstr *MI) {
- if (!usingInstrs_.erase(MI))
+/// calcLiveBlockInfo - Fill the LiveBlocks array with information about blocks
+/// where CurLI is live.
+void SplitAnalysis::calcLiveBlockInfo() {
+ if (CurLI->empty())
return;
- // Decrement MBB count.
- const MachineBasicBlock *MBB = MI->getParent();
- BlockCountMap::iterator bi = usingBlocks_.find(MBB);
- assert(bi != usingBlocks_.end() && "MBB missing");
- assert(bi->second && "0 count in map");
- if (--bi->second)
- return;
- // No more uses in MBB.
- usingBlocks_.erase(bi);
+ LiveInterval::const_iterator LVI = CurLI->begin();
+ LiveInterval::const_iterator LVE = CurLI->end();
+
+ SmallVectorImpl<SlotIndex>::const_iterator UseI, UseE;
+ UseI = UseSlots.begin();
+ UseE = UseSlots.end();
+
+ // Loop over basic blocks where CurLI is live.
+ MachineFunction::iterator MFI = LIS.getMBBFromIndex(LVI->start);
+ for (;;) {
+ BlockInfo BI;
+ BI.MBB = MFI;
+ SlotIndex Start, Stop;
+ tie(Start, Stop) = LIS.getSlotIndexes()->getMBBRange(BI.MBB);
+
+ // The last split point is the latest possible insertion point that dominates
+ // all successor blocks. If interference reaches LastSplitPoint, it is not
+ // possible to insert a split or reload that makes CurLI live in the
+ // outgoing bundle.
+ MachineBasicBlock::iterator LSP = LIS.getLastSplitPoint(*CurLI, BI.MBB);
+ if (LSP == BI.MBB->end())
+ BI.LastSplitPoint = Stop;
+ else
+ BI.LastSplitPoint = LIS.getInstructionIndex(LSP);
+
+ // LVI is the first live segment overlapping MBB.
+ BI.LiveIn = LVI->start <= Start;
+ if (!BI.LiveIn)
+ BI.Def = LVI->start;
+
+ // Find the first and last uses in the block.
+ BI.Uses = hasUses(MFI);
+ if (BI.Uses && UseI != UseE) {
+ BI.FirstUse = *UseI;
+ assert(BI.FirstUse >= Start);
+ do ++UseI;
+ while (UseI != UseE && *UseI < Stop);
+ BI.LastUse = UseI[-1];
+ assert(BI.LastUse < Stop);
+ }
- // Decrement loop count.
- MachineLoop *Loop = loops_.getLoopFor(MBB);
- if (!Loop)
- return;
- LoopCountMap::iterator li = usingLoops_.find(Loop);
- assert(li != usingLoops_.end() && "Loop missing");
- assert(li->second && "0 count in map");
- if (--li->second)
- return;
- // No more blocks in Loop.
- usingLoops_.erase(li);
-}
+ // Look for gaps in the live range.
+ bool hasGap = false;
+ BI.LiveOut = true;
+ while (LVI->end < Stop) {
+ SlotIndex LastStop = LVI->end;
+ if (++LVI == LVE || LVI->start >= Stop) {
+ BI.Kill = LastStop;
+ BI.LiveOut = false;
+ break;
+ }
+ if (LastStop < LVI->start) {
+ hasGap = true;
+ BI.Kill = LastStop;
+ BI.Def = LVI->start;
+ }
+ }
-// Get three sets of basic blocks surrounding a loop: Blocks inside the loop,
-// predecessor blocks, and exit blocks.
-void SplitAnalysis::getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks) {
- Blocks.clear();
-
- // Blocks in the loop.
- Blocks.Loop.insert(Loop->block_begin(), Loop->block_end());
-
- // Predecessor blocks.
- const MachineBasicBlock *Header = Loop->getHeader();
- for (MachineBasicBlock::const_pred_iterator I = Header->pred_begin(),
- E = Header->pred_end(); I != E; ++I)
- if (!Blocks.Loop.count(*I))
- Blocks.Preds.insert(*I);
-
- // Exit blocks.
- for (MachineLoop::block_iterator I = Loop->block_begin(),
- E = Loop->block_end(); I != E; ++I) {
- const MachineBasicBlock *MBB = *I;
- for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(),
- SE = MBB->succ_end(); SI != SE; ++SI)
- if (!Blocks.Loop.count(*SI))
- Blocks.Exits.insert(*SI);
- }
-}
+ // Don't set LiveThrough when the block has a gap.
+ BI.LiveThrough = !hasGap && BI.LiveIn && BI.LiveOut;
+ LiveBlocks.push_back(BI);
-/// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
-/// and around the Loop.
-SplitAnalysis::LoopPeripheralUse SplitAnalysis::
-analyzeLoopPeripheralUse(const SplitAnalysis::LoopBlocks &Blocks) {
- LoopPeripheralUse use = ContainedInLoop;
- for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
- I != E; ++I) {
- const MachineBasicBlock *MBB = I->first;
- // Is this a peripheral block?
- if (use < MultiPeripheral &&
- (Blocks.Preds.count(MBB) || Blocks.Exits.count(MBB))) {
- if (I->second > 1) use = MultiPeripheral;
- else use = SinglePeripheral;
- continue;
- }
- // Is it a loop block?
- if (Blocks.Loop.count(MBB))
- continue;
- // It must be an unrelated block.
- return OutsideLoop;
- }
- return use;
-}
+ // LVI is now at LVE or LVI->end >= Stop.
+ if (LVI == LVE)
+ break;
-/// getCriticalExits - It may be necessary to partially break critical edges
-/// leaving the loop if an exit block has phi uses of curli. Collect the exit
-/// blocks that need special treatment into CriticalExits.
-void SplitAnalysis::getCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- CriticalExits.clear();
-
- // A critical exit block contains a phi def of curli, and has a predecessor
- // that is not in the loop nor a loop predecessor.
- // For such an exit block, the edges carrying the new variable must be moved
- // to a new pre-exit block.
- for (BlockPtrSet::iterator I = Blocks.Exits.begin(), E = Blocks.Exits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- SlotIndex SuccIdx = lis_.getMBBStartIdx(Succ);
- VNInfo *SuccVNI = curli_->getVNInfoAt(SuccIdx);
- // This exit may not have curli live in at all. No need to split.
- if (!SuccVNI)
- continue;
- // If this is not a PHI def, it is either using a value from before the
- // loop, or a value defined inside the loop. Both are safe.
- if (!SuccVNI->isPHIDef() || SuccVNI->def.getBaseIndex() != SuccIdx)
- continue;
- // This exit block does have a PHI. Does it also have a predecessor that is
- // not a loop block or loop predecessor?
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- if (Blocks.Loop.count(Pred) || Blocks.Preds.count(Pred))
- continue;
- // This is a critical exit block, and we need to split the exit edge.
- CriticalExits.insert(Succ);
+ // Live segment ends exactly at Stop. Move to the next segment.
+ if (LVI->end == Stop && ++LVI == LVE)
break;
- }
+
+ // Pick the next basic block.
+ if (LVI->start < Stop)
+ ++MFI;
+ else
+ MFI = LIS.getMBBFromIndex(LVI->start);
}
}
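// Illustrative sketch of the classification computed by calcLiveBlockInfo()
// above, for a hypothetical block B where CurLI is live-in, has two uses, and
// dies inside the block:
//
//   BI.MBB         = B
//   BI.LiveIn      = true     // a segment covers the block start
//   BI.Uses        = true     // FirstUse/LastUse are the two use slots
//   BI.Kill        = end of the last segment inside B
//   BI.LiveOut     = false    // no segment reaches the block end
//   BI.LiveThrough = false    // requires live-in and live-out with no gap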
-/// canSplitCriticalExits - Return true if it is possible to insert new exit
-/// blocks before the blocks in CriticalExits.
-bool
-SplitAnalysis::canSplitCriticalExits(const SplitAnalysis::LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits) {
- // If we don't allow critical edge splitting, require no critical exits.
- if (!AllowSplit)
- return CriticalExits.empty();
-
- for (BlockPtrSet::iterator I = CriticalExits.begin(), E = CriticalExits.end();
- I != E; ++I) {
- const MachineBasicBlock *Succ = *I;
- // We want to insert a new pre-exit MBB before Succ, and change all the
- // in-loop blocks to branch to the pre-exit instead of Succ.
- // Check that all the in-loop predecessors can be changed.
- for (MachineBasicBlock::const_pred_iterator PI = Succ->pred_begin(),
- PE = Succ->pred_end(); PI != PE; ++PI) {
- const MachineBasicBlock *Pred = *PI;
- // The external predecessors won't be altered.
- if (!Blocks.Loop.count(Pred) && !Blocks.Preds.count(Pred))
- continue;
- if (!canAnalyzeBranch(Pred))
- return false;
- }
-
- // If Succ's layout predecessor falls through, that too must be analyzable.
- // We need to insert the pre-exit block in the gap.
- MachineFunction::const_iterator MFI = Succ;
- if (MFI == mf_.begin())
- continue;
- if (!canAnalyzeBranch(--MFI))
- return false;
+void SplitAnalysis::print(const BlockPtrSet &B, raw_ostream &OS) const {
+ for (BlockPtrSet::const_iterator I = B.begin(), E = B.end(); I != E; ++I) {
+ unsigned count = UsingBlocks.lookup(*I);
+ OS << " BB#" << (*I)->getNumber();
+ if (count)
+ OS << '(' << count << ')';
}
- // No problems found.
- return true;
}
void SplitAnalysis::analyze(const LiveInterval *li) {
clear();
- curli_ = li;
+ CurLI = li;
analyzeUses();
}
-const MachineLoop *SplitAnalysis::getBestSplitLoop() {
- assert(curli_ && "Call analyze() before getBestSplitLoop");
- if (usingLoops_.empty())
- return 0;
-
- LoopPtrSet Loops, SecondLoops;
- LoopBlocks Blocks;
- BlockPtrSet CriticalExits;
-
- // Find first-class and second class candidate loops.
- // We prefer to split around loops where curli is used outside the periphery.
- for (LoopCountMap::const_iterator I = usingLoops_.begin(),
- E = usingLoops_.end(); I != E; ++I) {
- const MachineLoop *Loop = I->first;
- getLoopBlocks(Loop, Blocks);
-
- // FIXME: We need an SSA updater to properly handle multiple exit blocks.
- if (Blocks.Exits.size() > 1) {
- DEBUG(dbgs() << " multiple exits from " << *Loop);
- continue;
- }
-
- LoopPtrSet *LPS = 0;
- switch(analyzeLoopPeripheralUse(Blocks)) {
- case OutsideLoop:
- LPS = &Loops;
- break;
- case MultiPeripheral:
- LPS = &SecondLoops;
- break;
- case ContainedInLoop:
- DEBUG(dbgs() << " contained in " << *Loop);
- continue;
- case SinglePeripheral:
- DEBUG(dbgs() << " single peripheral use in " << *Loop);
- continue;
- }
- // Will it be possible to split around this loop?
- getCriticalExits(Blocks, CriticalExits);
- DEBUG(dbgs() << " " << CriticalExits.size() << " critical exits from "
- << *Loop);
- if (!canSplitCriticalExits(Blocks, CriticalExits))
- continue;
- // This is a possible split.
- assert(LPS);
- LPS->insert(Loop);
- }
-
- DEBUG(dbgs() << " getBestSplitLoop found " << Loops.size() << " + "
- << SecondLoops.size() << " candidate loops.\n");
-
- // If there are no first class loops available, look at second class loops.
- if (Loops.empty())
- Loops = SecondLoops;
- if (Loops.empty())
- return 0;
+//===----------------------------------------------------------------------===//
+// LiveIntervalMap
+//===----------------------------------------------------------------------===//
- // Pick the earliest loop.
- // FIXME: Are there other heuristics to consider?
- const MachineLoop *Best = 0;
- SlotIndex BestIdx;
- for (LoopPtrSet::const_iterator I = Loops.begin(), E = Loops.end(); I != E;
- ++I) {
- SlotIndex Idx = lis_.getMBBStartIdx((*I)->getHeader());
- if (!Best || Idx < BestIdx)
- Best = *I, BestIdx = Idx;
- }
- DEBUG(dbgs() << " getBestSplitLoop found " << *Best);
- return Best;
+// Work around the fact that the std::pair constructors are broken for pointer
+// pairs in some implementations. makeVV(x, 0) works.
+static inline std::pair<const VNInfo*, VNInfo*>
+makeVV(const VNInfo *a, VNInfo *b) {
+ return std::make_pair(a, b);
}
-/// getMultiUseBlocks - if curli has more than one use in a basic block, it
-/// may be an advantage to split curli for the duration of the block.
-bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
- // If curli is local to one block, there is no point to splitting it.
- if (usingBlocks_.size() <= 1)
- return false;
- // Add blocks with multiple uses.
- for (BlockCountMap::iterator I = usingBlocks_.begin(), E = usingBlocks_.end();
- I != E; ++I)
- switch (I->second) {
- case 0:
- case 1:
- continue;
- case 2: {
- // It doesn't pay to split a 2-instr block if it redefines curli.
- VNInfo *VN1 = curli_->getVNInfoAt(lis_.getMBBStartIdx(I->first));
- VNInfo *VN2 =
- curli_->getVNInfoAt(lis_.getMBBEndIdx(I->first).getPrevIndex());
- // live-in and live-out with a different value.
- if (VN1 && VN2 && VN1 != VN2)
- continue;
- } // Fall through.
- default:
- Blocks.insert(I->first);
- }
- return !Blocks.empty();
+void LiveIntervalMap::reset(LiveInterval *li) {
+ LI = li;
+ Values.clear();
+ LiveOutCache.clear();
}
-//===----------------------------------------------------------------------===//
-// LiveIntervalMap
-//===----------------------------------------------------------------------===//
+bool LiveIntervalMap::isComplexMapped(const VNInfo *ParentVNI) const {
+ ValueMap::const_iterator i = Values.find(ParentVNI);
+ return i != Values.end() && i->second == 0;
+}
-// defValue - Introduce a li_ def for ParentVNI that could be later than
+// defValue - Introduce a LI def for ParentVNI that could be later than
// ParentVNI->def.
VNInfo *LiveIntervalMap::defValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+ assert(LI && "call reset first");
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
- assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
-
- // Is this a simple 1-1 mapping? Not likely.
- if (Idx == ParentVNI->def)
- return mapValue(ParentVNI, Idx);
-
- // This is a complex def. Mark with a NULL in valueMap.
- VNInfo *OldVNI =
- valueMap_.insert(
- ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0))).first->second;
- // The static_cast<VNInfo *> is only needed to work around a bug in an
- // old version of the C++0x standard which the following compilers
- // implemented and have yet to fix:
- //
- // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
- // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
- //
- // If/When we move to C++0x, this can be replaced by nullptr.
- (void)OldVNI;
- assert(OldVNI == 0 && "Simple/Complex values mixed");
-
- // Should we insert a minimal snippet of VNI LiveRange, or can we count on
- // callers to do that? We need it for lookups of complex values.
- VNInfo *VNI = li_.getNextValue(Idx, 0, true, lis_.getVNInfoAllocator());
+ assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+
+ // Create a new value.
+ VNInfo *VNI = LI->getNextValue(Idx, 0, LIS.getVNInfoAllocator());
+
+ // Preserve the PHIDef bit.
+ if (ParentVNI->isPHIDef() && Idx == ParentVNI->def)
+ VNI->setIsPHIDef(true);
+
+ // Use insert for lookup, so we can add missing values with a second lookup.
+ std::pair<ValueMap::iterator,bool> InsP =
+ Values.insert(makeVV(ParentVNI, Idx == ParentVNI->def ? VNI : 0));
+
+ // This is now a complex def. Mark with a NULL in valueMap.
+ if (!InsP.second)
+ InsP.first->second = 0;
+
return VNI;
}
+
// mapValue - Find the mapped value for ParentVNI at Idx.
// Potentially create phi-def values.
-VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
+VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx,
+ bool *simple) {
+ assert(LI && "call reset first");
assert(ParentVNI && "Mapping NULL value");
assert(Idx.isValid() && "Invalid SlotIndex");
- assert(parentli_.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
+ assert(ParentLI.getVNInfoAt(Idx) == ParentVNI && "Bad ParentVNI");
// Use insert for lookup, so we can add missing values with a second lookup.
std::pair<ValueMap::iterator,bool> InsP =
- valueMap_.insert(ValueMap::value_type(ParentVNI, static_cast<VNInfo *>(0)));
- // The static_cast<VNInfo *> is only needed to work around a bug in an
- // old version of the C++0x standard which the following compilers
- // implemented and have yet to fix:
- //
- // Microsoft Visual Studio 2010 Version 10.0.30319.1 RTMRel
- // Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01
- //
- // If/When we move to C++0x, this can be replaced by nullptr.
+ Values.insert(makeVV(ParentVNI, 0));
// This was an unknown value. Create a simple mapping.
- if (InsP.second)
- return InsP.first->second = li_.createValueCopy(ParentVNI,
- lis_.getVNInfoAllocator());
+ if (InsP.second) {
+ if (simple) *simple = true;
+ return InsP.first->second = LI->createValueCopy(ParentVNI,
+ LIS.getVNInfoAllocator());
+ }
+
// This was a simple mapped value.
- if (InsP.first->second)
+ if (InsP.first->second) {
+ if (simple) *simple = true;
return InsP.first->second;
+ }
// This is a complex mapped value. There may be multiple defs, and we may need
// to create phi-defs.
- MachineBasicBlock *IdxMBB = lis_.getMBBFromIndex(Idx);
+ if (simple) *simple = false;
+ MachineBasicBlock *IdxMBB = LIS.getMBBFromIndex(Idx);
assert(IdxMBB && "No MBB at Idx");
// Is there a def in the same MBB we can extend?
@@ -409,157 +270,260 @@ VNInfo *LiveIntervalMap::mapValue(const VNInfo *ParentVNI, SlotIndex Idx) {
// Now for the fun part. We know that ParentVNI potentially has multiple defs,
// and we may need to create even more phi-defs to preserve VNInfo SSA form.
- // Perform a depth-first search for predecessor blocks where we know the
- // dominating VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
-
- // Track MBBs where we have created or learned the dominating value.
- // This may change during the DFS as we create new phi-defs.
- typedef DenseMap<MachineBasicBlock*, VNInfo*> MBBValueMap;
- MBBValueMap DomValue;
-
- for (idf_iterator<MachineBasicBlock*>
- IDFI = idf_begin(IdxMBB),
- IDFE = idf_end(IdxMBB); IDFI != IDFE;) {
- MachineBasicBlock *MBB = *IDFI;
- SlotIndex End = lis_.getMBBEndIdx(MBB);
-
- // We are operating on the restricted CFG where ParentVNI is live.
- if (parentli_.getVNInfoAt(End.getPrevSlot()) != ParentVNI) {
- IDFI.skipChildren();
- continue;
- }
-
- // Do we have a dominating value in this block?
- VNInfo *VNI = extendTo(MBB, End);
- if (!VNI) {
- ++IDFI;
- continue;
+ // Perform a search for all predecessor blocks where we know the dominating
+ // VNInfo. Insert phi-def VNInfos along the path back to IdxMBB.
+ DEBUG(dbgs() << "\n Reaching defs for BB#" << IdxMBB->getNumber()
+ << " at " << Idx << " in " << *LI << '\n');
+
+ // Blocks where LI should be live-in.
+ SmallVector<MachineDomTreeNode*, 16> LiveIn;
+ LiveIn.push_back(MDT[IdxMBB]);
+
+ // Using LiveOutCache as a visited set, perform a BFS for all reaching defs.
+ for (unsigned i = 0; i != LiveIn.size(); ++i) {
+ MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ MachineBasicBlock *Pred = *PI;
+ // Is this a known live-out block?
+ std::pair<LiveOutMap::iterator,bool> LOIP =
+ LiveOutCache.insert(std::make_pair(Pred, LiveOutPair()));
+ // Yes, we have been here before.
+ if (!LOIP.second) {
+ DEBUG(if (VNInfo *VNI = LOIP.first->second.first)
+ dbgs() << " known valno #" << VNI->id
+ << " at BB#" << Pred->getNumber() << '\n');
+ continue;
+ }
+
+ // Does Pred provide a live-out value?
+ SlotIndex Last = LIS.getMBBEndIdx(Pred).getPrevSlot();
+ if (VNInfo *VNI = extendTo(Pred, Last)) {
+ MachineBasicBlock *DefMBB = LIS.getMBBFromIndex(VNI->def);
+ DEBUG(dbgs() << " found valno #" << VNI->id
+ << " from BB#" << DefMBB->getNumber()
+ << " at BB#" << Pred->getNumber() << '\n');
+ LiveOutPair &LOP = LOIP.first->second;
+ LOP.first = VNI;
+ LOP.second = MDT[DefMBB];
+ continue;
+ }
+ // No, we need a live-in value for Pred as well
+ if (Pred != IdxMBB)
+ LiveIn.push_back(MDT[Pred]);
}
+ }
- // Yes, VNI dominates MBB. Track the path back to IdxMBB, creating phi-defs
- // as needed along the way.
- for (unsigned PI = IDFI.getPathLength()-1; PI != 0; --PI) {
- // Start from MBB's immediate successor. End at IdxMBB.
- MachineBasicBlock *Succ = IDFI.getPath(PI-1);
- std::pair<MBBValueMap::iterator, bool> InsP =
- DomValue.insert(MBBValueMap::value_type(Succ, VNI));
-
- // This is the first time we backtrack to Succ.
- if (InsP.second)
- continue;
-
- // We reached Succ again with the same VNI. Nothing is going to change.
- VNInfo *OVNI = InsP.first->second;
- if (OVNI == VNI)
- break;
+ // We may need to add phi-def values to preserve the SSA form.
+ // This is essentially the same iterative algorithm that SSAUpdater uses,
+ // except we already have a dominator tree, so we don't have to recompute it.
+ VNInfo *IdxVNI = 0;
+ unsigned Changes;
+ do {
+ Changes = 0;
+ DEBUG(dbgs() << " Iterating over " << LiveIn.size() << " blocks.\n");
+ // Propagate live-out values down the dominator tree, inserting phi-defs when
+ // necessary. Since LiveIn was created by a BFS, going backwards makes it more
+ // likely for us to visit immediate dominators before their children.
+ for (unsigned i = LiveIn.size(); i; --i) {
+ MachineDomTreeNode *Node = LiveIn[i-1];
+ MachineBasicBlock *MBB = Node->getBlock();
+ MachineDomTreeNode *IDom = Node->getIDom();
+ LiveOutPair IDomValue;
+ // We need a live-in value to a block with no immediate dominator?
+ // This is probably an unreachable block that has survived somehow.
+ bool needPHI = !IDom;
+
+ // Get the IDom live-out value.
+ if (!needPHI) {
+ LiveOutMap::iterator I = LiveOutCache.find(IDom->getBlock());
+ if (I != LiveOutCache.end())
+ IDomValue = I->second;
+ else
+ // If IDom is outside our set of live-out blocks, there must be new
+ // defs, and we need a phi-def here.
+ needPHI = true;
+ }
- // Succ already has a phi-def. No need to continue.
- SlotIndex Start = lis_.getMBBStartIdx(Succ);
- if (OVNI->def == Start)
- break;
+ // IDom dominates all of our predecessors, but it may not be the immediate
+ // dominator. Check if any of them have live-out values that are properly
+ // dominated by IDom. If so, we need a phi-def here.
+ if (!needPHI) {
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ LiveOutPair Value = LiveOutCache[*PI];
+ if (!Value.first || Value.first == IDomValue.first)
+ continue;
+ // This predecessor is carrying something other than IDomValue.
+ // It could be because IDomValue hasn't propagated yet, or it could be
+ // because MBB is in the dominance frontier of that value.
+ if (MDT.dominates(IDom, Value.second)) {
+ needPHI = true;
+ break;
+ }
+ }
+ }
- // We have a collision between the old and new VNI at Succ. That means
- // neither dominates and we need a new phi-def.
- VNI = li_.getNextValue(Start, 0, true, lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- InsP.first->second = VNI;
-
- // Replace OVNI with VNI in the remaining path.
- for (; PI > 1 ; --PI) {
- MBBValueMap::iterator I = DomValue.find(IDFI.getPath(PI-2));
- if (I == DomValue.end() || I->second != OVNI)
- break;
- I->second = VNI;
+ // Create a phi-def if required.
+ if (needPHI) {
+ ++Changes;
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ VNInfo *VNI = LI->getNextValue(Start, 0, LIS.getVNInfoAllocator());
+ VNI->setIsPHIDef(true);
+ DEBUG(dbgs() << " - BB#" << MBB->getNumber()
+ << " phi-def #" << VNI->id << " at " << Start << '\n');
+ // We no longer need LI to be live-in.
+ LiveIn.erase(LiveIn.begin()+(i-1));
+ // Blocks in LiveIn are either IdxMBB, or have a value live-through.
+ if (MBB == IdxMBB)
+ IdxVNI = VNI;
+ // Check if we need to update live-out info.
+ LiveOutMap::iterator I = LiveOutCache.find(MBB);
+ if (I == LiveOutCache.end() || I->second.second == Node) {
+ // We already have a live-out defined in MBB, so this must be IdxMBB.
+ assert(MBB == IdxMBB && "Adding phi-def to known live-out");
+ LI->addRange(LiveRange(Start, Idx.getNextSlot(), VNI));
+ } else {
+ // This phi-def is also live-out, so color the whole block.
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
+ I->second = LiveOutPair(VNI, Node);
+ }
+ } else if (IDomValue.first) {
+ // No phi-def here. Remember incoming value for IdxMBB.
+ if (MBB == IdxMBB)
+ IdxVNI = IDomValue.first;
+ // Propagate IDomValue if needed:
+ // MBB is live-out and doesn't define its own value.
+ LiveOutMap::iterator I = LiveOutCache.find(MBB);
+ if (I != LiveOutCache.end() && I->second.second != Node &&
+ I->second.first != IDomValue.first) {
+ ++Changes;
+ I->second = IDomValue;
+ DEBUG(dbgs() << " - BB#" << MBB->getNumber()
+ << " idom valno #" << IDomValue.first->id
+ << " from BB#" << IDom->getBlock()->getNumber() << '\n');
+ }
}
}
+ DEBUG(dbgs() << " - made " << Changes << " changes.\n");
+ } while (Changes);
- // No need to search the children, we found a dominating value.
- IDFI.skipChildren();
- }
+ assert(IdxVNI && "Didn't find value for Idx");
- // The search should at least find a dominating value for IdxMBB.
- assert(!DomValue.empty() && "Couldn't find a reaching definition");
+#ifndef NDEBUG
+ // Check the LiveOutCache invariants.
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+ I != E; ++I) {
+ assert(I->first && "Null MBB entry in cache");
+ assert(I->second.first && "Null VNInfo in cache");
+ assert(I->second.second && "Null DomTreeNode in cache");
+ if (I->second.second->getBlock() == I->first)
+ continue;
+ for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+ PE = I->first->pred_end(); PI != PE; ++PI)
+ assert(LiveOutCache.lookup(*PI) == I->second && "Bad invariant");
+ }
+#endif
- // Since we went through the trouble of a full DFS visiting all reaching defs,
- // the values in DomValue are now accurate. No more phi-defs are needed for
- // these blocks, so we can color the live ranges.
+ // Since we went through the trouble of a full BFS visiting all reaching defs,
+ // the values in LiveIn are now accurate. No more phi-defs are needed
+ // for these blocks, so we can color the live ranges.
// This makes the next mapValue call much faster.
- VNInfo *IdxVNI = 0;
- for (MBBValueMap::iterator I = DomValue.begin(), E = DomValue.end(); I != E;
- ++I) {
- MachineBasicBlock *MBB = I->first;
- VNInfo *VNI = I->second;
- SlotIndex Start = lis_.getMBBStartIdx(MBB);
- if (MBB == IdxMBB) {
- // Don't add full liveness to IdxMBB, stop at Idx.
- if (Start != Idx)
- li_.addRange(LiveRange(Start, Idx, VNI));
- // The caller had better add some liveness to IdxVNI, or it leaks.
- IdxVNI = VNI;
- } else
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ for (unsigned i = 0, e = LiveIn.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = LiveIn[i]->getBlock();
+ SlotIndex Start = LIS.getMBBStartIdx(MBB);
+ VNInfo *VNI = LiveOutCache.lookup(MBB).first;
+
+ // Anything in LiveIn other than IdxMBB is live-through.
+ // In IdxMBB, we should stop at Idx unless the same value is live-out.
+ if (MBB == IdxMBB && IdxVNI != VNI)
+ LI->addRange(LiveRange(Start, Idx.getNextSlot(), IdxVNI));
+ else
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
}
- assert(IdxVNI && "Didn't find value for Idx");
return IdxVNI;
}
-// extendTo - Find the last li_ value defined in MBB at or before Idx. The
-// parentli_ is assumed to be live at Idx. Extend the live range to Idx.
+#ifndef NDEBUG
+void LiveIntervalMap::dumpCache() {
+ for (LiveOutMap::iterator I = LiveOutCache.begin(), E = LiveOutCache.end();
+ I != E; ++I) {
+ assert(I->first && "Null MBB entry in cache");
+ assert(I->second.first && "Null VNInfo in cache");
+ assert(I->second.second && "Null DomTreeNode in cache");
+ dbgs() << " cache: BB#" << I->first->getNumber()
+ << " has valno #" << I->second.first->id << " from BB#"
+ << I->second.second->getBlock()->getNumber() << ", preds";
+ for (MachineBasicBlock::pred_iterator PI = I->first->pred_begin(),
+ PE = I->first->pred_end(); PI != PE; ++PI)
+ dbgs() << " BB#" << (*PI)->getNumber();
+ dbgs() << '\n';
+ }
+ dbgs() << " cache: " << LiveOutCache.size() << " entries.\n";
+}
+#endif
+
+// extendTo - Find the last LI value defined in MBB at or before Idx. The
+// ParentLI is assumed to be live at Idx. Extend the live range to Idx.
// Return the found VNInfo, or NULL.
-VNInfo *LiveIntervalMap::extendTo(MachineBasicBlock *MBB, SlotIndex Idx) {
- LiveInterval::iterator I = std::upper_bound(li_.begin(), li_.end(), Idx);
- if (I == li_.begin())
+VNInfo *LiveIntervalMap::extendTo(const MachineBasicBlock *MBB, SlotIndex Idx) {
+ assert(LI && "call reset first");
+ LiveInterval::iterator I = std::upper_bound(LI->begin(), LI->end(), Idx);
+ if (I == LI->begin())
return 0;
--I;
- if (I->start < lis_.getMBBStartIdx(MBB))
+ if (I->end <= LIS.getMBBStartIdx(MBB))
return 0;
- if (I->end < Idx)
- I->end = Idx;
+ if (I->end <= Idx)
+ I->end = Idx.getNextSlot();
return I->valno;
}
-// addSimpleRange - Add a simple range from parentli_ to li_.
+// addSimpleRange - Add a simple range from ParentLI to LI.
// ParentVNI must be live in the [Start;End) interval.
void LiveIntervalMap::addSimpleRange(SlotIndex Start, SlotIndex End,
const VNInfo *ParentVNI) {
- VNInfo *VNI = mapValue(ParentVNI, Start);
- // A simple mappoing is easy.
- if (VNI->def == ParentVNI->def) {
- li_.addRange(LiveRange(Start, End, VNI));
+ assert(LI && "call reset first");
+ bool simple;
+ VNInfo *VNI = mapValue(ParentVNI, Start, &simple);
+ // A simple mapping is easy.
+ if (simple) {
+ LI->addRange(LiveRange(Start, End, VNI));
return;
}
// ParentVNI is a complex value. We must map per MBB.
- MachineFunction::iterator MBB = lis_.getMBBFromIndex(Start);
- MachineFunction::iterator MBBE = lis_.getMBBFromIndex(End);
+ MachineFunction::iterator MBB = LIS.getMBBFromIndex(Start);
+ MachineFunction::iterator MBBE = LIS.getMBBFromIndex(End.getPrevSlot());
if (MBB == MBBE) {
- li_.addRange(LiveRange(Start, End, VNI));
+ LI->addRange(LiveRange(Start, End, VNI));
return;
}
// First block.
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB), VNI));
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB), VNI));
// Run sequence of full blocks.
for (++MBB; MBB != MBBE; ++MBB) {
- Start = lis_.getMBBStartIdx(MBB);
- li_.addRange(LiveRange(Start, lis_.getMBBEndIdx(MBB),
- mapValue(ParentVNI, Start)));
+ Start = LIS.getMBBStartIdx(MBB);
+ LI->addRange(LiveRange(Start, LIS.getMBBEndIdx(MBB),
+ mapValue(ParentVNI, Start)));
}
// Final block.
- Start = lis_.getMBBStartIdx(MBB);
+ Start = LIS.getMBBStartIdx(MBB);
if (Start != End)
- li_.addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
+ LI->addRange(LiveRange(Start, End, mapValue(ParentVNI, Start)));
}
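// Illustrative sketch of the LiveIntervalMap protocol as SplitEditor uses it
// further down; ParentLI, NewLI, DefIdx and UseIdx are hypothetical names:
//
//   LiveIntervalMap LIM(LIS, MDT, ParentLI);
//   LIM.reset(&NewLI);                             // build liveness into NewLI
//   VNInfo *VNI = LIM.defValue(ParentVNI, DefIdx); // register a def, possibly later than ParentVNI->def
//   LIM.addSimpleRange(DefIdx, UseIdx, ParentVNI); // extend liveness; mapValue() adds phi-defs as needed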
-/// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+/// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
/// All needed values whose def is not inside [Start;End) must be defined
/// beforehand so mapValue will work.
void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
- LiveInterval::const_iterator B = parentli_.begin(), E = parentli_.end();
+ assert(LI && "call reset first");
+ LiveInterval::const_iterator B = ParentLI.begin(), E = ParentLI.end();
LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
// Check if --I begins before Start and overlaps.
@@ -575,403 +539,374 @@ void LiveIntervalMap::addRange(SlotIndex Start, SlotIndex End) {
addSimpleRange(I->start, std::min(End, I->end), I->valno);
}
+
//===----------------------------------------------------------------------===//
// Split Editor
//===----------------------------------------------------------------------===//
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
-SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm,
- SmallVectorImpl<LiveInterval*> &intervals)
- : sa_(sa), lis_(lis), vrm_(vrm),
- mri_(vrm.getMachineFunction().getRegInfo()),
- tii_(*vrm.getMachineFunction().getTarget().getInstrInfo()),
- curli_(sa_.getCurLI()),
- dupli_(0), openli_(0),
- intervals_(intervals),
- firstInterval(intervals_.size())
+SplitEditor::SplitEditor(SplitAnalysis &sa,
+ LiveIntervals &lis,
+ VirtRegMap &vrm,
+ MachineDominatorTree &mdt,
+ LiveRangeEdit &edit)
+ : SA(sa), LIS(lis), VRM(vrm),
+ MRI(vrm.getMachineFunction().getRegInfo()),
+ MDT(mdt),
+ TII(*vrm.getMachineFunction().getTarget().getInstrInfo()),
+ TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()),
+ Edit(edit),
+ OpenIdx(0),
+ RegAssign(Allocator)
{
- assert(curli_ && "SplitEditor created from empty SplitAnalysis");
-
- // Make sure curli_ is assigned a stack slot, so all our intervals get the
- // same slot as curli_.
- if (vrm_.getStackSlot(curli_->reg) == VirtRegMap::NO_STACK_SLOT)
- vrm_.assignVirt2StackSlot(curli_->reg);
-
+ // We don't need an AliasAnalysis since we will only be performing
+ // cheap-as-a-copy remats anyway.
+ Edit.anyRematerializable(LIS, TII, 0);
}
-LiveInterval *SplitEditor::createInterval() {
- unsigned curli = sa_.getCurLI()->reg;
- unsigned Reg = mri_.createVirtualRegister(mri_.getRegClass(curli));
- LiveInterval &Intv = lis_.getOrCreateInterval(Reg);
- vrm_.grow();
- vrm_.assignVirt2StackSlot(Reg, vrm_.getStackSlot(curli));
- return &Intv;
+void SplitEditor::dump() const {
+ if (RegAssign.empty()) {
+ dbgs() << " empty\n";
+ return;
+ }
+
+ for (RegAssignMap::const_iterator I = RegAssign.begin(); I.valid(); ++I)
+ dbgs() << " [" << I.start() << ';' << I.stop() << "):" << I.value();
+ dbgs() << '\n';
}
-LiveInterval *SplitEditor::getDupLI() {
- if (!dupli_) {
- // Create an interval for dupli that is a copy of curli.
- dupli_ = createInterval();
- dupli_->Copy(*curli_, &mri_, lis_.getVNInfoAllocator());
+VNInfo *SplitEditor::defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) {
+ MachineInstr *CopyMI = 0;
+ SlotIndex Def;
+ LiveInterval *LI = Edit.get(RegIdx);
+
+ // Attempt cheap-as-a-copy rematerialization.
+ LiveRangeEdit::Remat RM(ParentVNI);
+ if (Edit.canRematerializeAt(RM, UseIdx, true, LIS)) {
+ Def = Edit.rematerializeAt(MBB, I, LI->reg, RM, LIS, TII, TRI);
+ } else {
+ // Can't remat, just insert a copy from parent.
+ CopyMI = BuildMI(MBB, I, DebugLoc(), TII.get(TargetOpcode::COPY), LI->reg)
+ .addReg(Edit.getReg());
+ Def = LIS.InsertMachineInstrInMaps(CopyMI).getDefIndex();
}
- return dupli_;
-}
-VNInfo *SplitEditor::mapValue(const VNInfo *curliVNI) {
- VNInfo *&VNI = valueMap_[curliVNI];
- if (!VNI)
- VNI = openli_->createValueCopy(curliVNI, lis_.getVNInfoAllocator());
- return VNI;
-}
+ // Define the value in Reg.
+ VNInfo *VNI = LIMappers[RegIdx].defValue(ParentVNI, Def);
+ VNI->setCopy(CopyMI);
-/// Insert a COPY instruction curli -> li. Allocate a new value from li
-/// defined by the COPY. Note that rewrite() will deal with the curli
-/// register, so this function can be used to copy from any interval - openli,
-/// curli, or dupli.
-VNInfo *SplitEditor::insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I) {
- MachineInstr *MI = BuildMI(MBB, I, DebugLoc(), tii_.get(TargetOpcode::COPY),
- LI.reg).addReg(curli_->reg);
- SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- return LI.getNextValue(DefIdx, MI, true, lis_.getVNInfoAllocator());
+ // Add minimal liveness for the new value.
+ Edit.get(RegIdx)->addRange(LiveRange(Def, Def.getNextSlot(), VNI));
+ return VNI;
}
/// Create a new virtual register and live interval.
void SplitEditor::openIntv() {
- assert(!openli_ && "Previous LI not closed before openIntv");
- openli_ = createInterval();
- intervals_.push_back(openli_);
- liveThrough_ = false;
-}
+ assert(!OpenIdx && "Previous LI not closed before openIntv");
-/// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
-/// not live before Idx, a COPY is not inserted.
-void SplitEditor::enterIntvBefore(SlotIndex Idx) {
- assert(openli_ && "openIntv not called before enterIntvBefore");
-
- // Copy from curli_ if it is live.
- if (VNInfo *CurVNI = curli_->getVNInfoAt(Idx.getUseIndex())) {
- MachineInstr *MI = lis_.getInstructionFromIndex(Idx);
- assert(MI && "enterIntvBefore called with invalid index");
- VNInfo *VNI = insertCopy(*openli_, *MI->getParent(), MI);
- openli_->addRange(LiveRange(VNI->def, Idx.getDefIndex(), VNI));
-
- // Make sure CurVNI is properly mapped.
- VNInfo *&mapVNI = valueMap_[CurVNI];
- // We don't have SSA update yet, so only one entry per value is allowed.
- assert(!mapVNI && "enterIntvBefore called more than once for the same value");
- mapVNI = VNI;
+ // Create the complement as index 0.
+ if (Edit.empty()) {
+ Edit.create(MRI, LIS, VRM);
+ LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+ LIMappers.back().reset(Edit.get(0));
}
- DEBUG(dbgs() << " enterIntvBefore " << Idx << ": " << *openli_ << '\n');
-}
-/// enterIntvAtEnd - Enter openli at the end of MBB.
-/// PhiMBB is a successor inside openli where a PHI value is created.
-/// Currently, all entries must share the same PhiMBB.
-void SplitEditor::enterIntvAtEnd(MachineBasicBlock &A, MachineBasicBlock &B) {
- assert(openli_ && "openIntv not called before enterIntvAtEnd");
-
- SlotIndex EndA = lis_.getMBBEndIdx(&A);
- VNInfo *CurVNIA = curli_->getVNInfoAt(EndA.getPrevIndex());
- if (!CurVNIA) {
- DEBUG(dbgs() << " enterIntvAtEnd, curli not live out of BB#"
- << A.getNumber() << ".\n");
- return;
- }
+ // Create the open interval.
+ OpenIdx = Edit.size();
+ Edit.create(MRI, LIS, VRM);
+ LIMappers.push_back(LiveIntervalMap(LIS, MDT, Edit.getParent()));
+ LIMappers[OpenIdx].reset(Edit.get(OpenIdx));
+}
- // Add a phi kill value and live range out of A.
- VNInfo *VNIA = insertCopy(*openli_, A, A.getFirstTerminator());
- openli_->addRange(LiveRange(VNIA->def, EndA, VNIA));
-
- // FIXME: If this is the only entry edge, we don't need the extra PHI value.
- // FIXME: If there are multiple entry blocks (so not a loop), we need proper
- // SSA update.
-
- // Now look at the start of B.
- SlotIndex StartB = lis_.getMBBStartIdx(&B);
- SlotIndex EndB = lis_.getMBBEndIdx(&B);
- const LiveRange *CurB = curli_->getLiveRangeContaining(StartB);
- if (!CurB) {
- DEBUG(dbgs() << " enterIntvAtEnd: curli not live in to BB#"
- << B.getNumber() << ".\n");
- return;
+SlotIndex SplitEditor::enterIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before enterIntvBefore");
+ DEBUG(dbgs() << " enterIntvBefore " << Idx);
+ Idx = Idx.getBaseIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx;
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "enterIntvBefore called with invalid index");
- VNInfo *VNIB = openli_->getVNInfoAt(StartB);
- if (!VNIB) {
- // Create a phi value.
- VNIB = openli_->getNextValue(SlotIndex(StartB, true), 0, false,
- lis_.getVNInfoAllocator());
- VNIB->setIsPHIDef(true);
- VNInfo *&mapVNI = valueMap_[CurB->valno];
- if (mapVNI) {
- // Multiple copies - must create PHI value.
- abort();
- } else {
- // This is the first copy of dupLR. Mark the mapping.
- mapVNI = VNIB;
- }
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
+}
+SlotIndex SplitEditor::enterIntvAtEnd(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before enterIntvAtEnd");
+ SlotIndex End = LIS.getMBBEndIdx(&MBB);
+ SlotIndex Last = End.getPrevSlot();
+ DEBUG(dbgs() << " enterIntvAtEnd BB#" << MBB.getNumber() << ", " << Last);
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Last);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return End;
}
-
- DEBUG(dbgs() << " enterIntvAtEnd: " << *openli_ << '\n');
+ DEBUG(dbgs() << ": valno " << ParentVNI->id);
+ VNInfo *VNI = defFromParent(OpenIdx, ParentVNI, Last, MBB,
+ LIS.getLastSplitPoint(Edit.getParent(), &MBB));
+ RegAssign.insert(VNI->def, End, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
}
-/// useIntv - indicate that all instructions in MBB should use openli.
+/// useIntv - indicate that all instructions in MBB should use OpenLI.
void SplitEditor::useIntv(const MachineBasicBlock &MBB) {
- useIntv(lis_.getMBBStartIdx(&MBB), lis_.getMBBEndIdx(&MBB));
+ useIntv(LIS.getMBBStartIdx(&MBB), LIS.getMBBEndIdx(&MBB));
}
void SplitEditor::useIntv(SlotIndex Start, SlotIndex End) {
- assert(openli_ && "openIntv not called before useIntv");
+ assert(OpenIdx && "openIntv not called before useIntv");
+ DEBUG(dbgs() << " useIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
+}
- // Map the curli values from the interval into openli_
- LiveInterval::const_iterator B = curli_->begin(), E = curli_->end();
- LiveInterval::const_iterator I = std::lower_bound(B, E, Start);
+SlotIndex SplitEditor::leaveIntvAfter(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAfter");
+ DEBUG(dbgs() << " leaveIntvAfter " << Idx);
- if (I != B) {
- --I;
- // I begins before Start, but overlaps.
- if (I->end > Start)
- openli_->addRange(LiveRange(Start, std::min(End, I->end),
- mapValue(I->valno)));
- ++I;
+ // The interval must be live beyond the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
- // The remaining ranges begin after Start.
- for (;I != E && I->start < End; ++I)
- openli_->addRange(LiveRange(I->start, std::min(End, I->end),
- mapValue(I->valno)));
- DEBUG(dbgs() << " use [" << Start << ';' << End << "): " << *openli_
- << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(),
+ llvm::next(MachineBasicBlock::iterator(MI)));
+ return VNI->def;
}
-/// leaveIntvAfter - Leave openli after the instruction at Idx.
-void SplitEditor::leaveIntvAfter(SlotIndex Idx) {
- assert(openli_ && "openIntv not called before leaveIntvAfter");
+SlotIndex SplitEditor::leaveIntvBefore(SlotIndex Idx) {
+ assert(OpenIdx && "openIntv not called before leaveIntvBefore");
+ DEBUG(dbgs() << " leaveIntvBefore " << Idx);
- const LiveRange *CurLR = curli_->getLiveRangeContaining(Idx.getDefIndex());
- if (!CurLR || CurLR->end <= Idx.getBoundaryIndex()) {
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": not live\n");
- return;
+ // The interval must be live into the instruction at Idx.
+ Idx = Idx.getBoundaryIndex();
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Idx.getNextSlot();
}
+ DEBUG(dbgs() << ": valno " << ParentVNI->id << '\n');
- // Was this value of curli live through openli?
- if (!openli_->liveAt(CurLR->valno->def)) {
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": using external value\n");
- liveThrough_ = true;
- return;
- }
-
- // We are going to insert a back copy, so we must have a dupli_.
- LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Idx.getDefIndex());
- assert(DupLR && "dupli not live into black, but curli is?");
-
- // Insert the COPY instruction.
- MachineBasicBlock::iterator I = lis_.getInstructionFromIndex(Idx);
- MachineInstr *MI = BuildMI(*I->getParent(), llvm::next(I), I->getDebugLoc(),
- tii_.get(TargetOpcode::COPY), dupli_->reg)
- .addReg(openli_->reg);
- SlotIndex CopyIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
- openli_->addRange(LiveRange(Idx.getDefIndex(), CopyIdx,
- mapValue(CurLR->valno)));
- DupLR->valno->def = CopyIdx;
- DEBUG(dbgs() << " leaveIntvAfter " << Idx << ": " << *openli_ << '\n');
+ MachineInstr *MI = LIS.getInstructionFromIndex(Idx);
+ assert(MI && "No instruction at index");
+ VNInfo *VNI = defFromParent(0, ParentVNI, Idx, *MI->getParent(), MI);
+ return VNI->def;
}
-/// leaveIntvAtTop - Leave the interval at the top of MBB.
-/// Currently, only one value can leave the interval.
-void SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
- assert(openli_ && "openIntv not called before leaveIntvAtTop");
-
- SlotIndex Start = lis_.getMBBStartIdx(&MBB);
- const LiveRange *CurLR = curli_->getLiveRangeContaining(Start);
-
- // Is curli even live-in to MBB?
- if (!CurLR) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start << ": not live\n");
- return;
- }
-
- // Is curli defined by PHI at the beginning of MBB?
- bool isPHIDef = CurLR->valno->isPHIDef() &&
- CurLR->valno->def.getBaseIndex() == Start;
+SlotIndex SplitEditor::leaveIntvAtTop(MachineBasicBlock &MBB) {
+ assert(OpenIdx && "openIntv not called before leaveIntvAtTop");
+ SlotIndex Start = LIS.getMBBStartIdx(&MBB);
+ DEBUG(dbgs() << " leaveIntvAtTop BB#" << MBB.getNumber() << ", " << Start);
- // If MBB is using a value of curli that was defined outside the openli range,
- // we don't want to copy it back here.
- if (!isPHIDef && !openli_->liveAt(CurLR->valno->def)) {
- DEBUG(dbgs() << " leaveIntvAtTop at " << Start
- << ": using external value\n");
- liveThrough_ = true;
- return;
+ VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Start);
+ if (!ParentVNI) {
+ DEBUG(dbgs() << ": not live\n");
+ return Start;
}
- // We are going to insert a back copy, so we must have a dupli_.
- LiveRange *DupLR = getDupLI()->getLiveRangeContaining(Start);
- assert(DupLR && "dupli not live into black, but curli is?");
-
- // Insert the COPY instruction.
- MachineInstr *MI = BuildMI(MBB, MBB.begin(), DebugLoc(),
- tii_.get(TargetOpcode::COPY), dupli_->reg)
- .addReg(openli_->reg);
- SlotIndex Idx = lis_.InsertMachineInstrInMaps(MI).getDefIndex();
-
- // Adjust dupli and openli values.
- if (isPHIDef) {
- // dupli was already a PHI on entry to MBB. Simply insert an openli PHI,
- // and shift the dupli def down to the COPY.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- dupli_->removeRange(Start, Idx);
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- } else {
- // The dupli value was defined somewhere inside the openli range.
- DEBUG(dbgs() << " leaveIntvAtTop source value defined at "
- << DupLR->valno->def << "\n");
- // FIXME: We may not need a PHI here if all predecessors have the same
- // value.
- VNInfo *VNI = openli_->getNextValue(SlotIndex(Start, true), 0, false,
- lis_.getVNInfoAllocator());
- VNI->setIsPHIDef(true);
- openli_->addRange(LiveRange(VNI->def, Idx, VNI));
-
- // FIXME: What if DupLR->valno is used by multiple exits? SSA Update.
-
- // closeIntv is going to remove the superfluous live ranges.
- DupLR->valno->def = Idx;
- DupLR->valno->setIsPHIDef(false);
- }
+ VNInfo *VNI = defFromParent(0, ParentVNI, Start, MBB,
+ MBB.SkipPHIsAndLabels(MBB.begin()));
+ RegAssign.insert(Start, VNI->def, OpenIdx);
+ DEBUG(dump());
+ return VNI->def;
+}
- DEBUG(dbgs() << " leaveIntvAtTop at " << Idx << ": " << *openli_ << '\n');
+void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) {
+ assert(OpenIdx && "openIntv not called before overlapIntv");
+ assert(Edit.getParent().getVNInfoAt(Start) ==
+ Edit.getParent().getVNInfoAt(End.getPrevSlot()) &&
+ "Parent changes value in extended range");
+ assert(Edit.get(0)->getVNInfoAt(Start) && "Start must come from leaveIntv*");
+ assert(LIS.getMBBFromIndex(Start) == LIS.getMBBFromIndex(End) &&
+ "Range cannot span basic blocks");
+
+ // Treat this as useIntv() for now. The complement interval will be extended
+ // as needed by mapValue().
+ DEBUG(dbgs() << " overlapIntv [" << Start << ';' << End << "):");
+ RegAssign.insert(Start, End, OpenIdx);
+ DEBUG(dump());
}
/// closeIntv - Indicate that we are done editing the currently open
/// LiveInterval, and ranges can be trimmed.
void SplitEditor::closeIntv() {
- assert(openli_ && "openIntv not called before closeIntv");
-
- DEBUG(dbgs() << " closeIntv cleaning up\n");
- DEBUG(dbgs() << " open " << *openli_ << '\n');
-
- if (liveThrough_) {
- DEBUG(dbgs() << " value live through region, leaving dupli as is.\n");
- } else {
- // live out with copies inserted, or killed by region. Either way we need to
- // remove the overlapping region from dupli.
- getDupLI();
- for (LiveInterval::iterator I = openli_->begin(), E = openli_->end();
- I != E; ++I) {
- dupli_->removeRange(I->start, I->end);
- }
- // FIXME: A block branching to the entry block may also branch elsewhere
- // curli is live. We need both openli and curli to be live in that case.
- DEBUG(dbgs() << " dup2 " << *dupli_ << '\n');
- }
- openli_ = 0;
- valueMap_.clear();
+ assert(OpenIdx && "openIntv not called before closeIntv");
+ OpenIdx = 0;
}
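// Illustrative sketch of the intended openIntv/enterIntv/leaveIntv/closeIntv
// sequence, as a hypothetical splitter would issue it for a single block; the
// SlotIndex values are made up:
//
//   SplitEditor SE(SA, LIS, VRM, MDT, Edit);
//   SE.openIntv();                                  // start a new interval
//   SlotIndex S = SE.enterIntvBefore(FirstUseIdx);  // copy or remat before the first use
//   SlotIndex E = SE.leaveIntvAfter(LastUseIdx);    // copy back out after the last use
//   SE.useIntv(S, E);                               // assign [S;E) to the open interval
//   SE.closeIntv();
//   SE.finish();                                    // compute liveness and rewrite operands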
-/// rewrite - after all the new live ranges have been created, rewrite
-/// instructions using curli to use the new intervals.
-void SplitEditor::rewrite() {
- assert(!openli_ && "Previous LI not closed before rewrite");
- const LiveInterval *curli = sa_.getCurLI();
- for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(curli->reg),
- RE = mri_.reg_end(); RI != RE;) {
+/// rewriteAssigned - Rewrite all uses of Edit.getReg().
+void SplitEditor::rewriteAssigned() {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Edit.getReg()),
+ RE = MRI.reg_end(); RI != RE;) {
MachineOperand &MO = RI.getOperand();
MachineInstr *MI = MO.getParent();
++RI;
+ // LiveDebugVariables should have handled all DBG_VALUE instructions.
if (MI->isDebugValue()) {
DEBUG(dbgs() << "Zapping " << *MI);
- // FIXME: We can do much better with debug values.
MO.setReg(0);
continue;
}
- SlotIndex Idx = lis_.getInstructionIndex(MI);
- Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
- LiveInterval *LI = dupli_;
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval *testli = intervals_[i];
- if (testli->liveAt(Idx)) {
- LI = testli;
- break;
- }
- }
- if (LI) {
- MO.setReg(LI->reg);
- sa_.removeUse(MI);
- DEBUG(dbgs() << " rewrite " << Idx << '\t' << *MI);
- }
- }
- // dupli_ goes in last, after rewriting.
- if (dupli_) {
- if (dupli_->empty()) {
- DEBUG(dbgs() << " dupli became empty?\n");
- lis_.removeInterval(dupli_->reg);
- dupli_ = 0;
- } else {
- dupli_->RenumberValues(lis_);
- intervals_.push_back(dupli_);
+ // <undef> operands don't really read the register, so just assign them to
+ // the complement.
+ if (MO.isUse() && MO.isUndef()) {
+ MO.setReg(Edit.get(0)->reg);
+ continue;
}
+
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+
+ // Rewrite to the mapped register at Idx.
+ unsigned RegIdx = RegAssign.lookup(Idx);
+ MO.setReg(Edit.get(RegIdx)->reg);
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':' << RegIdx << '\t' << *MI);
+
+ // Extend liveness to Idx.
+ const VNInfo *ParentVNI = Edit.getParent().getVNInfoAt(Idx);
+ LIMappers[RegIdx].mapValue(ParentVNI, Idx);
}
+}
- // Calculate spill weight and allocation hints for new intervals.
- VirtRegAuxInfo vrai(vrm_.getMachineFunction(), lis_, sa_.loops_);
- for (unsigned i = firstInterval, e = intervals_.size(); i != e; ++i) {
- LiveInterval &li = *intervals_[i];
- vrai.CalculateRegClass(li.reg);
- vrai.CalculateWeightAndHint(li);
- DEBUG(dbgs() << " new interval " << mri_.getRegClass(li.reg)->getName()
- << ":" << li << '\n');
+/// rewriteComponents - Rewrite uses of Intvs[0] according to the ConEQ mapping.
+void SplitEditor::rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+ const ConnectedVNInfoEqClasses &ConEq) {
+ for (MachineRegisterInfo::reg_iterator RI = MRI.reg_begin(Intvs[0]->reg),
+ RE = MRI.reg_end(); RI != RE;) {
+ MachineOperand &MO = RI.getOperand();
+ MachineInstr *MI = MO.getParent();
+ ++RI;
+ if (MO.isUse() && MO.isUndef())
+ continue;
+ // DBG_VALUE instructions should have been eliminated earlier.
+ SlotIndex Idx = LIS.getInstructionIndex(MI);
+ Idx = MO.isUse() ? Idx.getUseIndex() : Idx.getDefIndex();
+ DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t'
+ << Idx << ':');
+ const VNInfo *VNI = Intvs[0]->getVNInfoAt(Idx);
+ assert(VNI && "Interval not live at use.");
+ MO.setReg(Intvs[ConEq.getEqClass(VNI)]->reg);
+ DEBUG(dbgs() << VNI->id << '\t' << *MI);
}
}
+void SplitEditor::finish() {
+ assert(OpenIdx == 0 && "Previous LI not closed before rewrite");
-//===----------------------------------------------------------------------===//
-// Loop Splitting
-//===----------------------------------------------------------------------===//
+ // At this point, the live intervals in Edit contain VNInfos corresponding to
+ // the inserted copies.
-bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
- SplitAnalysis::LoopBlocks Blocks;
- sa_.getLoopBlocks(Loop, Blocks);
+ // Add the original defs from the parent interval.
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+ E = Edit.getParent().vni_end(); I != E; ++I) {
+ const VNInfo *ParentVNI = *I;
+ if (ParentVNI->isUnused())
+ continue;
+ LiveIntervalMap &LIM = LIMappers[RegAssign.lookup(ParentVNI->def)];
+ VNInfo *VNI = LIM.defValue(ParentVNI, ParentVNI->def);
+ LIM.getLI()->addRange(LiveRange(ParentVNI->def,
+ ParentVNI->def.getNextSlot(), VNI));
+ // Mark all values as complex to force liveness computation.
+ // This should really only be necessary for remat victims, but we are lazy.
+ LIM.markComplexMapped(ParentVNI);
+ }
- // Break critical edges as needed.
- SplitAnalysis::BlockPtrSet CriticalExits;
- sa_.getCriticalExits(Blocks, CriticalExits);
- assert(CriticalExits.empty() && "Cannot break critical exits yet");
+#ifndef NDEBUG
+ // Every new interval must have a def by now, otherwise the split is bogus.
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+ assert((*I)->hasAtLeastOneValue() && "Split interval has no value");
+#endif
+
+ // FIXME: Don't recompute the liveness of all values, infer it from the
+ // overlaps between the parent live interval and RegAssign.
+ // The mapValue algorithm is only necessary when:
+ // - The parent value maps to multiple defs, and new phis are needed, or
+ // - The value has been rematerialized before some uses, and we want to
+ // minimize the live range so it only reaches the remaining uses.
+ // All other values have simple liveness that can be computed from RegAssign
+ // and the parent live interval.
+
+ // Extend live ranges to be live-out for successor PHI values.
+ for (LiveInterval::const_vni_iterator I = Edit.getParent().vni_begin(),
+ E = Edit.getParent().vni_end(); I != E; ++I) {
+ const VNInfo *PHIVNI = *I;
+ if (PHIVNI->isUnused() || !PHIVNI->isPHIDef())
+ continue;
+ unsigned RegIdx = RegAssign.lookup(PHIVNI->def);
+ LiveIntervalMap &LIM = LIMappers[RegIdx];
+ MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def);
+ DEBUG(dbgs() << " map phi in BB#" << MBB->getNumber() << '@' << PHIVNI->def
+ << " -> " << RegIdx << '\n');
+ for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
+ PE = MBB->pred_end(); PI != PE; ++PI) {
+ SlotIndex End = LIS.getMBBEndIdx(*PI).getPrevSlot();
+ DEBUG(dbgs() << " pred BB#" << (*PI)->getNumber() << '@' << End);
+ // The predecessor may not have a live-out value. That is OK, like an
+ // undef PHI operand.
+ if (VNInfo *VNI = Edit.getParent().getVNInfoAt(End)) {
+ DEBUG(dbgs() << " has parent valno #" << VNI->id << " live out\n");
+ assert(RegAssign.lookup(End) == RegIdx &&
+ "Different register assignment in phi predecessor");
+ LIM.mapValue(VNI, End);
+ }
+ else
+ DEBUG(dbgs() << " is not live-out\n");
+ }
+ DEBUG(dbgs() << " " << *LIM.getLI() << '\n');
+ }
- // Create new live interval for the loop.
- openIntv();
+ // Rewrite instructions.
+ rewriteAssigned();
- // Insert copies in the predecessors.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Preds.begin(),
- E = Blocks.Preds.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- enterIntvAtEnd(MBB, *Loop->getHeader());
- }
+ // FIXME: Delete defs that were rematted everywhere.
- // Switch all loop blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Loop.begin(),
- E = Blocks.Loop.end(); I != E; ++I)
- useIntv(**I);
+ // Get rid of unused values and set phi-kill flags.
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I)
+ (*I)->RenumberValues(LIS);
- // Insert back copies in the exit blocks.
- for (SplitAnalysis::BlockPtrSet::iterator I = Blocks.Exits.begin(),
- E = Blocks.Exits.end(); I != E; ++I) {
- MachineBasicBlock &MBB = const_cast<MachineBasicBlock&>(**I);
- leaveIntvAtTop(MBB);
+ // Now check if any registers were separated into multiple components.
+ ConnectedVNInfoEqClasses ConEQ(LIS);
+ for (unsigned i = 0, e = Edit.size(); i != e; ++i) {
+ // Don't use iterators, they are invalidated by create() below.
+ LiveInterval *li = Edit.get(i);
+ unsigned NumComp = ConEQ.Classify(li);
+ if (NumComp <= 1)
+ continue;
+ DEBUG(dbgs() << " " << NumComp << " components: " << *li << '\n');
+ SmallVector<LiveInterval*, 8> dups;
+ dups.push_back(li);
+ for (unsigned i = 1; i != NumComp; ++i)
+ dups.push_back(&Edit.create(MRI, LIS, VRM));
+ rewriteComponents(dups, ConEQ);
+ ConEQ.Distribute(&dups[0]);
}
- // Done.
- closeIntv();
- rewrite();
- return dupli_;
+ // Calculate spill weight and allocation hints for new intervals.
+ VirtRegAuxInfo vrai(VRM.getMachineFunction(), LIS, SA.Loops);
+ for (LiveRangeEdit::iterator I = Edit.begin(), E = Edit.end(); I != E; ++I){
+ LiveInterval &li = **I;
+ vrai.CalculateRegClass(li.reg);
+ vrai.CalculateWeightAndHint(li);
+ DEBUG(dbgs() << " new interval " << MRI.getRegClass(li.reg)->getName()
+ << ":" << li << '\n');
+ }
}
@@ -979,45 +914,50 @@ bool SplitEditor::splitAroundLoop(const MachineLoop *Loop) {
// Single Block Splitting
//===----------------------------------------------------------------------===//
-/// splitSingleBlocks - Split curli into a separate live interval inside each
-/// basic block in Blocks. Return true if curli has been completely replaced,
-/// false if curli is still intact, and needs to be spilled or split further.
-bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
- DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
- // Determine the first and last instruction using curli in each block.
- typedef std::pair<SlotIndex,SlotIndex> IndexPair;
- typedef DenseMap<const MachineBasicBlock*,IndexPair> IndexPairMap;
- IndexPairMap MBBRange;
- for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
- E = sa_.usingInstrs_.end(); I != E; ++I) {
- const MachineBasicBlock *MBB = (*I)->getParent();
- if (!Blocks.count(MBB))
+/// getMultiUseBlocks - if CurLI has more than one use in a basic block, it
+/// may be an advantage to split CurLI for the duration of the block.
+bool SplitAnalysis::getMultiUseBlocks(BlockPtrSet &Blocks) {
+ // If CurLI is local to one block, there is no point to splitting it.
+ if (LiveBlocks.size() <= 1)
+ return false;
+ // Add blocks with multiple uses.
+ for (unsigned i = 0, e = LiveBlocks.size(); i != e; ++i) {
+ const BlockInfo &BI = LiveBlocks[i];
+ if (!BI.Uses)
continue;
- SlotIndex Idx = lis_.getInstructionIndex(*I);
- DEBUG(dbgs() << " BB#" << MBB->getNumber() << '\t' << Idx << '\t' << **I);
- IndexPair &IP = MBBRange[MBB];
- if (!IP.first.isValid() || Idx < IP.first)
- IP.first = Idx;
- if (!IP.second.isValid() || Idx > IP.second)
- IP.second = Idx;
+ unsigned Instrs = UsingBlocks.lookup(BI.MBB);
+ if (Instrs <= 1)
+ continue;
+ if (Instrs == 2 && BI.LiveIn && BI.LiveOut && !BI.LiveThrough)
+ continue;
+ Blocks.insert(BI.MBB);
}
+ return !Blocks.empty();
+}
+
+/// splitSingleBlocks - Split CurLI into a separate live interval inside each
+/// basic block in Blocks.
+void SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
+ DEBUG(dbgs() << " splitSingleBlocks for " << Blocks.size() << " blocks.\n");
- // Create a new interval for each block.
- for (SplitAnalysis::BlockPtrSet::const_iterator I = Blocks.begin(),
- E = Blocks.end(); I != E; ++I) {
- IndexPair &IP = MBBRange[*I];
- DEBUG(dbgs() << " splitting for BB#" << (*I)->getNumber() << ": ["
- << IP.first << ';' << IP.second << ")\n");
- assert(IP.first.isValid() && IP.second.isValid());
+ for (unsigned i = 0, e = SA.LiveBlocks.size(); i != e; ++i) {
+ const SplitAnalysis::BlockInfo &BI = SA.LiveBlocks[i];
+ if (!BI.Uses || !Blocks.count(BI.MBB))
+ continue;
openIntv();
- enterIntvBefore(IP.first);
- useIntv(IP.first.getBaseIndex(), IP.second.getBoundaryIndex());
- leaveIntvAfter(IP.second);
+ SlotIndex SegStart = enterIntvBefore(BI.FirstUse);
+ if (BI.LastUse < BI.LastSplitPoint) {
+ useIntv(SegStart, leaveIntvAfter(BI.LastUse));
+ } else {
+      // The last use is after the last valid split point.
+ SlotIndex SegStop = leaveIntvBefore(BI.LastSplitPoint);
+ useIntv(SegStart, SegStop);
+ overlapIntv(SegStop, BI.LastUse);
+ }
closeIntv();
}
- rewrite();
- return dupli_;
+ finish();
}
@@ -1025,31 +965,29 @@ bool SplitEditor::splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks) {
// Sub Block Splitting
//===----------------------------------------------------------------------===//
-/// getBlockForInsideSplit - If curli is contained inside a single basic block,
+/// getBlockForInsideSplit - If CurLI is contained inside a single basic block,
 /// and it would pay to subdivide the interval inside that block, return it.
/// Otherwise return NULL. The returned block can be passed to
/// SplitEditor::splitInsideBlock.
const MachineBasicBlock *SplitAnalysis::getBlockForInsideSplit() {
// The interval must be exclusive to one block.
- if (usingBlocks_.size() != 1)
+ if (UsingBlocks.size() != 1)
return 0;
   // Don't do this for less than 4 instructions. We want to be sure that
// splitting actually reduces the instruction count per interval.
- if (usingInstrs_.size() < 4)
+ if (UsingInstrs.size() < 4)
return 0;
- return usingBlocks_.begin()->first;
+ return UsingBlocks.begin()->first;
}
-/// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
-/// true if curli has been completely replaced, false if curli is still
-/// intact, and needs to be spilled or split further.
-bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
+/// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+void SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
SmallVector<SlotIndex, 32> Uses;
- Uses.reserve(sa_.usingInstrs_.size());
- for (SplitAnalysis::InstrPtrSet::const_iterator I = sa_.usingInstrs_.begin(),
- E = sa_.usingInstrs_.end(); I != E; ++I)
+ Uses.reserve(SA.UsingInstrs.size());
+ for (SplitAnalysis::InstrPtrSet::const_iterator I = SA.UsingInstrs.begin(),
+ E = SA.UsingInstrs.end(); I != E; ++I)
if ((*I)->getParent() == MBB)
- Uses.push_back(lis_.getInstructionIndex(*I));
+ Uses.push_back(LIS.getInstructionIndex(*I));
DEBUG(dbgs() << " splitInsideBlock BB#" << MBB->getNumber() << " for "
<< Uses.size() << " instructions.\n");
assert(Uses.size() >= 3 && "Need at least 3 instructions");
@@ -1077,21 +1015,16 @@ bool SplitEditor::splitInsideBlock(const MachineBasicBlock *MBB) {
// First interval before the gap. Don't create single-instr intervals.
if (bestPos > 1) {
openIntv();
- enterIntvBefore(Uses.front());
- useIntv(Uses.front().getBaseIndex(), Uses[bestPos-1].getBoundaryIndex());
- leaveIntvAfter(Uses[bestPos-1]);
+ useIntv(enterIntvBefore(Uses.front()), leaveIntvAfter(Uses[bestPos-1]));
closeIntv();
}
// Second interval after the gap.
if (bestPos < Uses.size()-1) {
openIntv();
- enterIntvBefore(Uses[bestPos]);
- useIntv(Uses[bestPos].getBaseIndex(), Uses.back().getBoundaryIndex());
- leaveIntvAfter(Uses.back());
+ useIntv(enterIntvBefore(Uses[bestPos]), leaveIntvAfter(Uses.back()));
closeIntv();
}
- rewrite();
- return dupli_;
+ finish();
}
diff --git a/lib/CodeGen/SplitKit.h b/lib/CodeGen/SplitKit.h
index ddef7461dc3d..5c34afd1c819 100644
--- a/lib/CodeGen/SplitKit.h
+++ b/lib/CodeGen/SplitKit.h
@@ -1,4 +1,4 @@
-//===---------- SplitKit.cpp - Toolkit for splitting live ranges ----------===//
+//===-------- SplitKit.h - Toolkit for splitting live ranges ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -12,125 +12,132 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/IntervalMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/CodeGen/SlotIndexes.h"
namespace llvm {
+class ConnectedVNInfoEqClasses;
class LiveInterval;
class LiveIntervals;
+class LiveRangeEdit;
class MachineInstr;
-class MachineLoop;
class MachineLoopInfo;
class MachineRegisterInfo;
class TargetInstrInfo;
+class TargetRegisterInfo;
class VirtRegMap;
class VNInfo;
+class raw_ostream;
+
+/// At some point we should just include MachineDominators.h:
+class MachineDominatorTree;
+template <class NodeT> class DomTreeNodeBase;
+typedef DomTreeNodeBase<MachineBasicBlock> MachineDomTreeNode;
+
/// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting
/// opportunities.
class SplitAnalysis {
public:
- const MachineFunction &mf_;
- const LiveIntervals &lis_;
- const MachineLoopInfo &loops_;
- const TargetInstrInfo &tii_;
+ const MachineFunction &MF;
+ const VirtRegMap &VRM;
+ const LiveIntervals &LIS;
+ const MachineLoopInfo &Loops;
+ const TargetInstrInfo &TII;
   // Instructions using the current register.
typedef SmallPtrSet<const MachineInstr*, 16> InstrPtrSet;
- InstrPtrSet usingInstrs_;
+ InstrPtrSet UsingInstrs;
+
+ // Sorted slot indexes of using instructions.
+ SmallVector<SlotIndex, 8> UseSlots;
- // The number of instructions using curli in each basic block.
+ // The number of instructions using CurLI in each basic block.
typedef DenseMap<const MachineBasicBlock*, unsigned> BlockCountMap;
- BlockCountMap usingBlocks_;
+ BlockCountMap UsingBlocks;
+
+ /// Additional information about basic blocks where the current variable is
+ /// live. Such a block will look like one of these templates:
+ ///
+ /// 1. | o---x | Internal to block. Variable is only live in this block.
+ /// 2. |---x | Live-in, kill.
+ /// 3. | o---| Def, live-out.
+ /// 4. |---x o---| Live-in, kill, def, live-out.
+ /// 5. |---o---o---| Live-through with uses or defs.
+ /// 6. |-----------| Live-through without uses. Transparent.
+ ///
+ struct BlockInfo {
+ MachineBasicBlock *MBB;
+ SlotIndex FirstUse; ///< First instr using current reg.
+ SlotIndex LastUse; ///< Last instr using current reg.
+ SlotIndex Kill; ///< Interval end point inside block.
+ SlotIndex Def; ///< Interval start point inside block.
+ /// Last possible point for splitting live ranges.
+ SlotIndex LastSplitPoint;
+ bool Uses; ///< Current reg has uses or defs in block.
+ bool LiveThrough; ///< Live in whole block (Templ 5. or 6. above).
+ bool LiveIn; ///< Current reg is live in.
+ bool LiveOut; ///< Current reg is live out.
+
+ // Per-interference pattern scratch data.
+ bool OverlapEntry; ///< Interference overlaps entering interval.
+ bool OverlapExit; ///< Interference overlaps exiting interval.
+ };
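The six block templates above can be recovered from the BlockInfo flags alone. The following stand-alone sketch is illustrative only and is not part of this patch; the enum and the classify() helper are invented names.

#include <cassert>

// Hypothetical illustration; none of these names exist in SplitKit.
enum BlockTemplate {
  InternalOnly,    // 1. | o---x |      not live-in, not live-out
  LiveInKill,      // 2. |---x    |     live-in only
  DefLiveOut,      // 3. |    o---|     live-out only
  KillThenDef,     // 4. |---x o--|     live-in and live-out, not live-through
  LiveThroughUses, // 5. |---o---o---|  live-through with uses or defs
  Transparent      // 6. |-----------|  live-through without uses
};

static BlockTemplate classify(bool LiveIn, bool LiveOut,
                              bool LiveThrough, bool Uses) {
  if (!LiveIn && !LiveOut)
    return InternalOnly;                            // 1.
  if (LiveThrough)
    return Uses ? LiveThroughUses : Transparent;    // 5. and 6.
  if (LiveIn && LiveOut)
    return KillThenDef;                             // 4.
  if (LiveIn)
    return LiveInKill;                              // 2.
  assert(LiveOut && "remaining case must be def, live-out");
  return DefLiveOut;                                // 3.
}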
- // The number of basic block using curli in each loop.
- typedef DenseMap<const MachineLoop*, unsigned> LoopCountMap;
- LoopCountMap usingLoops_;
+ /// Basic blocks where var is live. This array is parallel to
+ /// SpillConstraints.
+ SmallVector<BlockInfo, 8> LiveBlocks;
private:
// Current live interval.
- const LiveInterval *curli_;
+ const LiveInterval *CurLI;
- // Sumarize statistics by counting instructions using curli_.
+  // Summarize statistics by counting instructions using CurLI.
void analyzeUses();
+ /// calcLiveBlockInfo - Compute per-block information about CurLI.
+ void calcLiveBlockInfo();
+
/// canAnalyzeBranch - Return true if MBB ends in a branch that can be
/// analyzed.
bool canAnalyzeBranch(const MachineBasicBlock *MBB);
public:
- SplitAnalysis(const MachineFunction &mf, const LiveIntervals &lis,
+ SplitAnalysis(const VirtRegMap &vrm, const LiveIntervals &lis,
const MachineLoopInfo &mli);
- /// analyze - set curli to the specified interval, and analyze how it may be
+ /// analyze - set CurLI to the specified interval, and analyze how it may be
/// split.
void analyze(const LiveInterval *li);
- /// removeUse - Update statistics by noting that mi no longer uses curli.
- void removeUse(const MachineInstr *mi);
-
- const LiveInterval *getCurLI() { return curli_; }
-
/// clear - clear all data structures so SplitAnalysis is ready to analyze a
/// new interval.
void clear();
- typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- typedef SmallPtrSet<const MachineLoop*, 16> LoopPtrSet;
-
- // Sets of basic blocks surrounding a machine loop.
- struct LoopBlocks {
- BlockPtrSet Loop; // Blocks in the loop.
- BlockPtrSet Preds; // Loop predecessor blocks.
- BlockPtrSet Exits; // Loop exit blocks.
-
- void clear() {
- Loop.clear();
- Preds.clear();
- Exits.clear();
- }
- };
-
- // Calculate the block sets surrounding the loop.
- void getLoopBlocks(const MachineLoop *Loop, LoopBlocks &Blocks);
-
- /// LoopPeripheralUse - how is a variable used in and around a loop?
- /// Peripheral blocks are the loop predecessors and exit blocks.
- enum LoopPeripheralUse {
- ContainedInLoop, // All uses are inside the loop.
- SinglePeripheral, // At most one instruction per peripheral block.
- MultiPeripheral, // Multiple instructions in some peripheral blocks.
- OutsideLoop // Uses outside loop periphery.
- };
-
- /// analyzeLoopPeripheralUse - Return an enum describing how curli_ is used in
- /// and around the Loop.
- LoopPeripheralUse analyzeLoopPeripheralUse(const LoopBlocks&);
+ /// getParent - Return the last analyzed interval.
+ const LiveInterval &getParent() const { return *CurLI; }
- /// getCriticalExits - It may be necessary to partially break critical edges
- /// leaving the loop if an exit block has phi uses of curli. Collect the exit
- /// blocks that need special treatment into CriticalExits.
- void getCriticalExits(const LoopBlocks &Blocks, BlockPtrSet &CriticalExits);
+ /// hasUses - Return true if MBB has any uses of CurLI.
+ bool hasUses(const MachineBasicBlock *MBB) const {
+ return UsingBlocks.lookup(MBB);
+ }
- /// canSplitCriticalExits - Return true if it is possible to insert new exit
- /// blocks before the blocks in CriticalExits.
- bool canSplitCriticalExits(const LoopBlocks &Blocks,
- BlockPtrSet &CriticalExits);
+ typedef SmallPtrSet<const MachineBasicBlock*, 16> BlockPtrSet;
- /// getBestSplitLoop - Return the loop where curli may best be split to a
- /// separate register, or NULL.
- const MachineLoop *getBestSplitLoop();
+ // Print a set of blocks with use counts.
+ void print(const BlockPtrSet&, raw_ostream&) const;
/// getMultiUseBlocks - Add basic blocks to Blocks that may benefit from
- /// having curli split to a new live interval. Return true if Blocks can be
+ /// having CurLI split to a new live interval. Return true if Blocks can be
/// passed to SplitEditor::splitSingleBlocks.
bool getMultiUseBlocks(BlockPtrSet &Blocks);
- /// getBlockForInsideSplit - If curli is contained inside a single basic block,
- /// and it wou pay to subdivide the interval inside that block, return it.
- /// Otherwise return NULL. The returned block can be passed to
+ /// getBlockForInsideSplit - If CurLI is contained inside a single basic
+ /// block, and it would pay to subdivide the interval inside that block,
+ /// return it. Otherwise return NULL. The returned block can be passed to
/// SplitEditor::splitInsideBlock.
const MachineBasicBlock *getBlockForInsideSplit();
};
@@ -140,58 +147,102 @@ public:
/// interval that is a subset. Insert phi-def values as needed. This class is
/// used by SplitEditor to create new smaller LiveIntervals.
///
-/// parentli_ is the larger interval, li_ is the subset interval. Every value
-/// in li_ corresponds to exactly one value in parentli_, and the live range
-/// of the value is contained within the live range of the parentli_ value.
-/// Values in parentli_ may map to any number of openli_ values, including 0.
+/// ParentLI is the larger interval, LI is the subset interval. Every value
+/// in LI corresponds to exactly one value in ParentLI, and the live range
+/// of the value is contained within the live range of the ParentLI value.
+/// Values in ParentLI may map to any number of OpenLI values, including 0.
class LiveIntervalMap {
- LiveIntervals &lis_;
+ LiveIntervals &LIS;
+ MachineDominatorTree &MDT;
// The parent interval is never changed.
- const LiveInterval &parentli_;
+ const LiveInterval &ParentLI;
- // The child interval's values are fully contained inside parentli_ values.
- LiveInterval &li_;
+ // The child interval's values are fully contained inside ParentLI values.
+ LiveInterval *LI;
typedef DenseMap<const VNInfo*, VNInfo*> ValueMap;
- // Map parentli_ values to simple values in li_ that are defined at the same
- // SlotIndex, or NULL for parentli_ values that have complex li_ defs.
+ // Map ParentLI values to simple values in LI that are defined at the same
+ // SlotIndex, or NULL for ParentLI values that have complex LI defs.
// Note there is a difference between values mapping to NULL (complex), and
// values not present (unknown/unmapped).
- ValueMap valueMap_;
-
- // extendTo - Find the last li_ value defined in MBB at or before Idx. The
- // parentli_ is assumed to be live at Idx. Extend the live range to Idx.
- // Return the found VNInfo, or NULL.
- VNInfo *extendTo(MachineBasicBlock *MBB, SlotIndex Idx);
-
- // addSimpleRange - Add a simple range from parentli_ to li_.
- // ParentVNI must be live in the [Start;End) interval.
- void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
+ ValueMap Values;
+
+ typedef std::pair<VNInfo*, MachineDomTreeNode*> LiveOutPair;
+ typedef DenseMap<MachineBasicBlock*,LiveOutPair> LiveOutMap;
+
+ // LiveOutCache - Map each basic block where LI is live out to the live-out
+ // value and its defining block. One of these conditions shall be true:
+ //
+ // 1. !LiveOutCache.count(MBB)
+ // 2. LiveOutCache[MBB].second.getNode() == MBB
+ // 3. forall P in preds(MBB): LiveOutCache[P] == LiveOutCache[MBB]
+ //
+ // This is only a cache, the values can be computed as:
+ //
+ // VNI = LI->getVNInfoAt(LIS.getMBBEndIdx(MBB))
+ // Node = mbt_[LIS.getMBBFromIndex(VNI->def)]
+ //
+  // The cache is also used as a visited set by mapValue().
+ LiveOutMap LiveOutCache;
+
+ // Dump the live-out cache to dbgs().
+ void dumpCache();
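As a sanity check on the three conditions listed for LiveOutCache, here is a small self-contained sketch with stand-in Block/Value types. It is not code from this patch, only an illustration of the invariant.

#include <map>
#include <utility>
#include <vector>

struct Block { std::vector<Block*> preds; };
struct Value {};

// (value, defining block), matching the LiveOutPair idea above.
using LiveOutPair = std::pair<Value*, Block*>;
using LiveOutMap  = std::map<Block*, LiveOutPair>;

// True if the cache entry for MBB satisfies one of the three conditions:
// no entry, defined in MBB itself, or identical to every predecessor's entry.
bool cacheEntryConsistent(const LiveOutMap &Cache, Block *MBB) {
  auto It = Cache.find(MBB);
  if (It == Cache.end())
    return true;                                  // condition 1
  if (It->second.second == MBB)
    return true;                                  // condition 2
  for (Block *P : MBB->preds) {                   // condition 3
    auto PIt = Cache.find(P);
    if (PIt == Cache.end() || PIt->second != It->second)
      return false;
  }
  return true;
}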
public:
LiveIntervalMap(LiveIntervals &lis,
- const LiveInterval &parentli,
- LiveInterval &li)
- : lis_(lis), parentli_(parentli), li_(li) {}
+ MachineDominatorTree &mdt,
+ const LiveInterval &parentli)
+ : LIS(lis), MDT(mdt), ParentLI(parentli), LI(0) {}
+
+ /// reset - clear all data structures and start a new live interval.
+ void reset(LiveInterval *);
+
+ /// getLI - return the current live interval.
+ LiveInterval *getLI() const { return LI; }
- /// defValue - define a value in li_ from the parentli_ value VNI and Idx.
+ /// defValue - define a value in LI from the ParentLI value VNI and Idx.
/// Idx does not have to be ParentVNI->def, but it must be contained within
- /// ParentVNI's live range in parentli_.
- /// Return the new li_ value.
+ /// ParentVNI's live range in ParentLI.
+ /// Return the new LI value.
VNInfo *defValue(const VNInfo *ParentVNI, SlotIndex Idx);
- /// mapValue - map ParentVNI to the corresponding li_ value at Idx. It is
+ /// mapValue - map ParentVNI to the corresponding LI value at Idx. It is
/// assumed that ParentVNI is live at Idx.
/// If ParentVNI has not been defined by defValue, it is assumed that
/// ParentVNI->def dominates Idx.
/// If ParentVNI has been defined by defValue one or more times, a value that
/// dominates Idx will be returned. This may require creating extra phi-def
- /// values and adding live ranges to li_.
- VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx);
+ /// values and adding live ranges to LI.
+ /// If simple is not NULL, *simple will indicate if ParentVNI is a simply
+ /// mapped value.
+ VNInfo *mapValue(const VNInfo *ParentVNI, SlotIndex Idx, bool *simple = 0);
+
+ // extendTo - Find the last LI value defined in MBB at or before Idx. The
+ // parentli is assumed to be live at Idx. Extend the live range to include
+ // Idx. Return the found VNInfo, or NULL.
+ VNInfo *extendTo(const MachineBasicBlock *MBB, SlotIndex Idx);
+
+  /// isMapped - Return true if ParentVNI is a known mapped value. It may be a
+ /// simple 1-1 mapping or a complex mapping to later defs.
+ bool isMapped(const VNInfo *ParentVNI) const {
+ return Values.count(ParentVNI);
+ }
+
+ /// isComplexMapped - Return true if ParentVNI has received new definitions
+ /// with defValue.
+ bool isComplexMapped(const VNInfo *ParentVNI) const;
+
+ /// markComplexMapped - Mark ParentVNI as complex mapped regardless of the
+ /// number of definitions.
+ void markComplexMapped(const VNInfo *ParentVNI) { Values[ParentVNI] = 0; }
+
+ // addSimpleRange - Add a simple range from ParentLI to LI.
+ // ParentVNI must be live in the [Start;End) interval.
+ void addSimpleRange(SlotIndex Start, SlotIndex End, const VNInfo *ParentVNI);
- /// addRange - Add live ranges to li_ where [Start;End) intersects parentli_.
+ /// addRange - Add live ranges to LI where [Start;End) intersects ParentLI.
/// All needed values whose def is not inside [Start;End) must be defined
/// beforehand so mapValue will work.
void addRange(SlotIndex Start, SlotIndex End);
@@ -207,115 +258,129 @@ public:
/// - Mark the ranges where the new interval is used with useIntv*
/// - Mark the places where the interval is exited with exitIntv*.
/// - Finish the current interval with closeIntv and repeat from 2.
-/// - Rewrite instructions with rewrite().
+/// - Rewrite instructions with finish().
///
class SplitEditor {
- SplitAnalysis &sa_;
- LiveIntervals &lis_;
- VirtRegMap &vrm_;
- MachineRegisterInfo &mri_;
- const TargetInstrInfo &tii_;
-
- /// curli_ - The immutable interval we are currently splitting.
- const LiveInterval *const curli_;
-
- /// dupli_ - Created as a copy of curli_, ranges are carved out as new
- /// intervals get added through openIntv / closeIntv. This is used to avoid
- /// editing curli_.
- LiveInterval *dupli_;
-
- /// Currently open LiveInterval.
- LiveInterval *openli_;
-
- /// createInterval - Create a new virtual register and LiveInterval with same
- /// register class and spill slot as curli.
- LiveInterval *createInterval();
-
- /// getDupLI - Ensure dupli is created and return it.
- LiveInterval *getDupLI();
-
- /// valueMap_ - Map values in cupli to values in openli. These are direct 1-1
- /// mappings, and do not include values created by inserted copies.
- DenseMap<const VNInfo*, VNInfo*> valueMap_;
-
- /// mapValue - Return the openIntv value that corresponds to the given curli
- /// value.
- VNInfo *mapValue(const VNInfo *curliVNI);
-
- /// A dupli value is live through openIntv.
- bool liveThrough_;
-
- /// All the new intervals created for this split are added to intervals_.
- SmallVectorImpl<LiveInterval*> &intervals_;
-
- /// The index into intervals_ of the first interval we added. There may be
- /// others from before we got it.
- unsigned firstInterval;
-
- /// Insert a COPY instruction curli -> li. Allocate a new value from li
- /// defined by the COPY
- VNInfo *insertCopy(LiveInterval &LI,
- MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I);
+ SplitAnalysis &SA;
+ LiveIntervals &LIS;
+ VirtRegMap &VRM;
+ MachineRegisterInfo &MRI;
+ MachineDominatorTree &MDT;
+ const TargetInstrInfo &TII;
+ const TargetRegisterInfo &TRI;
+
+ /// Edit - The current parent register and new intervals created.
+ LiveRangeEdit &Edit;
+
+ /// Index into Edit of the currently open interval.
+ /// The index 0 is used for the complement, so the first interval started by
+ /// openIntv will be 1.
+ unsigned OpenIdx;
+
+ typedef IntervalMap<SlotIndex, unsigned> RegAssignMap;
+
+ /// Allocator for the interval map. This will eventually be shared with
+ /// SlotIndexes and LiveIntervals.
+ RegAssignMap::Allocator Allocator;
+
+ /// RegAssign - Map of the assigned register indexes.
+ /// Edit.get(RegAssign.lookup(Idx)) is the register that should be live at
+ /// Idx.
+ RegAssignMap RegAssign;
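For readers unfamiliar with IntervalMap, the lookup semantics of RegAssign can be approximated with an ordinary ordered map keyed by range start points. This is only a sketch with made-up names, not the actual data structure used here.

#include <iterator>
#include <map>

using SlotIdx = unsigned;   // stand-in for SlotIndex

// Each key marks where an assignment begins; its value is the register index
// that should be live from that point until the next key. Index 0 denotes the
// complement interval.
unsigned lookupRegIdx(const std::map<SlotIdx, unsigned> &RegAssign,
                      SlotIdx Idx) {
  auto It = RegAssign.upper_bound(Idx);
  return It == RegAssign.begin() ? 0 : std::prev(It)->second;
}

// Example: {0 -> 0, 100 -> 1, 140 -> 0} assigns register index 1 to the slots
// in [100, 140) and the complement (index 0) everywhere else.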
+
+  /// LIMappers - One LiveIntervalMap for each interval in Edit.
+ SmallVector<LiveIntervalMap, 4> LIMappers;
+
+ /// defFromParent - Define Reg from ParentVNI at UseIdx using either
+ /// rematerialization or a COPY from parent. Return the new value.
+ VNInfo *defFromParent(unsigned RegIdx,
+ VNInfo *ParentVNI,
+ SlotIndex UseIdx,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I);
+
+ /// rewriteAssigned - Rewrite all uses of Edit.getReg() to assigned registers.
+ void rewriteAssigned();
+
+  /// rewriteComponents - Rewrite all uses of Intvs[0] according to the eq
+ /// classes in ConEQ.
+  /// This must be done when Intvs[0] is still live at all uses, before calling
+ /// ConEq.Distribute().
+ void rewriteComponents(const SmallVectorImpl<LiveInterval*> &Intvs,
+ const ConnectedVNInfoEqClasses &ConEq);
public:
/// Create a new SplitEditor for editing the LiveInterval analyzed by SA.
/// Newly created intervals will be appended to newIntervals.
SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&,
- SmallVectorImpl<LiveInterval*> &newIntervals);
+ MachineDominatorTree&, LiveRangeEdit&);
/// getAnalysis - Get the corresponding analysis.
- SplitAnalysis &getAnalysis() { return sa_; }
+ SplitAnalysis &getAnalysis() { return SA; }
/// Create a new virtual register and live interval.
void openIntv();
- /// enterIntvBefore - Enter openli before the instruction at Idx. If curli is
- /// not live before Idx, a COPY is not inserted.
- void enterIntvBefore(SlotIndex Idx);
+ /// enterIntvBefore - Enter the open interval before the instruction at Idx.
+ /// If the parent interval is not live before Idx, a COPY is not inserted.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvBefore(SlotIndex Idx);
- /// enterIntvAtEnd - Enter openli at the end of MBB.
- /// PhiMBB is a successor inside openli where a PHI value is created.
- /// Currently, all entries must share the same PhiMBB.
- void enterIntvAtEnd(MachineBasicBlock &MBB, MachineBasicBlock &PhiMBB);
+ /// enterIntvAtEnd - Enter the open interval at the end of MBB.
+  /// Use the open interval from the inserted copy to the MBB end.
+ /// Return the beginning of the new live range.
+ SlotIndex enterIntvAtEnd(MachineBasicBlock &MBB);
- /// useIntv - indicate that all instructions in MBB should use openli.
+ /// useIntv - indicate that all instructions in MBB should use OpenLI.
void useIntv(const MachineBasicBlock &MBB);
- /// useIntv - indicate that all instructions in range should use openli.
+ /// useIntv - indicate that all instructions in range should use OpenLI.
void useIntv(SlotIndex Start, SlotIndex End);
- /// leaveIntvAfter - Leave openli after the instruction at Idx.
- void leaveIntvAfter(SlotIndex Idx);
+ /// leaveIntvAfter - Leave the open interval after the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAfter(SlotIndex Idx);
+
+ /// leaveIntvBefore - Leave the open interval before the instruction at Idx.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvBefore(SlotIndex Idx);
/// leaveIntvAtTop - Leave the interval at the top of MBB.
- /// Currently, only one value can leave the interval.
- void leaveIntvAtTop(MachineBasicBlock &MBB);
+ /// Add liveness from the MBB top to the copy.
+ /// Return the end of the live range.
+ SlotIndex leaveIntvAtTop(MachineBasicBlock &MBB);
+
+ /// overlapIntv - Indicate that all instructions in range should use the open
+ /// interval, but also let the complement interval be live.
+ ///
+ /// This doubles the register pressure, but is sometimes required to deal with
+ /// register uses after the last valid split point.
+ ///
+ /// The Start index should be a return value from a leaveIntv* call, and End
+ /// should be in the same basic block. The parent interval must have the same
+ /// value across the range.
+ ///
+ void overlapIntv(SlotIndex Start, SlotIndex End);
/// closeIntv - Indicate that we are done editing the currently open
/// LiveInterval, and ranges can be trimmed.
void closeIntv();
- /// rewrite - after all the new live ranges have been created, rewrite
- /// instructions using curli to use the new intervals.
- void rewrite();
+ /// finish - after all the new live ranges have been created, compute the
+ /// remaining live range, and rewrite instructions to use the new registers.
+ void finish();
- // ===--- High level methods ---===
+  /// dump - print the current interval mapping to dbgs().
+ void dump() const;
- /// splitAroundLoop - Split curli into a separate live interval inside
- /// the loop. Return true if curli has been completely replaced, false if
- /// curli is still intact, and needs to be spilled or split further.
- bool splitAroundLoop(const MachineLoop*);
+ // ===--- High level methods ---===
- /// splitSingleBlocks - Split curli into a separate live interval inside each
- /// basic block in Blocks. Return true if curli has been completely replaced,
- /// false if curli is still intact, and needs to be spilled or split further.
- bool splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
+ /// splitSingleBlocks - Split CurLI into a separate live interval inside each
+ /// basic block in Blocks.
+ void splitSingleBlocks(const SplitAnalysis::BlockPtrSet &Blocks);
- /// splitInsideBlock - Split curli into multiple intervals inside MBB. Return
- /// true if curli has been completely replaced, false if curli is still
- /// intact, and needs to be spilled or split further.
- bool splitInsideBlock(const MachineBasicBlock *);
+ /// splitInsideBlock - Split CurLI into multiple intervals inside MBB.
+ void splitInsideBlock(const MachineBasicBlock *);
};
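For orientation, the numbered workflow in the class comment above corresponds to the call sequence below. This is only a standalone sketch of an in-tree caller (it mirrors the splitSingleBlocks implementation earlier in this patch), not a new API; the helper name splitOneBlock is invented.

// Usage sketch, assuming an in-tree context where a SplitEditor has already
// been constructed over a LiveRangeEdit and its SplitAnalysis.
#include "SplitKit.h"
using namespace llvm;

static void splitOneBlock(SplitEditor &SE,
                          const SplitAnalysis::BlockInfo &BI) {
  SE.openIntv();                                      // start a new interval
  SlotIndex Start = SE.enterIntvBefore(BI.FirstUse);  // copy in before first use
  SE.useIntv(Start, SE.leaveIntvAfter(BI.LastUse));   // mark the used range
  SE.closeIntv();                                     // done with this interval
  SE.finish();                                        // rewrite instructions
}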
}
diff --git a/lib/CodeGen/Splitter.cpp b/lib/CodeGen/Splitter.cpp
index 38f3b1f4d35e..08aee82b8c5c 100644
--- a/lib/CodeGen/Splitter.cpp
+++ b/lib/CodeGen/Splitter.cpp
@@ -29,8 +29,14 @@
using namespace llvm;
char LoopSplitter::ID = 0;
-INITIALIZE_PASS(LoopSplitter, "loop-splitting",
- "Split virtual regists across loop boundaries.", false, false);
+INITIALIZE_PASS_BEGIN(LoopSplitter, "loop-splitting",
+  "Split virtual registers across loop boundaries.", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(LoopSplitter, "loop-splitting",
+  "Split virtual registers across loop boundaries.", false, false)
namespace llvm {
@@ -140,7 +146,6 @@ namespace llvm {
VNInfo *newVal = getNewVNI(preHeaderRange->valno);
newVal->def = copyDefIdx;
newVal->setCopy(copy);
- newVal->setIsDefAccurate(true);
li.removeRange(copyDefIdx, ls.lis->getMBBEndIdx(preHeader), true);
getNewLI()->addRange(LiveRange(copyDefIdx,
@@ -174,13 +179,13 @@ namespace llvm {
// Blow away output range definition.
outRange->valno->def = ls.lis->getInvalidIndex();
- outRange->valno->setIsDefAccurate(false);
li.removeRange(ls.lis->getMBBStartIdx(outBlock), copyDefIdx);
+ SlotIndex newDefIdx = ls.lis->getMBBStartIdx(outBlock);
+ assert(ls.lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
VNInfo *newVal =
- getNewLI()->getNextValue(SlotIndex(ls.lis->getMBBStartIdx(outBlock),
- true),
- 0, false, ls.lis->getVNInfoAllocator());
+ getNewLI()->getNextValue(newDefIdx, 0, ls.lis->getVNInfoAllocator());
getNewLI()->addRange(LiveRange(ls.lis->getMBBStartIdx(outBlock),
copyDefIdx, newVal));
@@ -514,8 +519,10 @@ namespace llvm {
if (!insertRange)
continue;
- VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(preHeader),
- 0, false, lis->getVNInfoAllocator());
+ SlotIndex newDefIdx = lis->getMBBStartIdx(preHeader);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0, lis->getVNInfoAllocator());
li.addRange(LiveRange(lis->getMBBStartIdx(preHeader),
lis->getMBBEndIdx(preHeader),
newVal));
@@ -612,8 +619,11 @@ namespace llvm {
lis->getMBBEndIdx(splitBlock), true);
}
} else if (intersects) {
- VNInfo *newVal = li.getNextValue(lis->getMBBStartIdx(splitBlock),
- 0, false, lis->getVNInfoAllocator());
+ SlotIndex newDefIdx = lis->getMBBStartIdx(splitBlock);
+ assert(lis->getInstructionFromIndex(newDefIdx) == 0 &&
+ "PHI def index points at actual instruction.");
+ VNInfo *newVal = li.getNextValue(newDefIdx, 0,
+ lis->getVNInfoAllocator());
li.addRange(LiveRange(lis->getMBBStartIdx(splitBlock),
lis->getMBBEndIdx(splitBlock),
newVal));
diff --git a/lib/CodeGen/Splitter.h b/lib/CodeGen/Splitter.h
index a726a7b834fb..9fb1b8b30139 100644
--- a/lib/CodeGen/Splitter.h
+++ b/lib/CodeGen/Splitter.h
@@ -36,7 +36,9 @@ namespace llvm {
public:
static char ID;
- LoopSplitter() : MachineFunctionPass(ID) {}
+ LoopSplitter() : MachineFunctionPass(ID) {
+ initializeLoopSplitterPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &au) const;
diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp
index 9f51778da756..fcaee4208ba3 100644
--- a/lib/CodeGen/StackProtector.cpp
+++ b/lib/CodeGen/StackProtector.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "stack-protector"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Attributes.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -45,6 +46,8 @@ namespace {
Function *F;
Module *M;
+ DominatorTree* DT;
+
/// InsertStackProtectors - Insert code into the prologue and epilogue of
/// the function.
///
@@ -62,9 +65,17 @@ namespace {
bool RequiresStackProtector() const;
public:
static char ID; // Pass identification, replacement for typeid.
- StackProtector() : FunctionPass(ID), TLI(0) {}
+ StackProtector() : FunctionPass(ID), TLI(0) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
StackProtector(const TargetLowering *tli)
- : FunctionPass(ID), TLI(tli) {}
+ : FunctionPass(ID), TLI(tli) {
+ initializeStackProtectorPass(*PassRegistry::getPassRegistry());
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
+ }
virtual bool runOnFunction(Function &Fn);
};
@@ -72,7 +83,7 @@ namespace {
char StackProtector::ID = 0;
INITIALIZE_PASS(StackProtector, "stack-protector",
- "Insert stack protectors", false, false);
+ "Insert stack protectors", false, false)
FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
return new StackProtector(tli);
@@ -81,6 +92,7 @@ FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) {
bool StackProtector::runOnFunction(Function &Fn) {
F = &Fn;
M = F->getParent();
+ DT = getAnalysisIfAvailable<DominatorTree>();
if (!RequiresStackProtector()) return false;
@@ -135,6 +147,7 @@ bool StackProtector::RequiresStackProtector() const {
/// value. It calls __stack_chk_fail if they differ.
bool StackProtector::InsertStackProtectors() {
BasicBlock *FailBB = 0; // The basic block to jump to if check fails.
+ BasicBlock *FailBBDom = 0; // FailBB's dominator.
AllocaInst *AI = 0; // Place on stack that stores the stack guard.
Value *StackGuardVar = 0; // The stack guard variable.
@@ -178,6 +191,8 @@ bool StackProtector::InsertStackProtectors() {
// Create the basic block to jump to when the guard check fails.
FailBB = CreateFailBB();
+ if (DT)
+ FailBBDom = DT->isReachableFromEntry(BB) ? BB : 0;
}
// For each block with a return instruction, convert this:
@@ -204,6 +219,10 @@ bool StackProtector::InsertStackProtectors() {
// Split the basic block before the return instruction.
BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return");
+ if (DT) {
+ DT->addNewBlock(NewBB, DT->isReachableFromEntry(BB) ? BB : 0);
+ FailBBDom = DT->findNearestCommonDominator(FailBBDom, BB);
+ }
// Remove default branch instruction to the new BB.
BB->getTerminator()->eraseFromParent();
@@ -223,6 +242,9 @@ bool StackProtector::InsertStackProtectors() {
// statements in the function.
if (!FailBB) return false;
+ if (DT)
+ DT->addNewBlock(FailBB, FailBBDom);
+
return true;
}
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index 8d57ae95dde2..01f5b5627f4f 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -95,9 +95,13 @@ namespace {
public:
static char ID; // Pass identification
StackSlotColoring() :
- MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(false), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
StackSlotColoring(bool RegColor) :
- MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {}
+ MachineFunctionPass(ID), ColorWithRegs(RegColor), NextColor(-1) {
+ initializeStackSlotColoringPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -145,8 +149,14 @@ namespace {
char StackSlotColoring::ID = 0;
-INITIALIZE_PASS(StackSlotColoring, "stack-slot-coloring",
- "Stack Slot Coloring", false, false);
+INITIALIZE_PASS_BEGIN(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveStacks)
+INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_END(StackSlotColoring, "stack-slot-coloring",
+ "Stack Slot Coloring", false, false)
FunctionPass *llvm::createStackSlotColoringPass(bool RegColor) {
return new StackSlotColoring(RegColor);
@@ -208,7 +218,7 @@ void StackSlotColoring::InitializeSlots() {
for (LiveStacks::iterator i = LS->begin(), e = LS->end(); i != e; ++i) {
LiveInterval &li = i->second;
DEBUG(li.dump());
- int FI = li.getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li.reg);
if (MFI->isDeadObjectIndex(FI))
continue;
SSIntervals.push_back(&li);
@@ -251,7 +261,7 @@ StackSlotColoring::ColorSlotsWithFreeRegs(SmallVector<int, 16> &SlotMapping,
DEBUG(dbgs() << "Assigning unused registers to spill slots:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
if (!UsedColors[SS] || li->weight < 20)
// If the weight is < 20, i.e. two references in a loop with depth 1,
// don't bother with it.
@@ -340,7 +350,7 @@ int StackSlotColoring::ColorSlot(LiveInterval *li) {
// Record the assignment.
Assignments[Color].push_back(li);
- int FI = li->getStackSlotIndex();
+ int FI = TargetRegisterInfo::stackSlot2Index(li->reg);
DEBUG(dbgs() << "Assigning fi#" << FI << " to fi#" << Color << "\n");
// Change size and alignment of the allocated slot. If there are multiple
@@ -369,7 +379,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
bool Changed = false;
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
int NewSS = ColorSlot(li);
assert(NewSS >= 0 && "Stack coloring failed?");
SlotMapping[SS] = NewSS;
@@ -382,7 +392,7 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) {
DEBUG(dbgs() << "\nSpill slots after coloring:\n");
for (unsigned i = 0, e = SSIntervals.size(); i != e; ++i) {
LiveInterval *li = SSIntervals[i];
- int SS = li->getStackSlotIndex();
+ int SS = TargetRegisterInfo::stackSlot2Index(li->reg);
li->weight = SlotWeights[SS];
}
// Sort them by new weight.
@@ -636,7 +646,7 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI,
} else {
SmallVector<MachineInstr*, 4> NewMIs;
bool Success = TII->unfoldMemoryOperand(MF, MI, Reg, false, false, NewMIs);
- Success = Success; // Silence compiler warning.
+ (void)Success; // Silence compiler warning.
assert(Success && "Failed to unfold!");
MachineInstr *NewMI = NewMIs[0];
MBB->insert(MI, NewMI);
diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp
index 894dbfa28bac..ec7829ec39fe 100644
--- a/lib/CodeGen/StrongPHIElimination.cpp
+++ b/lib/CodeGen/StrongPHIElimination.cpp
@@ -1,4 +1,4 @@
-//===- StrongPhiElimination.cpp - Eliminate PHI nodes by inserting copies -===//
+//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,1039 +7,823 @@
//
//===----------------------------------------------------------------------===//
//
-// This pass eliminates machine instruction PHI nodes by inserting copy
-// instructions, using an intelligent copy-folding technique based on
-// dominator information. This is technique is derived from:
+// This pass eliminates PHI instructions by aggressively coalescing the copies
+// that would be inserted by a naive algorithm and only inserting the copies
+// that are necessary. The coalescing technique initially assumes that all
+// registers appearing in a PHI instruction do not interfere. It then eliminates
+// proven interferences, using dominators to only perform a linear number of
+// interference tests instead of the quadratic number of interference tests
+// that this would naively require. This is a technique derived from:
//
// Budimlic, et al. Fast copy coalescing and live-range identification.
// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language
// Design and Implementation (Berlin, Germany, June 17 - 19, 2002).
// PLDI '02. ACM, New York, NY, 25-32.
-// DOI= http://doi.acm.org/10.1145/512529.512534
+//
+// The original implementation constructs a data structure they call a dominance
+// forest for this purpose. The dominance forest was shown to be unnecessary,
+// as it is possible to emulate the creation and traversal of a dominance forest
+// by directly using the dominator tree, rather than actually constructing the
+// dominance forest. This technique is explained in:
+//
+// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code
+// Quality and Efficiency,
+// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code
+// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009).
+// CGO '09. IEEE, Washington, DC, 114-125.
+//
+// Careful implementation allows for all of the dominator forest interference
+// checks to be performed at once in a single depth-first traversal of the
+// dominator tree, which is what is implemented here.
//
//===----------------------------------------------------------------------===//
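To make the dominator-tree emulation of the dominance forest concrete, here is a self-contained sketch that performs all candidate interference tests in a single depth-first traversal. It is illustrative only; the types and the Interferes callback are invented and it is not this pass's code.

#include <functional>
#include <map>
#include <utility>
#include <vector>

struct DomNode {
  int DFSIn = 0, DFSOut = 0;
  std::vector<DomNode*> Children;
  std::vector<int> Defs;              // registers defined in this block
};

static void numberDFS(DomNode *N, int &Clock) {
  N->DFSIn = Clock++;
  for (DomNode *C : N->Children)
    numberDFS(C, Clock);
  N->DFSOut = Clock++;
}

static bool dominates(const DomNode *A, const DomNode *B) {
  return A->DFSIn <= B->DFSIn && B->DFSOut <= A->DFSOut;
}

// Visit defs in dominator-tree preorder, keeping one stack per colour of the
// definitions on the current root-to-node path. Each register is tested only
// against its nearest dominating same-colour definition, so the number of
// Interferes() calls is linear in the number of definitions.
static void checkInterferences(
    DomNode *Root, const std::map<int, int> &ColourOf,
    const std::function<bool(int, int)> &Interferes,
    std::vector<std::pair<int, int>> &Conflicts) {
  int Clock = 0;
  numberDFS(Root, Clock);
  std::map<int, std::vector<std::pair<DomNode*, int>>> Stacks;
  std::function<void(DomNode*)> Visit = [&](DomNode *N) {
    for (int Reg : N->Defs) {
      auto &Stack = Stacks[ColourOf.at(Reg)];
      while (!Stack.empty() && !dominates(Stack.back().first, N))
        Stack.pop_back();                        // no longer on the path
      if (!Stack.empty() && Interferes(Stack.back().second, Reg))
        Conflicts.push_back({Stack.back().second, Reg});
      Stack.push_back({N, Reg});
    }
    for (DomNode *C : N->Children)
      Visit(C);
  };
  Visit(Root);
}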
#define DEBUG_TYPE "strongphielim"
+#include "PHIEliminationUtils.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
-#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/RegisterCoalescer.h"
#include "llvm/Target/TargetInstrInfo.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
namespace {
- struct StrongPHIElimination : public MachineFunctionPass {
+ class StrongPHIElimination : public MachineFunctionPass {
+ public:
static char ID; // Pass identification, replacement for typeid
- StrongPHIElimination() : MachineFunctionPass(ID) {}
-
- // Waiting stores, for each MBB, the set of copies that need to
- // be inserted into that MBB
- DenseMap<MachineBasicBlock*,
- std::multimap<unsigned, unsigned> > Waiting;
-
- // Stacks holds the renaming stack for each register
- std::map<unsigned, std::vector<unsigned> > Stacks;
-
- // Registers in UsedByAnother are PHI nodes that are themselves
- // used as operands to another PHI node
- std::set<unsigned> UsedByAnother;
-
- // RenameSets are the is a map from a PHI-defined register
- // to the input registers to be coalesced along with the
- // predecessor block for those input registers.
- std::map<unsigned, std::map<unsigned, MachineBasicBlock*> > RenameSets;
-
- // PhiValueNumber holds the ID numbers of the VNs for each phi that we're
- // eliminating, indexed by the register defined by that phi.
- std::map<unsigned, unsigned> PhiValueNumber;
-
- // Store the DFS-in number of each block
- DenseMap<MachineBasicBlock*, unsigned> preorder;
-
- // Store the DFS-out number of each block
- DenseMap<MachineBasicBlock*, unsigned> maxpreorder;
-
- bool runOnMachineFunction(MachineFunction &Fn);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- AU.addRequired<MachineDominatorTree>();
- AU.addRequired<SlotIndexes>();
- AU.addPreserved<SlotIndexes>();
- AU.addRequired<LiveIntervals>();
-
- // TODO: Actually make this true.
- AU.addPreserved<LiveIntervals>();
- AU.addPreserved<RegisterCoalescer>();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
-
- virtual void releaseMemory() {
- preorder.clear();
- maxpreorder.clear();
-
- Waiting.clear();
- Stacks.clear();
- UsedByAnother.clear();
- RenameSets.clear();
+ StrongPHIElimination() : MachineFunctionPass(ID) {
+ initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry());
}
+ virtual void getAnalysisUsage(AnalysisUsage&) const;
+ bool runOnMachineFunction(MachineFunction&);
+
private:
-
- /// DomForestNode - Represents a node in the "dominator forest". This is
- /// a forest in which the nodes represent registers and the edges
- /// represent a dominance relation in the block defining those registers.
- struct DomForestNode {
- private:
- // Store references to our children
- std::vector<DomForestNode*> children;
- // The register we represent
- unsigned reg;
-
- // Add another node as our child
- void addChild(DomForestNode* DFN) { children.push_back(DFN); }
-
- public:
- typedef std::vector<DomForestNode*>::iterator iterator;
-
- // Create a DomForestNode by providing the register it represents, and
- // the node to be its parent. The virtual root node has register 0
- // and a null parent.
- DomForestNode(unsigned r, DomForestNode* parent) : reg(r) {
- if (parent)
- parent->addChild(this);
- }
-
- ~DomForestNode() {
- for (iterator I = begin(), E = end(); I != E; ++I)
- delete *I;
- }
-
- /// getReg - Return the regiser that this node represents
- inline unsigned getReg() { return reg; }
-
- // Provide iterator access to our children
- inline DomForestNode::iterator begin() { return children.begin(); }
- inline DomForestNode::iterator end() { return children.end(); }
+ /// This struct represents a single node in the union-find data structure
+ /// representing the variable congruence classes. There is one difference
+ /// from a normal union-find data structure. We steal two bits from the parent
+    /// pointer. One of these bits is used to represent whether the register
+ /// itself has been isolated, and the other is used to represent whether the
+ /// PHI with that register as its destination has been isolated.
+ ///
+ /// Note that this leads to the strange situation where the leader of a
+ /// congruence class may no longer logically be a member, due to being
+ /// isolated.
+ struct Node {
+ enum Flags {
+ kRegisterIsolatedFlag = 1,
+ kPHIIsolatedFlag = 2
+ };
+ Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); }
+
+ Node *getLeader();
+
+ PointerIntPair<Node*, 2> parent;
+ unsigned value;
+ unsigned rank;
};
-
- void computeDFS(MachineFunction& MF);
- void processBlock(MachineBasicBlock* MBB);
-
- std::vector<DomForestNode*> computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& instrs,
- MachineRegisterInfo& MRI);
- void processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals);
- void ScheduleCopies(MachineBasicBlock* MBB, std::set<unsigned>& pushed);
- void InsertCopies(MachineDomTreeNode* MBB,
- SmallPtrSet<MachineBasicBlock*, 16>& v);
- bool mergeLiveIntervals(unsigned primary, unsigned secondary);
- };
-}
-char StrongPHIElimination::ID = 0;
-INITIALIZE_PASS(StrongPHIElimination, "strong-phi-node-elimination",
- "Eliminate PHI nodes for register allocation, intelligently", false, false);
+ /// Add a register in a new congruence class containing only itself.
+ void addReg(unsigned);
-char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+ /// Join the congruence classes of two registers. This function is biased
+ /// towards the left argument, i.e. after
+ ///
+ /// addReg(r2);
+ /// unionRegs(r1, r2);
+ ///
+ /// the leader of the unioned congruence class is the same as the leader of
+ /// r1's congruence class prior to the union. This is actually relied upon
+ /// in the copy insertion code.
+ void unionRegs(unsigned, unsigned);
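The union-find structure sketched below is a self-contained approximation of the Node class and the biased unionRegs described above. It keeps the isolation flags in a plain field instead of stealing bits from the parent pointer with PointerIntPair, and all names are invented for illustration.

// Minimal sketch, not the pass's actual data structure.
struct UFNode {
  UFNode *Parent;                    // a root points to itself
  unsigned Value;
  unsigned Rank;
  unsigned Flags;                    // bit 0: register isolated, bit 1: PHI isolated

  explicit UFNode(unsigned V) : Parent(this), Value(V), Rank(0), Flags(0) {}

  UFNode *getLeader() {
    if (Parent != this)
      Parent = Parent->getLeader();  // path compression
    return Parent;
  }
};

// Biased union: the leader of A's class stays the leader of the merged class,
// mirroring the guarantee the copy-insertion code relies on. Rank is kept as
// an upper bound on tree height.
inline void unionNodes(UFNode *A, UFNode *B) {
  UFNode *RootA = A->getLeader();
  UFNode *RootB = B->getLeader();
  if (RootA == RootB)
    return;
  RootB->Parent = RootA;
  if (RootA->Rank <= RootB->Rank)
    RootA->Rank = RootB->Rank + 1;
}

// Isolating a register only sets its flag; queries then treat its colour as 0
// without restructuring the tree.
inline void isolateNode(UFNode *N) { N->Flags |= 1u; }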
-/// computeDFS - Computes the DFS-in and DFS-out numbers of the dominator tree
-/// of the given MachineFunction. These numbers are then used in other parts
-/// of the PHI elimination process.
-void StrongPHIElimination::computeDFS(MachineFunction& MF) {
- SmallPtrSet<MachineDomTreeNode*, 8> frontier;
- SmallPtrSet<MachineDomTreeNode*, 8> visited;
-
- unsigned time = 0;
-
- MachineDominatorTree& DT = getAnalysis<MachineDominatorTree>();
-
- MachineDomTreeNode* node = DT.getRootNode();
-
- std::vector<MachineDomTreeNode*> worklist;
- worklist.push_back(node);
-
- while (!worklist.empty()) {
- MachineDomTreeNode* currNode = worklist.back();
-
- if (!frontier.count(currNode)) {
- frontier.insert(currNode);
- ++time;
- preorder.insert(std::make_pair(currNode->getBlock(), time));
- }
-
- bool inserted = false;
- for (MachineDomTreeNode::iterator I = currNode->begin(), E = currNode->end();
- I != E; ++I)
- if (!frontier.count(*I) && !visited.count(*I)) {
- worklist.push_back(*I);
- inserted = true;
- break;
- }
-
- if (!inserted) {
- frontier.erase(currNode);
- visited.insert(currNode);
- maxpreorder.insert(std::make_pair(currNode->getBlock(), time));
-
- worklist.pop_back();
+ /// Get the color of a register. The color is 0 if the register has been
+ /// isolated.
+ unsigned getRegColor(unsigned);
+
+ // Isolate a register.
+ void isolateReg(unsigned);
+
+ /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been
+ /// isolated. Otherwise, it is the original color of its destination and
+ /// all of its operands (before they were isolated, if they were).
+ unsigned getPHIColor(MachineInstr*);
+
+ /// Isolate a PHI.
+ void isolatePHI(MachineInstr*);
+
+ /// Traverses a basic block, splitting any interferences found between
+ /// registers in the same congruence class. It takes two DenseMaps as
+ /// arguments that it also updates: CurrentDominatingParent, which maps
+ /// a color to the register in that congruence class whose definition was
+ /// most recently seen, and ImmediateDominatingParent, which maps a register
+ /// to the register in the same congruence class that most immediately
+ /// dominates it.
+ ///
+ /// This function assumes that it is being called in a depth-first traversal
+ /// of the dominator tree.
+ void SplitInterferencesForBasicBlock(
+ MachineBasicBlock&,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent);
+
+ // Lowers a PHI instruction, inserting copies of the source and destination
+ // registers as necessary.
+ void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*);
+
+ // Merges the live interval of Reg into NewReg and renames Reg to NewReg
+ // everywhere that Reg appears. Requires Reg and NewReg to have non-
+ // overlapping lifetimes.
+ void MergeLIsAndRename(unsigned Reg, unsigned NewReg);
+
+ MachineRegisterInfo *MRI;
+ const TargetInstrInfo *TII;
+ MachineDominatorTree *DT;
+ LiveIntervals *LI;
+
+ BumpPtrAllocator Allocator;
+
+ DenseMap<unsigned, Node*> RegNodeMap;
+
+ // Maps a basic block to a list of its defs of registers that appear as PHI
+ // sources.
+ DenseMap<MachineBasicBlock*, std::vector<MachineInstr*> > PHISrcDefs;
+
+ // Maps a color to a pair of a MachineInstr* and a virtual register, which
+ // is the operand of that PHI corresponding to the current basic block.
+ DenseMap<unsigned, std::pair<MachineInstr*, unsigned> > CurrentPHIForColor;
+
+ // FIXME: Can these two data structures be combined? Would a std::multimap
+ // be any better?
+
+ // Stores pairs of predecessor basic blocks and the source registers of
+ // inserted copy instructions.
+ typedef DenseSet<std::pair<MachineBasicBlock*, unsigned> > SrcCopySet;
+ SrcCopySet InsertedSrcCopySet;
+
+ // Maps pairs of predecessor basic blocks and colors to their defining copy
+ // instructions.
+ typedef DenseMap<std::pair<MachineBasicBlock*, unsigned>, MachineInstr*>
+ SrcCopyMap;
+ SrcCopyMap InsertedSrcCopyMap;
+
+ // Maps inserted destination copy registers to their defining copy
+ // instructions.
+ typedef DenseMap<unsigned, MachineInstr*> DestCopyMap;
+ DestCopyMap InsertedDestCopies;
+ };
+
+ struct MIIndexCompare {
+ MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { }
+
+ bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const {
+ return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS);
}
- }
-}
-namespace {
+ LiveIntervals *LI;
+ };
+} // namespace
-/// PreorderSorter - a helper class that is used to sort registers
-/// according to the preorder number of their defining blocks
-class PreorderSorter {
-private:
- DenseMap<MachineBasicBlock*, unsigned>& preorder;
- MachineRegisterInfo& MRI;
-
-public:
- PreorderSorter(DenseMap<MachineBasicBlock*, unsigned>& p,
- MachineRegisterInfo& M) : preorder(p), MRI(M) { }
-
- bool operator()(unsigned A, unsigned B) {
- if (A == B)
- return false;
-
- MachineBasicBlock* ABlock = MRI.getVRegDef(A)->getParent();
- MachineBasicBlock* BBlock = MRI.getVRegDef(B)->getParent();
-
- if (preorder[ABlock] < preorder[BBlock])
- return true;
- else if (preorder[ABlock] > preorder[BBlock])
- return false;
-
- return false;
- }
-};
+STATISTIC(NumPHIsLowered, "Number of PHIs lowered");
+STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted");
+STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted");
+char StrongPHIElimination::ID = 0;
+INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
+INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
+INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination",
+ "Eliminate PHI nodes for register allocation, intelligently", false, false)
+
+char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID;
+
+void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ AU.addRequired<MachineDominatorTree>();
+ AU.addRequired<SlotIndexes>();
+ AU.addPreserved<SlotIndexes>();
+ AU.addRequired<LiveIntervals>();
+ AU.addPreserved<LiveIntervals>();
+ MachineFunctionPass::getAnalysisUsage(AU);
}
-/// computeDomForest - compute the subforest of the DomTree corresponding
-/// to the defining blocks of the registers in question
-std::vector<StrongPHIElimination::DomForestNode*>
-StrongPHIElimination::computeDomForest(
- std::map<unsigned, MachineBasicBlock*>& regs,
- MachineRegisterInfo& MRI) {
- // Begin by creating a virtual root node, since the actual results
- // may well be a forest. Assume this node has maximum DFS-out number.
- DomForestNode* VirtualRoot = new DomForestNode(0, 0);
- maxpreorder.insert(std::make_pair((MachineBasicBlock*)0, ~0UL));
-
- // Populate a worklist with the registers
- std::vector<unsigned> worklist;
- worklist.reserve(regs.size());
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = regs.begin(),
- E = regs.end(); I != E; ++I)
- worklist.push_back(I->first);
-
- // Sort the registers by the DFS-in number of their defining block
- PreorderSorter PS(preorder, MRI);
- std::sort(worklist.begin(), worklist.end(), PS);
-
- // Create a "current parent" stack, and put the virtual root on top of it
- DomForestNode* CurrentParent = VirtualRoot;
- std::vector<DomForestNode*> stack;
- stack.push_back(VirtualRoot);
-
- // Iterate over all the registers in the previously computed order
- for (std::vector<unsigned>::iterator I = worklist.begin(), E = worklist.end();
- I != E; ++I) {
- unsigned pre = preorder[MRI.getVRegDef(*I)->getParent()];
- MachineBasicBlock* parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
-
- // If the DFS-in number of the register is greater than the DFS-out number
- // of the current parent, repeatedly pop the parent stack until it isn't.
- while (pre > maxpreorder[parentBlock]) {
- stack.pop_back();
- CurrentParent = stack.back();
-
- parentBlock = CurrentParent->getReg() ?
- MRI.getVRegDef(CurrentParent->getReg())->getParent() :
- 0;
+static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) {
+ // FIXME: This only needs to check from the first terminator, as only the
+ // first terminator can use a virtual register.
+ for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) {
+ assert (RI != MBB->rend());
+ MachineInstr *MI = &*RI;
+
+ for (MachineInstr::mop_iterator OI = MI->operands_begin(),
+ OE = MI->operands_end(); OI != OE; ++OI) {
+ MachineOperand &MO = *OI;
+ if (MO.isReg() && MO.isUse() && MO.getReg() == Reg)
+ return &MO;
}
-
- // Now that we've found the appropriate parent, create a DomForestNode for
- // this register and attach it to the forest
- DomForestNode* child = new DomForestNode(*I, CurrentParent);
-
- // Push this new node on the "current parent" stack
- stack.push_back(child);
- CurrentParent = child;
}
-
- // Return a vector containing the children of the virtual root node
- std::vector<DomForestNode*> ret;
- ret.insert(ret.end(), VirtualRoot->begin(), VirtualRoot->end());
- return ret;
+ return NULL;
}
-/// isLiveIn - helper method that determines, from a regno, if a register
-/// is live into a block
-static bool isLiveIn(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- LiveInterval& I = LI.getOrCreateInterval(r);
- SlotIndex idx = LI.getMBBStartIdx(MBB);
- return I.liveAt(idx);
-}
+bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) {
+ MRI = &MF.getRegInfo();
+ TII = MF.getTarget().getInstrInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ LI = &getAnalysis<LiveIntervals>();
-/// isLiveOut - help method that determines, from a regno, if a register is
-/// live out of a block.
-static bool isLiveOut(unsigned r, MachineBasicBlock* MBB,
- LiveIntervals& LI) {
- for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
- E = MBB->succ_end(); PI != E; ++PI)
- if (isLiveIn(r, *PI, LI))
- return true;
-
- return false;
-}
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+ PHISrcDefs[I].push_back(BBI);
-/// interferes - checks for local interferences by scanning a block. The only
-/// trick parameter is 'mode' which tells it the relationship of the two
-/// registers. 0 - defined in the same block, 1 - first properly dominates
-/// second, 2 - second properly dominates first
-static bool interferes(unsigned a, unsigned b, MachineBasicBlock* scan,
- LiveIntervals& LV, unsigned mode) {
- MachineInstr* def = 0;
- MachineInstr* kill = 0;
-
- // The code is still in SSA form at this point, so there is only one
- // definition per VReg. Thus we can safely use MRI->getVRegDef().
- const MachineRegisterInfo* MRI = &scan->getParent()->getRegInfo();
-
- bool interference = false;
-
- // Wallk the block, checking for interferences
- for (MachineBasicBlock::iterator MBI = scan->begin(), MBE = scan->end();
- MBI != MBE; ++MBI) {
- MachineInstr* curr = MBI;
-
- // Same defining block...
- if (mode == 0) {
- if (curr == MRI->getVRegDef(a)) {
- // If we find our first definition, save it
- if (!def) {
- def = curr;
- // If there's already an unkilled definition, then
- // this is an interference
- } else if (!kill) {
- interference = true;
- break;
- // If there's a definition followed by a KillInst, then
- // they can't interfere
- } else {
- interference = false;
- break;
- }
- // Symmetric with the above
- } else if (curr == MRI->getVRegDef(b)) {
- if (!def) {
- def = curr;
- } else if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- // Store KillInsts if they match up with the definition
- } else if (curr->killsRegister(a)) {
- if (def == MRI->getVRegDef(a)) {
- kill = curr;
- } else if (curr->killsRegister(b)) {
- if (def == MRI->getVRegDef(b)) {
- kill = curr;
- }
- }
- }
- // First properly dominates second...
- } else if (mode == 1) {
- if (curr == MRI->getVRegDef(b)) {
- // Definition of second without kill of first is an interference
- if (!kill) {
- interference = true;
- break;
- // Definition after a kill is a non-interference
- } else {
- interference = false;
- break;
- }
- // Save KillInsts of First
- } else if (curr->killsRegister(a)) {
- kill = curr;
- }
- // Symmetric with the above
- } else if (mode == 2) {
- if (curr == MRI->getVRegDef(a)) {
- if (!kill) {
- interference = true;
- break;
- } else {
- interference = false;
- break;
- }
- } else if (curr->killsRegister(b)) {
- kill = curr;
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = BBI->getOperand(i);
+ unsigned SrcReg = SrcMO.getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
+
+ MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
+ if (DefMI)
+ PHISrcDefs[DefMI->getParent()].push_back(DefMI);
}
}
}
-
- return interference;
-}
-/// processBlock - Determine how to break up PHIs in the current block. Each
-/// PHI is broken up by some combination of renaming its operands and inserting
-/// copies. This method is responsible for determining which operands receive
-/// which treatment.
-void StrongPHIElimination::processBlock(MachineBasicBlock* MBB) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineRegisterInfo& MRI = MBB->getParent()->getRegInfo();
-
- // Holds names that have been added to a set in any PHI within this block
- // before the current one.
- std::set<unsigned> ProcessedNames;
-
- // Iterate over all the PHI nodes in this block
- MachineBasicBlock::iterator P = MBB->begin();
- while (P != MBB->end() && P->isPHI()) {
- unsigned DestReg = P->getOperand(0).getReg();
-
- // Don't both doing PHI elimination for dead PHI's.
- if (P->registerDefIsDead(DestReg)) {
- ++P;
- continue;
- }
+ // Perform a depth-first traversal of the dominator tree, splitting
+ // interferences amongst PHI-congruence classes.
+ DenseMap<unsigned, unsigned> CurrentDominatingParent;
+ DenseMap<unsigned, unsigned> ImmediateDominatingParent;
+ for (df_iterator<MachineDomTreeNode*> DI = df_begin(DT->getRootNode()),
+ DE = df_end(DT->getRootNode()); DI != DE; ++DI) {
+ SplitInterferencesForBasicBlock(*DI->getBlock(),
+ CurrentDominatingParent,
+ ImmediateDominatingParent);
+ }
- LiveInterval& PI = LI.getOrCreateInterval(DestReg);
- SlotIndex pIdx = LI.getInstructionIndex(P).getDefIndex();
- VNInfo* PVN = PI.getLiveRangeContaining(pIdx)->valno;
- PhiValueNumber.insert(std::make_pair(DestReg, PVN->id));
-
- // PHIUnion is the set of incoming registers to the PHI node that
- // are going to be renames rather than having copies inserted. This set
- // is refinded over the course of this function. UnionedBlocks is the set
- // of corresponding MBBs.
- std::map<unsigned, MachineBasicBlock*> PHIUnion;
- SmallPtrSet<MachineBasicBlock*, 8> UnionedBlocks;
-
- // Iterate over the operands of the PHI node
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- unsigned SrcReg = P->getOperand(i-1).getReg();
-
- // Don't need to try to coalesce a register with itself.
- if (SrcReg == DestReg) {
- ProcessedNames.insert(SrcReg);
- continue;
- }
-
- // We don't need to insert copies for implicit_defs.
- MachineInstr* DefMI = MRI.getVRegDef(SrcReg);
- if (DefMI->isImplicitDef())
- ProcessedNames.insert(SrcReg);
-
- // Check for trivial interferences via liveness information, allowing us
- // to avoid extra work later. Any registers that interfere cannot both
- // be in the renaming set, so choose one and add copies for it instead.
- // The conditions are:
- // 1) if the operand is live into the PHI node's block OR
- // 2) if the PHI node is live out of the operand's defining block OR
- // 3) if the operand is itself a PHI node and the original PHI is
- // live into the operand's defining block OR
- // 4) if the operand is already being renamed for another PHI node
- // in this block OR
- // 5) if any two operands are defined in the same block, insert copies
- // for one of them
- if (isLiveIn(SrcReg, P->getParent(), LI) ||
- isLiveOut(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ||
- ( MRI.getVRegDef(SrcReg)->isPHI() &&
- isLiveIn(P->getOperand(0).getReg(),
- MRI.getVRegDef(SrcReg)->getParent(), LI) ) ||
- ProcessedNames.count(SrcReg) ||
- UnionedBlocks.count(MRI.getVRegDef(SrcReg)->getParent())) {
-
- // Add a copy for the selected register
- MachineBasicBlock* From = P->getOperand(i).getMBB();
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
- } else {
- // Otherwise, add it to the renaming set
- PHIUnion.insert(std::make_pair(SrcReg,P->getOperand(i).getMBB()));
- UnionedBlocks.insert(MRI.getVRegDef(SrcReg)->getParent());
- }
+ // Insert copies for all PHI source and destination registers.
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ InsertCopiesForPHI(BBI, I);
}
-
- // Compute the dominator forest for the renaming set. This is a forest
- // where the nodes are the registers and the edges represent dominance
- // relations between the defining blocks of the registers
- std::vector<StrongPHIElimination::DomForestNode*> DF =
- computeDomForest(PHIUnion, MRI);
-
- // Walk DomForest to resolve interferences at an inter-block level. This
- // will remove registers from the renaming set (and insert copies for them)
- // if interferences are found.
- std::vector<std::pair<unsigned, unsigned> > localInterferences;
- processPHIUnion(P, PHIUnion, DF, localInterferences);
-
- // If one of the inputs is defined in the same block as the current PHI
- // then we need to check for a local interference between that input and
- // the PHI.
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- if (MRI.getVRegDef(I->first)->getParent() == P->getParent())
- localInterferences.push_back(std::make_pair(I->first,
- P->getOperand(0).getReg()));
-
- // The dominator forest walk may have returned some register pairs whose
- // interference cannot be determined from dominator analysis. We now
- // examine these pairs for local interferences.
- for (std::vector<std::pair<unsigned, unsigned> >::iterator I =
- localInterferences.begin(), E = localInterferences.end(); I != E; ++I) {
- std::pair<unsigned, unsigned> p = *I;
-
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
-
- // Determine the block we need to scan and the relationship between
- // the two registers
- MachineBasicBlock* scan = 0;
- unsigned mode = 0;
- if (MRI.getVRegDef(p.first)->getParent() ==
- MRI.getVRegDef(p.second)->getParent()) {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 0; // Same block
- } else if (MDT.dominates(MRI.getVRegDef(p.first)->getParent(),
- MRI.getVRegDef(p.second)->getParent())) {
- scan = MRI.getVRegDef(p.second)->getParent();
- mode = 1; // First dominates second
- } else {
- scan = MRI.getVRegDef(p.first)->getParent();
- mode = 2; // Second dominates first
- }
-
- // If there's an interference, we need to insert copies
- if (interferes(p.first, p.second, scan, LI, mode)) {
- // Insert copies for First
- for (int i = P->getNumOperands() - 1; i >= 2; i-=2) {
- if (P->getOperand(i-1).getReg() == p.first) {
- unsigned SrcReg = p.first;
- MachineBasicBlock* From = P->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg,
- P->getOperand(0).getReg()));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
+ }
+
+ // FIXME: Preserve the equivalence classes during copy insertion and use
+  // the preserved equivalence classes instead of recomputing them.
+ RegNodeMap.clear();
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ unsigned DestReg = BBI->getOperand(0).getReg();
+ addReg(DestReg);
+
+ for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) {
+ unsigned SrcReg = BBI->getOperand(i).getReg();
+ addReg(SrcReg);
+ unionRegs(DestReg, SrcReg);
}
}
-
- // Add the renaming set for this PHI node to our overall renaming information
- for (std::map<unsigned, MachineBasicBlock*>::iterator QI = PHIUnion.begin(),
- QE = PHIUnion.end(); QI != QE; ++QI) {
- DEBUG(dbgs() << "Adding Renaming: " << QI->first << " -> "
- << P->getOperand(0).getReg() << "\n");
- }
-
- RenameSets.insert(std::make_pair(P->getOperand(0).getReg(), PHIUnion));
-
- // Remember which registers are already renamed, so that we don't try to
- // rename them for another PHI node in this block
- for (std::map<unsigned, MachineBasicBlock*>::iterator I = PHIUnion.begin(),
- E = PHIUnion.end(); I != E; ++I)
- ProcessedNames.insert(I->first);
-
- ++P;
}
-}
-/// processPHIUnion - Take a set of candidate registers to be coalesced when
-/// decomposing the PHI instruction. Use the DominanceForest to remove the ones
-/// that are known to interfere, and flag others that need to be checked for
-/// local interferences.
-void StrongPHIElimination::processPHIUnion(MachineInstr* Inst,
- std::map<unsigned, MachineBasicBlock*>& PHIUnion,
- std::vector<StrongPHIElimination::DomForestNode*>& DF,
- std::vector<std::pair<unsigned, unsigned> >& locals) {
-
- std::vector<DomForestNode*> worklist(DF.begin(), DF.end());
- SmallPtrSet<DomForestNode*, 4> visited;
-
- // Code is still in SSA form, so we can use MRI::getVRegDef()
- MachineRegisterInfo& MRI = Inst->getParent()->getParent()->getRegInfo();
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- unsigned DestReg = Inst->getOperand(0).getReg();
-
- // DF walk on the DomForest
- while (!worklist.empty()) {
- DomForestNode* DFNode = worklist.back();
-
- visited.insert(DFNode);
-
- bool inserted = false;
- for (DomForestNode::iterator CI = DFNode->begin(), CE = DFNode->end();
- CI != CE; ++CI) {
- DomForestNode* child = *CI;
-
- // If the current node is live-out of the defining block of one of its
- // children, insert a copy for it. NOTE: The paper actually calls for
- // a more elaborate heuristic for determining whether to insert copies
- // for the child or the parent. In the interest of simplicity, we're
- // just always choosing the parent.
- if (isLiveOut(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI)) {
- // Insert copies for parent
- for (int i = Inst->getNumOperands() - 1; i >= 2; i-=2) {
- if (Inst->getOperand(i-1).getReg() == DFNode->getReg()) {
- unsigned SrcReg = DFNode->getReg();
- MachineBasicBlock* From = Inst->getOperand(i).getMBB();
-
- Waiting[From].insert(std::make_pair(SrcReg, DestReg));
- UsedByAnother.insert(SrcReg);
-
- PHIUnion.erase(SrcReg);
- }
- }
-
- // If a node is live-in to the defining block of one of its children, but
- // not live-out, then we need to scan that block for local interferences.
- } else if (isLiveIn(DFNode->getReg(),
- MRI.getVRegDef(child->getReg())->getParent(), LI) ||
- MRI.getVRegDef(DFNode->getReg())->getParent() ==
- MRI.getVRegDef(child->getReg())->getParent()) {
- // Add (p, c) to possible local interferences
- locals.push_back(std::make_pair(DFNode->getReg(), child->getReg()));
+ DenseMap<unsigned, unsigned> RegRenamingMap;
+ bool Changed = false;
+ for (MachineFunction::iterator I = MF.begin(), E = MF.end();
+ I != E; ++I) {
+ MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end();
+ while (BBI != BBE && BBI->isPHI()) {
+ MachineInstr *PHI = BBI;
+
+ assert(PHI->getNumOperands() > 0);
+
+ unsigned SrcReg = PHI->getOperand(1).getReg();
+ unsigned SrcColor = getRegColor(SrcReg);
+ unsigned NewReg = RegRenamingMap[SrcColor];
+ if (!NewReg) {
+ NewReg = SrcReg;
+ RegRenamingMap[SrcColor] = SrcReg;
}
-
- if (!visited.count(child)) {
- worklist.push_back(child);
- inserted = true;
+ MergeLIsAndRename(SrcReg, NewReg);
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ if (!InsertedDestCopies.count(DestReg))
+ MergeLIsAndRename(DestReg, NewReg);
+
+ for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcReg = PHI->getOperand(i).getReg();
+ MergeLIsAndRename(SrcReg, NewReg);
}
+
+ ++BBI;
+ LI->RemoveMachineInstrFromMaps(PHI);
+ PHI->eraseFromParent();
+ Changed = true;
}
-
- if (!inserted) worklist.pop_back();
}
-}
-/// ScheduleCopies - Insert copies into predecessor blocks, scheduling
-/// them properly so as to avoid the 'lost copy' and the 'virtual swap'
-/// problems.
-///
-/// Based on "Practical Improvements to the Construction and Destruction
-/// of Static Single Assignment Form" by Briggs, et al.
-void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB,
- std::set<unsigned>& pushed) {
- // FIXME: This function needs to update LiveIntervals
- std::multimap<unsigned, unsigned>& copy_set= Waiting[MBB];
-
- std::multimap<unsigned, unsigned> worklist;
- std::map<unsigned, unsigned> map;
-
- // Setup worklist of initial copies
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; ) {
- map.insert(std::make_pair(I->first, I->first));
- map.insert(std::make_pair(I->second, I->second));
-
- if (!UsedByAnother.count(I->second)) {
- worklist.insert(*I);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
- } else {
- ++I;
+ // Due to the insertion of copies to split live ranges, the live intervals are
+ // guaranteed to not overlap, except in one case: an original PHI source and a
+ // PHI destination copy. In this case, they have the same value and thus don't
+ // truly intersect, so we merge them into the value live at that point.
+ // FIXME: Is there some better way we can handle this?
+ for (DestCopyMap::iterator I = InsertedDestCopies.begin(),
+ E = InsertedDestCopies.end(); I != E; ++I) {
+ unsigned DestReg = I->first;
+ unsigned DestColor = getRegColor(DestReg);
+ unsigned NewReg = RegRenamingMap[DestColor];
+
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ assert(DestLI.ranges.size() == 1
+ && "PHI destination copy's live interval should be a single live "
+ "range from the beginning of the BB to the copy instruction.");
+ LiveRange *DestLR = DestLI.begin();
+ VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start);
+ if (!NewVNI) {
+ NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator());
+ MachineInstr *CopyInstr = I->second;
+ CopyInstr->getOperand(1).setIsKill(true);
}
+
+ LiveRange NewLR(DestLR->start, DestLR->end, NewVNI);
+ NewLI.addRange(NewLR);
+
+ LI->removeInterval(DestReg);
+ MRI->replaceRegWith(DestReg, NewReg);
}
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- MachineFunction* MF = MBB->getParent();
- MachineRegisterInfo& MRI = MF->getRegInfo();
- const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
-
- SmallVector<std::pair<unsigned, MachineInstr*>, 4> InsertedPHIDests;
-
- // Iterate over the worklist, inserting copies
- while (!worklist.empty() || !copy_set.empty()) {
- while (!worklist.empty()) {
- std::multimap<unsigned, unsigned>::iterator WI = worklist.begin();
- std::pair<unsigned, unsigned> curr = *WI;
- worklist.erase(WI);
-
- const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(curr.first);
-
- if (isLiveOut(curr.second, MBB, LI)) {
- // Create a temporary
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
-
- // Insert copy from curr.second to a temporary at
- // the Phi defining curr.second
- MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second);
- BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY),
- t).addReg(curr.second);
- DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t
- << "\n");
-
- // Push temporary on Stacks
- Stacks[curr.second].push_back(t);
-
- // Insert curr.second in pushed
- pushed.insert(curr.second);
-
- // Create a live interval for this temporary
- InsertedPHIDests.push_back(std::make_pair(t, --PI));
- }
-
- // Insert copy from map[curr.first] to curr.second
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]);
- map[curr.first] = curr.second;
- DEBUG(dbgs() << "Inserted copy from " << curr.first << " to "
- << curr.second << "\n");
-
- // Push this copy onto InsertedPHICopies so we can
- // update LiveIntervals with it.
- MachineBasicBlock::iterator MI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(curr.second, --MI));
-
- // If curr.first is a destination in copy_set...
- for (std::multimap<unsigned, unsigned>::iterator I = copy_set.begin(),
- E = copy_set.end(); I != E; )
- if (curr.first == I->second) {
- std::pair<unsigned, unsigned> temp = *I;
- worklist.insert(temp);
-
- // Avoid iterator invalidation
- std::multimap<unsigned, unsigned>::iterator OI = I;
- ++I;
- copy_set.erase(OI);
-
- break;
- } else {
- ++I;
- }
- }
-
- if (!copy_set.empty()) {
- std::multimap<unsigned, unsigned>::iterator CI = copy_set.begin();
- std::pair<unsigned, unsigned> curr = *CI;
- worklist.insert(curr);
- copy_set.erase(CI);
-
- LiveInterval& I = LI.getInterval(curr.second);
- MachineBasicBlock::iterator term = MBB->getFirstTerminator();
- SlotIndex endIdx = SlotIndex();
- if (term != MBB->end())
- endIdx = LI.getInstructionIndex(term);
- else
- endIdx = LI.getMBBEndIdx(MBB);
-
- if (I.liveAt(endIdx)) {
- const TargetRegisterClass *RC =
- MF->getRegInfo().getRegClass(curr.first);
-
- // Insert a copy from dest to a new temporary t at the end of b
- unsigned t = MF->getRegInfo().createVirtualRegister(RC);
- BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), t).addReg(curr.second);
- map[curr.second] = t;
-
- MachineBasicBlock::iterator TI = MBB->getFirstTerminator();
- InsertedPHIDests.push_back(std::make_pair(t, --TI));
+
+ // Adjust the live intervals of all PHI source registers to handle the case
+ // where the PHIs in successor blocks were the only later uses of the source
+ // register.
+ for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(),
+ E = InsertedSrcCopySet.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I->first;
+ unsigned SrcReg = I->second;
+ if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)])
+ SrcReg = RenamedRegister;
+
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+
+ bool isLiveOut = false;
+ for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
+ SE = MBB->succ_end(); SI != SE; ++SI) {
+ if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) {
+ isLiveOut = true;
+ break;
}
}
+
+ if (isLiveOut)
+ continue;
+
+ MachineOperand *LastUse = findLastUse(MBB, SrcReg);
+ assert(LastUse);
+ SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent());
+ SrcLI.removeRange(LastUseIndex.getDefIndex(), LI->getMBBEndIdx(MBB));
+ LastUse->setIsKill(true);
}
-
- // Renumber the instructions so that we can perform the index computations
- // needed to create new live intervals.
- LI.renumber();
-
- // For copies that we inserted at the ends of predecessors, we construct
- // live intervals. This is pretty easy, since we know that the destination
- // register cannot have be in live at that point previously. We just have
- // to make sure that, for registers that serve as inputs to more than one
- // PHI, we don't create multiple overlapping live intervals.
- std::set<unsigned> RegHandled;
- for (SmallVector<std::pair<unsigned, MachineInstr*>, 4>::iterator I =
- InsertedPHIDests.begin(), E = InsertedPHIDests.end(); I != E; ++I) {
- if (RegHandled.insert(I->first).second) {
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx = LI.getInstructionIndex(I->second);
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(I->second->getParent()).getNextSlot(),
- true);
-
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first, I->second);
- R.valno->setCopy(I->second);
- R.valno->def = LI.getInstructionIndex(I->second).getDefIndex();
- }
+
+ LI->renumber();
+
+ Allocator.Reset();
+ RegNodeMap.clear();
+ PHISrcDefs.clear();
+ InsertedSrcCopySet.clear();
+ InsertedSrcCopyMap.clear();
+ InsertedDestCopies.clear();
+
+ return Changed;
+}
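In the renaming loop above, RegRenamingMap is first-writer-wins: the first register of a color that the pass reaches becomes the representative, and every later member of that color is folded into it by MergeLIsAndRename. A tiny standalone illustration of that choice follows; the register numbers and colors are made up, and the snippet is illustrative only, not part of the patch.

#include <cstdio>
#include <map>
#include <utility>
#include <vector>

int main() {
  // (register, color) pairs in the order the pass would visit them.
  std::vector<std::pair<unsigned, unsigned> > Members;
  Members.push_back(std::make_pair(101u, 7u));
  Members.push_back(std::make_pair(102u, 7u));
  Members.push_back(std::make_pair(103u, 9u));
  Members.push_back(std::make_pair(104u, 7u));

  // The first register seen for a color becomes the representative that every
  // later member of that color is renamed to (first-writer-wins, as with
  // RegRenamingMap; a value of 0 means "no representative chosen yet").
  std::map<unsigned, unsigned> RegRenamingMap;
  for (size_t i = 0; i < Members.size(); ++i) {
    unsigned &NewReg = RegRenamingMap[Members[i].second];
    if (!NewReg)
      NewReg = Members[i].first;
    std::printf("vreg %u -> vreg %u\n", Members[i].first, NewReg);
  }
  return 0;
}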
+
+void StrongPHIElimination::addReg(unsigned Reg) {
+ if (RegNodeMap.count(Reg))
+ return;
+ RegNodeMap[Reg] = new (Allocator) Node(Reg);
+}
+
+StrongPHIElimination::Node*
+StrongPHIElimination::Node::getLeader() {
+ Node *N = this;
+ Node *Parent = parent.getPointer();
+ Node *Grandparent = Parent->parent.getPointer();
+
+ while (Parent != Grandparent) {
+ N->parent.setPointer(Grandparent);
+ N = Grandparent;
+ Parent = Parent->parent.getPointer();
+ Grandparent = Parent->parent.getPointer();
}
+
+ return Parent;
}
-/// InsertCopies - insert copies into MBB and all of its successors
-void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN,
- SmallPtrSet<MachineBasicBlock*, 16>& visited) {
- MachineBasicBlock* MBB = MDTN->getBlock();
- visited.insert(MBB);
-
- std::set<unsigned> pushed;
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- // Rewrite register uses from Stacks
- for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end();
- I != E; ++I) {
- if (I->isPHI())
- continue;
-
- for (unsigned i = 0; i < I->getNumOperands(); ++i)
- if (I->getOperand(i).isReg() &&
- Stacks[I->getOperand(i).getReg()].size()) {
- // Remove the live range for the old vreg.
- LiveInterval& OldInt = LI.getInterval(I->getOperand(i).getReg());
- LiveInterval::iterator OldLR =
- OldInt.FindLiveRangeContaining(LI.getInstructionIndex(I).getUseIndex());
- if (OldLR != OldInt.end())
- OldInt.removeRange(*OldLR, true);
-
- // Change the register
- I->getOperand(i).setReg(Stacks[I->getOperand(i).getReg()].back());
-
- // Add a live range for the new vreg
- LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg());
- VNInfo* FirstVN = *Int.vni_begin();
- FirstVN->setHasPHIKill(false);
- LiveRange LR (LI.getMBBStartIdx(I->getParent()),
- LI.getInstructionIndex(I).getUseIndex().getNextSlot(),
- FirstVN);
-
- Int.addRange(LR);
- }
- }
-
- // Schedule the copies for this block
- ScheduleCopies(MBB, pushed);
-
- // Recur down the dominator tree.
- for (MachineDomTreeNode::iterator I = MDTN->begin(),
- E = MDTN->end(); I != E; ++I)
- if (!visited.count((*I)->getBlock()))
- InsertCopies(*I, visited);
-
- // As we exit this block, pop the names we pushed while processing it
- for (std::set<unsigned>::iterator I = pushed.begin(),
- E = pushed.end(); I != E; ++I)
- Stacks[*I].pop_back();
+unsigned StrongPHIElimination::getRegColor(unsigned Reg) {
+ DenseMap<unsigned, Node*>::iterator RI = RegNodeMap.find(Reg);
+ if (RI == RegNodeMap.end())
+ return 0;
+ Node *Node = RI->second;
+ if (Node->parent.getInt() & Node::kRegisterIsolatedFlag)
+ return 0;
+ return Node->getLeader()->value;
}
-bool StrongPHIElimination::mergeLiveIntervals(unsigned primary,
- unsigned secondary) {
-
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
- LiveInterval& LHS = LI.getOrCreateInterval(primary);
- LiveInterval& RHS = LI.getOrCreateInterval(secondary);
-
- LI.renumber();
-
- DenseMap<VNInfo*, VNInfo*> VNMap;
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
-
- SlotIndex Start = R.start;
- SlotIndex End = R.end;
- if (LHS.getLiveRangeContaining(Start))
- return false;
-
- if (LHS.getLiveRangeContaining(End))
- return false;
-
- LiveInterval::iterator RI = std::upper_bound(LHS.begin(), LHS.end(), R);
- if (RI != LHS.end() && RI->start < End)
- return false;
+void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) {
+ Node *Node1 = RegNodeMap[Reg1]->getLeader();
+ Node *Node2 = RegNodeMap[Reg2]->getLeader();
+
+ if (Node1->rank > Node2->rank) {
+ Node2->parent.setPointer(Node1->getLeader());
+ } else if (Node1->rank < Node2->rank) {
+ Node1->parent.setPointer(Node2->getLeader());
+ } else if (Node1 != Node2) {
+ Node2->parent.setPointer(Node1->getLeader());
+ Node1->rank++;
}
-
- for (LiveInterval::iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) {
- LiveRange R = *I;
- VNInfo* OldVN = R.valno;
- VNInfo*& NewVN = VNMap[OldVN];
- if (!NewVN) {
- NewVN = LHS.createValueCopy(OldVN, LI.getVNInfoAllocator());
- }
-
- LiveRange LR (R.start, R.end, NewVN);
- LHS.addRange(LR);
+}
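Taken together, addReg, getLeader, getRegColor and unionRegs form a pointer-based union-find over virtual registers: getLeader compresses paths while walking to the set leader, and unionRegs links by rank, so each congruence class ("color") is one disjoint set. For readers who want the data structure in isolation, here is a minimal standalone sketch of the same idea; it uses plain indices instead of the pass's PointerIntPair nodes and is illustrative only, not part of the patch.

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <vector>

// Illustrative union-find: find(i) == find(j) means i and j share a "color".
struct UnionFind {
  std::vector<unsigned> Parent, Rank;

  unsigned add() {                       // analogous to addReg()
    Parent.push_back(Parent.size());
    Rank.push_back(0);
    return Parent.size() - 1;
  }

  unsigned find(unsigned X) {            // analogous to Node::getLeader()
    while (Parent[X] != X) {
      Parent[X] = Parent[Parent[X]];     // path compression (halving)
      X = Parent[X];
    }
    return X;
  }

  void merge(unsigned A, unsigned B) {   // analogous to unionRegs()
    A = find(A);
    B = find(B);
    if (A == B) return;
    if (Rank[A] < Rank[B]) std::swap(A, B);
    Parent[B] = A;                       // union by rank
    if (Rank[A] == Rank[B]) ++Rank[A];
  }
};

int main() {
  UnionFind UF;
  unsigned Dest = UF.add(), Src0 = UF.add(), Src1 = UF.add();
  UF.merge(Dest, Src0);                  // a PHI's dest and sources share a color
  UF.merge(Dest, Src1);
  assert(UF.find(Src0) == UF.find(Src1));
  std::printf("leader of Src1: %u\n", UF.find(Src1));
  return 0;
}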
+
+void StrongPHIElimination::isolateReg(unsigned Reg) {
+ Node *Node = RegNodeMap[Reg];
+ Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag);
+}
+
+unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ Node *DestNode = RegNodeMap[DestReg];
+ if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag)
+ return 0;
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg());
+ if (SrcColor)
+ return SrcColor;
}
-
- LI.removeInterval(RHS.reg);
-
- return true;
+ return 0;
}
-bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) {
- LiveIntervals& LI = getAnalysis<LiveIntervals>();
-
- // Compute DFS numbers of each block
- computeDFS(Fn);
-
- // Determine which phi node operands need copies
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
- if (!I->empty() && I->begin()->isPHI())
- processBlock(I);
-
- // Break interferences where two different phis want to coalesce
- // in the same register.
- std::set<unsigned> seen;
- typedef std::map<unsigned, std::map<unsigned, MachineBasicBlock*> >
- RenameSetType;
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I) {
- for (std::map<unsigned, MachineBasicBlock*>::iterator
- OI = I->second.begin(), OE = I->second.end(); OI != OE; ) {
- if (!seen.count(OI->first)) {
- seen.insert(OI->first);
- ++OI;
+void StrongPHIElimination::isolatePHI(MachineInstr *PHI) {
+ assert(PHI->isPHI());
+ Node *Node = RegNodeMap[PHI->getOperand(0).getReg()];
+ Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag);
+}
+
+/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any
+/// interferences found between registers in the same congruence class. It
+/// takes two DenseMaps as arguments that it also updates:
+///
+/// 1) CurrentDominatingParent, which maps a color to the register in that
+/// congruence class whose definition was most recently seen.
+///
+/// 2) ImmediateDominatingParent, which maps a register to the register in the
+/// same congruence class that most immediately dominates it.
+///
+/// This function assumes that it is being called in a depth-first traversal
+/// of the dominator tree.
+///
+/// The algorithm used here is a generalization of the dominance-based SSA test
+/// for two variables. If there are variables a_1, ..., a_n such that
+///
+/// def(a_1) dom ... dom def(a_n),
+///
+/// then we can test for an interference between any two a_i by only using O(n)
+/// interference tests between pairs of variables. If i < j and a_i and a_j
+/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1).
+/// Thus, in order to test for an interference involving a_i, we need only check
+/// for a potential interference with a_i+1.
+///
+/// This method can be generalized to arbitrary sets of variables by performing
+/// a depth-first traversal of the dominator tree. As we traverse down a branch
+/// of the dominator tree, we keep track of the current dominating variable and
+/// only perform an interference test with that variable. However, when we go to
+/// another branch of the dominator tree, the definition of the current dominating
+/// variable may no longer dominate the current block. In order to correct this,
+/// we need to use a stack of past choices of the current dominating variable
+/// and pop from this stack until we find a variable whose definition actually
+/// dominates the current block.
+///
+/// There will be one push on this stack for each variable that has become the
+/// current dominating variable, so instead of using an explicit stack we can
+/// simply associate the previous choice for a current dominating variable with
+/// the new choice. This works better in our implementation, where we test for
+/// interference in multiple distinct sets at once.
+void
+StrongPHIElimination::SplitInterferencesForBasicBlock(
+ MachineBasicBlock &MBB,
+ DenseMap<unsigned, unsigned> &CurrentDominatingParent,
+ DenseMap<unsigned, unsigned> &ImmediateDominatingParent) {
+ // Sort defs by their order in the original basic block, as the code below
+ // assumes that it is processing definitions in dominance order.
+ std::vector<MachineInstr*> &DefInstrs = PHISrcDefs[&MBB];
+ std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI));
+
+ for (std::vector<MachineInstr*>::const_iterator BBI = DefInstrs.begin(),
+ BBE = DefInstrs.end(); BBI != BBE; ++BBI) {
+ for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(),
+ E = (*BBI)->operands_end(); I != E; ++I) {
+ const MachineOperand &MO = *I;
+
+ // FIXME: This would be faster if it were possible to bail out of checking
+ // an instruction's operands after the explicit defs, but this is incorrect
+ // for variadic instructions, which may appear before register allocation
+ // in the future.
+ if (!MO.isReg() || !MO.isDef())
+ continue;
+
+ unsigned DestReg = MO.getReg();
+ if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg))
+ continue;
+
+ // If the virtual register being defined is not used in any PHI or has
+ // already been isolated, then there are no more interferences to check.
+ unsigned DestColor = getRegColor(DestReg);
+ if (!DestColor)
+ continue;
+
+ // The input to this pass sometimes is not in SSA form in every basic
+ // block, as some virtual registers have redefinitions. We could eliminate
+ // this by fixing the passes that generate the non-SSA code, or we could
+ // handle it here by tracking defining machine instructions rather than
+ // virtual registers. For now, we just handle the situation conservatively
+ // in a way that will possibly lead to false interferences.
+ unsigned &CurrentParent = CurrentDominatingParent[DestColor];
+ unsigned NewParent = CurrentParent;
+ if (NewParent == DestReg)
+ continue;
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+
+ // If NewParent is nonzero, then its definition dominates the current
+ // instruction, so it is only necessary to check for the liveness of
+ // NewParent in order to check for an interference.
+ if (NewParent
+ && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) {
+ // If there is an interference, always isolate the new register. This
+ // could be improved by using a heuristic that decides which of the two
+ // registers to isolate.
+ isolateReg(DestReg);
+ CurrentParent = NewParent;
} else {
- Waiting[OI->second].insert(std::make_pair(OI->first, I->first));
- unsigned reg = OI->first;
- ++OI;
- I->second.erase(reg);
- DEBUG(dbgs() << "Removing Renaming: " << reg << " -> " << I->first
- << "\n");
+ // If there is no interference, update ImmediateDominatingParent and set
+ // the CurrentDominatingParent for this color to the current register.
+ ImmediateDominatingParent[DestReg] = NewParent;
+ CurrentParent = DestReg;
}
}
}
-
- // Insert copies
- // FIXME: This process should probably preserve LiveIntervals
- SmallPtrSet<MachineBasicBlock*, 16> visited;
- MachineDominatorTree& MDT = getAnalysis<MachineDominatorTree>();
- InsertCopies(MDT.getRootNode(), visited);
-
- // Perform renaming
- for (RenameSetType::iterator I = RenameSets.begin(), E = RenameSets.end();
- I != E; ++I)
- while (I->second.size()) {
- std::map<unsigned, MachineBasicBlock*>::iterator SI = I->second.begin();
-
- DEBUG(dbgs() << "Renaming: " << SI->first << " -> " << I->first << "\n");
-
- if (SI->first != I->first) {
- if (mergeLiveIntervals(I->first, SI->first)) {
- Fn.getRegInfo().replaceRegWith(SI->first, I->first);
-
- if (RenameSets.count(SI->first)) {
- I->second.insert(RenameSets[SI->first].begin(),
- RenameSets[SI->first].end());
- RenameSets.erase(SI->first);
- }
- } else {
- // Insert a last-minute copy if a conflict was detected.
- const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo();
- BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(),
- TII->get(TargetOpcode::COPY), I->first).addReg(SI->first);
-
- LI.renumber();
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- SlotIndex instrIdx =
- LI.getInstructionIndex(--SI->second->getFirstTerminator());
- if (Int.liveAt(instrIdx.getDefIndex()))
- Int.removeRange(instrIdx.getDefIndex(),
- LI.getMBBEndIdx(SI->second).getNextSlot(), true);
-
- LiveRange R = LI.addLiveRangeToEndOfBlock(I->first,
- --SI->second->getFirstTerminator());
- R.valno->setCopy(--SI->second->getFirstTerminator());
- R.valno->def = instrIdx.getDefIndex();
-
- DEBUG(dbgs() << "Renaming failed: " << SI->first << " -> "
- << I->first << "\n");
- }
+
+ // We now walk the PHIs in successor blocks and check for interferences. This
+  // is necessary because the uses of a PHI's operands are logically contained in
+ // the predecessor block. The def of a PHI's destination register is processed
+ // along with the other defs in a basic block.
+
+ CurrentPHIForColor.clear();
+
+ for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(),
+ SE = MBB.succ_end(); SI != SE; ++SI) {
+ for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end();
+ BBI != BBE && BBI->isPHI(); ++BBI) {
+ MachineInstr *PHI = BBI;
+
+ // If a PHI is already isolated, either by being isolated directly or
+ // having all of its operands isolated, ignore it.
+ unsigned Color = getPHIColor(PHI);
+ if (!Color)
+ continue;
+
+ // Find the index of the PHI operand that corresponds to this basic block.
+ unsigned PredIndex;
+ for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) {
+ if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB)
+ break;
}
-
- LiveInterval& Int = LI.getOrCreateInterval(I->first);
- const LiveRange* LR =
- Int.getLiveRangeContaining(LI.getMBBEndIdx(SI->second));
- LR->valno->setHasPHIKill(true);
-
- I->second.erase(SI->first);
+ assert(PredIndex < PHI->getNumOperands());
+ unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg();
+
+ // Pop registers from the stack represented by ImmediateDominatingParent
+ // until we find a parent that dominates the current instruction.
+ unsigned &CurrentParent = CurrentDominatingParent[Color];
+ unsigned NewParent = CurrentParent;
+ while (NewParent
+ && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB)
+ || !getRegColor(NewParent)))
+ NewParent = ImmediateDominatingParent[NewParent];
+ CurrentParent = NewParent;
+
+ // If there is an interference with a register, always isolate the
+ // register rather than the PHI. It is also possible to isolate the
+ // PHI, but that introduces copies for all of the registers involved
+ // in that PHI.
+ if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB)
+ && NewParent != PredOperandReg)
+ isolateReg(NewParent);
+
+ std::pair<MachineInstr*, unsigned>
+ &CurrentPHI = CurrentPHIForColor[Color];
+
+ // If two PHIs have the same operand from every shared predecessor, then
+ // they don't actually interfere. Otherwise, isolate the current PHI. This
+ // could possibly be improved, e.g. we could isolate the PHI with the
+ // fewest operands.
+ if (CurrentPHI.first && CurrentPHI.second != PredOperandReg)
+ isolatePHI(PHI);
+ else
+ CurrentPHI = std::make_pair(PHI, PredOperandReg);
}
-
- // Remove PHIs
- std::vector<MachineInstr*> phis;
- for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) {
- for (MachineBasicBlock::iterator BI = I->begin(), BE = I->end();
- BI != BE; ++BI)
- if (BI->isPHI())
- phis.push_back(BI);
}
-
- for (std::vector<MachineInstr*>::iterator I = phis.begin(), E = phis.end();
- I != E; ) {
- MachineInstr* PInstr = *(I++);
-
- // If this is a dead PHI node, then remove it from LiveIntervals.
- unsigned DestReg = PInstr->getOperand(0).getReg();
- LiveInterval& PI = LI.getInterval(DestReg);
- if (PInstr->registerDefIsDead(DestReg)) {
- if (PI.containsOneValue()) {
- LI.removeInterval(DestReg);
+}
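The CurrentDominatingParent / ImmediateDominatingParent bookkeeping above can be seen in isolation in the following standalone sketch. It assumes the defs of one congruence class arrive in dominance order, and the Dominates and LiveAt callbacks are hypothetical stand-ins for MachineDominatorTree::dominates and LiveInterval::liveAt; with them, a class of n members needs only O(n) liveness queries instead of a test for every pair. Illustrative only, not part of the patch.

#include <cstdio>
#include <functional>
#include <map>
#include <vector>

// One def of a register belonging to the congruence class, in visit order.
struct Def { unsigned Reg; unsigned Point; };

// Returns the registers that had to be isolated (split out of the class).
std::vector<unsigned> splitInterferences(
    const std::vector<Def> &Defs,                         // dominance order
    std::function<bool(unsigned, unsigned)> Dominates,    // def(Reg) dom Point?
    std::function<bool(unsigned, unsigned)> LiveAt) {     // Reg live at Point?
  std::map<unsigned, unsigned> ImmediateDominatingParent; // Reg -> prior choice
  unsigned CurrentParent = 0;                             // 0 means "none yet"
  std::vector<unsigned> Isolated;

  for (size_t i = 0; i < Defs.size(); ++i) {
    unsigned Reg = Defs[i].Reg, Point = Defs[i].Point;

    // Pop stale parents whose defs no longer dominate the current def.
    unsigned Parent = CurrentParent;
    while (Parent && !Dominates(Parent, Point))
      Parent = ImmediateDominatingParent[Parent];

    if (Parent && LiveAt(Parent, Point)) {
      Isolated.push_back(Reg);            // interference: split Reg out
      CurrentParent = Parent;
    } else {
      ImmediateDominatingParent[Reg] = Parent;
      CurrentParent = Reg;                // Reg is now the dominating member
    }
  }
  return Isolated;
}

int main() {
  // Two defs where the first is still live at the second: isolate the second.
  std::vector<Def> Defs = {{1, 10}, {2, 20}};
  auto Dominates = [](unsigned, unsigned) { return true; };
  auto LiveAt = [](unsigned Reg, unsigned) { return Reg == 1; };
  std::vector<unsigned> Isolated = splitInterferences(Defs, Dominates, LiveAt);
  for (size_t i = 0; i < Isolated.size(); ++i)
    std::printf("isolated vreg %u\n", Isolated[i]);
  return 0;
}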
+
+void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI,
+ MachineBasicBlock *MBB) {
+ assert(PHI->isPHI());
+ ++NumPHIsLowered;
+ unsigned PHIColor = getPHIColor(PHI);
+
+ for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) {
+ MachineOperand &SrcMO = PHI->getOperand(i);
+
+ // If a source is defined by an implicit def, there is no need to insert a
+ // copy in the predecessor.
+ if (SrcMO.isUndef())
+ continue;
+
+ unsigned SrcReg = SrcMO.getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ "Machine PHI Operands must all be virtual registers!");
+
+ MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB();
+ unsigned SrcColor = getRegColor(SrcReg);
+
+ // If neither the PHI nor the operand were isolated, then we only need to
+ // set the phi-kill flag on the VNInfo at this PHI.
+ if (PHIColor && SrcColor == PHIColor) {
+ LiveInterval &SrcInterval = LI->getInterval(SrcReg);
+ SlotIndex PredIndex = LI->getMBBEndIdx(PredBB);
+ VNInfo *SrcVNI = SrcInterval.getVNInfoAt(PredIndex.getPrevIndex());
+ assert(SrcVNI);
+ SrcVNI->setHasPHIKill(true);
+ continue;
+ }
+
+ unsigned CopyReg = 0;
+ if (PHIColor) {
+ SrcCopyMap::const_iterator I
+ = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor));
+ CopyReg
+ = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0;
+ }
+
+ if (!CopyReg) {
+ const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
+ CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineBasicBlock::iterator
+ CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg);
+ unsigned SrcSubReg = SrcMO.getSubReg();
+ MachineInstr *CopyInstr = BuildMI(*PredBB,
+ CopyInsertPoint,
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ CopyReg).addReg(SrcReg, 0, SrcSubReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ ++NumSrcCopiesInserted;
+
+ // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for
+ // the newly added range.
+ LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr);
+ InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg));
+
+ addReg(CopyReg);
+ if (PHIColor) {
+ unionRegs(PHIColor, CopyReg);
+ assert(getRegColor(CopyReg) != CopyReg);
} else {
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- PI.removeRange(*PI.getLiveRangeContaining(idx), true);
- }
- } else {
- // Trim live intervals of input registers. They are no longer live into
- // this block if they died after the PHI. If they lived after it, don't
- // trim them because they might have other legitimate uses.
- for (unsigned i = 1; i < PInstr->getNumOperands(); i += 2) {
- unsigned reg = PInstr->getOperand(i).getReg();
-
- MachineBasicBlock* MBB = PInstr->getOperand(i+1).getMBB();
- LiveInterval& InputI = LI.getInterval(reg);
- if (MBB != PInstr->getParent() &&
- InputI.liveAt(LI.getMBBStartIdx(PInstr->getParent())) &&
- InputI.expiredAt(LI.getInstructionIndex(PInstr).getNextIndex()))
- InputI.removeRange(LI.getMBBStartIdx(PInstr->getParent()),
- LI.getInstructionIndex(PInstr),
- true);
+ PHIColor = CopyReg;
+ assert(getRegColor(CopyReg) == CopyReg);
}
-
- // If the PHI is not dead, then the valno defined by the PHI
- // now has an unknown def.
- SlotIndex idx = LI.getInstructionIndex(PInstr).getDefIndex();
- const LiveRange* PLR = PI.getLiveRangeContaining(idx);
- PLR->valno->setIsPHIDef(true);
- LiveRange R (LI.getMBBStartIdx(PInstr->getParent()),
- PLR->start, PLR->valno);
- PI.addRange(R);
+
+ if (!InsertedSrcCopyMap.count(std::make_pair(PredBB, PHIColor)))
+ InsertedSrcCopyMap[std::make_pair(PredBB, PHIColor)] = CopyInstr;
}
-
- LI.RemoveMachineInstrFromMaps(PInstr);
- PInstr->eraseFromParent();
+
+ SrcMO.setReg(CopyReg);
+
+ // If SrcReg is not live beyond the PHI, trim its interval so that it is no
+ // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are
+ // processed later, but this is still correct to do at this point because we
+ // never rely on LiveIntervals being correct while inserting copies.
+ // FIXME: Should this just count uses at PHIs like the normal PHIElimination
+ // pass does?
+ LiveInterval &SrcLI = LI->getInterval(SrcReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ SlotIndex NextInstrIndex = PHIIndex.getNextIndex();
+ if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex))
+ SrcLI.removeRange(MBBStartIndex, PHIIndex, true);
}
-
- LI.renumber();
-
- return true;
+
+ unsigned DestReg = PHI->getOperand(0).getReg();
+ unsigned DestColor = getRegColor(DestReg);
+
+ if (PHIColor && DestColor == PHIColor) {
+ LiveInterval &DestLI = LI->getInterval(DestReg);
+
+ // Set the phi-def flag for the VN at this PHI.
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->setIsPHIDef(true);
+
+ // Prior to PHI elimination, the live ranges of PHIs begin at their defining
+ // instruction. After PHI elimination, PHI instructions are replaced by VNs
+ // with the phi-def flag set, and the live ranges of these VNs start at the
+ // beginning of the basic block.
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ DestVNI->def = MBBStartIndex;
+ DestLI.addRange(LiveRange(MBBStartIndex,
+ PHIIndex.getDefIndex(),
+ DestVNI));
+ return;
+ }
+
+ const TargetRegisterClass *RC = MRI->getRegClass(DestReg);
+ unsigned CopyReg = MRI->createVirtualRegister(RC);
+
+ MachineInstr *CopyInstr = BuildMI(*MBB,
+ MBB->SkipPHIsAndLabels(MBB->begin()),
+ PHI->getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DestReg).addReg(CopyReg);
+ LI->InsertMachineInstrInMaps(CopyInstr);
+ PHI->getOperand(0).setReg(CopyReg);
+ ++NumDestCopiesInserted;
+
+ // Add the region from the beginning of MBB to the copy instruction to
+ // CopyReg's live interval, and give the VNInfo the phidef flag.
+ LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg);
+ SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB);
+ SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr);
+ VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex,
+ CopyInstr,
+ LI->getVNInfoAllocator());
+ CopyVNI->setIsPHIDef(true);
+ CopyLI.addRange(LiveRange(MBBStartIndex,
+ DestCopyIndex.getDefIndex(),
+ CopyVNI));
+
+  // Adjust DestReg's live interval to account for its new definition at
+ // CopyInstr.
+ LiveInterval &DestLI = LI->getOrCreateInterval(DestReg);
+ SlotIndex PHIIndex = LI->getInstructionIndex(PHI);
+ DestLI.removeRange(PHIIndex.getDefIndex(), DestCopyIndex.getDefIndex());
+
+ VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getDefIndex());
+ assert(DestVNI);
+ DestVNI->def = DestCopyIndex.getDefIndex();
+
+ InsertedDestCopies[CopyReg] = CopyInstr;
+}
+
+void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) {
+ if (Reg == NewReg)
+ return;
+
+ LiveInterval &OldLI = LI->getInterval(Reg);
+ LiveInterval &NewLI = LI->getInterval(NewReg);
+
+ // Merge the live ranges of the two registers.
+ DenseMap<VNInfo*, VNInfo*> VNMap;
+ for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end();
+ LRI != LRE; ++LRI) {
+ LiveRange OldLR = *LRI;
+ VNInfo *OldVN = OldLR.valno;
+
+ VNInfo *&NewVN = VNMap[OldVN];
+ if (!NewVN) {
+ NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator());
+ VNMap[OldVN] = NewVN;
+ }
+
+ LiveRange LR(OldLR.start, OldLR.end, NewVN);
+ NewLI.addRange(LR);
+ }
+
+ // Remove the LiveInterval for the register being renamed and replace all
+ // of its defs and uses with the new register.
+ LI->removeInterval(Reg);
+ MRI->replaceRegWith(Reg, NewReg);
}
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index a815b364d54e..04d3d311b416 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -350,7 +350,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
if (!MO.isReg())
continue;
unsigned Reg = MO.getReg();
- if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (!TargetRegisterInfo::isVirtualRegister(Reg))
continue;
if (MO.isDef()) {
const TargetRegisterClass *RC = MRI->getRegClass(Reg);
@@ -459,15 +459,19 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
// duplicate only one, because one branch instruction can be eliminated to
// compensate for the duplication.
unsigned MaxDuplicateCount;
- if (MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
+ if (TailDuplicateSize.getNumOccurrences() == 0 &&
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize))
MaxDuplicateCount = 1;
else
MaxDuplicateCount = TailDuplicateSize;
if (PreRegAlloc) {
- // Pre-regalloc tail duplication hurts compile time and doesn't help
- // much except for indirect branches.
- if (TailBB->empty() || !TailBB->back().getDesc().isIndirectBranch())
+ if (TailBB->empty())
+ return false;
+ const TargetInstrDesc &TID = TailBB->back().getDesc();
+ // Pre-regalloc tail duplication hurts compile time and doesn't help
+ // much except for indirect branches and returns.
+ if (!TID.isIndirectBranch() && !TID.isReturn())
return false;
// If the target has hardware branch prediction that can handle indirect
// branches, duplicating them can often make them predictable when there
@@ -500,9 +504,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
if (!I->isPHI() && !I->isDebugValue())
InstrCount += 1;
}
- // Heuristically, don't tail-duplicate calls if it would expand code size,
- // as it's less likely to be worth the extra cost.
- if (InstrCount > 1 && HasCall)
+      // Don't tail-duplicate calls before register allocation. Calls present a
+ // barrier to register allocation so duplicating them may end up increasing
+ // spills.
+ if (InstrCount > 1 && (PreRegAlloc && HasCall))
return false;
DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 6e4a0d837ecd..15340a3f1084 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -22,13 +22,18 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/CodeGen/PostRAHazardRecognizer.h"
+#include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+static cl::opt<bool> DisableHazardRecognizer(
+ "disable-sched-hazard", cl::Hidden, cl::init(false),
+ cl::desc("Disable hazard detection during preRA scheduling"));
+
/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything
/// after it, replacing it with an unconditional branch to NewDest.
void
@@ -135,7 +140,7 @@ bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isPredicable())
return false;
-
+
for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) {
if (TID.OpInfo[i].isPredicate()) {
MachineOperand &MO = MI->getOperand(i);
@@ -166,8 +171,10 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
-bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
- const MachineInstr *MI1) const {
+bool
+TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
@@ -252,9 +259,9 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
const MachineFrameInfo &MFI = *MF.getFrameInfo();
assert(MFI.getObjectOffset(FI) != -1);
MachineMemOperand *MMO =
- MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
- Flags, /*Offset=*/0,
- MFI.getObjectSize(FI),
+ MF.getMachineMemOperand(
+ MachinePointerInfo(PseudoSourceValue::getFixedStack(FI)),
+ Flags, MFI.getObjectSize(FI),
MFI.getObjectAlignment(FI));
NewMI->addMemOperand(MF, MMO);
@@ -329,8 +336,13 @@ isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
// Avoid instructions obviously unsafe for remat.
- if (TID.hasUnmodeledSideEffects() || TID.isNotDuplicable() ||
- TID.mayStore())
+ if (TID.isNotDuplicable() || TID.mayStore() ||
+ MI->hasUnmodeledSideEffects())
+ return false;
+
+ // Don't remat inline asm. We have no idea how expensive it is
+ // even if it's side effect free.
+ if (MI->isInlineAsm())
return false;
// Avoid instructions which load from potentially varying memory.
@@ -414,8 +426,24 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,
return false;
}
+// Provide a global flag for disabling the PreRA hazard recognizer that targets
+// may choose to honor.
+bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const {
+ return !DisableHazardRecognizer;
+}
+
+// Default implementation of CreateTargetHazardRecognizer.
+ScheduleHazardRecognizer *TargetInstrInfoImpl::
+CreateTargetHazardRecognizer(const TargetMachine *TM,
+ const ScheduleDAG *DAG) const {
+ // Dummy hazard recognizer allows all instructions to issue.
+ return new ScheduleHazardRecognizer();
+}
+
// Default implementation of CreateTargetPostRAHazardRecognizer.
ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
- return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II,
+ const ScheduleDAG *DAG) const {
+ return (ScheduleHazardRecognizer *)
+ new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched");
}
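The hunks above give targets a pre-RA hook (CreateTargetHazardRecognizer, defaulting to a no-op recognizer and skippable via the new -disable-sched-hazard flag) and switch the post-RA default to a ScoreboardHazardRecognizer built from the itineraries. A scoreboard simply reserves functional units over a window of upcoming cycles and reports a hazard when an instruction's units are already taken. The toy standalone sketch below illustrates that idea only; it is not the ScoreboardHazardRecognizer API, and the unit masks and latencies are invented.

#include <cstdio>
#include <utility>
#include <vector>

// Toy scoreboard: Window[i] is a bitmask of functional units reserved i cycles
// from now. An instruction is a list of (cycle offset, unit mask) reservations.
struct Scoreboard {
  std::vector<unsigned> Window;
  explicit Scoreboard(unsigned Depth) : Window(Depth, 0) {}

  bool hasHazard(const std::vector<std::pair<unsigned, unsigned> > &Uses) const {
    for (size_t i = 0; i < Uses.size(); ++i)
      if (Uses[i].first < Window.size() && (Window[Uses[i].first] & Uses[i].second))
        return true;                    // unit already reserved: stall
    return false;
  }
  void emit(const std::vector<std::pair<unsigned, unsigned> > &Uses) {
    for (size_t i = 0; i < Uses.size(); ++i)
      if (Uses[i].first < Window.size())
        Window[Uses[i].first] |= Uses[i].second;
  }
  void advanceCycle() {                 // shift the window forward one cycle
    Window.erase(Window.begin());
    Window.push_back(0);
  }
};

int main() {
  Scoreboard SB(4);
  // A hypothetical multiply occupying unit 0x1 for two consecutive cycles.
  std::vector<std::pair<unsigned, unsigned> > Mul;
  Mul.push_back(std::make_pair(0u, 0x1u));
  Mul.push_back(std::make_pair(1u, 0x1u));
  SB.emit(Mul);                                              // issue one now
  std::printf("hazard for next mul: %d\n", SB.hasHazard(Mul));      // 1: busy
  SB.advanceCycle();
  SB.advanceCycle();
  std::printf("hazard after 2 cycles: %d\n", SB.hasHazard(Mul));    // 0: free
  return 0;
}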
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index f1e10eec724c..0b7bd98cc692 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -29,10 +29,12 @@
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/ELF.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/Triple.h"
using namespace llvm;
using namespace dwarf;
@@ -45,81 +47,81 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
TargetLoweringObjectFile::Initialize(Ctx, TM);
BSSSection =
- getContext().getELFSection(".bss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".bss", ELF::SHT_NOBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getBSS());
TextSection =
- getContext().getELFSection(".text", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_EXECINSTR |
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".text", ELF::SHT_PROGBITS,
+ ELF::SHF_EXECINSTR |
+ ELF::SHF_ALLOC,
SectionKind::getText());
DataSection =
- getContext().getELFSection(".data", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_WRITE |MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".data", ELF::SHT_PROGBITS,
+ ELF::SHF_WRITE |ELF::SHF_ALLOC,
SectionKind::getDataRel());
ReadOnlySection =
- getContext().getELFSection(".rodata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".rodata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
SectionKind::getReadOnly());
TLSDataSection =
- getContext().getELFSection(".tdata", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".tdata", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
SectionKind::getThreadData());
TLSBSSSection =
- getContext().getELFSection(".tbss", MCSectionELF::SHT_NOBITS,
- MCSectionELF::SHF_ALLOC | MCSectionELF::SHF_TLS |
- MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".tbss", ELF::SHT_NOBITS,
+ ELF::SHF_ALLOC | ELF::SHF_TLS |
+ ELF::SHF_WRITE,
SectionKind::getThreadBSS());
DataRelSection =
- getContext().getELFSection(".data.rel", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
DataRelLocalSection =
- getContext().getELFSection(".data.rel.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRelLocal());
DataRelROSection =
- getContext().getELFSection(".data.rel.ro", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.ro", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRel());
DataRelROLocalSection =
- getContext().getELFSection(".data.rel.ro.local", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".data.rel.ro.local", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getReadOnlyWithRelLocal());
MergeableConst4Section =
- getContext().getELFSection(".rodata.cst4", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst4", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst4());
MergeableConst8Section =
- getContext().getELFSection(".rodata.cst8", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst8", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst8());
MergeableConst16Section =
- getContext().getELFSection(".rodata.cst16", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_MERGE,
+ getContext().getELFSection(".rodata.cst16", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_MERGE,
SectionKind::getMergeableConst16());
StaticCtorSection =
- getContext().getELFSection(".ctors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".ctors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
StaticDtorSection =
- getContext().getELFSection(".dtors", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
+ getContext().getELFSection(".dtors", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |ELF::SHF_WRITE,
SectionKind::getDataRel());
// Exception Handling Sections.
@@ -129,50 +131,50 @@ void TargetLoweringObjectFileELF::Initialize(MCContext &Ctx,
// runtime hit for C++ apps. Either the contents of the LSDA need to be
// adjusted or this should be a data section.
LSDASection =
- getContext().getELFSection(".gcc_except_table", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC,
+ getContext().getELFSection(".gcc_except_table", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
SectionKind::getReadOnly());
- EHFrameSection =
- getContext().getELFSection(".eh_frame", MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |MCSectionELF::SHF_WRITE,
- SectionKind::getDataRel());
-
// Debug Info Sections.
DwarfAbbrevSection =
- getContext().getELFSection(".debug_abbrev", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_abbrev", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfInfoSection =
- getContext().getELFSection(".debug_info", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_info", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfLineSection =
- getContext().getELFSection(".debug_line", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_line", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfFrameSection =
- getContext().getELFSection(".debug_frame", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_frame", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfPubNamesSection =
- getContext().getELFSection(".debug_pubnames", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_pubnames", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfPubTypesSection =
- getContext().getELFSection(".debug_pubtypes", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfStrSection =
- getContext().getELFSection(".debug_str", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_str", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfLocSection =
- getContext().getELFSection(".debug_loc", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_loc", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfARangesSection =
- getContext().getELFSection(".debug_aranges", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_aranges", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfRangesSection =
- getContext().getELFSection(".debug_ranges", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_ranges", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
DwarfMacroInfoSection =
- getContext().getELFSection(".debug_macinfo", MCSectionELF::SHT_PROGBITS, 0,
+ getContext().getELFSection(".debug_macinfo", ELF::SHT_PROGBITS, 0,
SectionKind::getMetadata());
}
+const MCSection *TargetLoweringObjectFileELF::getEHFrameSection() const {
+ return getContext().getELFSection(".eh_frame", ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+}
static SectionKind
getELFKindForNamedSection(StringRef Name, SectionKind K) {
@@ -208,18 +210,18 @@ getELFKindForNamedSection(StringRef Name, SectionKind K) {
static unsigned getELFSectionType(StringRef Name, SectionKind K) {
if (Name == ".init_array")
- return MCSectionELF::SHT_INIT_ARRAY;
+ return ELF::SHT_INIT_ARRAY;
if (Name == ".fini_array")
- return MCSectionELF::SHT_FINI_ARRAY;
+ return ELF::SHT_FINI_ARRAY;
if (Name == ".preinit_array")
- return MCSectionELF::SHT_PREINIT_ARRAY;
+ return ELF::SHT_PREINIT_ARRAY;
if (K.isBSS() || K.isThreadBSS())
- return MCSectionELF::SHT_NOBITS;
+ return ELF::SHT_NOBITS;
- return MCSectionELF::SHT_PROGBITS;
+ return ELF::SHT_PROGBITS;
}
@@ -228,24 +230,24 @@ getELFSectionFlags(SectionKind K) {
unsigned Flags = 0;
if (!K.isMetadata())
- Flags |= MCSectionELF::SHF_ALLOC;
+ Flags |= ELF::SHF_ALLOC;
if (K.isText())
- Flags |= MCSectionELF::SHF_EXECINSTR;
+ Flags |= ELF::SHF_EXECINSTR;
if (K.isWriteable())
- Flags |= MCSectionELF::SHF_WRITE;
+ Flags |= ELF::SHF_WRITE;
if (K.isThreadLocal())
- Flags |= MCSectionELF::SHF_TLS;
+ Flags |= ELF::SHF_TLS;
// K.isMergeableConst() is left out to honour PR4650
if (K.isMergeableCString() || K.isMergeableConst4() ||
K.isMergeableConst8() || K.isMergeableConst16())
- Flags |= MCSectionELF::SHF_MERGE;
+ Flags |= ELF::SHF_MERGE;
if (K.isMergeableCString())
- Flags |= MCSectionELF::SHF_STRINGS;
+ Flags |= ELF::SHF_STRINGS;
return Flags;
}
@@ -261,23 +263,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
return getContext().getELFSection(SectionName,
getELFSectionType(SectionName, Kind),
- getELFSectionFlags(Kind), Kind, true);
-}
-
-static const char *getSectionPrefixForUniqueGlobal(SectionKind Kind) {
- if (Kind.isText()) return ".gnu.linkonce.t.";
- if (Kind.isReadOnly()) return ".gnu.linkonce.r.";
-
- if (Kind.isThreadData()) return ".gnu.linkonce.td.";
- if (Kind.isThreadBSS()) return ".gnu.linkonce.tb.";
-
- if (Kind.isDataNoRel()) return ".gnu.linkonce.d.";
- if (Kind.isDataRelLocal()) return ".gnu.linkonce.d.rel.local.";
- if (Kind.isDataRel()) return ".gnu.linkonce.d.rel.";
- if (Kind.isReadOnlyWithRelLocal()) return ".gnu.linkonce.d.rel.ro.local.";
-
- assert(Kind.isReadOnlyWithRel() && "Unknown section kind");
- return ".gnu.linkonce.d.rel.ro.";
+ getELFSectionFlags(Kind), Kind);
}
/// getSectionPrefixForGlobal - Return the section prefix name used by options
@@ -307,7 +293,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
bool EmitUniquedSection;
if (Kind.isText())
EmitUniquedSection = TM.getFunctionSections();
- else
+ else
EmitUniquedSection = TM.getDataSections();
// If this global is linkonce/weak and the target handles this by emitting it
@@ -315,19 +301,21 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if ((GV->isWeakForLinker() || EmitUniquedSection) &&
!Kind.isCommon() && !Kind.isBSS()) {
const char *Prefix;
- if (GV->isWeakForLinker())
- Prefix = getSectionPrefixForUniqueGlobal(Kind);
- else {
- assert(EmitUniquedSection);
- Prefix = getSectionPrefixForGlobal(Kind);
- }
+ Prefix = getSectionPrefixForGlobal(Kind);
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
Name.append(Sym->getName().begin(), Sym->getName().end());
+ StringRef Group = "";
+ unsigned Flags = getELFSectionFlags(Kind);
+ if (GV->isWeakForLinker()) {
+ Group = Sym->getName();
+ Flags |= ELF::SHF_GROUP;
+ }
+
return getContext().getELFSection(Name.str(),
getELFSectionType(Name.str(), Kind),
- getELFSectionFlags(Kind), Kind);
+ Flags, Kind, 0, Group);
}
if (Kind.isText()) return TextSection;
@@ -352,10 +340,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
std::string Name = SizeSpec + utostr(Align);
- return getContext().getELFSection(Name, MCSectionELF::SHT_PROGBITS,
- MCSectionELF::SHF_ALLOC |
- MCSectionELF::SHF_MERGE |
- MCSectionELF::SHF_STRINGS,
+ return getContext().getELFSection(Name, ELF::SHT_PROGBITS,
+ ELF::SHF_ALLOC |
+ ELF::SHF_MERGE |
+ ELF::SHF_STRINGS,
Kind);
}
@@ -450,7 +438,16 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
IsFunctionEHSymbolGlobal = true;
IsFunctionEHFrameSymbolPrivate = false;
SupportsWeakOmittedEHFrame = false;
-
+
+ Triple T(((LLVMTargetMachine&)TM).getTargetTriple());
+ if (T.getOS() == Triple::Darwin) {
+ unsigned MajNum = T.getDarwinMajorNumber();
+ if (MajNum == 7 || MajNum == 8) // 10.3 Panther, 10.4 Tiger
+ CommDirectiveSupportsAlignment = false;
+ if (MajNum > 9) // 10.6 SnowLeopard
+ IsFunctionEHSymbolGlobal = false;
+ }
+
TargetLoweringObjectFile::Initialize(Ctx, TM);
TextSection // .text
@@ -469,20 +466,20 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
= getContext().getMachOSection("__DATA", "__thread_bss",
MCSectionMachO::S_THREAD_LOCAL_ZEROFILL,
SectionKind::getThreadBSS());
-
+
// TODO: Verify datarel below.
TLSTLVSection // .tlv
= getContext().getMachOSection("__DATA", "__thread_vars",
MCSectionMachO::S_THREAD_LOCAL_VARIABLES,
SectionKind::getDataRel());
-
+
TLSThreadInitSection
= getContext().getMachOSection("__DATA", "__thread_init",
MCSectionMachO::S_THREAD_LOCAL_INIT_FUNCTION_POINTERS,
SectionKind::getDataRel());
-
+
CStringSection // .cstring
- = getContext().getMachOSection("__TEXT", "__cstring",
+ = getContext().getMachOSection("__TEXT", "__cstring",
MCSectionMachO::S_CSTRING_LITERALS,
SectionKind::getMergeable1ByteCString());
UStringSection
@@ -493,7 +490,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSectionMachO::S_4BYTE_LITERALS,
SectionKind::getMergeableConst4());
EightByteConstantSection // .literal8
- = getContext().getMachOSection("__TEXT", "__literal8",
+ = getContext().getMachOSection("__TEXT", "__literal8",
MCSectionMachO::S_8BYTE_LITERALS,
SectionKind::getMergeableConst8());
@@ -517,14 +514,14 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
SectionKind::getText());
ConstTextCoalSection
- = getContext().getMachOSection("__TEXT", "__const_coal",
+ = getContext().getMachOSection("__TEXT", "__const_coal",
MCSectionMachO::S_COALESCED,
SectionKind::getReadOnly());
ConstDataSection // .const_data
= getContext().getMachOSection("__DATA", "__const", 0,
SectionKind::getReadOnlyWithRel());
DataCoalSection
- = getContext().getMachOSection("__DATA","__datacoal_nt",
+ = getContext().getMachOSection("__DATA","__datacoal_nt",
MCSectionMachO::S_COALESCED,
SectionKind::getDataRel());
DataCommonSection
@@ -534,7 +531,7 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
DataBSSSection
= getContext().getMachOSection("__DATA","__bss", MCSectionMachO::S_ZEROFILL,
SectionKind::getBSS());
-
+
LazySymbolPointerSection
= getContext().getMachOSection("__DATA", "__la_symbol_ptr",
@@ -566,17 +563,9 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
// Exception Handling.
LSDASection = getContext().getMachOSection("__TEXT", "__gcc_except_tab", 0,
SectionKind::getReadOnlyWithRel());
- EHFrameSection =
- getContext().getMachOSection("__TEXT", "__eh_frame",
- MCSectionMachO::S_COALESCED |
- MCSectionMachO::S_ATTR_NO_TOC |
- MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
- MCSectionMachO::S_ATTR_LIVE_SUPPORT,
- SectionKind::getReadOnly());
-
// Debug Information.
DwarfAbbrevSection =
- getContext().getMachOSection("__DWARF", "__debug_abbrev",
+ getContext().getMachOSection("__DWARF", "__debug_abbrev",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
DwarfInfoSection =
@@ -623,10 +612,19 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
getContext().getMachOSection("__DWARF", "__debug_inlined",
MCSectionMachO::S_ATTR_DEBUG,
SectionKind::getMetadata());
-
+
TLSExtraDataSection = TLSTLVSection;
}
+const MCSection *TargetLoweringObjectFileMachO::getEHFrameSection() const {
+ return getContext().getMachOSection("__TEXT", "__eh_frame",
+ MCSectionMachO::S_COALESCED |
+ MCSectionMachO::S_ATTR_NO_TOC |
+ MCSectionMachO::S_ATTR_STRIP_STATIC_SYMS |
+ MCSectionMachO::S_ATTR_LIVE_SUPPORT,
+ SectionKind::getReadOnly());
+}
+
const MCSection *TargetLoweringObjectFileMachO::
getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
@@ -665,7 +663,7 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
const MCSection *TargetLoweringObjectFileMachO::
SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
Mangler *Mang, const TargetMachine &TM) const {
-
+
// Handle thread local data.
if (Kind.isThreadBSS()) return TLSBSSSection;
if (Kind.isThreadData()) return TLSDataSection;
@@ -685,7 +683,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
if (Kind.isMergeable1ByteCString() &&
TM.getTargetData()->getPreferredAlignment(cast<GlobalVariable>(GV)) < 32)
return CStringSection;
-
+
// Do not put 16-bit arrays in the UString section if they have an
// externally visible label; this runs into issues with certain linker
// versions.
@@ -721,7 +719,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
// with the .zerofill directive (aka .lcomm).
if (Kind.isBSSLocal())
return DataBSSSection;
-
+
// Otherwise, just drop the variable in the normal data section.
return DataSection;
}
@@ -858,13 +856,6 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
COFF::IMAGE_SCN_MEM_READ,
SectionKind::getReadOnly());
- EHFrameSection =
- getContext().getCOFFSection(".eh_frame",
- COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
- COFF::IMAGE_SCN_MEM_READ |
- COFF::IMAGE_SCN_MEM_WRITE,
- SectionKind::getDataRel());
-
// Debug info.
DwarfAbbrevSection =
getContext().getCOFFSection(".debug_abbrev",
@@ -928,6 +919,15 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx,
SectionKind::getMetadata());
}
+const MCSection *TargetLoweringObjectFileCOFF::getEHFrameSection() const {
+ return getContext().getCOFFSection(".eh_frame",
+ COFF::IMAGE_SCN_CNT_INITIALIZED_DATA |
+ COFF::IMAGE_SCN_MEM_READ |
+ COFF::IMAGE_SCN_MEM_WRITE,
+ SectionKind::getDataRel());
+}
+
+
static unsigned
getCOFFSectionFlags(SectionKind K) {
unsigned Flags = 0;
@@ -938,6 +938,7 @@ getCOFFSectionFlags(SectionKind K) {
else if (K.isText())
Flags |=
COFF::IMAGE_SCN_MEM_EXECUTE |
+ COFF::IMAGE_SCN_MEM_READ |
COFF::IMAGE_SCN_CNT_CODE;
else if (K.isBSS ())
Flags |=
@@ -967,12 +968,12 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
if (Kind.isText())
- return ".text$linkonce";
+ return ".text$";
if (Kind.isBSS ())
- return ".bss$linkonce";
+ return ".bss$";
if (Kind.isWriteable())
- return ".data$linkonce";
- return ".rdata$linkonce";
+ return ".data$";
+ return ".rdata$";
}
@@ -987,14 +988,14 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
MCSymbol *Sym = Mang->getSymbol(GV);
- Name.append(Sym->getName().begin(), Sym->getName().end());
+ Name.append(Sym->getName().begin() + 1, Sym->getName().end());
unsigned Characteristics = getCOFFSectionFlags(Kind);
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
return getContext().getCOFFSection(Name.str(), Characteristics,
- COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind);
+ COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
}
if (Kind.isText())
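
A rough sketch of the section-selection change above: a weak (linkonce) global now lands in a normally named section carrying ELF::SHF_GROUP, with a COMDAT group named after its symbol, instead of a .gnu.linkonce.* section. Ctx is assumed to be an MCContext; the section name, symbol name, and the meaning of the fifth argument (taken to be the entry size) are assumptions for the example, while the call shape mirrors the hunk above.

    // Hypothetical request for the section of a weak function "foo":
    const MCSection *Sec =
      Ctx.getELFSection(".text.foo",                      // uniqued by name
                        ELF::SHT_PROGBITS,
                        ELF::SHF_ALLOC | ELF::SHF_EXECINSTR | ELF::SHF_GROUP,
                        SectionKind::getText(),
                        0,                                // entry size (assumed)
                        "foo");                           // COMDAT group signature
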
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 78989c567e42..b3120b8be1ab 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -110,7 +110,7 @@ namespace {
bool ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist);
+ unsigned RegA, unsigned RegB, unsigned Dist);
typedef std::pair<std::pair<unsigned, bool>, MachineInstr*> NewKill;
bool canUpdateDeletedKills(SmallVector<unsigned, 4> &Kills,
@@ -138,7 +138,9 @@ namespace {
public:
static char ID; // Pass identification, replacement for typeid
- TwoAddressInstructionPass() : MachineFunctionPass(ID) {}
+ TwoAddressInstructionPass() : MachineFunctionPass(ID) {
+ initializeTwoAddressInstructionPassPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
@@ -146,10 +148,7 @@ namespace {
AU.addPreserved<LiveVariables>();
AU.addPreservedID(MachineLoopInfoID);
AU.addPreservedID(MachineDominatorsID);
- if (StrongPHIElim)
- AU.addPreservedID(StrongPHIEliminationID);
- else
- AU.addPreservedID(PHIEliminationID);
+ AU.addPreservedID(PHIEliminationID);
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -159,8 +158,11 @@ namespace {
}
char TwoAddressInstructionPass::ID = 0;
-INITIALIZE_PASS(TwoAddressInstructionPass, "twoaddressinstruction",
- "Two-Address instruction pass", false, false);
+INITIALIZE_PASS_BEGIN(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
+INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
+INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction",
+ "Two-Address instruction pass", false, false)
char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID;
@@ -548,8 +550,9 @@ TwoAddressInstructionPass::isProfitableToCommute(unsigned regB, unsigned regC,
unsigned FromRegC = getMappedReg(regC, SrcRegMap);
unsigned ToRegB = getMappedReg(regB, DstRegMap);
unsigned ToRegC = getMappedReg(regC, DstRegMap);
- if (!regsAreCompatible(FromRegB, ToRegB, TRI) &&
- (regsAreCompatible(FromRegB, ToRegC, TRI) ||
+ if ((FromRegB && ToRegB && !regsAreCompatible(FromRegB, ToRegB, TRI)) &&
+ ((!FromRegC && !ToRegC) ||
+ regsAreCompatible(FromRegB, ToRegC, TRI) ||
regsAreCompatible(FromRegC, ToRegB, TRI)))
return true;
@@ -630,7 +633,8 @@ bool
TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
MachineBasicBlock::iterator &nmi,
MachineFunction::iterator &mbbi,
- unsigned RegB, unsigned Dist) {
+ unsigned RegA, unsigned RegB,
+ unsigned Dist) {
MachineInstr *NewMI = TII->convertToThreeAddress(mbbi, mi, LV);
if (NewMI) {
DEBUG(dbgs() << "2addr: CONVERTING 2-ADDR: " << *mi);
@@ -650,6 +654,10 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
mi = NewMI;
nmi = llvm::next(mi);
}
+
+ // Update source and destination register maps.
+ SrcRegMap.erase(RegA);
+ DstRegMap.erase(RegB);
return true;
}
@@ -740,7 +748,7 @@ static bool isSafeToDelete(MachineInstr *MI,
const TargetInstrDesc &TID = MI->getDesc();
if (TID.mayStore() || TID.isCall())
return false;
- if (TID.isTerminator() || TID.hasUnmodeledSideEffects())
+ if (TID.isTerminator() || MI->hasUnmodeledSideEffects())
return false;
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@@ -884,7 +892,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
// three-address instruction. Check if it is profitable.
if (!regBKilled || isProfitableToConv3Addr(regA)) {
// Try to convert it.
- if (ConvertInstTo3Addr(mi, nmi, mbbi, regB, Dist)) {
+ if (ConvertInstTo3Addr(mi, nmi, mbbi, regA, regB, Dist)) {
++NumConvertedTo3Addr;
return true; // Done with this instruction.
}
@@ -951,7 +959,7 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi,
if (LV) {
for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
MachineOperand &MO = mi->getOperand(i);
- if (MO.isReg() && MO.getReg() != 0 &&
+ if (MO.isReg() &&
TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
if (MO.isUse()) {
if (MO.isKill()) {
@@ -1013,8 +1021,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
<< MF.getFunction()->getName() << '\n');
// ReMatRegs - Keep track of the registers whose def's are remat'ed.
- BitVector ReMatRegs;
- ReMatRegs.resize(MRI->getLastVirtReg()+1);
+ BitVector ReMatRegs(MRI->getNumVirtRegs());
typedef DenseMap<unsigned, SmallVector<std::pair<unsigned, unsigned>, 4> >
TiedOperandMap;
@@ -1143,7 +1150,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI);
- ReMatRegs.set(regB);
+ ReMatRegs.set(TargetRegisterInfo::virtReg2Index(regB));
++NumReMats;
} else {
BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY),
@@ -1229,13 +1236,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
}
// Some remat'ed instructions are dead.
- int VReg = ReMatRegs.find_first();
- while (VReg != -1) {
+ for (int i = ReMatRegs.find_first(); i != -1; i = ReMatRegs.find_next(i)) {
+ unsigned VReg = TargetRegisterInfo::index2VirtReg(i);
if (MRI->use_nodbg_empty(VReg)) {
MachineInstr *DefMI = MRI->getVRegDef(VReg);
DefMI->eraseFromParent();
}
- VReg = ReMatRegs.find_next(VReg);
}
// Eliminate REG_SEQUENCE instructions. Their whole purpose was to preserve
@@ -1346,7 +1352,6 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector<unsigned,4> &Srcs,
continue;
// Insert a copy to replace the original.
- MachineBasicBlock::iterator InsertLoc = SomeMI;
MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI,
SomeMI->getDebugLoc(),
TII->get(TargetOpcode::COPY))
@@ -1412,6 +1417,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
+ unsigned SubIdx = MI->getOperand(i+1).getImm();
if (MI->getOperand(i).getSubReg() ||
TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI);
@@ -1431,7 +1437,9 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
bool isKill = MI->getOperand(i).isKill();
if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() ||
- !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) {
+ !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) ||
+ !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg),
+ MRI->getRegClass(SrcReg), SubIdx)) {
+ // REG_SEQUENCE cannot have duplicated operands; add a copy.
// Also add a copy if the source is live-in to the block. We don't want
// to end up with a partial-redef of a livein, e.g.
@@ -1460,7 +1468,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
MachineBasicBlock::iterator InsertLoc = MI;
MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc,
MI->getDebugLoc(), TII->get(TargetOpcode::COPY))
- .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm())
+ .addReg(DstReg, RegState::Define, SubIdx)
.addReg(SrcReg, getKillRegState(isKill));
MI->getOperand(i).setReg(0);
if (LV && isKill)
diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp
index 6dd333358bc4..48d8ab1658da 100644
--- a/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/lib/CodeGen/UnreachableBlockElim.cpp
@@ -26,6 +26,7 @@
#include "llvm/Function.h"
#include "llvm/Pass.h"
#include "llvm/Type.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -43,16 +44,19 @@ namespace {
virtual bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- UnreachableBlockElim() : FunctionPass(ID) {}
+ UnreachableBlockElim() : FunctionPass(ID) {
+ initializeUnreachableBlockElimPass(*PassRegistry::getPassRegistry());
+ }
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addPreserved<DominatorTree>();
AU.addPreserved<ProfileInfo>();
}
};
}
char UnreachableBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableBlockElim, "unreachableblockelim",
- "Remove unreachable blocks from the CFG", false, false);
+ "Remove unreachable blocks from the CFG", false, false)
FunctionPass *llvm::createUnreachableBlockEliminationPass() {
return new UnreachableBlockElim();
@@ -106,7 +110,7 @@ namespace {
char UnreachableMachineBlockElim::ID = 0;
INITIALIZE_PASS(UnreachableMachineBlockElim, "unreachable-mbb-elimination",
- "Remove unreachable machine basic blocks", false, false);
+ "Remove unreachable machine basic blocks", false, false)
char &llvm::UnreachableMachineBlockElimID = UnreachableMachineBlockElim::ID;
@@ -118,6 +122,7 @@ void UnreachableMachineBlockElim::getAnalysisUsage(AnalysisUsage &AU) const {
bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
SmallPtrSet<MachineBasicBlock*, 8> Reachable;
+ bool ModifiedPHI = false;
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>();
@@ -179,6 +184,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
if (!preds.count(phi->getOperand(i).getMBB())) {
phi->RemoveOperand(i);
phi->RemoveOperand(i-1);
+ ModifiedPHI = true;
}
if (phi->getNumOperands() == 3) {
@@ -188,6 +194,7 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
MachineInstr* temp = phi;
++phi;
temp->eraseFromParent();
+ ModifiedPHI = true;
if (Input != Output)
F.getRegInfo().replaceRegWith(Output, Input);
@@ -201,5 +208,5 @@ bool UnreachableMachineBlockElim::runOnMachineFunction(MachineFunction &F) {
F.RenumberBlocks();
- return DeadBlocks.size();
+ return (DeadBlocks.size() || ModifiedPHI);
}
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 20ffcffa70d3..734b87e62f62 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -24,6 +24,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
@@ -48,7 +49,7 @@ STATISTIC(NumSpills , "Number of register spills");
char VirtRegMap::ID = 0;
-INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false);
+INITIALIZE_PASS(VirtRegMap, "virtregmap", "Virtual Register Map", false, false)
bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
MRI = &mf.getRegInfo();
@@ -74,8 +75,7 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
EmergencySpillSlots.clear();
SpillSlotToUsesMap.resize(8);
- ImplicitDefed.resize(MF->getRegInfo().getLastVirtReg()+1-
- TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.resize(MF->getRegInfo().getNumVirtRegs());
allocatableRCRegs.clear();
for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
@@ -89,24 +89,37 @@ bool VirtRegMap::runOnMachineFunction(MachineFunction &mf) {
}
void VirtRegMap::grow() {
- unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg();
- Virt2PhysMap.grow(LastVirtReg);
- Virt2StackSlotMap.grow(LastVirtReg);
- Virt2ReMatIdMap.grow(LastVirtReg);
- Virt2SplitMap.grow(LastVirtReg);
- Virt2SplitKillMap.grow(LastVirtReg);
- ReMatMap.grow(LastVirtReg);
- ImplicitDefed.resize(LastVirtReg-TargetRegisterInfo::FirstVirtualRegister+1);
+ unsigned NumRegs = MF->getRegInfo().getNumVirtRegs();
+ Virt2PhysMap.resize(NumRegs);
+ Virt2StackSlotMap.resize(NumRegs);
+ Virt2ReMatIdMap.resize(NumRegs);
+ Virt2SplitMap.resize(NumRegs);
+ Virt2SplitKillMap.resize(NumRegs);
+ ReMatMap.resize(NumRegs);
+ ImplicitDefed.resize(NumRegs);
+}
+
+unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) {
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
+ if (LowSpillSlot == NO_STACK_SLOT)
+ LowSpillSlot = SS;
+ if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
+ HighSpillSlot = SS;
+ assert(SS >= LowSpillSlot && "Unexpected low spill slot");
+ unsigned Idx = SS-LowSpillSlot;
+ while (Idx >= SpillSlotToUsesMap.size())
+ SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
+ return SS;
}
unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) {
std::pair<unsigned, unsigned> Hint = MRI->getRegAllocationHint(virtReg);
unsigned physReg = Hint.second;
- if (physReg &&
- TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
+ if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg))
physReg = getPhys(physReg);
if (Hint.first == 0)
- return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg))
+ return (TargetRegisterInfo::isPhysicalRegister(physReg))
? physReg : 0;
return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF);
}
@@ -116,18 +129,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- unsigned Idx = SS-LowSpillSlot;
- while (Idx >= SpillSlotToUsesMap.size())
- SpillSlotToUsesMap.resize(SpillSlotToUsesMap.size()*2);
- Virt2StackSlotMap[virtReg] = SS;
++NumSpills;
- return SS;
+ return Virt2StackSlotMap[virtReg] = createSpillSlot(RC);
}
void VirtRegMap::assignVirt2StackSlot(unsigned virtReg, int SS) {
@@ -160,14 +163,7 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
- RC->getAlignment());
- if (LowSpillSlot == NO_STACK_SLOT)
- LowSpillSlot = SS;
- if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
- HighSpillSlot = SS;
- EmergencySpillSlots[RC] = SS;
- return SS;
+ return EmergencySpillSlots[RC] = createSpillSlot(RC);
}
void VirtRegMap::addSpillSlotUse(int FI, MachineInstr *MI) {
@@ -232,10 +228,11 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
UnusedRegs.resize(NumRegs);
BitVector Used(NumRegs);
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- Used.set(Virt2PhysMap[i]);
+ for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG)
+ Used.set(Virt2PhysMap[Reg]);
+ }
BitVector Allocatable = TRI->getAllocatableSet(*MF);
bool AnyUnused = false;
@@ -258,23 +255,97 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
return AnyUnused;
}
+void VirtRegMap::rewrite(SlotIndexes *Indexes) {
+ DEBUG(dbgs() << "********** REWRITE VIRTUAL REGISTERS **********\n"
+ << "********** Function: "
+ << MF->getFunction()->getName() << '\n');
+
+ SmallVector<unsigned, 8> SuperKills;
+
+ for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
+ MBBI != MBBE; ++MBBI) {
+ DEBUG(MBBI->print(dbgs(), Indexes));
+ for (MachineBasicBlock::iterator MII = MBBI->begin(), MIE = MBBI->end();
+ MII != MIE;) {
+ MachineInstr *MI = MII;
+ ++MII;
+
+ for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+ MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+ MachineOperand &MO = *MOI;
+ if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue;
+ unsigned VirtReg = MO.getReg();
+ unsigned PhysReg = getPhys(VirtReg);
+ assert(PhysReg != NO_PHYS_REG && "Instruction uses unmapped VirtReg");
+
+ // Preserve semantics of sub-register operands.
+ if (MO.getSubReg()) {
+ // A virtual register kill refers to the whole register, so we may
+ // have to add <imp-use,kill> operands for the super-register.
+ if (MO.isUse() && MO.isKill() && !MO.isUndef())
+ SuperKills.push_back(PhysReg);
+
+ // We don't have to deal with sub-register defs because
+ // LiveIntervalAnalysis already added the necessary <imp-def>
+ // operands.
+
+ // PhysReg operands cannot have subregister indexes.
+ PhysReg = TRI->getSubReg(PhysReg, MO.getSubReg());
+ assert(PhysReg && "Invalid SubReg for physical register");
+ MO.setSubReg(0);
+ }
+ // Rewrite. Note we could have used MachineOperand::substPhysReg(), but
+ // we need the inlining here.
+ MO.setReg(PhysReg);
+ }
+
+ // Add any missing super-register kills after rewriting the whole
+ // instruction.
+ while (!SuperKills.empty())
+ MI->addRegisterKilled(SuperKills.pop_back_val(), TRI, true);
+
+ DEBUG(dbgs() << "> " << *MI);
+
+ // Finally, remove any identity copies.
+ if (MI->isIdentityCopy()) {
+ DEBUG(dbgs() << "Deleting identity copy.\n");
+ RemoveMachineInstrFromMaps(MI);
+ if (Indexes)
+ Indexes->removeMachineInstrFromMaps(MI);
+ // It's safe to erase MI because MII has already been incremented.
+ MI->eraseFromParent();
+ }
+ }
+ }
+
+ // Tell MRI about physical registers in use.
+ for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+ if (!MRI->reg_nodbg_empty(Reg))
+ MRI->setPhysRegUsed(Reg);
+}
+
void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
const MachineRegisterInfo &MRI = MF->getRegInfo();
OS << "********** REGISTER MAP **********\n";
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
- if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
- OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2PhysMap[Reg] != (unsigned)VirtRegMap::NO_PHYS_REG) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> "
+ << PrintReg(Virt2PhysMap[Reg], TRI) << "] "
+ << MRI.getRegClass(Reg)->getName() << "\n";
+ }
}
- for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
- e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
- if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
- OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
- << "] " << MRI.getRegClass(i)->getName() << "\n";
+ for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
+ unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
+ if (Virt2StackSlotMap[Reg] != VirtRegMap::NO_STACK_SLOT) {
+ OS << '[' << PrintReg(Reg, TRI) << " -> fi#" << Virt2StackSlotMap[Reg]
+ << "] " << MRI.getRegClass(Reg)->getName() << "\n";
+ }
+ }
OS << '\n';
}
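
The loops above switch from the old FirstVirtualRegister..getLastVirtReg() numbering to dense indices. A small sketch of the idiom, assuming MRI is a MachineRegisterInfo reference:

    // Count virtual registers that still have non-debug uses or defs.
    unsigned NumUsed = 0;
    for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
      // index2VirtReg maps a dense index to a virtual register number;
      // virtReg2Index is its inverse, so BitVectors such as ImplicitDefed
      // can simply be sized by getNumVirtRegs().
      unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
      if (!MRI.reg_nodbg_empty(Reg))
        ++NumUsed;
    }
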
diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h
index 8b6082d18193..ba50f4e42302 100644
--- a/lib/CodeGen/VirtRegMap.h
+++ b/lib/CodeGen/VirtRegMap.h
@@ -35,6 +35,7 @@ namespace llvm {
class TargetInstrInfo;
class TargetRegisterInfo;
class raw_ostream;
+ class SlotIndexes;
class VirtRegMap : public MachineFunctionPass {
public:
@@ -80,7 +81,7 @@ namespace llvm {
/// Virt2SplitKillMap - This maps a split virtual register to its last use
/// (kill) index.
- IndexedMap<SlotIndex> Virt2SplitKillMap;
+ IndexedMap<SlotIndex, VirtReg2IndexFunctor> Virt2SplitKillMap;
/// ReMatMap - This is virtual register to re-materialized instruction
/// mapping. Each virtual register whose definition is going to be
@@ -134,6 +135,9 @@ namespace llvm {
/// UnusedRegs - A list of physical registers that have not been used.
BitVector UnusedRegs;
+ /// createSpillSlot - Allocate a spill slot for RC from MFI.
+ unsigned createSpillSlot(const TargetRegisterClass *RC);
+
VirtRegMap(const VirtRegMap&); // DO NOT IMPLEMENT
void operator=(const VirtRegMap&); // DO NOT IMPLEMENT
@@ -153,10 +157,13 @@ namespace llvm {
}
MachineFunction &getMachineFunction() const {
- assert(MF && "getMachineFunction called before runOnMAchineFunction");
+ assert(MF && "getMachineFunction called before runOnMachineFunction");
return *MF;
}
+ MachineRegisterInfo &getRegInfo() const { return *MRI; }
+ const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; }
+
void grow();
/// @brief returns true if the specified virtual register is
@@ -207,10 +214,19 @@ namespace llvm {
}
/// @brief returns the live interval virtReg is split from.
- unsigned getPreSplitReg(unsigned virtReg) {
+ unsigned getPreSplitReg(unsigned virtReg) const {
return Virt2SplitMap[virtReg];
}
+ /// getOriginal - Return the original virtual register that VirtReg descends
+ /// from through splitting.
+ /// A register that was not created by splitting is its own original.
+ /// This operation is idempotent.
+ unsigned getOriginal(unsigned VirtReg) const {
+ unsigned Orig = getPreSplitReg(VirtReg);
+ return Orig ? Orig : VirtReg;
+ }
+
/// @brief returns true if the specified virtual register is not
/// mapped to a stack slot or rematerialized.
bool isAssignedReg(unsigned virtReg) const {
@@ -426,12 +442,12 @@ namespace llvm {
/// @brief Mark the specified register as being implicitly defined.
void setIsImplicitlyDefined(unsigned VirtReg) {
- ImplicitDefed.set(VirtReg-TargetRegisterInfo::FirstVirtualRegister);
+ ImplicitDefed.set(TargetRegisterInfo::virtReg2Index(VirtReg));
}
/// @brief Returns true if the virtual register is implicitly defined.
bool isImplicitlyDefined(unsigned VirtReg) const {
- return ImplicitDefed[VirtReg-TargetRegisterInfo::FirstVirtualRegister];
+ return ImplicitDefed[TargetRegisterInfo::virtReg2Index(VirtReg)];
}
/// @brief Updates information about the specified virtual register's value
@@ -487,6 +503,13 @@ namespace llvm {
return 0;
}
+ /// rewrite - Rewrite all instructions in MF to use only physical registers
+ /// by mapping all virtual register operands to their assigned physical
+ /// registers.
+ ///
+ /// @param Indexes Optionally remove deleted instructions from indexes.
+ void rewrite(SlotIndexes *Indexes);
+
void print(raw_ostream &OS, const Module* M = 0) const;
void dump() const;
};
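
A hedged sketch of how a register allocator could drive the rewrite() entry point declared above once every live virtual register has a physical assignment; the assignVirt2Phys() call and the analysis lookup are assumptions drawn from surrounding context, not taken from this hunk.

    // During allocation (assumed API):
    //   VRM.assignVirt2Phys(VirtReg, PhysReg);
    // Afterwards, replace all virtual operands with physregs, delete identity
    // copies, and keep SlotIndexes in sync when that analysis is available.
    SlotIndexes *Indexes = getAnalysisIfAvailable<SlotIndexes>();
    VRM.rewrite(Indexes);  // passing 0 is fine; deleted MIs just stay unmapped
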
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 240d28cf3011..458a2134bf4a 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -22,8 +22,8 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
-#include <algorithm>
using namespace llvm;
STATISTIC(NumDSE , "Number of dead stores elided");
@@ -216,7 +216,8 @@ public:
<< SlotOrReMat-VirtRegMap::MAX_STACK_SLOT-1);
else
DEBUG(dbgs() << "Remembering SS#" << SlotOrReMat);
- DEBUG(dbgs() << " in physreg " << TRI->getName(Reg) << "\n");
+ DEBUG(dbgs() << " in physreg " << TRI->getName(Reg)
+ << (CanClobber ? " canclobber" : "") << "\n");
}
/// canClobberPhysRegForSS - Return true if the spiller is allowed to change
@@ -297,7 +298,7 @@ ComputeReloadLoc(MachineBasicBlock::iterator const InsertLoc,
const TargetLowering *TL = MF.getTarget().getTargetLowering();
if (!TL->isTypeLegal(TL->getPointerTy()))
- // Believe it or not, this is true on PIC16.
+ // Believe it or not, this is true on 16-bit targets like PIC16.
return InsertLoc;
const TargetRegisterClass *ptrRegClass =
@@ -462,25 +463,70 @@ static void findSinglePredSuccessor(MachineBasicBlock *MBB,
}
}
-/// InvalidateKill - Invalidate register kill information for a specific
-/// register. This also unsets the kills marker on the last kill operand.
-static void InvalidateKill(unsigned Reg,
- const TargetRegisterInfo* TRI,
- BitVector &RegKills,
- std::vector<MachineOperand*> &KillOps) {
- if (RegKills[Reg]) {
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- if (RegKills[*SR]) {
- KillOps[*SR]->setIsKill(false);
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- }
+/// ResurrectConfirmedKill - Helper for ResurrectKill. This register is killed
+/// but not re-defined and it's being reused. Remove the kill flag for the
+/// register and unset the kill's marker and last kill operand.
+static void ResurrectConfirmedKill(unsigned Reg, const TargetRegisterInfo* TRI,
+ BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ DEBUG(dbgs() << "Resurrect " << TRI->getName(Reg) << "\n");
+
+ MachineOperand *KillOp = KillOps[Reg];
+ KillOp->setIsKill(false);
+ // KillOps[Reg] might be a def of a super-register.
+ unsigned KReg = KillOp->getReg();
+ if (!RegKills[KReg])
+ return;
+
+ assert(KillOps[KReg] == KillOp && "invalid superreg kill flags");
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ // If it's a def of a super-register, its other sub-registers are no
+ // longer killed either.
+ for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
+ DEBUG(dbgs() << " Resurrect subreg " << TRI->getName(*SR) << "\n");
+
+ assert(KillOps[*SR] == KillOp && "invalid subreg kill flags");
+ KillOps[*SR] = NULL;
+ RegKills.reset(*SR);
+ }
+}
+
+/// ResurrectKill - Invalidate kill info associated with a previous MI. An
+/// optimization may have decided that it's safe to reuse a previously killed
+/// register. If we fail to erase the invalid kill flags, then the register
+/// scavenger may later clobber the register used by this MI. Note that this
+/// must be done even if this MI is being deleted! Consider:
+///
+/// USE $r1 (vreg1) <kill>
+/// ...
+/// $r1(vreg3) = COPY $r1 (vreg2)
+///
+/// RegAlloc has smartly assigned all three vregs to the same physreg. Initially
+/// vreg1's only use is a kill. The rewriter doesn't know it should be live
+/// until it rewrites vreg2. At that point it sees that the copy is dead and
+/// deletes it. However, deleting the copy implicitly forwards liveness of $r1
+/// (it's copy coalescing). We must resurrect $r1 by removing the kill flag at
+/// vreg1 before deleting the copy.
+static void ResurrectKill(MachineInstr &MI, unsigned Reg,
+ const TargetRegisterInfo* TRI, BitVector &RegKills,
+ std::vector<MachineOperand*> &KillOps) {
+ if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
+ return;
+ }
+ // No previous kill for this reg. Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI)
+ ResurrectConfirmedKill(SReg, TRI, RegKills, KillOps);
}
}
@@ -502,15 +548,22 @@ static void InvalidateKills(MachineInstr &MI,
KillRegs->push_back(Reg);
assert(Reg < KillOps.size());
if (KillOps[Reg] == &MO) {
+ // This operand was the kill, now no longer.
KillOps[Reg] = NULL;
RegKills.reset(Reg);
for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
if (RegKills[*SR]) {
+ assert(KillOps[*SR] == &MO && "bad subreg kill flags");
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
}
}
+ else {
+ // This operand may have reused a previously killed reg. Keep it live in
+ // case it continues to be used after erasing this instruction.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
+ }
}
}
@@ -578,44 +631,8 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
if (Reg == 0)
continue;
- if (RegKills[Reg] && KillOps[Reg]->getParent() != &MI) {
- // That can't be right. Register is killed but not re-defined and it's
- // being reused. Let's fix that.
- KillOps[Reg]->setIsKill(false);
- // KillOps[Reg] might be a def of a super-register.
- unsigned KReg = KillOps[Reg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- // Must be a def of a super-register. Its other sub-regsters are no
- // longer killed as well.
- for (const unsigned *SR = TRI->getSubRegisters(KReg); *SR; ++SR) {
- KillOps[*SR] = NULL;
- RegKills.reset(*SR);
- }
- } else {
- // Check for subreg kills as well.
- // d4 =
- // store d4, fi#0
- // ...
- // = s8<kill>
- // ...
- // = d4 <avoiding reload>
- for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
- unsigned SReg = *SR;
- if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
- KillOps[SReg]->setIsKill(false);
- unsigned KReg = KillOps[SReg]->getReg();
- KillOps[KReg] = NULL;
- RegKills.reset(KReg);
-
- for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
- KillOps[*SSR] = NULL;
- RegKills.reset(*SSR);
- }
- }
- }
- }
+ // This operand may have reused a previously killed reg. Keep it live.
+ ResurrectKill(MI, Reg, TRI, RegKills, KillOps);
if (MO.isKill()) {
RegKills.set(Reg);
@@ -770,7 +787,8 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
NotAvailable.insert(Reg);
else {
MBB.addLiveIn(Reg);
- InvalidateKill(Reg, TRI, RegKills, KillOps);
+ if (RegKills[Reg])
+ ResurrectConfirmedKill(Reg, TRI, RegKills, KillOps);
}
// Skip over the same register.
@@ -1056,6 +1074,7 @@ class LocalRewriter : public VirtRegRewriter {
const TargetRegisterInfo *TRI;
const TargetInstrInfo *TII;
VirtRegMap *VRM;
+ LiveIntervals *LIs;
BitVector AllocatableRegs;
DenseMap<MachineInstr*, unsigned> DistanceMap;
DenseMap<int, SmallVector<MachineInstr*,4> > Slot2DbgValues;
@@ -1068,6 +1087,11 @@ public:
LiveIntervals* LIs);
private:
+ void EraseInstr(MachineInstr *MI) {
+ VRM->RemoveMachineInstrFromMaps(MI);
+ LIs->RemoveMachineInstrFromMaps(MI);
+ MI->eraseFromParent();
+ }
bool OptimizeByUnfold2(unsigned VirtReg, int SS,
MachineBasicBlock::iterator &MII,
@@ -1110,6 +1134,12 @@ private:
bool InsertSpills(MachineInstr *MI);
+ void ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps);
+
void RewriteMBB(LiveIntervals *LIs,
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps);
@@ -1117,17 +1147,18 @@ private:
}
bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
- LiveIntervals* LIs) {
+ LiveIntervals* lis) {
MRI = &MF.getRegInfo();
TRI = MF.getTarget().getRegisterInfo();
TII = MF.getTarget().getInstrInfo();
VRM = &vrm;
+ LIs = lis;
AllocatableRegs = TRI->getAllocatableSet(MF);
DEBUG(dbgs() << "\n**** Local spiller rewriting function '"
<< MF.getFunction()->getName() << "':\n");
DEBUG(dbgs() << "**** Machine Instrs (NOTE! Does not include spills and"
" reloads!) ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Spills - Keep track of which spilled values are available in physregs
// so that we can choose to reuse the physregs instead of emitting
@@ -1178,7 +1209,7 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
}
DEBUG(dbgs() << "**** Post Machine Instrs ****\n");
- DEBUG(MF.dump());
+ DEBUG(MF.print(dbgs(), LIs->getSlotIndexes()));
// Mark unused spill slots.
MachineFrameInfo *MFI = MF.getFrameInfo();
@@ -1190,10 +1221,8 @@ bool LocalRewriter::runOnMachineFunction(MachineFunction &MF, VirtRegMap &vrm,
MFI->RemoveStackObject(SS);
for (unsigned j = 0, ee = DbgValues.size(); j != ee; ++j) {
MachineInstr *DVMI = DbgValues[j];
- MachineBasicBlock *DVMBB = DVMI->getParent();
DEBUG(dbgs() << "Removing debug info referencing FI#" << SS << '\n');
- VRM->RemoveMachineInstrFromMaps(DVMI);
- DVMBB->erase(DVMI);
+ EraseInstr(DVMI);
}
++NumDSS;
}
@@ -1273,8 +1302,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
VRM->transferRestorePts(&MI, NewMIs[0]);
MII = MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
++NumModRefUnfold;
// Unfold next instructions that fold the same SS.
@@ -1289,8 +1317,7 @@ OptimizeByUnfold2(unsigned VirtReg, int SS,
VRM->transferRestorePts(&NextMI, NewMIs[0]);
MBB->insert(NextMII, NewMIs[0]);
InvalidateKills(NextMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&NextMI);
- MBB->erase(&NextMI);
+ EraseInstr(&NextMI);
++NumModRefUnfold;
// Skip over dbg_value instructions.
while (NextMII != MBB->end() && NextMII->isDebugValue())
@@ -1417,8 +1444,7 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII,
VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
MII = FoldedMI;
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
return true;
}
}
@@ -1524,14 +1550,11 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII,
// Delete all 3 old instructions.
InvalidateKills(*ReloadMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(ReloadMI);
- MBB->erase(ReloadMI);
+ EraseInstr(ReloadMI);
InvalidateKills(*DefMI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DefMI);
- MBB->erase(DefMI);
+ EraseInstr(DefMI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
// If NewReg was previously holding value of some SS, it's now clobbered.
// This has to be done now because it's a physical register. When this
@@ -1574,8 +1597,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
bool CheckDef = PrevMII != MBB->begin();
if (CheckDef)
--PrevMII;
- VRM->RemoveMachineInstrFromMaps(LastStore);
- MBB->erase(LastStore);
+ EraseInstr(LastStore);
if (CheckDef) {
// Look at defs of killed registers on the store. Mark the defs
// as dead since the store has been deleted and they aren't
@@ -1586,8 +1608,7 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
- VRM->RemoveMachineInstrFromMaps(DeadDef);
- MBB->erase(DeadDef);
+ EraseInstr(DeadDef);
++NumDRM;
}
}
@@ -1612,10 +1633,18 @@ SpillRegToStackSlot(MachineBasicBlock::iterator &MII,
/// effect and all of its defs are dead.
static bool isSafeToDelete(MachineInstr &MI) {
const TargetInstrDesc &TID = MI.getDesc();
- if (TID.mayLoad() || TID.mayStore() || TID.isCall() || TID.isTerminator() ||
+ if (TID.mayLoad() || TID.mayStore() || TID.isTerminator() ||
TID.isCall() || TID.isBarrier() || TID.isReturn() ||
- TID.hasUnmodeledSideEffects())
+ MI.isLabel() || MI.isDebugValue() ||
+ MI.hasUnmodeledSideEffects())
return false;
+
+ // Technically speaking, inline asm without side effects and no defs can
+ // still be deleted. But there is so much bad inline asm code out there that
+ // we should let it be.
+ if (MI.isInlineAsm())
+ return false;
+
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isReg() || !MO.getReg())
@@ -1675,8 +1704,7 @@ TransferDeadness(unsigned Reg, BitVector &RegKills,
LastUD->setIsDead();
break;
}
- VRM->RemoveMachineInstrFromMaps(LastUDMI);
- MBB->erase(LastUDMI);
+ EraseInstr(LastUDMI);
} else {
LastUD->setIsKill();
RegKills.set(Reg);
@@ -1764,6 +1792,10 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
<< TRI->getName(InReg) << " for vreg"
<< VirtReg <<" instead of reloading into physreg "
<< TRI->getName(Phys) << '\n');
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to update
+ // the kill flags for the current instruction after processing it.
+
++NumOmitted;
continue;
} else if (InReg && InReg != Phys) {
@@ -1828,7 +1860,7 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI,
return true;
}
-/// InsertEmergencySpills - Insert spills after MI if requested by VRM. Return
+/// InsertSpills - Insert spills after MI if requested by VRM. Return
/// true if spills were inserted.
bool LocalRewriter::InsertSpills(MachineInstr *MI) {
if (!VRM->isSpillPt(MI))
@@ -1856,6 +1888,349 @@ bool LocalRewriter::InsertSpills(MachineInstr *MI) {
}
+/// ProcessUses - Process all of MI's spilled operands and all available
+/// operands.
+void LocalRewriter::ProcessUses(MachineInstr &MI, AvailableSpills &Spills,
+ std::vector<MachineInstr*> &MaybeDeadStores,
+ BitVector &RegKills,
+ ReuseInfo &ReusedOperands,
+ std::vector<MachineOperand*> &KillOps) {
+ // Clear kill info.
+ SmallSet<unsigned, 2> KilledMIRegs;
+ SmallVector<unsigned, 4> VirtUseOps;
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI.getOperand(i);
+ if (!MO.isReg() || MO.getReg() == 0)
+ continue; // Ignore non-register operands.
+
+ unsigned VirtReg = MO.getReg();
+
+ if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
+ // Ignore physregs for spilling, but remember that it is used by this
+ // function.
+ MRI->setPhysRegUsed(VirtReg);
+ continue;
+ }
+
+ // We want to process implicit virtual register uses first.
+ if (MO.isImplicit())
+ // If the virtual register is implicitly defined, emit an implicit_def
+ // before it so the scavenger knows it's "defined".
+ // FIXME: This is a horrible hack done by the register allocator to
+ // remat a definition with a virtual register operand.
+ VirtUseOps.insert(VirtUseOps.begin(), i);
+ else
+ VirtUseOps.push_back(i);
+
+ // A partial def causes problems because the same operand both reads and
+ // writes the register. This rewriter is designed to rewrite uses and defs
+ // separately, so a partial def would already have been rewritten to a
+ // physreg by the time we get to processing defs.
+ // Add an implicit use operand to model the partial def.
+ if (MO.isDef() && MO.getSubReg() && MI.readsVirtualRegister(VirtReg) &&
+ MI.findRegisterUseOperandIdx(VirtReg) == -1) {
+ VirtUseOps.insert(VirtUseOps.begin(), MI.getNumOperands());
+ MI.addOperand(MachineOperand::CreateReg(VirtReg,
+ false, // isDef
+ true)); // isImplicit
+ DEBUG(dbgs() << "Partial redef: " << MI);
+ }
+ }
+
+ // Process all of the spilled uses and all non spilled reg references.
+ SmallVector<int, 2> PotentialDeadStoreSlots;
+ KilledMIRegs.clear();
+ for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
+ unsigned i = VirtUseOps[j];
+ unsigned VirtReg = MI.getOperand(i).getReg();
+ assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
+ "Not a virtual register?");
+
+ unsigned SubIdx = MI.getOperand(i).getSubReg();
+ if (VRM->isAssignedReg(VirtReg)) {
+ // This virtual register was assigned a physreg!
+ unsigned Phys = VRM->getPhys(VirtReg);
+ MRI->setPhysRegUsed(Phys);
+ if (MI.getOperand(i).isDef())
+ ReusedOperands.markClobbered(Phys);
+ substitutePhysReg(MI.getOperand(i), Phys, *TRI);
+ if (VRM->isImplicitlyDefined(VirtReg))
+ // FIXME: Is this needed?
+ BuildMI(*MBB, &MI, MI.getDebugLoc(),
+ TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
+ continue;
+ }
+
+ // This virtual register is now known to be a spilled value.
+ if (!MI.getOperand(i).isUse())
+ continue; // Handle defs in the loop below (handle use&def here though)
+
+ bool AvoidReload = MI.getOperand(i).isUndef();
+ // Check if it is defined by an implicit def. It should not be spilled.
+ // Note, this is for correctness reasons, e.g.
+ // 8 %reg1024<def> = IMPLICIT_DEF
+ // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
+ // The live range [12, 14) is not part of the r1024 live interval since
+ // it's defined by an implicit def, so it will not conflict with the live
+ // interval of r1025. Now suppose both registers are spilled; you can
+ // easily see a situation where both registers are reloaded before
+ // the INSERT_SUBREG and the two target registers would overlap.
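+ // SSorRMId below is either a stack slot index or, for rematerialized
+ // values, a remat id (above VirtRegMap::MAX_STACK_SLOT, see the RM#/SS#
+ // debug output further down).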
+ bool DoReMat = VRM->isReMaterialized(VirtReg);
+ int SSorRMId = DoReMat
+ ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
+ int ReuseSlot = SSorRMId;
+
+ // Check to see if this stack slot is available.
+ unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
+
+ // If this is a sub-register use, make sure the reuse register is in the
+ // right register class. For example, for x86 not all of the 32-bit
+ // registers have accessible sub-registers.
+ // Similarly so for EXTRACT_SUBREG. Consider this:
+ // EDI = op
+ // MOV32_mr fi#1, EDI
+ // ...
+ // = EXTRACT_SUBREG fi#1
+ // fi#1 is available in EDI, but it cannot be reused because it's not in
+ // the right register class.
+ if (PhysReg && !AvoidReload && SubIdx) {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ if (!RC->contains(PhysReg))
+ PhysReg = 0;
+ }
+
+ if (PhysReg && !AvoidReload) {
+ // This spilled operand might be part of a two-address operand. If this
+ // is the case, then changing it will necessarily require changing the
+ // def part of the instruction as well. However, in some cases, we
+ // aren't allowed to modify the reused register. If none of these cases
+ // apply, reuse it.
+ bool CanReuse = true;
+ bool isTied = MI.isRegTiedToDefOperand(i);
+ if (isTied) {
+ // Okay, we have a two address operand. We can reuse this physreg as
+ // long as we are allowed to clobber the value and there isn't an
+ // earlier def that has already clobbered the physreg.
+ CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
+ Spills.canClobberPhysReg(PhysReg);
+ }
+ // If this is an asm, and a PhysReg alias is used elsewhere as an
+ // earlyclobber operand, we can't also use it as an input.
+ if (MI.isInlineAsm()) {
+ for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
+ MachineOperand &MOk = MI.getOperand(k);
+ if (MOk.isReg() && MOk.isEarlyClobber() &&
+ TRI->regsOverlap(MOk.getReg(), PhysReg)) {
+ CanReuse = false;
+ DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg << ": " << MOk << '\n');
+ break;
+ }
+ }
+ }
+
+ if (CanReuse) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg "
+ << TRI->getName(PhysReg) << " for vreg"
+ << VirtReg <<" instead of reloading into physreg "
+ << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+
+ // Reusing a physreg may resurrect it. But we expect ProcessUses to
+ // update the kill flags for the current instr after processing it.
+
+ // The only technical detail we have is that we don't know that
+ // PhysReg won't be clobbered by a reloaded stack slot that occurs
+ // later in the instruction. In particular, consider 'op V1, V2'.
+ // If V1 is available in physreg R0, we would choose to reuse it
+ // here, instead of reloading it into the register the allocator
+ // indicated (say R1). However, V2 might have to be reloaded
+ // later, and it might indicate that it needs to live in R0. When
+ // this occurs, we need to have information available that
+ // indicates it is safe to use R1 for the reload instead of R0.
+ //
+ // To further complicate matters, we might conflict with an alias,
+ // or R0 and R1 might not be compatible with each other. In this
+ // case, we actually insert a reload for V1 in R1, ensuring that
+ // we can get at R0 or its alias.
+ ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
+ VRM->getPhys(VirtReg), VirtReg);
+ if (isTied)
+ // Only mark it clobbered if this is a use&def operand.
+ ReusedOperands.markClobbered(PhysReg);
+ ++NumReused;
+
+ if (MI.getOperand(i).isKill() &&
+ ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
+
+ // The store of this spilled value is potentially dead, but we
+ // won't know for certain until we've confirmed that the re-use
+ // above is valid, which means waiting until the other operands
+ // are processed. For now we just track the spill slot, we'll
+ // remove it after the other operands are processed if valid.
+
+ PotentialDeadStoreSlots.push_back(ReuseSlot);
+ }
+
+ // Mark it isKill if there are no other uses of the same virtual
+ // register and it's not a two-address operand. IsKill will be
+ // unset if the reg is reused.
+ if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+ continue;
+ } // CanReuse
+
+ // Otherwise we have a situation where we have a two-address instruction
+ // whose mod/ref operand needs to be reloaded. This reload is already
+ // available in some register "PhysReg", but if we used PhysReg as the
+ // operand to our 2-addr instruction, the instruction would modify
+ // PhysReg. This isn't cool if something later uses PhysReg and expects
+ // to get its initial value.
+ //
+ // To avoid this problem, and to avoid doing a load right after a store,
+ // we emit a copy from PhysReg into the designated register for this
+ // operand.
+ //
+ // This case also applies to an earlyclobber'd PhysReg.
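+ // For example (illustrative): in a two-address 'R = op R, ...' whose
+ // reloaded value already sits in PhysReg, rewriting the operand to PhysReg
+ // would let the op clobber a value that may still be needed, so we copy
+ // PhysReg into DesignatedReg and use DesignatedReg instead.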
+ unsigned DesignatedReg = VRM->getPhys(VirtReg);
+ assert(DesignatedReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ DesignatedReg = ReusedOperands.
+ GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
+ MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ // If the mapped designated register is actually the physreg we have
+ // incoming, we don't need to insert a dead copy.
+ if (DesignatedReg == PhysReg) {
+ // If this stack slot value is already available, reuse it!
+ if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
+ DEBUG(dbgs() << "Reusing RM#"
+ << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
+ else
+ DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
+ DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
+ << " for vreg" << VirtReg
+ << " instead of reloading into same physreg.\n");
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ ReusedOperands.markClobbered(RReg);
+ ++NumReused;
+ continue;
+ }
+
+ MRI->setPhysRegUsed(DesignatedReg);
+ ReusedOperands.markClobbered(DesignatedReg);
+
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+ MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
+ TII->get(TargetOpcode::COPY),
+ DesignatedReg).addReg(PhysReg);
+ CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
+ UpdateKills(*CopyMI, TRI, RegKills, KillOps);
+
+ // This invalidates DesignatedReg.
+ Spills.ClobberPhysReg(DesignatedReg);
+
+ Spills.addAvailable(ReuseSlot, DesignatedReg);
+ unsigned RReg =
+ SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ ++NumReused;
+ continue;
+ } // if (PhysReg)
+
+ // Otherwise, reload it and remember that we have it.
+ PhysReg = VRM->getPhys(VirtReg);
+ assert(PhysReg && "Must map virtreg to physreg!");
+
+ // Note that, if we reused a register for a previous operand, the
+ // register we want to reload into might not actually be
+ // available. If this occurs, use the register indicated by the
+ // reuser.
+ if (ReusedOperands.hasReuses())
+ PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
+ Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
+
+ MRI->setPhysRegUsed(PhysReg);
+ ReusedOperands.markClobbered(PhysReg);
+ if (AvoidReload)
+ ++NumAvoided;
+ else {
+ // Back-schedule reloads and remats.
+ MachineBasicBlock::iterator InsertLoc =
+ ComputeReloadLoc(MI, MBB->begin(), PhysReg, TRI, DoReMat,
+ SSorRMId, TII, *MBB->getParent());
+
+ if (DoReMat) {
+ ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
+ } else {
+ const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
+ TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
+ MachineInstr *LoadMI = prior(InsertLoc);
+ VRM->addSpillSlotUse(SSorRMId, LoadMI);
+ ++NumLoads;
+ DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
+ }
+ // This invalidates PhysReg.
+ Spills.ClobberPhysReg(PhysReg);
+
+ // Any stores to this stack slot are not dead anymore.
+ if (!DoReMat)
+ MaybeDeadStores[SSorRMId] = NULL;
+ Spills.addAvailable(SSorRMId, PhysReg);
+ // Assumes this is the last use. IsKill will be unset if reg is reused
+ // unless it's a two-address operand.
+ if (!MI.isRegTiedToDefOperand(i) &&
+ KilledMIRegs.count(VirtReg) == 0) {
+ MI.getOperand(i).setIsKill();
+ KilledMIRegs.insert(VirtReg);
+ }
+
+ UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
+ DEBUG(dbgs() << '\t' << *prior(InsertLoc));
+ }
+ unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
+ MI.getOperand(i).setReg(RReg);
+ MI.getOperand(i).setSubReg(0);
+ }
+
+ // Ok - now we can remove stores that have been confirmed dead.
+ for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
+ // This was the last use and the spilled value is still available
+ // for reuse. That means the spill was unnecessary!
+ int PDSSlot = PotentialDeadStoreSlots[j];
+ MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
+ if (DeadStore) {
+ DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
+ InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
+ EraseInstr(DeadStore);
+ MaybeDeadStores[PDSSlot] = NULL;
+ ++NumDSE;
+ }
+ }
+}
+
/// rewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avoid reloading vregs.
void
@@ -1880,9 +2255,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// ReMatDefs - These are rematerializable def MIs which are not deleted.
SmallSet<MachineInstr*, 4> ReMatDefs;
- // Clear kill info.
- SmallSet<unsigned, 2> KilledMIRegs;
-
// Keep track of the registers we have already spilled in case there are
// multiple defs of the same register in MI.
SmallSet<unsigned, 8> SpilledMIRegs;
@@ -1918,323 +2290,8 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
ReuseInfo ReusedOperands(MI, TRI);
- SmallVector<unsigned, 4> VirtUseOps;
- for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
- MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || MO.getReg() == 0)
- continue; // Ignore non-register operands.
-
- unsigned VirtReg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(VirtReg)) {
- // Ignore physregs for spilling, but remember that it is used by this
- // function.
- MRI->setPhysRegUsed(VirtReg);
- continue;
- }
-
- // We want to process implicit virtual register uses first.
- if (MO.isImplicit())
- // If the virtual register is implicitly defined, emit a implicit_def
- // before so scavenger knows it's "defined".
- // FIXME: This is a horrible hack done the by register allocator to
- // remat a definition with virtual register operand.
- VirtUseOps.insert(VirtUseOps.begin(), i);
- else
- VirtUseOps.push_back(i);
- }
-
- // Process all of the spilled uses and all non spilled reg references.
- SmallVector<int, 2> PotentialDeadStoreSlots;
- KilledMIRegs.clear();
- for (unsigned j = 0, e = VirtUseOps.size(); j != e; ++j) {
- unsigned i = VirtUseOps[j];
- unsigned VirtReg = MI.getOperand(i).getReg();
- assert(TargetRegisterInfo::isVirtualRegister(VirtReg) &&
- "Not a virtual register?");
-
- unsigned SubIdx = MI.getOperand(i).getSubReg();
- if (VRM->isAssignedReg(VirtReg)) {
- // This virtual register was assigned a physreg!
- unsigned Phys = VRM->getPhys(VirtReg);
- MRI->setPhysRegUsed(Phys);
- if (MI.getOperand(i).isDef())
- ReusedOperands.markClobbered(Phys);
- substitutePhysReg(MI.getOperand(i), Phys, *TRI);
- if (VRM->isImplicitlyDefined(VirtReg))
- // FIXME: Is this needed?
- BuildMI(*MBB, &MI, MI.getDebugLoc(),
- TII->get(TargetOpcode::IMPLICIT_DEF), Phys);
- continue;
- }
-
- // This virtual register is now known to be a spilled value.
- if (!MI.getOperand(i).isUse())
- continue; // Handle defs in the loop below (handle use&def here though)
-
- bool AvoidReload = MI.getOperand(i).isUndef();
- // Check if it is defined by an implicit def. It should not be spilled.
- // Note, this is for correctness reason. e.g.
- // 8 %reg1024<def> = IMPLICIT_DEF
- // 12 %reg1024<def> = INSERT_SUBREG %reg1024<kill>, %reg1025, 2
- // The live range [12, 14) are not part of the r1024 live interval since
- // it's defined by an implicit def. It will not conflicts with live
- // interval of r1025. Now suppose both registers are spilled, you can
- // easily see a situation where both registers are reloaded before
- // the INSERT_SUBREG and both target registers that would overlap.
- bool DoReMat = VRM->isReMaterialized(VirtReg);
- int SSorRMId = DoReMat
- ? VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg);
- int ReuseSlot = SSorRMId;
-
- // Check to see if this stack slot is available.
- unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
-
- // If this is a sub-register use, make sure the reuse register is in the
- // right register class. For example, for x86 not all of the 32-bit
- // registers have accessible sub-registers.
- // Similarly so for EXTRACT_SUBREG. Consider this:
- // EDI = op
- // MOV32_mr fi#1, EDI
- // ...
- // = EXTRACT_SUBREG fi#1
- // fi#1 is available in EDI, but it cannot be reused because it's not in
- // the right register file.
- if (PhysReg && !AvoidReload && SubIdx) {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- if (!RC->contains(PhysReg))
- PhysReg = 0;
- }
-
- if (PhysReg && !AvoidReload) {
- // This spilled operand might be part of a two-address operand. If this
- // is the case, then changing it will necessarily require changing the
- // def part of the instruction as well. However, in some cases, we
- // aren't allowed to modify the reused register. If none of these cases
- // apply, reuse it.
- bool CanReuse = true;
- bool isTied = MI.isRegTiedToDefOperand(i);
- if (isTied) {
- // Okay, we have a two address operand. We can reuse this physreg as
- // long as we are allowed to clobber the value and there isn't an
- // earlier def that has already clobbered the physreg.
- CanReuse = !ReusedOperands.isClobbered(PhysReg) &&
- Spills.canClobberPhysReg(PhysReg);
- }
- // If this is an asm, and a PhysReg alias is used elsewhere as an
- // earlyclobber operand, we can't also use it as an input.
- if (MI.isInlineAsm()) {
- for (unsigned k = 0, e = MI.getNumOperands(); k != e; ++k) {
- MachineOperand &MOk = MI.getOperand(k);
- if (MOk.isReg() && MOk.isEarlyClobber() &&
- TRI->regsOverlap(MOk.getReg(), PhysReg)) {
- CanReuse = false;
- DEBUG(dbgs() << "Not reusing physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg << ": " << MOk << '\n');
- break;
- }
- }
- }
-
- if (CanReuse) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg "
- << TRI->getName(PhysReg) << " for vreg"
- << VirtReg <<" instead of reloading into physreg "
- << TRI->getName(VRM->getPhys(VirtReg)) << '\n');
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
-
- // The only technical detail we have is that we don't know that
- // PhysReg won't be clobbered by a reloaded stack slot that occurs
- // later in the instruction. In particular, consider 'op V1, V2'.
- // If V1 is available in physreg R0, we would choose to reuse it
- // here, instead of reloading it into the register the allocator
- // indicated (say R1). However, V2 might have to be reloaded
- // later, and it might indicate that it needs to live in R0. When
- // this occurs, we need to have information available that
- // indicates it is safe to use R1 for the reload instead of R0.
- //
- // To further complicate matters, we might conflict with an alias,
- // or R0 and R1 might not be compatible with each other. In this
- // case, we actually insert a reload for V1 in R1, ensuring that
- // we can get at R0 or its alias.
- ReusedOperands.addReuse(i, ReuseSlot, PhysReg,
- VRM->getPhys(VirtReg), VirtReg);
- if (isTied)
- // Only mark it clobbered if this is a use&def operand.
- ReusedOperands.markClobbered(PhysReg);
- ++NumReused;
-
- if (MI.getOperand(i).isKill() &&
- ReuseSlot <= VirtRegMap::MAX_STACK_SLOT) {
-
- // The store of this spilled value is potentially dead, but we
- // won't know for certain until we've confirmed that the re-use
- // above is valid, which means waiting until the other operands
- // are processed. For now we just track the spill slot, we'll
- // remove it after the other operands are processed if valid.
-
- PotentialDeadStoreSlots.push_back(ReuseSlot);
- }
-
- // Mark is isKill if it's there no other uses of the same virtual
- // register and it's not a two-address operand. IsKill will be
- // unset if reg is reused.
- if (!isTied && KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- continue;
- } // CanReuse
-
- // Otherwise we have a situation where we have a two-address instruction
- // whose mod/ref operand needs to be reloaded. This reload is already
- // available in some register "PhysReg", but if we used PhysReg as the
- // operand to our 2-addr instruction, the instruction would modify
- // PhysReg. This isn't cool if something later uses PhysReg and expects
- // to get its initial value.
- //
- // To avoid this problem, and to avoid doing a load right after a store,
- // we emit a copy from PhysReg into the designated register for this
- // operand.
- //
- // This case also applies to an earlyclobber'd PhysReg.
- unsigned DesignatedReg = VRM->getPhys(VirtReg);
- assert(DesignatedReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- DesignatedReg = ReusedOperands.
- GetRegForReload(VirtReg, DesignatedReg, &MI, Spills,
- MaybeDeadStores, RegKills, KillOps, *VRM);
-
- // If the mapped designated register is actually the physreg we have
- // incoming, we don't need to inserted a dead copy.
- if (DesignatedReg == PhysReg) {
- // If this stack slot value is already available, reuse it!
- if (ReuseSlot > VirtRegMap::MAX_STACK_SLOT)
- DEBUG(dbgs() << "Reusing RM#"
- << ReuseSlot-VirtRegMap::MAX_STACK_SLOT-1);
- else
- DEBUG(dbgs() << "Reusing SS#" << ReuseSlot);
- DEBUG(dbgs() << " from physreg " << TRI->getName(PhysReg)
- << " for vreg" << VirtReg
- << " instead of reloading into same physreg.\n");
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- ReusedOperands.markClobbered(RReg);
- ++NumReused;
- continue;
- }
-
- MRI->setPhysRegUsed(DesignatedReg);
- ReusedOperands.markClobbered(DesignatedReg);
-
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
- MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(),
- TII->get(TargetOpcode::COPY),
- DesignatedReg).addReg(PhysReg);
- CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse);
- UpdateKills(*CopyMI, TRI, RegKills, KillOps);
-
- // This invalidates DesignatedReg.
- Spills.ClobberPhysReg(DesignatedReg);
-
- Spills.addAvailable(ReuseSlot, DesignatedReg);
- unsigned RReg =
- SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- DEBUG(dbgs() << '\t' << *prior(MII));
- ++NumReused;
- continue;
- } // if (PhysReg)
-
- // Otherwise, reload it and remember that we have it.
- PhysReg = VRM->getPhys(VirtReg);
- assert(PhysReg && "Must map virtreg to physreg!");
-
- // Note that, if we reused a register for a previous operand, the
- // register we want to reload into might not actually be
- // available. If this occurs, use the register indicated by the
- // reuser.
- if (ReusedOperands.hasReuses())
- PhysReg = ReusedOperands.GetRegForReload(VirtReg, PhysReg, &MI,
- Spills, MaybeDeadStores, RegKills, KillOps, *VRM);
-
- MRI->setPhysRegUsed(PhysReg);
- ReusedOperands.markClobbered(PhysReg);
- if (AvoidReload)
- ++NumAvoided;
- else {
- // Back-schedule reloads and remats.
- MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(MII, MBB->begin(), PhysReg, TRI, DoReMat,
- SSorRMId, TII, MF);
-
- if (DoReMat) {
- ReMaterialize(*MBB, InsertLoc, PhysReg, VirtReg, TII, TRI, *VRM);
- } else {
- const TargetRegisterClass* RC = MRI->getRegClass(VirtReg);
- TII->loadRegFromStackSlot(*MBB, InsertLoc, PhysReg, SSorRMId, RC,TRI);
- MachineInstr *LoadMI = prior(InsertLoc);
- VRM->addSpillSlotUse(SSorRMId, LoadMI);
- ++NumLoads;
- DistanceMap.insert(std::make_pair(LoadMI, DistanceMap.size()));
- }
- // This invalidates PhysReg.
- Spills.ClobberPhysReg(PhysReg);
-
- // Any stores to this stack slot are not dead anymore.
- if (!DoReMat)
- MaybeDeadStores[SSorRMId] = NULL;
- Spills.addAvailable(SSorRMId, PhysReg);
- // Assumes this is the last use. IsKill will be unset if reg is reused
- // unless it's a two-address operand.
- if (!MI.isRegTiedToDefOperand(i) &&
- KilledMIRegs.count(VirtReg) == 0) {
- MI.getOperand(i).setIsKill();
- KilledMIRegs.insert(VirtReg);
- }
-
- UpdateKills(*prior(InsertLoc), TRI, RegKills, KillOps);
- DEBUG(dbgs() << '\t' << *prior(InsertLoc));
- }
- unsigned RReg = SubIdx ? TRI->getSubReg(PhysReg, SubIdx) : PhysReg;
- MI.getOperand(i).setReg(RReg);
- MI.getOperand(i).setSubReg(0);
- }
-
- // Ok - now we can remove stores that have been confirmed dead.
- for (unsigned j = 0, e = PotentialDeadStoreSlots.size(); j != e; ++j) {
- // This was the last use and the spilled value is still available
- // for reuse. That means the spill was unnecessary!
- int PDSSlot = PotentialDeadStoreSlots[j];
- MachineInstr* DeadStore = MaybeDeadStores[PDSSlot];
- if (DeadStore) {
- DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
- InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
- MaybeDeadStores[PDSSlot] = NULL;
- ++NumDSE;
- }
- }
+ ProcessUses(MI, Spills, MaybeDeadStores, RegKills, ReusedOperands, KillOps);
DEBUG(dbgs() << '\t' << MI);
@@ -2288,14 +2345,13 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
BackTracked = true;
} else {
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
- // Unset last kill since it's being reused.
- InvalidateKill(InReg, TRI, RegKills, KillOps);
+ // InvalidateKills resurrects any prior kill of the copy's source,
+ // allowing the source reg to be reused in place of the copy.
Spills.disallowClobberPhysReg(InReg);
}
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
goto ProcessNextInst;
}
@@ -2306,8 +2362,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
TII->unfoldMemoryOperand(MF, &MI, PhysReg, false, false, NewMIs)){
MBB->insert(MII, NewMIs[0]);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII; // backtrack to the unfolded instruction.
BackTracked = true;
@@ -2343,8 +2398,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
MBB->insert(MII, NewStore);
VRM->addSpillSlotUse(SS, NewStore);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
--NextMII;
--NextMII; // backtrack to the unfolded instruction.
@@ -2359,8 +2413,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// If we get here, the store is dead, nuke it now.
DEBUG(dbgs() << "Removed dead store:\t" << *DeadStore);
InvalidateKills(*DeadStore, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(DeadStore);
- MBB->erase(DeadStore);
+ EraseInstr(DeadStore);
if (!NewStore)
++NumDSE;
}
@@ -2437,8 +2490,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Last def is now dead.
TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps);
}
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
@@ -2514,8 +2566,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
++NumDCE;
DEBUG(dbgs() << "Removing now-noop copy: " << MI);
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
UpdateKills(*LastStore, TRI, RegKills, KillOps);
goto ProcessNextInst;
@@ -2526,8 +2577,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs,
// Delete dead instructions without side effects.
if (!Erased && !BackTracked && isSafeToDelete(MI)) {
InvalidateKills(MI, TRI, RegKills, KillOps);
- VRM->RemoveMachineInstrFromMaps(&MI);
- MBB->erase(&MI);
+ EraseInstr(&MI);
Erased = true;
}
if (!Erased)