From 66e41e3c6e8b8fbc48d5d3b4d2bd9ce0be4ecb75 Mon Sep 17 00:00:00 2001
From: Roman Divacky
Date: Tue, 13 Jul 2010 17:19:57 +0000
Subject: Update LLVM to r108243.

---
 lib/CodeGen/AggressiveAntiDepBreaker.cpp | 83 +-
 lib/CodeGen/AggressiveAntiDepBreaker.h | 1 +
 lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 94 +-
 lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 4 +-
 lib/CodeGen/AsmPrinter/DIE.cpp | 2 +
 lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 246 ++-
 lib/CodeGen/AsmPrinter/DwarfDebug.h | 18 +-
 lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 6 +-
 lib/CodeGen/BranchFolding.cpp | 68 +-
 lib/CodeGen/BranchFolding.h | 5 +-
 lib/CodeGen/CMakeLists.txt | 7 +-
 lib/CodeGen/CalcSpillWeights.cpp | 2 +-
 lib/CodeGen/CallingConvLower.cpp | 177 ++
 lib/CodeGen/CodePlacementOpt.cpp | 4 +
 lib/CodeGen/CriticalAntiDepBreaker.cpp | 158 +-
 lib/CodeGen/CriticalAntiDepBreaker.h | 5 +-
 lib/CodeGen/DwarfEHPrepare.cpp | 110 +-
 lib/CodeGen/ELFCodeEmitter.cpp | 2 +-
 lib/CodeGen/ExactHazardRecognizer.cpp | 180 ---
 lib/CodeGen/ExactHazardRecognizer.h | 86 -
 lib/CodeGen/GCStrategy.cpp | 6 +-
 lib/CodeGen/IfConversion.cpp | 400 +++--
 lib/CodeGen/InlineSpiller.cpp | 408 +++++
 lib/CodeGen/IntrinsicLowering.cpp | 51 +-
 lib/CodeGen/LLVMTargetMachine.cpp | 15 +-
 lib/CodeGen/LatencyPriorityQueue.cpp | 2 +-
 lib/CodeGen/LiveInterval.cpp | 66 +-
 lib/CodeGen/LiveIntervalAnalysis.cpp | 236 +--
 lib/CodeGen/LiveStackAnalysis.cpp | 4 +-
 lib/CodeGen/LiveVariables.cpp | 9 +-
 lib/CodeGen/LowerSubregs.cpp | 217 +--
 lib/CodeGen/MachineBasicBlock.cpp | 129 +-
 lib/CodeGen/MachineCSE.cpp | 46 +-
 lib/CodeGen/MachineDominators.cpp | 1 -
 lib/CodeGen/MachineFunction.cpp | 16 +-
 lib/CodeGen/MachineInstr.cpp | 113 +-
 lib/CodeGen/MachineLICM.cpp | 118 +-
 lib/CodeGen/MachineRegisterInfo.cpp | 130 +-
 lib/CodeGen/MachineSink.cpp | 102 +-
 lib/CodeGen/MachineVerifier.cpp | 3 +-
 lib/CodeGen/OptimizeExts.cpp | 24 +-
 lib/CodeGen/OptimizePHIs.cpp | 5 +
 lib/CodeGen/PBQP/HeuristicSolver.h | 2 +-
 lib/CodeGen/PBQP/Heuristics/Briggs.h | 5 +-
 lib/CodeGen/PHIElimination.cpp | 63 +-
 lib/CodeGen/Passes.cpp | 26 +-
 lib/CodeGen/PostRAHazardRecognizer.cpp | 180 +++
 lib/CodeGen/PostRASchedulerList.cpp | 51 +-
 lib/CodeGen/PreAllocSplitting.cpp | 89 +-
 lib/CodeGen/ProcessImplicitDefs.cpp | 33 +-
 lib/CodeGen/PrologEpilogInserter.cpp | 65 +-
 lib/CodeGen/RegAllocFast.cpp | 226 ++-
 lib/CodeGen/RegAllocLinearScan.cpp | 33 +-
 lib/CodeGen/RegAllocLocal.cpp | 1254 ---------------
 lib/CodeGen/RegAllocPBQP.cpp | 25 +-
 lib/CodeGen/RegisterCoalescer.cpp | 156 ++
 lib/CodeGen/RegisterScavenging.cpp | 33 +-
 lib/CodeGen/ScheduleDAG.cpp | 37 +-
 lib/CodeGen/ScheduleDAGEmit.cpp | 14 +-
 lib/CodeGen/ScheduleDAGInstrs.h | 5 +-
 lib/CodeGen/SelectionDAG/CMakeLists.txt | 1 -
 lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 179 ---
 lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 267 +--
 lib/CodeGen/SelectionDAG/FastISel.cpp | 354 ++--
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 63 +-
 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h | 144 --
 lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 133 +-
 lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 335 +++-
 lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 40 +-
 lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 150 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 72 +-
 lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4 +
 lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 8 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 +-
 lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 140 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 2 +-
 lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp |
17 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 241 +-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 5 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 158 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1248 +++++++------- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 12 + lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 421 ++--- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 2 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 218 ++- lib/CodeGen/ShadowStackGC.cpp | 14 +- lib/CodeGen/SimpleHazardRecognizer.h | 89 - lib/CodeGen/SimpleRegisterCoalescing.cpp | 1790 +++++---------------- lib/CodeGen/SimpleRegisterCoalescing.h | 84 +- lib/CodeGen/SjLjEHPrepare.cpp | 114 +- lib/CodeGen/SlotIndexes.cpp | 4 +- lib/CodeGen/Spiller.cpp | 209 ++- lib/CodeGen/Spiller.h | 18 +- lib/CodeGen/StackProtector.cpp | 16 +- lib/CodeGen/StackSlotColoring.cpp | 14 +- lib/CodeGen/StrongPHIElimination.cpp | 22 +- lib/CodeGen/TailDuplication.cpp | 18 +- lib/CodeGen/TargetInstrInfoImpl.cpp | 182 ++- lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 112 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 303 +++- lib/CodeGen/VirtRegRewriter.cpp | 110 +- 101 files changed, 6179 insertions(+), 6792 deletions(-) create mode 100644 lib/CodeGen/CallingConvLower.cpp delete mode 100644 lib/CodeGen/ExactHazardRecognizer.cpp delete mode 100644 lib/CodeGen/ExactHazardRecognizer.h create mode 100644 lib/CodeGen/InlineSpiller.cpp create mode 100644 lib/CodeGen/PostRAHazardRecognizer.cpp delete mode 100644 lib/CodeGen/RegAllocLocal.cpp delete mode 100644 lib/CodeGen/SelectionDAG/CallingConvLower.cpp delete mode 100644 lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h delete mode 100644 lib/CodeGen/SimpleHazardRecognizer.h (limited to 'lib/CodeGen') diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 4008a6a63cf8..a7189acc3fec 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -114,6 +115,7 @@ AggressiveAntiDepBreaker(MachineFunction& MFi, TargetSubtarget::RegClassVector& CriticalPathRCs) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)), State(NULL) { @@ -163,25 +165,27 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. 
- for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - State->UnionGroups(AliasReg, 0); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + State->UnionGroups(Reg, 0); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + State->UnionGroups(AliasReg, 0); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -390,7 +394,8 @@ void AggressiveAntiDepBreaker::PrescanInstruction(MachineInstr *MI, // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq()) { + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -443,6 +448,26 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, std::multimap& RegRefs = State->GetRegRefs(); + // If MI's uses have special allocation requirement, don't allow + // any use registers to be changed. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register uses for this instruction and update // live-ranges, groups and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -459,10 +484,7 @@ void AggressiveAntiDepBreaker::ScanInstruction(MachineInstr *MI, // for the register. HandleLastUse(Reg, Count, "(last-use)"); - // If MI's uses have special allocation requirement, don't allow - // any use registers to be changed. Also assume all registers - // used in a call must not be changed (ABI). - if (MI->getDesc().isCall() || MI->getDesc().hasExtraSrcRegAllocReq()) { + if (Special) { DEBUG(if (State->GetGroup(Reg) != 0) dbgs() << "->g0(alloc-req)"); State->UnionGroups(Reg, 0); } @@ -604,8 +626,12 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( // order. If that register is available, and the corresponding // registers are available for the other group subregisters, then we // can use those registers to rename. + + // FIXME: Using getMinimalPhysRegClass is very conservative. We should + // check every use of the register and find the largest register class + // that can be used in all of them. 
const TargetRegisterClass *SuperRC = - TRI->getPhysicalRegisterRegClass(SuperReg, MVT::Other); + TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); const TargetRegisterClass::iterator RB = SuperRC->allocation_order_begin(MF); const TargetRegisterClass::iterator RE = SuperRC->allocation_order_end(MF); @@ -905,6 +931,19 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( AggressiveAntiDepState::RegisterReference>::iterator Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second.Operand->setReg(NewReg); + // If the SU for the instruction being updated has debug + // information related to the anti-dependency register, make + // sure to update that as well. + const SUnit *SU = MISUnitMap[Q->second.Operand->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } } // We just went back in time and modified history; the diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 506d43e7f3fc..91ebb850d19d 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -115,6 +115,7 @@ namespace llvm { class AggressiveAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 5a0c27b300ab..d9387a8e72c5 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -199,7 +199,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { case GlobalValue::LinkOnceODRLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: - case GlobalValue::LinkerPrivateLinkage: + case GlobalValue::LinkerPrivateWeakLinkage: if (MAI->getWeakDefDirective() != 0) { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); @@ -225,6 +225,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { break; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: + case GlobalValue::LinkerPrivateLinkage: break; default: llvm_unreachable("Unknown linkage type!"); @@ -330,7 +331,6 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), MangSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(MangSym); @@ -353,7 +353,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; - OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("__tlv_bootstrap"), + OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), PtrSize, 0); OutStreamer.EmitIntValue(0, PtrSize, 0); OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); @@ -428,20 +428,12 @@ void AsmPrinter::EmitFunctionHeader() { // Emit pre-function debug and/or EH information. 
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->BeginFunction(MF); - } else { - DE->BeginFunction(MF); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->BeginFunction(MF); } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginFunction(MF); - } else { - DD->beginFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginFunction(MF); } } @@ -458,14 +450,11 @@ void AsmPrinter::EmitFunctionEntryLabel() { } -/// EmitComments - Pretty-print comments for instructions. -static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { - const MachineFunction *MF = MI.getParent()->getParent(); - const TargetMachine &TM = MF->getTarget(); - - DebugLoc DL = MI.getDebugLoc(); +static void EmitDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. - DIScope Scope(DL.getScope(MF->getFunction()->getContext())); + DIScope Scope(DL.getScope(Ctx)); // Omit the directory, because it's likely to be long and uninteresting. if (Scope.Verify()) CommentOS << Scope.getFilename(); @@ -474,6 +463,23 @@ static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { CommentOS << ':' << DL.getLine(); if (DL.getCol() != 0) CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << "[ "; + EmitDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + +/// EmitComments - Pretty-print comments for instructions. +static void EmitComments(const MachineInstr &MI, raw_ostream &CommentOS) { + const MachineFunction *MF = MI.getParent()->getParent(); + const TargetMachine &TM = MF->getTarget(); + + DebugLoc DL = MI.getDebugLoc(); + if (!DL.isUnknown()) { // Print source line info. + EmitDebugLoc(DL, MF, CommentOS); CommentOS << '\n'; } @@ -611,12 +617,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->beginScope(II); - } else { - DD->beginScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->beginScope(II); } if (isVerbose()) @@ -649,12 +651,8 @@ void AsmPrinter::EmitFunctionBody() { } if (ShouldPrintDebugScopes) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endScope(II); - } else { - DD->endScope(II); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endScope(II); } } } @@ -692,20 +690,12 @@ void AsmPrinter::EmitFunctionBody() { // Emit post-function debug information. if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endFunction(MF); - } else { - DD->endFunction(MF); - } + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + DD->endFunction(MF); } if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndFunction(); - } else { - DE->EndFunction(); - } + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); + DE->EndFunction(); } MMI->EndFunction(); @@ -730,19 +720,15 @@ bool AsmPrinter::doFinalization(Module &M) { // Finalize debug and EH information. 
if (DE) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(EHTimerName, DWARFGroupName); - DE->EndModule(); - } else { + { + NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); DE->EndModule(); } delete DE; DE = 0; } if (DD) { - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - DD->endModule(); - } else { + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->endModule(); } delete DD; DD = 0; diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index ba6fed2a78ba..f6f3bae42a80 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -83,7 +83,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, unsigned LocCookie) const { // Tell SrcMgr about this buffer, it takes ownership of the buffer. SrcMgr.AddNewSourceBuffer(Buffer, SMLoc()); - AsmParser Parser(SrcMgr, OutContext, OutStreamer, *MAI); + AsmParser Parser(TM.getTarget(), SrcMgr, OutContext, OutStreamer, *MAI); OwningPtr TAP(TM.getTarget().createAsmParser(Parser)); if (!TAP) report_fatal_error("Inline asm not supported by this streamer because" @@ -279,7 +279,7 @@ void AsmPrinter::EmitInlineAsm(const MachineInstr *MI) const { // Okay, we finally have a value number. Ask the target to print this // operand! if (CurVariant == -1 || CurVariant == AsmPrinterVariant) { - unsigned OpNo = 1; + unsigned OpNo = 2; bool Error = false; diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index b2c70d51f5a5..21396ca37f06 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -201,6 +201,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data8: Size = 8; break; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; + case dwarf::DW_FORM_addr: Size = Asm->getTargetData().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); @@ -221,6 +222,7 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data8: return sizeof(int64_t); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); + case dwarf::DW_FORM_addr: return AP->getTargetData().getPointerSize(); default: llvm_unreachable("DIE Value form not supported yet"); break; } return 0; diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 890507cf3148..65c1d190216f 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -44,7 +44,8 @@ using namespace llvm; static cl::opt PrintDbgScope("print-dbgscope", cl::Hidden, cl::desc("Print DbgScope information for each machine instruction")); -static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, +static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", + cl::Hidden, cl::desc("Disable debug info printing")); static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, @@ -79,15 +80,13 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; - /// GVToDieMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton /// variables to debug information entries. 
- /// FIXME : Rename GVToDieMap -> NodeToDieMap - DenseMap GVToDieMap; + DenseMap MDNodeToDieMap; - /// GVToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton /// descriptors to debug information entries using a DIEEntry proxy. - /// FIXME : Rename - DenseMap GVToDIEEntryMap; + DenseMap MDNodeToDIEEntryMap; /// Globals - A map of globally visible named entities for this unit. /// @@ -123,25 +122,25 @@ public: /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. - DIE *getDIE(const MDNode *N) { return GVToDieMap.lookup(N); } + DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } /// insertDIE - Insert DIE into the map. void insertDIE(const MDNode *N, DIE *D) { - GVToDieMap.insert(std::make_pair(N, D)); + MDNodeToDieMap.insert(std::make_pair(N, D)); } /// getDIEEntry - Returns the debug information entry for the speciefied /// debug variable. DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap::iterator I = GVToDIEEntryMap.find(N); - if (I == GVToDIEEntryMap.end()) + DenseMap::iterator I = MDNodeToDIEEntryMap.find(N); + if (I == MDNodeToDIEEntryMap.end()) return NULL; return I->second; } /// insertDIEEntry - Insert debug information entry into the map. void insertDIEEntry(const MDNode *N, DIEEntry *E) { - GVToDIEEntryMap.insert(std::make_pair(N, E)); + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); } /// addDie - Adds or interns the DIE to the compile unit. @@ -321,12 +320,12 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfFrameSectionSym = DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; + DwarfDebugLineSectionSym = CurrentLineSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - if (TimePassesIsEnabled) { - NamedRegionTimer T(DbgTimerName, DWARFGroupName); - beginModule(M); - } else { - beginModule(M); + DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); + { + NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); + beginModule(M); } } DwarfDebug::~DwarfDebug() { @@ -378,7 +377,8 @@ DIEEntry *DwarfDebug::createDIEEntry(DIE *Entry) { void DwarfDebug::addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer) { if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); + DIEValue *Value = Integer == 1 ? 
+ DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); Die->addValue(Attribute, Form, Value); } @@ -866,6 +866,10 @@ void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) { } else if (Context.isNameSpace()) { DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); ContextDIE->addChild(Die); + } else if (Context.isSubprogram()) { + DIE *ContextDIE = createSubprogramDIE(DISubprogram(Context), + /*MakeDecl=*/false); + ContextDIE->addChild(Die); } else if (DIE *ContextDIE = getCompileUnit(Context)->getDIE(Context)) ContextDIE->addChild(Die); else @@ -1055,6 +1059,10 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (DIDescriptor(ContainingType).isCompositeType()) addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, getOrCreateTypeDIE(DIType(ContainingType))); + else { + DIDescriptor Context = CTy.getContext(); + addToContextOwner(&Buffer, Context); + } break; } default: @@ -1065,8 +1073,9 @@ void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name); - if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { + if (Tag == dwarf::DW_TAG_enumeration_type || Tag == dwarf::DW_TAG_class_type + || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) + { // Add size if non-zero (derived types might be zero-sized.) if (Size) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); @@ -1329,6 +1338,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) { // DW_TAG_inlined_subroutine may refer to this DIE. SPCU->insertDIE(SP, SPDie); + // Add to context owner. + addToContextOwner(SPDie, SP.getContext()); + return SPDie; } @@ -1379,6 +1391,7 @@ static bool isSubprogramContext(const MDNode *Context) { DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { CompileUnit *SPCU = getCompileUnit(SPNode); DIE *SPDie = SPCU->getDIE(SPNode); + assert(SPDie && "Unable to find subprogram DIE!"); DISubprogram SP(SPNode); @@ -1412,6 +1425,14 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(const MDNode *SPNode) { SPCU->addDie(SPDie); } + // Pick up abstract subprogram DIE. 
+ if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + SPDie = new DIE(dwarf::DW_TAG_subprogram); + addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, + dwarf::DW_FORM_ref4, AbsSPDIE); + SPCU->addDie(SPDie); + } + addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, @@ -1483,7 +1504,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) { const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - if (StartLabel == FunctionBeginSym || EndLabel == 0) { + if (StartLabel == 0 || EndLabel == 0) { assert (0 && "Unexpected Start and End labels for a inlined scope!"); return 0; } @@ -1605,11 +1626,13 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) { // FIXME : Handle getNumOperands != 3 if (DVInsn->getNumOperands() == 3) { if (DVInsn->getOperand(0).isReg()) - updated = addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = + addRegisterAddress(VariableDie, DVLabel, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isImm()) updated = addConstantValue(VariableDie, DVLabel, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isFPImm()) - updated = addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); + updated = + addConstantFPValue(VariableDie, DVLabel, DVInsn->getOperand(0)); } else { MachineLocation Location = Asm->getDebugValueLocation(DVInsn); if (Location.getReg()) { @@ -1682,8 +1705,13 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) { if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(Scope); else if (DS.isSubprogram()) { - if (Scope->isAbstractScope()) + ProcessedSPNodes.insert(DS); + if (Scope->isAbstractScope()) { ScopeDIE = getCompileUnit(DS)->getDIE(DS); + // Note down abstract DIE. + if (ScopeDIE) + AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); + } else ScopeDIE = updateSubprogramScopeDIE(DS); } @@ -1782,11 +1810,11 @@ void DwarfDebug::constructCompileUnit(const MDNode *N) { addString(Die, dwarf::DW_AT_name, dwarf::DW_FORM_string, FN); // Use DW_AT_entry_pc instead of DW_AT_low_pc/DW_AT_high_pc pair. This // simplifies debug range entries. - addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_data4, 0); + addUInt(Die, dwarf::DW_AT_entry_pc, dwarf::DW_FORM_addr, 0); // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. It is always zero when only one - // compile unit is emitted in one object file. - addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + // compile unit in debug_line section. This offset is calculated + // during endMoudle(). + addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); if (!Dir.empty()) addString(Die, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string, Dir); @@ -1996,6 +2024,40 @@ void DwarfDebug::beginModule(Module *M) { /// void DwarfDebug::endModule() { if (!FirstCU) return; + const Module *M = MMI->getModule(); + if (NamedMDNode *AllSPs = M->getNamedMetadata("llvm.dbg.sp")) { + for (unsigned SI = 0, SE = AllSPs->getNumOperands(); SI != SE; ++SI) { + if (ProcessedSPNodes.count(AllSPs->getOperand(SI)) != 0) continue; + DISubprogram SP(AllSPs->getOperand(SI)); + if (!SP.Verify()) continue; + + // Collect info for variables that were optimized out. 
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", getRealLinkageName(FName))); + if (!NMD) continue; + unsigned E = NMD->getNumOperands(); + if (!E) continue; + DbgScope *Scope = new DbgScope(NULL, DIDescriptor(SP), NULL); + for (unsigned I = 0; I != E; ++I) { + DIVariable DV(NMD->getOperand(I)); + if (!DV.Verify()) continue; + Scope->addVariable(new DbgVariable(DV)); + } + + // Construct subprogram DIE and add variables DIEs. + constructSubprogramDIE(SP); + DIE *ScopeDIE = getCompileUnit(SP)->getDIE(SP); + const SmallVector &Variables = Scope->getVariables(); + for (unsigned i = 0, N = Variables.size(); i < N; ++i) { + DIE *VariableDIE = constructVariableDIE(Variables[i], Scope); + if (VariableDIE) + ScopeDIE->addChild(VariableDIE); + } + } + } // Attach DW_AT_inline attribute with inlined subprogram DIEs. for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), @@ -2037,15 +2099,15 @@ void DwarfDebug::endModule() { // Compute DIE offsets and sizes. computeSizeAndOffsets(); + // Emit source line correspondence into a debug line section. + emitDebugLines(); + // Emit all the DIEs into a debug info section emitDebugInfo(); // Corresponding abbreviations into a abbrev section. emitAbbreviations(); - // Emit source line correspondence into a debug line section. - emitDebugLines(); - // Emit info into a debug pubnames section. emitDebugPubNames(); @@ -2150,8 +2212,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { } /// collectVariableInfo - Populate DbgScope entries with variables' info. -void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { - SmallPtrSet Processed; +void +DwarfDebug::collectVariableInfo(const MachineFunction *MF, + SmallPtrSet &Processed) { /// collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); @@ -2180,16 +2243,23 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { if (Processed.count(DV) != 0) continue; + const MachineInstr *PrevMI = MInsn; for (SmallVector::iterator MI = I+1, ME = DbgValues.end(); MI != ME; ++MI) { const MDNode *Var = (*MI)->getOperand((*MI)->getNumOperands()-1).getMetadata(); - if (Var == DV && isDbgValueInDefinedReg(*MI)) + if (Var == DV && isDbgValueInDefinedReg(*MI) && + !PrevMI->isIdenticalTo(*MI)) MultipleValues.push_back(*MI); + PrevMI = *MI; } DbgScope *Scope = findDbgScope(MInsn); - if (!Scope && DV.getTag() == dwarf::DW_TAG_arg_variable) + bool CurFnArg = false; + if (DV.getTag() == dwarf::DW_TAG_arg_variable && + DISubprogram(DV.getContext()).describes(MF->getFunction())) + CurFnArg = true; + if (!Scope && CurFnArg) Scope = CurrentFnDbgScope; // If variable scope is not found then skip this variable. 
if (!Scope) @@ -2198,7 +2268,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { Processed.insert(DV); DbgVariable *RegVar = new DbgVariable(DV); Scope->addVariable(RegVar); - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + if (!CurFnArg) DbgVariableLabelsMap[RegVar] = getLabelBeforeInsn(MInsn); if (DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc())) { DbgVariableToDbgInstMap[AbsVar] = MInsn; @@ -2217,7 +2287,8 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { const MachineInstr *Begin = NULL; const MachineInstr *End = NULL; for (SmallVector::iterator - MVI = MultipleValues.begin(), MVE = MultipleValues.end(); MVI != MVE; ++MVI) { + MVI = MultipleValues.begin(), MVE = MultipleValues.end(); + MVI != MVE; ++MVI) { if (!Begin) { Begin = *MVI; continue; @@ -2241,8 +2312,11 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF) { } // Collect info for variables that were optimized out. + const Function *F = MF->getFunction(); + const Module *M = F->getParent(); if (NamedMDNode *NMD = - MF->getFunction()->getParent()->getNamedMetadata("llvm.dbg.lv")) { + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(F->getName())))) { for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { DIVariable DV(cast_or_null(NMD->getOperand(i))); if (!DV || !Processed.insert(DV)) @@ -2319,7 +2393,8 @@ void DwarfDebug::endScope(const MachineInstr *MI) { } /// getOrCreateDbgScope - Create DbgScope for the scope. -DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *InlinedAt) { +DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, + const MDNode *InlinedAt) { if (!InlinedAt) { DbgScope *WScope = DbgScopeMap.lookup(Scope); if (WScope) @@ -2335,13 +2410,20 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl if (!WScope->getParent()) { StringRef SPName = DISubprogram(Scope).getLinkageName(); - if (SPName == Asm->MF->getFunction()->getName()) + // We used to check only for a linkage name, but that fails + // since we began omitting the linkage name for private + // functions. The new way is to check for the name in metadata, + // but that's not supported in old .ll test cases. Ergo, we + // check both. + if (SPName == Asm->MF->getFunction()->getName() || + DISubprogram(Scope).getFunction() == Asm->MF->getFunction()) CurrentFnDbgScope = WScope; } return WScope; } + getOrCreateAbstractScope(Scope); DbgScope *WScope = DbgScopeMap.lookup(InlinedAt); if (WScope) return WScope; @@ -2355,7 +2437,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl Parent->addScope(WScope); ConcreteScopes[InlinedAt] = WScope; - getOrCreateAbstractScope(Scope); return WScope; } @@ -2365,8 +2446,6 @@ DbgScope *DwarfDebug::getOrCreateDbgScope(const MDNode *Scope, const MDNode *Inl static bool hasValidLocation(LLVMContext &Ctx, const MachineInstr *MInsn, const MDNode *&Scope, const MDNode *&InlinedAt) { - if (MInsn->isDebugValue()) - return false; DebugLoc DL = MInsn->getDebugLoc(); if (DL.isUnknown()) return false; @@ -2488,7 +2567,8 @@ bool DwarfDebug::extractScopeInformation() { // current instruction scope does not match scope of first instruction // in this range then create a new instruction range. 
DbgRange R(RangeBeginMI, PrevMI); - MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, PrevInlinedAt); + MI2ScopeMap[RangeBeginMI] = getOrCreateDbgScope(PrevScope, + PrevInlinedAt); MIRanges.push_back(R); } @@ -2565,7 +2645,6 @@ void DwarfDebug::identifyScopeMarkers() { RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "DbgRange does not have first instruction!"); assert(RI->second && "DbgRange does not have second instruction!"); - InsnsBeginScopeSet.insert(RI->first); InsnsEndScopeSet.insert(RI->second); } } @@ -2616,6 +2695,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { recordSourceLine(Line, Col, Scope); + /// ProcessedArgs - Collection of arguments already processed. + SmallPtrSet ProcessedArgs; + DebugLoc PrevLoc; for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; ++I) @@ -2624,14 +2706,19 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *MI = II; DebugLoc DL = MI->getDebugLoc(); if (MI->isDebugValue()) { - // DBG_VALUE needs a label if the variable is local variable or - // an argument whose location is changing. assert (MI->getNumOperands() > 1 && "Invalid machine instruction!"); DIVariable DV(MI->getOperand(MI->getNumOperands() - 1).getMetadata()); if (!DV.Verify()) continue; - if (DV.getTag() != dwarf::DW_TAG_arg_variable) + // If DBG_VALUE is for a local variable then it needs a label. + if (DV.getTag() != dwarf::DW_TAG_arg_variable + && isDbgValueInUndefinedReg(MI) == false) InsnNeedsLabel.insert(MI); - else if (!ProcessedArgs.insert(DV)) + // DBG_VALUE for inlined functions argument needs a label. + else if (!DISubprogram(getDISubprogram(DV.getContext())). + describes(MF->getFunction())) + InsnNeedsLabel.insert(MI); + // DBG_VALUE indicating argument location change needs a label. + else if (isDbgValueInUndefinedReg(MI) == false && !ProcessedArgs.insert(DV)) InsnNeedsLabel.insert(MI); } else { // If location is unknown then instruction needs a location only if @@ -2664,7 +2751,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - collectVariableInfo(MF); + SmallPtrSet ProcessedVars; + collectVariableInfo(MF, ProcessedVars); // Get function line info. if (!Lines.empty()) { @@ -2679,9 +2767,31 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Construct abstract scopes. for (SmallVector::iterator AI = AbstractScopesList.begin(), - AE = AbstractScopesList.end(); AI != AE; ++AI) - constructScopeDIE(*AI); - + AE = AbstractScopesList.end(); AI != AE; ++AI) { + DISubprogram SP((*AI)->getScopeNode()); + if (SP.Verify()) { + // Collect info for variables that were optimized out. 
+ StringRef FName = SP.getLinkageName(); + if (FName.empty()) + FName = SP.getName(); + const Module *M = MF->getFunction()->getParent(); + if (NamedMDNode *NMD = + M->getNamedMetadata(Twine("llvm.dbg.lv.", + getRealLinkageName(FName)))) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + DIVariable DV(cast_or_null(NMD->getOperand(i))); + if (!DV || !ProcessedVars.insert(DV)) + continue; + DbgScope *Scope = AbstractScopes.lookup(DV.getContext()); + if (Scope) + Scope->addVariable(new DbgVariable(DV)); + } + } + } + if (ProcessedSPNodes.count((*AI)->getScopeNode()) == 0) + constructScopeDIE(*AI); + } + DIE *CurFnDIE = constructScopeDIE(CurrentFnDbgScope); if (!DisableFramePointerElim(*MF)) @@ -2696,13 +2806,11 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { // Clear debug info CurrentFnDbgScope = NULL; InsnNeedsLabel.clear(); - ProcessedArgs.clear(); DbgVariableToFrameIndexMap.clear(); VarToAbstractVarMap.clear(); DbgVariableToDbgInstMap.clear(); DbgVariableLabelsMap.clear(); DeleteContainerSeconds(DbgScopeMap); - InsnsBeginScopeSet.clear(); InsnsEndScopeSet.clear(); ConcreteScopes.clear(); DeleteContainerSeconds(AbstractScopes); @@ -2764,7 +2872,8 @@ DbgScope *DwarfDebug::findDbgScope(const MachineInstr *MInsn) { /// recordSourceLine - Register a source line with debug info. Returns the /// unique label that was emitted and which provides correspondence to /// the source line list. -MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S) { +MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, + const MDNode *S) { StringRef Dir; StringRef Fn; @@ -2790,6 +2899,16 @@ MCSymbol *DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode Src = GetOrCreateSourceID(Dir, Fn); } +#if 0 + if (!Lines.empty()) { + SrcLineInfo lastSrcLineInfo = Lines.back(); + // Emitting sequential line records with the same line number (but + // different addresses) seems to confuse GDB. Avoid this. + if (lastSrcLineInfo.getLine() == Line) + return NULL; + } +#endif + MCSymbol *Label = MMI->getContext().CreateTempSymbol(); Lines.push_back(SrcLineInfo(Line, Col, Src, Label)); @@ -2898,7 +3017,8 @@ void DwarfDebug::EmitSectionLabels() { if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) EmitSectionSym(Asm, MacroInfo); - EmitSectionSym(Asm, TLOF.getDwarfLineSection()); + DwarfDebugLineSectionSym = + EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); EmitSectionSym(Asm, TLOF.getDwarfLocSection()); EmitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); @@ -2961,6 +3081,11 @@ void DwarfDebug::emitDIE(DIE *Die) { 4); break; } + case dwarf::DW_AT_stmt_list: { + Asm->EmitLabelDifference(CurrentLineSectionSym, + DwarfDebugLineSectionSym, 4); + break; + } case dwarf::DW_AT_location: { if (UseDotDebugLocEntry.count(Die) != 0) { DIELabel *L = cast(Values[i]); @@ -3106,6 +3231,8 @@ void DwarfDebug::emitDebugLines() { Asm->getObjFileLowering().getDwarfLineSection()); // Construct the section header. 
+ CurrentLineSectionSym = Asm->GetTempSymbol("section_line_begin"); + Asm->OutStreamer.EmitLabel(CurrentLineSectionSym); Asm->OutStreamer.AddComment("Length of Source Line Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("line_end"), Asm->GetTempSymbol("line_begin"), 4); @@ -3491,8 +3618,9 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getTargetData().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; - for (SmallVector::iterator I = DotDebugLocEntries.begin(), - E = DotDebugLocEntries.end(); I != E; ++I, ++index) { + for (SmallVector::iterator + I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); + I != E; ++I, ++index) { DotDebugLocEntry Entry = *I; if (Entry.isEmpty()) { Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 0d6116fc9861..5a281c851748 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -156,6 +156,9 @@ class DwarfDebug { /// not included DbgScopeMap. AbstractScopes owns its DbgScope*s. DenseMap AbstractScopes; + /// AbstractSPDies - Collection of abstract subprogram DIEs. + DenseMap AbstractSPDies; + /// AbstractScopesList - Tracks abstract scopes constructed while processing /// a function. This list is cleared during endFunction(). SmallVectorAbstractScopesList; @@ -210,7 +213,7 @@ class DwarfDebug { DenseMap ContainingTypeMap; typedef SmallVector ScopeVector; - SmallPtrSet InsnsBeginScopeSet; + SmallPtrSet InsnsEndScopeSet; /// InlineInfo - Keep track of inlined functions and their location. This @@ -219,6 +222,10 @@ class DwarfDebug { DenseMap > InlineInfo; SmallVector InlinedSPNodes; + // ProcessedSPNodes - This is a collection of subprogram MDNodes that + // are processed to create DIEs. + SmallPtrSet ProcessedSPNodes; + /// LabelsBeforeInsn - Maps instruction with label emitted before /// instruction. DenseMap LabelsBeforeInsn; @@ -231,9 +238,6 @@ class DwarfDebug { /// a debuggging information entity. SmallPtrSet InsnNeedsLabel; - /// ProcessedArgs - Collection of arguments already processed. - SmallPtrSet ProcessedArgs; - SmallVector DebugRangeSymbols; /// Previous instruction's location information. This is used to determine @@ -257,7 +261,10 @@ class DwarfDebug { MCSymbol *DwarfFrameSectionSym, *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *DwarfDebugLineSectionSym, *CurrentLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + + DIEInteger *DIEIntegerOne; private: /// getSourceDirectoryAndFileIds - Return the directory and file ids that @@ -593,7 +600,8 @@ private: bool extractScopeInformation(); /// collectVariableInfo - Populate DbgScope entries with variables' info. - void collectVariableInfo(const MachineFunction *); + void collectVariableInfo(const MachineFunction *, + SmallPtrSet &ProcessedVars); /// collectVariableInfoFromMMITable - Collect variable information from /// side table maintained by MMI. 
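// --------------------------------------------------------------------------
// Illustrative sketch, not part of the upstream patch: many hunks above
// (AsmPrinter.cpp, DwarfDebug.cpp) replace the pattern
//   if (TimePassesIsEnabled) { NamedRegionTimer T(Name, Group); doWork(); }
//   else                     { doWork(); }
// with a single RAII guard, NamedRegionTimer T(Name, Group,
// TimePassesIsEnabled), so the timed work is written only once. The
// standalone C++ below shows the same idea with a hypothetical ScopedTimer
// type; the class name and output format are assumptions, only the pattern
// mirrors the patch.
#include <chrono>
#include <cstdio>

namespace {
// Minimal stand-in for llvm::NamedRegionTimer: it times the enclosing scope
// only when Enabled is true, so callers need no if/else duplication.
class ScopedTimer {
  const char *Name;
  bool Enabled;
  std::chrono::steady_clock::time_point Start;

public:
  ScopedTimer(const char *N, bool E) : Name(N), Enabled(E) {
    if (Enabled)
      Start = std::chrono::steady_clock::now();
  }
  ~ScopedTimer() {
    if (!Enabled)
      return;
    auto End = std::chrono::steady_clock::now();
    auto Us =
        std::chrono::duration_cast<std::chrono::microseconds>(End - Start);
    std::printf("%s: %lld us\n", Name, (long long)Us.count());
  }
};
} // end anonymous namespace

// Stand-in for the timed work (e.g. DD->beginFunction(MF) in the patch).
static void emitDwarfForFunction() {}

int main() {
  bool TimePassesIsEnabled = true; // set by -time-passes in LLVM proper
  {
    ScopedTimer T("dwarf debug", TimePassesIsEnabled);
    emitDwarfForFunction(); // written once, timed only if the flag is set
  }
  return 0;
}
// --------------------------------------------------------------------------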
diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index f92127f22748..c8a63cf2393b 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -52,13 +52,13 @@ static void EmitCamlGlobal(const Module &M, AsmPrinter &AP, const char *Id) { SymName.append(MId.begin(), std::find(MId.begin(), MId.end(), '.')); SymName += "__"; SymName += Id; - + // Capitalize the first letter of the module name. SymName[Letter] = toupper(SymName[Letter]); - + SmallString<128> TmpStr; AP.Mang->getNameWithPrefix(TmpStr, SymName); - + MCSymbol *Sym = AP.OutContext.GetOrCreateSymbol(TmpStr); AP.OutStreamer.EmitSymbolAttribute(Sym, MCSA_Global); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 9dec22ec78a3..7f98df0d22ea 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -358,23 +358,10 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. This -/// returns true if OldInst's block is modified, false if NewDest is modified. +/// after it, replacing it with an unconditional branch to NewDest. void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest) { - MachineBasicBlock *OldBB = OldInst->getParent(); - - // Remove all the old successors of OldBB from the CFG. - while (!OldBB->succ_empty()) - OldBB->removeSuccessor(OldBB->succ_begin()); - - // Remove all the dead instructions from the end of OldBB. - OldBB->erase(OldInst, OldBB->end()); - - // If OldBB isn't immediately before OldBB, insert a branch to it. - if (++MachineFunction::iterator(OldBB) != MachineFunction::iterator(NewDest)) - TII->InsertBranch(*OldBB, NewDest, 0, SmallVector()); - OldBB->addSuccessor(NewDest); + TII->ReplaceTailWithBranchTo(OldInst, NewDest); ++NumTailMerge; } @@ -383,6 +370,9 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, MachineBasicBlock::iterator BBI1) { + if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) + return 0; + MachineFunction &MF = *CurMBB.getParent(); // Create the fall-through block. @@ -443,18 +433,20 @@ static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB, MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB)); MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere if (I != MF->end() && !TII->AnalyzeBranch(*CurMBB, TBB, FBB, Cond, true)) { MachineBasicBlock *NextBB = I; if (TBB == NextBB && !Cond.empty() && !FBB) { if (!TII->ReverseBranchCondition(Cond)) { TII->RemoveBranch(*CurMBB); - TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond); + TII->InsertBranch(*CurMBB, SuccBB, NULL, Cond, dl); return; } } } - TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector()); + TII->InsertBranch(*CurMBB, SuccBB, NULL, + SmallVector(), dl); } bool @@ -625,9 +617,10 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. 
-unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength) { - unsigned commonTailIndex = 0; +bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex) { + commonTailIndex = 0; unsigned TimeEstimate = ~0U; for (unsigned i = 0, e = SameTails.size(); i != e; ++i) { // Use PredBB if possible; that doesn't require a new branch. @@ -655,6 +648,11 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, << maxCommonTailLength); MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + if (!newMBB) { + DEBUG(dbgs() << "... failed!"); + return false; + } + SameTails[commonTailIndex].setBlock(newMBB); SameTails[commonTailIndex].setTailStartPos(newMBB->begin()); @@ -662,7 +660,7 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, if (PredBB == MBB) PredBB = newMBB; - return commonTailIndex; + return true; } // See if any of the blocks in MergePotentials (which all have a common single @@ -757,7 +755,11 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength); + if (!CreateCommonTailOnlyBlock(PredBB, + maxCommonTailLength, commonTailIndex)) { + RemoveBlocksWithHash(CurHash, SuccBB, PredBB); + continue; + } } MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock(); @@ -874,10 +876,11 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) { } // Remove the unconditional branch at the end, if any. if (TBB && (Cond.empty() || FBB)) { + DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PBB); if (!Cond.empty()) // reinsert conditional branch only, for now - TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond); + TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond, dl); } MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB), *P)); } @@ -976,6 +979,7 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1, bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { bool MadeChange = false; MachineFunction &MF = *MBB->getParent(); + DebugLoc dl; // FIXME: this is nowhere ReoptimizeBlock: MachineFunction::iterator FallThrough = MBB; @@ -1027,7 +1031,7 @@ ReoptimizeBlock: TII->RemoveBranch(PrevBB); PriorCond.clear(); if (PriorTBB != MBB) - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1066,7 +1070,7 @@ ReoptimizeBlock: // the condition is false, remove the uncond second branch. 
if (PriorFBB == MBB) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1079,7 +1083,7 @@ ReoptimizeBlock: SmallVector NewPriorCond(PriorCond); if (!TII->ReverseBranchCondition(NewPriorCond)) { TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1116,7 +1120,7 @@ ReoptimizeBlock: << "To make fallthrough to: " << *PriorTBB << "\n"); TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond); + TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond, dl); // Move this block to the end of the function. MBB->moveAfter(--MF.end()); @@ -1145,7 +1149,7 @@ ReoptimizeBlock: SmallVector NewCond(CurCond); if (!TII->ReverseBranchCondition(NewCond)) { TII->RemoveBranch(*MBB); - TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond); + TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond, dl); MadeChange = true; ++NumBranchOpts; goto ReoptimizeBlock; @@ -1200,7 +1204,7 @@ ReoptimizeBlock: PriorFBB = MBB; } TII->RemoveBranch(PrevBB); - TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond); + TII->InsertBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, dl); } // Iterate through all the predecessors, revectoring each in-turn. @@ -1226,7 +1230,7 @@ ReoptimizeBlock: if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) { TII->RemoveBranch(*PMBB); NewCurCond.clear(); - TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond); + TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond, dl); MadeChange = true; ++NumBranchOpts; PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false); @@ -1246,7 +1250,7 @@ ReoptimizeBlock: } // Add the branch back if the block is more than just an uncond branch. 
- TII->InsertBranch(*MBB, CurTBB, 0, CurCond); + TII->InsertBranch(*MBB, CurTBB, 0, CurCond, dl); } } @@ -1286,7 +1290,7 @@ ReoptimizeBlock: if (CurFallsThru) { MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB)); CurCond.clear(); - TII->InsertBranch(*MBB, NextBB, 0, CurCond); + TII->InsertBranch(*MBB, NextBB, 0, CurCond, dl); } MBB->moveAfter(PredBB); MadeChange = true; diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h index b08739564060..15dfa7f6bee5 100644 --- a/lib/CodeGen/BranchFolding.h +++ b/lib/CodeGen/BranchFolding.h @@ -102,8 +102,9 @@ namespace llvm { MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); - unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, - unsigned maxCommonTailLength); + bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + unsigned maxCommonTailLength, + unsigned &commonTailIndex); bool OptimizeBranches(MachineFunction &MF); bool OptimizeBlock(MachineBasicBlock *MBB); diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index 3e38872a36d6..ffeff1ee27a6 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -1,19 +1,20 @@ add_llvm_library(LLVMCodeGen - Analysis.cpp AggressiveAntiDepBreaker.cpp + Analysis.cpp BranchFolding.cpp CalcSpillWeights.cpp + CallingConvLower.cpp CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp ELFCodeEmitter.cpp ELFWriter.cpp - ExactHazardRecognizer.cpp GCMetadata.cpp GCMetadataPrinter.cpp GCStrategy.cpp IfConversion.cpp + InlineSpiller.cpp IntrinsicLowering.cpp LLVMTargetMachine.cpp LatencyPriorityQueue.cpp @@ -45,6 +46,7 @@ add_llvm_library(LLVMCodeGen OptimizePHIs.cpp PHIElimination.cpp Passes.cpp + PostRAHazardRecognizer.cpp PostRASchedulerList.cpp PreAllocSplitting.cpp ProcessImplicitDefs.cpp @@ -52,7 +54,6 @@ add_llvm_library(LLVMCodeGen PseudoSourceValue.cpp RegAllocFast.cpp RegAllocLinearScan.cpp - RegAllocLocal.cpp RegAllocPBQP.cpp RegisterCoalescer.cpp RegisterScavenging.cpp diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp index a328d0e556e9..240a7b94fccf 100644 --- a/lib/CodeGen/CalcSpillWeights.cpp +++ b/lib/CodeGen/CalcSpillWeights.cpp @@ -116,7 +116,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) { SmallVector spillIs; if (lis->isReMaterializable(li, spillIs, isLoad)) { // If all of the definitions of the interval are re-materializable, - // it is a preferred candidate for spilling. If non of the defs are + // it is a preferred candidate for spilling. If none of the defs are // loads, then it's potentially very cheap to re-materialize. // FIXME: this gets much more complicated once we support non-trivial // re-materialization. diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp new file mode 100644 index 000000000000..62ad8171a9d4 --- /dev/null +++ b/lib/CodeGen/CallingConvLower.cpp @@ -0,0 +1,177 @@ +//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the CCState class, used for lowering and implementing +// calling conventions. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/CallingConvLower.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" +#include "llvm/Target/TargetMachine.h" +using namespace llvm; + +CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, + SmallVector &locs, LLVMContext &C) + : CallingConv(CC), IsVarArg(isVarArg), TM(tm), + TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { + // No stack is used. + StackOffset = 0; + + UsedRegs.resize((TRI.getNumRegs()+31)/32); +} + +// HandleByVal - Allocate a stack slot large enough to pass an argument by +// value. The size and alignment information of the argument is encoded in its +// parameter attribute. +void CCState::HandleByVal(unsigned ValNo, EVT ValVT, + EVT LocVT, CCValAssign::LocInfo LocInfo, + int MinSize, int MinAlign, + ISD::ArgFlagsTy ArgFlags) { + unsigned Align = ArgFlags.getByValAlign(); + unsigned Size = ArgFlags.getByValSize(); + if (MinSize > (int)Size) + Size = MinSize; + if (MinAlign > (int)Align) + Align = MinAlign; + unsigned Offset = AllocateStack(Size, Align); + + addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); +} + +/// MarkAllocated - Mark a register and all of its aliases as allocated. +void CCState::MarkAllocated(unsigned Reg) { + UsedRegs[Reg/32] |= 1 << (Reg&31); + + if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) + for (; (Reg = *RegAliases); ++RegAliases) + UsedRegs[Reg/32] |= 1 << (Reg&31); +} + +/// AnalyzeFormalArguments - Analyze an array of argument values, +/// incorporating info about the formals into this state. +void +CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + unsigned NumArgs = Ins.size(); + + for (unsigned i = 0; i != NumArgs; ++i) { + EVT ArgVT = Ins[i].VT; + ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Formal argument #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// CheckReturn - Analyze the return values of a function, returning true if +/// the return can be performed without sret-demotion, and false otherwise. +bool CCState::CheckReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) + return false; + } + return true; +} + +/// AnalyzeReturn - Analyze the returned values of a return, +/// incorporating info about the result values into this state. +void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + // Determine which register each value should be copied into. + for (unsigned i = 0, e = Outs.size(); i != e; ++i) { + EVT VT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Return operand #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, +/// incorporating info about the passed values into this state. 
+void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, + CCAssignFn Fn) { + unsigned NumOps = Outs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallOperands - Same as above except it takes vectors of types +/// and argument flags. +void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, + SmallVectorImpl &Flags, + CCAssignFn Fn) { + unsigned NumOps = ArgVTs.size(); + for (unsigned i = 0; i != NumOps; ++i) { + EVT ArgVT = ArgVTs[i]; + ISD::ArgFlagsTy ArgFlags = Flags[i]; + if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { +#ifndef NDEBUG + dbgs() << "Call operand #" << i << " has unhandled type " + << ArgVT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Analyze the return values of a call, +/// incorporating info about the passed values into this state. +void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, + CCAssignFn Fn) { + for (unsigned i = 0, e = Ins.size(); i != e; ++i) { + EVT VT = Ins[i].VT; + ISD::ArgFlagsTy Flags = Ins[i].Flags; + if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { +#ifndef NDEBUG + dbgs() << "Call result #" << i << " has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } + } +} + +/// AnalyzeCallResult - Same as above except it's specialized for calls which +/// produce a single value. +void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { + if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { +#ifndef NDEBUG + dbgs() << "Call result has unhandled type " + << VT.getEVTString(); +#endif + llvm_unreachable(0); + } +} diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp index 3ff2a046d233..e0e315c6c677 100644 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ b/lib/CodeGen/CodePlacementOpt.cpp @@ -178,6 +178,8 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, continue; // Move the block. + DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() + << " to top of loop.\n"); Changed = true; // Move it and all the blocks that can reach it via fallthrough edges @@ -297,6 +299,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, continue; // Move the block. 
+ DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() + << " to be contiguous with loop.\n"); Changed = true; // Process this block and all loop blocks contiguous with it, to keep diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index fd957b12fc44..e3746a985644 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -29,6 +30,7 @@ CriticalAntiDepBreaker:: CriticalAntiDepBreaker(MachineFunction& MFi) : AntiDepBreaker(), MF(MFi), MRI(MF.getRegInfo()), + TII(MF.getTarget().getInstrInfo()), TRI(MF.getTarget().getRegisterInfo()), AllocatableSet(TRI->getAllocatableSet(MF)) { @@ -71,25 +73,27 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { DefIndices[AliasReg] = ~0u; } } - } else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + } + + // In a non-return block, examine the live-in regs of all successors. + // Note a return block can have successors if the return instruction is + // predicated. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - // Repeat, for all aliases. - for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { - unsigned AliasReg = *Alias; - Classes[AliasReg] = reinterpret_cast(-1); - KillIndices[AliasReg] = BB->size(); - DefIndices[AliasReg] = ~0u; - } + unsigned Reg = *I; + Classes[Reg] = reinterpret_cast(-1); + KillIndices[Reg] = BB->size(); + DefIndices[Reg] = ~0u; + // Repeat, for all aliases. + for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { + unsigned AliasReg = *Alias; + Classes[AliasReg] = reinterpret_cast(-1); + KillIndices[AliasReg] = BB->size(); + DefIndices[AliasReg] = ~0u; } - } + } // Mark live-out callee-saved registers. In a return block this is // all callee-saved registers. In non-return this is any @@ -164,6 +168,26 @@ static const SDep *CriticalPathStep(const SUnit *SU) { } void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { + // It's not safe to change register allocation for source operands of + // that have special allocation requirements. Also assume all registers + // used in a call must not be changed (ABI). + // FIXME: The issue with predicated instruction is more complex. We are being + // conservatively here because the kill markers cannot be trusted after + // if-conversion: + // %R6 = LDR %SP, %reg0, 92, pred:14, pred:%reg0; mem:LD4[FixedStack14] + // ... + // STR %R0, %R6, %reg0, 0, pred:0, pred:%CPSR; mem:ST4[%395] + // %R6 = LDR %SP, %reg0, 100, pred:0, pred:%CPSR; mem:LD4[FixedStack12] + // STR %R0, %R6, %reg0, 0, pred:14, pred:%reg0; mem:ST4[%396](align=8) + // + // The first R6 kill is not really a kill since it's killed by a predicated + // instruction which may not be executed. 
The second R6 def may or may not + // re-define R6 so it's not safe to change it since the last R6 use cannot be + // changed. + bool Special = MI->getDesc().isCall() || + MI->getDesc().hasExtraSrcRegAllocReq() || + TII->isPredicated(MI); + // Scan the register operands for this instruction and update // Classes and RegRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -199,9 +223,7 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (Classes[Reg] != reinterpret_cast(-1)) RegRefs.insert(std::make_pair(Reg, &MO)); - // It's not safe to change register allocation for source operands of - // that have special allocation requirements. - if (MO.isUse() && MI->getDesc().hasExtraSrcRegAllocReq()) { + if (MO.isUse() && Special) { if (KeepRegs.insert(Reg)) { for (const unsigned *Subreg = TRI->getSubRegisters(Reg); *Subreg; ++Subreg) @@ -216,38 +238,43 @@ void CriticalAntiDepBreaker::ScanInstruction(MachineInstr *MI, // Update liveness. // Proceding upwards, registers that are defed but not used in this // instruction are now dead. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) continue; - if (!MO.isDef()) continue; - // Ignore two-addr defs. - if (MI->isRegTiedToUseOperand(i)) continue; - - DefIndices[Reg] = Count; - KillIndices[Reg] = ~0u; - assert(((KillIndices[Reg] == ~0u) != - (DefIndices[Reg] == ~0u)) && - "Kill and Def maps aren't consistent for Reg!"); - KeepRegs.erase(Reg); - Classes[Reg] = 0; - RegRefs.erase(Reg); - // Repeat, for all subregs. - for (const unsigned *Subreg = TRI->getSubRegisters(Reg); - *Subreg; ++Subreg) { - unsigned SubregReg = *Subreg; - DefIndices[SubregReg] = Count; - KillIndices[SubregReg] = ~0u; - KeepRegs.erase(SubregReg); - Classes[SubregReg] = 0; - RegRefs.erase(SubregReg); - } - // Conservatively mark super-registers as unusable. - for (const unsigned *Super = TRI->getSuperRegisters(Reg); - *Super; ++Super) { - unsigned SuperReg = *Super; - Classes[SuperReg] = reinterpret_cast(-1); + + if (!TII->isPredicated(MI)) { + // Predicated defs are modeled as read + write, i.e. similar to two + // address updates. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + if (!MO.isDef()) continue; + // Ignore two-addr defs. + if (MI->isRegTiedToUseOperand(i)) continue; + + DefIndices[Reg] = Count; + KillIndices[Reg] = ~0u; + assert(((KillIndices[Reg] == ~0u) != + (DefIndices[Reg] == ~0u)) && + "Kill and Def maps aren't consistent for Reg!"); + KeepRegs.erase(Reg); + Classes[Reg] = 0; + RegRefs.erase(Reg); + // Repeat, for all subregs. + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) { + unsigned SubregReg = *Subreg; + DefIndices[SubregReg] = Count; + KillIndices[SubregReg] = ~0u; + KeepRegs.erase(SubregReg); + Classes[SubregReg] = 0; + RegRefs.erase(SubregReg); + } + // Conservatively mark super-registers as unusable. + for (const unsigned *Super = TRI->getSuperRegisters(Reg); + *Super; ++Super) { + unsigned SuperReg = *Super; + Classes[SuperReg] = reinterpret_cast(-1); + } } } for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -334,10 +361,15 @@ BreakAntiDependencies(const std::vector& SUnits, // so just duck out immediately if the block is empty. 
if (SUnits.empty()) return 0; + // Keep a map of the MachineInstr*'s back to the SUnit representing them. + // This is used for updating debug information. + DenseMap MISUnitMap; + // Find the node at the bottom of the critical path. const SUnit *Max = 0; for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { const SUnit *SU = &SUnits[i]; + MISUnitMap[SU->getInstr()] = SU; if (!Max || SU->getDepth() + SU->Latency > Max->getDepth() + Max->Latency) Max = SU; } @@ -473,7 +505,11 @@ BreakAntiDependencies(const std::vector& SUnits, PrescanInstruction(MI); - if (MI->getDesc().hasExtraDefRegAllocReq()) + // If MI's defs have a special allocation requirement, don't allow + // any def registers to be changed. Also assume all registers + // defined in a call must not be changed (ABI). + if (MI->getDesc().isCall() || MI->getDesc().hasExtraDefRegAllocReq() || + TII->isPredicated(MI)) // If this instruction's defs have special allocation requirement, don't // break this anti-dependency. AntiDepReg = 0; @@ -485,7 +521,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); if (Reg == 0) continue; - if (MO.isUse() && AntiDepReg == Reg) { + if (MO.isUse() && TRI->regsOverlap(AntiDepReg, Reg)) { AntiDepReg = 0; break; } @@ -519,8 +555,22 @@ BreakAntiDependencies(const std::vector& SUnits, std::multimap::iterator> Range = RegRefs.equal_range(AntiDepReg); for (std::multimap::iterator - Q = Range.first, QE = Range.second; Q != QE; ++Q) + Q = Range.first, QE = Range.second; Q != QE; ++Q) { Q->second->setReg(NewReg); + // If the SU for the instruction being updated has debug information + // related to the anti-dependency register, make sure to update that + // as well. + const SUnit *SU = MISUnitMap[Q->second->getParent()]; + if (!SU) continue; + for (unsigned i = 0, e = SU->DbgInstrList.size() ; i < e ; ++i) { + MachineInstr *DI = SU->DbgInstrList[i]; + assert (DI->getNumOperands()==3 && DI->getOperand(0).isReg() && + DI->getOperand(0).getReg() + && "Non register dbg_value attached to SUnit!"); + if (DI->getOperand(0).getReg() == AntiDepReg) + DI->getOperand(0).setReg(NewReg); + } + } // We just went back in time and modified history; the // liveness information for the anti-depenence reg is now diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index cc42dd2b8e32..540630083bcc 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -22,15 +22,18 @@ #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include namespace llvm { +class TargetInstrInfo; +class TargetRegisterInfo; + class CriticalAntiDepBreaker : public AntiDepBreaker { MachineFunction& MF; MachineRegisterInfo &MRI; + const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; /// AllocatableSet - The set of allocatable registers. 
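The CriticalAntiDepBreaker hunks above keep DBG_VALUE users in sync when an anti-dependence register is renamed, using the new MISUnitMap and the SUnit::DbgInstrList attached to each scheduling unit. As a condensed, hedged restatement of that fix-up (not part of the patch itself), the helper below shows the pattern in isolation; updateDbgValues is a hypothetical name, and it assumes the SUnit::DbgInstrList member used by this revision.

#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/ScheduleDAG.h"
using namespace llvm;

// Retarget any DBG_VALUE attached to SU that still refers to the register
// being renamed, mirroring the loop added to BreakAntiDependencies above.
static void updateDbgValues(const SUnit *SU, unsigned AntiDepReg,
                            unsigned NewReg) {
  if (!SU)
    return;
  for (unsigned i = 0, e = SU->DbgInstrList.size(); i != e; ++i) {
    MachineInstr *DI = SU->DbgInstrList[i];
    // The dbg_value form handled here carries the tracked register in
    // operand 0; anything else is left untouched.
    if (DI->getOperand(0).isReg() &&
        DI->getOperand(0).getReg() == AntiDepReg)
      DI->getOperand(0).setReg(NewReg);
  }
}

The patch performs this work inline while walking the RegRefs range for AntiDepReg, so the helper above is purely illustrative.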
diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index f6739f434044..01b31b420931 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -22,6 +22,7 @@ #include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/PromoteMemToReg.h" @@ -87,10 +88,13 @@ namespace { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still /// use the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. - bool CleanupSelectors(); + bool CleanupSelectors(SmallPtrSet &Sels); + + bool HasCatchAllInSelector(IntrinsicInst *); /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. - void FindAllCleanupSelectors(SmallPtrSet &Sels); + void FindAllCleanupSelectors(SmallPtrSet &Sels, + SmallPtrSet &CatchAllSels); /// FindAllURoRInvokes - Find all URoR invokes in the function. void FindAllURoRInvokes(SmallPtrSet &URoRInvokes); @@ -150,7 +154,7 @@ namespace { Changed = true; } - return false; + return Changed; } public: @@ -186,25 +190,32 @@ FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm, bool fast) { return new DwarfEHPrepare(tm, fast); } +/// HasCatchAllInSelector - Return true if the intrinsic instruction has a +/// catch-all. +bool DwarfEHPrepare::HasCatchAllInSelector(IntrinsicInst *II) { + if (!EHCatchAllValue) return false; + + unsigned ArgIdx = II->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast(II->getArgOperand(ArgIdx)); + return GV == EHCatchAllValue; +} + /// FindAllCleanupSelectors - Find all eh.selector calls that are clean-ups. void DwarfEHPrepare:: -FindAllCleanupSelectors(SmallPtrSet &Sels) { +FindAllCleanupSelectors(SmallPtrSet &Sels, + SmallPtrSet &CatchAllSels) { for (Value::use_iterator I = SelectorIntrinsic->use_begin(), E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *SI = cast(I); - if (!SI || SI->getParent()->getParent() != F) continue; - - unsigned NumOps = SI->getNumOperands(); - if (NumOps > 4) continue; - bool IsCleanUp = (NumOps == 3); + IntrinsicInst *II = cast(I); - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast(SI->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); + if (II->getParent()->getParent() != F) + continue; - if (IsCleanUp) - Sels.insert(SI); + if (!HasCatchAllInSelector(II)) + Sels.insert(II); + else + CatchAllSels.insert(II); } } @@ -222,7 +233,7 @@ FindAllURoRInvokes(SmallPtrSet &URoRInvokes) { /// CleanupSelectors - Any remaining eh.selector intrinsic calls which still use /// the ".llvm.eh.catch.all.value" call need to convert to using its /// initializer instead. -bool DwarfEHPrepare::CleanupSelectors() { +bool DwarfEHPrepare::CleanupSelectors(SmallPtrSet &Sels) { if (!EHCatchAllValue) return false; if (!SelectorIntrinsic) { @@ -232,17 +243,15 @@ bool DwarfEHPrepare::CleanupSelectors() { } bool Changed = false; - for (Value::use_iterator - I = SelectorIntrinsic->use_begin(), - E = SelectorIntrinsic->use_end(); I != E; ++I) { - IntrinsicInst *Sel = dyn_cast(I); - if (!Sel || Sel->getParent()->getParent() != F) continue; + for (SmallPtrSet::iterator + I = Sels.begin(), E = Sels.end(); I != E; ++I) { + IntrinsicInst *Sel = *I; // Index of the ".llvm.eh.catch.all.value" variable. 
- unsigned OpIdx = Sel->getNumOperands() - 1; - GlobalVariable *GV = dyn_cast(Sel->getOperand(OpIdx)); + unsigned OpIdx = Sel->getNumArgOperands() - 1; + GlobalVariable *GV = dyn_cast(Sel->getArgOperand(OpIdx)); if (GV != EHCatchAllValue) continue; - Sel->setOperand(OpIdx, EHCatchAllValue->getInitializer()); + Sel->setArgOperand(OpIdx, EHCatchAllValue->getInitializer()); Changed = true; } @@ -293,8 +302,6 @@ DwarfEHPrepare::FindSelectorAndURoR(Instruction *Inst, bool &URoRInvoke, /// function. This is a candidate to merge the selector associated with the URoR /// invoke with the one from the URoR's landing pad. bool DwarfEHPrepare::HandleURoRInvokes() { - if (!DT) return CleanupSelectors(); // We require DominatorTree information. - if (!EHCatchAllValue) { EHCatchAllValue = F->getParent()->getNamedGlobal(".llvm.eh.catch.all.value"); @@ -307,14 +314,20 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!SelectorIntrinsic) return false; } + SmallPtrSet Sels; + SmallPtrSet CatchAllSels; + FindAllCleanupSelectors(Sels, CatchAllSels); + + if (!DT) + // We require DominatorTree information. + return CleanupSelectors(CatchAllSels); + if (!URoR) { URoR = F->getParent()->getFunction("_Unwind_Resume_or_Rethrow"); - if (!URoR) return CleanupSelectors(); + if (!URoR) return CleanupSelectors(CatchAllSels); } - SmallPtrSet Sels; SmallPtrSet URoRInvokes; - FindAllCleanupSelectors(Sels); FindAllURoRInvokes(URoRInvokes); SmallPtrSet SelsToConvert; @@ -340,7 +353,8 @@ bool DwarfEHPrepare::HandleURoRInvokes() { if (!ExceptionValueIntrinsic) { ExceptionValueIntrinsic = Intrinsic::getDeclaration(F->getParent(), Intrinsic::eh_exception); - if (!ExceptionValueIntrinsic) return CleanupSelectors(); + if (!ExceptionValueIntrinsic) + return CleanupSelectors(CatchAllSels); } for (Value::use_iterator @@ -360,21 +374,9 @@ bool DwarfEHPrepare::HandleURoRInvokes() { // an eh.selector intrinsic call. If the eh.selector is a 'clean-up', we // need to convert it to a 'catch-all'. for (SmallPtrSet::iterator - SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) { - IntrinsicInst *II = *SI; - unsigned NumOps = II->getNumOperands(); - - if (NumOps <= 4) { - bool IsCleanUp = (NumOps == 3); - - if (!IsCleanUp) - if (ConstantInt *CI = dyn_cast(II->getOperand(3))) - IsCleanUp = (CI->getZExtValue() == 0); - - if (IsCleanUp) - SelsToConvert.insert(II); - } - } + SI = SelCalls.begin(), SE = SelCalls.end(); SI != SE; ++SI) + if (!HasCatchAllInSelector(*SI)) + SelsToConvert.insert(*SI); } } } @@ -388,12 +390,22 @@ bool DwarfEHPrepare::HandleURoRInvokes() { SI = SelsToConvert.begin(), SE = SelsToConvert.end(); SI != SE; ++SI) { IntrinsicInst *II = *SI; - SmallVector Args; // Use the exception object pointer and the personality function // from the original selector. - Args.push_back(II->getOperand(1)); // Exception object pointer. - Args.push_back(II->getOperand(2)); // Personality function. + CallSite CS(II); + IntrinsicInst::op_iterator I = CS.arg_begin(); + IntrinsicInst::op_iterator E = CS.arg_end(); + IntrinsicInst::op_iterator B = prior(E); + + // Exclude last argument if it is an integer. + if (isa(B)) E = B; + + // Add exception object pointer (front). + // Add personality function (next). + // Add in any filter IDs (rest). + SmallVector Args(I, E); + Args.push_back(EHCatchAllValue->getInitializer()); // Catch-all indicator. 
CallInst *NewSelector = @@ -409,7 +421,7 @@ bool DwarfEHPrepare::HandleURoRInvokes() { } } - Changed |= CleanupSelectors(); + Changed |= CleanupSelectors(CatchAllSels); return Changed; } diff --git a/lib/CodeGen/ELFCodeEmitter.cpp b/lib/CodeGen/ELFCodeEmitter.cpp index 8416d3bda930..36b0e6514b3a 100644 --- a/lib/CodeGen/ELFCodeEmitter.cpp +++ b/lib/CodeGen/ELFCodeEmitter.cpp @@ -90,7 +90,7 @@ bool ELFCodeEmitter::finishFunction(MachineFunction &MF) { for (std::vector::iterator MRI = JTRelocations.begin(), MRE = JTRelocations.end(); MRI != MRE; ++MRI) { MachineRelocation &MR = *MRI; - unsigned MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); + uintptr_t MBBOffset = getMachineBasicBlockAddress(MR.getBasicBlock()); MR.setResultPointer((void*)MBBOffset); MR.setConstantVal(ES->SectionIdx); JTSection.addRelocation(MR); diff --git a/lib/CodeGen/ExactHazardRecognizer.cpp b/lib/CodeGen/ExactHazardRecognizer.cpp deleted file mode 100644 index af5f2892c2f0..000000000000 --- a/lib/CodeGen/ExactHazardRecognizer.cpp +++ /dev/null @@ -1,180 +0,0 @@ -//===----- ExactHazardRecognizer.cpp - hazard recognizer -------- ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements a hazard recognizer using the instructions itineraries -// defined for the current target. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "post-RA-sched" -#include "ExactHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetInstrItineraries.h" - -using namespace llvm; - -ExactHazardRecognizer:: -ExactHazardRecognizer(const InstrItineraryData &LItinData) : - ScheduleHazardRecognizer(), ItinData(LItinData) -{ - // Determine the maximum depth of any itinerary. This determines the - // depth of the scoreboard. We always make the scoreboard at least 1 - // cycle deep to avoid dealing with the boundary condition. - unsigned ScoreboardDepth = 1; - if (!ItinData.isEmpty()) { - for (unsigned idx = 0; ; ++idx) { - if (ItinData.isEndMarker(idx)) - break; - - const InstrStage *IS = ItinData.beginStage(idx); - const InstrStage *E = ItinData.endStage(idx); - unsigned ItinDepth = 0; - for (; IS != E; ++IS) - ItinDepth += IS->getCycles(); - - ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); - } - } - - ReservedScoreboard.reset(ScoreboardDepth); - RequiredScoreboard.reset(ScoreboardDepth); - - DEBUG(dbgs() << "Using exact hazard recognizer: ScoreboardDepth = " - << ScoreboardDepth << '\n'); -} - -void ExactHazardRecognizer::Reset() { - RequiredScoreboard.reset(); - ReservedScoreboard.reset(); -} - -void ExactHazardRecognizer::ScoreBoard::dump() const { - dbgs() << "Scoreboard:\n"; - - unsigned last = Depth - 1; - while ((last > 0) && ((*this)[last] == 0)) - last--; - - for (unsigned i = 0; i <= last; i++) { - unsigned FUs = (*this)[i]; - dbgs() << "\t"; - for (int j = 31; j >= 0; j--) - dbgs() << ((FUs & (1 << j)) ? 
'1' : '0'); - dbgs() << '\n'; - } -} - -ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU) { - if (ItinData.isEmpty()) - return NoHazard; - - unsigned cycle = 0; - - // Use the itinerary for the underlying instruction to check for - // free FU's in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { - // We must find one of the stage's units free for every cycle the - // stage is occupied. FIXME it would be more accurate to find the - // same unit free in all the cycles. - for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); - - unsigned freeUnits = IS->getUnits(); - switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); - case InstrStage::Required: - // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH - case InstrStage::Reserved: - // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; - break; - } - - if (!freeUnits) { - DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); - DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); - DEBUG(SU->getInstr()->dump()); - return Hazard; - } - } - - // Advance the cycle to the next stage. - cycle += IS->getNextCycles(); - } - - return NoHazard; -} - -void ExactHazardRecognizer::EmitInstruction(SUnit *SU) { - if (ItinData.isEmpty()) - return; - - unsigned cycle = 0; - - // Use the itinerary for the underlying instruction to reserve FU's - // in the scoreboard at the appropriate future cycles. - unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { - // We must reserve one of the stage's units for every cycle the - // stage is occupied. FIXME it would be more accurate to reserve - // the same unit free in all the cycles. - for (unsigned int i = 0; i < IS->getCycles(); ++i) { - assert(((cycle + i) < RequiredScoreboard.getDepth()) && - "Scoreboard depth exceeded!"); - - unsigned freeUnits = IS->getUnits(); - switch (IS->getReservationKind()) { - default: - assert(0 && "Invalid FU reservation"); - case InstrStage::Required: - // Required FUs conflict with both reserved and required ones - freeUnits &= ~ReservedScoreboard[cycle + i]; - // FALLTHROUGH - case InstrStage::Reserved: - // Reserved FUs can conflict only with required ones. - freeUnits &= ~RequiredScoreboard[cycle + i]; - break; - } - - // reduce to a single unit - unsigned freeUnit = 0; - do { - freeUnit = freeUnits; - freeUnits = freeUnit & (freeUnit - 1); - } while (freeUnits); - - assert(freeUnit && "No function unit available!"); - if (IS->getReservationKind() == InstrStage::Required) - RequiredScoreboard[cycle + i] |= freeUnit; - else - ReservedScoreboard[cycle + i] |= freeUnit; - } - - // Advance the cycle to the next stage. 
- cycle += IS->getNextCycles(); - } - - DEBUG(ReservedScoreboard.dump()); - DEBUG(RequiredScoreboard.dump()); -} - -void ExactHazardRecognizer::AdvanceCycle() { - ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); - RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); -} diff --git a/lib/CodeGen/ExactHazardRecognizer.h b/lib/CodeGen/ExactHazardRecognizer.h deleted file mode 100644 index 91c81a970fa5..000000000000 --- a/lib/CodeGen/ExactHazardRecognizer.h +++ /dev/null @@ -1,86 +0,0 @@ -//=- llvm/CodeGen/ExactHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the ExactHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_EXACTHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetInstrItineraries.h" - -namespace llvm { - class ExactHazardRecognizer : public ScheduleHazardRecognizer { - // ScoreBoard to track function unit usage. ScoreBoard[0] is a - // mask of the FUs in use in the cycle currently being - // schedule. ScoreBoard[1] is a mask for the next cycle. The - // ScoreBoard is used as a circular buffer with the current cycle - // indicated by Head. - class ScoreBoard { - unsigned *Data; - - // The maximum number of cycles monitored by the Scoreboard. This - // value is determined based on the target itineraries to ensure - // that all hazards can be tracked. - size_t Depth; - // Indices into the Scoreboard that represent the current cycle. - size_t Head; - public: - ScoreBoard():Data(NULL), Depth(0), Head(0) { } - ~ScoreBoard() { - delete[] Data; - } - - size_t getDepth() const { return Depth; } - unsigned& operator[](size_t idx) const { - assert(Depth && "ScoreBoard was not initialized properly!"); - - return Data[(Head + idx) % Depth]; - } - - void reset(size_t d = 1) { - if (Data == NULL) { - Depth = d; - Data = new unsigned[Depth]; - } - - memset(Data, 0, Depth * sizeof(Data[0])); - Head = 0; - } - - void advance() { - Head = (Head + 1) % Depth; - } - - // Print the scoreboard. - void dump() const; - }; - - // Itinerary data for the target. - const InstrItineraryData &ItinData; - - ScoreBoard ReservedScoreboard; - ScoreBoard RequiredScoreboard; - - public: - ExactHazardRecognizer(const InstrItineraryData &ItinData); - - virtual HazardType getHazardType(SUnit *SU); - virtual void Reset(); - virtual void EmitInstruction(SUnit *SU); - virtual void AdvanceCycle(); - }; -} - -#endif diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index 790cb2164897..71506cc6abb9 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -271,7 +271,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcwrite: if (LowerWr) { // Replace a write barrier with a simple store. 
- Value *St = new StoreInst(CI->getOperand(1), CI->getOperand(3), CI); + Value *St = new StoreInst(CI->getArgOperand(0), CI->getArgOperand(2), CI); CI->replaceAllUsesWith(St); CI->eraseFromParent(); } @@ -279,7 +279,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { case Intrinsic::gcread: if (LowerRd) { // Replace a read barrier with a simple load. - Value *Ld = new LoadInst(CI->getOperand(2), "", CI); + Value *Ld = new LoadInst(CI->getArgOperand(1), "", CI); Ld->takeName(CI); CI->replaceAllUsesWith(Ld); CI->eraseFromParent(); @@ -290,7 +290,7 @@ bool LowerIntrinsics::PerformDefaultLowering(Function &F, GCStrategy &S) { // Initialize the GC root, but do not delete the intrinsic. The // backend needs the intrinsic to flag the stack slot. Roots.push_back(cast( - CI->getOperand(1)->stripPointerCasts())); + CI->getArgOperand(0)->stripPointerCasts())); } break; default: diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index c61fd17e7911..6b445e0b8e0f 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -33,20 +34,22 @@ using namespace llvm; static cl::opt IfCvtFnStart("ifcvt-fn-start", cl::init(-1), cl::Hidden); static cl::opt IfCvtFnStop("ifcvt-fn-stop", cl::init(-1), cl::Hidden); static cl::opt IfCvtLimit("ifcvt-limit", cl::init(-1), cl::Hidden); -static cl::opt DisableSimple("disable-ifcvt-simple", +static cl::opt DisableSimple("disable-ifcvt-simple", cl::init(false), cl::Hidden); -static cl::opt DisableSimpleF("disable-ifcvt-simple-false", +static cl::opt DisableSimpleF("disable-ifcvt-simple-false", cl::init(false), cl::Hidden); -static cl::opt DisableTriangle("disable-ifcvt-triangle", +static cl::opt DisableTriangle("disable-ifcvt-triangle", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleR("disable-ifcvt-triangle-rev", +static cl::opt DisableTriangleR("disable-ifcvt-triangle-rev", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleF("disable-ifcvt-triangle-false", +static cl::opt DisableTriangleF("disable-ifcvt-triangle-false", cl::init(false), cl::Hidden); -static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", +static cl::opt DisableTriangleFR("disable-ifcvt-triangle-false-rev", cl::init(false), cl::Hidden); -static cl::opt DisableDiamond("disable-ifcvt-diamond", +static cl::opt DisableDiamond("disable-ifcvt-diamond", cl::init(false), cl::Hidden); +static cl::opt IfCvtBranchFold("ifcvt-branch-fold", + cl::init(true), cl::Hidden); STATISTIC(NumSimple, "Number of simple if-conversions performed"); STATISTIC(NumSimpleFalse, "Number of simple (F) if-conversions performed"); @@ -115,7 +118,7 @@ namespace { BB(0), TrueBB(0), FalseBB(0) {} }; - /// IfcvtToken - Record information about pending if-conversions to attemp: + /// IfcvtToken - Record information about pending if-conversions to attempt: /// BBI - Corresponding BBInfo. /// Kind - Type of block. See IfcvtKind. 
/// NeedSubsumption - True if the to-be-predicated BB has already been @@ -146,6 +149,7 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; bool MadeChange; int FnNum; public: @@ -167,8 +171,7 @@ namespace { std::vector &Tokens); bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl &Cond, bool isTriangle = false, bool RevBranch = false); - bool AnalyzeBlocks(MachineFunction &MF, - std::vector &Tokens); + void AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens); void InvalidatePreds(MachineBasicBlock *BB); void RemoveExtraEdges(BBInfo &BBI); bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind); @@ -177,14 +180,22 @@ namespace { unsigned NumDups1, unsigned NumDups2); void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond); + SmallVectorImpl &Cond, + SmallSet &Redefs); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr = false); - void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI); + void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); - bool MeetIfcvtSizeLimit(unsigned Size) const { - return Size > 0 && Size <= TLI->getIfCvtBlockSizeLimit(); + bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size) const { + return Size > 0 && TII->isProfitableToIfCvt(BB, Size); + } + + bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize, + MachineBasicBlock &FBB, unsigned FSize) const { + return TSize > 0 && FSize > 0 && + TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize); } // blockAlwaysFallThrough - Block ends without a terminator. @@ -227,8 +238,15 @@ FunctionPass *llvm::createIfConverterPass() { return new IfConverter(); } bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); + TRI = MF.getTarget().getRegisterInfo(); if (!TII) return false; + // Tail merge tend to expose more if-conversion opportunities. + BranchFolder BF(true); + bool BFChange = BF.OptimizeFunction(MF, TII, + MF.getTarget().getRegisterInfo(), + getAnalysisIfAvailable()); + DEBUG(dbgs() << "\nIfcvt: function (" << ++FnNum << ") \'" << MF.getFunction()->getName() << "\'"); @@ -253,7 +271,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { while (IfCvtLimit == -1 || (int)NumIfCvts < IfCvtLimit) { // Do an initial analysis for each basic block and find all the potential // candidates to perform if-conversion. - bool Change = AnalyzeBlocks(MF, Tokens); + bool Change = false; + AnalyzeBlocks(MF, Tokens); while (!Tokens.empty()) { IfcvtToken *Token = Tokens.back(); Tokens.pop_back(); @@ -281,7 +300,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { case ICSimpleFalse: { bool isFalse = Kind == ICSimpleFalse; if ((isFalse && DisableSimpleF) || (!isFalse && DisableSimple)) break; - DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? " false" :"") + DEBUG(dbgs() << "Ifcvt (Simple" << (Kind == ICSimpleFalse ? + " false" : "") << "): BB#" << BBI.BB->getNumber() << " (" << ((Kind == ICSimpleFalse) ? BBI.FalseBB->getNumber() @@ -289,8 +309,8 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { RetVal = IfConvertSimple(BBI, Kind); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); if (RetVal) { - if (isFalse) NumSimpleFalse++; - else NumSimple++; + if (isFalse) ++NumSimpleFalse; + else ++NumSimple; } break; } @@ -316,11 +336,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << (RetVal ? 
"succeeded!" : "failed!") << "\n"); if (RetVal) { if (isFalse) { - if (isRev) NumTriangleFRev++; - else NumTriangleFalse++; + if (isRev) ++NumTriangleFRev; + else ++NumTriangleFalse; } else { - if (isRev) NumTriangleRev++; - else NumTriangle++; + if (isRev) ++NumTriangleRev; + else ++NumTriangle; } } break; @@ -332,7 +352,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { << BBI.FalseBB->getNumber() << ") "); RetVal = IfConvertDiamond(BBI, Kind, NumDups, NumDups2); DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n"); - if (RetVal) NumDiamonds++; + if (RetVal) ++NumDiamonds; break; } } @@ -361,13 +381,14 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { Roots.clear(); BBAnalysis.clear(); - if (MadeChange) { + if (MadeChange && IfCvtBranchFold) { BranchFolder BF(false); BF.OptimizeFunction(MF, TII, MF.getTarget().getRegisterInfo(), getAnalysisIfAvailable()); } + MadeChange |= BFChange; return MadeChange; } @@ -387,9 +408,10 @@ static MachineBasicBlock *findFalseBlock(MachineBasicBlock *BB, /// ReverseBranchCondition - Reverse the condition of the end of the block /// branch. Swap block's 'true' and 'false' successors. bool IfConverter::ReverseBranchCondition(BBInfo &BBI) { + DebugLoc dl; // FIXME: this is nowhere if (!TII->ReverseBranchCondition(BBI.BrCond)) { TII->RemoveBranch(*BBI.BB); - TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond); + TII->InsertBranch(*BBI.BB, BBI.FalseBB, BBI.TrueBB, BBI.BrCond, dl); std::swap(BBI.TrueBB, BBI.FalseBB); return true; } @@ -420,7 +442,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { if (TrueBBI.BB->pred_size() > 1) { if (TrueBBI.CannotBeCopied || - TrueBBI.NonPredSize > TLI->getIfCvtDupBlockSizeLimit()) + !TII->isProfitableToDupForIfCvt(*TrueBBI.BB, TrueBBI.NonPredSize)) return false; Dups = TrueBBI.NonPredSize; } @@ -431,7 +453,7 @@ bool IfConverter::ValidSimple(BBInfo &TrueBBI, unsigned &Dups) const { /// ValidTriangle - Returns true if the 'true' and 'false' blocks (along /// with their common predecessor) forms a valid triangle shape for ifcvt. /// If 'FalseBranch' is true, it checks if 'true' block's false branch -/// branches to the false branch rather than the other way around. It also +/// branches to the 'false' block rather than the other way around. It also /// returns the number of instructions that the ifcvt would need to duplicate /// if performed in 'Dups'. bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, @@ -457,7 +479,7 @@ bool IfConverter::ValidTriangle(BBInfo &TrueBBI, BBInfo &FalseBBI, ++Size; } } - if (Size > TLI->getIfCvtDupBlockSizeLimit()) + if (!TII->isProfitableToDupForIfCvt(*TrueBBI.BB, Size)) return false; Dups = Size; } @@ -514,7 +536,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, MachineBasicBlock::iterator TI = TrueBBI.BB->begin(); MachineBasicBlock::iterator FI = FalseBBI.BB->begin(); - while (TI != TrueBBI.BB->end() && FI != FalseBBI.BB->end()) { + MachineBasicBlock::iterator TIE = TrueBBI.BB->end(); + MachineBasicBlock::iterator FIE = FalseBBI.BB->end(); + // Skip dbg_value instructions + while (TI != TIE && TI->isDebugValue()) + ++TI; + while (FI != FIE && FI->isDebugValue()) + ++FI; + while (TI != TIE && FI != FIE) { + // Skip dbg_value instructions. These do not count. 
+ if (TI->isDebugValue()) { + while (TI != TIE && TI->isDebugValue()) + ++TI; + if (TI == TIE) + break; + } + if (FI->isDebugValue()) { + while (FI != FIE && FI->isDebugValue()) + ++FI; + if (FI == FIE) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups1; @@ -524,7 +566,27 @@ bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI, TI = firstNonBranchInst(TrueBBI.BB, TII); FI = firstNonBranchInst(FalseBBI.BB, TII); - while (TI != TrueBBI.BB->begin() && FI != FalseBBI.BB->begin()) { + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin(); + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin(); + // Skip dbg_value instructions at end of the bb's. + while (TI != TIB && TI->isDebugValue()) + --TI; + while (FI != FIB && FI->isDebugValue()) + --FI; + while (TI != TIB && FI != FIB) { + // Skip dbg_value instructions. These do not count. + if (TI->isDebugValue()) { + while (TI != TIB && TI->isDebugValue()) + --TI; + if (TI == TIB) + break; + } + if (FI->isDebugValue()) { + while (FI != FIB && FI->isDebugValue()) + --FI; + if (FI == FIB) + break; + } if (!TI->isIdenticalTo(FI)) break; ++Dups2; @@ -556,7 +618,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { // No false branch. This BB must end with a conditional branch and a // fallthrough. if (!BBI.FalseBB) - BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); + BBI.FalseBB = findFalseBlock(BBI.BB, BBI.TrueBB); if (!BBI.FalseBB) { // Malformed bcc? True and false blocks are the same? BBI.IsUnpredicable = true; @@ -569,6 +631,9 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { BBI.ClobbersPred = false; for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end(); I != E; ++I) { + if (I->isDebugValue()) + continue; + const TargetInstrDesc &TID = I->getDesc(); if (TID.isNotDuplicable()) BBI.CannotBeCopied = true; @@ -702,8 +767,8 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, bool FNeedSub = FalseBBI.Predicate.size() > 0; bool Enqueued = false; if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize - (Dups + Dups2)) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize - (Dups + Dups2)) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize - (Dups + Dups2), + *FalseBBI.BB, FalseBBI.NonPredSize - (Dups + Dups2)) && FeasibilityAnalysis(TrueBBI, BBI.BrCond) && FeasibilityAnalysis(FalseBBI, RevCond)) { // Diamond: @@ -720,7 +785,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } if (ValidTriangle(TrueBBI, FalseBBI, false, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) { // Triangle: // EBB @@ -732,23 +797,23 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, Tokens.push_back(new IfcvtToken(BBI, ICTriangle, TNeedSub, Dups)); Enqueued = true; } - + if (ValidTriangle(TrueBBI, FalseBBI, true, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups)); Enqueued = true; } if (ValidSimple(TrueBBI, Dups) && - MeetIfcvtSizeLimit(TrueBBI.NonPredSize) && + MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize) && FeasibilityAnalysis(TrueBBI, BBI.BrCond)) { // Simple (split, no rejoin): // EBB // | \_ // | | // | TBB---> exit - // | + // | // FBB Tokens.push_back(new IfcvtToken(BBI, ICSimple, TNeedSub, Dups)); Enqueued = true; @@ 
-757,21 +822,21 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, if (CanRevCond) { // Try the other path... if (ValidTriangle(FalseBBI, TrueBBI, false, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups)); Enqueued = true; } if (ValidTriangle(FalseBBI, TrueBBI, true, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond, true, true)) { Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups)); Enqueued = true; } if (ValidSimple(FalseBBI, Dups) && - MeetIfcvtSizeLimit(FalseBBI.NonPredSize) && + MeetIfcvtSizeLimit(*FalseBBI.BB, FalseBBI.NonPredSize) && FeasibilityAnalysis(FalseBBI, RevCond)) { Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups)); Enqueued = true; @@ -785,11 +850,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB, } /// AnalyzeBlocks - Analyze all blocks and find entries for all if-conversion -/// candidates. It returns true if any CFG restructuring is done to expose more -/// if-conversion opportunities. -bool IfConverter::AnalyzeBlocks(MachineFunction &MF, +/// candidates. +void IfConverter::AnalyzeBlocks(MachineFunction &MF, std::vector &Tokens) { - bool Change = false; std::set Visited; for (unsigned i = 0, e = Roots.size(); i != e; ++i) { for (idf_ext_iterator I=idf_ext_begin(Roots[i],Visited), @@ -801,20 +864,23 @@ bool IfConverter::AnalyzeBlocks(MachineFunction &MF, // Sort to favor more complex ifcvt scheme. std::stable_sort(Tokens.begin(), Tokens.end(), IfcvtTokenCmp); - - return Change; } /// canFallThroughTo - Returns true either if ToBB is the next block after BB or /// that all the intervening blocks are empty (given BB can fall through to its /// next block). static bool canFallThroughTo(MachineBasicBlock *BB, MachineBasicBlock *ToBB) { - MachineFunction::iterator I = BB; + MachineFunction::iterator PI = BB; + MachineFunction::iterator I = llvm::next(PI); MachineFunction::iterator TI = ToBB; MachineFunction::iterator E = BB->getParent()->end(); - while (++I != TI) - if (I == E || !I->empty()) + while (I != TI) { + // Check isSuccessor to avoid case where the next block is empty, but + // it's not a successor. + if (I == E || !I->empty() || !PI->isSuccessor(I)) return false; + PI = I++; + } return true; } @@ -836,8 +902,9 @@ void IfConverter::InvalidatePreds(MachineBasicBlock *BB) { /// static void InsertUncondBranch(MachineBasicBlock *BB, MachineBasicBlock *ToBB, const TargetInstrInfo *TII) { + DebugLoc dl; // FIXME: this is nowhere SmallVector NoCond; - TII->InsertBranch(*BB, ToBB, NULL, NoCond); + TII->InsertBranch(*BB, ToBB, NULL, NoCond, dl); } /// RemoveExtraEdges - Remove true / false edges if either / both are no longer @@ -849,6 +916,66 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } +/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are +/// modeled as read + write (sort like two-address instructions). These +/// routines track register liveness and add implicit uses to if-converted +/// instructions to conform to the model. 
+static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), + E = BB->livein_end(); I != E; ++I) { + unsigned Reg = *I; + Redefs.insert(Reg); + for (const unsigned *Subreg = TRI->getSubRegisters(Reg); + *Subreg; ++Subreg) + Redefs.insert(*Subreg); + } +} + +static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, + const TargetRegisterInfo *TRI, + bool AddImpUse = false) { + SmallVector Defs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (MO.isDef()) + Defs.push_back(Reg); + else if (MO.isKill()) { + Redefs.erase(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.erase(*SR); + } + } + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + unsigned Reg = Defs[i]; + if (Redefs.count(Reg)) { + if (AddImpUse) + // Treat predicated update as read + write. + MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, + true/*IsImp*/,false/*IsKill*/)); + } else { + Redefs.insert(Reg); + for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) + Redefs.insert(*SR); + } + } +} + +static void UpdatePredRedefs(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + SmallSet &Redefs, + const TargetRegisterInfo *TRI) { + while (I != E) { + UpdatePredRedefs(I, Redefs, TRI); + ++I; + } +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -873,13 +1000,19 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { if (TII->ReverseBranchCondition(Cond)) assert(false && "Unable to reverse branch condition!"); + // Initialize liveins to the first BB. These are potentiall redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -922,6 +1055,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()]; BBInfo *CvtBBI = &TrueBBI; BBInfo *NextBBI = &FalseBBI; + DebugLoc dl; // FIXME: this is nowhere SmallVector Cond(BBI.BrCond.begin(), BBI.BrCond.end()); if (Kind == ICTriangleFalse || Kind == ICTriangleFRev) @@ -957,21 +1091,26 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } } + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(CvtBBI->BB, Redefs, TRI); + InitPredRedefs(NextBBI->BB, Redefs, TRI); + bool HasEarlyExit = CvtBBI->FalseBB != NULL; - bool DupBB = CvtBBI->BB->pred_size() > 1; - if (DupBB) { + if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. 
- CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); - MergeBlocks(BBI, *CvtBBI); + MergeBlocks(BBI, *CvtBBI, false); } // If 'true' block has a 'false' successor, add an exit branch to it. @@ -980,7 +1119,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { CvtBBI->BrCond.end()); if (TII->ReverseBranchCondition(RevCond)) assert(false && "Unable to reverse branch condition!"); - TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond); + TII->InsertBranch(*BBI.BB, CvtBBI->FalseBB, NULL, RevCond, dl); BBI.BB->addSuccessor(CvtBBI->FalseBB); } @@ -1009,7 +1148,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { RemoveExtraEdges(BBI); // Update block info. BB can be iteratively if-converted. - if (!IterIfcvt) + if (!IterIfcvt) BBI.IsDone = true; InvalidatePreds(BBI.BB); CvtBBI->IsDone = true; @@ -1044,9 +1183,9 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, return false; } - // Merge the 'true' and 'false' blocks by copying the instructions - // from the 'false' block to the 'true' block. That is, unless the true - // block would clobber the predicate, in that case, do the opposite. + // Put the predicated instructions from the 'true' block before the + // instructions from the 'false' block, unless the true block would clobber + // the predicate, in which case, do the opposite. BBInfo *BBI1 = &TrueBBI; BBInfo *BBI2 = &FalseBBI; SmallVector RevCond(BBI.BrCond.begin(), BBI.BrCond.end()); @@ -1071,39 +1210,72 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Remove the conditional branch from entry to the blocks. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + // Initialize liveins to the first BB. These are potentially redefined by + // predicated instructions. + SmallSet Redefs; + InitPredRedefs(BBI1->BB, Redefs, TRI); + // Remove the duplicated instructions at the beginnings of both paths. MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); MachineBasicBlock::iterator DI2 = BBI2->BB->begin(); + MachineBasicBlock::iterator DIE1 = BBI1->BB->end(); + MachineBasicBlock::iterator DIE2 = BBI2->BB->end(); + // Skip dbg_value instructions + while (DI1 != DIE1 && DI1->isDebugValue()) + ++DI1; + while (DI2 != DIE2 && DI2->isDebugValue()) + ++DI2; BBI1->NonPredSize -= NumDups1; BBI2->NonPredSize -= NumDups1; + + // Skip past the dups on each side separately since there may be + // differing dbg_value entries. + for (unsigned i = 0; i < NumDups1; ++DI1) { + if (!DI1->isDebugValue()) + ++i; + } while (NumDups1 != 0) { - ++DI1; ++DI2; - --NumDups1; + if (!DI2->isDebugValue()) + --NumDups1; } + + UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); // Predicate the 'true' block after removing its branch. BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB); DI1 = BBI1->BB->end(); - for (unsigned i = 0; i != NumDups2; ++i) + for (unsigned i = 0; i != NumDups2; ) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. 
+ assert (DI1 != BBI1->BB->begin()); --DI1; + // skip dbg_value instructions + if (!DI1->isDebugValue()) + ++i; + } BBI1->BB->erase(DI1, BBI1->BB->end()); - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs); // Predicate the 'false' block. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); while (NumDups2 != 0) { + // NumDups2 only counted non-dbg_value instructions, so this won't + // run off the head of the list. + assert (DI2 != BBI2->BB->begin()); --DI2; - --NumDups2; + // skip dbg_value instructions + if (!DI2->isDebugValue()) + --NumDups2; } - PredicateBlock(*BBI2, DI2, *Cond2); + PredicateBlock(*BBI2, DI2, *Cond2, Redefs); // Merge the true block into the entry of the diamond. - MergeBlocks(BBI, *BBI1); - MergeBlocks(BBI, *BBI2); + MergeBlocks(BBI, *BBI1, TailBB == 0); + MergeBlocks(BBI, *BBI2, TailBB == 0); // If the if-converted block falls through or unconditionally branches into // the tail block, and the tail block does not have other predecessors, then @@ -1111,16 +1283,32 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // tail, add a unconditional branch to it. if (TailBB) { BBInfo TailBBI = BBAnalysis[TailBB->getNumber()]; - if (TailBB->pred_size() == 1 && !TailBBI.HasFallThrough) { - BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); + bool CanMergeTail = !TailBBI.HasFallThrough; + // There may still be a fall-through edge from BBI1 or BBI2 to TailBB; + // check if there are any other predecessors besides those. + unsigned NumPreds = TailBB->pred_size(); + if (NumPreds > 1) + CanMergeTail = false; + else if (NumPreds == 1 && CanMergeTail) { + MachineBasicBlock::pred_iterator PI = TailBB->pred_begin(); + if (*PI != BBI1->BB && *PI != BBI2->BB) + CanMergeTail = false; + } + if (CanMergeTail) { MergeBlocks(BBI, TailBBI); TailBBI.IsDone = true; } else { + BBI.BB->addSuccessor(TailBB); InsertUncondBranch(BBI.BB, TailBB, TII); BBI.HasFallThrough = false; } } + // RemoveExtraEdges won't work if the block has an unanalyzable branch, + // which can happen here if TailBB is unanalyzable and is merged, so + // explicitly remove BBI1 and BBI2 as successors. + BBI.BB->removeSuccessor(BBI1->BB); + BBI.BB->removeSuccessor(BBI2->BB); RemoveExtraEdges(BBI); // Update block info. @@ -1135,9 +1323,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, /// specified end with the specified condition. void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, - SmallVectorImpl &Cond) { + SmallVectorImpl &Cond, + SmallSet &Redefs) { for (MachineBasicBlock::iterator I = BBI.BB->begin(); I != E; ++I) { - if (TII->isPredicated(I)) + if (I->isDebugValue() || TII->isPredicated(I)) continue; if (!TII->PredicateInstruction(I, Cond)) { #ifndef NDEBUG @@ -1145,6 +1334,10 @@ void IfConverter::PredicateBlock(BBInfo &BBI, #endif llvm_unreachable(0); } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(I, Redefs, TRI, true); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1152,48 +1345,55 @@ void IfConverter::PredicateBlock(BBInfo &BBI, BBI.IsAnalyzed = false; BBI.NonPredSize = 0; - NumIfConvBBs++; + ++NumIfConvBBs; } /// CopyAndPredicateBlock - Copy and predicate instructions from source BB to /// the destination block. Skip end of block branches if IgnoreBr is true. 
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, + SmallSet &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); for (MachineBasicBlock::iterator I = FromBBI.BB->begin(), E = FromBBI.BB->end(); I != E; ++I) { const TargetInstrDesc &TID = I->getDesc(); - bool isPredicated = TII->isPredicated(I); // Do not copy the end of the block branches. - if (IgnoreBr && !isPredicated && TID.isBranch()) + if (IgnoreBr && TID.isBranch()) break; MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - if (!isPredicated) + if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { #ifndef NDEBUG dbgs() << "Unable to predicate " << *I << "!\n"; #endif llvm_unreachable(0); } + } + + // If the predicated instruction now redefines a register as the result of + // if-conversion, add an implicit kill. + UpdatePredRedefs(MI, Redefs, TRI, true); } - std::vector Succs(FromBBI.BB->succ_begin(), - FromBBI.BB->succ_end()); - MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); - MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; + if (!IgnoreBr) { + std::vector Succs(FromBBI.BB->succ_begin(), + FromBBI.BB->succ_end()); + MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); + MachineBasicBlock *FallThrough = FromBBI.HasFallThrough ? NBB : NULL; - for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - MachineBasicBlock *Succ = Succs[i]; - // Fallthrough edge can't be transferred. - if (Succ == FallThrough) - continue; - ToBBI.BB->addSuccessor(Succ); + for (unsigned i = 0, e = Succs.size(); i != e; ++i) { + MachineBasicBlock *Succ = Succs[i]; + // Fallthrough edge can't be transferred. + if (Succ == FallThrough) + continue; + ToBBI.BB->addSuccessor(Succ); + } } std::copy(FromBBI.Predicate.begin(), FromBBI.Predicate.end(), @@ -1203,25 +1403,18 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, ToBBI.ClobbersPred |= FromBBI.ClobbersPred; ToBBI.IsAnalyzed = false; - NumDupBBs++; + ++NumDupBBs; } /// MergeBlocks - Move all instructions from FromBB to the end of ToBB. -/// -void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { +/// This will leave FromBB as an empty block, so remove all of its +/// successor edges except for the fall-through edge. If AddEdges is true, +/// i.e., when FromBBI's branch is being moved, add those successor edges to +/// ToBBI. +void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { ToBBI.BB->splice(ToBBI.BB->end(), FromBBI.BB, FromBBI.BB->begin(), FromBBI.BB->end()); - // Redirect all branches to FromBB to ToBB. - std::vector Preds(FromBBI.BB->pred_begin(), - FromBBI.BB->pred_end()); - for (unsigned i = 0, e = Preds.size(); i != e; ++i) { - MachineBasicBlock *Pred = Preds[i]; - if (Pred == ToBBI.BB) - continue; - Pred->ReplaceUsesOfBlockWith(FromBBI.BB, ToBBI.BB); - } - std::vector Succs(FromBBI.BB->succ_begin(), FromBBI.BB->succ_end()); MachineBasicBlock *NBB = getNextBlock(FromBBI.BB); @@ -1233,7 +1426,8 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - ToBBI.BB->addSuccessor(Succ); + if (AddEdges) + ToBBI.BB->addSuccessor(Succ); } // Now FromBBI always falls through to the next block! 
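The IfConversion changes above retire the flat TLI->getIfCvtBlockSizeLimit()/getIfCvtDupBlockSizeLimit() cutoffs in favor of per-block TargetInstrInfo queries (isProfitableToIfCvt, isProfitableToDupForIfCvt). The sketch below shows what a target override of those hooks could look like; MyTargetInstrInfo is a hypothetical class, and the signatures are inferred from the call sites in this patch rather than from the (unshown) TargetInstrInfo declaration, so treat them as assumptions.

#include "MyTargetInstrInfo.h" // hypothetical target header declaring the overrides
using namespace llvm;

// Hypothetical target hooks: accept a candidate block (or diamond) for
// if-conversion only when it is small enough to plausibly beat a branch.
bool MyTargetInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
                                            unsigned NumInstrs) const {
  // Single-block (simple/triangle) case: cap the predicated block size.
  return NumInstrs > 0 && NumInstrs <= 4;
}

bool MyTargetInstrInfo::isProfitableToIfCvt(MachineBasicBlock &TBB,
                                            unsigned TSize,
                                            MachineBasicBlock &FBB,
                                            unsigned FSize) const {
  // Diamond case: both sides get predicated, so bound the combined size.
  return TSize > 0 && FSize > 0 && TSize + FSize <= 6;
}

bool MyTargetInstrInfo::isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
                                                  unsigned NumInstrs) const {
  // Duplication multiplies code size; be stricter than plain predication.
  return NumInstrs <= 2;
}

A target with real scheduling information would typically weigh mispredict cost against the predicated instruction count instead of the fixed thresholds used in this sketch.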
diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp new file mode 100644 index 000000000000..12adcaa3a22e --- /dev/null +++ b/lib/CodeGen/InlineSpiller.cpp @@ -0,0 +1,408 @@ +//===-------- InlineSpiller.cpp - Insert spills and restores inline -------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// The inline spiller modifies the machine function directly instead of +// inserting spills and restores in VirtRegMap. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "spiller" +#include "Spiller.h" +#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { +class InlineSpiller : public Spiller { + MachineFunction &mf_; + LiveIntervals &lis_; + VirtRegMap &vrm_; + MachineFrameInfo &mfi_; + MachineRegisterInfo &mri_; + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + const BitVector reserved_; + + // Variables that are valid during spill(), but used by multiple methods. + LiveInterval *li_; + std::vector *newIntervals_; + const TargetRegisterClass *rc_; + int stackSlot_; + const SmallVectorImpl *spillIs_; + + // Values of the current interval that can potentially remat. + SmallPtrSet reMattable_; + + // Values in reMattable_ that failed to remat at some point. + SmallPtrSet usedValues_; + + ~InlineSpiller() {} + +public: + InlineSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) + : mf_(*mf), lis_(*lis), vrm_(*vrm), + mfi_(*mf->getFrameInfo()), + mri_(mf->getRegInfo()), + tii_(*mf->getTarget().getInstrInfo()), + tri_(*mf->getTarget().getRegisterInfo()), + reserved_(tri_.getReservedRegs(mf_)) {} + + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestIndex); + +private: + bool allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, + SlotIndex UseIdx); + bool reMaterializeFor(MachineBasicBlock::iterator MI); + void reMaterializeAll(); + + bool foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops); + void insertReload(LiveInterval &NewLI, MachineBasicBlock::iterator MI); + void insertSpill(LiveInterval &NewLI, MachineBasicBlock::iterator MI); +}; +} + +namespace llvm { +Spiller *createInlineSpiller(MachineFunction *mf, + LiveIntervals *lis, + const MachineLoopInfo *mli, + VirtRegMap *vrm) { + return new InlineSpiller(mf, lis, vrm); +} +} + +/// allUsesAvailableAt - Return true if all registers used by OrigMI at +/// OrigIdx are also available with the same value at UseIdx. +bool InlineSpiller::allUsesAvailableAt(const MachineInstr *OrigMI, + SlotIndex OrigIdx, + SlotIndex UseIdx) { + OrigIdx = OrigIdx.getUseIndex(); + UseIdx = UseIdx.getUseIndex(); + for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = OrigMI->getOperand(i); + if (!MO.isReg() || !MO.getReg() || MO.getReg() == li_->reg) + continue; + // Reserved registers are OK. 
+ if (MO.isUndef() || !lis_.hasInterval(MO.getReg())) + continue; + // We don't want to move any defs. + if (MO.isDef()) + return false; + // We cannot depend on virtual registers in spillIs_. They will be spilled. + for (unsigned si = 0, se = spillIs_->size(); si != se; ++si) + if ((*spillIs_)[si]->reg == MO.getReg()) + return false; + + LiveInterval &LI = lis_.getInterval(MO.getReg()); + const VNInfo *OVNI = LI.getVNInfoAt(OrigIdx); + if (!OVNI) + continue; + if (OVNI != LI.getVNInfoAt(UseIdx)) + return false; + } + return true; +} + +/// reMaterializeFor - Attempt to rematerialize li_->reg before MI instead of +/// reloading it. +bool InlineSpiller::reMaterializeFor(MachineBasicBlock::iterator MI) { + SlotIndex UseIdx = lis_.getInstructionIndex(MI).getUseIndex(); + VNInfo *OrigVNI = li_->getVNInfoAt(UseIdx); + if (!OrigVNI) { + DEBUG(dbgs() << "\tadding flags: "); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) + MO.setIsUndef(); + } + DEBUG(dbgs() << UseIdx << '\t' << *MI); + return true; + } + if (!reMattable_.count(OrigVNI)) { + DEBUG(dbgs() << "\tusing non-remat valno " << OrigVNI->id << ": " + << UseIdx << '\t' << *MI); + return false; + } + MachineInstr *OrigMI = lis_.getInstructionFromIndex(OrigVNI->def); + if (!allUsesAvailableAt(OrigMI, OrigVNI->def, UseIdx)) { + usedValues_.insert(OrigVNI); + DEBUG(dbgs() << "\tcannot remat for " << UseIdx << '\t' << *MI); + return false; + } + + // If the instruction also writes li_->reg, it had better not require the same + // register for uses and defs. + bool Reads, Writes; + SmallVector Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li_->reg, &Ops); + if (Writes) { + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + if (MO.isUse() ? MI->isRegTiedToDefOperand(Ops[i]) : MO.getSubReg()) { + usedValues_.insert(OrigVNI); + DEBUG(dbgs() << "\tcannot remat tied reg: " << UseIdx << '\t' << *MI); + return false; + } + } + } + + // Alocate a new register for the remat. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + newIntervals_->push_back(&NewLI); + + // Finally we can rematerialize OrigMI before MI. + MachineBasicBlock &MBB = *MI->getParent(); + tii_.reMaterialize(MBB, MI, NewLI.reg, 0, OrigMI, tri_); + MachineBasicBlock::iterator RematMI = MI; + SlotIndex DefIdx = lis_.InsertMachineInstrInMaps(--RematMI).getDefIndex(); + DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *RematMI); + + // Replace operands + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + if (MO.isReg() && MO.isUse() && MO.getReg() == li_->reg) { + MO.setReg(NewVReg); + MO.setIsKill(); + } + } + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + + VNInfo *DefVNI = NewLI.getNextValue(DefIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(DefIdx, UseIdx.getDefIndex(), DefVNI)); + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + return true; +} + +/// reMaterializeAll - Try to rematerialize as many uses of li_ as possible, +/// and trim the live ranges after. +void InlineSpiller::reMaterializeAll() { + // Do a quick scan of the interval values to find if any are remattable. 
+ reMattable_.clear(); + usedValues_.clear(); + for (LiveInterval::const_vni_iterator I = li_->vni_begin(), + E = li_->vni_end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->isUnused() || !VNI->isDefAccurate()) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + if (!DefMI || !tii_.isTriviallyReMaterializable(DefMI)) + continue; + reMattable_.insert(VNI); + } + + // Often, no defs are remattable. + if (reMattable_.empty()) + return; + + // Try to remat before all uses of li_->reg. + bool anyRemat = false; + for (MachineRegisterInfo::use_nodbg_iterator + RI = mri_.use_nodbg_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) + anyRemat |= reMaterializeFor(MI); + + if (!anyRemat) + return; + + // Remove any values that were completely rematted. + bool anyRemoved = false; + for (SmallPtrSet::iterator I = reMattable_.begin(), + E = reMattable_.end(); I != E; ++I) { + VNInfo *VNI = *I; + if (VNI->hasPHIKill() || usedValues_.count(VNI)) + continue; + MachineInstr *DefMI = lis_.getInstructionFromIndex(VNI->def); + DEBUG(dbgs() << "\tremoving dead def: " << VNI->def << '\t' << *DefMI); + lis_.RemoveMachineInstrFromMaps(DefMI); + vrm_.RemoveMachineInstrFromMaps(DefMI); + DefMI->eraseFromParent(); + li_->removeValNo(VNI); + anyRemoved = true; + } + + if (!anyRemoved) + return; + + // Removing values may cause debug uses where li_ is not live. + for (MachineRegisterInfo::use_iterator RI = mri_.use_begin(li_->reg); + MachineInstr *MI = RI.skipInstruction();) { + if (!MI->isDebugValue()) + continue; + // Try to preserve the debug value if li_ is live immediately after it. + MachineBasicBlock::iterator NextMI = MI; + ++NextMI; + if (NextMI != MI->getParent()->end() && !lis_.isNotInMIMap(NextMI)) { + SlotIndex NearIdx = lis_.getInstructionIndex(NextMI); + if (li_->liveAt(NearIdx)) + continue; + } + DEBUG(dbgs() << "Removing debug info due to remat:" << "\t" << *MI); + MI->eraseFromParent(); + } +} + +/// foldMemoryOperand - Try folding stack slot references in Ops into MI. +/// Return true on success, and MI will be erased. +bool InlineSpiller::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops) { + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied + // operands. + SmallVector FoldOps; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + unsigned Idx = Ops[i]; + MachineOperand &MO = MI->getOperand(Idx); + if (MO.isImplicit()) + continue; + // FIXME: Teach targets to deal with subregs. + if (MO.getSubReg()) + return false; + // Tied use operands should not be passed to foldMemoryOperand. + if (!MI->isRegTiedToDefOperand(Idx)) + FoldOps.push_back(Idx); + } + + MachineInstr *FoldMI = tii_.foldMemoryOperand(MI, FoldOps, stackSlot_); + if (!FoldMI) + return false; + lis_.ReplaceMachineInstrInMaps(MI, FoldMI); + vrm_.addSpillSlotUse(stackSlot_, FoldMI); + MI->eraseFromParent(); + DEBUG(dbgs() << "\tfolded: " << *FoldMI); + return true; +} + +/// insertReload - Insert a reload of NewLI.reg before MI. +void InlineSpiller::insertReload(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.loadRegFromStackSlot(MBB, MI, NewLI.reg, stackSlot_, rc_, &tri_); + --MI; // Point to load instruction. 
+ SlotIndex LoadIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); + VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); +} + +/// insertSpill - Insert a spill of NewLI.reg after MI. +void InlineSpiller::insertSpill(LiveInterval &NewLI, + MachineBasicBlock::iterator MI) { + MachineBasicBlock &MBB = *MI->getParent(); + SlotIndex Idx = lis_.getInstructionIndex(MI).getDefIndex(); + tii_.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, stackSlot_, rc_, &tri_); + --MI; // Point to store instruction. + SlotIndex StoreIdx = lis_.InsertMachineInstrInMaps(MI).getDefIndex(); + vrm_.addSpillSlotUse(stackSlot_, MI); + DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); + VNInfo *StoreVNI = NewLI.getNextValue(Idx, 0, true, + lis_.getVNInfoAllocator()); + NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); +} + +void InlineSpiller::spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestIndex) { + DEBUG(dbgs() << "Inline spilling " << *li << "\n"); + assert(li->isSpillable() && "Attempting to spill already spilled value."); + assert(!li->isStackSlot() && "Trying to spill a stack slot."); + + li_ = li; + newIntervals_ = &newIntervals; + rc_ = mri_.getRegClass(li->reg); + spillIs_ = &spillIs; + + reMaterializeAll(); + + // Remat may handle everything. + if (li_->empty()) + return; + + stackSlot_ = vrm_.assignVirt2StackSlot(li->reg); + + // Iterate over instructions using register. + for (MachineRegisterInfo::reg_iterator RI = mri_.reg_begin(li->reg); + MachineInstr *MI = RI.skipInstruction();) { + + // Debug values are not allowed to affect codegen. + if (MI->isDebugValue()) { + // Modify DBG_VALUE now that the value is in a spill slot. + uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = MI->getOperand(2).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = tii_.emitFrameIndexDebugValue(mf_, stackSlot_, + Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else { + DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); + MI->eraseFromParent(); + } + continue; + } + + // Analyze instruction. + bool Reads, Writes; + SmallVector Ops; + tie(Reads, Writes) = MI->readsWritesVirtualRegister(li->reg, &Ops); + + // Attempt to fold memory ops. + if (foldMemoryOperand(MI, Ops)) + continue; + + // Allocate interval around instruction. + // FIXME: Infer regclass from instruction alone. + unsigned NewVReg = mri_.createVirtualRegister(rc_); + vrm_.grow(); + LiveInterval &NewLI = lis_.getOrCreateInterval(NewVReg); + NewLI.markNotSpillable(); + + if (Reads) + insertReload(NewLI, MI); + + // Rewrite instruction operands. + bool hasLiveDef = false; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(Ops[i]); + MO.setReg(NewVReg); + if (MO.isUse()) { + if (!MI->isRegTiedToDefOperand(Ops[i])) + MO.setIsKill(); + } else { + if (!MO.isDead()) + hasLiveDef = true; + } + } + + // FIXME: Use a second vreg if instruction has no tied ops. 
+ if (Writes && hasLiveDef) + insertSpill(NewLI, MI); + + DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + newIntervals.push_back(&NewLI); + } +} diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 63bb5f21f8f1..03ae214ae7da 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -16,6 +16,7 @@ #include "llvm/Module.h" #include "llvm/Type.h" #include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/IRBuilder.h" #include "llvm/Support/raw_ostream.h" @@ -314,21 +315,22 @@ static Value *LowerCTLZ(LLVMContext &Context, Value *V, Instruction *IP) { static void ReplaceFPIntrinsicWithCall(CallInst *CI, const char *Fname, const char *Dname, const char *LDname) { - switch (CI->getOperand(1)->getType()->getTypeID()) { + CallSite CS(CI); + switch (CI->getArgOperand(0)->getType()->getTypeID()) { default: llvm_unreachable("Invalid type in intrinsic"); case Type::FloatTyID: - ReplaceCallWith(Fname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Fname, CI, CS.arg_begin(), CS.arg_end(), Type::getFloatTy(CI->getContext())); break; case Type::DoubleTyID: - ReplaceCallWith(Dname, CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith(Dname, CI, CS.arg_begin(), CS.arg_end(), Type::getDoubleTy(CI->getContext())); break; case Type::X86_FP80TyID: case Type::FP128TyID: case Type::PPC_FP128TyID: - ReplaceCallWith(LDname, CI, CI->op_begin() + 1, CI->op_end(), - CI->getOperand(1)->getType()); + ReplaceCallWith(LDname, CI, CS.arg_begin(), CS.arg_end(), + CI->getArgOperand(0)->getType()); break; } } @@ -340,6 +342,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { const Function *Callee = CI->getCalledFunction(); assert(Callee && "Cannot lower an indirect call!"); + CallSite CS(CI); switch (Callee->getIntrinsicID()) { case Intrinsic::not_intrinsic: report_fatal_error("Cannot lower a call to a non-intrinsic function '"+ @@ -353,7 +356,7 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { // by the lowerinvoke pass. In both cases, the right thing to do is to // convert the call to an explicit setjmp or longjmp call. 
case Intrinsic::setjmp: { - Value *V = ReplaceCallWith("setjmp", CI, CI->op_begin() + 1, CI->op_end(), + Value *V = ReplaceCallWith("setjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getInt32Ty(Context)); if (!CI->getType()->isVoidTy()) CI->replaceAllUsesWith(V); @@ -365,32 +368,32 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; case Intrinsic::longjmp: { - ReplaceCallWith("longjmp", CI, CI->op_begin() + 1, CI->op_end(), + ReplaceCallWith("longjmp", CI, CS.arg_begin(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::siglongjmp: { // Insert the call to abort - ReplaceCallWith("abort", CI, CI->op_end(), CI->op_end(), + ReplaceCallWith("abort", CI, CS.arg_end(), CS.arg_end(), Type::getVoidTy(Context)); break; } case Intrinsic::ctpop: - CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTPOP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::bswap: - CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerBSWAP(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::ctlz: - CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getOperand(1), CI)); + CI->replaceAllUsesWith(LowerCTLZ(Context, CI->getArgOperand(0), CI)); break; case Intrinsic::cttz: { // cttz(x) -> ctpop(~X & (X-1)) - Value *Src = CI->getOperand(1); + Value *Src = CI->getArgOperand(0); Value *NotSrc = Builder.CreateNot(Src); NotSrc->setName(Src->getName() + ".not"); Value *SrcM1 = ConstantInt::get(Src->getType(), 1); @@ -451,37 +454,37 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { case Intrinsic::memcpy: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memcpy", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memmove: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); - Ops[1] = CI->getOperand(2); + Ops[0] = CI->getArgOperand(0); + Ops[1] = CI->getArgOperand(1); Ops[2] = Size; - ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memmove", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::memset: { const IntegerType *IntPtr = TD.getIntPtrType(Context); - Value *Size = Builder.CreateIntCast(CI->getOperand(3), IntPtr, + Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getOperand(1); + Ops[0] = CI->getArgOperand(0); // Extend the amount to i32. 
- Ops[1] = Builder.CreateIntCast(CI->getOperand(2), Type::getInt32Ty(Context), + Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), /* isSigned */ false); Ops[2] = Size; - ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getOperand(1)->getType()); + ReplaceCallWith("memset", CI, Ops, Ops+3, CI->getArgOperand(0)->getType()); break; } case Intrinsic::sqrt: { diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index b584704bff3d..bf3137e49536 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -329,12 +329,15 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, if (OptLevel != CodeGenOpt::None) PM.add(createOptimizePHIsPass()); - // Delete dead machine instructions regardless of optimization level. - PM.add(createDeadMachineInstructionElimPass()); - printAndVerify(PM, "After codegen DCE pass", - /* allowDoubleDefs= */ true); - if (OptLevel != CodeGenOpt::None) { + // With optimization, dead code should already be eliminated. However + // there is one known exception: lowered code for arguments that are only + // used by tail calls, where the tail calls reuse the incoming stack + // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). + PM.add(createDeadMachineInstructionElimPass()); + printAndVerify(PM, "After codegen DCE pass", + /* allowDoubleDefs= */ true); + PM.add(createOptimizeExtsPass()); if (!DisableMachineLICM) PM.add(createMachineLICMPass()); @@ -358,7 +361,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, /* allowDoubleDefs= */ true); // Perform register allocation. - PM.add(createRegisterAllocator()); + PM.add(createRegisterAllocator(OptLevel)); printAndVerify(PM, "After Register Allocation"); // Perform stack slot coloring and post-ra machine LICM. diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp index 03b4eab93dca..b9527fafbee8 100644 --- a/lib/CodeGen/LatencyPriorityQueue.cpp +++ b/lib/CodeGen/LatencyPriorityQueue.cpp @@ -118,7 +118,7 @@ void LatencyPriorityQueue::AdjustPriorityOfUnscheduledPreds(SUnit *SU) { SUnit *LatencyPriorityQueue::pop() { if (empty()) return NULL; std::vector::iterator Best = Queue.begin(); - for (std::vector::iterator I = next(Queue.begin()), + for (std::vector::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 025ad0538f2c..21a9b7d4db6f 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -68,6 +68,37 @@ bool LiveInterval::liveBeforeAndAt(SlotIndex I) const { return r->end == I; } +/// killedAt - Return true if a live range ends at index. Note that the kill +/// point is not contained in the half-open live range. It is usually the +/// getDefIndex() slot following its last use. +bool LiveInterval::killedAt(SlotIndex I) const { + Ranges::const_iterator r = std::lower_bound(ranges.begin(), ranges.end(), I); + + // Now r points to the first interval with start >= I, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= I. + // r->end is the kill point. + return r->end == I; +} + +/// killedInRange - Return true if the interval has kills in [Start,End). 
+bool LiveInterval::killedInRange(SlotIndex Start, SlotIndex End) const { + Ranges::const_iterator r = + std::lower_bound(ranges.begin(), ranges.end(), End); + + // Now r points to the first interval with start >= End, or ranges.end(). + if (r == ranges.begin()) + return false; + + --r; + // Now r points to the last interval with end <= End. + // r->end is the kill point. + return r->end >= Start && r->end < End; +} + // overlaps - Return true if the intersection of the two live intervals is // not empty. // @@ -149,7 +180,6 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { assert(I != ranges.end() && "Not a valid interval!"); VNInfo *ValNo = I->valno; - SlotIndex OldEnd = I->end; // Search for the first interval that we can't merge with. Ranges::iterator MergeTo = next(I); @@ -163,9 +193,6 @@ void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { // Erase any dead ranges. ranges.erase(next(I), MergeTo); - // Update kill info. - ValNo->removeKills(OldEnd, I->end.getPrevSlot()); - // If the newly formed range now touches the range after it and if they have // the same value number, merge the two ranges into one range. Ranges::iterator Next = next(I); @@ -245,9 +272,6 @@ LiveInterval::addRangeFrom(LiveRange LR, iterator From) { // endpoint as well. if (End > it->end) extendIntervalEndTo(it, End); - else if (End < it->end) - // Overlapping intervals, there might have been a kill here. - it->valno->removeKill(End); return it; } } else { @@ -288,7 +312,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { - ValNo->removeKills(Start, End); if (RemoveDeadValNo) { // Check if val# is dead. bool isDead = true; @@ -296,7 +319,7 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, if (II != I && II->valno == ValNo) { isDead = false; break; - } + } if (isDead) { // Now that ValNo is dead, remove it. If it is the largest value // number, just nuke it (and any other deleted values neighboring it), @@ -320,7 +343,6 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, // Otherwise if the span we are removing is at the end of the LiveRange, // adjust the other way. if (I->end == End) { - ValNo->removeKills(Start, End); I->end = Start; return; } @@ -529,6 +551,7 @@ void LiveInterval::MergeValueInAsValue( SmallVector ReplacedValNos; iterator IP = begin(); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) { + assert(I->valno == RHS.getValNumInfo(I->valno->id) && "Bad VNInfo"); if (I->valno != RHSValNo) continue; SlotIndex Start = I->start, End = I->end; @@ -823,10 +846,12 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { else { OS << " = "; for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) - OS << *I; + E = ranges.end(); I != E; ++I) { + OS << *I; + assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); + } } - + // Print value number info. 
if (getNumValNums()) { OS << " "; @@ -843,21 +868,6 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const { OS << "?"; else OS << vni->def; - unsigned ee = vni->kills.size(); - if (ee || vni->hasPHIKill()) { - OS << "-("; - for (unsigned j = 0; j != ee; ++j) { - OS << vni->kills[j]; - if (j != ee-1) - OS << " "; - } - if (vni->hasPHIKill()) { - if (ee) - OS << " "; - OS << "phi"; - } - OS << ")"; - } } } } diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index a6d38adeab04..194d03d8dbfb 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -50,9 +50,6 @@ using namespace llvm; static cl::opt DisableReMat("disable-rematerialization", cl::init(false), cl::Hidden); -static cl::opt EnableFastSpilling("fast-spill", - cl::init(false), cl::Hidden); - STATISTIC(numIntervals , "Number of original intervals"); STATISTIC(numFolds , "Number of loads/stores folded into instructions"); STATISTIC(numSplits , "Number of intervals split"); @@ -90,8 +87,8 @@ void LiveIntervals::releaseMemory() { r2iMap_.clear(); - // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.DestroyAll(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); while (!CloneMIs.empty()) { MachineInstr *MI = CloneMIs.back(); CloneMIs.pop_back(); @@ -195,6 +192,10 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) if (SrcReg == li.reg || DstReg == li.reg) continue; + if (MI.isCopy()) + if (MI.getOperand(0).getReg() == li.reg || + MI.getOperand(1).getReg() == li.reg) + continue; // Check for operands using reg for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -218,10 +219,7 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li, return false; } -/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except -/// it checks for sub-register reference and it can check use as well. 
-bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, - unsigned Reg, bool CheckUse, +bool LiveIntervals::conflictsWithAliasRef(LiveInterval &li, unsigned Reg, SmallPtrSet &JoinedCopies) { for (LiveInterval::Ranges::const_iterator I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { @@ -239,12 +237,11 @@ bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li, MachineOperand& MO = MI->getOperand(i); if (!MO.isReg()) continue; - if (MO.isUse() && !CheckUse) - continue; unsigned PhysReg = MO.getReg(); - if (PhysReg == 0 || TargetRegisterInfo::isVirtualRegister(PhysReg)) + if (PhysReg == 0 || PhysReg == Reg || + TargetRegisterInfo::isVirtualRegister(PhysReg)) continue; - if (tri_->isSubRegister(Reg, PhysReg)) + if (tri_->regsOverlap(Reg, PhysReg)) return true; } } @@ -272,7 +269,7 @@ bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { if (MO.getReg() == Reg && MO.isDef()) { assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && MI.getOperand(MOIdx).getSubReg() && - MO.getSubReg()); + (MO.getSubReg() || MO.isImplicit())); return true; } } @@ -328,9 +325,10 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg() || - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (mi->isCopyLike() || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) { CopyMI = mi; + } VNInfo *ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -356,7 +354,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, LiveRange LR(defIndex, killIdx, ValNo); interval.addRange(LR); DEBUG(dbgs() << " +" << LR << "\n"); - ValNo->addKill(killIdx); return; } } @@ -376,7 +373,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // valno in the killing blocks. assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); DEBUG(dbgs() << " phi-join"); - ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); } else { // Iterate over all of the blocks that the variable is completely @@ -407,7 +403,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, } LiveRange LR(Start, killIdx, ValNo); interval.addRange(LR); - ValNo->addKill(killIdx); DEBUG(dbgs() << " +" << LR); } @@ -434,11 +429,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // are actually two values in the live interval. Because of this we // need to take the LiveRegion that defines this register and split it // into two values. - // Two-address vregs should always only be redefined once. This means - // that at this point, there should be exactly one value number in it. - assert((PartReDef || interval.containsOneValue()) && - "Unexpected 2-addr liveint!"); - SlotIndex DefIndex = interval.getValNumInfo(0)->def.getDefIndex(); SlotIndex RedefIndex = MIIdx.getDefIndex(); if (MO.isEarlyClobber()) RedefIndex = MIIdx.getUseIndex(); @@ -446,8 +436,9 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, const LiveRange *OldLR = interval.getLiveRangeContaining(RedefIndex.getUseIndex()); VNInfo *OldValNo = OldLR->valno; + SlotIndex DefIndex = OldValNo->def.getDefIndex(); - // Delete the initial value, which should be short and continuous, + // Delete the previous value, which should be short and continuous, // because the 2-addr copy must be in the same MBB as the redef. 
interval.removeRange(DefIndex, RedefIndex); @@ -464,15 +455,14 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, // A re-def may be a copy. e.g. %reg1030:6 = VMOVD %reg1026, ... unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (PartReDef && - tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (PartReDef && (mi->isCopyLike() || + tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg))) OldValNo->setCopy(&*mi); // Add the new live interval which replaces the range for the input copy. LiveRange LR(DefIndex, RedefIndex, ValNo); DEBUG(dbgs() << " replace range with " << LR); interval.addRange(LR); - ValNo->addKill(RedefIndex); // If this redefinition is dead, we need to add a dummy unit live // range covering the def slot. @@ -496,7 +486,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, VNInfo *ValNo; MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (mi->isExtractSubreg() || mi->isInsertSubreg() || mi->isSubregToReg()|| + if (mi->isCopyLike() || tii_->isMoveInstr(*mi, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = mi; ValNo = interval.getNextValue(defIndex, CopyMI, true, VNInfoAllocator); @@ -504,7 +494,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, SlotIndex killIndex = getMBBEndIdx(mbb); LiveRange LR(defIndex, killIndex, ValNo); interval.addRange(LR); - ValNo->addKill(indexes_->getTerminatorGap(mbb)); ValNo->setHasPHIKill(true); DEBUG(dbgs() << " phi-join +" << LR); } else { @@ -600,7 +589,6 @@ exit: ValNo->setHasRedefByEC(true); LiveRange LR(start, end, ValNo); interval.addRange(LR); - LR.valno->addKill(end); DEBUG(dbgs() << " +" << LR << '\n'); } @@ -615,7 +603,7 @@ void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, else if (allocatableRegs_[MO.getReg()]) { MachineInstr *CopyMI = NULL; unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg() || + if (MI->isCopyLike() || tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg)) CopyMI = MI; handlePhysicalRegisterDef(MBB, MI, MIIdx, MO, @@ -701,7 +689,6 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB, LiveRange LR(start, end, vni); interval.addRange(LR); - LR.valno->addKill(end); DEBUG(dbgs() << " +" << LR << '\n'); } @@ -787,37 +774,6 @@ LiveInterval* LiveIntervals::dupInterval(LiveInterval *li) { return NewLI; } -/// getVNInfoSourceReg - Helper function that parses the specified VNInfo -/// copy field and returns the source register that defines it. -unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const { - if (!VNI->getCopy()) - return 0; - - if (VNI->getCopy()->isExtractSubreg()) { - // If it's extracting out of a physical register, return the sub-register. - unsigned Reg = VNI->getCopy()->getOperand(1).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm(); - unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg(); - if (SrcSubReg == DstSubReg) - // %reg1034:3 = EXTRACT_SUBREG %EDX, 3 - // reg1034 can still be coalesced to EDX. 
- return Reg; - assert(DstSubReg == 0); - Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm()); - } - return Reg; - } else if (VNI->getCopy()->isInsertSubreg() || - VNI->getCopy()->isSubregToReg()) - return VNI->getCopy()->getOperand(2).getReg(); - - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*VNI->getCopy(), SrcReg, DstReg, SrcSubReg, DstSubReg)) - return SrcReg; - llvm_unreachable("Unrecognized copy instruction!"); - return 0; -} - //===----------------------------------------------------------------------===// // Register allocator hooks. // @@ -991,22 +947,22 @@ bool LiveIntervals::tryFoldMemoryOperand(MachineInstr* &MI, if (DefMI && (MRInfo & VirtRegMap::isMod)) return false; - MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(*mf_, MI, FoldOps, Slot) - : tii_->foldMemoryOperand(*mf_, MI, FoldOps, DefMI); + MachineInstr *fmi = isSS ? tii_->foldMemoryOperand(MI, FoldOps, Slot) + : tii_->foldMemoryOperand(MI, FoldOps, DefMI); if (fmi) { // Remember this instruction uses the spill slot. if (isSS) vrm.addSpillSlotUse(Slot, fmi); // Attempt to fold the memory reference into the instruction. If // we can do this, we don't need to insert spill code. - MachineBasicBlock &MBB = *MI->getParent(); if (isSS && !mf_->getFrameInfo()->isImmutableObjectIndex(Slot)) vrm.virtFolded(Reg, MI, fmi, (VirtRegMap::ModRef)MRInfo); vrm.transferSpillPts(MI, fmi); vrm.transferRestorePts(MI, fmi); vrm.transferEmergencySpills(MI, fmi); ReplaceMachineInstrInMaps(MI, fmi); - MI = MBB.insert(MBB.erase(MI), fmi); + MI->eraseFromParent(); + MI = fmi; ++numFolds; return true; } @@ -1098,7 +1054,6 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, if (!mop.isReg()) continue; unsigned Reg = mop.getReg(); - unsigned RegI = Reg; if (Reg == 0 || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; if (Reg != li.reg) @@ -1140,26 +1095,8 @@ rewriteInstructionForSpills(const LiveInterval &li, const VNInfo *VNI, // // Keep track of whether we replace a use and/or def so that we can // create the spill interval with the appropriate range. - - HasUse = mop.isUse(); - HasDef = mop.isDef(); SmallVector Ops; - Ops.push_back(i); - for (unsigned j = i+1, e = MI->getNumOperands(); j != e; ++j) { - const MachineOperand &MOj = MI->getOperand(j); - if (!MOj.isReg()) - continue; - unsigned RegJ = MOj.getReg(); - if (RegJ == 0 || TargetRegisterInfo::isPhysicalRegister(RegJ)) - continue; - if (RegJ == RegI) { - Ops.push_back(j); - if (!MOj.isUndef()) { - HasUse |= MOj.isUse(); - HasDef |= MOj.isDef(); - } - } - } + tie(HasUse, HasDef) = MI->readsWritesVirtualRegister(Reg, &Ops); // Create a new virtual register for the spill interval. 
// Create the new register now so we can map the fold instruction @@ -1294,16 +1231,7 @@ bool LiveIntervals::anyKillInMBBAfterIdx(const LiveInterval &li, const VNInfo *VNI, MachineBasicBlock *MBB, SlotIndex Idx) const { - SlotIndex End = getMBBEndIdx(MBB); - for (unsigned j = 0, ee = VNI->kills.size(); j != ee; ++j) { - if (VNI->kills[j].isPHI()) - continue; - - SlotIndex KillIdx = VNI->kills[j]; - if (KillIdx > Idx && KillIdx <= End) - return true; - } - return false; + return li.killedInRange(Idx.getNextSlot(), getMBBEndIdx(MBB)); } /// RewriteInfo - Keep track of machine instrs that will be rewritten @@ -1312,10 +1240,7 @@ namespace { struct RewriteInfo { SlotIndex Index; MachineInstr *MI; - bool HasUse; - bool HasDef; - RewriteInfo(SlotIndex i, MachineInstr *mi, bool u, bool d) - : Index(i), MI(mi), HasUse(u), HasDef(d) {} + RewriteInfo(SlotIndex i, MachineInstr *mi) : Index(i), MI(mi) {} }; struct RewriteInfoCompare { @@ -1394,7 +1319,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // easily see a situation where both registers are reloaded before // the INSERT_SUBREG and both target registers that would overlap. continue; - RewriteMIs.push_back(RewriteInfo(index, MI, O.isUse(), O.isDef())); + RewriteMIs.push_back(RewriteInfo(index, MI)); } std::sort(RewriteMIs.begin(), RewriteMIs.end(), RewriteInfoCompare()); @@ -1404,18 +1329,11 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, RewriteInfo &rwi = RewriteMIs[i]; ++i; SlotIndex index = rwi.Index; - bool MIHasUse = rwi.HasUse; - bool MIHasDef = rwi.HasDef; MachineInstr *MI = rwi.MI; // If MI def and/or use the same register multiple times, then there // are multiple entries. - unsigned NumUses = MIHasUse; while (i != e && RewriteMIs[i].MI == MI) { assert(RewriteMIs[i].Index == index); - bool isUse = RewriteMIs[i].HasUse; - if (isUse) ++NumUses; - MIHasUse |= isUse; - MIHasDef |= RewriteMIs[i].HasDef; ++i; } MachineBasicBlock *MBB = MI->getParent(); @@ -1440,7 +1358,8 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit, // = use // It's better to start a new interval to avoid artifically // extend the new interval. 
- if (MIHasDef && !MIHasUse) { + if (MI->readsWritesVirtualRegister(li.reg) == + std::make_pair(false,true)) { MBBVRegsMap.erase(MBB->getNumber()); ThisVReg = 0; } @@ -1651,104 +1570,10 @@ LiveIntervals::normalizeSpillWeights(std::vector &NewLIs) { normalizeSpillWeight(*NewLIs[i]); } -std::vector LiveIntervals:: -addIntervalsForSpillsFast(const LiveInterval &li, - const MachineLoopInfo *loopInfo, - VirtRegMap &vrm) { - unsigned slot = vrm.assignVirt2StackSlot(li.reg); - - std::vector added; - - assert(li.isSpillable() && "attempt to spill already spilled interval!"); - - DEBUG({ - dbgs() << "\t\t\t\tadding intervals for spills for interval: "; - li.dump(); - dbgs() << '\n'; - }); - - const TargetRegisterClass* rc = mri_->getRegClass(li.reg); - - MachineRegisterInfo::reg_iterator RI = mri_->reg_begin(li.reg); - while (RI != mri_->reg_end()) { - MachineInstr* MI = &*RI; - - SmallVector Indices; - bool HasUse = false; - bool HasDef = false; - - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand& mop = MI->getOperand(i); - if (!mop.isReg() || mop.getReg() != li.reg) continue; - - HasUse |= MI->getOperand(i).isUse(); - HasDef |= MI->getOperand(i).isDef(); - - Indices.push_back(i); - } - - if (!tryFoldMemoryOperand(MI, vrm, NULL, getInstructionIndex(MI), - Indices, true, slot, li.reg)) { - unsigned NewVReg = mri_->createVirtualRegister(rc); - vrm.grow(); - vrm.assignVirt2StackSlot(NewVReg, slot); - - // create a new register for this spill - LiveInterval &nI = getOrCreateInterval(NewVReg); - nI.markNotSpillable(); - - // Rewrite register operands to use the new vreg. - for (SmallVectorImpl::iterator I = Indices.begin(), - E = Indices.end(); I != E; ++I) { - MI->getOperand(*I).setReg(NewVReg); - - if (MI->getOperand(*I).isUse()) - MI->getOperand(*I).setIsKill(true); - } - - // Fill in the new live interval. 
- SlotIndex index = getInstructionIndex(MI); - if (HasUse) { - LiveRange LR(index.getLoadIndex(), index.getUseIndex(), - nI.getNextValue(SlotIndex(), 0, false, - getVNInfoAllocator())); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - vrm.addRestorePoint(NewVReg, MI); - } - if (HasDef) { - LiveRange LR(index.getDefIndex(), index.getStoreIndex(), - nI.getNextValue(SlotIndex(), 0, false, - getVNInfoAllocator())); - DEBUG(dbgs() << " +" << LR); - nI.addRange(LR); - vrm.addSpillPoint(NewVReg, true, MI); - } - - added.push_back(&nI); - - DEBUG({ - dbgs() << "\t\t\t\tadded new interval: "; - nI.dump(); - dbgs() << '\n'; - }); - } - - - RI = mri_->reg_begin(li.reg); - } - - return added; -} - std::vector LiveIntervals:: addIntervalsForSpills(const LiveInterval &li, SmallVectorImpl &SpillIs, const MachineLoopInfo *loopInfo, VirtRegMap &vrm) { - - if (EnableFastSpilling) - return addIntervalsForSpillsFast(li, loopInfo, vrm); - assert(li.isSpillable() && "attempt to spill already spilled interval!"); DEBUG({ @@ -2184,7 +2009,6 @@ LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, SlotIndex(getInstructionIndex(startInst).getDefIndex()), startInst, true, getVNInfoAllocator()); VN->setHasPHIKill(true); - VN->kills.push_back(indexes_->getTerminatorGap(startInst->getParent())); LiveRange LR( SlotIndex(getInstructionIndex(startInst).getDefIndex()), getMBBEndIdx(startInst->getParent()), VN); diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index 798b9b939cd3..709e2c6d5ca7 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -35,8 +35,8 @@ void LiveStacks::getAnalysisUsage(AnalysisUsage &AU) const { } void LiveStacks::releaseMemory() { - // Release VNInfo memroy regions after all VNInfo objects are dtor'd. - VNInfoAllocator.DestroyAll(); + // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. + VNInfoAllocator.Reset(); S2IMap.clear(); S2RCMap.clear(); } diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 079684eea079..41b891d30f23 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -286,7 +286,7 @@ MachineInstr *LiveVariables::FindLastRefOrPartRef(unsigned Reg) { MachineInstr *LastDef = PhysRegDef[Reg]; MachineInstr *LastUse = PhysRegUse[Reg]; if (!LastDef && !LastUse) - return false; + return 0; MachineInstr *LastRefOrPartRef = LastUse ? LastUse : LastDef; unsigned LastRefOrPartRefDist = DistanceMap[LastRefOrPartRef]; @@ -609,7 +609,12 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // Finally, if the last instruction in the block is a return, make sure to // mark it as using all of the live-out values in the function. - if (!MBB->empty() && MBB->back().getDesc().isReturn()) { + // Things marked both call and return are tail calls; do not do this for + // them. The tail callee need not take the same registers as input + // that it produces as output, and there are dependencies for its input + // registers elsewhere. 
+ if (!MBB->empty() && MBB->back().getDesc().isReturn() + && !MBB->back().getDesc().isCall()) { MachineInstr *Ret = &MBB->back(); for (MachineRegisterInfo::liveout_iterator diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp index b0348a5b753c..dfd4eaeca660 100644 --- a/lib/CodeGen/LowerSubregs.cpp +++ b/lib/CodeGen/LowerSubregs.cpp @@ -53,15 +53,15 @@ namespace { bool runOnMachineFunction(MachineFunction&); private: - bool LowerExtract(MachineInstr *MI); - bool LowerInsert(MachineInstr *MI); bool LowerSubregToReg(MachineInstr *MI); + bool LowerCopy(MachineInstr *MI); void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, const TargetRegisterInfo *TRI); void TransferKillFlag(MachineInstr *MI, unsigned SrcReg, const TargetRegisterInfo *TRI, bool AddIfNotFound = false); + void TransferImplicitDefs(MachineInstr *MI); }; char LowerSubregsInstructionPass::ID = 0; @@ -83,7 +83,7 @@ LowerSubregsInstructionPass::TransferDeadFlag(MachineInstr *MI, if (MII->addRegisterDead(DstReg, TRI)) break; assert(MII != MI->getParent()->begin() && - "copyRegToReg output doesn't reference destination register!"); + "copyPhysReg output doesn't reference destination register!"); } } @@ -100,64 +100,24 @@ LowerSubregsInstructionPass::TransferKillFlag(MachineInstr *MI, if (MII->addRegisterKilled(SrcReg, TRI, AddIfNotFound)) break; assert(MII != MI->getParent()->begin() && - "copyRegToReg output doesn't reference source register!"); + "copyPhysReg output doesn't reference source register!"); } } -bool LowerSubregsInstructionPass::LowerExtract(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - - assert(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && - MI->getOperand(1).isReg() && MI->getOperand(1).isUse() && - MI->getOperand(2).isImm() && "Malformed extract_subreg"); - - unsigned DstReg = MI->getOperand(0).getReg(); - unsigned SuperReg = MI->getOperand(1).getReg(); - unsigned SubIdx = MI->getOperand(2).getImm(); - unsigned SrcReg = TRI->getSubReg(SuperReg, SubIdx); - - assert(TargetRegisterInfo::isPhysicalRegister(SuperReg) && - "Extract supperg source must be a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && - "Extract destination must be in a physical register"); - assert(SrcReg && "invalid subregister index for register"); - - DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); - - if (SrcReg == DstReg) { - // No need to insert an identity copy instruction. - if (MI->getOperand(1).isKill()) { - // We must make sure the super-register gets killed. Replace the - // instruction with KILL. - MI->setDesc(TII->get(TargetOpcode::KILL)); - MI->RemoveOperand(2); // SubIdx - DEBUG(dbgs() << "subreg: replace by: " << *MI); - return true; - } - - DEBUG(dbgs() << "subreg: eliminated!"); - } else { - // Insert copy - const TargetRegisterClass *TRCS = TRI->getPhysicalRegisterRegClass(DstReg); - const TargetRegisterClass *TRCD = TRI->getPhysicalRegisterRegClass(SrcReg); - bool Emitted = TII->copyRegToReg(*MBB, MI, DstReg, SrcReg, TRCD, TRCS, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Subreg and Dst must be of compatible register class"); - // Transfer the kill/dead flags, if needed. 
- if (MI->getOperand(0).isDead()) - TransferDeadFlag(MI, DstReg, TRI); - if (MI->getOperand(1).isKill()) - TransferKillFlag(MI, SuperReg, TRI, true); - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI); - }); +/// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered +/// replacement instructions immediately precede it. Copy any implicit-def +/// operands from MI to the replacement instruction. +void +LowerSubregsInstructionPass::TransferImplicitDefs(MachineInstr *MI) { + MachineBasicBlock::iterator CopyMI = MI; + --CopyMI; + + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isImplicit() || MO.isUse()) + continue; + CopyMI->addOperand(MachineOperand::CreateReg(MO.getReg(), true, true)); } - - DEBUG(dbgs() << '\n'); - MBB->erase(MI); - return true; } bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { @@ -166,10 +126,10 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { MI->getOperand(1).isImm() && (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && MI->getOperand(3).isImm() && "Invalid subreg_to_reg"); - + unsigned DstReg = MI->getOperand(0).getReg(); unsigned InsReg = MI->getOperand(2).getReg(); - unsigned InsSIdx = MI->getOperand(2).getSubReg(); + assert(!MI->getOperand(2).getSubReg() && "SubIdx on physreg?"); unsigned SubIdx = MI->getOperand(3).getImm(); assert(SubIdx != 0 && "Invalid index for insert_subreg"); @@ -182,27 +142,25 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); - if (DstSubReg == InsReg && InsSIdx == 0) { + if (DstSubReg == InsReg) { // No need to insert an identify copy instruction. // Watch out for case like this: - // %RAX = ... - // %RAX = SUBREG_TO_REG 0, %EAX:3, 3 - // The first def is defining RAX, not EAX so the top bits were not - // zero extended. + // %RAX = SUBREG_TO_REG 0, %EAX, 3 + // We must leave %RAX live. + if (DstReg != InsReg) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + MI->RemoveOperand(3); // SubIdx + MI->RemoveOperand(1); // Imm + DEBUG(dbgs() << "subreg: replace by: " << *MI); + return true; + } DEBUG(dbgs() << "subreg: eliminated!"); } else { - // Insert sub-register copy - const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg); - const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg); - bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Subreg and Dst must be of compatible register class"); + TII->copyPhysReg(*MBB, MI, MI->getDebugLoc(), DstSubReg, InsReg, + MI->getOperand(2).isKill()); // Transfer the kill/dead flags, if needed. 
if (MI->getOperand(0).isDead()) TransferDeadFlag(MI, DstSubReg, TRI); - if (MI->getOperand(2).isKill()) - TransferKillFlag(MI, InsReg, TRI); DEBUG({ MachineBasicBlock::iterator dMI = MI; dbgs() << "subreg: " << *(--dMI); @@ -214,87 +172,39 @@ bool LowerSubregsInstructionPass::LowerSubregToReg(MachineInstr *MI) { return true; } -bool LowerSubregsInstructionPass::LowerInsert(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - assert((MI->getOperand(0).isReg() && MI->getOperand(0).isDef()) && - (MI->getOperand(1).isReg() && MI->getOperand(1).isUse()) && - (MI->getOperand(2).isReg() && MI->getOperand(2).isUse()) && - MI->getOperand(3).isImm() && "Invalid insert_subreg"); - - unsigned DstReg = MI->getOperand(0).getReg(); -#ifndef NDEBUG - unsigned SrcReg = MI->getOperand(1).getReg(); -#endif - unsigned InsReg = MI->getOperand(2).getReg(); - unsigned SubIdx = MI->getOperand(3).getImm(); +bool LowerSubregsInstructionPass::LowerCopy(MachineInstr *MI) { + MachineOperand &DstMO = MI->getOperand(0); + MachineOperand &SrcMO = MI->getOperand(1); - assert(DstReg == SrcReg && "insert_subreg not a two-address instruction?"); - assert(SubIdx != 0 && "Invalid index for insert_subreg"); - unsigned DstSubReg = TRI->getSubReg(DstReg, SubIdx); - assert(DstSubReg && "invalid subregister index for register"); - assert(TargetRegisterInfo::isPhysicalRegister(SrcReg) && - "Insert superreg source must be in a physical register"); - assert(TargetRegisterInfo::isPhysicalRegister(InsReg) && - "Inserted value must be in a physical register"); - - DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); - - if (DstSubReg == InsReg) { - // No need to insert an identity copy instruction. If the SrcReg was - // , we need to make sure it is alive by inserting a KILL - if (MI->getOperand(1).isUndef() && !MI->getOperand(0).isDead()) { - MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::KILL), DstReg); - if (MI->getOperand(2).isUndef()) - MIB.addReg(InsReg, RegState::Undef); - else - MIB.addReg(InsReg, RegState::Kill); - } else { - DEBUG(dbgs() << "subreg: eliminated!\n"); - MBB->erase(MI); + if (SrcMO.getReg() == DstMO.getReg()) { + DEBUG(dbgs() << "identity copy: " << *MI); + // No need to insert an identity copy instruction, but replace with a KILL + // if liveness is changed. + if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) { + // We must make sure the super-register gets killed. Replace the + // instruction with KILL. + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); return true; } - } else { - // Insert sub-register copy - const TargetRegisterClass *TRC0= TRI->getPhysicalRegisterRegClass(DstSubReg); - const TargetRegisterClass *TRC1= TRI->getPhysicalRegisterRegClass(InsReg); - if (MI->getOperand(2).isUndef()) - // If the source register being inserted is undef, then this becomes a - // KILL. - BuildMI(*MBB, MI, MI->getDebugLoc(), - TII->get(TargetOpcode::KILL), DstSubReg); - else { - bool Emitted = TII->copyRegToReg(*MBB, MI, DstSubReg, InsReg, TRC0, TRC1, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Subreg and Dst must be of compatible register class"); - } - MachineBasicBlock::iterator CopyMI = MI; - --CopyMI; - - // INSERT_SUBREG is a two-address instruction so it implicitly kills SrcReg. - if (!MI->getOperand(1).isUndef()) - CopyMI->addOperand(MachineOperand::CreateReg(DstReg, false, true, true)); - - // Transfer the kill/dead flags, if needed. 
- if (MI->getOperand(0).isDead()) { - TransferDeadFlag(MI, DstSubReg, TRI); - } else { - // Make sure the full DstReg is live after this replacement. - CopyMI->addOperand(MachineOperand::CreateReg(DstReg, true, true)); - } - - // Make sure the inserted register gets killed - if (MI->getOperand(2).isKill() && !MI->getOperand(2).isUndef()) - TransferKillFlag(MI, InsReg, TRI); + // Vanilla identity copy. + MI->eraseFromParent(); + return true; } - DEBUG({ - MachineBasicBlock::iterator dMI = MI; - dbgs() << "subreg: " << *(--dMI) << "\n"; - }); + DEBUG(dbgs() << "real copy: " << *MI); + TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), + DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); - MBB->erase(MI); + if (DstMO.isDead()) + TransferDeadFlag(MI, DstMO.getReg(), TRI); + if (MI->getNumOperands() > 2) + TransferImplicitDefs(MI); + DEBUG({ + MachineBasicBlock::iterator dMI = MI; + dbgs() << "replaced by: " << *(--dMI); + }); + MI->eraseFromParent(); return true; } @@ -317,12 +227,13 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) { mi != me;) { MachineBasicBlock::iterator nmi = llvm::next(mi); MachineInstr *MI = mi; - if (MI->isExtractSubreg()) { - MadeChange |= LowerExtract(MI); - } else if (MI->isInsertSubreg()) { - MadeChange |= LowerInsert(MI); - } else if (MI->isSubregToReg()) { + assert(!MI->isInsertSubreg() && "INSERT_SUBREG should no longer appear"); + assert(MI->getOpcode() != TargetOpcode::EXTRACT_SUBREG && + "EXTRACT_SUBREG should no longer appear"); + if (MI->isSubregToReg()) { MadeChange |= LowerSubregToReg(MI); + } else if (MI->isCopy()) { + MadeChange |= LowerCopy(MI); } mi = nmi; } diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index eaaa1f85b563..a27ee479433b 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -13,7 +13,10 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/BasicBlock.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -136,6 +139,13 @@ void ilist_traits::deleteNode(MachineInstr* MI) { Parent->getParent()->DeleteMachineInstr(MI); } +MachineBasicBlock::iterator MachineBasicBlock::getFirstNonPHI() { + iterator I = begin(); + while (I != end() && I->isPHI()) + ++I; + return I; +} + MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() { iterator I = end(); while (I != begin() && (--I)->getDesc().isTerminator()) @@ -245,6 +255,7 @@ void MachineBasicBlock::updateTerminator() { MachineBasicBlock *TBB = 0, *FBB = 0; SmallVector Cond; + DebugLoc dl; // FIXME: this is nowhere bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond); (void) B; assert(!B && "UpdateTerminators requires analyzable predecessors!"); @@ -259,7 +270,7 @@ void MachineBasicBlock::updateTerminator() { // its layout successor, insert a branch. 
TBB = *succ_begin(); if (!isLayoutSuccessor(TBB)) - TII->InsertBranch(*this, TBB, 0, Cond); + TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { if (FBB) { @@ -270,10 +281,10 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) return; TII->RemoveBranch(*this); - TII->InsertBranch(*this, FBB, 0, Cond); + TII->InsertBranch(*this, FBB, 0, Cond, dl); } else if (isLayoutSuccessor(FBB)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, 0, Cond); + TII->InsertBranch(*this, TBB, 0, Cond, dl); } } else { // The block has a fallthrough conditional branch. @@ -284,14 +295,14 @@ void MachineBasicBlock::updateTerminator() { if (TII->ReverseBranchCondition(Cond)) { // We can't reverse the condition, add an unconditional branch. Cond.clear(); - TII->InsertBranch(*this, MBBA, 0, Cond); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); return; } TII->RemoveBranch(*this); - TII->InsertBranch(*this, MBBA, 0, Cond); + TII->InsertBranch(*this, MBBA, 0, Cond, dl); } else if (!isLayoutSuccessor(MBBA)) { TII->RemoveBranch(*this); - TII->InsertBranch(*this, TBB, MBBA, Cond); + TII->InsertBranch(*this, TBB, MBBA, Cond, dl); } } } @@ -331,12 +342,32 @@ void MachineBasicBlock::transferSuccessors(MachineBasicBlock *fromMBB) { if (this == fromMBB) return; - for (MachineBasicBlock::succ_iterator I = fromMBB->succ_begin(), - E = fromMBB->succ_end(); I != E; ++I) - addSuccessor(*I); + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + } +} + +void +MachineBasicBlock::transferSuccessorsAndUpdatePHIs(MachineBasicBlock *fromMBB) { + if (this == fromMBB) + return; - while (!fromMBB->succ_empty()) - fromMBB->removeSuccessor(fromMBB->succ_begin()); + while (!fromMBB->succ_empty()) { + MachineBasicBlock *Succ = *fromMBB->succ_begin(); + addSuccessor(Succ); + fromMBB->removeSuccessor(Succ); + + // Fix up any PHI nodes in the successor. + for (MachineBasicBlock::iterator MI = Succ->begin(), ME = Succ->end(); + MI != ME && MI->isPHI(); ++MI) + for (unsigned i = 2, e = MI->getNumOperands()+1; i != e; i += 2) { + MachineOperand &MO = MI->getOperand(i); + if (MO.getMBB() == fromMBB) + MO.setMBB(this); + } + } } bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const { @@ -395,6 +426,82 @@ bool MachineBasicBlock::canFallThrough() { return FBB == 0; } +MachineBasicBlock * +MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { + MachineFunction *MF = getParent(); + DebugLoc dl; // FIXME: this is nowhere + + // We may need to update this's terminator, but we can't do that if AnalyzeBranch + // fails. If this uses a jump table, we won't touch it. + const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + MachineBasicBlock *TBB = 0, *FBB = 0; + SmallVector Cond; + if (TII->AnalyzeBranch(*this, TBB, FBB, Cond)) + return NULL; + + MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); + MF->insert(llvm::next(MachineFunction::iterator(this)), NMBB); + DEBUG(dbgs() << "PHIElimination splitting critical edge:" + " BB#" << getNumber() + << " -- BB#" << NMBB->getNumber() + << " -- BB#" << Succ->getNumber() << '\n'); + + ReplaceUsesOfBlockWith(Succ, NMBB); + updateTerminator(); + + // Insert unconditional "jump Succ" instruction in NMBB if necessary. 
+  NMBB->addSuccessor(Succ);
+  if (!NMBB->isLayoutSuccessor(Succ)) {
+    Cond.clear();
+    MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl);
+  }
+
+  // Fix PHI nodes in Succ so they refer to NMBB instead of this
+  for (MachineBasicBlock::iterator i = Succ->begin(), e = Succ->end();
+       i != e && i->isPHI(); ++i)
+    for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+      if (i->getOperand(ni+1).getMBB() == this)
+        i->getOperand(ni+1).setMBB(NMBB);
+
+  if (LiveVariables *LV =
+        P->getAnalysisIfAvailable<LiveVariables>())
+    LV->addNewBlock(NMBB, this, Succ);
+
+  if (MachineDominatorTree *MDT =
+        P->getAnalysisIfAvailable<MachineDominatorTree>())
+    MDT->addNewBlock(NMBB, this);
+
+  if (MachineLoopInfo *MLI =
+        P->getAnalysisIfAvailable<MachineLoopInfo>())
+    if (MachineLoop *TIL = MLI->getLoopFor(this)) {
+      // If one or the other blocks were not in a loop, the new block is not
+      // either, and thus LI doesn't need to be updated.
+      if (MachineLoop *DestLoop = MLI->getLoopFor(Succ)) {
+        if (TIL == DestLoop) {
+          // Both in the same loop, the NMBB joins loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (TIL->contains(DestLoop)) {
+          // Edge from an outer loop to an inner loop. Add to the outer loop.
+          TIL->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else if (DestLoop->contains(TIL)) {
+          // Edge from an inner loop to an outer loop. Add to the outer loop.
+          DestLoop->addBasicBlockToLoop(NMBB, MLI->getBase());
+        } else {
+          // Edge from two loops with no containment relation. Because these
+          // are natural loops, we know that the destination block must be the
+          // header of its loop (adding a branch into a loop elsewhere would
+          // create an irreducible loop).
+          assert(DestLoop->getHeader() == Succ &&
+                 "Should not create irreducible loops!");
+          if (MachineLoop *P = DestLoop->getParentLoop())
+            P->addBasicBlockToLoop(NMBB, MLI->getBase());
+        }
+      }
+    }
+
+  return NMBB;
+}
+
 /// removeFromParent - This method unlinks 'this' from the containing function,
 /// and returns it, but does not delete it.
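// --- Illustrative sketch, not part of the patch: typical use of the new
// MachineBasicBlock::SplitCriticalEdge() from inside a machine pass, following
// the pattern the later MachineLICM and PHIElimination hunks adopt. The call
// can return NULL when the terminator cannot be analyzed (e.g. jump tables),
// so callers must be prepared to give up. splitEdgeIfNeeded is a placeholder
// name.

static MachineBasicBlock *splitEdgeIfNeeded(MachineBasicBlock *FromMBB,
                                            MachineBasicBlock *ToMBB,
                                            Pass *P) {
  // A block with a single successor does not form a critical edge; it can be
  // used directly.
  if (FromMBB->succ_size() == 1)
    return FromMBB;
  // Passing the pass lets the new block be registered with LiveVariables, the
  // dominator tree and loop info when those analyses are available.
  return FromMBB->SplitCriticalEdge(ToMBB, P); // NULL if unanalyzable.
}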
MachineBasicBlock *MachineBasicBlock::removeFromParent() { diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index 6f4f7a883409..833cc00027db 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -30,9 +30,7 @@ using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); STATISTIC(NumCSEs, "Number of common subexpression eliminated"); - -static cl::opt CSEPhysDef("machine-cse-phys-defs", - cl::init(false), cl::Hidden); +STATISTIC(NumPhysCSEs, "Number of phyreg defining common subexpr eliminated"); namespace { class MachineCSE : public MachineFunctionPass { @@ -128,6 +126,28 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, ++NumCoalesces; Changed = true; } + + if (!DefMI->isCopy()) + continue; + SrcReg = DefMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) + continue; + if (DefMI->getOperand(0).getSubReg() || DefMI->getOperand(1).getSubReg()) + continue; + const TargetRegisterClass *SRC = MRI->getRegClass(SrcReg); + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + const TargetRegisterClass *NewRC = getCommonSubClass(RC, SRC); + if (!NewRC) + continue; + DEBUG(dbgs() << "Coalescing: " << *DefMI); + DEBUG(dbgs() << "*** to: " << *MI); + MO.setReg(SrcReg); + MRI->clearKillFlags(SrcReg); + if (NewRC != SRC) + MRI->setRegClass(SrcReg, NewRC); + DefMI->eraseFromParent(); + ++NumCoalesces; + Changed = true; } return Changed; @@ -172,7 +192,8 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, /// hasLivePhysRegDefUse - Return true if the specified instruction read / write /// physical registers (except for dead defs of physical registers). It also -/// returns the physical register def by reference if it's the only one. +/// returns the physical register def by reference if it's the only one and the +/// instruction does not uses a physical register. bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, const MachineBasicBlock *MBB, unsigned &PhysDef) const { @@ -186,9 +207,11 @@ bool MachineCSE::hasLivePhysRegDefUse(const MachineInstr *MI, continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) continue; - if (MO.isUse()) + if (MO.isUse()) { // Can't touch anything to read a physical register. + PhysDef = 0; return true; + } if (MO.isDead()) // If the def is dead, it's ok. continue; @@ -240,8 +263,8 @@ bool MachineCSE::PhysRegDefReaches(MachineInstr *CSMI, MachineInstr *MI, static bool isCopy(const MachineInstr *MI, const TargetInstrInfo *TII) { unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - return TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) || - MI->isExtractSubreg() || MI->isInsertSubreg() || MI->isSubregToReg(); + return MI->isCopyLike() || + TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); } bool MachineCSE::isCSECandidate(MachineInstr *MI) { @@ -356,6 +379,7 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { if (!isCSECandidate(MI)) continue; + bool DefPhys = false; bool FoundCSE = VNT.count(MI); if (!FoundCSE) { // Look for trivial copy coalescing opportunities. @@ -376,11 +400,13 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { // ... Unless the CS is local and it also defines the physical register // which is not clobbered in between. 
- if (PhysDef && CSEPhysDef) { + if (PhysDef) { unsigned CSVN = VNT.lookup(MI); MachineInstr *CSMI = Exps[CSVN]; - if (PhysRegDefReaches(CSMI, MI, PhysDef)) + if (PhysRegDefReaches(CSMI, MI, PhysDef)) { FoundCSE = true; + DefPhys = true; + } } } @@ -426,6 +452,8 @@ bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) { } MI->eraseFromParent(); ++NumCSEs; + if (DefPhys) + ++NumPhysCSEs; } else { DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n"); VNT.insert(MI, CurrVN++); diff --git a/lib/CodeGen/MachineDominators.cpp b/lib/CodeGen/MachineDominators.cpp index 408873903b0d..b5f8fbba99de 100644 --- a/lib/CodeGen/MachineDominators.cpp +++ b/lib/CodeGen/MachineDominators.cpp @@ -46,7 +46,6 @@ MachineDominatorTree::MachineDominatorTree() } MachineDominatorTree::~MachineDominatorTree() { - DT->releaseMemory(); delete DT; } diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index a38c881982e7..666120f032c6 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -378,7 +378,7 @@ void MachineFunction::viewCFG() const #ifndef NDEBUG ViewGraph(this, "mf" + getFunction()->getNameStr()); #else - errs() << "SelectionDAG::viewGraph is only available in debug builds on " + errs() << "MachineFunction::viewCFG is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -388,7 +388,7 @@ void MachineFunction::viewCFGOnly() const #ifndef NDEBUG ViewGraph(this, "mf" + getFunction()->getNameStr(), true); #else - errs() << "SelectionDAG::viewGraph is only available in debug builds on " + errs() << "MachineFunction::viewCFGOnly is only available in debug builds on " << "systems with Graphviz or gv!\n"; #endif // NDEBUG } @@ -438,10 +438,16 @@ MCSymbol *MachineFunction::getJTISymbol(unsigned JTI, MCContext &Ctx, /// index with a negative value. /// int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, - bool Immutable, bool isSS) { + bool Immutable) { assert(Size != 0 && "Cannot allocate zero size fixed stack objects!"); - Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable, - isSS)); + // The alignment of the frame index can be determined from its offset from + // the incoming frame position. If the frame object is at offset 32 and + // the stack is guaranteed to be 16-byte aligned, then we know that the + // object is 16-byte aligned. 
+ unsigned StackAlign = TFI.getStackAlignment(); + unsigned Align = MinAlign(SPOffset, StackAlign); + Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, + /*isSS*/false)); return -++NumFixedObjects; } diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e54cd5cf9492..6b2e98549c71 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -111,6 +111,26 @@ void MachineOperand::setReg(unsigned Reg) { Contents.Reg.RegNo = Reg; } +void MachineOperand::substVirtReg(unsigned Reg, unsigned SubIdx, + const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isVirtualRegister(Reg)); + if (SubIdx && getSubReg()) + SubIdx = TRI.composeSubRegIndices(SubIdx, getSubReg()); + setReg(Reg); + if (SubIdx) + setSubReg(SubIdx); +} + +void MachineOperand::substPhysReg(unsigned Reg, const TargetRegisterInfo &TRI) { + assert(TargetRegisterInfo::isPhysicalRegister(Reg)); + if (getSubReg()) { + Reg = TRI.getSubReg(Reg, getSubReg()); + assert(Reg && "Invalid SubReg for physical register"); + setSubReg(0); + } + setReg(Reg); +} + /// ChangeToImmediate - Replace this operand with a new immediate operand of /// the specified value. If an operand is known to be an immediate already, /// the setImm method should be used. @@ -861,14 +881,14 @@ int MachineInstr::findFirstPredOperandIdx() const { bool MachineInstr:: isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { if (isInlineAsm()) { - assert(DefOpIdx >= 2); + assert(DefOpIdx >= 3); const MachineOperand &MO = getOperand(DefOpIdx); if (!MO.isReg() || !MO.isDef() || MO.getReg() == 0) return false; // Determine the actual operand index that corresponds to this index. unsigned DefNo = 0; unsigned DefPart = 0; - for (unsigned i = 1, e = getNumOperands(); i < e; ) { + for (unsigned i = 2, e = getNumOperands(); i < e; ) { const MachineOperand &FMO = getOperand(i); // After the normal asm operands there may be additional imp-def regs. if (!FMO.isImm()) @@ -883,7 +903,7 @@ isRegTiedToUseOperand(unsigned DefOpIdx, unsigned *UseOpIdx) const { } ++DefNo; } - for (unsigned i = 1, e = getNumOperands(); i != e; ++i) { + for (unsigned i = 2, e = getNumOperands(); i != e; ++i) { const MachineOperand &FMO = getOperand(i); if (!FMO.isImm()) continue; @@ -926,7 +946,7 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { // Find the flag operand corresponding to UseOpIdx unsigned FlagIdx, NumOps=0; - for (FlagIdx = 1; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { + for (FlagIdx = 2; FlagIdx < UseOpIdx; FlagIdx += NumOps+1) { const MachineOperand &UFMO = getOperand(FlagIdx); // After the normal asm operands there may be additional imp-def regs. if (!UFMO.isImm()) @@ -944,9 +964,9 @@ isRegTiedToDefOperand(unsigned UseOpIdx, unsigned *DefOpIdx) const { if (!DefOpIdx) return true; - unsigned DefIdx = 1; - // Remember to adjust the index. First operand is asm string, then there - // is a flag for each. + unsigned DefIdx = 2; + // Remember to adjust the index. First operand is asm string, second is + // the AlignStack bit, then there is a flag for each. 
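// --- Illustrative sketch, not part of the patch: the alignment inference that
// the new CreateFixedObject() code above relies on. minAlign() here is a local
// stand-in assumed to behave like llvm::MinAlign(): it yields the largest
// power of two dividing both inputs, i.e. the alignment that can still be
// guaranteed for a fixed object at the given offset from an aligned stack.

#include <cassert>
#include <stdint.h>

static uint64_t minAlign(uint64_t A, uint64_t B) {
  // Lowest set bit of (A | B) == largest power of two dividing both values.
  return (A | B) & (~(A | B) + 1);
}

int main() {
  // 16-byte aligned stack, object at offset 32: still 16-byte aligned.
  assert(minAlign(32, 16) == 16);
  // Same stack, object at offset 40: only 8-byte alignment can be assumed.
  assert(minAlign(40, 16) == 8);
  return 0;
}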
while (DefNo) { const MachineOperand &FMO = getOperand(DefIdx); assert(FMO.isImm()); @@ -1017,6 +1037,29 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) { } } +void MachineInstr::substituteRegister(unsigned FromReg, + unsigned ToReg, + unsigned SubIdx, + const TargetRegisterInfo &RegInfo) { + if (TargetRegisterInfo::isPhysicalRegister(ToReg)) { + if (SubIdx) + ToReg = RegInfo.getSubReg(ToReg, SubIdx); + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substPhysReg(ToReg, RegInfo); + } + } else { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || MO.getReg() != FromReg) + continue; + MO.substVirtReg(ToReg, SubIdx, RegInfo); + } + } +} + /// isSafeToMove - Return true if it is safe to move this instruction. If /// SawStore is set to true, it means that there is a store (or call) between /// the instruction's location and its intended destination. @@ -1168,6 +1211,28 @@ void MachineInstr::dump() const { dbgs() << " " << *this; } +static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, + raw_ostream &CommentOS) { + const LLVMContext &Ctx = MF->getFunction()->getContext(); + if (!DL.isUnknown()) { // Print source line info. + DIScope Scope(DL.getScope(Ctx)); + // Omit the directory, because it's likely to be long and uninteresting. + if (Scope.Verify()) + CommentOS << Scope.getFilename(); + else + CommentOS << ""; + CommentOS << ':' << DL.getLine(); + if (DL.getCol() != 0) + CommentOS << ':' << DL.getCol(); + DebugLoc InlinedAtDL = DebugLoc::getFromDILocation(DL.getInlinedAt(Ctx)); + if (!InlinedAtDL.isUnknown()) { + CommentOS << " @[ "; + printDebugLoc(InlinedAtDL, MF, CommentOS); + CommentOS << " ]"; + } + } +} + void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; @@ -1240,6 +1305,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << "!\"" << MDS->getString() << '\"'; else MO.print(OS, TM); + } else if (TM && (isInsertSubreg() || isRegSequence()) && MO.isImm()) { + OS << TM->getRegisterInfo()->getSubRegIndexName(MO.getImm()); } else MO.print(OS, TM); } @@ -1265,19 +1332,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { if (!debugLoc.isUnknown() && MF) { if (!HaveSemi) OS << ";"; - - // TODO: print InlinedAtLoc information - - DIScope Scope(debugLoc.getScope(MF->getFunction()->getContext())); OS << " dbg:"; - // Omit the directory, since it's usually long and uninteresting. 
- if (Scope.Verify()) - OS << Scope.getFilename(); - else - OS << ""; - OS << ':' << debugLoc.getLine(); - if (debugLoc.getCol() != 0) - OS << ':' << debugLoc.getCol(); + printDebugLoc(debugLoc, MF, OS); } OS << "\n"; @@ -1418,6 +1474,25 @@ void MachineInstr::addRegisterDefined(unsigned IncomingReg, true /*IsImp*/)); } +void MachineInstr::setPhysRegsDeadExcept(const SmallVectorImpl &UsedRegs, + const TargetRegisterInfo &TRI) { + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { + MachineOperand &MO = getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) continue; + bool Dead = true; + for (SmallVectorImpl::const_iterator I = UsedRegs.begin(), + E = UsedRegs.end(); I != E; ++I) + if (TRI.regsOverlap(*I, Reg)) { + Dead = false; + break; + } + // If there are no uses, including partial uses, the def is dead. + if (Dead) MO.setIsDead(); + } +} + unsigned MachineInstrExpressionTrait::getHashValue(const MachineInstr* const &MI) { unsigned Hash = MI->getOpcode() * 37; diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 61206173e645..956d21c0b34b 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -62,6 +62,7 @@ namespace { // State that is updated as we process loops bool Changed; // True if a loop is changed. + bool FirstInLoop; // True if it's the first LICM in the loop. MachineLoop *CurLoop; // The current loop we are working on. MachineBasicBlock *CurPreheader; // The preheader for CurLoop. @@ -82,7 +83,6 @@ namespace { const char *getPassName() const { return "Machine Instruction LICM"; } - // FIXME: Loop preheaders? virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); @@ -127,8 +127,8 @@ namespace { void AddToLiveIns(unsigned Reg); /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's obviously - /// not safe to hoist it. + /// candidate for LICM. e.g. If the instruction is a call, then it's + /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); /// IsLoopInvariantInst - Returns true if the instruction is loop @@ -181,6 +181,10 @@ namespace { /// current loop preheader that may become duplicates of instructions that /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); + + /// getCurPreheader - Get the preheader for the current loop, splitting + /// a critical edge if needed. + MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -192,12 +196,17 @@ FunctionPass *llvm::createMachineLICMPass(bool PreRegAlloc) { return new MachineLICM(PreRegAlloc); } -/// LoopIsOuterMostWithPreheader - Test if the given loop is the outer-most -/// loop that has a preheader. -static bool LoopIsOuterMostWithPreheader(MachineLoop *CurLoop) { +/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most +/// loop that has a unique predecessor. +static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { + // Check whether this loop even has a unique predecessor. + if (!CurLoop->getLoopPredecessor()) + return false; + // Ok, now check to see if any of its outer loops do. for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop()) - if (L->getLoopPreheader()) + if (L->getLoopPredecessor()) return false; + // None of them did, so this is the outermost with a unique predecessor. 
return true; } @@ -207,7 +216,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { else DEBUG(dbgs() << "******** Post-regalloc Machine LICM ********\n"); - Changed = false; + Changed = FirstInLoop = false; TM = &MF.getTarget(); TII = TM->getInstrInfo(); TRI = TM->getRegisterInfo(); @@ -220,23 +229,17 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { DT = &getAnalysis(); AA = &getAnalysis(); - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); I != E; ++I){ - CurLoop = *I; + SmallVector Worklist(MLI->begin(), MLI->end()); + while (!Worklist.empty()) { + CurLoop = Worklist.pop_back_val(); + CurPreheader = 0; // If this is done before regalloc, only visit outer-most preheader-sporting // loops. - if (PreRegAlloc && !LoopIsOuterMostWithPreheader(CurLoop)) - continue; - - // Determine the block to which to hoist instructions. If we can't find a - // suitable loop preheader, we can't do any hoisting. - // - // FIXME: We are only hoisting if the basic block coming into this loop - // has only one successor. This isn't the case in general because we haven't - // broken critical edges or added preheaders. - CurPreheader = CurLoop->getLoopPreheader(); - if (!CurPreheader) + if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) { + Worklist.append(CurLoop->begin(), CurLoop->end()); continue; + } if (!PreRegAlloc) HoistRegionPostRA(); @@ -244,6 +247,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { // CSEMap is initialized for loop header when the first instruction is // being hoisted. MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader()); + FirstInLoop = true; HoistRegion(N); CSEMap.clear(); } @@ -436,13 +440,16 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { /// operands that is safe to hoist, this instruction is called to do the /// dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) return; + // Now move the instructions to the predecessor, inserting it before any // terminator instructions. DEBUG({ dbgs() << "Hoisting " << *MI; - if (CurPreheader->getBasicBlock()) + if (Preheader->getBasicBlock()) dbgs() << " to MachineBasicBlock " - << CurPreheader->getName(); + << Preheader->getName(); if (MI->getParent()->getBasicBlock()) dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); @@ -451,7 +458,7 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { // Splice the instruction to the preheader. MachineBasicBlock *MBB = MI->getParent(); - CurPreheader->splice(CurPreheader->getFirstTerminator(), MBB, MI); + Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); // Add register to livein list to all the BBs in the current loop since a // loop invariant must be kept live throughout the whole loop. This is @@ -490,26 +497,16 @@ void MachineLICM::HoistRegion(MachineDomTreeNode *N) { /// candidate for LICM. e.g. If the instruction is a call, then it's obviously /// not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { + // It is not profitable to hoist implicitdefs. FIXME: Why not? what if they + // are an argument to some other otherwise-hoistable instruction? if (I.isImplicitDef()) return false; - - const TargetInstrDesc &TID = I.getDesc(); - // Ignore stuff that we obviously can't hoist. - if (TID.mayStore() || TID.isCall() || TID.isTerminator() || - TID.hasUnmodeledSideEffects()) + // Check if it's safe to move the instruction. 
+ bool DontMoveAcrossStore = true; + if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore)) return false; - - if (TID.mayLoad()) { - // Okay, this instruction does a load. As a refinement, we allow the target - // to decide whether the loaded value is actually a constant. If so, we can - // actually use it as a load. - if (!I.isInvariantLoad(AA)) - // FIXME: we should be able to hoist loads with no other side effects if - // there are no other instructions which can change memory in this loop. - // This is a trivial form of alias analysis. - return false; - } + return true; } @@ -754,6 +751,9 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, /// that are safe to hoist, this instruction is called to do the dirty work. /// void MachineLICM::Hoist(MachineInstr *MI) { + MachineBasicBlock *Preheader = getCurPreheader(); + if (!Preheader) return; + // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { // If not, try unfolding a hoistable load. @@ -765,9 +765,9 @@ void MachineLICM::Hoist(MachineInstr *MI) { // terminator instructions. DEBUG({ dbgs() << "Hoisting " << *MI; - if (CurPreheader->getBasicBlock()) + if (Preheader->getBasicBlock()) dbgs() << " to MachineBasicBlock " - << CurPreheader->getName(); + << Preheader->getName(); if (MI->getParent()->getBasicBlock()) dbgs() << " from MachineBasicBlock " << MI->getParent()->getName(); @@ -776,7 +776,10 @@ void MachineLICM::Hoist(MachineInstr *MI) { // If this is the first instruction being hoisted to the preheader, // initialize the CSE map with potential common expressions. - InitCSEMap(CurPreheader); + if (FirstInLoop) { + InitCSEMap(Preheader); + FirstInLoop = false; + } // Look for opportunity to CSE the hoisted instruction. unsigned Opcode = MI->getOpcode(); @@ -784,7 +787,7 @@ void MachineLICM::Hoist(MachineInstr *MI) { CI = CSEMap.find(Opcode); if (!EliminateCSE(MI, CI)) { // Otherwise, splice the instruction to the preheader. - CurPreheader->splice(CurPreheader->getFirstTerminator(),MI->getParent(),MI); + Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI); // Clear the kill flags of any register this instruction defines, // since they may need to be live throughout the entire loop @@ -808,3 +811,30 @@ void MachineLICM::Hoist(MachineInstr *MI) { ++NumHoisted; Changed = true; } + +MachineBasicBlock *MachineLICM::getCurPreheader() { + // Determine the block to which to hoist instructions. If we can't find a + // suitable loop predecessor, we can't do any hoisting. + + // If we've tried to get a preheader and failed, don't try again. + if (CurPreheader == reinterpret_cast(-1)) + return 0; + + if (!CurPreheader) { + CurPreheader = CurLoop->getLoopPreheader(); + if (!CurPreheader) { + MachineBasicBlock *Pred = CurLoop->getLoopPredecessor(); + if (!Pred) { + CurPreheader = reinterpret_cast(-1); + return 0; + } + + CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this); + if (!CurPreheader) { + CurPreheader = reinterpret_cast(-1); + return 0; + } + } + } + return CurPreheader; +} diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 70bf7e5da5e8..5d852f26beda 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -20,7 +20,7 @@ using namespace llvm; MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - RegClass2VRegMap.resize(TRI.getNumRegClasses()+1); // RC ID starts at 1. 
+ RegClass2VRegMap = new std::vector[TRI.getNumRegClasses()]; UsedPhysRegs.resize(TRI.getNumRegs()); // Create the physreg use/def lists. @@ -37,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { "PhysRegUseDefLists has entries after all instructions are deleted"); #endif delete [] PhysRegUseDefLists; + delete [] RegClass2VRegMap; } /// setRegClass - Set the register class of the specified virtual register. @@ -52,7 +53,7 @@ MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { // Remove from old register class's vregs list. This may be slow but // fortunately this operation is rarely needed. std::vector &VRegs = RegClass2VRegMap[OldRC->getID()]; - std::vector::iterator I=std::find(VRegs.begin(), VRegs.end(), VR); + std::vector::iterator I = std::find(VRegs.begin(), VRegs.end(), VR); VRegs.erase(I); // Add to new register class's vregs list. @@ -174,115 +175,36 @@ unsigned MachineRegisterInfo::getLiveInVirtReg(unsigned PReg) const { return 0; } -static cl::opt -SchedLiveInCopies("schedule-livein-copies", cl::Hidden, - cl::desc("Schedule copies of livein registers"), - cl::init(false)); - -/// EmitLiveInCopy - Emit a copy for a live in physical register. If the -/// physical register has only a single copy use, then coalesced the copy -/// if possible. -static void EmitLiveInCopy(MachineBasicBlock *MBB, - MachineBasicBlock::iterator &InsertPos, - unsigned VirtReg, unsigned PhysReg, - const TargetRegisterClass *RC, - DenseMap &CopyRegMap, - const MachineRegisterInfo &MRI, - const TargetRegisterInfo &TRI, - const TargetInstrInfo &TII) { - unsigned NumUses = 0; - MachineInstr *UseMI = NULL; - for (MachineRegisterInfo::use_iterator UI = MRI.use_begin(VirtReg), - UE = MRI.use_end(); UI != UE; ++UI) { - UseMI = &*UI; - if (++NumUses > 1) - break; - } - - // If the number of uses is not one, or the use is not a move instruction, - // don't coalesce. Also, only coalesce away a virtual register to virtual - // register copy. - bool Coalesced = false; - unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (NumUses == 1 && - TII.isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - TargetRegisterInfo::isVirtualRegister(DstReg)) { - VirtReg = DstReg; - Coalesced = true; - } - - // Now find an ideal location to insert the copy. - MachineBasicBlock::iterator Pos = InsertPos; - while (Pos != MBB->begin()) { - MachineInstr *PrevMI = prior(Pos); - DenseMap::iterator RI = CopyRegMap.find(PrevMI); - // copyRegToReg might emit multiple instructions to do a copy. - unsigned CopyDstReg = (RI == CopyRegMap.end()) ? 0 : RI->second; - if (CopyDstReg && !TRI.regsOverlap(CopyDstReg, PhysReg)) - // This is what the BB looks like right now: - // r1024 = mov r0 - // ... - // r1 = mov r1024 - // - // We want to insert "r1025 = mov r1". Inserting this copy below the - // move to r1024 makes it impossible for that move to be coalesced. - // - // r1025 = mov r1 - // r1024 = mov r0 - // ... - // r1 = mov 1024 - // r2 = mov 1025 - break; // Woot! Found a good location. - --Pos; - } - - bool Emitted = TII.copyRegToReg(*MBB, Pos, VirtReg, PhysReg, RC, RC, - DebugLoc()); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - - CopyRegMap.insert(std::make_pair(prior(Pos), VirtReg)); - if (Coalesced) { - if (&*InsertPos == UseMI) ++InsertPos; - MBB->erase(UseMI); - } -} - /// EmitLiveInCopies - Emit copies to initialize livein virtual registers /// into the given entry block. 
void MachineRegisterInfo::EmitLiveInCopies(MachineBasicBlock *EntryMBB, const TargetRegisterInfo &TRI, const TargetInstrInfo &TII) { - if (SchedLiveInCopies) { - // Emit the copies at a heuristically-determined location in the block. - DenseMap CopyRegMap; - MachineBasicBlock::iterator InsertPos = EntryMBB->begin(); - for (MachineRegisterInfo::livein_iterator LI = livein_begin(), - E = livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = getRegClass(LI->second); - EmitLiveInCopy(EntryMBB, InsertPos, LI->second, LI->first, - RC, CopyRegMap, *this, TRI, TII); + // Emit the copies into the top of the block. + for (unsigned i = 0, e = LiveIns.size(); i != e; ++i) + if (LiveIns[i].second) { + if (use_empty(LiveIns[i].second)) { + // The livein has no uses. Drop it. + // + // It would be preferable to have isel avoid creating live-in + // records for unused arguments in the first place, but it's + // complicated by the debug info code for arguments. + LiveIns.erase(LiveIns.begin() + i); + --i; --e; + } else { + // Emit a copy. + BuildMI(*EntryMBB, EntryMBB->begin(), DebugLoc(), + TII.get(TargetOpcode::COPY), LiveIns[i].second) + .addReg(LiveIns[i].first); + + // Add the register to the entry block live-in set. + EntryMBB->addLiveIn(LiveIns[i].first); } - } else { - // Emit the copies into the top of the block. - for (MachineRegisterInfo::livein_iterator LI = livein_begin(), - E = livein_end(); LI != E; ++LI) - if (LI->second) { - const TargetRegisterClass *RC = getRegClass(LI->second); - bool Emitted = TII.copyRegToReg(*EntryMBB, EntryMBB->begin(), - LI->second, LI->first, RC, RC, - DebugLoc()); - assert(Emitted && "Unable to issue a live-in copy instruction!\n"); - (void) Emitted; - } - } - - // Add function live-ins to entry block live-in set. - for (MachineRegisterInfo::livein_iterator I = livein_begin(), - E = livein_end(); I != E; ++I) - EntryMBB->addLiveIn(I->first); + } else { + // Add the register to the entry block live-in set. + EntryMBB->addLiveIn(LiveIns[i].first); + } } void MachineRegisterInfo::closePhysRegsUsed(const TargetRegisterInfo &TRI) { diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index 1610e6c9610c..61334fc1790a 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This pass moves instructions into successor blocks, when possible, so that +// This pass moves instructions into successor blocks when possible, so that // they aren't executed on paths where their results aren't needed. // // This pass is not intended to be a replacement or a complete alternative @@ -45,9 +45,9 @@ namespace { public: static char ID; // Pass identification MachineSinking() : MachineFunctionPass(&ID) {} - + virtual bool runOnMachineFunction(MachineFunction &MF); - + virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); @@ -63,7 +63,7 @@ namespace { bool AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const; }; } // end anonymous namespace - + char MachineSinking::ID = 0; static RegisterPass X("machine-sink", "Machine code sinking"); @@ -72,7 +72,7 @@ FunctionPass *llvm::createMachineSinkingPass() { return new MachineSinking(); } /// AllUsesDominatedByBlock - Return true if all uses of the specified register /// occur in blocks dominated by the specified block. 
-bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, +bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, MachineBasicBlock *MBB) const { assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Only makes sense for vregs"); @@ -80,27 +80,30 @@ bool MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // This may leave a referencing dbg_value in the original block, before // the definition of the vreg. Dwarf generator handles this although the // user might not get the right info at runtime. - for (MachineRegisterInfo::use_nodbg_iterator I = - RegInfo->use_nodbg_begin(Reg), - E = RegInfo->use_nodbg_end(); I != E; ++I) { + for (MachineRegisterInfo::use_nodbg_iterator + I = RegInfo->use_nodbg_begin(Reg), E = RegInfo->use_nodbg_end(); + I != E; ++I) { // Determine the block of the use. MachineInstr *UseInst = &*I; MachineBasicBlock *UseBlock = UseInst->getParent(); + if (UseInst->isPHI()) { // PHI nodes use the operand in the predecessor block, not the block with // the PHI. UseBlock = UseInst->getOperand(I.getOperandNo()+1).getMBB(); } + // Check that it dominates. if (!DT->dominates(MBB, UseBlock)) return false; } + return true; } bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << "******** Machine Sinking ********\n"); - + const TargetMachine &TM = MF.getTarget(); TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); @@ -111,19 +114,19 @@ bool MachineSinking::runOnMachineFunction(MachineFunction &MF) { AllocatableSet = TRI->getAllocatableSet(MF); bool EverMadeChange = false; - + while (1) { bool MadeChange = false; // Process all basic blocks. - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) MadeChange |= ProcessBlock(*I); - + // If this iteration over the code changed anything, keep iterating. if (!MadeChange) break; EverMadeChange = true; - } + } return EverMadeChange; } @@ -132,8 +135,8 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (MBB.succ_size() <= 1 || MBB.empty()) return false; // Don't bother sinking code out of unreachable blocks. In addition to being - // unprofitable, it can also lead to infinite looping, because in an unreachable - // loop there may be nowhere to stop. + // unprofitable, it can also lead to infinite looping, because in an + // unreachable loop there may be nowhere to stop. if (!DT->isReachableFromEntry(&MBB)) return false; bool MadeChange = false; @@ -144,7 +147,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { bool ProcessedBegin, SawStore = false; do { MachineInstr *MI = I; // The instruction to sink. - + // Predecrement I (if it's not begin) so that it isn't invalidated by // sinking. ProcessedBegin = I == MBB.begin(); @@ -156,10 +159,10 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) { if (SinkInstruction(MI, SawStore)) ++NumSunk, MadeChange = true; - + // If we just processed the first instruction in the block, we're done. } while (!ProcessedBegin); - + return MadeChange; } @@ -169,7 +172,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // Check if it's safe to move the instruction. if (!MI->isSafeToMove(TII, AA, SawStore)) return false; - + // FIXME: This should include support for sinking instructions within the // block they are currently in to shorten the live ranges. 
We often get // instructions sunk into the top of a large block, but it would be better to @@ -177,22 +180,22 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // be careful not to *increase* register pressure though, e.g. sinking // "x = y + z" down if it kills y and z would increase the live ranges of y // and z and only shrink the live range of x. - + // Loop over all the operands of the specified instruction. If there is // anything we can't handle, bail out. MachineBasicBlock *ParentBlock = MI->getParent(); - + // SuccToSinkTo - This is the successor to sink this instruction to, once we // decide. MachineBasicBlock *SuccToSinkTo = 0; - + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; // Ignore non-register operands. - + unsigned Reg = MO.getReg(); if (Reg == 0) continue; - + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { if (MO.isUse()) { // If the physreg has no defs anywhere, it's just an ambient register @@ -200,13 +203,16 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // it could get allocated to something with a def during allocation. if (!RegInfo->def_empty(Reg)) return false; + if (AllocatableSet.test(Reg)) return false; + // Check for a def among the register's aliases too. for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) { unsigned AliasReg = *Alias; if (!RegInfo->def_empty(AliasReg)) return false; + if (AllocatableSet.test(AliasReg)) return false; } @@ -221,28 +227,31 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // If it's not safe to move defs of the register class, then abort. if (!TII->isSafeToMoveRegClassDefs(RegInfo->getRegClass(Reg))) return false; - + // FIXME: This picks a successor to sink into based on having one // successor that dominates all the uses. However, there are cases where // sinking can happen but where the sink point isn't a successor. For // example: + // // x = computation // if () {} else {} // use x - // the instruction could be sunk over the whole diamond for the + // + // the instruction could be sunk over the whole diamond for the // if/then/else (or loop, etc), allowing it to be sunk into other blocks // after that. - + // Virtual register defs can only be sunk if all their uses are in blocks // dominated by one of the successors. if (SuccToSinkTo) { // If a previous operand picked a block to sink to, then this operand // must be sinkable to the same block. - if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) + if (!AllUsesDominatedByBlock(Reg, SuccToSinkTo)) return false; + continue; } - + // Otherwise, we should look at all the successors and decide which one // we should sink to. for (MachineBasicBlock::succ_iterator SI = ParentBlock->succ_begin(), @@ -252,13 +261,13 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { break; } } - + // If we couldn't find a block to sink to, ignore this instruction. if (SuccToSinkTo == 0) return false; } } - + // If there are no outputs, it must have side-effects. if (SuccToSinkTo == 0) return false; @@ -267,15 +276,26 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) { // landing pad is implicitly defined. if (SuccToSinkTo->isLandingPad()) return false; - + // It is not possible to sink an instruction into its own block. This can // happen with loops. 
   if (MI->getParent() == SuccToSinkTo)
     return false;
-
-  DEBUG(dbgs() << "Sink instr " << *MI);
-  DEBUG(dbgs() << "to block " << *SuccToSinkTo);
-
+
+  // If the instruction to move defines a dead physical register which is live
+  // when leaving the basic block, don't move it because it could turn into a
+  // "zombie" define of that preg. E.g., EFLAGS. ()
+  for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+    const MachineOperand &MO = MI->getOperand(I);
+    if (!MO.isReg()) continue;
+    unsigned Reg = MO.getReg();
+    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue;
+    if (SuccToSinkTo->isLiveIn(Reg))
+      return false;
+  }
+
+  DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
+
   // If the block has multiple predecessors, this would introduce computation on
   // a path that it doesn't already exist. We could split the critical edge,
   // but for now we just punt.
@@ -305,18 +325,18 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
     // Otherwise we are OK with sinking along a critical edge.
     DEBUG(dbgs() << "Sinking along critical edge.\n");
   }
-
-  // Determine where to insert into.  Skip phi nodes.
+
+  // Determine where to insert into. Skip phi nodes.
   MachineBasicBlock::iterator InsertPos = SuccToSinkTo->begin();
   while (InsertPos != SuccToSinkTo->end() && InsertPos->isPHI())
     ++InsertPos;
-
+
   // Move the instruction.
   SuccToSinkTo->splice(InsertPos, ParentBlock, MI,
                        ++MachineBasicBlock::iterator(MI));
 
-  // Conservatively, clear any kill flags, since it's possible that
-  // they are no longer correct.
+  // Conservatively, clear any kill flags, since it's possible that they are no
+  // longer correct.
   MI->clearKillInfo();
 
   return true;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 8baf01c90736..2297c908b1e0 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -390,7 +390,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
         report("MBB exits via unconditional fall-through but its successor "
                "differs from its CFG successor!", MBB);
       }
-      if (!MBB->empty() && MBB->back().getDesc().isBarrier()) {
+      if (!MBB->empty() && MBB->back().getDesc().isBarrier() &&
+          !TII->isPredicated(&MBB->back())) {
         report("MBB exits via unconditional fall-through but ends with a "
                "barrier instruction!", MBB);
       }
diff --git a/lib/CodeGen/OptimizeExts.cpp b/lib/CodeGen/OptimizeExts.cpp
index 41fc20407441..dcdc243e5db3 100644
--- a/lib/CodeGen/OptimizeExts.cpp
+++ b/lib/CodeGen/OptimizeExts.cpp
@@ -118,6 +118,26 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB,
         continue;
       }
 
+      // It's an error to translate this:
+      //
+      //    %reg1025 = <sext> %reg1024
+      //     ...
+      //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
+      //
+      // into this:
+      //
+      //    %reg1025 = <sext> %reg1024
+      //     ...
+      //    %reg1027 = COPY %reg1025:4
+      //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
+      //
+      // The problem here is that SUBREG_TO_REG is there to assert that an
+      // implicit zext occurs. It doesn't insert a zext instruction. If we allow
+      // the COPY here, it will give us the value after the <sext>,
+      // not the original value of %reg1024 before <sext>.
+      if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
+        continue;
+
       MachineBasicBlock *UseMBB = UseMI->getParent();
       if (UseMBB == MBB) {
         // Local uses that come after the extension.
@@ -165,8 +185,8 @@ bool OptimizeExts::OptimizeInstr(MachineInstr *MI, MachineBasicBlock *MBB, continue; unsigned NewVR = MRI->createVirtualRegister(RC); BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG), NewVR) - .addReg(DstReg).addImm(SubIdx); + TII->get(TargetOpcode::COPY), NewVR) + .addReg(DstReg, 0, SubIdx); UseMO->setReg(NewVR); ++NumReuse; Changed = true; diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 2717d4d5cefc..1613fe21e42d 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -107,6 +107,11 @@ bool OptimizePHIs::IsSingleValuePHICycle(MachineInstr *MI, SrcSubIdx == 0 && DstSubIdx == 0 && TargetRegisterInfo::isVirtualRegister(MvSrcReg)) SrcMI = MRI->getVRegDef(MvSrcReg); + else if (SrcMI && SrcMI->isCopy() && + !SrcMI->getOperand(0).getSubReg() && + !SrcMI->getOperand(1).getSubReg() && + TargetRegisterInfo::isVirtualRegister(SrcMI->getOperand(1).getReg())) + SrcMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg()); if (!SrcMI) return false; diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h index bd18b5216f37..02938df00700 100644 --- a/lib/CodeGen/PBQP/HeuristicSolver.h +++ b/lib/CodeGen/PBQP/HeuristicSolver.h @@ -406,7 +406,7 @@ namespace PBQP { // Create node data objects. for (Graph::NodeItr nItr = g.nodesBegin(), nEnd = g.nodesEnd(); - nItr != nEnd; ++nItr) { + nItr != nEnd; ++nItr) { nodeDataList.push_back(NodeData()); g.setNodeData(nItr, &nodeDataList.back()); } diff --git a/lib/CodeGen/PBQP/Heuristics/Briggs.h b/lib/CodeGen/PBQP/Heuristics/Briggs.h index 30d34d9e3e92..4c1ce119ed05 100644 --- a/lib/CodeGen/PBQP/Heuristics/Briggs.h +++ b/lib/CodeGen/PBQP/Heuristics/Briggs.h @@ -18,7 +18,6 @@ #ifndef LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H #define LLVM_CODEGEN_PBQP_HEURISTICS_BRIGGS_H -#include "llvm/Support/Compiler.h" #include "../HeuristicSolver.h" #include "../HeuristicBase.h" @@ -267,8 +266,8 @@ namespace PBQP { if (!nd.isHeuristic) return; - EdgeData &ed ATTRIBUTE_UNUSED = getHeuristicEdgeData(eItr); - + EdgeData &ed = getHeuristicEdgeData(eItr); + (void)ed; assert(ed.isUpToDate && "Edge data is not up to date."); // Update node. diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index edbc13f3ff7f..ea6b094d7efe 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -34,7 +34,6 @@ using namespace llvm; STATISTIC(NumAtomic, "Number of atomic phis lowered"); -STATISTIC(NumSplits, "Number of critical edges split on demand"); STATISTIC(NumReused, "Number of reused lowered phis"); char PHIElimination::ID = 0; @@ -184,7 +183,6 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Create a new register for the incoming PHI arguments. MachineFunction &MF = *MBB.getParent(); - const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); unsigned IncomingReg = 0; bool reusedIncoming = false; // Is IncomingReg reused from an earlier PHI? @@ -208,10 +206,12 @@ void llvm::PHIElimination::LowerAtomicPHINode( ++NumReused; DEBUG(dbgs() << "Reusing %reg" << IncomingReg << " for " << *MPhi); } else { + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(DestReg); entry = IncomingReg = MF.getRegInfo().createVirtualRegister(RC); } - TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(MBB, AfterPHIsIt, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), DestReg) + .addReg(IncomingReg); } // Update live variable information if there is any. 
@@ -293,8 +293,8 @@ void llvm::PHIElimination::LowerAtomicPHINode( // Insert the copy. if (!reusedIncoming && IncomingReg) - TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC, - MPhi->getDebugLoc()); + BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg).addReg(SrcReg); // Now update live variable information if we have it. Otherwise we're done if (!LV) continue; @@ -391,57 +391,8 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF, // (not considering PHI nodes). If the register is live in to this block // anyway, we would gain nothing from splitting. if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB)) - SplitCriticalEdge(PreMBB, &MBB); + PreMBB->SplitCriticalEdge(&MBB, this); } } return true; } - -MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A, - MachineBasicBlock *B) { - assert(A && B && "Missing MBB end point"); - - MachineFunction *MF = A->getParent(); - - // We may need to update A's terminator, but we can't do that if AnalyzeBranch - // fails. If A uses a jump table, we won't touch it. - const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (TII->AnalyzeBranch(*A, TBB, FBB, Cond)) - return NULL; - - ++NumSplits; - - MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock(); - MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB); - DEBUG(dbgs() << "PHIElimination splitting critical edge:" - " BB#" << A->getNumber() - << " -- BB#" << NMBB->getNumber() - << " -- BB#" << B->getNumber() << '\n'); - - A->ReplaceUsesOfBlockWith(B, NMBB); - A->updateTerminator(); - - // Insert unconditional "jump B" instruction in NMBB if necessary. - NMBB->addSuccessor(B); - if (!NMBB->isLayoutSuccessor(B)) { - Cond.clear(); - MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond); - } - - // Fix PHI nodes in B so they refer to NMBB instead of A - for (MachineBasicBlock::iterator i = B->begin(), e = B->end(); - i != e && i->isPHI(); ++i) - for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2) - if (i->getOperand(ni+1).getMBB() == A) - i->getOperand(ni+1).setMBB(NMBB); - - if (LiveVariables *LV=getAnalysisIfAvailable()) - LV->addNewBlock(NMBB, A, B); - - if (MachineDominatorTree *MDT=getAnalysisIfAvailable()) - MDT->addNewBlock(NMBB, A); - - return NMBB; -} diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 5ea2941b483c..3489db2e9f4f 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -24,6 +24,11 @@ using namespace llvm; //===---------------------------------------------------------------------===// MachinePassRegistry RegisterRegAlloc::Registry; +static FunctionPass *createDefaultRegisterAllocator() { return 0; } +static RegisterRegAlloc +defaultRegAlloc("default", + "pick register allocator based on -O option", + createDefaultRegisterAllocator); //===---------------------------------------------------------------------===// /// @@ -33,8 +38,8 @@ MachinePassRegistry RegisterRegAlloc::Registry; static cl::opt > RegAlloc("regalloc", - cl::init(&createLinearScanRegisterAllocator), - cl::desc("Register allocator to use (default=linearscan)")); + cl::init(&createDefaultRegisterAllocator), + cl::desc("Register allocator to use")); //===---------------------------------------------------------------------===// @@ -42,13 +47,22 @@ RegAlloc("regalloc", /// createRegisterAllocator - choose the appropriate register allocator. 
/// //===---------------------------------------------------------------------===// -FunctionPass *llvm::createRegisterAllocator() { +FunctionPass *llvm::createRegisterAllocator(CodeGenOpt::Level OptLevel) { RegisterRegAlloc::FunctionPassCtor Ctor = RegisterRegAlloc::getDefault(); - + if (!Ctor) { Ctor = RegAlloc; RegisterRegAlloc::setDefault(RegAlloc); } - - return Ctor(); + + if (Ctor != createDefaultRegisterAllocator) + return Ctor(); + + // When the 'default' allocator is requested, pick one based on OptLevel. + switch (OptLevel) { + case CodeGenOpt::None: + return createFastRegisterAllocator(); + default: + return createLinearScanRegisterAllocator(); + } } diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp new file mode 100644 index 000000000000..cbde2b01eeaf --- /dev/null +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -0,0 +1,180 @@ +//===----- PostRAHazardRecognizer.cpp - hazard recognizer -------- ---------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements a hazard recognizer using the instructions itineraries +// defined for the current target. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "post-RA-sched" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" +#include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrItineraries.h" + +using namespace llvm; + +PostRAHazardRecognizer:: +PostRAHazardRecognizer(const InstrItineraryData &LItinData) : + ScheduleHazardRecognizer(), ItinData(LItinData) { + // Determine the maximum depth of any itinerary. This determines the + // depth of the scoreboard. We always make the scoreboard at least 1 + // cycle deep to avoid dealing with the boundary condition. + unsigned ScoreboardDepth = 1; + if (!ItinData.isEmpty()) { + for (unsigned idx = 0; ; ++idx) { + if (ItinData.isEndMarker(idx)) + break; + + const InstrStage *IS = ItinData.beginStage(idx); + const InstrStage *E = ItinData.endStage(idx); + unsigned ItinDepth = 0; + for (; IS != E; ++IS) + ItinDepth += IS->getCycles(); + + ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth); + } + } + + ReservedScoreboard.reset(ScoreboardDepth); + RequiredScoreboard.reset(ScoreboardDepth); + + DEBUG(dbgs() << "Using post-ra hazard recognizer: ScoreboardDepth = " + << ScoreboardDepth << '\n'); +} + +void PostRAHazardRecognizer::Reset() { + RequiredScoreboard.reset(); + ReservedScoreboard.reset(); +} + +void PostRAHazardRecognizer::ScoreBoard::dump() const { + dbgs() << "Scoreboard:\n"; + + unsigned last = Depth - 1; + while ((last > 0) && ((*this)[last] == 0)) + last--; + + for (unsigned i = 0; i <= last; i++) { + unsigned FUs = (*this)[i]; + dbgs() << "\t"; + for (int j = 31; j >= 0; j--) + dbgs() << ((FUs & (1 << j)) ? '1' : '0'); + dbgs() << '\n'; + } +} + +ScheduleHazardRecognizer::HazardType +PostRAHazardRecognizer::getHazardType(SUnit *SU) { + if (ItinData.isEmpty()) + return NoHazard; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to check for + // free FU's in the scoreboard at the appropriate future cycles. 
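// --- Illustrative sketch, not part of the patch: how an allocator would make
// itself selectable through the -regalloc option consulted by
// createRegisterAllocator() above, mirroring the "default" entry registered in
// this hunk. "mine" and createMyRegisterAllocator are placeholder names; the
// factory simply reuses the in-tree fast allocator to keep the sketch
// self-contained.

static FunctionPass *createMyRegisterAllocator() {
  // A real allocator would return its own pass here.
  return createFastRegisterAllocator();
}

static RegisterRegAlloc
myRegAlloc("mine", "experimental register allocator (example only)",
           createMyRegisterAllocator);

// With this registration, `llc -regalloc=mine` would route allocation through
// the factory above, while the plain -O0/-O2 paths keep using the "default"
// entry and its OptLevel-based choice between fast and linear scan.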
+ unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must find one of the stage's units free for every cycle the + // stage is occupied. FIXME it would be more accurate to find the + // same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + if (!freeUnits) { + DEBUG(dbgs() << "*** Hazard in cycle " << (cycle + i) << ", "); + DEBUG(dbgs() << "SU(" << SU->NodeNum << "): "); + DEBUG(SU->getInstr()->dump()); + return Hazard; + } + } + + // Advance the cycle to the next stage. + cycle += IS->getNextCycles(); + } + + return NoHazard; +} + +void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { + if (ItinData.isEmpty()) + return; + + unsigned cycle = 0; + + // Use the itinerary for the underlying instruction to reserve FU's + // in the scoreboard at the appropriate future cycles. + unsigned idx = SU->getInstr()->getDesc().getSchedClass(); + for (const InstrStage *IS = ItinData.beginStage(idx), + *E = ItinData.endStage(idx); IS != E; ++IS) { + // We must reserve one of the stage's units for every cycle the + // stage is occupied. FIXME it would be more accurate to reserve + // the same unit free in all the cycles. + for (unsigned int i = 0; i < IS->getCycles(); ++i) { + assert(((cycle + i) < RequiredScoreboard.getDepth()) && + "Scoreboard depth exceeded!"); + + unsigned freeUnits = IS->getUnits(); + switch (IS->getReservationKind()) { + default: + assert(0 && "Invalid FU reservation"); + case InstrStage::Required: + // Required FUs conflict with both reserved and required ones + freeUnits &= ~ReservedScoreboard[cycle + i]; + // FALLTHROUGH + case InstrStage::Reserved: + // Reserved FUs can conflict only with required ones. + freeUnits &= ~RequiredScoreboard[cycle + i]; + break; + } + + // reduce to a single unit + unsigned freeUnit = 0; + do { + freeUnit = freeUnits; + freeUnits = freeUnit & (freeUnit - 1); + } while (freeUnits); + + assert(freeUnit && "No function unit available!"); + if (IS->getReservationKind() == InstrStage::Required) + RequiredScoreboard[cycle + i] |= freeUnit; + else + ReservedScoreboard[cycle + i] |= freeUnit; + } + + // Advance the cycle to the next stage. 
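// --- Illustrative sketch, not part of the patch: the "reduce to a single
// unit" loop just above repeatedly clears the lowest set bit (x & (x - 1))
// until one bit remains, so it always reserves the highest-numbered free
// functional unit. A standalone version of the same bit trick:

#include <cassert>

static unsigned pickOneUnit(unsigned freeUnits) {
  unsigned freeUnit = 0;
  do {
    freeUnit = freeUnits;
    freeUnits = freeUnit & (freeUnit - 1); // drop the lowest set bit
  } while (freeUnits);
  return freeUnit; // highest set bit of the original mask (0 if none free)
}

int main() {
  assert(pickOneUnit(0x6) == 0x4); // units 1 and 2 free -> unit 2 is reserved
  assert(pickOneUnit(0x1) == 0x1); // only unit 0 free -> it is reserved
  return 0;
}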
+ cycle += IS->getNextCycles(); + } + + DEBUG(ReservedScoreboard.dump()); + DEBUG(RequiredScoreboard.dump()); +} + +void PostRAHazardRecognizer::AdvanceCycle() { + ReservedScoreboard[0] = 0; ReservedScoreboard.advance(); + RequiredScoreboard[0] = 0; RequiredScoreboard.advance(); +} diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index 9714ea653b59..4af8e07f3480 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -22,8 +22,6 @@ #include "AntiDepBreaker.h" #include "AggressiveAntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "ExactHazardRecognizer.h" -#include "SimpleHazardRecognizer.h" #include "ScheduleDAGInstrs.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" @@ -65,10 +63,6 @@ EnableAntiDepBreaking("break-anti-dependencies", cl::desc("Break post-RA scheduling anti-dependencies: " "\"critical\", \"all\", or \"none\""), cl::init("none"), cl::Hidden); -static cl::opt -EnablePostRAHazardAvoidance("avoid-hazards", - cl::desc("Enable exact hazard avoidance"), - cl::init(true), cl::Hidden); // If DebugDiv > 0 then only schedule MBB with (ID % DebugDiv) == DebugMod static cl::opt @@ -85,6 +79,7 @@ AntiDepBreaker::~AntiDepBreaker() { } namespace { class PostRAScheduler : public MachineFunctionPass { AliasAnalysis *AA; + const TargetInstrInfo *TII; CodeGenOpt::Level OptLevel; public: @@ -187,30 +182,9 @@ namespace { }; } -/// isSchedulingBoundary - Test if the given instruction should be -/// considered a scheduling boundary. This primarily includes labels -/// and terminators. -/// -static bool isSchedulingBoundary(const MachineInstr *MI, - const MachineFunction &MF) { - // Terminators and labels can't be scheduled around. - if (MI->getDesc().isTerminator() || MI->isLabel()) - return true; - - // Don't attempt to schedule around any instruction that defines - // a stack-oriented pointer, as it's unlikely to be profitable. This - // saves compile time, because it doesn't require every single - // stack slot reference to depend on the instruction that does the - // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) - return true; - - return false; -} - bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { AA = &getAnalysis(); + TII = Fn.getTarget().getInstrInfo(); // Check for explicit enable/disable of post-ra scheduling. TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE; @@ -237,10 +211,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { const MachineLoopInfo &MLI = getAnalysis(); const MachineDominatorTree &MDT = getAnalysis(); - const InstrItineraryData &InstrItins = Fn.getTarget().getInstrItineraryData(); - ScheduleHazardRecognizer *HR = EnablePostRAHazardAvoidance ? - (ScheduleHazardRecognizer *)new ExactHazardRecognizer(InstrItins) : - (ScheduleHazardRecognizer *)new SimpleHazardRecognizer(); + const TargetMachine &TM = Fn.getTarget(); + const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + ScheduleHazardRecognizer *HR = + TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); AntiDepBreaker *ADB = ((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ? 
(AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) : @@ -271,8 +245,8 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { MachineBasicBlock::iterator Current = MBB->end(); unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { - MachineInstr *MI = prior(I); - if (isSchedulingBoundary(MI, Fn)) { + MachineInstr *MI = llvm::prior(I); + if (TII->isSchedulingBoundary(MI, MBB, Fn)) { Scheduler.Run(MBB, I, Current, CurrentCount); Scheduler.EmitSchedule(); Current = MI; @@ -680,15 +654,6 @@ void SchedulePostRATDList::ListScheduleTopDown() { ScheduleNodeTopDown(FoundSUnit, CurCycle); HazardRec->EmitInstruction(FoundSUnit); CycleHasInsts = true; - - // If we are using the target-specific hazards, then don't - // advance the cycle time just because we schedule a node. If - // the target allows it we can schedule multiple nodes in the - // same cycle. - if (!EnablePostRAHazardAvoidance) { - if (FoundSUnit->Latency) // Don't increment CurCycle for pseudo-ops! - ++CurCycle; - } } else { if (CycleHasInsts) { DEBUG(dbgs() << "*** Finished cycle " << CurCycle << '\n'); diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp index 96e7327a7eb7..fb2f90935551 100644 --- a/lib/CodeGen/PreAllocSplitting.cpp +++ b/lib/CodeGen/PreAllocSplitting.cpp @@ -512,9 +512,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, LI->addRange(LiveRange(UseIndex, EndIndex, RetVNI)); // FIXME: Need to set kills properly for inter-block stuff. - if (RetVNI->isKill(UseIndex)) RetVNI->removeKill(UseIndex); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); } else if (ContainsDefs && ContainsUses) { SmallPtrSet& BlockDefs = Defs[MBB]; SmallPtrSet& BlockUses = Uses[MBB]; @@ -556,12 +553,6 @@ PreAllocSplitting::PerformPHIConstruction(MachineBasicBlock::iterator UseI, NewVNs, LiveOut, Phis, false, true); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - - if (foundUse && RetVNI->isKill(StartIndex)) - RetVNI->removeKill(StartIndex); - if (IsIntraBlock) { - RetVNI->addKill(EndIndex); - } } // Memoize results so we don't have to recompute them. @@ -636,9 +627,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us for (DenseMap::iterator I = IncomingVNs.begin(), E = IncomingVNs.end(); I != E; ++I) { I->second->setHasPHIKill(true); - SlotIndex KillIndex(LIs->getMBBEndIdx(I->first), true); - if (!I->second->isKill(KillIndex)) - I->second->addKill(KillIndex); } } @@ -648,8 +636,6 @@ PreAllocSplitting::PerformPHIConstructionFallBack(MachineBasicBlock::iterator Us } else EndIndex = LIs->getMBBEndIdx(MBB); LI->addRange(LiveRange(StartIndex, EndIndex, RetVNI)); - if (IsIntraBlock) - RetVNI->addKill(EndIndex); // Memoize results so we don't have to recompute them. if (!IsIntraBlock) @@ -691,10 +677,12 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { // If the def is a move, set the copy field. 
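[Editorial sketch, not part of the patch.] The loop above only reorders instructions inside regions delimited by scheduling boundaries, which are now identified through TII->isSchedulingBoundary instead of a local helper. A small standalone model of that region-by-region scheduling (all names invented for illustration):

  #include <cstdio>
  #include <functional>
  #include <string>
  #include <vector>

  using Instr = std::string;

  // Split a block at boundary instructions and hand each boundary-free
  // region to the scheduler; boundaries themselves are never moved.
  static void scheduleRegions(const std::vector<Instr> &Block,
                              const std::function<bool(const Instr &)> &IsBoundary,
                              const std::function<void(std::vector<Instr> &)> &Schedule) {
    std::vector<Instr> Region;
    auto Flush = [&] {
      if (!Region.empty()) { Schedule(Region); Region.clear(); }
    };
    for (const Instr &I : Block) {
      if (IsBoundary(I)) {
        Flush();                                       // close current region
        std::printf("boundary kept in place: %s\n", I.c_str());
      } else {
        Region.push_back(I);
      }
    }
    Flush();                                           // trailing region
  }

  int main() {
    std::vector<Instr> Block = {"add", "mul", "LABEL", "load", "store", "ret"};
    scheduleRegions(Block,
        [](const Instr &I) { return I == "LABEL" || I == "ret"; },
        [](std::vector<Instr> &R) {
          std::printf("scheduling region of %zu instrs\n", R.size());
        });
    return 0;
  }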
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (TII->isMoveInstr(*DI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { if (DstReg == LI->reg) NewVN->setCopy(&*DI); - + } else if (DI->isCopyLike() && DI->getOperand(0).getReg() == LI->reg) + NewVN->setCopy(&*DI); + NewVNs[&*DI] = NewVN; } @@ -725,25 +713,6 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) { VNInfo* DeadVN = NewVNs[&*DI]; LI->addRange(LiveRange(DefIdx, DefIdx.getNextSlot(), DeadVN)); - DeadVN->addKill(DefIdx); - } - - // Update kill markers. - for (LiveInterval::vni_iterator VI = LI->vni_begin(), VE = LI->vni_end(); - VI != VE; ++VI) { - VNInfo* VNI = *VI; - for (unsigned i = 0, e = VNI->kills.size(); i != e; ++i) { - SlotIndex KillIdx = VNI->kills[i]; - if (KillIdx.isPHI()) - continue; - MachineInstr *KillMI = LIs->getInstructionFromIndex(KillIdx); - if (KillMI) { - MachineOperand *KillMO = KillMI->findRegisterUseOperand(CurrLI->reg); - if (KillMO) - // It could be a dead def. - KillMO->setIsKill(); - } - } } } @@ -773,19 +742,14 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { VNsToCopy.push_back(OldVN); // Locate two-address redefinitions - for (VNInfo::KillSet::iterator KI = OldVN->kills.begin(), - KE = OldVN->kills.end(); KI != KE; ++KI) { - assert(!KI->isPHI() && - "VN previously reported having no PHI kills."); - MachineInstr* MI = LIs->getInstructionFromIndex(*KI); - unsigned DefIdx = MI->findRegisterDefOperandIdx(CurrLI->reg); - if (DefIdx == ~0U) continue; - if (MI->isRegTiedToUseOperand(DefIdx)) { - VNInfo* NextVN = - CurrLI->findDefinedVNInfoForRegInt(KI->getDefIndex()); - if (NextVN == OldVN) continue; + for (MachineRegisterInfo::def_iterator DI = MRI->def_begin(CurrLI->reg), + DE = MRI->def_end(); DI != DE; ++DI) { + if (!DI->isRegTiedToUseOperand(DI.getOperandNo())) continue; + SlotIndex DefIdx = LIs->getInstructionIndex(&*DI).getDefIndex(); + VNInfo* NextVN = CurrLI->findDefinedVNInfoForRegInt(DefIdx); + if (std::find(VNsToCopy.begin(), VNsToCopy.end(), NextVN) != + VNsToCopy.end()) Stack.push_back(NextVN); - } } } @@ -836,7 +800,7 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) { if (IntervalSSMap.count(CurrLI->reg)) IntervalSSMap[NewVReg] = IntervalSSMap[CurrLI->reg]; - NumRenumbers++; + ++NumRenumbers; } bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, @@ -854,7 +818,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo, if (KillPt == DefMI->getParent()->end()) return false; - TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI); + TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, *TRI); SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt)); ReconstructLiveInterval(CurrLI); @@ -899,12 +863,11 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg, SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment()); } - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumFolds; IntervalSSMap[vreg] = SS; @@ -980,12 +943,11 @@ MachineInstr* PreAllocSplitting::FoldRestore(unsigned vreg, if (!TII->canFoldMemoryOperand(FoldPt, Ops)) return 0; - MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(), - FoldPt, Ops, SS); + MachineInstr* FMI = TII->foldMemoryOperand(FoldPt, Ops, SS); if (FMI) { LIs->ReplaceMachineInstrInMaps(FoldPt, 
FMI); - FMI = MBB->insert(MBB->erase(FoldPt), FMI); + FoldPt->eraseFromParent(); ++NumRestoreFolds; } @@ -1192,7 +1154,7 @@ unsigned PreAllocSplitting::getNumberOfNonSpills( int StoreFrameIndex; unsigned StoreVReg = TII->isStoreToStackSlot(*UI, StoreFrameIndex); if (StoreVReg != Reg || StoreFrameIndex != FrameIndex) - NonSpills++; + ++NonSpills; int DefIdx = (*UI)->findRegisterDefOperandIdx(Reg); if (DefIdx != -1 && (*UI)->isRegTiedToUseOperand(DefIdx)) @@ -1255,7 +1217,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); VNUseCount.erase(CurrVN); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1291,9 +1253,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { Ops.push_back(OpIdx); if (!TII->canFoldMemoryOperand(use, Ops)) continue; - MachineInstr* NewMI = - TII->foldMemoryOperand(*use->getParent()->getParent(), - use, Ops, FrameIndex); + MachineInstr* NewMI = TII->foldMemoryOperand(use, Ops, FrameIndex); if (!NewMI) continue; @@ -1303,10 +1263,9 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - MachineBasicBlock* MBB = use->getParent(); - NewMI = MBB->insert(MBB->erase(use), NewMI); + use->eraseFromParent(); VNUseCount[CurrVN].erase(use); - + // Remove deleted instructions. Note that we need to remove them from // the VNInfo->use map as well, just to be safe. for (SmallPtrSet::iterator II = @@ -1328,7 +1287,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { if (VI->second.erase(use)) VI->second.insert(NewMI); - NumDeadSpills++; + ++NumDeadSpills; changed = true; continue; } @@ -1350,7 +1309,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet& split) { LIs->RemoveMachineInstrFromMaps(DefMI); (*LI)->removeValNo(CurrVN); DefMI->eraseFromParent(); - NumDeadSpills++; + ++NumDeadSpills; changed = true; } } diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp index 62f525fa1d97..ca4c47716875 100644 --- a/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/lib/CodeGen/ProcessImplicitDefs.cpp @@ -46,14 +46,14 @@ bool ProcessImplicitDefs::CanTurnIntoImplicitDef(MachineInstr *MI, const TargetInstrInfo *tii_) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) + Reg == SrcReg && DstSubReg == 0) return true; - if (OpIdx == 2 && MI->isSubregToReg()) - return true; - if (OpIdx == 1 && MI->isExtractSubreg()) - return true; - return false; + switch(OpIdx) { + case 1: return MI->isCopy() && MI->getOperand(0).getSubReg() == 0; + case 2: return MI->isSubregToReg() && MI->getOperand(0).getSubReg() == 0; + default: return false; + } } /// processImplicitDefs - Process IMPLICIT_DEF instructions and make sure @@ -101,11 +101,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { continue; } - if (MI->isInsertSubreg()) { - MachineOperand &MO = MI->getOperand(2); + // Eliminate %reg1032:sub = COPY undef. + if (MI->isCopy() && MI->getOperand(0).getSubReg()) { + MachineOperand &MO = MI->getOperand(1); if (ImpDefRegs.count(MO.getReg())) { - // %reg1032 = INSERT_SUBREG %reg1032, undef, 2 - // This is an identity copy, eliminate it now. 
if (MO.isKill()) { LiveVariables::VarInfo& vi = lv_->getVarInfo(MO.getReg()); vi.removeKill(MI); @@ -119,7 +118,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { bool ChangedToImpDef = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand& MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse() || MO.isUndef()) + if (!MO.isReg() || (MO.isDef() && !MO.getSubReg()) || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if (!Reg) @@ -144,6 +143,12 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { Changed = true; MO.setIsUndef(); + // This is a partial register redef of an implicit def. + // Make sure the whole register is defined by the instruction. + if (MO.isDef()) { + MI->addRegisterDefined(Reg); + continue; + } if (MO.isKill() || MI->isRegTiedToDefOperand(i)) { // Make sure other uses of for (unsigned j = i+1; j != e; ++j) { @@ -219,8 +224,10 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) { // Turn a copy use into an implicit_def. unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; - if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && - Reg == SrcReg && SrcSubReg == 0 && DstSubReg == 0) { + if ((RMI->isCopy() && RMI->getOperand(1).getReg() == Reg && + RMI->getOperand(0).getSubReg() == 0) || + (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) && + Reg == SrcReg && DstSubReg == 0)) { RMI->setDesc(tii_->get(TargetOpcode::IMPLICIT_DEF)); bool isKill = false; diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index e778024c6a3a..3843b2537051 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -158,9 +158,9 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { AdjustsStack = true; FrameSDOps.push_back(I); } else if (I->isInlineAsm()) { - // An InlineAsm might be a call; assume it is to get the stack frame - // aligned correctly for calls. - AdjustsStack = true; + // Some inline asm's need a stack frame, as indicated by operand 1. + if (I->getOperand(1).getImm()) + AdjustsStack = true; } MFI->setAdjustsStack(AdjustsStack); @@ -202,22 +202,17 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if (Fn.getFunction()->hasFnAttr(Attribute::Naked)) return; - // Figure out which *callee saved* registers are modified by the current - // function, thus needing to be saved and restored in the prolog/epilog. - const TargetRegisterClass * const *CSRegClasses = - RegInfo->getCalleeSavedRegClasses(&Fn); - std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; if (Fn.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); } else { for (const unsigned *AliasSet = RegInfo->getAliasSet(Reg); *AliasSet; ++AliasSet) { // Check alias registers too. 
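[Editorial sketch, not part of the patch.] The ProcessImplicitDefs changes above extend the same underlying idea: a value defined only by IMPLICIT_DEF carries no real data, so copies of it can themselves become IMPLICIT_DEFs and its uses can be flagged undef. A simplified standalone model of that propagation (struct and register names invented for illustration):

  #include <cstdio>
  #include <set>
  #include <string>
  #include <vector>

  struct Inst {
    std::string Op;    // "implicit_def", "copy", or anything else
    std::string Dst;
    std::string Src;   // empty when unused
  };

  // Turn copies whose source is known-undefined into IMPLICIT_DEFs of
  // their destination; any real definition clears the property.
  static void propagateImplicitDefs(std::vector<Inst> &Code) {
    std::set<std::string> ImpDef;
    for (Inst &I : Code) {
      if (I.Op == "implicit_def") {
        ImpDef.insert(I.Dst);
      } else if (I.Op == "copy" && ImpDef.count(I.Src)) {
        I.Op = "implicit_def";           // copy of undef is itself undef
        I.Src.clear();
        ImpDef.insert(I.Dst);
      } else {
        ImpDef.erase(I.Dst);             // a real def ends the undef-ness
      }
    }
  }

  int main() {
    std::vector<Inst> Code = {{"implicit_def", "a", ""},
                              {"copy", "b", "a"},
                              {"add", "c", "b"}};
    propagateImplicitDefs(Code);
    for (const Inst &I : Code)
      std::printf("%s %s %s\n", I.Op.c_str(), I.Dst.c_str(), I.Src.c_str());
    return 0;
  }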
if (Fn.getRegInfo().isPhysRegUsed(*AliasSet)) { - CSI.push_back(CalleeSavedInfo(Reg, CSRegClasses[i])); + CSI.push_back(CalleeSavedInfo(Reg)); break; } } @@ -236,7 +231,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { for (std::vector::iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { unsigned Reg = I->getReg(); - const TargetRegisterClass *RC = I->getRegClass(); + const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { @@ -265,8 +260,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx; } else { // Spill it to the stack where we must. - FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, - true, false); + FrameIdx = MFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset, true); } I->setFrameIdx(FrameIdx); @@ -303,8 +297,10 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*EntryBlock, I, CSI[i].getReg(), true, - CSI[i].getFrameIdx(), CSI[i].getRegClass(),TRI); + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, + CSI[i].getFrameIdx(), RC, TRI); } } @@ -328,9 +324,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // terminators that preceed it. if (!TII.restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, CSI[i].getReg(), + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), - CSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -374,10 +372,12 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { MBB->addLiveIn(blockCSI[i].getReg()); // Insert the spill to the stack frame. - TII.storeRegToStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.storeRegToStackSlot(*MBB, I, Reg, true, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); } } @@ -423,9 +423,11 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that preceed it. for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - TII.loadRegFromStackSlot(*MBB, I, blockCSI[i].getReg(), + unsigned Reg = blockCSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, blockCSI[i].getFrameIdx(), - blockCSI[i].getRegClass(), TRI); + RC, TRI); assert(I != MBB->begin() && "loadRegFromStackSlot didn't insert any code!"); // Insert in reverse order. loadRegFromStackSlot can insert @@ -639,6 +641,9 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { +#ifndef NDEBUG + int SPAdjCount = 0; // frame setup / destroy count. +#endif int SPAdj = 0; // SP offset due to call frame setup / destroy. 
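[Editorial sketch, not part of the patch.] calculateCalleeSavedRegisters keeps a register in the save list if the function modifies it or any register aliasing it, and with this change the spill register class is recomputed via getMinimalPhysRegClass instead of being stored in CalleeSavedInfo. A standalone model of the save-list computation (register names and alias table invented for illustration):

  #include <cstdio>
  #include <map>
  #include <set>
  #include <string>
  #include <vector>

  // A callee-saved register needs a spill slot if the function writes it
  // or any aliasing register (e.g. save r5 when only its low half is used).
  static std::vector<std::string>
  computeCSRsToSave(const std::vector<std::string> &CalleeSaved,
                    const std::set<std::string> &UsedPhysRegs,
                    const std::map<std::string, std::vector<std::string>> &Aliases) {
    std::vector<std::string> ToSave;
    for (const std::string &Reg : CalleeSaved) {
      bool Used = UsedPhysRegs.count(Reg) != 0;
      if (!Used) {
        auto It = Aliases.find(Reg);
        if (It != Aliases.end())
          for (const std::string &A : It->second)
            if (UsedPhysRegs.count(A)) { Used = true; break; }
      }
      if (Used)
        ToSave.push_back(Reg);          // gets a frame index later
    }
    return ToSave;
  }

  int main() {
    std::vector<std::string> CalleeSaved = {"r4", "r5", "r6"};
    std::set<std::string> Used = {"r5l"};   // only the low half of r5
    std::map<std::string, std::vector<std::string>> Aliases = {{"r5", {"r5l", "r5h"}}};
    for (const std::string &R : computeCSRsToSave(CalleeSaved, Used, Aliases))
      std::printf("save %s\n", R.c_str());  // prints: save r5
    return 0;
  }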
if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); @@ -646,6 +651,10 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (I->getOpcode() == FrameSetupOpcode || I->getOpcode() == FrameDestroyOpcode) { +#ifndef NDEBUG + // Track whether we see even pairs of them + SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; +#endif // Remember how much SP has been adjusted to create the call // frame. int Size = I->getOperand(0).getImm(); @@ -712,7 +721,13 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } - assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); + // If we have evenly matched pairs of frame setup / destroy instructions, + // make sure the adjustments come out to zero. If we don't have matched + // pairs, we can't be sure the missing bit isn't in another basic block + // due to a custom inserter playing tricks, so just asserting SPAdj==0 + // isn't sufficient. See tMOVCC on Thumb1, for example. + assert((SPAdjCount || SPAdj == 0) && + "Unbalanced call frame setup / destroy pairs?"); } } @@ -870,11 +885,7 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { // Scavenge a new scratch register CurrentVirtReg = Reg; const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - CurrentScratchReg = RS->FindUnusedReg(RC); - if (CurrentScratchReg == 0) - // No register is "free". Scavenge a register. - CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); - + CurrentScratchReg = RS->scavengeRegister(RC, I, SPAdj); PrevValue = Value; } // replace this reference to the virtual register with the diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index b3b576070863..f44478e5dd0b 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -110,6 +110,11 @@ namespace { // Allocatable - vector of allocatable physical registers. BitVector Allocatable; + // SkippedInstrs - Descriptors of instructions whose clobber list was ignored + // because all registers were spilled. It is still necessary to mark all the + // clobbered registers as used by the function. + SmallPtrSet SkippedInstrs; + // isBulkSpilling - This flag is set when LiveRegMap will be cleared // completely after spilling all live registers. LiveRegMap entries should // not be erased. @@ -135,6 +140,8 @@ namespace { private: bool runOnMachineFunction(MachineFunction &Fn); void AllocateBasicBlock(); + void handleThroughOperands(MachineInstr *MI, + SmallVectorImpl &VirtDead); int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); bool isLastUseOfLocalReg(MachineOperand&); @@ -508,27 +515,20 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, bool New; tie(LRI, New) = LiveVirtRegs.insert(std::make_pair(VirtReg, LiveReg())); LiveReg &LR = LRI->second; - bool PartialRedef = MI->getOperand(OpNum).getSubReg(); if (New) { // If there is no hint, peek at the only use of this register. if ((!Hint || !TargetRegisterInfo::isPhysicalRegister(Hint)) && MRI->hasOneNonDBGUse(VirtReg)) { + const MachineInstr &UseMI = *MRI->use_nodbg_begin(VirtReg); unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; // It's a copy, use the destination register as a hint. 
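[Editorial sketch, not part of the patch.] The SPAdjCount bookkeeping above exists so the balance assertion only fires when the frame setup/destroy pseudos actually come in matched pairs within the block; unmatched pairs may legitimately be split across blocks by a custom inserter. A standalone illustration of the same bookkeeping:

  #include <cassert>
  #include <cstdio>
  #include <string>
  #include <utility>
  #include <vector>

  int main() {
    // (kind, size) pairs standing in for FrameSetup/FrameDestroy pseudos.
    std::vector<std::pair<std::string, int>> Block = {
        {"setup", 16}, {"call", 0}, {"destroy", 16}};

    int SPAdj = 0;       // bytes of call frame currently on the stack
    int SPAdjCount = 0;  // +1 per setup, -1 per destroy
    for (const auto &MI : Block) {
      if (MI.first == "setup")   { SPAdj += MI.second; ++SPAdjCount; }
      if (MI.first == "destroy") { SPAdj -= MI.second; --SPAdjCount; }
      std::printf("%-7s -> SPAdj=%d\n", MI.first.c_str(), SPAdj);
    }
    // Only insist on a zero adjustment when every setup in this block saw
    // its matching destroy.
    assert((SPAdjCount != 0 || SPAdj == 0) &&
           "Unbalanced call frame setup / destroy pairs?");
    return 0;
  }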
- if (TII->isMoveInstr(*MRI->use_nodbg_begin(VirtReg), - SrcReg, DstReg, SrcSubReg, DstSubReg)) + if (UseMI.isCopyLike()) + Hint = UseMI.getOperand(0).getReg(); + else if (TII->isMoveInstr(UseMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) Hint = DstReg; } allocVirtReg(MI, *LRI, Hint); - // If this is only a partial redefinition, we must reload the other parts. - if (PartialRedef && MI->readsVirtualRegister(VirtReg)) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - int FI = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << "Reloading for partial redef: %reg" << VirtReg << "\n"); - TII->loadRegFromStackSlot(*MBB, MI, LR.PhysReg, FI, RC, TRI); - ++NumLoads; - } - } else if (LR.LastUse && !PartialRedef) { + } else if (LR.LastUse) { // Redefining a live register - kill at the last use, unless it is this // instruction defining VirtReg multiple times. if (LR.LastUse != MI || LR.LastUse->getOperand(LR.LastOpNum).isUse()) @@ -564,10 +564,16 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, } else if (LR.Dirty) { if (isLastUseOfLocalReg(MO)) { DEBUG(dbgs() << "Killing last use: " << MO << "\n"); - MO.setIsKill(); + if (MO.isUse()) + MO.setIsKill(); + else + MO.setIsDead(); } else if (MO.isKill()) { DEBUG(dbgs() << "Clearing dubious kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing dubious dead: " << MO << "\n"); + MO.setIsDead(false); } } else if (MO.isKill()) { // We must remove kill flags from uses of reloaded registers because the @@ -576,6 +582,9 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, // This would cause a second reload of %x into a different register. DEBUG(dbgs() << "Clearing clean kill: " << MO << "\n"); MO.setIsKill(false); + } else if (MO.isDead()) { + DEBUG(dbgs() << "Clearing clean dead: " << MO << "\n"); + MO.setIsDead(false); } assert(LR.PhysReg && "Register not assigned"); LR.LastUse = MI; @@ -607,6 +616,91 @@ bool RAFast::setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg) { return MO.isDead(); } +// Handle special instruction operand like early clobbers and tied ops when +// there are additional physreg defines. +void RAFast::handleThroughOperands(MachineInstr *MI, + SmallVectorImpl &VirtDead) { + DEBUG(dbgs() << "Scanning for through registers:"); + SmallSet ThroughRegs; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isEarlyClobber() || MI->isRegTiedToDefOperand(i) || + (MO.getSubReg() && MI->readsVirtualRegister(Reg))) { + if (ThroughRegs.insert(Reg)) + DEBUG(dbgs() << " %reg" << Reg); + } + } + + // If any physreg defines collide with preallocated through registers, + // we must spill and reallocate. 
+ DEBUG(dbgs() << "\nChecking for physdef collisions.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + if (ThroughRegs.count(PhysRegState[Reg])) + definePhysReg(MI, Reg, regFree); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) { + UsedInInstr.set(*AS); + if (ThroughRegs.count(PhysRegState[*AS])) + definePhysReg(MI, *AS, regFree); + } + } + + SmallVector PartialDefs; + DEBUG(dbgs() << "Allocating tied uses and early clobbers.\n"); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + if (MO.isUse()) { + unsigned DefIdx = 0; + if (!MI->isRegTiedToDefOperand(i, &DefIdx)) continue; + DEBUG(dbgs() << "Operand " << i << "("<< MO << ") is tied to operand " + << DefIdx << ".\n"); + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + setPhysReg(MI, i, PhysReg); + // Note: we don't update the def operand yet. That would cause the normal + // def-scan to attempt spilling. + } else if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) { + DEBUG(dbgs() << "Partial redefine: " << MO << "\n"); + // Reload the register, but don't assign to the operand just yet. + // That would confuse the later phys-def processing pass. + LiveRegMap::iterator LRI = reloadVirtReg(MI, i, Reg, 0); + PartialDefs.push_back(LRI->second.PhysReg); + } else if (MO.isEarlyClobber()) { + // Note: defineVirtReg may invalidate MO. + LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); + unsigned PhysReg = LRI->second.PhysReg; + if (setPhysReg(MI, i, PhysReg)) + VirtDead.push_back(Reg); + } + } + + // Restore UsedInInstr to a state usable for allocating normal virtual uses. + UsedInInstr.reset(); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || (MO.isDef() && !MO.isEarlyClobber())) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } + + // Also mark PartialDefs as used to avoid reallocation. + for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) + UsedInInstr.set(PartialDefs[i]); +} + void RAFast::AllocateBasicBlock() { DEBUG(dbgs() << "\nAllocating " << *MBB); @@ -620,7 +714,7 @@ void RAFast::AllocateBasicBlock() { E = MBB->livein_end(); I != E; ++I) definePhysReg(MII, *I, regReserved); - SmallVector PhysECs, VirtDead; + SmallVector VirtDead; SmallVector Coalesced; // Otherwise, sequentially allocate each instruction in the MBB. @@ -670,8 +764,25 @@ void RAFast::AllocateBasicBlock() { LiveRegMap::iterator LRI = LiveVirtRegs.find(Reg); if (LRI != LiveVirtRegs.end()) setPhysReg(MI, i, LRI->second.PhysReg); - else - MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + int SS = StackSlotForVirtReg[Reg]; + if (SS == -1) + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + else { + // Modify DBG_VALUE now that the value is in a spill slot. 
+ uint64_t Offset = MI->getOperand(1).getImm(); + const MDNode *MDPtr = + MI->getOperand(MI->getNumOperands()-1).getMetadata(); + DebugLoc DL = MI->getDebugLoc(); + if (MachineInstr *NewDV = + TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + MBB->insert(MBB->erase(MI), NewDV); + } else + MO.setReg(0); // We can't allocate a physreg for a DebugValue, sorry! + } + } } // Next instruction. continue; @@ -679,17 +790,25 @@ void RAFast::AllocateBasicBlock() { // If this is a copy, we may be able to coalesce. unsigned CopySrc, CopyDst, CopySrcSub, CopyDstSub; - if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) + if (MI->isCopy()) { + CopyDst = MI->getOperand(0).getReg(); + CopySrc = MI->getOperand(1).getReg(); + CopyDstSub = MI->getOperand(0).getSubReg(); + CopySrcSub = MI->getOperand(1).getSubReg(); + } else if (!TII->isMoveInstr(*MI, CopySrc, CopyDst, CopySrcSub, CopyDstSub)) CopySrc = CopyDst = 0; // Track registers used by instruction. UsedInInstr.reset(); - PhysECs.clear(); // First scan. // Mark physreg uses and early clobbers as used. // Find the end of the virtreg operands unsigned VirtOpEnd = 0; + bool hasTiedOps = false; + bool hasEarlyClobbers = false; + bool hasPartialRedefs = false; + bool hasPhysDefs = false; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -697,20 +816,44 @@ void RAFast::AllocateBasicBlock() { if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { VirtOpEnd = i+1; + if (MO.isUse()) { + hasTiedOps = hasTiedOps || + TID.getOperandConstraint(i, TOI::TIED_TO) != -1; + } else { + if (MO.isEarlyClobber()) + hasEarlyClobbers = true; + if (MO.getSubReg() && MI->readsVirtualRegister(Reg)) + hasPartialRedefs = true; + } continue; } if (!Allocatable.test(Reg)) continue; if (MO.isUse()) { usePhysReg(MO); } else if (MO.isEarlyClobber()) { - definePhysReg(MI, Reg, MO.isDead() ? regFree : regReserved); - PhysECs.push_back(Reg); - } + definePhysReg(MI, Reg, (MO.isImplicit() || MO.isDead()) ? + regFree : regReserved); + hasEarlyClobbers = true; + } else + hasPhysDefs = true; + } + + // The instruction may have virtual register operands that must be allocated + // the same register at use-time and def-time: early clobbers and tied + // operands. If there are also physical defs, these registers must avoid + // both physical defs and uses, making them more constrained than normal + // operands. + // We didn't detect inline asm tied operands above, so just make this extra + // pass for all inline asm. + if (MI->isInlineAsm() || hasEarlyClobbers || hasPartialRedefs || + (hasTiedOps && hasPhysDefs)) { + handleThroughOperands(MI, VirtDead); + // Don't attempt coalescing when we have funny stuff going on. + CopyDst = 0; } // Second scan. - // Allocate virtreg uses and early clobbers. - // Collect VirtKills + // Allocate virtreg uses. for (unsigned i = 0; i != VirtOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -722,12 +865,6 @@ void RAFast::AllocateBasicBlock() { CopySrc = (CopySrc == Reg || CopySrc == PhysReg) ? PhysReg : 0; if (setPhysReg(MI, i, PhysReg)) killVirtReg(LRI); - } else if (MO.isEarlyClobber()) { - // Note: defineVirtReg may invalidate MO. 
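[Editorial sketch, not part of the patch.] The DBG_VALUE handling above keeps debug information alive across spilling: when the described value is in a stack slot rather than a register, the debug record is rewritten to reference the frame index instead of being nulled out. A simplified standalone model of that decision (the "undef" fallback and all names are invented for illustration):

  #include <cstdio>
  #include <map>
  #include <string>

  struct DebugValue {
    std::string Var;
    std::string Loc;     // "%vN", "fi#N", or "undef"
  };

  // Prefer a register location, then a spill slot, and only then give up.
  static void relocateDebugValue(DebugValue &DV,
                                 const std::map<std::string, std::string> &RegAssignment,
                                 const std::map<std::string, int> &SpillSlot) {
    auto R = RegAssignment.find(DV.Loc);
    if (R != RegAssignment.end()) { DV.Loc = R->second; return; }
    auto S = SpillSlot.find(DV.Loc);
    if (S != SpillSlot.end()) { DV.Loc = "fi#" + std::to_string(S->second); return; }
    DV.Loc = "undef";    // no register and no slot: the location is lost
  }

  int main() {
    DebugValue DV{"x", "%v7"};
    relocateDebugValue(DV, /*RegAssignment=*/{}, /*SpillSlot=*/{{"%v7", 3}});
    std::printf("DBG_VALUE %s @ %s\n", DV.Var.c_str(), DV.Loc.c_str()); // fi#3
    return 0;
  }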
- LiveRegMap::iterator LRI = defineVirtReg(MI, i, Reg, 0); - unsigned PhysReg = LRI->second.PhysReg; - setPhysReg(MI, i, PhysReg); - PhysECs.push_back(PhysReg); } } @@ -735,12 +872,16 @@ void RAFast::AllocateBasicBlock() { // Track registers defined by instruction - early clobbers at this point. UsedInInstr.reset(); - for (unsigned i = 0, e = PhysECs.size(); i != e; ++i) { - unsigned PhysReg = PhysECs[i]; - UsedInInstr.set(PhysReg); - for (const unsigned *AS = TRI->getAliasSet(PhysReg); - unsigned Alias = *AS; ++AS) - UsedInInstr.set(Alias); + if (hasEarlyClobbers) { + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg() || !MO.isDef()) continue; + unsigned Reg = MO.getReg(); + if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + UsedInInstr.set(Reg); + for (const unsigned *AS = TRI->getAliasSet(Reg); *AS; ++AS) + UsedInInstr.set(*AS); + } } unsigned DefOpEnd = MI->getNumOperands(); @@ -752,13 +893,18 @@ void RAFast::AllocateBasicBlock() { DefOpEnd = VirtOpEnd; DEBUG(dbgs() << " Spilling remaining registers before call.\n"); spillAll(MI); + + // The imp-defs are skipped below, but we still need to mark those + // registers as used by the function. + SkippedInstrs.insert(&TID); } // Third scan. // Allocate defs and collect dead defs. for (unsigned i = 0; i != DefOpEnd; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg()) continue; + if (!MO.isReg() || !MO.isDef() || !MO.getReg() || MO.isEarlyClobber()) + continue; unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { @@ -837,6 +983,14 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { // Make sure the set of used physregs is closed under subreg operations. MRI->closePhysRegsUsed(*TRI); + // Add the clobber lists for all the instructions we skipped earlier. + for (SmallPtrSet::const_iterator + I = SkippedInstrs.begin(), E = SkippedInstrs.end(); I != E; ++I) + if (const unsigned *Defs = (*I)->getImplicitDefs()) + while (*Defs) + MRI->setPhysRegUsed(*Defs++); + + SkippedInstrs.clear(); StackSlotForVirtReg.clear(); return true; } diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index bc331f0ff81d..044672d6d7a5 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -83,7 +83,8 @@ namespace { // pressure, it can caused fewer GPRs to be held in the queue. static cl::opt NumRecentlyUsedRegs("linearscan-skip-count", - cl::desc("Number of registers for linearscan to remember to skip."), + cl::desc("Number of registers for linearscan to remember" + "to skip."), cl::init(0), cl::Hidden); @@ -421,9 +422,10 @@ unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { unsigned SrcReg, DstReg, SrcSubReg, DstSubReg; if (vni->def != SlotIndex() && vni->isDefAccurate() && (CopyMI = li_->getInstructionFromIndex(vni->def)) && - tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg)) + (CopyMI->isCopy() || + tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))) // Defined by a copy, try to extend SrcReg forward - CandReg = SrcReg; + CandReg = CopyMI->isCopy() ? 
CopyMI->getOperand(1).getReg() : SrcReg; else if (TrivCoalesceEnds && (CopyMI = li_->getInstructionFromIndex(range.end.getBaseIndex())) && @@ -992,6 +994,24 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) mri_->setRegAllocationHint(cur->reg, 0, Reg); } + } else if (CopyMI && CopyMI->isCopy()) { + DstReg = CopyMI->getOperand(0).getReg(); + DstSubReg = CopyMI->getOperand(0).getSubReg(); + SrcReg = CopyMI->getOperand(1).getReg(); + SrcSubReg = CopyMI->getOperand(1).getSubReg(); + unsigned Reg = 0; + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg) { + if (SrcSubReg) + Reg = tri_->getSubReg(Reg, SrcSubReg); + if (DstSubReg) + Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + mri_->setRegAllocationHint(cur->reg, 0, Reg); + } } } } @@ -1206,8 +1226,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { DEBUG(dbgs() << "\t\t\tspilling(c): " << *cur << '\n'); SmallVector spillIs; std::vector added; - - added = spiller_->spill(cur, spillIs); + spiller_->spill(cur, added, spillIs); std::sort(added.begin(), added.end(), LISorter()); addStackInterval(cur, ls_, li_, mri_, *vrm_); @@ -1285,10 +1304,8 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) { if (sli->beginIndex() < earliestStart) earliestStart = sli->beginIndex(); - std::vector newIs; - newIs = spiller_->spill(sli, spillIs, &earliestStart); + spiller_->spill(sli, added, spillIs, &earliestStart); addStackInterval(sli, ls_, li_, mri_, *vrm_); - std::copy(newIs.begin(), newIs.end(), std::back_inserter(added)); spilled.insert(sli->reg); } diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp deleted file mode 100644 index 321ae12def57..000000000000 --- a/lib/CodeGen/RegAllocLocal.cpp +++ /dev/null @@ -1,1254 +0,0 @@ -//===-- RegAllocLocal.cpp - A BasicBlock generic register allocator -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This register allocator allocates registers to a basic block at a time, -// attempting to keep values in registers and reusing registers as appropriate. 
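[Editorial sketch, not part of the patch.] Both allocators now recognize COPY instructions in addition to target-specific moves when deriving allocation hints: if an interval is defined by a copy whose other side already has (or is) a physical register, that register is preferred so the copy can later be coalesced away. A tiny standalone model of that preference (names invented for illustration):

  #include <cstdio>
  #include <map>
  #include <string>

  // Return the physical register suggested by a copy's source, if any.
  static std::string hintFromCopy(const std::string &CopySrc,
                                  const std::map<std::string, std::string> &AssignedPhys) {
    if (!CopySrc.empty() && CopySrc[0] == 'r')   // source already physical
      return CopySrc;
    auto It = AssignedPhys.find(CopySrc);        // source virtreg assigned?
    return It != AssignedPhys.end() ? It->second : std::string();
  }

  int main() {
    std::map<std::string, std::string> AssignedPhys = {{"%v3", "r2"}};
    std::printf("hint: %s\n", hintFromCopy("%v3", AssignedPhys).c_str()); // r2
    std::printf("hint: %s\n", hintFromCopy("r5", AssignedPhys).c_str());  // r5
    return 0;
  }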
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "regalloc" -#include "llvm/BasicBlock.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include -using namespace llvm; - -STATISTIC(NumStores, "Number of stores added"); -STATISTIC(NumLoads , "Number of loads added"); -STATISTIC(NumCopies, "Number of copies coalesced"); - -static RegisterRegAlloc - localRegAlloc("local", "local register allocator", - createLocalRegisterAllocator); - -namespace { - class RALocal : public MachineFunctionPass { - public: - static char ID; - RALocal() : MachineFunctionPass(&ID), StackSlotForVirtReg(-1) {} - private: - const TargetMachine *TM; - MachineFunction *MF; - MachineRegisterInfo *MRI; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - - // StackSlotForVirtReg - Maps virtual regs to the frame index where these - // values are spilled. - IndexedMap StackSlotForVirtReg; - - // Virt2PhysRegMap - This map contains entries for each virtual register - // that is currently available in a physical register. - IndexedMap Virt2PhysRegMap; - - unsigned &getVirt2PhysRegMapSlot(unsigned VirtReg) { - return Virt2PhysRegMap[VirtReg]; - } - - // PhysRegsUsed - This array is effectively a map, containing entries for - // each physical register that currently has a value (ie, it is in - // Virt2PhysRegMap). The value mapped to is the virtual register - // corresponding to the physical register (the inverse of the - // Virt2PhysRegMap), or 0. The value is set to 0 if this register is pinned - // because it is used by a future instruction, and to -2 if it is not - // allocatable. If the entry for a physical register is -1, then the - // physical register is "not in the map". - // - std::vector PhysRegsUsed; - - // PhysRegsUseOrder - This contains a list of the physical registers that - // currently have a virtual register value in them. This list provides an - // ordering of registers, imposing a reallocation order. This list is only - // used if all registers are allocated and we have to spill one, in which - // case we spill the least recently used register. Entries at the front of - // the list are the least recently used registers, entries at the back are - // the most recently used. - // - std::vector PhysRegsUseOrder; - - // Virt2LastUseMap - This maps each virtual register to its last use - // (MachineInstr*, operand index pair). - IndexedMap, VirtReg2IndexFunctor> - Virt2LastUseMap; - - std::pair& getVirtRegLastUse(unsigned Reg) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - return Virt2LastUseMap[Reg]; - } - - // VirtRegModified - This bitset contains information about which virtual - // registers need to be spilled back to memory when their registers are - // scavenged. 
If a virtual register has simply been rematerialized, there - // is no reason to spill it to memory when we need the register back. - // - BitVector VirtRegModified; - - // UsedInMultipleBlocks - Tracks whether a particular register is used in - // more than one block. - BitVector UsedInMultipleBlocks; - - void markVirtRegModified(unsigned Reg, bool Val = true) { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - Reg -= TargetRegisterInfo::FirstVirtualRegister; - if (Val) - VirtRegModified.set(Reg); - else - VirtRegModified.reset(Reg); - } - - bool isVirtRegModified(unsigned Reg) const { - assert(TargetRegisterInfo::isVirtualRegister(Reg) && "Illegal VirtReg!"); - assert(Reg - TargetRegisterInfo::FirstVirtualRegister < - VirtRegModified.size() && "Illegal virtual register!"); - return VirtRegModified[Reg - TargetRegisterInfo::FirstVirtualRegister]; - } - - void AddToPhysRegsUseOrder(unsigned Reg) { - std::vector::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), Reg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); - PhysRegsUseOrder.push_back(Reg); - } - - void MarkPhysRegRecentlyUsed(unsigned Reg) { - if (PhysRegsUseOrder.empty() || - PhysRegsUseOrder.back() == Reg) return; // Already most recently used - - for (unsigned i = PhysRegsUseOrder.size(); i != 0; --i) { - unsigned RegMatch = PhysRegsUseOrder[i-1]; // remove from middle - if (!areRegsEqual(Reg, RegMatch)) continue; - - PhysRegsUseOrder.erase(PhysRegsUseOrder.begin()+i-1); - // Add it to the end of the list - PhysRegsUseOrder.push_back(RegMatch); - if (RegMatch == Reg) - return; // Found an exact match, exit early - } - } - - public: - virtual const char *getPassName() const { - return "Local Register Allocator"; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequiredID(PHIEliminationID); - AU.addRequiredID(TwoAddressInstructionPassID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - /// runOnMachineFunction - Register allocate the whole function - bool runOnMachineFunction(MachineFunction &Fn); - - /// AllocateBasicBlock - Register allocate the specified basic block. - void AllocateBasicBlock(MachineBasicBlock &MBB); - - - /// areRegsEqual - This method returns true if the specified registers are - /// related to each other. To do this, it checks to see if they are equal - /// or if the first register is in the alias set of the second register. - /// - bool areRegsEqual(unsigned R1, unsigned R2) const { - if (R1 == R2) return true; - for (const unsigned *AliasSet = TRI->getAliasSet(R2); - *AliasSet; ++AliasSet) { - if (*AliasSet == R1) return true; - } - return false; - } - - /// getStackSpaceFor - This returns the frame index of the specified virtual - /// register on the stack, allocating space if necessary. - int getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC); - - /// removePhysReg - This method marks the specified physical register as no - /// longer being in use. - /// - void removePhysReg(unsigned PhysReg); - - void storeVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, bool isKill); - - /// spillVirtReg - This method spills the value specified by PhysReg into - /// the virtual register slot specified by VirtReg. It then updates the RA - /// data structures to indicate the fact that PhysReg is now available. 
- /// - void spillVirtReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, - unsigned VirtReg, unsigned PhysReg); - - /// spillPhysReg - This method spills the specified physical register into - /// the virtual register slot associated with it. If OnlyVirtRegs is set to - /// true, then the request is ignored if the physical register does not - /// contain a virtual register. - /// - void spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs = false); - - /// assignVirtToPhysReg - This method updates local state so that we know - /// that PhysReg is the proper container for VirtReg now. The physical - /// register must not be used for anything else when this is called. - /// - void assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg); - - /// isPhysRegAvailable - Return true if the specified physical register is - /// free and available for use. This also includes checking to see if - /// aliased registers are all free... - /// - bool isPhysRegAvailable(unsigned PhysReg) const; - - /// getFreeReg - Look to see if there is a free register available in the - /// specified register class. If not, return 0. - /// - unsigned getFreeReg(const TargetRegisterClass *RC); - - /// getReg - Find a physical register to hold the specified virtual - /// register. If all compatible physical registers are used, this method - /// spills the last used virtual register to the stack, and uses that - /// register. If NoFree is true, that means the caller knows there isn't - /// a free register, do not call getFreeReg(). - unsigned getReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned VirtReg, bool NoFree = false); - - /// reloadVirtReg - This method transforms the specified virtual - /// register use to refer to a physical register. This method may do this - /// in one of several ways: if the register is available in a physical - /// register already, it uses that physical register. If the value is not - /// in a physical register, and if there are physical registers available, - /// it loads it into a register: PhysReg if that is an available physical - /// register, otherwise any physical register of the right class. - /// If register pressure is high, and it is possible, it tries to fold the - /// load of the virtual register into the instruction itself. It avoids - /// doing this if register pressure is low to improve the chance that - /// subsequent instructions can use the reloaded value. This method - /// returns the modified instruction. - /// - MachineInstr *reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, SmallSet &RRegs, - unsigned PhysReg); - - /// ComputeLocalLiveness - Computes liveness of registers within a basic - /// block, setting the killed/dead flags as appropriate. - void ComputeLocalLiveness(MachineBasicBlock& MBB); - - void reloadPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I, - unsigned PhysReg); - }; - char RALocal::ID = 0; -} - -/// getStackSpaceFor - This allocates space for the specified virtual register -/// to be held on the stack. -int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) { - // Find the location Reg would belong... - int SS = StackSlotForVirtReg[VirtReg]; - if (SS != -1) - return SS; // Already has space allocated? - - // Allocate a new stack object for this spill location... - int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(), - RC->getAlignment()); - - // Assign the slot. 
- StackSlotForVirtReg[VirtReg] = FrameIdx; - return FrameIdx; -} - - -/// removePhysReg - This method marks the specified physical register as no -/// longer being in use. -/// -void RALocal::removePhysReg(unsigned PhysReg) { - PhysRegsUsed[PhysReg] = -1; // PhyReg no longer used - - std::vector::iterator It = - std::find(PhysRegsUseOrder.begin(), PhysRegsUseOrder.end(), PhysReg); - if (It != PhysRegsUseOrder.end()) - PhysRegsUseOrder.erase(It); -} - -/// storeVirtReg - Store a virtual register to its assigned stack slot. -void RALocal::storeVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg, - bool isKill) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - DEBUG(dbgs() << " to stack slot #" << FrameIndex); - TII->storeRegToStackSlot(MBB, I, PhysReg, isKill, FrameIndex, RC, TRI); - ++NumStores; // Update statistics - - // Mark the spill instruction as last use if we're not killing the register. - if (!isKill) { - MachineInstr *Spill = llvm::prior(I); - int OpNum = Spill->findRegisterUseOperandIdx(PhysReg); - if (OpNum < 0) - getVirtRegLastUse(VirtReg) = std::make_pair((MachineInstr*)0, 0); - else - getVirtRegLastUse(VirtReg) = std::make_pair(Spill, OpNum); - } -} - -/// spillVirtReg - This method spills the value specified by PhysReg into the -/// virtual register slot specified by VirtReg. It then updates the RA data -/// structures to indicate the fact that PhysReg is now available. -/// -void RALocal::spillVirtReg(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned VirtReg, unsigned PhysReg) { - assert(VirtReg && "Spilling a physical register is illegal!" - " Must not have appropriate kill for the register or use exists beyond" - " the intended one."); - DEBUG(dbgs() << " Spilling register " << TRI->getName(PhysReg) - << " containing %reg" << VirtReg); - - if (!isVirtRegModified(VirtReg)) { - DEBUG(dbgs() << " which has not been modified, so no store necessary!"); - std::pair &LastUse = getVirtRegLastUse(VirtReg); - if (LastUse.first) - LastUse.first->getOperand(LastUse.second).setIsKill(); - } else { - // Otherwise, there is a virtual register corresponding to this physical - // register. We only need to spill it into its stack slot if it has been - // modified. - // If the instruction reads the register that's spilled, (e.g. this can - // happen if it is a move to a physical register), then the spill - // instruction is not a kill. - bool isKill = !(I != MBB.end() && I->readsRegister(PhysReg)); - storeVirtReg(MBB, I, VirtReg, PhysReg, isKill); - } - - getVirt2PhysRegMapSlot(VirtReg) = 0; // VirtReg no longer available - - DEBUG(dbgs() << '\n'); - removePhysReg(PhysReg); -} - - -/// spillPhysReg - This method spills the specified physical register into the -/// virtual register slot associated with it. If OnlyVirtRegs is set to true, -/// then the request is ignored if the physical register does not contain a -/// virtual register. -/// -void RALocal::spillPhysReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned PhysReg, bool OnlyVirtRegs) { - if (PhysRegsUsed[PhysReg] != -1) { // Only spill it if it's used! - assert(PhysRegsUsed[PhysReg] != -2 && "Non allocable reg used!"); - if (PhysRegsUsed[PhysReg] || !OnlyVirtRegs) - spillVirtReg(MBB, I, PhysRegsUsed[PhysReg], PhysReg); - return; - } - - // If the selected register aliases any other registers, we must make - // sure that one of the aliases isn't alive. 
- for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] == -1 || // Spill aliased register. - PhysRegsUsed[*AliasSet] == -2) // If allocatable. - continue; - - if (PhysRegsUsed[*AliasSet]) - spillVirtReg(MBB, I, PhysRegsUsed[*AliasSet], *AliasSet); - } -} - - -/// assignVirtToPhysReg - This method updates local state so that we know -/// that PhysReg is the proper container for VirtReg now. The physical -/// register must not be used for anything else when this is called. -/// -void RALocal::assignVirtToPhysReg(unsigned VirtReg, unsigned PhysReg) { - assert(PhysRegsUsed[PhysReg] == -1 && "Phys reg already assigned!"); - // Update information to note the fact that this register was just used, and - // it holds VirtReg. - PhysRegsUsed[PhysReg] = VirtReg; - getVirt2PhysRegMapSlot(VirtReg) = PhysReg; - AddToPhysRegsUseOrder(PhysReg); // New use of PhysReg -} - - -/// isPhysRegAvailable - Return true if the specified physical register is free -/// and available for use. This also includes checking to see if aliased -/// registers are all free... -/// -bool RALocal::isPhysRegAvailable(unsigned PhysReg) const { - if (PhysRegsUsed[PhysReg] != -1) return false; - - // If the selected register aliases any other allocated registers, it is - // not free! - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) - if (PhysRegsUsed[*AliasSet] >= 0) // Aliased register in use? - return false; // Can't use this reg then. - return true; -} - - -/// getFreeReg - Look to see if there is a free register available in the -/// specified register class. If not, return 0. -/// -unsigned RALocal::getFreeReg(const TargetRegisterClass *RC) { - // Get iterators defining the range of registers that are valid to allocate in - // this class, which also specifies the preferred allocation order. - TargetRegisterClass::iterator RI = RC->allocation_order_begin(*MF); - TargetRegisterClass::iterator RE = RC->allocation_order_end(*MF); - - for (; RI != RE; ++RI) - if (isPhysRegAvailable(*RI)) { // Is reg unused? - assert(*RI != 0 && "Cannot use register!"); - return *RI; // Found an unused register! - } - return 0; -} - - -/// getReg - Find a physical register to hold the specified virtual -/// register. If all compatible physical registers are used, this method spills -/// the last used virtual register to the stack, and uses that register. -/// -unsigned RALocal::getReg(MachineBasicBlock &MBB, MachineInstr *I, - unsigned VirtReg, bool NoFree) { - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - - // First check to see if we have a free register of the requested type... - unsigned PhysReg = NoFree ? 0 : getFreeReg(RC); - - if (PhysReg != 0) { - // Assign the register. - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; - } - - // If we didn't find an unused register, scavenge one now! - assert(!PhysRegsUseOrder.empty() && "No allocated registers??"); - - // Loop over all of the preallocated registers from the least recently used - // to the most recently used. When we find one that is capable of holding - // our register, use it. - for (unsigned i = 0; PhysReg == 0; ++i) { - assert(i != PhysRegsUseOrder.size() && - "Couldn't find a register of the appropriate class!"); - - unsigned R = PhysRegsUseOrder[i]; - - // We can only use this register if it holds a virtual register (ie, it - // can be spilled). Do not use it if it is an explicitly allocated - // physical register! 
- assert(PhysRegsUsed[R] != -1 && - "PhysReg in PhysRegsUseOrder, but is not allocated?"); - if (PhysRegsUsed[R] && PhysRegsUsed[R] != -2) { - // If the current register is compatible, use it. - if (RC->contains(R)) { - PhysReg = R; - break; - } - - // If one of the registers aliased to the current register is - // compatible, use it. - for (const unsigned *AliasIt = TRI->getAliasSet(R); - *AliasIt; ++AliasIt) { - if (!RC->contains(*AliasIt)) continue; - - // If this is pinned down for some reason, don't use it. For - // example, if CL is pinned, and we run across CH, don't use - // CH as justification for using scavenging ECX (which will - // fail). - if (PhysRegsUsed[*AliasIt] == 0) continue; - - // Make sure the register is allocatable. Don't allocate SIL on - // x86-32. - if (PhysRegsUsed[*AliasIt] == -2) continue; - - PhysReg = *AliasIt; // Take an aliased register - break; - } - } - } - - assert(PhysReg && "Physical register not assigned!?!?"); - - // At this point PhysRegsUseOrder[i] is the least recently used register of - // compatible register class. Spill it to memory and reap its remains. - spillPhysReg(MBB, I, PhysReg); - - // Now that we know which register we need to assign this to, do it now! - assignVirtToPhysReg(VirtReg, PhysReg); - return PhysReg; -} - - -/// reloadVirtReg - This method transforms the specified virtual -/// register use to refer to a physical register. This method may do this in -/// one of several ways: if the register is available in a physical register -/// already, it uses that physical register. If the value is not in a physical -/// register, and if there are physical registers available, it loads it into a -/// register: PhysReg if that is an available physical register, otherwise any -/// register. If register pressure is high, and it is possible, it tries to -/// fold the load of the virtual register into the instruction itself. It -/// avoids doing this if register pressure is low to improve the chance that -/// subsequent instructions can use the reloaded value. This method returns -/// the modified instruction. -/// -MachineInstr *RALocal::reloadVirtReg(MachineBasicBlock &MBB, MachineInstr *MI, - unsigned OpNum, - SmallSet &ReloadedRegs, - unsigned PhysReg) { - unsigned VirtReg = MI->getOperand(OpNum).getReg(); - unsigned SubIdx = MI->getOperand(OpNum).getSubReg(); - - // If the virtual register is already available, just update the instruction - // and return. - if (unsigned PR = getVirt2PhysRegMapSlot(VirtReg)) { - if (SubIdx) { - PR = TRI->getSubReg(PR, SubIdx); - MI->getOperand(OpNum).setSubReg(0); - } - MI->getOperand(OpNum).setReg(PR); // Assign the input register - if (!MI->isDebugValue()) { - // Do not do these for DBG_VALUE as they can affect codegen. - MarkPhysRegRecentlyUsed(PR); // Already have this value available! - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - } - return MI; - } - - // Otherwise, we need to fold it into the current instruction, or reload it. - // If we have registers available to hold the value, use them. - const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(VirtReg); - // If we already have a PhysReg (this happens when the instruction is a - // reg-to-reg copy with a PhysReg destination) use that. - if (!PhysReg || !TargetRegisterInfo::isPhysicalRegister(PhysReg) || - !isPhysRegAvailable(PhysReg)) - PhysReg = getFreeReg(RC); - int FrameIndex = getStackSpaceFor(VirtReg, RC); - - if (PhysReg) { // Register is available, allocate it! 
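[Editorial sketch, not part of the patch.] The deleted local allocator's eviction policy, visible in getReg above, is least-recently-used: PhysRegsUseOrder keeps physical registers ordered from least to most recently used, and the front-most compatible entry is spilled when nothing is free. A simplified standalone model of that bookkeeping (names invented for illustration):

  #include <algorithm>
  #include <cstdio>
  #include <string>
  #include <vector>

  struct UseOrder {
    std::vector<std::string> Order;   // front = least recently used

    void markUsed(const std::string &Reg) {
      auto It = std::find(Order.begin(), Order.end(), Reg);
      if (It != Order.end())
        Order.erase(It);              // move an existing entry...
      Order.push_back(Reg);           // ...to the most-recently-used end
    }

    // Pick the least recently used register the predicate accepts.
    template <class Pred> std::string pickVictim(Pred Compatible) const {
      for (const std::string &Reg : Order)
        if (Compatible(Reg))
          return Reg;
      return {};
    }
  };

  int main() {
    UseOrder U;
    U.markUsed("r0"); U.markUsed("r1"); U.markUsed("r0"); // r1 is now oldest
    std::string Victim = U.pickVictim([](const std::string &) { return true; });
    std::printf("spill %s\n", Victim.c_str());            // prints: spill r1
    return 0;
  }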
- assignVirtToPhysReg(VirtReg, PhysReg); - } else { // No registers available. - // Force some poor hapless value out of the register file to - // make room for the new register, and reload it. - PhysReg = getReg(MBB, MI, VirtReg, true); - } - - markVirtRegModified(VirtReg, false); // Note that this reg was just reloaded - - DEBUG(dbgs() << " Reloading %reg" << VirtReg << " into " - << TRI->getName(PhysReg) << "\n"); - - // Add move instruction(s) - TII->loadRegFromStackSlot(MBB, MI, PhysReg, FrameIndex, RC, TRI); - ++NumLoads; // Update statistics - - MF->getRegInfo().setPhysRegUsed(PhysReg); - // Assign the input register. - if (SubIdx) { - MI->getOperand(OpNum).setSubReg(0); - MI->getOperand(OpNum).setReg(TRI->getSubReg(PhysReg, SubIdx)); - } else - MI->getOperand(OpNum).setReg(PhysReg); // Assign the input register - getVirtRegLastUse(VirtReg) = std::make_pair(MI, OpNum); - - if (!ReloadedRegs.insert(PhysReg)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (ReloadedRegs.insert(*SubRegs)) continue; - - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Ran out of registers during register allocation!"; - if (MI->isInlineAsm()) { - Msg << "\nPlease check your inline asm statement for invalid " - << "constraints:\n"; - MI->print(Msg, TM); - } - report_fatal_error(Msg.str()); - } - - return MI; -} - -/// isReadModWriteImplicitKill - True if this is an implicit kill for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitKill(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - MO.isDef() && !MO.isDead()) - return true; - } - return false; -} - -/// isReadModWriteImplicitDef - True if this is an implicit def for a -/// read/mod/write register, i.e. update partial register. -static bool isReadModWriteImplicitDef(MachineInstr *MI, unsigned Reg) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.getReg() == Reg && MO.isImplicit() && - !MO.isDef() && MO.isKill()) - return true; - } - return false; -} - -// precedes - Helper function to determine with MachineInstr A -// precedes MachineInstr B within the same MBB. -static bool precedes(MachineBasicBlock::iterator A, - MachineBasicBlock::iterator B) { - if (A == B) - return false; - - MachineBasicBlock::iterator I = A->getParent()->begin(); - while (I != A->getParent()->end()) { - if (I == A) - return true; - else if (I == B) - return false; - - ++I; - } - - return false; -} - -/// ComputeLocalLiveness - Computes liveness of registers within a basic -/// block, setting the killed/dead flags as appropriate. -void RALocal::ComputeLocalLiveness(MachineBasicBlock& MBB) { - // Keep track of the most recently seen previous use or def of each reg, - // so that we can update them with dead/kill markers. 
- DenseMap > LastUseDef; - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); - I != E; ++I) { - if (I->isDebugValue()) - continue; - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Uses don't trigger any flags, but we need to save - // them for later. Also, we have to process these - // _before_ processing the defs, since an instr - // uses regs before it defs them. - if (!MO.isReg() || !MO.getReg() || !MO.isUse()) - continue; - - // Ignore helpful kill flags from earlier passes. - MO.setIsKill(false); - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) continue; - - const unsigned *Aliases = TRI->getAliasSet(MO.getReg()); - if (Aliases == 0) - continue; - - while (*Aliases) { - DenseMap >::iterator - alias = LastUseDef.find(*Aliases); - - if (alias != LastUseDef.end() && alias->second.first != I) - LastUseDef[*Aliases] = std::make_pair(I, i); - - ++Aliases; - } - } - - for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { - MachineOperand &MO = I->getOperand(i); - // Defs others than 2-addr redefs _do_ trigger flag changes: - // - A def followed by a def is dead - // - A use followed by a def is a kill - if (!MO.isReg() || !MO.getReg() || !MO.isDef()) continue; - - unsigned SubIdx = MO.getSubReg(); - DenseMap >::iterator - last = LastUseDef.find(MO.getReg()); - if (last != LastUseDef.end()) { - // Check if this is a two address instruction. If so, then - // the def does not kill the use. - if (last->second.first == I && I->isRegTiedToUseOperand(i)) - continue; - - MachineOperand &lastUD = - last->second.first->getOperand(last->second.second); - if (SubIdx && lastUD.getSubReg() != SubIdx) - // Partial re-def, the last def is not dead. - // %reg1024:5 = - // %reg1024:6 = - // or - // %reg1024:5 = op %reg1024, 5 - continue; - - if (lastUD.isDef()) - lastUD.setIsDead(true); - else - lastUD.setIsKill(true); - } - - LastUseDef[MO.getReg()] = std::make_pair(I, i); - } - } - - // Live-out (of the function) registers contain return values of the function, - // so we need to make sure they are alive at return time. - MachineBasicBlock::iterator Ret = MBB.getFirstTerminator(); - bool BBEndsInReturn = (Ret != MBB.end() && Ret->getDesc().isReturn()); - - if (BBEndsInReturn) - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) - if (!Ret->readsRegister(*I)) { - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - LastUseDef[*I] = std::make_pair(Ret, Ret->getNumOperands()-1); - } - - // Finally, loop over the final use/def of each reg - // in the block and determine if it is dead. - for (DenseMap >::iterator - I = LastUseDef.begin(), E = LastUseDef.end(); I != E; ++I) { - MachineInstr *MI = I->second.first; - unsigned idx = I->second.second; - MachineOperand &MO = MI->getOperand(idx); - - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(MO.getReg()); - - // A crude approximation of "live-out" calculation - bool usedOutsideBlock = isPhysReg ? false : - UsedInMultipleBlocks.test(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - - // If the machine BB ends in a return instruction, then the value isn't used - // outside of the BB. 
- if (!isPhysReg && (!usedOutsideBlock || BBEndsInReturn)) { - // DBG_VALUE complicates this: if the only refs of a register outside - // this block are DBG_VALUE, we can't keep the reg live just for that, - // as it will cause the reg to be spilled at the end of this block when - // it wouldn't have been otherwise. Nullify the DBG_VALUEs when that - // happens. - bool UsedByDebugValueOnly = false; - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) { - // Two cases: - // - used in another block - // - used in the same block before it is defined (loop) - if (UI->getParent() == &MBB && - !(MO.isDef() && UI.getOperand().isUse() && precedes(&*UI, MI))) - continue; - - if (UI->isDebugValue()) { - UsedByDebugValueOnly = true; - continue; - } - - // A non-DBG_VALUE use means we can leave DBG_VALUE uses alone. - UsedInMultipleBlocks.set(MO.getReg() - - TargetRegisterInfo::FirstVirtualRegister); - usedOutsideBlock = true; - UsedByDebugValueOnly = false; - break; - } - - if (UsedByDebugValueOnly) - for (MachineRegisterInfo::reg_iterator UI = MRI->reg_begin(MO.getReg()), - UE = MRI->reg_end(); UI != UE; ++UI) - if (UI->isDebugValue() && - (UI->getParent() != &MBB || - (MO.isDef() && precedes(&*UI, MI)))) - UI.getOperand().setReg(0U); - } - - // Physical registers and those that are not live-out of the block are - // killed/dead at their last use/def within this block. - if (isPhysReg || !usedOutsideBlock || BBEndsInReturn) { - if (MO.isUse()) { - // Don't mark uses that are tied to defs as kills. - if (!MI->isRegTiedToDefOperand(idx)) - MO.setIsKill(true); - } else { - MO.setIsDead(true); - } - } - } -} - -void RALocal::AllocateBasicBlock(MachineBasicBlock &MBB) { - // loop over each instruction - MachineBasicBlock::iterator MII = MBB.begin(); - - DEBUG({ - const BasicBlock *LBB = MBB.getBasicBlock(); - if (LBB) - dbgs() << "\nStarting RegAlloc of BB: " << LBB->getName(); - }); - - // Add live-in registers as active. - for (MachineBasicBlock::livein_iterator I = MBB.livein_begin(), - E = MBB.livein_end(); I != E; ++I) { - unsigned Reg = *I; - MF->getRegInfo().setPhysRegUsed(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - - ComputeLocalLiveness(MBB); - - // Otherwise, sequentially allocate each instruction in the MBB. - while (MII != MBB.end()) { - MachineInstr *MI = MII++; - const TargetInstrDesc &TID = MI->getDesc(); - DEBUG({ - dbgs() << "\nStarting RegAlloc of: " << *MI; - dbgs() << " Regs have values: "; - for (unsigned i = 0; i != TRI->getNumRegs(); ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (PhysRegsUsed[i] && isVirtRegModified(PhysRegsUsed[i])) - dbgs() << "*"; - dbgs() << "[" << TRI->getName(i) - << ",%reg" << PhysRegsUsed[i] << "] "; - } - dbgs() << '\n'; - }); - - // Determine whether this is a copy instruction. The cases where the - // source or destination are phys regs are handled specially. 
- unsigned SrcCopyReg, DstCopyReg, SrcCopySubReg, DstCopySubReg; - unsigned SrcCopyPhysReg = 0U; - bool isCopy = TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopySubReg == DstCopySubReg; - if (isCopy && TargetRegisterInfo::isVirtualRegister(SrcCopyReg)) - SrcCopyPhysReg = getVirt2PhysRegMapSlot(SrcCopyReg); - - // Loop over the implicit uses, making sure that they are at the head of the - // use order list, so they don't get reallocated. - if (TID.ImplicitUses) { - for (const unsigned *ImplicitUses = TID.ImplicitUses; - *ImplicitUses; ++ImplicitUses) - MarkPhysRegRecentlyUsed(*ImplicitUses); - } - - SmallVector Kills; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isKill()) continue; - - if (!MO.isImplicit()) - Kills.push_back(MO.getReg()); - else if (!isReadModWriteImplicitKill(MI, MO.getReg())) - // These are extra physical register kills when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - Kills.push_back(MO.getReg()); - } - - // If any physical regs are earlyclobber, spill any value they might - // have in them, then mark them unallocatable. - // If any virtual regs are earlyclobber, allocate them now (before - // freeing inputs that are killed). - if (MI->isInlineAsm()) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.isEarlyClobber() || - !MO.getReg()) - continue; - - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) { - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) - DestPhysReg = getReg(MBB, MI, DestVirtReg); - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = - std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the earlyclobber register - } else { - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. - if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - } - } - - // If a DBG_VALUE says something is located in a spilled register, - // change the DBG_VALUE to be undef, which prevents the register - // from being reloaded here. Doing that would change the generated - // code, unless another use immediately follows this instruction. 
- if (MI->isDebugValue() && - MI->getNumOperands()==3 && MI->getOperand(0).isReg()) { - unsigned VirtReg = MI->getOperand(0).getReg(); - if (VirtReg && TargetRegisterInfo::isVirtualRegister(VirtReg) && - !getVirt2PhysRegMapSlot(VirtReg)) - MI->getOperand(0).setReg(0U); - } - - // Get the used operands into registers. This has the potential to spill - // incoming values if we are out of registers. Note that we completely - // ignore physical register uses here. We assume that if an explicit - // physical register is referenced by the instruction, that it is guaranteed - // to be live-in, or the input is badly hosed. - // - SmallSet ReloadedRegs; - for (unsigned i = 0; i != MI->getNumOperands(); ++i) { - MachineOperand &MO = MI->getOperand(i); - // here we are looking for only used operands (never def&use) - if (MO.isReg() && !MO.isDef() && MO.getReg() && !MO.isImplicit() && - TargetRegisterInfo::isVirtualRegister(MO.getReg())) - MI = reloadVirtReg(MBB, MI, i, ReloadedRegs, - isCopy ? DstCopyReg : 0); - } - - // If this instruction is the last user of this register, kill the - // value, freeing the register being used, so it doesn't need to be - // spilled to memory. - // - for (unsigned i = 0, e = Kills.size(); i != e; ++i) { - unsigned VirtReg = Kills[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - // If the virtual register was never materialized into a register, it - // might not be in the map, but it won't hurt to zero it out anyway. - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else { - assert((!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1) && - "Silently clearing a virtual register?"); - } - - if (!PhysReg) continue; - - DEBUG(dbgs() << " Last use of " << TRI->getName(PhysReg) - << "[%reg" << VirtReg <<"], removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *SubRegs = TRI->getSubRegisters(PhysReg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] != -2) { - DEBUG(dbgs() << " Last use of " - << TRI->getName(*SubRegs) << "[%reg" << VirtReg - <<"], removing it from live set\n"); - removePhysReg(*SubRegs); - } - } - } - - // Loop over all of the operands of the instruction, spilling registers that - // are defined, and marking explicit destinations in the PhysRegsUsed map. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || MO.isImplicit() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - - unsigned Reg = MO.getReg(); - if (PhysRegsUsed[Reg] == -2) continue; // Something like ESP. - // These are extra physical register defs when a sub-register - // is defined (def of a sub-register is a read/mod/write of the - // larger registers). Ignore. 
- if (isReadModWriteImplicitDef(MI, MO.getReg())) continue; - - MF->getRegInfo().setPhysRegUsed(Reg); - spillPhysReg(MBB, MI, Reg, true); // Spill any existing value in reg - PhysRegsUsed[Reg] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(Reg); - - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - MF->getRegInfo().setPhysRegUsed(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - AddToPhysRegsUseOrder(*SubRegs); - } - } - - // Loop over the implicit defs, spilling them as well. - if (TID.ImplicitDefs) { - for (const unsigned *ImplicitDefs = TID.ImplicitDefs; - *ImplicitDefs; ++ImplicitDefs) { - unsigned Reg = *ImplicitDefs; - if (PhysRegsUsed[Reg] != -2) { - spillPhysReg(MBB, MI, Reg, true); - AddToPhysRegsUseOrder(Reg); - PhysRegsUsed[Reg] = 0; // It is free and reserved now - } - MF->getRegInfo().setPhysRegUsed(Reg); - for (const unsigned *SubRegs = TRI->getSubRegisters(Reg); - *SubRegs; ++SubRegs) { - if (PhysRegsUsed[*SubRegs] == -2) continue; - - AddToPhysRegsUseOrder(*SubRegs); - PhysRegsUsed[*SubRegs] = 0; // It is free and reserved now - MF->getRegInfo().setPhysRegUsed(*SubRegs); - } - } - } - - SmallVector DeadDefs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg() && MO.isDead()) - DeadDefs.push_back(MO.getReg()); - } - - // Okay, we have allocated all of the source operands and spilled any values - // that would be destroyed by defs of this instruction. Loop over the - // explicit defs and assign them to a register, spilling incoming values if - // we need to scavenge a register. - // - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isDef() || !MO.getReg() || - MO.isEarlyClobber() || - !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - unsigned DestVirtReg = MO.getReg(); - unsigned DestPhysReg; - - // If DestVirtReg already has a value, use it. - if (!(DestPhysReg = getVirt2PhysRegMapSlot(DestVirtReg))) { - // If this is a copy try to reuse the input as the output; - // that will make the copy go away. - // If this is a copy, the source reg is a phys reg, and - // that reg is available, use that phys reg for DestPhysReg. - // If this is a copy, the source reg is a virtual reg, and - // the phys reg that was assigned to that virtual reg is now - // available, use that phys reg for DestPhysReg. (If it's now - // available that means this was the last use of the source.) 
- if (isCopy && - TargetRegisterInfo::isPhysicalRegister(SrcCopyReg) && - isPhysRegAvailable(SrcCopyReg)) { - DestPhysReg = SrcCopyReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else if (isCopy && - TargetRegisterInfo::isVirtualRegister(SrcCopyReg) && - SrcCopyPhysReg && isPhysRegAvailable(SrcCopyPhysReg) && - MF->getRegInfo().getRegClass(DestVirtReg)-> - contains(SrcCopyPhysReg)) { - DestPhysReg = SrcCopyPhysReg; - assignVirtToPhysReg(DestVirtReg, DestPhysReg); - } else - DestPhysReg = getReg(MBB, MI, DestVirtReg); - } - MF->getRegInfo().setPhysRegUsed(DestPhysReg); - markVirtRegModified(DestVirtReg); - getVirtRegLastUse(DestVirtReg) = std::make_pair((MachineInstr*)0, 0); - DEBUG(dbgs() << " Assigning " << TRI->getName(DestPhysReg) - << " to %reg" << DestVirtReg << "\n"); - - if (unsigned DestSubIdx = MO.getSubReg()) { - MO.setSubReg(0); - DestPhysReg = TRI->getSubReg(DestPhysReg, DestSubIdx); - } - MO.setReg(DestPhysReg); // Assign the output register - } - - // If this instruction defines any registers that are immediately dead, - // kill them now. - // - for (unsigned i = 0, e = DeadDefs.size(); i != e; ++i) { - unsigned VirtReg = DeadDefs[i]; - unsigned PhysReg = VirtReg; - if (TargetRegisterInfo::isVirtualRegister(VirtReg)) { - unsigned &PhysRegSlot = getVirt2PhysRegMapSlot(VirtReg); - PhysReg = PhysRegSlot; - assert(PhysReg != 0); - PhysRegSlot = 0; - } else if (PhysRegsUsed[PhysReg] == -2) { - // Unallocatable register dead, ignore. - continue; - } else if (!PhysReg) - continue; - - DEBUG(dbgs() << " Register " << TRI->getName(PhysReg) - << " [%reg" << VirtReg - << "] is never used, removing it from live set\n"); - removePhysReg(PhysReg); - for (const unsigned *AliasSet = TRI->getAliasSet(PhysReg); - *AliasSet; ++AliasSet) { - if (PhysRegsUsed[*AliasSet] != -2) { - DEBUG(dbgs() << " Register " << TRI->getName(*AliasSet) - << " [%reg" << *AliasSet - << "] is never used, removing it from live set\n"); - removePhysReg(*AliasSet); - } - } - } - - // If this instruction is a call, make sure there are no dirty registers. The - // call might throw an exception, and the landing pad expects to find all - // registers in stack slots. - if (TID.isCall()) - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) { - if (PhysRegsUsed[i] <= 0) continue; - unsigned VirtReg = PhysRegsUsed[i]; - if (!isVirtRegModified(VirtReg)) continue; - DEBUG(dbgs() << " Storing dirty %reg" << VirtReg); - storeVirtReg(MBB, MI, VirtReg, i, false); - markVirtRegModified(VirtReg, false); - DEBUG(dbgs() << " because the call might throw\n"); - } - - // Finally, if this is a noop copy instruction, zap it. (Except that if - // the copy is dead, it must be kept to avoid messing up liveness info for - // the register scavenger. See pr4100.) - if (TII->isMoveInstr(*MI, SrcCopyReg, DstCopyReg, - SrcCopySubReg, DstCopySubReg) && - SrcCopyReg == DstCopyReg && SrcCopySubReg == DstCopySubReg && - DeadDefs.empty()) { - ++NumCopies; - MBB.erase(MI); - } - } - - MachineBasicBlock::iterator MI = MBB.getFirstTerminator(); - - // Spill all physical registers holding virtual registers now. - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - if (PhysRegsUsed[i] != -1 && PhysRegsUsed[i] != -2) { - if (unsigned VirtReg = PhysRegsUsed[i]) - spillVirtReg(MBB, MI, VirtReg, i); - else - removePhysReg(i); - } - -#if 0 - // This checking code is very expensive. 
- bool AllOk = true; - for (unsigned i = TargetRegisterInfo::FirstVirtualRegister, - e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) - if (unsigned PR = Virt2PhysRegMap[i]) { - cerr << "Register still mapped: " << i << " -> " << PR << "\n"; - AllOk = false; - } - assert(AllOk && "Virtual registers still in phys regs?"); -#endif - - // Clear any physical register which appear live at the end of the basic - // block, but which do not hold any virtual registers. e.g., the stack - // pointer. - PhysRegsUseOrder.clear(); -} - -/// runOnMachineFunction - Register allocate the whole function -/// -bool RALocal::runOnMachineFunction(MachineFunction &Fn) { - DEBUG(dbgs() << "Machine Function\n"); - MF = &Fn; - MRI = &Fn.getRegInfo(); - TM = &Fn.getTarget(); - TRI = TM->getRegisterInfo(); - TII = TM->getInstrInfo(); - - PhysRegsUsed.assign(TRI->getNumRegs(), -1); - - // At various places we want to efficiently check to see whether a register - // is allocatable. To handle this, we mark all unallocatable registers as - // being pinned down, permanently. - { - BitVector Allocable = TRI->getAllocatableSet(Fn); - for (unsigned i = 0, e = Allocable.size(); i != e; ++i) - if (!Allocable[i]) - PhysRegsUsed[i] = -2; // Mark the reg unallocable. - } - - // initialize the virtual->physical register map to have a 'null' - // mapping for all virtual registers - unsigned LastVirtReg = MF->getRegInfo().getLastVirtReg(); - StackSlotForVirtReg.grow(LastVirtReg); - Virt2PhysRegMap.grow(LastVirtReg); - Virt2LastUseMap.grow(LastVirtReg); - VirtRegModified.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); - UsedInMultipleBlocks.resize(LastVirtReg+1 - - TargetRegisterInfo::FirstVirtualRegister); - - // Loop over all of the basic blocks, eliminating virtual register references - for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end(); - MBB != MBBe; ++MBB) - AllocateBasicBlock(*MBB); - - StackSlotForVirtReg.clear(); - PhysRegsUsed.clear(); - VirtRegModified.clear(); - UsedInMultipleBlocks.clear(); - Virt2PhysRegMap.clear(); - Virt2LastUseMap.clear(); - return true; -} - -FunctionPass *llvm::createLocalRegisterAllocator() { - return new RALocal(); -} diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 4fafd2818a12..7e61a12a7eea 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -396,28 +396,23 @@ PBQPRegAlloc::CoalesceMap PBQPRegAlloc::findCoalesces() { if (srcRegIsPhysical && dstRegIsPhysical) continue; - // If it's a copy that includes a virtual register but the source and - // destination classes differ then we can't coalesce, so continue with - // the next instruction. - const TargetRegisterClass *srcRegClass = srcRegIsPhysical ? - tri->getPhysicalRegisterRegClass(srcReg) : mri->getRegClass(srcReg); - - const TargetRegisterClass *dstRegClass = dstRegIsPhysical ? - tri->getPhysicalRegisterRegClass(dstReg) : mri->getRegClass(dstReg); - - if (srcRegClass != dstRegClass) + // If it's a copy that includes two virtual register but the source and + // destination classes differ then we can't coalesce. + if (!srcRegIsPhysical && !dstRegIsPhysical && + mri->getRegClass(srcReg) != mri->getRegClass(dstReg)) continue; - // We also need any physical regs to be allocable, coalescing with - // a non-allocable register is invalid. - if (srcRegIsPhysical) { + // If one is physical and one is virtual, check that the physical is + // allocatable in the class of the virtual. 
+ if (srcRegIsPhysical && !dstRegIsPhysical) { + const TargetRegisterClass *dstRegClass = mri->getRegClass(dstReg); if (std::find(dstRegClass->allocation_order_begin(*mf), dstRegClass->allocation_order_end(*mf), srcReg) == dstRegClass->allocation_order_end(*mf)) continue; } - - if (dstRegIsPhysical) { + if (!srcRegIsPhysical && dstRegIsPhysical) { + const TargetRegisterClass *srcRegClass = mri->getRegClass(srcReg); if (std::find(srcRegClass->allocation_order_begin(*mf), srcRegClass->allocation_order_end(*mf), dstReg) == srcRegClass->allocation_order_end(*mf)) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1131e3db4e7d..ab0bc2d78a60 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,6 +16,8 @@ #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Pass.h" @@ -33,6 +35,160 @@ char RegisterCoalescer::ID = 0; // RegisterCoalescer::~RegisterCoalescer() {} +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isCopy()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = MI->getOperand(1).getSubReg(); + } else if (MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = crossClass_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Both registers have subreg indices. + if (SrcSub && DstSub) { + // For now we only handle the case of identical indices in commensurate + // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg + // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. 
+ if (SrcSub != DstSub) + return false; + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (!getCommonSubClass(DstRC, SrcRC)) + return false; + SrcSub = DstSub = 0; + } + + // There can be no SrcSub. + if (SrcSub) { + std::swap(Src, Dst); + DstSub = SrcSub; + SrcSub = 0; + assert(!flipped_ && "Unexpected flip"); + flipped_ = true; + } + + // Find the new register class. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (DstSub) + newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + else + newRC_ = getCommonSubClass(DstRC, SrcRC); + if (!newRC_) + return false; + crossClass_ = newRC_ != DstRC || newRC_ != SrcRC; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + srcReg_ = Src; + dstReg_ = Dst; + subIdx_ = DstSub; + return true; +} + +bool CoalescerPair::flip() { + if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + return false; + std::swap(srcReg_, dstReg_); + flipped_ = !flipped_; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is srcReg_. + if (Dst == srcReg_) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != srcReg_) { + return false; + } + + // Now check that Dst matches dstReg_. + if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!subIdx_ && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = tri_.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return dstReg_ == Dst; + // This is a partial register copy. Check that the parts match. + return tri_.getSubReg(dstReg_, SrcSub) == Dst; + } else { + // dstReg_ is virtual. + if (dstReg_ != Dst) + return false; + // Registers match, do the subregisters line up? + return compose(subIdx_, SrcSub) == DstSub; + } +} + // Because of the way .a files work, we must force the SimpleRC // implementation to be pulled in if the RegisterCoalescer classes are // pulled in. Otherwise we run the risk of RegisterCoalescer being diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 690e59f96ebc..43b3fb642635 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -141,6 +141,10 @@ void RegScavenger::forward() { // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. + // FIXME: The scavenger is not predication aware. If the instruction is + // predicated, conservatively assume "kill" markers do not actually kill the + // register. Similarly ignores "dead" markers. + bool isPred = TII->isPredicated(MI); BitVector EarlyClobberRegs(NumPhysRegs); BitVector KillRegs(NumPhysRegs); BitVector DefRegs(NumPhysRegs); @@ -155,11 +159,11 @@ void RegScavenger::forward() { if (MO.isUse()) { // Two-address operands implicitly kill. 
- if (MO.isKill() || MI->isRegTiedToDefOperand(i)) + if (!isPred && (MO.isKill() || MI->isRegTiedToDefOperand(i))) addRegWithSubRegs(KillRegs, Reg); } else { assert(MO.isDef()); - if (MO.isDead()) + if (!isPred && MO.isDead()) addRegWithSubRegs(DeadRegs, Reg); else addRegWithSubRegs(DefRegs, Reg); @@ -238,8 +242,18 @@ unsigned RegScavenger::FindUnusedReg(const TargetRegisterClass *RC) const { return 0; } +/// getRegsAvailable - Return all available registers in the register class +/// in Mask. +void RegScavenger::getRegsAvailable(const TargetRegisterClass *RC, + BitVector &Mask) { + for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); + I != E; ++I) + if (!isAliasUsed(*I)) + Mask.set(*I); +} + /// findSurvivorReg - Return the candidate register that is unused for the -/// longest after MBBI. UseMI is set to the instruction where the search +/// longest after StargMII. UseMI is set to the instruction where the search /// stopped. /// /// No more than InstrLimit instructions are inspected. @@ -258,6 +272,10 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, bool inVirtLiveRange = false; for (++MI; InstrLimit > 0 && MI != ME; ++MI, --InstrLimit) { + if (MI->isDebugValue()) { + ++InstrLimit; // Don't count debug instructions + continue; + } bool isVirtKillInsn = false; bool isVirtDefInsn = false; // Remove any candidates touched by instruction. @@ -321,13 +339,16 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, Candidates.reset(MO.getReg()); } + // Try to find a register that's unused if there is one, as then we won't + // have to spill. + if ((Candidates & RegsAvailable).any()) + Candidates &= RegsAvailable; + // Find the register whose use is furthest away. MachineBasicBlock::iterator UseMI; unsigned SReg = findSurvivorReg(I, Candidates, 25, UseMI); - // If we found an unused register there is no reason to spill it. We have - // probably found a callee-saved register that has been saved in the - // prologue, but happens to be unused at this point. + // If we found an unused register there is no reason to spill it. if (!isAliasUsed(SReg)) return SReg; diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index da20c12c360a..7d39dc496afe 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -380,26 +380,26 @@ void ScheduleDAG::VerifySchedule(bool isBottomUp) { } #endif -/// InitDAGTopologicalSorting - create the initial topological +/// InitDAGTopologicalSorting - create the initial topological /// ordering from the DAG to be scheduled. /// -/// The idea of the algorithm is taken from +/// The idea of the algorithm is taken from /// "Online algorithms for managing the topological order of /// a directed acyclic graph" by David J. Pearce and Paul H.J. Kelly -/// This is the MNR algorithm, which was first introduced by -/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in +/// This is the MNR algorithm, which was first introduced by +/// A. Marchetti-Spaccamela, U. Nanni and H. Rohnert in /// "Maintaining a topological order under edge insertions". /// -/// Short description of the algorithm: +/// Short description of the algorithm: /// /// Topological ordering, ord, of a DAG maps each node to a topological /// index so that for all edges X->Y it is the case that ord(X) < ord(Y). /// -/// This means that if there is a path from the node X to the node Z, +/// This means that if there is a path from the node X to the node Z, /// then ord(X) < ord(Z). 
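As a rough standalone sketch of the ordering property described in the ScheduleDAG.cpp comments above (not LLVM code; the Dag type, the ord array, and the function names here are invented for illustration): once every edge u -> v satisfies ord[u] < ord[v], a query "is b reachable from a" can give up immediately when ord[b] < ord[a], and the search never needs to visit a node whose index exceeds ord[b].

#include <vector>

// Minimal sketch, assuming a DAG with a maintained topological index.
struct Dag {
  std::vector<std::vector<int>> succ; // succ[u] = successors of u
  std::vector<int> ord;               // topological index of each node
};

// Is node b reachable from node a?
static bool isReachable(const Dag &G, int a, int b) {
  if (G.ord[b] < G.ord[a])
    return false;                     // b is ordered before a: no path can exist
  std::vector<char> visited(G.succ.size(), 0);
  std::vector<int> work{a};
  while (!work.empty()) {
    int u = work.back();
    work.pop_back();
    if (u == b)
      return true;
    if (visited[u])
      continue;
    visited[u] = 1;
    for (int s : G.succ[u])
      if (G.ord[s] <= G.ord[b])       // prune nodes outside the affected region
        work.push_back(s);
  }
  return false;
}

// A new edge from -> to closes a cycle iff 'from' is already reachable from 'to'.
static bool wouldCreateCycle(const Dag &G, int from, int to) {
  return isReachable(G, to, from);
}

With those helpers, inserting an edge from -> to keeps the graph acyclic exactly when wouldCreateCycle(G, from, to) is false, which is roughly the check a scheduler would want to make before adding a new dependence edge.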
/// /// This property can be used to check for reachability of nodes: -/// if Z is reachable from X, then an insertion of the edge Z->X would +/// if Z is reachable from X, then an insertion of the edge Z->X would /// create a cycle. /// /// The algorithm first computes a topological ordering for the DAG by @@ -431,7 +431,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { // Collect leaf nodes. WorkList.push_back(SU); } - } + } int Id = DAGSize; while (!WorkList.empty()) { @@ -456,7 +456,7 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { SUnit *SU = &SUnits[i]; for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { - assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && + assert(Node2Index[SU->NodeNum] > Node2Index[I->getSUnit()->NodeNum] && "Wrong topological sorting"); } } @@ -494,7 +494,7 @@ void ScheduleDAGTopologicalSort::RemovePred(SUnit *M, SUnit *N) { void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, bool& HasLoop) { std::vector WorkList; - WorkList.reserve(SUnits.size()); + WorkList.reserve(SUnits.size()); WorkList.push_back(SU); do { @@ -504,20 +504,20 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, for (int I = SU->Succs.size()-1; I >= 0; --I) { int s = SU->Succs[I].getSUnit()->NodeNum; if (Node2Index[s] == UpperBound) { - HasLoop = true; + HasLoop = true; return; } // Visit successors if not already and in affected region. if (!Visited.test(s) && Node2Index[s] < UpperBound) { WorkList.push_back(SU->Succs[I].getSUnit()); - } - } + } + } } while (!WorkList.empty()); } -/// Shift - Renumber the nodes so that the topological ordering is +/// Shift - Renumber the nodes so that the topological ordering is /// preserved. -void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, +void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, int UpperBound) { std::vector L; int shift = 0; @@ -568,7 +568,7 @@ bool ScheduleDAGTopologicalSort::IsReachable(const SUnit *SU, // Is Ord(TargetSU) < Ord(SU) ? if (LowerBound < UpperBound) { Visited.reset(); - // There may be a path from TargetSU to SU. Check for it. + // There may be a path from TargetSU to SU. Check for it. DFS(TargetSU, UpperBound, HasLoop); } return HasLoop; @@ -580,8 +580,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) { Index2Node[index] = n; } -ScheduleDAGTopologicalSort::ScheduleDAGTopologicalSort( - std::vector &sunits) - : SUnits(sunits) {} +ScheduleDAGTopologicalSort:: +ScheduleDAGTopologicalSort(std::vector &sunits) : SUnits(sunits) {} ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/lib/CodeGen/ScheduleDAGEmit.cpp b/lib/CodeGen/ScheduleDAGEmit.cpp index ee08e1dc0eea..0a2fb3796a42 100644 --- a/lib/CodeGen/ScheduleDAGEmit.cpp +++ b/lib/CodeGen/ScheduleDAGEmit.cpp @@ -50,11 +50,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, break; } } - bool Success = TII->copyRegToReg(*BB, InsertPos, Reg, VRI->second, - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(VRI->second); } else { // Copy from physical register. assert(I->getReg() && "Unknown physical register!"); @@ -62,11 +59,8 @@ void ScheduleDAG::EmitPhysRegCopy(SUnit *SU, bool isNew = VRBaseMap.insert(std::make_pair(SU, VRBase)).second; isNew = isNew; // Silence compiler warning. 
assert(isNew && "Node emitted out of order - early"); - bool Success = TII->copyRegToReg(*BB, InsertPos, VRBase, I->getReg(), - SU->CopyDstRC, SU->CopySrcRC, - DebugLoc()); - (void)Success; - assert(Success && "copyRegToReg failed!"); + BuildMI(*BB, InsertPos, DebugLoc(), TII->get(TargetOpcode::COPY), VRBase) + .addReg(I->getReg()); } break; } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index ad82db28f88b..d90659bb163e 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -69,8 +69,10 @@ namespace llvm { const SmallSet &LoopLiveIns) { unsigned Count = 0; for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); - I != E; ++I, ++Count) { + I != E; ++I) { const MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isUse()) @@ -79,6 +81,7 @@ namespace llvm { if (LoopLiveIns.count(MOReg)) Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count))); } + ++Count; // Not every iteration due to dbg_value above. } const std::vector &Children = Node->getChildren(); diff --git a/lib/CodeGen/SelectionDAG/CMakeLists.txt b/lib/CodeGen/SelectionDAG/CMakeLists.txt index 0cfd5e1d7e21..799988a4c862 100644 --- a/lib/CodeGen/SelectionDAG/CMakeLists.txt +++ b/lib/CodeGen/SelectionDAG/CMakeLists.txt @@ -1,5 +1,4 @@ add_llvm_library(LLVMSelectionDAG - CallingConvLower.cpp DAGCombiner.cpp FastISel.cpp FunctionLoweringInfo.cpp diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp deleted file mode 100644 index 4e6c1fcc9604..000000000000 --- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp +++ /dev/null @@ -1,179 +0,0 @@ -//===-- CallingConvLower.cpp - Calling Conventions ------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the CCState class, used for lowering and implementing -// calling conventions. -// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/CallingConvLower.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetData.h" -#include "llvm/Target/TargetMachine.h" -using namespace llvm; - -CCState::CCState(CallingConv::ID CC, bool isVarArg, const TargetMachine &tm, - SmallVector &locs, LLVMContext &C) - : CallingConv(CC), IsVarArg(isVarArg), TM(tm), - TRI(*TM.getRegisterInfo()), Locs(locs), Context(C) { - // No stack is used. - StackOffset = 0; - - UsedRegs.resize((TRI.getNumRegs()+31)/32); -} - -// HandleByVal - Allocate a stack slot large enough to pass an argument by -// value. The size and alignment information of the argument is encoded in its -// parameter attribute. 
-void CCState::HandleByVal(unsigned ValNo, EVT ValVT, - EVT LocVT, CCValAssign::LocInfo LocInfo, - int MinSize, int MinAlign, - ISD::ArgFlagsTy ArgFlags) { - unsigned Align = ArgFlags.getByValAlign(); - unsigned Size = ArgFlags.getByValSize(); - if (MinSize > (int)Size) - Size = MinSize; - if (MinAlign > (int)Align) - Align = MinAlign; - unsigned Offset = AllocateStack(Size, Align); - - addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo)); -} - -/// MarkAllocated - Mark a register and all of its aliases as allocated. -void CCState::MarkAllocated(unsigned Reg) { - UsedRegs[Reg/32] |= 1 << (Reg&31); - - if (const unsigned *RegAliases = TRI.getAliasSet(Reg)) - for (; (Reg = *RegAliases); ++RegAliases) - UsedRegs[Reg/32] |= 1 << (Reg&31); -} - -/// AnalyzeFormalArguments - Analyze an array of argument values, -/// incorporating info about the formals into this state. -void -CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, - CCAssignFn Fn) { - unsigned NumArgs = Ins.size(); - - for (unsigned i = 0; i != NumArgs; ++i) { - EVT ArgVT = Ins[i].VT; - ISD::ArgFlagsTy ArgFlags = Ins[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Formal argument #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// CheckReturn - Analyze the return values of a function, returning true if -/// the return can be performed without sret-demotion, and false otherwise. -bool CCState::CheckReturn(const SmallVectorImpl &OutTys, - const SmallVectorImpl &ArgsFlags, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = OutTys.size(); i != e; ++i) { - EVT VT = OutTys[i]; - ISD::ArgFlagsTy ArgFlags = ArgsFlags[i]; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) - return false; - } - return true; -} - -/// AnalyzeReturn - Analyze the returned values of a return, -/// incorporating info about the result values into this state. -void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, - CCAssignFn Fn) { - // Determine which register each value should be copied into. - for (unsigned i = 0, e = Outs.size(); i != e; ++i) { - EVT VT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Return operand #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - - -/// AnalyzeCallOperands - Analyze the outgoing arguments to a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, - CCAssignFn Fn) { - unsigned NumOps = Outs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = Outs[i].Val.getValueType(); - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallOperands - Same as above except it takes vectors of types -/// and argument flags. 
-void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, - SmallVectorImpl &Flags, - CCAssignFn Fn) { - unsigned NumOps = ArgVTs.size(); - for (unsigned i = 0; i != NumOps; ++i) { - EVT ArgVT = ArgVTs[i]; - ISD::ArgFlagsTy ArgFlags = Flags[i]; - if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { -#ifndef NDEBUG - dbgs() << "Call operand #" << i << " has unhandled type " - << ArgVT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Analyze the return values of a call, -/// incorporating info about the passed values into this state. -void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, - CCAssignFn Fn) { - for (unsigned i = 0, e = Ins.size(); i != e; ++i) { - EVT VT = Ins[i].VT; - ISD::ArgFlagsTy Flags = Ins[i].Flags; - if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { -#ifndef NDEBUG - dbgs() << "Call result #" << i << " has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } - } -} - -/// AnalyzeCallResult - Same as above except it's specialized for calls which -/// produce a single value. -void CCState::AnalyzeCallResult(EVT VT, CCAssignFn Fn) { - if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { -#ifndef NDEBUG - dbgs() << "Call result has unhandled type " - << VT.getEVTString(); -#endif - llvm_unreachable(0); - } -} diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 6bddd784fe86..e67175246457 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -211,6 +211,7 @@ namespace { SDValue visitBUILD_VECTOR(SDNode *N); SDValue visitCONCAT_VECTORS(SDNode *N); SDValue visitVECTOR_SHUFFLE(SDNode *N); + SDValue visitMEMBARRIER(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); @@ -668,7 +669,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); Replace = true; - return DAG.getExtLoad(ExtType, dl, PVT, + return DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -890,7 +891,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? 
ISD::ZEXTLOAD : ISD::EXTLOAD) : LD->getExtensionType(); - SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, + SDValue NewLD = DAG.getExtLoad(ExtType, PVT, dl, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(), LD->getSrcValueOffset(), MemVT, LD->isVolatile(), @@ -1079,6 +1080,7 @@ SDValue DAGCombiner::visit(SDNode *N) { case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); + case ISD::MEMBARRIER: return visitMEMBARRIER(N); } return SDValue(); } @@ -1313,7 +1315,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A @@ -1550,7 +1552,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -2028,7 +2030,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. @@ -2038,7 +2040,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // Avoid infinite looping with PromoteIntBinOp. (N0.getOpcode() == ISD::ANY_EXTEND && (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || - (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && + (N0.getOpcode() == ISD::TRUNCATE && + (!TLI.isZExtFree(VT, Op0VT) || + !TLI.isTruncateFree(Op0VT, VT)) && + TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { @@ -2193,7 +2198,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2216,7 +2221,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N0.getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -2250,7 +2255,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? 
LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2286,7 +2291,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, LoadResultTy, LN0->getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), @@ -2317,7 +2322,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { } // fold (or x, undef) -> -1 - if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) { + if (!LegalOperations && + (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); } @@ -2425,6 +2431,11 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) return SDValue(Rot, 0); + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -3158,6 +3169,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { return NewSRL; } + // Attempt to convert a srl of a load into a narrower zero-extending load. + SDValue NarrowLoad = ReduceLoadWidth(N); + if (NarrowLoad.getNode()) + return NarrowLoad; + // Here is a common situation. We want to optimize: // // %a = ... @@ -3487,7 +3503,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3531,7 +3547,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3557,24 +3573,24 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -3635,10 +3651,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && - (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), - N0.getValueType()) || - !TLI.isZExtFree(N0.getValueType(), VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3679,7 +3692,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3723,7 +3736,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, @@ -3881,7 +3894,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -3925,8 +3938,9 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), - VT, LN0->getChain(), LN0->getBasePtr(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), VT, + N->getDebugLoc(), + LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), MemVT, 
LN0->isVolatile(), LN0->isNonTemporal(), @@ -3950,24 +3964,24 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); + return DAG.getVSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend else { - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - SDValue VsetCC = - DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), - N0.getOperand(1), - cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + EVT MatchingElementType = + EVT::getIntegerVT(*DAG.getContext(), + N0VT.getScalarType().getSizeInBits()); + EVT MatchingVectorType = + EVT::getVectorVT(*DAG.getContext(), MatchingElementType, + N0VT.getVectorNumElements()); + SDValue VsetCC = + DAG.getVSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + N0.getOperand(1), + cast(N0.getOperand(2))->get()); + return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); } } @@ -4024,6 +4038,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { /// extended, also fold the extension to form a extending load. SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { unsigned Opc = N->getOpcode(); + ISD::LoadExtType ExtType = ISD::NON_EXTLOAD; SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -4040,6 +4055,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ExtVT = cast(N->getOperand(1))->getVT(); if (LegalOperations && !TLI.isLoadExtLegal(ISD::SEXTLOAD, ExtVT)) return SDValue(); + } else if (Opc == ISD::SRL) { + // Annother special-case: SRL is basically zero-extending a narrower + // value. + ExtType = ISD::ZEXTLOAD; + N0 = SDValue(N, 0); + ConstantSDNode *N01 = dyn_cast(N0.getOperand(1)); + if (!N01) return SDValue(); + ExtVT = EVT::getIntegerVT(*DAG.getContext(), + VT.getSizeInBits() - N01->getZExtValue()); } unsigned EVTBits = ExtVT.getSizeInBits(); @@ -4085,7 +4109,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { ? 
DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign) - : DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(), NewPtr, + : DAG.getExtLoad(ExtType, VT, N0.getDebugLoc(), LN0->getChain(), NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset() + PtrOff, ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), NewAlign); @@ -4172,7 +4196,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4189,7 +4213,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), EVT, @@ -4243,8 +4267,17 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { + SDValue Reduced = ReduceLoadWidth(N); + if (Reduced.getNode()) + return Reduced; + } + + // Simplify the operands using demanded-bits information. + if (!VT.isVector() && + SimplifyDemandedBits(SDValue(N, 0))) + return SDValue(N, 0); + return SDValue(); } @@ -4943,7 +4976,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, VT, N->getDebugLoc(), LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), @@ -5527,8 +5560,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), - LD->getValueType(0), + return DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + N->getDebugLoc(), Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); @@ -5551,8 +5584,8 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), - LD->getValueType(0), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getValueType(0), + LD->getDebugLoc(), BetterChain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), @@ -6077,7 +6110,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { // Check if the result type doesn't match the inserted element type. A // SCALAR_TO_VECTOR may truncate the inserted element and the // EXTRACT_VECTOR_ELT may widen the extracted vector. 
- EVT EltVT = InVec.getValueType().getVectorElementType(); SDValue InOp = InVec.getOperand(0); EVT NVT = N->getValueType(0); if (InOp.getValueType() != NVT) { @@ -6277,8 +6309,6 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { } SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { - return SDValue(); - EVT VT = N->getValueType(0); unsigned NumElts = VT.getVectorNumElements(); @@ -6334,6 +6364,59 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) { return SDValue(); } +SDValue DAGCombiner::visitMEMBARRIER(SDNode* N) { + if (!TLI.getShouldFoldAtomicFences()) + return SDValue(); + + SDValue atomic = N->getOperand(0); + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + break; + default: + return SDValue(); + } + + SDValue fence = atomic.getOperand(0); + if (fence.getOpcode() != ISD::MEMBARRIER) + return SDValue(); + + switch (atomic.getOpcode()) { + case ISD::ATOMIC_CMP_SWAP: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2), + atomic.getOperand(3)), atomic.getResNo()); + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + return SDValue(DAG.UpdateNodeOperands(atomic.getNode(), + fence.getOperand(0), + atomic.getOperand(1), atomic.getOperand(2)), + atomic.getResNo()); + default: + return SDValue(); + } +} + /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform /// an AND to a vector_shuffle with the destination vector and a zero vector. /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> @@ -6565,8 +6648,8 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType(), - TheSelect->getDebugLoc(), TheSelect->getValueType(0), + TheSelect->getDebugLoc(), LLD->getChain(), Addr, 0, 0, LLD->getMemoryVT(), LLD->isVolatile(), @@ -6807,38 +6890,34 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } } - // Check to see if this is an integer abs. select_cc setl[te] X, 0, -X, X -> + // Check to see if this is an integer abs. 
+ // select_cc setg[te] X, 0, X, -X -> + // select_cc setgt X, -1, X, -X -> + // select_cc setl[te] X, 0, -X, X -> + // select_cc setlt X, 1, -X, X -> // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE) && - N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1) && - N2.getOperand(0) == N1 && N0.getValueType().isInteger()) { + if (N1C) { + ConstantSDNode *SubC = NULL; + if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || + (N1C->isAllOnesValue() && CC == ISD::SETGT)) && + N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) + SubC = dyn_cast(N3.getOperand(0)); + else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || + (N1C->isOne() && CC == ISD::SETLT)) && + N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) + SubC = dyn_cast(N2.getOperand(0)); + EVT XType = N0.getValueType(); - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), XType, - N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } - // Check to see if this is an integer abs. select_cc setgt X, -1, X, -X -> - // Y = sra (X, size(X)-1); xor (add (X, Y), Y) - if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT && - N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) { - if (ConstantSDNode *SubC = dyn_cast(N3.getOperand(0))) { - EVT XType = N0.getValueType(); - if (SubC->isNullValue() && XType.isInteger()) { - SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, - N0, - DAG.getConstant(XType.getSizeInBits()-1, - getShiftAmountTy())); - SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), - XType, N0, Shift); - AddToWorkList(Shift.getNode()); - AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); - } + if (SubC && SubC->isNullValue() && XType.isInteger()) { + SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType, + N0, + DAG.getConstant(XType.getSizeInBits()-1, + getShiftAmountTy())); + SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(), + XType, N0, Shift); + AddToWorkList(Shift.getNode()); + AddToWorkList(Add.getNode()); + return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); } } diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 95f4d072e01f..3f7e4a5fac42 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -44,18 +44,38 @@ #include "llvm/Instructions.h" #include "llvm/IntrinsicInst.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/DebugInfo.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Support/ErrorHandling.h" -#include "FunctionLoweringInfo.h" using namespace llvm; +/// startNewBlock - Set the current block to which generated machine +/// instructions will be appended, and clear the local CSE map. +/// +void FastISel::startNewBlock() { + LocalValueMap.clear(); + + // Start out as null, meaning no local-value instructions have + // been emitted.
+ LastLocalValue = 0; + + // Advance the last local value past any EH_LABEL instructions. + MachineBasicBlock::iterator + I = FuncInfo.MBB->begin(), E = FuncInfo.MBB->end(); + while (I != E && I->getOpcode() == TargetOpcode::EH_LABEL) { + LastLocalValue = I; + ++I; + } +} + bool FastISel::hasTrivialKill(const Value *V) const { // Don't consider constants or arguments to have trivial kills. const Instruction *I = dyn_cast(V); @@ -99,25 +119,31 @@ unsigned FastISel::getRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - DenseMap::iterator I = ValueMap.find(V); - if (I != ValueMap.end()) - return I->second; + DenseMap::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) { + unsigned Reg = I->second; + return Reg; + } unsigned Reg = LocalValueMap[V]; if (Reg != 0) return Reg; // In bottom-up mode, just create the virtual register which will be used // to hold the value. It will be materialized later. - if (IsBottomUp) { - Reg = createResultReg(TLI.getRegClassFor(VT)); - if (isa(V)) - ValueMap[V] = Reg; - else - LocalValueMap[V] = Reg; - return Reg; - } + if (isa(V) && + (!isa(V) || + !FuncInfo.StaticAllocaMap.count(cast(V)))) + return FuncInfo.InitializeRegForValue(V); + + MachineBasicBlock::iterator SaveInsertPt = enterLocalValueArea(); + + // Materialize the value in a register. Emit any instructions in the + // local value area. + Reg = materializeRegForValue(V, VT); - return materializeRegForValue(V, VT); + leaveLocalValueArea(SaveInsertPt); + + return Reg; } /// materializeRegForValue - Helper for getRegForValue. This function is @@ -161,11 +187,15 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { } } } else if (const Operator *Op = dyn_cast(V)) { - if (!SelectOperator(Op, Op->getOpcode())) return 0; - Reg = LocalValueMap[Op]; + if (!SelectOperator(Op, Op->getOpcode())) + if (!isa(Op) || + !TargetSelectInstruction(cast(Op))) + return 0; + Reg = lookUpRegForValue(Op); } else if (isa(V)) { Reg = createResultReg(TLI.getRegClassFor(VT)); - BuildMI(MBB, DL, TII.get(TargetOpcode::IMPLICIT_DEF), Reg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::IMPLICIT_DEF), Reg); } // If target-independent code couldn't handle the value, give target-specific @@ -175,8 +205,10 @@ unsigned FastISel::materializeRegForValue(const Value *V, MVT VT) { // Don't cache constant materializations in the general ValueMap. // To do so would require tracking what uses they dominate. - if (Reg != 0) + if (Reg != 0) { LocalValueMap[V] = Reg; + LastLocalValue = MRI.getVRegDef(Reg); + } return Reg; } @@ -185,8 +217,9 @@ unsigned FastISel::lookUpRegForValue(const Value *V) { // cache values defined by Instructions across blocks, and other values // only locally. This is because Instructions already have the SSA // def-dominates-use requirement enforced. - if (ValueMap.count(V)) - return ValueMap[V]; + DenseMap::iterator I = FuncInfo.ValueMap.find(V); + if (I != FuncInfo.ValueMap.end()) + return I->second; return LocalValueMap[V]; } @@ -202,14 +235,17 @@ unsigned FastISel::UpdateValueMap(const Value *I, unsigned Reg) { return Reg; } - unsigned &AssignedReg = ValueMap[I]; + unsigned &AssignedReg = FuncInfo.ValueMap[I]; if (AssignedReg == 0) + // Use the new register.
AssignedReg = Reg; else if (Reg != AssignedReg) { - const TargetRegisterClass *RegClass = MRI.getRegClass(Reg); - TII.copyRegToReg(*MBB, MBB->end(), AssignedReg, - Reg, RegClass, RegClass, DL); + // Arrange for uses of AssignedReg to be replaced by uses of Reg. + FuncInfo.RegFixups[AssignedReg] = Reg; + + AssignedReg = Reg; } + return AssignedReg; } @@ -237,6 +273,33 @@ std::pair FastISel::getRegForGEPIndex(const Value *Idx) { return std::pair(IdxN, IdxNIsKill); } +void FastISel::recomputeInsertPt() { + if (getLastLocalValue()) { + FuncInfo.InsertPt = getLastLocalValue(); + ++FuncInfo.InsertPt; + } else + FuncInfo.InsertPt = FuncInfo.MBB->getFirstNonPHI(); + + // Now skip past any EH_LABELs, which must remain at the beginning. + while (FuncInfo.InsertPt != FuncInfo.MBB->end() && + FuncInfo.InsertPt->getOpcode() == TargetOpcode::EH_LABEL) + ++FuncInfo.InsertPt; +} + +MachineBasicBlock::iterator FastISel::enterLocalValueArea() { + MachineBasicBlock::iterator OldInsertPt = FuncInfo.InsertPt; + recomputeInsertPt(); + return OldInsertPt; +} + +void FastISel::leaveLocalValueArea(MachineBasicBlock::iterator OldInsertPt) { + if (FuncInfo.InsertPt != FuncInfo.MBB->begin()) + LastLocalValue = llvm::prior(FuncInfo.InsertPt); + + // Restore the previous insert position. + FuncInfo.InsertPt = OldInsertPt; +} + /// SelectBinaryOp - Select and emit code for a binary operator instruction, /// which has an opcode which directly corresponds to the given ISD opcode. /// @@ -345,7 +408,7 @@ bool FastISel::SelectGetElementPtr(const User *I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD.getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); N = FastEmit_ri_(VT, ISD::ADD, N, NIsKill, Offs, VT); @@ -395,7 +458,7 @@ bool FastISel::SelectCall(const User *I) { case Intrinsic::dbg_declare: { const DbgDeclareInst *DI = cast(I); if (!DIVariable(DI->getVariable()).Verify() || - !MF.getMMI().hasDebugInfo()) + !FuncInfo.MF->getMMI().hasDebugInfo()) return true; const Value *Address = DI->getAddress(); @@ -409,11 +472,12 @@ bool FastISel::SelectCall(const User *I) { // those are handled in SelectionDAGBuilder. if (AI) { DenseMap::iterator SI = - StaticAllocaMap.find(AI); - if (SI == StaticAllocaMap.end()) break; // VLAs. + FuncInfo.StaticAllocaMap.find(AI); + if (SI == FuncInfo.StaticAllocaMap.end()) break; // VLAs. int FI = SI->second; if (!DI->getDebugLoc().isUnknown()) - MF.getMMI().setVariableDbgInfo(DI->getVariable(), FI, DI->getDebugLoc()); + FuncInfo.MF->getMMI().setVariableDbgInfo(DI->getVariable(), + FI, DI->getDebugLoc()); } else // Building the map above is target independent. Generating DBG_VALUE // inline is target dependent; do this now. @@ -428,23 +492,28 @@ bool FastISel::SelectCall(const User *I) { if (!V) { // Currently the optimizer can produce this; insert an undef to // help debugging. Probably the optimizer should not do this. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantInt *CI = dyn_cast(V)) { - BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()). 
- addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addImm(CI->getZExtValue()).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (const ConstantFP *CF = dyn_cast(V)) { - BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addFPImm(CF).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else if (unsigned Reg = lookUpRegForValue(V)) { - BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Reg, RegState::Debug).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } else { // We can't yet handle anything else here because it would require // generating code, thus altering codegen because of debug info. // Insert an undef so we can see what we dropped. - BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()). - addMetadata(DI->getVariable()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(0U).addImm(DI->getOffset()) + .addMetadata(DI->getVariable()); } return true; } @@ -453,14 +522,13 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) { default: break; case TargetLowering::Expand: { - assert(MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); + assert(FuncInfo.MBB->isLandingPad() && + "Call to eh.exception not in landing pad!"); unsigned Reg = TLI.getExceptionAddressRegister(); const TargetRegisterClass *RC = TLI.getRegClassFor(VT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Reg, RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); UpdateValueMap(I, ResultReg); return true; } @@ -472,25 +540,23 @@ bool FastISel::SelectCall(const User *I) { switch (TLI.getOperationAction(ISD::EHSELECTION, VT)) { default: break; case TargetLowering::Expand: { - if (MBB->isLandingPad()) - AddCatchInfo(*cast(I), &MF.getMMI(), MBB); + if (FuncInfo.MBB->isLandingPad()) + AddCatchInfo(*cast(I), &FuncInfo.MF->getMMI(), FuncInfo.MBB); else { #ifndef NDEBUG - CatchInfoLost.insert(cast(I)); + FuncInfo.CatchInfoLost.insert(cast(I)); #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. 
unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) MBB->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } unsigned Reg = TLI.getExceptionSelectorRegister(); EVT SrcVT = TLI.getPointerTy(); const TargetRegisterClass *RC = TLI.getRegClassFor(SrcVT); unsigned ResultReg = createResultReg(RC); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, Reg, - RC, RC, DL); - assert(InsertedCopy && "Can't copy address registers!"); - InsertedCopy = InsertedCopy; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Reg); bool ResultRegIsKill = hasTrivialKill(I); @@ -605,12 +671,12 @@ bool FastISel::SelectBitCast(const User *I) { if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); - ResultReg = createResultReg(DstClass); - - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - Op0, DstClass, SrcClass, DL); - if (!InsertedCopy) - ResultReg = 0; + // Don't attempt a cross-class copy. It will likely fail. + if (SrcClass == DstClass) { + ResultReg = createResultReg(DstClass); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(Op0); + } } // If the reg-reg copy failed, select a BIT_CONVERT opcode. @@ -655,14 +721,15 @@ FastISel::SelectInstruction(const Instruction *I) { /// unless it is the immediate (fall-through) successor, and update /// the CFG. void -FastISel::FastEmitBranch(MachineBasicBlock *MSucc) { - if (MBB->isLayoutSuccessor(MSucc)) { +FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { + if (FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // The unconditional fall-through case, which needs no instructions. } else { // The unconditional branch case. - TII.InsertBranch(*MBB, MSucc, NULL, SmallVector()); + TII.InsertBranch(*FuncInfo.MBB, MSucc, NULL, + SmallVector(), DL); } - MBB->addSuccessor(MSucc); + FuncInfo.MBB->addSuccessor(MSucc); } /// SelectFNeg - Emit an FNeg operation. @@ -711,9 +778,40 @@ FastISel::SelectFNeg(const User *I) { return true; } +bool +FastISel::SelectLoad(const User *I) { + LoadInst *LI = const_cast(cast(I)); + + // For a load from an alloca, make a limited effort to find the value + // already available in a register, avoiding redundant loads. 
+ if (!LI->isVolatile() && isa(LI->getPointerOperand())) { + BasicBlock::iterator ScanFrom = LI; + if (const Value *V = FindAvailableLoadedValue(LI->getPointerOperand(), + LI->getParent(), ScanFrom)) { + if (!V->use_empty() && + (!isa(V) || + cast(V)->getParent() == LI->getParent() || + (isa(V) && + FuncInfo.StaticAllocaMap.count(cast(V)))) && + (!isa(V) || + LI->getParent() == &LI->getParent()->getParent()->getEntryBlock())) { + unsigned ResultReg = getRegForValue(V); + if (ResultReg != 0) { + UpdateValueMap(I, ResultReg); + return true; + } + } + } + } + + return false; +} + bool FastISel::SelectOperator(const User *I, unsigned Opcode) { switch (Opcode) { + case Instruction::Load: + return SelectLoad(I); case Instruction::Add: return SelectBinaryOp(I, ISD::ADD); case Instruction::FAdd: @@ -762,8 +860,8 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { if (BI->isUnconditional()) { const BasicBlock *LLVMSucc = BI->getSuccessor(0); - MachineBasicBlock *MSucc = MBBMap[LLVMSucc]; - FastEmitBranch(MSucc); + MachineBasicBlock *MSucc = FuncInfo.MBBMap[LLVMSucc]; + FastEmitBranch(MSucc, BI->getDebugLoc()); return true; } @@ -778,7 +876,7 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { case Instruction::Alloca: // FunctionLowering has the static-sized case covered. - if (StaticAllocaMap.count(cast(I))) + if (FuncInfo.StaticAllocaMap.count(cast(I))) return true; // Dynamic-sized alloca is not handled yet. @@ -824,32 +922,16 @@ FastISel::SelectOperator(const User *I, unsigned Opcode) { } } -FastISel::FastISel(MachineFunction &mf, - DenseMap &vm, - DenseMap &bm, - DenseMap &am, - std::vector > &pn -#ifndef NDEBUG - , SmallSet &cil -#endif - ) - : MBB(0), - ValueMap(vm), - MBBMap(bm), - StaticAllocaMap(am), - PHINodesToUpdate(pn), -#ifndef NDEBUG - CatchInfoLost(cil), -#endif - MF(mf), - MRI(MF.getRegInfo()), - MFI(*MF.getFrameInfo()), - MCP(*MF.getConstantPool()), - TM(MF.getTarget()), +FastISel::FastISel(FunctionLoweringInfo &funcInfo) + : FuncInfo(funcInfo), + MRI(FuncInfo.MF->getRegInfo()), + MFI(*FuncInfo.MF->getFrameInfo()), + MCP(*FuncInfo.MF->getConstantPool()), + TM(FuncInfo.MF->getTarget()), TD(*TM.getTargetData()), TII(*TM.getInstrInfo()), TLI(*TM.getTargetLowering()), - IsBottomUp(false) { + TRI(*TM.getRegisterInfo()) { } FastISel::~FastISel() {} @@ -978,7 +1060,7 @@ unsigned FastISel::FastEmitInst_(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const TargetInstrDesc &II = TII.get(MachineInstOpcode); - BuildMI(MBB, DL, II, ResultReg); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg); return ResultReg; } @@ -989,13 +1071,13 @@ unsigned FastISel::FastEmitInst_r(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) + .addReg(Op0, Op0IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II).addReg(Op0, Op0IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) + .addReg(Op0, Op0IsKill * RegState::Kill); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; @@ -1009,17 +1091,15 @@ unsigned FastISel::FastEmitInst_rr(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if 
(II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1032,17 +1112,15 @@ unsigned FastISel::FastEmitInst_ri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1055,17 +1133,15 @@ unsigned FastISel::FastEmitInst_rf(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addFPImm(FPImm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1079,19 +1155,17 @@ unsigned FastISel::FastEmitInst_rri(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); else { - BuildMI(MBB, DL, II) + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(Op0, Op0IsKill * RegState::Kill) .addReg(Op1, Op1IsKill * RegState::Kill) .addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1103,13 +1177,11 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, const TargetInstrDesc &II = TII.get(MachineInstOpcode); if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg).addImm(Imm); else { - BuildMI(MBB, DL, II).addImm(Imm); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II).addImm(Imm); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, 
TII.get(TargetOpcode::COPY), + ResultReg).addReg(II.ImplicitDefs[0]); } return ResultReg; } @@ -1117,24 +1189,12 @@ unsigned FastISel::FastEmitInst_i(unsigned MachineInstOpcode, unsigned FastISel::FastEmitInst_extractsubreg(MVT RetVT, unsigned Op0, bool Op0IsKill, uint32_t Idx) { - const TargetRegisterClass* RC = MRI.getRegClass(Op0); - unsigned ResultReg = createResultReg(TLI.getRegClassFor(RetVT)); - const TargetInstrDesc &II = TII.get(TargetOpcode::EXTRACT_SUBREG); - - if (II.getNumDefs() >= 1) - BuildMI(MBB, DL, II, ResultReg) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - else { - BuildMI(MBB, DL, II) - .addReg(Op0, Op0IsKill * RegState::Kill) - .addImm(Idx); - bool InsertedCopy = TII.copyRegToReg(*MBB, MBB->end(), ResultReg, - II.ImplicitDefs[0], RC, RC, DL); - if (!InsertedCopy) - ResultReg = 0; - } + assert(TargetRegisterInfo::isVirtualRegister(Op0) && + "Cannot yet extract from physregs"); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, + DL, TII.get(TargetOpcode::COPY), ResultReg) + .addReg(Op0, getKillRegState(Op0IsKill), Idx); return ResultReg; } @@ -1154,14 +1214,14 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { const TerminatorInst *TI = LLVMBB->getTerminator(); SmallPtrSet SuccsHandled; - unsigned OrigNumPHINodesToUpdate = PHINodesToUpdate.size(); + unsigned OrigNumPHINodesToUpdate = FuncInfo.PHINodesToUpdate.size(); // Check successor nodes' PHI nodes that expect a constant to be available // from this block. for (unsigned succ = 0, e = TI->getNumSuccessors(); succ != e; ++succ) { const BasicBlock *SuccBB = TI->getSuccessor(succ); if (!isa(SuccBB->begin())) continue; - MachineBasicBlock *SuccMBB = MBBMap[SuccBB]; + MachineBasicBlock *SuccMBB = FuncInfo.MBBMap[SuccBB]; // If this terminator has multiple identical successors (common for // switches), only handle each succ once. @@ -1182,7 +1242,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // by bailing out early, we may leave behind some dead instructions, // since SelectionDAG's HandlePHINodesInSuccessorBlocks will insert its // own moves. Second, this check is necessary because FastISel doesn't - // use CreateRegForValue to create registers, so it always creates + // use CreateRegs to create registers, so it always creates // exactly one register for each non-void instruction.
EVT VT = TLI.getValueType(PN->getType(), /*AllowUnknown=*/true); if (VT == MVT::Other || !TLI.isTypeLegal(VT)) { @@ -1190,7 +1250,7 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (VT == MVT::i1) VT = TLI.getTypeToTransformTo(LLVMBB->getContext(), VT); else { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } } @@ -1205,10 +1265,10 @@ bool FastISel::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { unsigned Reg = getRegForValue(PHIOp); if (Reg == 0) { - PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); + FuncInfo.PHINodesToUpdate.resize(OrigNumPHINodesToUpdate); return false; } - PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); + FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg)); DL = DebugLoc(); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index 65c36c1289db..928e1ecd4cf4 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/DerivedTypes.h" #include "llvm/Function.h" #include "llvm/Instructions.h" @@ -30,7 +30,6 @@ #include "llvm/Target/TargetData.h" #include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" @@ -47,9 +46,11 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) { if (isa(I)) return true; const BasicBlock *BB = I->getParent(); for (Value::const_use_iterator UI = I->use_begin(), E = I->use_end(); - UI != E; ++UI) - if (cast(*UI)->getParent() != BB || isa(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast(U)->getParent() != BB || isa(U)) return true; + } return false; } @@ -64,9 +65,11 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool EnableFastISel) { const BasicBlock *Entry = A->getParent()->begin(); for (Value::const_use_iterator UI = A->use_begin(), E = A->use_end(); - UI != E; ++UI) - if (cast(*UI)->getParent() != Entry || isa(*UI)) + UI != E; ++UI) { + const User *U = *UI; + if (cast(U)->getParent() != Entry || isa(U)) return false; // Use not in entry block. + } return true; } @@ -74,12 +77,18 @@ FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli) : TLI(tli) { } -void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf, - bool EnableFastISel) { +void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { Fn = &fn; MF = &mf; RegInfo = &MF->getRegInfo(); + // Check whether the function can return without sret-demotion. + SmallVector Outs; + GetReturnInfo(Fn->getReturnType(), + Fn->getAttributes().getRetAttributes(), Outs, TLI); + CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), Fn->isVarArg(), + Outs, Fn->getContext()); + // Create a vreg for each argument register that is not dead and is used // outside of the entry block for the function. 
for (Function::const_arg_iterator AI = Fn->arg_begin(), E = Fn->arg_end(); @@ -172,31 +181,33 @@ void FunctionLoweringInfo::clear() { #endif LiveOutRegInfo.clear(); ArgDbgValues.clear(); + RegFixups.clear(); } -unsigned FunctionLoweringInfo::MakeReg(EVT VT) { +/// CreateReg - Allocate a single virtual register for the given type. +unsigned FunctionLoweringInfo::CreateReg(EVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } -/// CreateRegForValue - Allocate the appropriate number of virtual registers of +/// CreateRegs - Allocate the appropriate number of virtual registers of /// the correctly promoted or expanded types. Assign these registers /// consecutive vreg numbers and return the first assigned number. /// /// In the case that the given value has struct or array type, this function /// will assign registers for each member or element. /// -unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { +unsigned FunctionLoweringInfo::CreateRegs(const Type *Ty) { SmallVector ValueVTs; - ComputeValueVTs(TLI, V->getType(), ValueVTs); + ComputeValueVTs(TLI, Ty, ValueVTs); unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(V->getContext(), ValueVT); + EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); - unsigned NumRegs = TLI.getNumRegisters(V->getContext(), ValueVT); + unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { - unsigned R = MakeReg(RegisterVT); + unsigned R = CreateReg(RegisterVT); if (!FirstReg) FirstReg = R; } } @@ -208,7 +219,7 @@ unsigned FunctionLoweringInfo::CreateRegForValue(const Value *V) { void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, MachineBasicBlock *MBB) { // Inform the MachineModuleInfo of the personality for this landing pad. - const ConstantExpr *CE = cast(I.getOperand(2)); + const ConstantExpr *CE = cast(I.getArgOperand(1)); assert(CE->getOpcode() == Instruction::BitCast && isa(CE->getOperand(0)) && "Personality should be a function"); @@ -217,18 +228,18 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Gather all the type infos for this landing pad and pass them along to // MachineModuleInfo. std::vector TyInfo; - unsigned N = I.getNumOperands(); + unsigned N = I.getNumArgOperands(); - for (unsigned i = N - 1; i > 2; --i) { - if (const ConstantInt *CI = dyn_cast(I.getOperand(i))) { + for (unsigned i = N - 1; i > 1; --i) { + if (const ConstantInt *CI = dyn_cast(I.getArgOperand(i))) { unsigned FilterLength = CI->getZExtValue(); unsigned FirstCatch = i + FilterLength + !FilterLength; - assert (FirstCatch <= N && "Invalid filter length"); + assert(FirstCatch <= N && "Invalid filter length"); if (FirstCatch < N) { TyInfo.reserve(N - FirstCatch); for (unsigned j = FirstCatch; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -240,7 +251,7 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, // Filter. 
TyInfo.reserve(FilterLength - 1); for (unsigned j = i + 1; j < FirstCatch; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addFilterTypeInfo(MBB, TyInfo); TyInfo.clear(); } @@ -249,10 +260,10 @@ void llvm::AddCatchInfo(const CallInst &I, MachineModuleInfo *MMI, } } - if (N > 3) { - TyInfo.reserve(N - 3); - for (unsigned j = 3; j < N; ++j) - TyInfo.push_back(ExtractTypeInfo(I.getOperand(j))); + if (N > 2) { + TyInfo.reserve(N - 2); + for (unsigned j = 2; j < N; ++j) + TyInfo.push_back(ExtractTypeInfo(I.getArgOperand(j))); MMI->addCatchTypeInfo(MBB, TyInfo); } } diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h deleted file mode 100644 index 4067a5b33044..000000000000 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.h +++ /dev/null @@ -1,144 +0,0 @@ -//===-- FunctionLoweringInfo.h - Lower functions from LLVM IR to CodeGen --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements routines for translating functions from LLVM IR into -// Machine IR. -// -//===----------------------------------------------------------------------===// - -#ifndef FUNCTIONLOWERINGINFO_H -#define FUNCTIONLOWERINGINFO_H - -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/ADT/APInt.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#ifndef NDEBUG -#include "llvm/ADT/SmallSet.h" -#endif -#include "llvm/CodeGen/ValueTypes.h" -#include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/Support/CallSite.h" -#include - -namespace llvm { - -class AllocaInst; -class BasicBlock; -class CallInst; -class Function; -class GlobalVariable; -class Instruction; -class MachineInstr; -class MachineBasicBlock; -class MachineFunction; -class MachineModuleInfo; -class MachineRegisterInfo; -class TargetLowering; -class Value; - -//===--------------------------------------------------------------------===// -/// FunctionLoweringInfo - This contains information that is global to a -/// function that is used when lowering a region of the function. -/// -class FunctionLoweringInfo { -public: - const TargetLowering &TLI; - const Function *Fn; - MachineFunction *MF; - MachineRegisterInfo *RegInfo; - - /// CanLowerReturn - true iff the function's return value can be lowered to - /// registers. - bool CanLowerReturn; - - /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg - /// allocated to hold a pointer to the hidden sret parameter. - unsigned DemoteRegister; - - /// MBBMap - A mapping from LLVM basic blocks to their machine code entry. - DenseMap MBBMap; - - /// ValueMap - Since we emit code for the function a basic block at a time, - /// we must remember which virtual registers hold the values for - /// cross-basic-block values. - DenseMap ValueMap; - - /// StaticAllocaMap - Keep track of frame indices for fixed sized allocas in - /// the entry block. This allows the allocas to be efficiently referenced - /// anywhere in the function. - DenseMap StaticAllocaMap; - - /// ArgDbgValues - A list of DBG_VALUE instructions created during isel for - /// function arguments that are inserted after scheduling is completed. 
- SmallVector ArgDbgValues; - -#ifndef NDEBUG - SmallSet CatchInfoLost; - SmallSet CatchInfoFound; -#endif - - struct LiveOutInfo { - unsigned NumSignBits; - APInt KnownOne, KnownZero; - LiveOutInfo() : NumSignBits(0), KnownOne(1, 0), KnownZero(1, 0) {} - }; - - /// LiveOutRegInfo - Information about live out vregs, indexed by their - /// register number offset by 'FirstVirtualRegister'. - std::vector LiveOutRegInfo; - - /// PHINodesToUpdate - A list of phi instructions whose operand list will - /// be updated after processing the current basic block. - /// TODO: This isn't per-function state, it's per-basic-block state. But - /// there's no other convenient place for it to live right now. - std::vector > PHINodesToUpdate; - - explicit FunctionLoweringInfo(const TargetLowering &TLI); - - /// set - Initialize this FunctionLoweringInfo with the given Function - /// and its associated MachineFunction. - /// - void set(const Function &Fn, MachineFunction &MF, bool EnableFastISel); - - /// clear - Clear out all the function-specific state. This returns this - /// FunctionLoweringInfo to an empty state, ready to be used for a - /// different function. - void clear(); - - unsigned MakeReg(EVT VT); - - /// isExportedInst - Return true if the specified value is an instruction - /// exported from its block. - bool isExportedInst(const Value *V) { - return ValueMap.count(V); - } - - unsigned CreateRegForValue(const Value *V); - - unsigned InitializeRegForValue(const Value *V) { - unsigned &R = ValueMap[V]; - assert(R == 0 && "Already initialized this value register!"); - return R = CreateRegForValue(V); - } -}; - -/// AddCatchInfo - Extract the personality and type infos from an eh.selector -/// call, and add them to the specified machine basic block. -void AddCatchInfo(const CallInst &I, - MachineModuleInfo *MMI, MachineBasicBlock *MBB); - -/// CopyCatchInfo - Copy catch information from DestBB to SrcBB. -void CopyCatchInfo(const BasicBlock *SrcBB, const BasicBlock *DestBB, - MachineModuleInfo *MMI, FunctionLoweringInfo &FLI); - -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 16eb8a72adc8..61c2a90e7edc 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -123,7 +123,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, EVT VT = Node->getValueType(ResNo); const TargetRegisterClass *SrcRC = 0, *DstRC = 0; - SrcRC = TRI->getPhysicalRegisterRegClass(SrcReg, VT); + SrcRC = TRI->getMinimalPhysRegClass(SrcReg, VT); // Figure out the register class to create for the destreg. if (VRBase) { @@ -142,11 +142,8 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, } else { // Create the reg, emit the copy. 
VRBase = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, VRBase, SrcReg, - DstRC, SrcRC, Node->getDebugLoc()); - - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + VRBase).addReg(SrcReg); } SDValue Op(Node, ResNo); @@ -246,7 +243,7 @@ unsigned InstrEmitter::getVR(SDValue Op, const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); VReg = MRI->createVirtualRegister(RC); } - BuildMI(MBB, Op.getDebugLoc(), + BuildMI(*MBB, InsertPos, Op.getDebugLoc(), TII->get(TargetOpcode::IMPLICIT_DEF), VReg); return VReg; } @@ -288,10 +285,8 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, "Don't have operand info for this instruction!"); if (DstRC && SrcRC != DstRC && !SrcRC->hasSuperClass(DstRC)) { unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Op.getNode()->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Op.getNode()->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVReg).addReg(VReg); VReg = NewVReg; } } @@ -428,12 +423,9 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, } if (Opc == TargetOpcode::EXTRACT_SUBREG) { + // EXTRACT_SUBREG is lowered as %dst = COPY %src:sub unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); - // Create the extract_subreg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::EXTRACT_SUBREG)); - // Figure out the register class to create for the destreg. unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); const TargetRegisterClass *TRC = MRI->getRegClass(VReg); @@ -450,11 +442,16 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); } - // Add def, source, and subreg index - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + // Create the extract_subreg machine instruction. + MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::COPY), VRBase); + + // Add source, and subreg index AddOperand(MI, Node->getOperand(0), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); + assert(TargetRegisterInfo::isVirtualRegister(MI->getOperand(1).getReg()) && + "Cannot yet extract from physregs"); + MI->getOperand(1).setSubReg(SubIdx); MBB->insert(InsertPos, MI); } else if (Opc == TargetOpcode::INSERT_SUBREG || Opc == TargetOpcode::SUBREG_TO_REG) { @@ -511,18 +508,13 @@ void InstrEmitter::EmitCopyToRegClassNode(SDNode *Node, DenseMap &VRBaseMap) { unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); - const TargetRegisterClass *SrcRC = MRI->getRegClass(VReg); + // Create the new VReg in the destination class and emit a copy. unsigned DstRCIdx = cast(Node->getOperand(1))->getZExtValue(); const TargetRegisterClass *DstRC = TRI->getRegClass(DstRCIdx); - - // Create the new VReg in the destination class and emit a copy. 
unsigned NewVReg = MRI->createVirtualRegister(DstRC); - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, NewVReg, VReg, - DstRC, SrcRC, Node->getDebugLoc()); - assert(Emitted && - "Unable to issue a copy instruction for a COPY_TO_REGCLASS node!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + NewVReg).addReg(VReg); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; @@ -604,9 +596,10 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); if (const ConstantInt *CI = dyn_cast(V)) { - // FIXME: SDDbgValues aren't updated with legalization, so it's possible - // to have i128 values in them at this point. As a crude workaround, just - // drop the debug info if this happens. + // FIXME: SDDbgValue constants aren't updated with legalization, so it's + // possible to have i128 constants in them at this point. Dwarf writer + // does not handle i128 constants at the moment so, as a crude workaround, + // just drop the debug info if this happens. if (!CI->getValue().isSignedIntN(64)) MIB.addReg(0U); else @@ -676,6 +669,33 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Create the new machine instruction. MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + + // The MachineInstr constructor adds implicit-def operands. Scan through + // these to determine which are dead. + if (MI->getNumOperands() != 0 && + Node->getValueType(Node->getNumValues()-1) == MVT::Flag) { + // First, collect all used registers. + SmallVector UsedRegs; + for (SDNode *F = Node->getFlaggedUser(); F; F = F->getFlaggedUser()) + if (F->getOpcode() == ISD::CopyFromReg) + UsedRegs.push_back(cast(F->getOperand(1))->getReg()); + else { + // Collect declared implicit uses. + const TargetInstrDesc &TID = TII->get(F->getMachineOpcode()); + UsedRegs.append(TID.getImplicitUses(), + TID.getImplicitUses() + TID.getNumImplicitUses()); + // In addition to declared implicit uses, we must also check for + // direct RegisterSDNode operands. + for (unsigned i = 0, e = F->getNumOperands(); i != e; ++i) + if (RegisterSDNode *R = dyn_cast(F->getOperand(i))) { + unsigned Reg = R->getReg(); + if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) + UsedRegs.push_back(Reg); + } + } + // Then mark unused registers as dead. + MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + } // Add result register values for things that are defined by this // instruction. @@ -696,16 +716,24 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, MI->setMemRefs(cast(Node)->memoperands_begin(), cast(Node)->memoperands_end()); + // Insert the instruction into position in the block. This needs to + // happen before any custom inserter hook is called so that the + // hook knows where in the block to insert the replacement code. + MBB->insert(InsertPos, MI); + if (II.usesCustomInsertionHook()) { // Insert this instruction into the basic block using a target // specific inserter which may return a new basic block. - MBB = TLI->EmitInstrWithCustomInserter(MI, MBB); - InsertPos = MBB->end(); + bool AtEnd = InsertPos == MBB->end(); + MachineBasicBlock *NewMBB = TLI->EmitInstrWithCustomInserter(MI, MBB); + if (NewMBB != MBB) { + if (AtEnd) + InsertPos = NewMBB->end(); + MBB = NewMBB; + } return; } - MBB->insert(InsertPos, MI); - // Additional results must be a physical register def.
if (HasPhysRegOuts) { for (unsigned i = II.getNumDefs(); i < NumResults; ++i) { @@ -761,24 +789,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DestReg = cast(Node->getOperand(1))->getReg(); if (SrcReg == DestReg) // Coalesced away the copy? Ignore. break; - - const TargetRegisterClass *SrcTRC = 0, *DstTRC = 0; - // Get the register classes of the src/dst. - if (TargetRegisterInfo::isVirtualRegister(SrcReg)) - SrcTRC = MRI->getRegClass(SrcReg); - else - SrcTRC = TRI->getPhysicalRegisterRegClass(SrcReg,SrcVal.getValueType()); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) - DstTRC = MRI->getRegClass(DestReg); - else - DstTRC = TRI->getPhysicalRegisterRegClass(DestReg, - Node->getOperand(1).getValueType()); - - bool Emitted = TII->copyRegToReg(*MBB, InsertPos, DestReg, SrcReg, - DstTRC, SrcTRC, Node->getDebugLoc()); - assert(Emitted && "Unable to issue a copy instruction!\n"); - (void) Emitted; + BuildMI(*MBB, InsertPos, Node->getDebugLoc(), TII->get(TargetOpcode::COPY), + DestReg).addReg(SrcReg); break; } case ISD::CopyFromReg: { @@ -807,6 +820,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, const char *AsmStr = cast(AsmStrV)->getSymbol(); MI->addOperand(MachineOperand::CreateES(AsmStr)); + // Add the isAlignStack bit. + int64_t isAlignStack = + cast(Node->getOperand(InlineAsm::Op_IsAlignStack))-> + getZExtValue(); + MI->addOperand(MachineOperand::CreateImm(isAlignStack)); + // Add all of the operand registers to the instruction. for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) { unsigned Flags = @@ -821,14 +840,22 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, case InlineAsm::Kind_RegDef: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + // FIXME: Add dead flags for physical and virtual registers defined. + // For now, mark physical register defs as implicit to help fast + // regalloc. This makes inline asm look a lot like calls. + MI->addOperand(MachineOperand::CreateReg(Reg, true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: for (; NumVals; --NumVals, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, true, false, false, - false, false, true)); + MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, + /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), + /*isKill=*/ false, + /*isDead=*/ false, + /*isUndef=*/false, + /*isEarlyClobber=*/ true)); } break; case InlineAsm::Kind_RegUse: // Use of register. diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 62a37a5fd0ae..7a47da4ec52e 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -31,6 +31,7 @@ #include "llvm/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/ADT/DenseMap.h" @@ -133,7 +134,7 @@ private: /// whose vector element type is narrower than the original shuffle type. /// e.g. 
<0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, - SDValue N1, SDValue N2, + SDValue N1, SDValue N2, SmallVectorImpl &Mask) const; bool LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, @@ -143,6 +144,8 @@ private: DebugLoc dl); SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); + std::pair ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, RTLIB::Libcall Call_PPCF128); @@ -172,6 +175,8 @@ private: SDValue ExpandExtractFromVectorThroughStack(SDValue Op); SDValue ExpandVectorBuildThroughStack(SDNode* Node); + std::pair ExpandAtomic(SDNode *Node); + void ExpandNode(SDNode *Node, SmallVectorImpl &Results); void PromoteNode(SDNode *Node, SmallVectorImpl &Results); }; @@ -181,8 +186,8 @@ private: /// performs the same shuffe in terms of order or result bytes, but on a type /// whose vector element type is narrower than the original shuffle type. /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3> -SDValue -SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, +SDValue +SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, SDValue N1, SDValue N2, SmallVectorImpl &Mask) const { unsigned NumMaskElts = VT.getVectorNumElements(); @@ -193,12 +198,12 @@ SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl, if (NumEltsGrowth == 1) return DAG.getVectorShuffle(NVT, dl, N1, N2, &Mask[0]); - + SmallVector NewMask; for (unsigned i = 0; i != NumMaskElts; ++i) { int Idx = Mask[i]; for (unsigned j = 0; j != NumEltsGrowth; ++j) { - if (Idx < 0) + if (Idx < 0) NewMask.push_back(-1); else NewMask.push_back(Idx * NumEltsGrowth + j); @@ -320,7 +325,8 @@ bool SelectionDAGLegalize::LegalizeAllNodesNotLeadingTo(SDNode *N, SDNode *Dest, bool OperandsLeadToDest = false; for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) OperandsLeadToDest |= // If an operand leads to Dest, so do we. - LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, NodesLeadingTo); + LegalizeAllNodesNotLeadingTo(N->getOperand(i).getNode(), Dest, + NodesLeadingTo); if (OperandsLeadToDest) { NodesLeadingTo.insert(N); @@ -357,7 +363,7 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, EVT SVT = VT; while (SVT != MVT::f32) { SVT = (MVT::SimpleValueType)(SVT.getSimpleVT().SimpleTy - 1); - if (CFP->isValueValidForType(SVT, CFP->getValueAPF()) && + if (ConstantFPSDNode::isValueValidForType(SVT, CFP->getValueAPF()) && // Only do this if the target has a native EXTLOAD instruction from // smaller type. TLI.isLoadExtLegal(ISD::EXTLOAD, SVT) && @@ -372,8 +378,8 @@ static SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP, SDValue CPIdx = DAG.getConstantPool(LLVMC, TLI.getPointerTy()); unsigned Alignment = cast(CPIdx)->getAlignment(); if (Extend) - return DAG.getExtLoad(ISD::EXTLOAD, dl, - OrigVT, DAG.getEntryNode(), + return DAG.getExtLoad(ISD::EXTLOAD, OrigVT, dl, + DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, VT, false, false, Alignment); return DAG.getLoad(OrigVT, dl, DAG.getEntryNode(), CPIdx, @@ -450,7 +456,7 @@ SDValue ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, 8 * (StoredBytes - Offset)); // Load from the stack slot. 
- SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Store, StackPtr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Store, StackPtr, NULL, 0, MemVT, false, false, 0); Stores.push_back(DAG.getTruncStore(Load.getValue(1), dl, Load, Ptr, @@ -552,7 +558,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // The last copy may be partial. Do an extending load. EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), 8 * (LoadedBytes - Offset)); - SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, RegVT, Chain, Ptr, + SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, RegVT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + Offset, MemVT, LD->isVolatile(), LD->isNonTemporal(), @@ -568,7 +574,7 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, Stores.size()); // Finally, perform the original load only redirected to the stack slot. - Load = DAG.getExtLoad(LD->getExtensionType(), dl, VT, TF, StackBase, + Load = DAG.getExtLoad(LD->getExtensionType(), VT, dl, TF, StackBase, NULL, 0, LoadedVT, false, false, 0); // Callers expect a MERGE_VALUES node. @@ -597,23 +603,23 @@ SDValue ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Load the value in two parts SDValue Lo, Hi; if (TLI.isLittleEndian()) { - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } else { - Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getSrcValue(), + Hi = DAG.getExtLoad(HiExtType, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset, NewLoadedVT, LD->isVolatile(), LD->isNonTemporal(), Alignment); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getConstant(IncrementSize, TLI.getPointerTy())); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr, LD->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, VT, dl, Chain, Ptr, LD->getSrcValue(), SVOffset + IncrementSize, NewLoadedVT, LD->isVolatile(), - LD->isNonTemporal(), MinAlign(Alignment, IncrementSize)); + LD->isNonTemporal(), MinAlign(Alignment,IncrementSize)); } // aggregate the two parts @@ -773,7 +779,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { "Unexpected illegal type!"); for (unsigned i = 0, e = Node->getNumOperands(); i != e; ++i) - assert((isTypeLegal(Node->getOperand(i).getValueType()) || + assert((isTypeLegal(Node->getOperand(i).getValueType()) || Node->getOperand(i).getOpcode() == ISD::TargetConstant) && "Unexpected illegal type!"); @@ -853,6 +859,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { case ISD::MERGE_VALUES: case ISD::EH_RETURN: case ISD::FRAME_TO_ARGS_OFFSET: + case ISD::EH_SJLJ_SETJMP: + case ISD::EH_SJLJ_LONGJMP: // These operations lie about being legal: when they claim to be legal, // they should actually be expanded. 
Action = TLI.getOperationAction(Node->getOpcode(), Node->getValueType(0)); @@ -925,8 +933,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { break; } - Result = DAG.UpdateNodeOperands(Result.getValue(0), Ops.data(), - Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), Ops.data(), + Ops.size()), 0); switch (Action) { case TargetLowering::Legal: for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) @@ -1000,11 +1008,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { NodesLeadingTo); } - // Now that we legalized all of the inputs (which may have inserted - // libcalls) create the new CALLSEQ_START node. + // Now that we have legalized all of the inputs (which may have inserted + // libcalls), create the new CALLSEQ_START node. Tmp1 = LegalizeOp(Node->getOperand(0)); // Legalize the chain. - // Merge in the last call, to ensure that this call start after the last + // Merge in the last call to ensure that this call starts after the last // call ended. if (LastCALLSEQ_END.getOpcode() != ISD::EntryToken) { Tmp1 = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -1016,7 +1024,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), &Ops[0], + Ops.size()), Result.getResNo()); } // Remember that the CALLSEQ_START is legalized. @@ -1058,7 +1067,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (Tmp1 != Node->getOperand(0)) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } else { Tmp2 = LegalizeOp(Node->getOperand(Node->getNumOperands()-1)); @@ -1067,7 +1078,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { SmallVector Ops(Node->op_begin(), Node->op_end()); Ops[0] = Tmp1; Ops.back() = Tmp2; - Result = DAG.UpdateNodeOperands(Result, &Ops[0], Ops.size()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + &Ops[0], Ops.size()), + Result.getResNo()); } } assert(IsLegalizingCall && "Call sequence imbalance between start/end?"); @@ -1087,7 +1100,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { EVT VT = Node->getValueType(0); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp3 = Result.getValue(0); Tmp4 = Result.getValue(1); @@ -1100,7 +1115,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), + Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, TLI); Tmp3 = Result.getOperand(0); Tmp4 = Result.getOperand(1); @@ -1166,7 +1181,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { ISD::LoadExtType NewExtType = ExtType == ISD::ZEXTLOAD ? 
ISD::ZEXTLOAD : ISD::EXTLOAD; - Result = DAG.getExtLoad(NewExtType, dl, Node->getValueType(0), + Result = DAG.getExtLoad(NewExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, NVT, isVolatile, isNonTemporal, Alignment); @@ -1202,8 +1217,8 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { if (TLI.isLittleEndian()) { // EXTLOAD:i24 -> ZEXTLOAD:i16 | (shl EXTLOAD@+2:i8, 16) // Load the bottom RoundWidth bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, Node->getValueType(0), dl, + Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1211,13 +1226,13 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1231,7 +1246,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // Big endian - avoid unaligned loads. // EXTLOAD:i24 -> (shl EXTLOAD:i16, 8) | ZEXTLOAD@+2:i8 // Load the top RoundWidth bits. - Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Tmp1, Tmp2, + Hi = DAG.getExtLoad(ExtType, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset, RoundVT, isVolatile, isNonTemporal, Alignment); @@ -1239,14 +1254,14 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { IncrementSize = RoundWidth / 8; Tmp2 = DAG.getNode(ISD::ADD, dl, Tmp2.getValueType(), Tmp2, DAG.getIntPtrConstant(IncrementSize)); - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, - Node->getValueType(0), Tmp1, Tmp2, + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, + Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), SVOffset + IncrementSize, ExtraVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); - // Build a factor node to remember that this load is independent of the - // other one. + // Build a factor node to remember that this load is independent of + // the other one. Ch = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo.getValue(1), Hi.getValue(1)); @@ -1267,7 +1282,9 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { isCustom = true; // FALLTHROUGH case TargetLowering::Legal: - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp2, LD->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp2, LD->getOffset()), + Result.getResNo()); Tmp1 = Result.getValue(0); Tmp2 = Result.getValue(1); @@ -1281,10 +1298,12 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // If this is an unaligned load and the target doesn't support it, // expand it. 
if (!TLI.allowsUnalignedMemoryAccesses(LD->getMemoryVT())) { - const Type *Ty = LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + const Type *Ty = + LD->getMemoryVT().getTypeForEVT(*DAG.getContext()); + unsigned ABIAlignment = + TLI.getTargetData()->getABITypeAlignment(Ty); if (LD->getAlignment() < ABIAlignment){ - Result = ExpandUnalignedLoad(cast(Result.getNode()), + Result = ExpandUnalignedLoad(cast(Result.getNode()), DAG, TLI); Tmp1 = Result.getOperand(0); Tmp2 = Result.getOperand(1); @@ -1310,10 +1329,11 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { Tmp2 = LegalizeOp(Load.getValue(1)); break; } - assert(ExtType != ISD::EXTLOAD &&"EXTLOAD should always be supported!"); + assert(ExtType != ISD::EXTLOAD && + "EXTLOAD should always be supported!"); // Turn the unsupported load into an EXTLOAD followed by an explicit // zero/sign extend inreg. - Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0), + Result = DAG.getExtLoad(ISD::EXTLOAD, Node->getValueType(0), dl, Tmp1, Tmp2, LD->getSrcValue(), LD->getSrcValueOffset(), SrcVT, LD->isVolatile(), LD->isNonTemporal(), @@ -1355,8 +1375,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { { Tmp3 = LegalizeOp(ST->getValue()); - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); EVT VT = Tmp3.getValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { @@ -1366,7 +1388,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, TLI); @@ -1459,8 +1481,10 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { } else { if (Tmp1 != ST->getChain() || Tmp3 != ST->getValue() || Tmp2 != ST->getBasePtr()) - Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, - ST->getOffset()); + Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(), + Tmp1, Tmp3, Tmp2, + ST->getOffset()), + Result.getResNo()); switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { default: assert(0 && "This action is not supported yet!"); @@ -1469,7 +1493,7 @@ SDValue SelectionDAGLegalize::LegalizeOp(SDValue Op) { // expand it. 
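// A hedged sketch of the UpdateNodeOperands pattern repeated throughout these
// hunks: the helper now appears to take and return an SDNode* rather than an
// SDValue, so callers pass Result.getNode() and rebuild the SDValue themselves,
// keeping the original result number intact:
//
//   // old: Result = DAG.UpdateNodeOperands(Result, Tmp1, Tmp3, Tmp2, ST->getOffset());
//   // new:
//   Result = SDValue(DAG.UpdateNodeOperands(Result.getNode(),
//                                           Tmp1, Tmp3, Tmp2, ST->getOffset()),
//                    Result.getResNo());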
if (!TLI.allowsUnalignedMemoryAccesses(ST->getMemoryVT())) { const Type *Ty = ST->getMemoryVT().getTypeForEVT(*DAG.getContext()); - unsigned ABIAlignment = TLI.getTargetData()->getABITypeAlignment(Ty); + unsigned ABIAlignment= TLI.getTargetData()->getABITypeAlignment(Ty); if (ST->getAlignment() < ABIAlignment) Result = ExpandUnalignedStore(cast(Result.getNode()), DAG, TLI); @@ -1531,7 +1555,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { return DAG.getLoad(Op.getValueType(), dl, Ch, StackPtr, NULL, 0, false, false, 0); else - return DAG.getExtLoad(ISD::EXTLOAD, dl, Op.getValueType(), Ch, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, Op.getValueType(), dl, Ch, StackPtr, NULL, 0, Vec.getValueType().getVectorElementType(), false, false, 0); } @@ -1568,7 +1592,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) { Node->getOperand(i), Idx, SV, Offset, EltVT, false, false, 0)); } else - Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, + Stores.push_back(DAG.getStore(DAG.getEntryNode(), dl, Node->getOperand(i), Idx, SV, Offset, false, false, 0)); } @@ -1763,7 +1787,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp, DestAlign); assert(SlotSize < DestSize && "Unknown extension!"); - return DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, Store, FIPtr, SV, 0, SlotVT, + return DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, Store, FIPtr, SV, 0, SlotVT, false, false, DestAlign); } @@ -1926,6 +1950,44 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, return CallInfo.first; } +// ExpandChainLibCall - Expand a node into a call to a libcall. Similar to +// ExpandLibCall except that the first operand is the in-chain. +std::pair +SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC, + SDNode *Node, + bool isSigned) { + assert(!IsLegalizingCall && "Cannot overlap legalization of calls!"); + SDValue InChain = Node->getOperand(0); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 1, e = Node->getNumOperands(); i != e; ++i) { + EVT ArgVT = Node->getOperand(i).getValueType(); + const Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Node->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + // Splice the libcall in wherever FindInputOutputChains tells us to. + const Type *RetTy = Node->getValueType(0).getTypeForEVT(*DAG.getContext()); + std::pair CallInfo = + TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false, + 0, TLI.getLibcallCallingConv(LC), false, + /*isReturnValueUsed=*/true, + Callee, Args, DAG, Node->getDebugLoc()); + + // Legalize the call sequence, starting with the chain. This will advance + // the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that + // was added by LowerCallTo (guaranteeing proper serialization of calls). 
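// The pair produced by ExpandChainLibCall is (call result, out-chain): operand 0
// of the node is fed to LowerCallTo as the in-chain, and the chain coming out of
// the legalized call sequence is returned alongside the value, so a caller can
// replace both results of a chained node.  A sketch of the intended use (it
// mirrors the atomic expansion later in this file):
//
//   std::pair<SDValue, SDValue> Tmp = ExpandChainLibCall(LC, Node, false);
//   Results.push_back(Tmp.first);   // value computed by the libcall
//   Results.push_back(Tmp.second);  // chain after the CALLSEQ_END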
+ LegalizeOp(CallInfo.second); + return CallInfo; +} + SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, @@ -2048,7 +2110,8 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, SDValue HiOr = DAG.getNode(ISD::OR, dl, MVT::i64, Hi, TwoP84); SDValue LoFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, LoOr); SDValue HiFlt = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f64, HiOr); - SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, TwoP84PlusTwoP52); + SDValue HiSub = DAG.getNode(ISD::FSUB, dl, MVT::f64, HiFlt, + TwoP84PlusTwoP52); return DAG.getNode(ISD::FADD, dl, MVT::f64, LoFlt, HiSub); } @@ -2058,11 +2121,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, if (Op0.getValueType() == MVT::i64 && DestVT == MVT::f32) { EVT SHVT = TLI.getShiftAmountTy(); - SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0xfffffffffffff800), MVT::i64)); SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, And, DAG.getConstant(UINT64_C(0x800), MVT::i64)); - SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, + SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0, DAG.getConstant(UINT64_C(0x7ff), MVT::i64)); SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64), And2, DAG.getConstant(UINT64_C(0), MVT::i64), ISD::SETNE); @@ -2122,7 +2185,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned, false, false, Alignment); else { FudgeInReg = - LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, dl, DestVT, + LegalizeOp(DAG.getExtLoad(ISD::EXTLOAD, DestVT, dl, DAG.getEntryNode(), CPIdx, PseudoSourceValue::getConstantPool(), 0, MVT::f32, false, false, Alignment)); @@ -2332,6 +2395,92 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, } } +std::pair SelectionDAGLegalize::ExpandAtomic(SDNode *Node) { + unsigned Opc = Node->getOpcode(); + MVT VT = cast(Node)->getMemoryVT().getSimpleVT(); + RTLIB::Libcall LC; + + switch (Opc) { + default: + llvm_unreachable("Unhandled atomic intrinsic Expand!"); + break; + case ISD::ATOMIC_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_1; break; + case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; + case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; + case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + } + break; + case ISD::ATOMIC_CMP_SWAP: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1; break; + case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; + case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; + case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + } + break; + case ISD::ATOMIC_LOAD_ADD: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_ADD_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + } + break; + case ISD::ATOMIC_LOAD_SUB: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_SUB_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; + 
case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + } + break; + case ISD::ATOMIC_LOAD_AND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_AND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + } + break; + case ISD::ATOMIC_LOAD_OR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_OR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + } + break; + case ISD::ATOMIC_LOAD_XOR: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_XOR_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + } + break; + case ISD::ATOMIC_LOAD_NAND: + switch (VT.SimpleTy) { + default: llvm_unreachable("Unexpected value type for atomic!"); + case MVT::i8: LC = RTLIB::SYNC_FETCH_AND_NAND_1; break; + case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; + case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; + case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + } + break; + } + + return ExpandChainLibCall(LC, Node, false); +} + void SelectionDAGLegalize::ExpandNode(SDNode *Node, SmallVectorImpl &Results) { DebugLoc dl = Node->getDebugLoc(); @@ -2357,10 +2506,48 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EH_RETURN: case ISD::EH_LABEL: case ISD::PREFETCH: - case ISD::MEMBARRIER: case ISD::VAEND: + case ISD::EH_SJLJ_LONGJMP: + Results.push_back(Node->getOperand(0)); + break; + case ISD::EH_SJLJ_SETJMP: + Results.push_back(DAG.getConstant(0, MVT::i32)); Results.push_back(Node->getOperand(0)); break; + case ISD::MEMBARRIER: { + // If the target didn't lower this, lower it to '__sync_synchronize()' call + TargetLowering::ArgListTy Args; + std::pair CallResult = + TLI.LowerCallTo(Node->getOperand(0), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, CallingConv::C, false, + /*isReturnValueUsed=*/true, + DAG.getExternalSymbol("__sync_synchronize", + TLI.getPointerTy()), + Args, DAG, dl); + Results.push_back(CallResult.second); + break; + } + // By default, atomic intrinsics are marked Legal and lowered. Targets + // which don't support them directly, however, may want libcalls, in which + // case they mark them Expand, and we get here. + // FIXME: Unimplemented for now. Add libcalls. 
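// For targets that mark these atomics Expand, ExpandAtomic above picks the
// __sync_* libcall whose _1/_2/_4/_8 suffix matches the width of the atomic's
// memory type and emits it through ExpandChainLibCall.  Assuming the usual RTLIB
// symbol names, an i32 ISD::ATOMIC_LOAD_ADD ends up roughly as:
//
//   // i32 old = __sync_fetch_and_add_4(ptr, val);
//   std::pair<SDValue, SDValue> CallInfo =
//       ExpandChainLibCall(RTLIB::SYNC_FETCH_AND_ADD_4, Node, /*isSigned=*/false);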
+ case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_LOAD_ADD: + case ISD::ATOMIC_LOAD_SUB: + case ISD::ATOMIC_LOAD_AND: + case ISD::ATOMIC_LOAD_OR: + case ISD::ATOMIC_LOAD_XOR: + case ISD::ATOMIC_LOAD_NAND: + case ISD::ATOMIC_LOAD_MIN: + case ISD::ATOMIC_LOAD_MAX: + case ISD::ATOMIC_LOAD_UMIN: + case ISD::ATOMIC_LOAD_UMAX: + case ISD::ATOMIC_CMP_SWAP: { + std::pair Tmp = ExpandAtomic(Node); + Results.push_back(Tmp.first); + Results.push_back(Tmp.second); + break; + } case ISD::DYNAMIC_STACKALLOC: ExpandDYNAMIC_STACKALLOC(Node, Results); break; @@ -2465,15 +2652,31 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, EVT VT = Node->getValueType(0); Tmp1 = Node->getOperand(0); Tmp2 = Node->getOperand(1); - SDValue VAList = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, - false, false, 0); + unsigned Align = Node->getConstantOperandVal(3); + + SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2, V, 0, + false, false, 0); + SDValue VAList = VAListLoad; + + if (Align > TLI.getMinStackArgumentAlignment()) { + assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2"); + + VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(Align - 1, + TLI.getPointerTy())); + + VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList, + DAG.getConstant(-Align, + TLI.getPointerTy())); + } + // Increment the pointer, VAList, to the next vaarg Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList, DAG.getConstant(TLI.getTargetData()-> getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())), TLI.getPointerTy())); // Store the incremented VAList to the legalized pointer - Tmp3 = DAG.getStore(VAList.getValue(1), dl, Tmp3, Tmp2, V, 0, + Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2, V, 0, false, false, 0); // Load the actual argument out of the pointer VAList Results.push_back(DAG.getLoad(VT, dl, Tmp3, VAList, NULL, 0, @@ -2496,7 +2699,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, case ISD::EXTRACT_VECTOR_ELT: if (Node->getOperand(0).getValueType().getVectorNumElements() == 1) // This must be an access of the only element. Return it. 
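// A stand-alone illustration, independent of the SelectionDAG code, of the
// rounding the VAARG expansion above applies to the va_list pointer: for a
// power-of-two alignment, (p + Align - 1) & -Align rounds p up to the next
// Align-byte boundary and leaves already-aligned pointers unchanged.

#include <cassert>
#include <cstdint>
#include <cstdio>

static uint64_t RoundUpToAlign(uint64_t Ptr, uint64_t Align) {
  assert((Align & (Align - 1)) == 0 && "Expected Align to be a power of 2");
  return (Ptr + Align - 1) & -Align;  // -Align == ~(Align - 1) for powers of two
}

int main() {
  std::printf("%#llx\n", (unsigned long long)RoundUpToAlign(0x1003, 16)); // 0x1010
  std::printf("%#llx\n", (unsigned long long)RoundUpToAlign(0x1010, 16)); // 0x1010
  return 0;
}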
- Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), + Tmp1 = DAG.getNode(ISD::BIT_CONVERT, dl, Node->getValueType(0), Node->getOperand(0)); else Tmp1 = ExpandExtractFromVectorThroughStack(SDValue(Node, 0)); @@ -2948,13 +3151,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node, const TargetData &TD = *TLI.getTargetData(); unsigned EntrySize = DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD); - + Index = DAG.getNode(ISD::MUL, dl, PTy, Index, DAG.getConstant(EntrySize, PTy)); SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table); EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8); - SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr, + SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, PTy, dl, Chain, Addr, PseudoSourceValue::getJumpTable(), 0, MemVT, false, false, 0); Addr = LD; diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index e3eb949567a3..650ee5a0721c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -453,8 +453,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { - NewL = DAG.getLoad(L->getAddressingMode(), dl, L->getExtensionType(), - NVT, L->getChain(), L->getBasePtr(), L->getOffset(), + NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), + NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), NVT, L->isVolatile(), L->isNonTemporal(), L->getAlignment()); // Legalized the chain result - switch anything that used the old chain to @@ -464,8 +464,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { } // Do a non-extending load followed by FP_EXTEND. - NewL = DAG.getLoad(L->getAddressingMode(), dl, ISD::NON_EXTLOAD, - L->getMemoryVT(), L->getChain(), + NewL = DAG.getLoad(L->getAddressingMode(), ISD::NON_EXTLOAD, + L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getSrcValue(), L->getSrcValueOffset(), L->getMemoryVT(), L->isVolatile(), @@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { DebugLoc dl = N->getDebugLoc(); SDValue NewVAARG; - NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); + NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -698,9 +699,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { @@ -739,9 +741,10 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { @@ -757,8 +760,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), + 0); } SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { @@ -1106,7 +1110,7 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); - Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, + Hi = DAG.getExtLoad(LD->getExtensionType(), NVT, dl, Chain, Ptr, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -1294,9 +1298,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { @@ -1375,9 +1379,9 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { @@ -1393,8 +1397,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index 8b382bc7670d..b94ea9a3a9af 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -369,7 +369,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? ISD::EXTLOAD : N->getExtensionType(); DebugLoc dl = N->getDebugLoc(); - SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), + SDValue Res = DAG.getExtLoad(ExtType, NVT, dl, N->getChain(), N->getBasePtr(), N->getSrcValue(), N->getSrcValueOffset(), N->getMemoryVT(), N->isVolatile(), N->isNonTemporal(), N->getAlignment()); @@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SmallVector Parts(NumRegs); for (unsigned i = 0; i < NumRegs; ++i) { - Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2)); + Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2), + N->getConstantOperandVal(3)); Chain = Parts[i].getValue(1); } @@ -725,8 +726,9 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BR_CC(SDNode *N, unsigned OpNo) { // The chain (Op#0), CC (#1) and basic block destination (Op#4) are always // legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), LHS, RHS, N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), LHS, RHS, N->getOperand(4)), + 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { @@ -737,8 +739,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Cond, - N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Cond, + N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { @@ -773,7 +775,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { for (unsigned i = 0; i < NumElts; ++i) NewOps.push_back(GetPromotedInteger(N->getOperand(i))); - return DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], NumElts); + return SDValue(DAG.UpdateNodeOperands(N, &NewOps[0], NumElts), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { @@ -798,17 +800,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(N->getOperand(1).getValueType().getSizeInBits() >= N->getValueType(0).getVectorElementType().getSizeInBits() && "Type of inserted value narrower than vector element type!"); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), GetPromotedInteger(N->getOperand(1)), - N->getOperand(2)); + N->getOperand(2)), + 0); } assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - N->getOperand(1), Idx); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + N->getOperand(1), Idx), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { @@ -819,15 +822,14 @@ SDValue DAGTypeLegalizer::PromoteIntOp_MEMBARRIER(SDNode *N) { SDValue Flag = GetPromotedInteger(N->getOperand(i)); NewOps[i] = DAG.getZeroExtendInReg(Flag, dl, MVT::i1); } - return DAG.UpdateNodeOperands(SDValue (N, 0), NewOps, - array_lengthof(NewOps)); + return SDValue(DAG.UpdateNodeOperands(N, NewOps, array_lengthof(NewOps)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SCALAR_TO_VECTOR(SDNode *N) { // Integer SCALAR_TO_VECTOR operands are implicitly truncated, so just promote // the operand in place. - return DAG.UpdateNodeOperands(SDValue(N, 0), - GetPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + GetPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { @@ -837,8 +839,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT SVT = TLI.getSetCCResultType(N->getOperand(1).getValueType()); SDValue Cond = PromoteTargetBoolean(N->getOperand(0), SVT); - return DAG.UpdateNodeOperands(SDValue(N, 0), Cond, - N->getOperand(1), N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, Cond, + N->getOperand(1), N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { @@ -849,8 +851,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT_CC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(4))->get()); // The CC (#4) and the possible return values (#2 and #3) have legal types. 
- return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2), - N->getOperand(3), N->getOperand(4)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2), + N->getOperand(3), N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { @@ -861,12 +863,12 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SETCC(SDNode *N, unsigned OpNo) { PromoteSetCCOperands(LHS, RHS, cast(N->getOperand(2))->get()); // The CC (#2) is always legal. - return DAG.UpdateNodeOperands(SDValue(N, 0), LHS, RHS, N->getOperand(2)); + return SDValue(DAG.UpdateNodeOperands(N, LHS, RHS, N->getOperand(2)), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), - ZExtPromotedInteger(N->getOperand(1))); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), + ZExtPromotedInteger(N->getOperand(1))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { @@ -878,8 +880,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - SExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + SExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ @@ -905,8 +907,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { - return DAG.UpdateNodeOperands(SDValue(N, 0), - ZExtPromotedInteger(N->getOperand(0))); + return SDValue(DAG.UpdateNodeOperands(N, + ZExtPromotedInteger(N->getOperand(0))), 0); } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { @@ -990,6 +992,11 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::SHL: case ISD::SRA: case ISD::SRL: ExpandIntRes_Shift(N, Lo, Hi); break; + + case ISD::SADDO: + case ISD::SSUBO: ExpandIntRes_SADDSUBO(N, Lo, Hi); break; + case ISD::UADDO: + case ISD::USUBO: ExpandIntRes_UADDSUBO(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. @@ -1526,7 +1533,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, if (N->getMemoryVT().bitsLE(NVT)) { EVT MemVT = N->getMemoryVT(); - Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Lo = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); // Remember the chain. @@ -1559,7 +1566,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); - Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, NEVT, isVolatile, isNonTemporal, MinAlign(Alignment, IncrementSize)); @@ -1577,7 +1584,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, unsigned ExcessBits = (EBytes - IncrementSize)*8; // Load both the high bits and maybe some of the low bits. 
- Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getSrcValue(), SVOffset, + Hi = DAG.getExtLoad(ExtType, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset, EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), isVolatile, isNonTemporal, Alignment); @@ -1586,7 +1593,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); // Load the rest of the low bits. - Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getSrcValue(), + Lo = DAG.getExtLoad(ISD::ZEXTLOAD, NVT, dl, Ch, Ptr, N->getSrcValue(), SVOffset+IncrementSize, EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, @@ -1716,6 +1723,48 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); } +void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = Node->getOperand(0); + SDValue RHS = Node->getOperand(1); + DebugLoc dl = Node->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(Node->getOpcode() == ISD::SADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Compute the overflow. + // + // LHSSign -> LHS >= 0 + // RHSSign -> RHS >= 0 + // SumSign -> Sum >= 0 + // + // Add: + // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign) + // Sub: + // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign) + // + EVT OType = Node->getValueType(1); + SDValue Zero = DAG.getConstant(0, LHS.getValueType()); + + SDValue LHSSign = DAG.getSetCC(dl, OType, LHS, Zero, ISD::SETGE); + SDValue RHSSign = DAG.getSetCC(dl, OType, RHS, Zero, ISD::SETGE); + SDValue SignsMatch = DAG.getSetCC(dl, OType, LHSSign, RHSSign, + Node->getOpcode() == ISD::SADDO ? + ISD::SETEQ : ISD::SETNE); + + SDValue SumSign = DAG.getSetCC(dl, OType, Sum, Zero, ISD::SETGE); + SDValue SumSignNE = DAG.getSetCC(dl, OType, LHSSign, SumSign, ISD::SETNE); + + SDValue Cmp = DAG.getNode(ISD::AND, dl, OType, SignsMatch, SumSignNE); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(Node, 1), Cmp); +} + void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -1912,6 +1961,29 @@ void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, Hi = DAG.getNode(ISD::TRUNCATE, dl, NVT, Hi); } +void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + DebugLoc dl = N->getDebugLoc(); + + // Expand the result by simply replacing it with the equivalent + // non-overflow-checking operation. + SDValue Sum = DAG.getNode(N->getOpcode() == ISD::UADDO ? + ISD::ADD : ISD::SUB, dl, LHS.getValueType(), + LHS, RHS); + SplitInteger(Sum, Lo, Hi); + + // Calculate the overflow: addition overflows iff a + b < a, and subtraction + // overflows iff a - b > a. + SDValue Ofl = DAG.getSetCC(dl, N->getValueType(1), Sum, LHS, + N->getOpcode () == ISD::UADDO ? + ISD::SETULT : ISD::SETUGT); + + // Use the calculated overflow everywhere. + ReplaceValueWith(SDValue(N, 1), Ofl); +} + void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); @@ -2154,9 +2226,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { } // Update N to have the operands specified. 
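// A stand-alone check, on 8-bit values with two's-complement wrap assumed, of the
// overflow rules used by the SADDO/SSUBO and UADDO/USUBO expansions above:
// unsigned addition overflows iff (a + b) < a, and signed addition overflows iff
// the operands have the same sign but the sum's sign differs.

#include <cstdint>
#include <cstdio>

static bool UAddOverflows(uint8_t A, uint8_t B) {
  return (uint8_t)(A + B) < A;                  // wrapped sum smaller than an operand
}

static bool SAddOverflows(int8_t A, int8_t B) {
  int8_t Sum = (int8_t)(A + B);                 // wrapped signed sum
  bool LHSSign = A >= 0, RHSSign = B >= 0, SumSign = Sum >= 0;
  return LHSSign == RHSSign && LHSSign != SumSign;
}

int main() {
  std::printf("%d %d\n", UAddOverflows(200, 100), UAddOverflows(10, 20));  // 1 0
  std::printf("%d %d\n", SAddOverflows(100, 100), SAddOverflows(-50, 20)); // 1 0
  return 0;
}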
- return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), DAG.getCondCode(CCCode), NewLHS, NewRHS, - N->getOperand(4)); + N->getOperand(4)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { @@ -2172,9 +2244,9 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { } // Update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, N->getOperand(2), N->getOperand(3), - DAG.getCondCode(CCCode)); + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { @@ -2190,8 +2262,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { } // Otherwise, update N to have the operands specified. - return DAG.UpdateNodeOperands(SDValue(N, 0), NewLHS, NewRHS, - DAG.getCondCode(CCCode)); + return SDValue(DAG.UpdateNodeOperands(N, NewLHS, NewRHS, + DAG.getCondCode(CCCode)), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { @@ -2200,7 +2272,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) { // upper half of the shift amount is zero. Just use the lower half. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(1), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { @@ -2209,7 +2281,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) { // constant to valid type. SDValue Lo, Hi; GetExpandedInteger(N->getOperand(0), Lo, Hi); - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo); + return SDValue(DAG.UpdateNodeOperands(N, Lo), 0); } SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { @@ -2384,7 +2456,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // Load the value out, extending it from f32 to the destination float type. // FIXME: Avoid the extend by constructing the right constant pool? - SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, dl, DstVT, DAG.getEntryNode(), + SDValue Fudge = DAG.getExtLoad(ISD::EXTLOAD, DstVT, dl, DAG.getEntryNode(), FudgePtr, NULL, 0, MVT::f32, false, false, Alignment); return DAG.getNode(ISD::FADD, dl, DstVT, SignedConv, Fudge); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 17f131b21e4a..6e56c98e9b56 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -485,15 +485,14 @@ SDNode *DAGTypeLegalizer::AnalyzeNewNode(SDNode *N) { NewOps.push_back(Op); } else if (Op != OrigOp) { // This is the first operand to change - add all operands so far. - NewOps.insert(NewOps.end(), N->op_begin(), N->op_begin() + i); + NewOps.append(N->op_begin(), N->op_begin() + i); NewOps.push_back(Op); } } // Some operands changed - update the node. if (!NewOps.empty()) { - SDNode *M = DAG.UpdateNodeOperands(SDValue(N, 0), &NewOps[0], - NewOps.size()).getNode(); + SDNode *M = DAG.UpdateNodeOperands(N, &NewOps[0], NewOps.size()); if (M != N) { // The node morphed into a different node. Normally for this to happen // the original node would have to be marked NewNode. However this can @@ -684,40 +683,45 @@ void DAGTypeLegalizer::ReplaceValueWith(SDValue From, SDValue To) { // can potentially cause recursive merging. 
SmallSetVector NodesToAnalyze; NodeUpdateListener NUL(*this, NodesToAnalyze); - DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); - - // The old node may still be present in a map like ExpandedIntegers or - // PromotedIntegers. Inform maps about the replacement. - ReplacedValues[From] = To; - - // Process the list of nodes that need to be reanalyzed. - while (!NodesToAnalyze.empty()) { - SDNode *N = NodesToAnalyze.back(); - NodesToAnalyze.pop_back(); - if (N->getNodeId() != DAGTypeLegalizer::NewNode) - // The node was analyzed while reanalyzing an earlier node - it is safe to - // skip. Note that this is not a morphing node - otherwise it would still - // be marked NewNode. - continue; + do { + DAG.ReplaceAllUsesOfValueWith(From, To, &NUL); + + // The old node may still be present in a map like ExpandedIntegers or + // PromotedIntegers. Inform maps about the replacement. + ReplacedValues[From] = To; + + // Process the list of nodes that need to be reanalyzed. + while (!NodesToAnalyze.empty()) { + SDNode *N = NodesToAnalyze.back(); + NodesToAnalyze.pop_back(); + if (N->getNodeId() != DAGTypeLegalizer::NewNode) + // The node was analyzed while reanalyzing an earlier node - it is safe + // to skip. Note that this is not a morphing node - otherwise it would + // still be marked NewNode. + continue; - // Analyze the node's operands and recalculate the node ID. - SDNode *M = AnalyzeNewNode(N); - if (M != N) { - // The node morphed into a different node. Make everyone use the new node - // instead. - assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); - assert(N->getNumValues() == M->getNumValues() && - "Node morphing changed the number of results!"); - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { - SDValue OldVal(N, i); - SDValue NewVal(M, i); - if (M->getNodeId() == Processed) - RemapValue(NewVal); - DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + // Analyze the node's operands and recalculate the node ID. + SDNode *M = AnalyzeNewNode(N); + if (M != N) { + // The node morphed into a different node. Make everyone use the new + // node instead. + assert(M->getNodeId() != NewNode && "Analysis resulted in NewNode!"); + assert(N->getNumValues() == M->getNumValues() && + "Node morphing changed the number of results!"); + for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) { + SDValue OldVal(N, i); + SDValue NewVal(M, i); + if (M->getNodeId() == Processed) + RemapValue(NewVal); + DAG.ReplaceAllUsesOfValueWith(OldVal, NewVal, &NUL); + } + // The original node continues to exist in the DAG, marked NewNode. } - // The original node continues to exist in the DAG, marked NewNode. } - } + // When recursively update nodes with new nodes, it is possible to have + // new uses of From due to CSE. If this happens, replace the new uses of + // From with To. 
+ } while (!From.use_empty()); } void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index c6659630ede2..bd86694446d6 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -345,6 +345,9 @@ private: void ExpandIntRes_UREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandIntRes_Shift (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_SADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandIntRes_UADDSUBO (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi); bool ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -620,6 +623,7 @@ private: SDValue WidenVecRes_Binary(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); + SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); SDValue WidenVecRes_Unary(SDNode *N); SDValue WidenVecRes_InregOp(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 88e1e624ae32..9c2b1d9ed73d 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -238,13 +238,15 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, } void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + EVT OVT = N->getValueType(0); + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); DebugLoc dl = N->getDebugLoc(); + const unsigned Align = N->getConstantOperandVal(3); - Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2)); - Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2)); + Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); + Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0); // Handle endianness of the load. 
if (TLI.isBigEndian()) diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 0e2bd0233712..621c08724210 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -116,7 +116,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Ops.push_back(LegalizeOp(Node->getOperand(i))); SDValue Result = - DAG.UpdateNodeOperands(Op.getValue(0), Ops.data(), Ops.size()); + SDValue(DAG.UpdateNodeOperands(Op.getNode(), Ops.data(), Ops.size()), 0); bool HasVectorValue = false; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 7efeea1ddaf9..93aeff5c1e6c 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -165,9 +165,10 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { assert(N->isUnindexed() && "Indexed vector load?"); - SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getDebugLoc(), + SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), + N->getDebugLoc(), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getSrcValue(), N->getSrcValueOffset(), @@ -448,6 +449,11 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: case ISD::ANY_EXTEND: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: SplitVecRes_UnaryOp(N, Lo, Hi); break; @@ -755,14 +761,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, EVT LoMemVT, HiMemVT; GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); - Lo = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, LoVT, Ch, Ptr, Offset, + Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, SV, SVOffset, LoMemVT, isVolatile, isNonTemporal, Alignment); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, DAG.getIntPtrConstant(IncrementSize)); SVOffset += IncrementSize; - Hi = DAG.getLoad(ISD::UNINDEXED, dl, ExtType, HiVT, Ch, Ptr, Offset, + Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, SV, SVOffset, HiMemVT, isVolatile, isNonTemporal, Alignment); // Build a factor node to remember that this load is independent of the @@ -1082,10 +1088,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { uint64_t LoElts = Lo.getValueType().getVectorNumElements(); if (IdxVal < LoElts) - return DAG.UpdateNodeOperands(SDValue(N, 0), Lo, Idx); - return DAG.UpdateNodeOperands(SDValue(N, 0), Hi, + return SDValue(DAG.UpdateNodeOperands(N, Lo, Idx), 0); + return SDValue(DAG.UpdateNodeOperands(N, Hi, DAG.getConstant(IdxVal - LoElts, - Idx.getValueType())); + Idx.getValueType())), + 0); } // Store the vector to the stack. @@ -1099,7 +1106,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Load back the required element. 
StackPtr = GetVectorElementPointer(StackPtr, EltVT, Idx); - return DAG.getExtLoad(ISD::EXTLOAD, dl, N->getValueType(0), Store, StackPtr, + return DAG.getExtLoad(ISD::EXTLOAD, N->getValueType(0), dl, Store, StackPtr, SV, 0, EltVT, false, false, 0); } @@ -1199,7 +1206,6 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FPOWI: case ISD::FREM: case ISD::FSUB: case ISD::MUL: @@ -1215,6 +1221,10 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { Res = WidenVecRes_Binary(N); break; + case ISD::FPOWI: + Res = WidenVecRes_POWI(N); + break; + case ISD::SHL: case ISD::SRA: case ISD::SRL: @@ -1241,6 +1251,11 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEG: case ISD::FSIN: case ISD::FSQRT: + case ISD::FEXP: + case ISD::FEXP2: + case ISD::FLOG: + case ISD::FLOG2: + case ISD::FLOG10: Res = WidenVecRes_Unary(N); break; } @@ -1258,7 +1273,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; unsigned NumElts = VT.getVectorNumElements(); - while (!TLI.isTypeLegal(VT) && NumElts != 1) { + while (!TLI.isTypeSynthesizable(VT) && NumElts != 1) { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); } @@ -1273,13 +1288,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return DAG.UnrollVectorOp(N, WidenVT.getVectorNumElements()); } else { // Since the operation can trap, apply operation on the original vector. + EVT MaxVT = VT; SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); unsigned CurNumElts = N->getValueType(0).getVectorNumElements(); SmallVector ConcatOps(CurNumElts); unsigned ConcatEnd = 0; // Current ConcatOps index. - unsigned Idx = 0; // Current Idx into input vectors. + int Idx = 0; // Current Idx into input vectors. + + // NumElts := greatest synthesizable vector size (at most WidenVT) + // while (orig. vector has unhandled elements) { + // take munches of size NumElts from the beginning and add to ConcatOps + // NumElts := next smaller supported vector size or 1 + // } while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, @@ -1290,26 +1312,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { Idx += NumElts; CurNumElts -= NumElts; } - EVT PrevVecVT = VT; do { NumElts = NumElts / 2; VT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NumElts); - } while (!TLI.isTypeLegal(VT) && NumElts != 1); + } while (!TLI.isTypeSynthesizable(VT) && NumElts != 1); if (NumElts == 1) { - // Since we are using concat vector, build a vector from the scalar ops. 
- SDValue VecOp = DAG.getUNDEF(PrevVecVT); for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp1, DAG.getIntPtrConstant(Idx)); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, InOp2, DAG.getIntPtrConstant(Idx)); - VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, PrevVecVT, VecOp, - DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2), - DAG.getIntPtrConstant(i)); + ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, + EOp1, EOp2); } CurNumElts = 0; - ConcatOps[ConcatEnd++] = VecOp; } } @@ -1320,23 +1337,65 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { return ConcatOps[0]; } - // Rebuild vector to one with the widen type - Idx = ConcatEnd - 1; - while (Idx != 0) { + // while (Some element of ConcatOps is not of type MaxVT) { + // From the end of ConcatOps, collect elements of the same type and put + // them into an op of the next larger supported type + // } + while (ConcatOps[ConcatEnd-1].getValueType() != MaxVT) { + Idx = ConcatEnd - 1; VT = ConcatOps[Idx--].getValueType(); - while (Idx != 0 && ConcatOps[Idx].getValueType() == VT) - --Idx; - if (Idx != 0) { - VT = ConcatOps[Idx].getValueType(); - ConcatOps[Idx+1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, - &ConcatOps[Idx+1], ConcatEnd - Idx - 1); + while (Idx >= 0 && ConcatOps[Idx].getValueType() == VT) + Idx--; + + int NextSize = VT.isVector() ? VT.getVectorNumElements() : 1; + EVT NextVT; + do { + NextSize *= 2; + NextVT = EVT::getVectorVT(*DAG.getContext(), WidenEltVT, NextSize); + } while (!TLI.isTypeSynthesizable(NextVT)); + + if (!VT.isVector()) { + // Scalar type, create an INSERT_VECTOR_ELEMENT of type NextVT + SDValue VecOp = DAG.getUNDEF(NextVT); + unsigned NumToInsert = ConcatEnd - Idx - 1; + for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { + VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, + ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + } + ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; + } + else { + // Vector type, create a CONCAT_VECTORS of type NextVT + SDValue undefVec = DAG.getUNDEF(VT); + unsigned OpsToConcat = NextSize/VT.getVectorNumElements(); + SmallVector SubConcatOps(OpsToConcat); + unsigned RealVals = ConcatEnd - Idx - 1; + unsigned SubConcatEnd = 0; + unsigned SubConcatIdx = Idx + 1; + while (SubConcatEnd < RealVals) + SubConcatOps[SubConcatEnd++] = ConcatOps[++Idx]; + while (SubConcatEnd < OpsToConcat) + SubConcatOps[SubConcatEnd++] = undefVec; + ConcatOps[SubConcatIdx] = DAG.getNode(ISD::CONCAT_VECTORS, dl, + NextVT, &SubConcatOps[0], + OpsToConcat); + ConcatEnd = SubConcatIdx + 1; } } + + // Check to see if we have a single operation with the widen type. 
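// A stand-alone sketch of the chunking strategy the comments above describe for
// trapping vector ops: peel off as many full-width "munches" as fit, then drop
// to the next smaller width, down to scalars.  For brevity this assumes every
// power-of-two width up to MaxWidth is synthesizable, whereas the real code asks
// the target via isTypeSynthesizable at each step.

#include <cstdio>
#include <vector>

static std::vector<unsigned> ChunkElements(unsigned NumElts, unsigned MaxWidth) {
  std::vector<unsigned> Chunks;
  unsigned Width = MaxWidth;
  while (NumElts != 0) {
    while (Width > 1 && Width > NumElts)
      Width /= 2;                 // next smaller supported size, or 1 (scalar)
    Chunks.push_back(Width);      // take one munch of this size
    NumElts -= Width;
  }
  return Chunks;
}

int main() {
  // e.g. 7 elements with a widest legal vector of 4 lanes -> munches of 4, 2, 1.
  for (unsigned C : ChunkElements(7, 4))
    std::printf("%u ", C);
  std::printf("\n");
  return 0;
}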
+ if (ConcatEnd == 1) { + VT = ConcatOps[0].getValueType(); + if (VT == WidenVT) + return ConcatOps[0]; + } - unsigned NumOps = WidenVT.getVectorNumElements()/VT.getVectorNumElements(); + // add undefs of size MaxVT until ConcatOps grows to length of WidenVT + unsigned NumOps = + WidenVT.getVectorNumElements()/MaxVT.getVectorNumElements(); if (NumOps != ConcatEnd ) { - SDValue UndefVal = DAG.getUNDEF(VT); + SDValue UndefVal = DAG.getUNDEF(MaxVT); for (unsigned j = ConcatEnd; j < NumOps; ++j) ConcatOps[j] = UndefVal; } @@ -1366,7 +1425,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(Opcode, dl, WidenVT, InOp); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1410,6 +1469,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &Ops[0], WidenNumElts); } +SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp = GetWidenedVector(N->getOperand(0)); + SDValue ShOp = N->getOperand(1); + return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); +} + SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); @@ -1501,7 +1567,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BIT_CONVERT(SDNode *N) { NewInVT = EVT::getVectorVT(*DAG.getContext(), InVT, NewNumElts); } - if (TLI.isTypeLegal(NewInVT)) { + if (TLI.isTypeSynthesizable(NewInVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1642,7 +1708,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { SatOp, CvtCode); } - if (TLI.isTypeLegal(InWidenVT)) { + if (TLI.isTypeSynthesizable(InWidenVT)) { // Because the result and the input are different vector types, widening // the result could create a legal type but widening the input might make // it an illegal type that might lead to repeatedly splitting the input @@ -1968,7 +2034,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BIT_CONVERT(SDNode *N) { if (InWidenSize % Size == 0 && !VT.isVector()) { unsigned NewNumElts = InWidenSize / Size; EVT NewVT = EVT::getVectorVT(*DAG.getContext(), VT, NewNumElts); - if (TLI.isTypeLegal(NewVT)) { + if (TLI.isTypeSynthesizable(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BIT_CONVERT, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, DAG.getIntPtrConstant(0)); @@ -2066,7 +2132,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, unsigned MemVTWidth = MemVT.getSizeInBits(); if (MemVT.getSizeInBits() <= WidenEltWidth) break; - if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && + if (TLI.isTypeSynthesizable(MemVT) && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { RetVT = MemVT; @@ -2080,7 +2146,7 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, VT >= (unsigned)MVT::FIRST_VECTOR_VALUETYPE; --VT) { EVT MemVT = (MVT::SimpleValueType) VT; unsigned 
MemVTWidth = MemVT.getSizeInBits(); - if (TLI.isTypeLegal(MemVT) && WidenEltVT == MemVT.getVectorElementType() && + if (TLI.isTypeSynthesizable(MemVT) && WidenEltVT == MemVT.getVectorElementType() && (WidenWidth % MemVTWidth) == 0 && (MemVTWidth <= Width || (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { @@ -2286,14 +2352,14 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector& LdChain, unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector Ops(WidenNumElts); unsigned Increment = LdEltVT.getSizeInBits() / 8; - Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, SV, SVOffset, + Ops[0] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, BasePtr, SV, SVOffset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Offset)); - Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, SV, + Ops[i] = DAG.getExtLoad(ExtType, EltVT, dl, Chain, NewBasePtr, SV, SVOffset + Offset, LdEltVT, isVolatile, isNonTemporal, Align); LdChain.push_back(Ops[i].getValue(1)); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index ad8630afff45..3b86c3286585 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -535,7 +535,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 820ba6681606..3ef521c398e1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); - if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { + if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); assert(LiveRegDefs[I->getReg()] == I->getSUnit() && "Physical register dependency violated?"); @@ -795,7 +795,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { SUnit *LRDef = LiveRegDefs[Reg]; EVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII); const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, VT); + TRI->getMinimalPhysRegClass(Reg, VT); const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC); // If cross copy register class is null, then it must be possible copy @@ -1116,7 +1116,7 @@ namespace { SUnit *pop() { if (empty()) return NULL; std::vector::iterator Best = Queue.begin(); - for (std::vector::iterator I = next(Queue.begin()), + for (std::vector::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) if (Picker(*Best, *I)) Best = I; @@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ return left->getHeight() > right->getHeight(); } else if (RStall) return false; + + // If either 
node is scheduling for latency, sort them by height and latency + // first. + if (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency) { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + if (left->Latency != right->Latency) + return left->Latency > right->Latency; + } + return BURRSort(left, right, SPQ); } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 3185c88b82bf..06cf05308755 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { SUnits.back().OrigNode = &SUnits.back(); SUnit *SU = &SUnits.back(); const TargetLowering &TLI = DAG->getTargetLoweringInfo(); - SU->SchedulingPref = TLI.getSchedulingPreference(N); + if (N->isMachineOpcode() && + N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) + SU->SchedulingPref = Sched::None; + else + SU->SchedulingPref = TLI.getSchedulingPreference(N); return SU; } @@ -97,7 +101,7 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) { PhysReg = Reg; const TargetRegisterClass *RC = - TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo)); + TRI->getMinimalPhysRegClass(Reg, Def->getValueType(ResNo)); Cost = RC->getCopyCost(); } } @@ -106,17 +110,42 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, SelectionDAG *DAG) { SmallVector VTs; - for (unsigned i = 0, e = N->getNumValues(); i != e; ++i) - VTs.push_back(N->getValueType(i)); + SDNode *FlagDestNode = Flag.getNode(); + + // Don't add a flag from a node to itself. + if (FlagDestNode == N) return; + + // Don't add a flag to something which already has a flag. + if (N->getValueType(N->getNumValues() - 1) == MVT::Flag) return; + + for (unsigned I = 0, E = N->getNumValues(); I != E; ++I) + VTs.push_back(N->getValueType(I)); + if (AddFlag) VTs.push_back(MVT::Flag); + SmallVector Ops; - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - Ops.push_back(N->getOperand(i)); - if (Flag.getNode()) + for (unsigned I = 0, E = N->getNumOperands(); I != E; ++I) + Ops.push_back(N->getOperand(I)); + + if (FlagDestNode) Ops.push_back(Flag); + SDVTList VTList = DAG->getVTList(&VTs[0], VTs.size()); + MachineSDNode::mmo_iterator Begin = 0, End = 0; + MachineSDNode *MN = dyn_cast(N); + + // Store memory references. + if (MN) { + Begin = MN->memoperands_begin(); + End = MN->memoperands_end(); + } + DAG->MorphNodeTo(N, N->getOpcode(), VTList, &Ops[0], Ops.size()); + + // Reset the memory references + if (MN) + MN->setMemRefs(Begin, End); } /// ClusterNeighboringLoads - Force nearby loads together by "flagging" them. @@ -124,98 +153,98 @@ static void AddFlags(SDNode *N, SDValue Flag, bool AddFlag, /// offsets are not far apart (target specific), it add MVT::Flag inputs and /// outputs to ensure they are scheduled together and in order. This /// optimization may benefit some targets by improving cache locality. -void ScheduleDAGSDNodes::ClusterNeighboringLoads() { +void ScheduleDAGSDNodes::ClusterNeighboringLoads(SDNode *Node) { + SDNode *Chain = 0; + unsigned NumOps = Node->getNumOperands(); + if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) + Chain = Node->getOperand(NumOps-1).getNode(); + if (!Chain) + return; + + // Look for other loads of the same chain. 
Find loads that are loading from + // the same base pointer and different offsets. SmallPtrSet Visited; SmallVector Offsets; DenseMap O2SMap; // Map from offset to SDNode. - for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), - E = DAG->allnodes_end(); NI != E; ++NI) { - SDNode *Node = &*NI; - if (!Node || !Node->isMachineOpcode()) + bool Cluster = false; + SDNode *Base = Node; + for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); + I != E; ++I) { + SDNode *User = *I; + if (User == Node || !Visited.insert(User)) continue; - - unsigned Opc = Node->getMachineOpcode(); - const TargetInstrDesc &TID = TII->get(Opc); - if (!TID.mayLoad()) + int64_t Offset1, Offset2; + if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || + Offset1 == Offset2) + // FIXME: Should be ok if they addresses are identical. But earlier + // optimizations really should have eliminated one of the loads. continue; + if (O2SMap.insert(std::make_pair(Offset1, Base)).second) + Offsets.push_back(Offset1); + O2SMap.insert(std::make_pair(Offset2, User)); + Offsets.push_back(Offset2); + if (Offset2 < Offset1) + Base = User; + Cluster = true; + } - SDNode *Chain = 0; - unsigned NumOps = Node->getNumOperands(); - if (Node->getOperand(NumOps-1).getValueType() == MVT::Other) - Chain = Node->getOperand(NumOps-1).getNode(); - if (!Chain) - continue; + if (!Cluster) + return; - // Look for other loads of the same chain. Find loads that are loading from - // the same base pointer and different offsets. - Visited.clear(); - Offsets.clear(); - O2SMap.clear(); - bool Cluster = false; - SDNode *Base = Node; - int64_t BaseOffset; - for (SDNode::use_iterator I = Chain->use_begin(), E = Chain->use_end(); - I != E; ++I) { - SDNode *User = *I; - if (User == Node || !Visited.insert(User)) - continue; - int64_t Offset1, Offset2; - if (!TII->areLoadsFromSameBasePtr(Base, User, Offset1, Offset2) || - Offset1 == Offset2) - // FIXME: Should be ok if they addresses are identical. But earlier - // optimizations really should have eliminated one of the loads. - continue; - if (O2SMap.insert(std::make_pair(Offset1, Base)).second) - Offsets.push_back(Offset1); - O2SMap.insert(std::make_pair(Offset2, User)); - Offsets.push_back(Offset2); - if (Offset2 < Offset1) { - Base = User; - BaseOffset = Offset2; - } else { - BaseOffset = Offset1; - } - Cluster = true; - } + // Sort them in increasing order. + std::sort(Offsets.begin(), Offsets.end()); + + // Check if the loads are close enough. + SmallVector Loads; + unsigned NumLoads = 0; + int64_t BaseOff = Offsets[0]; + SDNode *BaseLoad = O2SMap[BaseOff]; + Loads.push_back(BaseLoad); + for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { + int64_t Offset = Offsets[i]; + SDNode *Load = O2SMap[Offset]; + if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset,NumLoads)) + break; // Stop right here. Ignore loads that are further away. + Loads.push_back(Load); + ++NumLoads; + } - if (!Cluster) - continue; + if (NumLoads == 0) + return; - // Sort them in increasing order. - std::sort(Offsets.begin(), Offsets.end()); - - // Check if the loads are close enough. - SmallVector Loads; - unsigned NumLoads = 0; - int64_t BaseOff = Offsets[0]; - SDNode *BaseLoad = O2SMap[BaseOff]; - Loads.push_back(BaseLoad); - for (unsigned i = 1, e = Offsets.size(); i != e; ++i) { - int64_t Offset = Offsets[i]; - SDNode *Load = O2SMap[Offset]; - if (!TII->shouldScheduleLoadsNear(BaseLoad, Load, BaseOff, Offset, - NumLoads)) - break; // Stop right here. 
Ignore loads that are further away. - Loads.push_back(Load); - ++NumLoads; - } + // Cluster loads by adding MVT::Flag outputs and inputs. This also + // ensure they are scheduled in order of increasing addresses. + SDNode *Lead = Loads[0]; + AddFlags(Lead, SDValue(0, 0), true, DAG); + + SDValue InFlag = SDValue(Lead, Lead->getNumValues() - 1); + for (unsigned I = 1, E = Loads.size(); I != E; ++I) { + bool OutFlag = I < E - 1; + SDNode *Load = Loads[I]; + + AddFlags(Load, InFlag, OutFlag, DAG); + + if (OutFlag) + InFlag = SDValue(Load, Load->getNumValues() - 1); + + ++LoadsClustered; + } +} - if (NumLoads == 0) +/// ClusterNodes - Cluster certain nodes which should be scheduled together. +/// +void ScheduleDAGSDNodes::ClusterNodes() { + for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(), + E = DAG->allnodes_end(); NI != E; ++NI) { + SDNode *Node = &*NI; + if (!Node || !Node->isMachineOpcode()) continue; - // Cluster loads by adding MVT::Flag outputs and inputs. This also - // ensure they are scheduled in order of increasing addresses. - SDNode *Lead = Loads[0]; - AddFlags(Lead, SDValue(0,0), true, DAG); - SDValue InFlag = SDValue(Lead, Lead->getNumValues()-1); - for (unsigned i = 1, e = Loads.size(); i != e; ++i) { - bool OutFlag = i < e-1; - SDNode *Load = Loads[i]; - AddFlags(Load, InFlag, OutFlag, DAG); - if (OutFlag) - InFlag = SDValue(Load, Load->getNumValues()-1); - ++LoadsClustered; - } + unsigned Opc = Node->getMachineOpcode(); + const TargetInstrDesc &TID = TII->get(Opc); + if (TID.mayLoad()) + // Cluster loads from "near" addresses into combined SUnits. + ClusterNeighboringLoads(Node); } } @@ -364,8 +393,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { if (Cost >= 0) PhysReg = 0; - const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, - OpSU->Latency, PhysReg); + // If this is a ctrl dep, latency is 1. + unsigned OpLatency = isChain ? 1 : OpSU->Latency; + const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, + OpLatency, PhysReg); if (!isChain && !UnitLatencies) { ComputeOperandLatency(OpN, N, i, const_cast(dep)); ST.adjustSchedDependency(OpSU, SU, const_cast(dep)); @@ -382,8 +413,8 @@ void ScheduleDAGSDNodes::AddSchedEdges() { /// excludes nodes that aren't interesting to scheduling, and represents /// flagged together nodes with a single SUnit. void ScheduleDAGSDNodes::BuildSchedGraph(AliasAnalysis *AA) { - // Cluster loads from "near" addresses into combined SUnits. - ClusterNeighboringLoads(); + // Cluster certain nodes which should be scheduled together. + ClusterNodes(); // Populate the SUnits array. BuildSchedUnits(); // Compute all the scheduling dependencies between nodes. 
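The rewritten clustering code above factors the old whole-DAG scan into ClusterNodes, which visits every machine node and calls ClusterNeighboringLoads for each load; that helper gathers the other loads hanging off the same chain, sorts them by offset from the common base pointer, and glues the near ones together with MVT::Flag edges so the scheduler keeps them adjacent and in increasing-address order. The sketch below only illustrates that offset-sort-and-glue decision with plain data structures; LoadInfo, nearEnough, and the GluedTo field are hypothetical stand-ins, not SelectionDAG API.

#include <algorithm>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for one load node hanging off a common chain.
struct LoadInfo {
  int64_t Offset = 0;          // byte offset from the shared base pointer
  LoadInfo *GluedTo = nullptr; // previous load in the cluster, if any
};

// Assumed nearness heuristic, playing the role of shouldScheduleLoadsNear():
// stop clustering once a load is more than 64 bytes past the first one.
static bool nearEnough(int64_t BaseOff, int64_t Off) {
  return Off - BaseOff <= 64;
}

// Sort candidates by offset and glue each kept load to its predecessor,
// mirroring the Flag-edge chaining that AddFlags performs in the patch.
static void clusterLoads(std::vector<LoadInfo *> &Loads) {
  if (Loads.size() < 2)
    return;
  std::sort(Loads.begin(), Loads.end(),
            [](const LoadInfo *A, const LoadInfo *B) {
              return A->Offset < B->Offset;
            });
  int64_t BaseOff = Loads.front()->Offset;
  for (size_t I = 1; I < Loads.size(); ++I) {
    if (!nearEnough(BaseOff, Loads[I]->Offset))
      break; // farther loads are left unclustered
    Loads[I]->GluedTo = Loads[I - 1];
  }
}

In the patch itself the glue is the MVT::Flag value appended by AddFlags, which also morphs the node in place and restores its memory operands; the sketch captures only the ordering decision.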
@@ -427,15 +458,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, return; unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); - if (Def->isMachineOpcode() && Use->isMachineOpcode()) { + if (Def->isMachineOpcode()) { const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (DefIdx >= II.getNumDefs()) return; int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); if (DefCycle < 0) return; - const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + int UseCycle = 1; + if (Use->isMachineOpcode()) { + const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); + UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + } if (UseCycle >= 0) { int Latency = DefCycle - UseCycle + 1; if (Latency >= 0) @@ -473,7 +507,7 @@ namespace { } // ProcessSourceNode - Process nodes with source order numbers. These are added -// to a vector which EmitSchedule use to determine how to insert dbg_value +// to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, @@ -485,13 +519,13 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, return; MachineBasicBlock *BB = Emitter.getBlock(); - if (BB->empty() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; } - Orders.push_back(std::make_pair(Order, &BB->back())); + Orders.push_back(std::make_pair(Order, prior(Emitter.getInsertPos()))); if (!N->getHasDebugValue()) return; // Opportunistically insert immediate dbg_value uses, i.e. those with source @@ -530,7 +564,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { for (; PDI != PDE; ++PDI) { MachineInstr *DbgMI= Emitter.EmitDbgValue(*PDI, VRBaseMap); if (DbgMI) - BB->insert(BB->end(), DbgMI); + BB->insert(InsertPos, DbgMI); } } @@ -574,9 +608,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert all the dbg_values which have not already been inserted in source // order sequence. if (HasDbg) { - MachineBasicBlock::iterator BBBegin = BB->empty() ? BB->end() : BB->begin(); - while (BBBegin != BB->end() && BBBegin->isPHI()) - ++BBBegin; + MachineBasicBlock::iterator BBBegin = BB->getFirstNonPHI(); // Sort the source order instructions and use the order to insert debug // values. @@ -586,14 +618,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); // Now emit the rest according to source order. unsigned LastOrder = 0; - MachineInstr *LastMI = 0; for (unsigned i = 0, e = Orders.size(); i != e && DI != DE; ++i) { unsigned Order = Orders[i].first; MachineInstr *MI = Orders[i].second; // Insert all SDDbgValue's whose order(s) are before "Order". if (!MI) continue; - MachineBasicBlock *MIBB = MI->getParent(); #ifndef NDEBUG unsigned LastDIOrder = 0; #endif @@ -612,13 +642,14 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() { // Insert to start of the BB (after PHIs). BB->insert(BBBegin, DbgMI); else { + // Insert at the instruction, which may be in a different + // block, if the block was split by a custom inserter. 
MachineBasicBlock::iterator Pos = MI; - MIBB->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(llvm::next(Pos), DbgMI); } } } LastOrder = Order; - LastMI = MI; } // Add trailing DbgValue's before the terminator. FIXME: May want to add // some of them before one or more conditional branches? diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index e8714ba83285..842fc8c72703 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -108,7 +108,10 @@ namespace llvm { private: /// ClusterNeighboringLoads - Cluster loads from "near" addresses into /// combined SUnits. - void ClusterNeighboringLoads(); + void ClusterNeighboringLoads(SDNode *Node); + /// ClusterNodes - Cluster certain nodes which should be scheduled together. + /// + void ClusterNodes(); /// BuildSchedUnits, AddSchedEdges - Helper functions for BuildSchedGraph. void BuildSchedUnits(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 38bf68b8539f..e83a0346b535 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -790,9 +790,8 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { } // EntryNode could meaningfully have debug info if we can find it... -SelectionDAG::SelectionDAG(const TargetMachine &tm, FunctionLoweringInfo &fli) +SelectionDAG::SelectionDAG(const TargetMachine &tm) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - FLI(fli), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0) { AllNodes.push_back(&EntryNode); @@ -808,7 +807,6 @@ void SelectionDAG::init(MachineFunction &mf) { SelectionDAG::~SelectionDAG() { allnodes_clear(); delete Ordering; - DbgInfo->clear(); delete DbgInfo; } @@ -835,11 +833,8 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - delete Ordering; - Ordering = new SDNodeOrdering(); + Ordering->clear(); DbgInfo->clear(); - delete DbgInfo; - DbgInfo = new SDDbgInfo(); } SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { @@ -980,7 +975,7 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { } } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { @@ -1015,7 +1010,7 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2291,7 +2286,6 @@ bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const { SDValue SelectionDAG::getShuffleScalarElt(const ShuffleVectorSDNode *N, unsigned i) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); if (N->getMaskElt(i) < 0) return getUNDEF(VT.getVectorElementType()); unsigned Index = N->getMaskElt(i); @@ -2475,9 +2469,18 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, VT.getVectorNumElements() == Operand.getValueType().getVectorNumElements()) && "Vector element count mismatch!"); - if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND) + + if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == 
ISD::SIGN_EXTEND || + OpOpcode == ISD::ANY_EXTEND) // (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x) return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0)); + + // (ext (trunx x)) -> x + if (OpOpcode == ISD::TRUNCATE) { + SDValue OpOp = Operand.getNode()->getOperand(0); + if (OpOp.getValueType() == VT) + return OpOp; + } break; case ISD::TRUNCATE: assert(VT.isInteger() && Operand.getValueType().isInteger() && @@ -2622,7 +2625,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N1.getOpcode() == ISD::BUILD_VECTOR && N2.getOpcode() == ISD::BUILD_VECTOR) { SmallVector Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3011,7 +3014,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast(N1.getNode()); - ConstantSDNode *N2C = dyn_cast(N2.getNode()); switch (Opcode) { case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to @@ -3020,8 +3022,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, N2.getOpcode() == ISD::BUILD_VECTOR && N3.getOpcode() == ISD::BUILD_VECTOR) { SmallVector Elts(N1.getNode()->op_begin(), N1.getNode()->op_end()); - Elts.insert(Elts.end(), N2.getNode()->op_begin(), N2.getNode()->op_end()); - Elts.insert(Elts.end(), N3.getNode()->op_begin(), N3.getNode()->op_end()); + Elts.append(N2.getNode()->op_begin(), N2.getNode()->op_end()); + Elts.append(N3.getNode()->op_begin(), N3.getNode()->op_end()); return getNode(ISD::BUILD_VECTOR, DL, VT, &Elts[0], Elts.size()); } break; @@ -3041,14 +3043,6 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (N2 == N3) return N2; // select C, X, X -> X break; - case ISD::BRCOND: - if (N2C) { - if (N2C->getZExtValue()) // Unconditional branch - return getNode(ISD::BR, DL, MVT::Other, N1, N3); - else - return N1; // Never-taken branch - } - break; case ISD::VECTOR_SHUFFLE: llvm_unreachable("should use getVectorShuffle constructor!"); break; @@ -3267,6 +3261,15 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, if (VT.bitsGT(LVT)) VT = LVT; } + + // If we're optimizing for size, and there is a limit, bump the maximum number + // of operations inserted down to 4. This is a wild guess that approximates + // the size of a call to memcpy or memset (3 arguments + call). + if (Limit != ~0U) { + const Function *F = DAG.getMachineFunction().getFunction(); + if (F->hasFnAttr(Attribute::OptimizeForSize)) + Limit = 4; + } unsigned NumMemOps = 0; while (Size != 0) { @@ -3321,9 +3324,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, std::string Str; bool CopyFromStr = isMemSrcFromString(Src, Str); bool isZeroStr = CopyFromStr && Str.empty(); - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemcpy(); + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemcpy(); + if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), @@ -3368,7 +3370,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME does the case above also need this? 
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); - Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, + Value = DAG.getExtLoad(ISD::EXTLOAD, NVT, dl, Chain, getMemBasePlusOffset(Src, SrcOff, DAG), SrcSV, SrcSVOff + SrcOff, VT, isVol, false, MinAlign(SrcAlign, SrcOff)); @@ -3401,9 +3403,6 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // below a certain threshold. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); std::vector MemOps; - uint64_t Limit = -1ULL; - if (!AlwaysInline) - Limit = TLI.getMaxStoresPerMemmove(); bool DstAlignCanChange = false; MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); FrameIndexSDNode *FI = dyn_cast(Dst); @@ -3412,6 +3411,7 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned SrcAlign = DAG.InferPtrAlignment(Src); if (Align > SrcAlign) SrcAlign = Align; + unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), @@ -3895,8 +3895,8 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -3919,12 +3919,12 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, MachineMemOperand *MMO = MF.getMachineMemOperand(SV, Flags, SVOffset, MemVT.getStoreSize(), Alignment); - return getLoad(AM, dl, ExtType, VT, Chain, Ptr, Offset, MemVT, MMO); + return getLoad(AM, ExtType, VT, dl, Chain, Ptr, Offset, MemVT, MMO); } SDValue -SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl, - ISD::LoadExtType ExtType, EVT VT, SDValue Chain, +SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, + EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -3974,18 +3974,18 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ISD::NON_EXTLOAD, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, SV, SVOffset, VT, isVolatile, isNonTemporal, Alignment); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, const Value *SV, int SVOffset, EVT MemVT, bool isVolatile, bool isNonTemporal, unsigned Alignment) { SDValue Undef = getUNDEF(Ptr.getValueType()); - return getLoad(ISD::UNINDEXED, dl, ExtType, VT, Chain, Ptr, Undef, + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, SV, SVOffset, MemVT, isVolatile, isNonTemporal, Alignment); } @@ -3995,7 +3995,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, LoadSDNode *LD = cast(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && "Load is already a indexed load!"); - return getLoad(AM, dl, LD->getExtensionType(), OrigLoad.getValueType(), + return getLoad(AM, LD->getExtensionType(), OrigLoad.getValueType(), dl, LD->getChain(), Base, Offset, LD->getSrcValue(), LD->getSrcValueOffset(), LD->getMemoryVT(), 
LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -4141,9 +4141,10 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr, - SDValue SV) { - SDValue Ops[] = { Chain, Ptr, SV }; - return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 3); + SDValue SV, + unsigned Align) { + SDValue Ops[] = { Chain, Ptr, SV, getTargetConstant(Align, MVT::i32) }; + return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, @@ -4425,17 +4426,16 @@ SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { /// already exists. If the resultant node does not exist in the DAG, the /// input node is returned. As a degenerate case, if you specify the same /// input operands as the node already has, the input node is returned. -SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op) { assert(N->getNumOperands() == 1 && "Update with wrong number of operands"); // Check to see if there is no change. - if (Op == N->getOperand(0)) return InN; + if (Op == N->getOperand(0)) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4447,22 +4447,20 @@ SDValue SelectionDAG::UpdateNodeOperands(SDValue InN, SDValue Op) { // If this gets put into a CSE map, add it. if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG::UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2) { assert(N->getNumOperands() == 2 && "Update with wrong number of operands"); // Check to see if there is no change. if (Op1 == N->getOperand(0) && Op2 == N->getOperand(1)) - return InN; // No operands changed, just return the input node. + return N; // No operands changed, just return the input node. // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Op1, Op2, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4477,32 +4475,31 @@ UpdateNodeOperands(SDValue InN, SDValue Op1, SDValue Op2) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, SDValue Op3) { +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3) { SDValue Ops[] = { Op1, Op2, Op3 }; return UpdateNodeOperands(N, Ops, 3); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4) { SDValue Ops[] = { Op1, Op2, Op3, Op4 }; return UpdateNodeOperands(N, Ops, 4); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue N, SDValue Op1, SDValue Op2, +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, SDValue Op1, SDValue Op2, SDValue Op3, SDValue Op4, SDValue Op5) { SDValue Ops[] = { Op1, Op2, Op3, Op4, Op5 }; return UpdateNodeOperands(N, Ops, 5); } -SDValue SelectionDAG:: -UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { - SDNode *N = InN.getNode(); +SDNode *SelectionDAG:: +UpdateNodeOperands(SDNode *N, const SDValue *Ops, unsigned NumOps) { assert(N->getNumOperands() == NumOps && "Update with wrong number of operands"); @@ -4516,12 +4513,12 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { } // No operands changed, just return the input node. - if (!AnyChange) return InN; + if (!AnyChange) return N; // See if the modified node already exists. void *InsertPos = 0; if (SDNode *Existing = FindModifiedNodeSlot(N, Ops, NumOps, InsertPos)) - return SDValue(Existing, InN.getResNo()); + return Existing; // Nope it doesn't. Remove the node from its current place in the maps. if (InsertPos) @@ -4535,7 +4532,7 @@ UpdateNodeOperands(SDValue InN, const SDValue *Ops, unsigned NumOps) { // If this gets put into a CSE map, add it. 
if (InsertPos) CSEMap.InsertNode(N, InsertPos); - return InN; + return N; } /// DropOperands - Release the operands and set this node to have @@ -5378,9 +5375,10 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, + const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DebugLoc(), getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } @@ -5669,13 +5667,16 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; - case ISD::FPOWI: return "fpowi"; - case ISD::FPOW: return "fpow"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FEXP: return "fexp"; + case ISD::FEXP2: return "fexp2"; + case ISD::FLOG: return "flog"; + case ISD::FLOG2: return "flog2"; + case ISD::FLOG10: return "flog10"; // Binary operators case ISD::ADD: return "add"; @@ -5706,7 +5707,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FREM: return "frem"; case ISD::FCOPYSIGN: return "fcopysign"; case ISD::FGETSIGN: return "fgetsign"; + case ISD::FPOW: return "fpow"; + case ISD::FPOWI: return "fpowi"; case ISD::SETCC: return "setcc"; case ISD::VSETCC: return "vsetcc"; case ISD::SELECT: return "select"; @@ -6260,23 +6263,6 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo(); unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx), FrameOffset); - if (MFI.isFixedObjectIndex(FrameIdx)) { - int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset; - - // The alignment of the frame index can be determined from its offset from - // the incoming frame position. If the frame object is at offset 32 and - // the stack is guaranteed to be 16-byte aligned, then we know that the - // object is 16-byte aligned. - unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment(); - unsigned Align = MinAlign(ObjectOffset, StackAlign); - - // Finally, the frame object itself may have a known alignment. Factor - // the alignment + offset into a new alignment. For example, if we know - // the FI is 8 byte aligned, but the pointer is 4 off, we really have a - // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte - // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc. 
- return std::max(Align, FIInfoAlign); - } return FIInfoAlign; } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index fbe601f2db09..d323c163c143 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "isel" #include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -32,6 +31,7 @@ #include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/MachineFunction.h" @@ -70,113 +70,6 @@ LimitFPPrecision("limit-float-precision", cl::location(LimitFloatPrecision), cl::init(0)); -namespace { - /// RegsForValue - This struct represents the registers (physical or virtual) - /// that a particular set of values is assigned, and the type information - /// about the value. The most common situation is to represent one value at a - /// time, but struct or array values are handled element-wise as multiple - /// values. The splitting of aggregates is performed recursively, so that we - /// never have aggregate-typed registers. The values at this point do not - /// necessarily have legal types, so each value may require one or more - /// registers of some legal type. - /// - struct RegsForValue { - /// TLI - The TargetLowering object. - /// - const TargetLowering *TLI; - - /// ValueVTs - The value types of the values, which may not be legal, and - /// may need be promoted or synthesized from one or more registers. - /// - SmallVector ValueVTs; - - /// RegVTs - The value types of the registers. This is the same size as - /// ValueVTs and it records, for each value, what the type of the assigned - /// register or registers are. (Individual values are never synthesized - /// from more than one type of register.) - /// - /// With virtual registers, the contents of RegVTs is redundant with TLI's - /// getRegisterType member function, however when with physical registers - /// it is necessary to have a separate record of the types. - /// - SmallVector RegVTs; - - /// Regs - This list holds the registers assigned to the values. - /// Each legal or promoted value requires one register, and each - /// expanded value requires multiple registers. - /// - SmallVector Regs; - - RegsForValue() : TLI(0) {} - - RegsForValue(const TargetLowering &tli, - const SmallVector ®s, - EVT regvt, EVT valuevt) - : TLI(&tli), ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} - RegsForValue(const TargetLowering &tli, - const SmallVector ®s, - const SmallVector ®vts, - const SmallVector &valuevts) - : TLI(&tli), ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} - RegsForValue(LLVMContext &Context, const TargetLowering &tli, - unsigned Reg, const Type *Ty) : TLI(&tli) { - ComputeValueVTs(tli, Ty, ValueVTs); - - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(Context, ValueVT); - EVT RegisterVT = TLI->getRegisterType(Context, ValueVT); - for (unsigned i = 0; i != NumRegs; ++i) - Regs.push_back(Reg + i); - RegVTs.push_back(RegisterVT); - Reg += NumRegs; - } - } - - /// areValueTypesLegal - Return true if types of all the values are legal. 
- bool areValueTypesLegal() { - for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT RegisterVT = RegVTs[Value]; - if (!TLI->isTypeLegal(RegisterVT)) - return false; - } - return true; - } - - - /// append - Add the specified values to this one. - void append(const RegsForValue &RHS) { - TLI = RHS.TLI; - ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); - RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); - Regs.append(RHS.Regs.begin(), RHS.Regs.end()); - } - - - /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from - /// this value and returns the result as a ValueVTs value. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the - /// specified value into the registers specified by this object. This uses - /// Chain/Flag as the input and updates them for the output Chain/Flag. - /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const; - - /// AddInlineAsmOperands - Add this value to the specified inlineasm node - /// operand list. This adds the code marker, matching input operand index - /// (if applicable), and includes the number of values added into it. - void AddInlineAsmOperands(unsigned Kind, - bool HasMatching, unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const; - }; -} - /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type /// larger then ValueVT then AssertOp can be used to specify whether the extra @@ -528,6 +421,268 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, } } +namespace { + /// RegsForValue - This struct represents the registers (physical or virtual) + /// that a particular set of values is assigned, and the type information + /// about the value. The most common situation is to represent one value at a + /// time, but struct or array values are handled element-wise as multiple + /// values. The splitting of aggregates is performed recursively, so that we + /// never have aggregate-typed registers. The values at this point do not + /// necessarily have legal types, so each value may require one or more + /// registers of some legal type. + /// + struct RegsForValue { + /// ValueVTs - The value types of the values, which may not be legal, and + /// may need be promoted or synthesized from one or more registers. + /// + SmallVector ValueVTs; + + /// RegVTs - The value types of the registers. This is the same size as + /// ValueVTs and it records, for each value, what the type of the assigned + /// register or registers are. (Individual values are never synthesized + /// from more than one type of register.) + /// + /// With virtual registers, the contents of RegVTs is redundant with TLI's + /// getRegisterType member function, however when with physical registers + /// it is necessary to have a separate record of the types. + /// + SmallVector RegVTs; + + /// Regs - This list holds the registers assigned to the values. + /// Each legal or promoted value requires one register, and each + /// expanded value requires multiple registers. 
+ /// + SmallVector Regs; + + RegsForValue() {} + + RegsForValue(const SmallVector ®s, + EVT regvt, EVT valuevt) + : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} + + RegsForValue(const SmallVector ®s, + const SmallVector ®vts, + const SmallVector &valuevts) + : ValueVTs(valuevts), RegVTs(regvts), Regs(regs) {} + + RegsForValue(LLVMContext &Context, const TargetLowering &tli, + unsigned Reg, const Type *Ty) { + ComputeValueVTs(tli, Ty, ValueVTs); + + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); + EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + for (unsigned i = 0; i != NumRegs; ++i) + Regs.push_back(Reg + i); + RegVTs.push_back(RegisterVT); + Reg += NumRegs; + } + } + + /// areValueTypesLegal - Return true if types of all the values are legal. + bool areValueTypesLegal(const TargetLowering &TLI) { + for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT RegisterVT = RegVTs[Value]; + if (!TLI.isTypeLegal(RegisterVT)) + return false; + } + return true; + } + + /// append - Add the specified values to this one. + void append(const RegsForValue &RHS) { + ValueVTs.append(RHS.ValueVTs.begin(), RHS.ValueVTs.end()); + RegVTs.append(RHS.RegVTs.begin(), RHS.RegVTs.end()); + Regs.append(RHS.Regs.begin(), RHS.Regs.end()); + } + + /// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from + /// this value and returns the result as a ValueVTs value. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// getCopyToRegs - Emit a series of CopyToReg nodes that copies the + /// specified value into the registers specified by this object. This uses + /// Chain/Flag as the input and updates them for the output Chain/Flag. + /// If the Flag pointer is NULL, no flag is used. + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const; + + /// AddInlineAsmOperands - Add this value to the specified inlineasm node + /// operand list. This adds the code marker, matching input operand index + /// (if applicable), and includes the number of values added into it. + void AddInlineAsmOperands(unsigned Kind, + bool HasMatching, unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const; + }; +} + +/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from +/// this value and returns the result as a ValueVT value. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, + FunctionLoweringInfo &FuncInfo, + DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Assemble the legal parts into the final values. + SmallVector Values(ValueVTs.size()); + SmallVector Parts; + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + // Copy the legal parts from the registers. 
+ EVT ValueVT = ValueVTs[Value]; + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + Parts.resize(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue P; + if (Flag == 0) { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); + } else { + P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); + *Flag = P.getValue(2); + } + + Chain = P.getValue(1); + + // If the source register was virtual and if we know something about it, + // add an assert node. + if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && + RegisterVT.isInteger() && !RegisterVT.isVector()) { + unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; + if (FuncInfo.LiveOutRegInfo.size() > SlotNo) { + const FunctionLoweringInfo::LiveOutInfo &LOI = + FuncInfo.LiveOutRegInfo[SlotNo]; + + unsigned RegSize = RegisterVT.getSizeInBits(); + unsigned NumSignBits = LOI.NumSignBits; + unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); + + // FIXME: We capture more information than the dag can represent. For + // now, just use the tightest assertzext/assertsext possible. + bool isSExt = true; + EVT FromVT(MVT::Other); + if (NumSignBits == RegSize) + isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 + else if (NumZeroBits >= RegSize-1) + isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 + else if (NumSignBits > RegSize-8) + isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 + else if (NumZeroBits >= RegSize-8) + isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 + else if (NumSignBits > RegSize-16) + isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 + else if (NumZeroBits >= RegSize-16) + isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 + else if (NumSignBits > RegSize-32) + isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 + else if (NumZeroBits >= RegSize-32) + isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 + + if (FromVT != MVT::Other) + P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, + RegisterVT, P, DAG.getValueType(FromVT)); + } + } + + Parts[i] = P; + } + + Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), + NumRegs, RegisterVT, ValueVT); + Part += NumRegs; + Parts.clear(); + } + + return DAG.getNode(ISD::MERGE_VALUES, dl, + DAG.getVTList(&ValueVTs[0], ValueVTs.size()), + &Values[0], ValueVTs.size()); +} + +/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the +/// specified value into the registers specified by this object. This uses +/// Chain/Flag as the input and updates them for the output Chain/Flag. +/// If the Flag pointer is NULL, no flag is used. +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + SDValue &Chain, SDValue *Flag) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Get the list of the values's legal parts. + unsigned NumRegs = Regs.size(); + SmallVector Parts(NumRegs); + for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { + EVT ValueVT = ValueVTs[Value]; + unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); + EVT RegisterVT = RegVTs[Value]; + + getCopyToParts(DAG, dl, + Val.getValue(Val.getResNo() + Value), + &Parts[Part], NumParts, RegisterVT); + Part += NumParts; + } + + // Copy the parts into the registers. 
+ SmallVector Chains(NumRegs); + for (unsigned i = 0; i != NumRegs; ++i) { + SDValue Part; + if (Flag == 0) { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); + } else { + Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); + *Flag = Part.getValue(1); + } + + Chains[i] = Part.getValue(0); + } + + if (NumRegs == 1 || Flag) + // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is + // flagged to it. That is the CopyToReg nodes and the user are considered + // a single scheduling unit. If we create a TokenFactor and return it as + // chain, then the TokenFactor is both a predecessor (operand) of the + // user as well as a successor (the TF operands are flagged to the user). + // c1, f1 = CopyToReg + // c2, f2 = CopyToReg + // c3 = TokenFactor c1, c2 + // ... + // = op c3, ..., f2 + Chain = Chains[NumRegs-1]; + else + Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); +} + +/// AddInlineAsmOperands - Add this value to the specified inlineasm node +/// operand list. This adds the code marker and includes the number of +/// values added into it. +void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, + unsigned MatchingIdx, + SelectionDAG &DAG, + std::vector &Ops) const { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); + if (HasMatching) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); + SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); + Ops.push_back(Res); + + for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { + unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); + EVT RegisterVT = RegVTs[Value]; + for (unsigned i = 0; i != NumRegs; ++i) { + assert(Reg < Regs.size() && "Mismatch in # registers expected"); + Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); + } + } +} void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { AA = &aa; @@ -543,6 +698,7 @@ void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis &aa) { /// consumed. void SelectionDAGBuilder::clear() { NodeMap.clear(); + UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); CurDebugLoc = DebugLoc(); @@ -649,27 +805,63 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { } } +// getValue - Return an SDValue for the given Value. SDValue SelectionDAGBuilder::getValue(const Value *V) { + // If we already have an SDValue for this value, use it. It's important + // to do this first, so that we don't create a CopyFromReg if we already + // have a regular SDValue. SDValue &N = NodeMap[V]; if (N.getNode()) return N; + // If there's a virtual register allocated and initialized for this + // value, use it. + DenseMap::iterator It = FuncInfo.ValueMap.find(V); + if (It != FuncInfo.ValueMap.end()) { + unsigned InReg = It->second; + RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + SDValue Chain = DAG.getEntryNode(); + return N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); + } + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getNonRegisterValue - Return an SDValue for the given Value, but +/// don't look in FuncInfo.ValueMap for a virtual register. +SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { + // If we already have an SDValue for this value, use it. 
+ SDValue &N = NodeMap[V]; + if (N.getNode()) return N; + + // Otherwise create a new SDValue and remember it. + SDValue Val = getValueImpl(V); + NodeMap[V] = Val; + return Val; +} + +/// getValueImpl - Helper function for getValue and getMaterializedValue. +/// Create an SDValue for the given value. +SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { if (const Constant *C = dyn_cast(V)) { EVT VT = TLI.getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) - return N = DAG.getConstant(*CI, VT); + return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast(C)) - return N = DAG.getGlobalAddress(GV, VT); + return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); if (isa(C)) - return N = DAG.getConstant(0, TLI.getPointerTy()); + return DAG.getConstant(0, TLI.getPointerTy()); if (const ConstantFP *CFP = dyn_cast(C)) - return N = DAG.getConstantFP(*CFP, VT); + return DAG.getConstantFP(*CFP, VT); if (isa(C) && !V->getType()->isAggregateType()) - return N = DAG.getUNDEF(VT); + return DAG.getUNDEF(VT); if (const ConstantExpr *CE = dyn_cast(C)) { visit(CE->getOpcode(), *CE); @@ -757,82 +949,25 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); } - unsigned InReg = FuncInfo.ValueMap[V]; - assert(InReg && "Value not in map!"); - - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); - SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL); -} - -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -static void getReturnInfo(const Type* ReturnType, - Attributes attr, SmallVectorImpl &OutVTs, - SmallVectorImpl &OutFlags, - const TargetLowering &TLI, - SmallVectorImpl *Offsets = 0) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - unsigned Offset = 0; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr & Attribute::SExt) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr & Attribute::ZExt) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. 
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( - PartVT.getTypeForEVT(ReturnType->getContext())); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr & Attribute::InReg) - Flags.setInReg(); - - // Propagate extension type if any - if (attr & Attribute::SExt) - Flags.setSExt(); - else if (attr & Attribute::ZExt) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) { - OutVTs.push_back(PartVT); - OutFlags.push_back(Flags); - if (Offsets) - { - Offsets->push_back(Offset); - Offset += PartSize; - } - } + // If this is an instruction which fast-isel has deferred, select it now. + if (const Instruction *Inst = dyn_cast(V)) { + unsigned InReg = FuncInfo.InitializeRegForValue(Inst); + RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + SDValue Chain = DAG.getEntryNode(); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL); } + + llvm_unreachable("Can't get register for value!"); + return SDValue(); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SDValue Chain = getControlRoot(); SmallVector Outs; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); + SmallVector OutVals; - if (!FLI.CanLowerReturn) { - unsigned DemoteReg = FLI.DemoteRegister; + if (!FuncInfo.CanLowerReturn) { + unsigned DemoteReg = FuncInfo.DemoteRegister; const Function *F = I.getParent()->getParent(); // Emit a store of the return value through the virtual register. @@ -908,8 +1043,11 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { else if (F->paramHasAttr(0, Attribute::ZExt)) Flags.setZExt(); - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true)); + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), + /*isfixed=*/true)); + OutVals.push_back(Parts[i]); + } } } } @@ -918,7 +1056,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, getCurDebugLoc(), DAG); + Outs, OutVals, getCurDebugLoc(), DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1119,7 +1257,7 @@ SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ } void SelectionDAGBuilder::visitBr(const BranchInst &I) { - MachineBasicBlock *BrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *BrMBB = FuncInfo.MBB; // Update machine-CFG edges. MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1269,18 +1407,10 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MVT::Other, getControlRoot(), Cond, DAG.getBasicBlock(CB.TrueBB)); - // If the branch was constant folded, fix up the CFG. - if (BrCond.getOpcode() == ISD::BR) { - SwitchBB->removeSuccessor(CB.FalseBB); - } else { - // Otherwise, go ahead and insert the false branch. 
- if (BrCond == getControlRoot()) - SwitchBB->removeSuccessor(CB.TrueBB); - - if (CB.FalseBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, - DAG.getBasicBlock(CB.FalseBB)); - } + // Insert the false branch. + if (CB.FalseBB != NextBlock) + BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond, + DAG.getBasicBlock(CB.FalseBB)); DAG.setRoot(BrCond); } @@ -1319,7 +1449,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, // therefore require extension or truncating. SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - unsigned JumpTableReg = FuncInfo.MakeReg(TLI.getPointerTy()); + unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), JumpTableReg, SwitchOp); JT.Reg = JumpTableReg; @@ -1370,7 +1500,7 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, SDValue ShiftOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy()); - B.Reg = FuncInfo.MakeReg(TLI.getPointerTy()); + B.Reg = FuncInfo.CreateReg(TLI.getPointerTy()); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, ShiftOp); @@ -1402,29 +1532,41 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - // Make desired shift SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, TLI.getPointerTy()); - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), - TLI.getPointerTy(), - DAG.getConstant(1, TLI.getPointerTy()), - ShiftOp); - - // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), - TLI.getPointerTy(), SwitchVal, - DAG.getConstant(B.Mask, TLI.getPointerTy())); - SDValue AndCmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(AndOp.getValueType()), - AndOp, DAG.getConstant(0, TLI.getPointerTy()), - ISD::SETNE); + SDValue Cmp; + if (CountPopulation_64(B.Mask) == 1) { + // Testing for a single bit; just compare the shift count with what it + // would need to be to shift a 1 bit in that position. + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(ShiftOp.getValueType()), + ShiftOp, + DAG.getConstant(CountTrailingZeros_64(B.Mask), + TLI.getPointerTy()), + ISD::SETEQ); + } else { + // Make desired shift + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), + TLI.getPointerTy(), + DAG.getConstant(1, TLI.getPointerTy()), + ShiftOp); + + // Emit bit tests and jumps + SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + TLI.getPointerTy(), SwitchVal, + DAG.getConstant(B.Mask, TLI.getPointerTy())); + Cmp = DAG.getSetCC(getCurDebugLoc(), + TLI.getSetCCResultType(AndOp.getValueType()), + AndOp, DAG.getConstant(0, TLI.getPointerTy()), + ISD::SETNE); + } SwitchBB->addSuccessor(B.TargetBB); SwitchBB->addSuccessor(NextMBB); SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), MVT::Other, getControlRoot(), - AndCmp, DAG.getBasicBlock(B.TargetBB)); + Cmp, DAG.getBasicBlock(B.TargetBB)); // Set NextBlock to be the MBB immediately after the current one, if any. // This is used to avoid emitting unnecessary branches to the next block. @@ -1441,7 +1583,7 @@ void SelectionDAGBuilder::visitBitTestCase(MachineBasicBlock* NextMBB, } void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { - MachineBasicBlock *InvokeMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *InvokeMBB = FuncInfo.MBB; // Retrieve successors. 
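One functional change in the hunk above is in visitBitTestCase: when the bit-test mask has exactly one set bit, the shift-plus-AND sequence is replaced by a direct comparison of the variable shift amount against that bit's index. A small self-contained illustration of why the two forms agree; CountPopulation_64 and CountTrailingZeros_64 are modelled here with the GCC/Clang builtins, and Shift is assumed to be below 64:

#include <cstdint>

// Generic form: materialize (1 << Shift), AND with the mask, test non-zero.
bool bitTestGeneric(uint64_t Mask, unsigned Shift) {
  return ((UINT64_C(1) << Shift) & Mask) != 0;
}

// Single-bit form: when popcount(Mask) == 1 the test above holds exactly
// when Shift equals the index of that one bit, so compare shift counts.
bool bitTestSingleBit(uint64_t Mask, unsigned Shift) {
  return Shift == static_cast<unsigned>(__builtin_ctzll(Mask));
}

// Example: Mask = 0x20 has a single bit at index 5, so both forms are true
// only for Shift == 5, and the single-bit form needs no shift and no AND.
bool agreesOnSingleBitMasks(uint64_t Mask, unsigned Shift) {
  if (__builtin_popcountll(Mask) != 1)
    return true; // the rewrite only applies to single-bit masks
  return bitTestGeneric(Mask, Shift) == bitTestSingleBit(Mask, Shift);
}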
MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)]; @@ -1969,7 +2111,7 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, } void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { - MachineBasicBlock *SwitchMBB = FuncInfo.MBBMap[SI.getParent()]; + MachineBasicBlock *SwitchMBB = FuncInfo.MBB; // Figure out which block is immediately after the current one. MachineBasicBlock *NextBlock = 0; @@ -2035,7 +2177,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { } void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { - MachineBasicBlock *IndirectBrMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB; // Update machine-CFG edges with unique successors. SmallVector succs; @@ -2245,7 +2387,6 @@ void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2254,7 +2395,6 @@ void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT SrcVT = N.getValueType(); EVT DestVT = TLI.getValueType(I.getType()); setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); } @@ -2579,7 +2719,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { // If this is a constant subscript, handle it quickly. if (const ConstantInt *CI = dyn_cast(Idx)) { - if (CI->getZExtValue() == 0) continue; + if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; @@ -2643,12 +2783,13 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { SDValue AllocSize = getValue(I.getArraySize()); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), AllocSize.getValueType(), - AllocSize, - DAG.getConstant(TySize, AllocSize.getValueType())); - EVT IntPtr = TLI.getPointerTy(); - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + if (AllocSize.getValueType() != IntPtr) + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + + AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize, + DAG.getConstant(TySize, IntPtr)); // Handle alignment. If the requested alignment is less than or equal to // the stack alignment, ignore it. If the size is greater than or equal to @@ -2804,8 +2945,8 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, Ops.push_back(DAG.getConstant(Intrinsic, TLI.getPointerTy())); // Add all operands of the call to the operand list. 
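The visitAlloca hunk above now converts the dynamic element count to the pointer-sized integer type before multiplying by the type size, so the whole size computation happens in IntPtr. One visible consequence, sketched below with plain integers and invented function names, is that a product which would wrap in a narrower count type is instead computed at full pointer width:

#include <cstdint>

// Old ordering: multiply in the count's own width, then extend the product.
uint64_t sizeMulThenExtend(uint32_t Count, uint32_t TySize) {
  uint32_t Product = Count * TySize;           // 32-bit multiply, can wrap
  return static_cast<uint64_t>(Product);       // zero-extend afterwards
}

// New ordering: extend the count to the pointer width first, then multiply.
uint64_t sizeExtendThenMul(uint32_t Count, uint32_t TySize) {
  uint64_t Wide = Count;                       // zext to pointer width first
  return Wide * static_cast<uint64_t>(TySize); // 64-bit multiply
}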
- for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) { - SDValue Op = getValue(I.getOperand(i)); + for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { + SDValue Op = getValue(I.getArgOperand(i)); assert(TLI.isTypeLegal(Op.getValueType()) && "Intrinsic uses a non-legal type?"); Ops.push_back(Op); @@ -2910,11 +3051,11 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, SDValue Root = getRoot(); SDValue L = DAG.getAtomic(Op, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -2923,8 +3064,8 @@ SelectionDAGBuilder::implVisitBinaryAtomic(const CallInst& I, // implVisitAluOverflow - Lower arithmetic overflow instrinsics. const char * SelectionDAGBuilder::implVisitAluOverflow(const CallInst &I, ISD::NodeType Op) { - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); @@ -2938,9 +3079,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3050,8 +3191,8 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3064,9 +3205,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3160,8 +3301,8 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3174,9 +3315,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Get the exponent. 
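Much of this file's churn, here and in the following hunks, is the mechanical switch from I.getOperand(N) to I.getArgOperand(N-1): call arguments are addressed by their own zero-based index rather than by their position in the instruction's operand list. A toy model of that relationship; the operand layout below, callee first and then the arguments, is invented purely to make the old and new indexing concrete:

#include <cassert>
#include <vector>

struct ToyValue { int Id; };

// A toy call instruction whose operand list stores the callee followed by
// the arguments, which is the layout the old getOperand(1..N) indexing assumed.
class ToyCallInst {
  std::vector<ToyValue> Operands; // [callee, arg0, arg1, ...]
public:
  explicit ToyCallInst(std::vector<ToyValue> Ops) : Operands(std::move(Ops)) {}

  ToyValue getOperand(unsigned i) const { return Operands.at(i); }

  unsigned getNumArgOperands() const {
    return static_cast<unsigned>(Operands.size()) - 1;
  }

  // Zero-based argument accessor, independent of where the callee is stored.
  ToyValue getArgOperand(unsigned i) const {
    assert(i < getNumArgOperands() && "argument index out of range");
    return Operands.at(i + 1);
  }
};

// With this layout getArgOperand(0) is getOperand(1), which is exactly the
// substitution applied throughout the intrinsic-lowering hunks above.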
@@ -3269,8 +3410,8 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3283,9 +3424,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -3371,8 +3512,8 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3385,9 +3526,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue result; DebugLoc dl = getCurDebugLoc(); - if (getValue(I.getOperand(1)).getValueType() == MVT::f32 && + if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(1)); + SDValue Op = getValue(I.getArgOperand(0)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); @@ -3485,8 +3626,8 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { } else { // No special expansion. result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0))); } setValue(&I, result); @@ -3497,12 +3638,12 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { void SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue result; - const Value *Val = I.getOperand(1); + const Value *Val = I.getArgOperand(0); DebugLoc dl = getCurDebugLoc(); bool IsExp10 = false; if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getOperand(2)).getValueType() == MVT::f32 && + getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (Constant *C = const_cast(dyn_cast(Val))) { if (ConstantFP *CFP = dyn_cast(C)) { @@ -3513,7 +3654,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getOperand(2)); + SDValue Op = getValue(I.getArgOperand(1)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3618,9 +3759,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { } else { // No special expansion. 
result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1))); } setValue(&I, result); @@ -3696,7 +3837,7 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const DbgValueInst &DI, if (DV.isInlinedFnArgument(MF.getFunction())) return false; - MachineBasicBlock *MBB = FuncInfo.MBBMap[DI.getParent()]; + MachineBasicBlock *MBB = FuncInfo.MBB; if (MBB != &MF.front()) return false; @@ -3750,11 +3891,11 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: return "_setjmp"+!TLI.usesUnderscoreSetJmp(); @@ -3763,63 +3904,63 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast(I.getOperand(2)->getType())->getAddressSpace() + cast(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::memset: { // Assert for address < 256 since we support only user defined address // spaces. - assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0)); + I.getArgOperand(0), 0)); return 0; } case Intrinsic::memmove: { // Assert for address < 256 since we support only user defined address // spaces. 
- assert(cast(I.getOperand(1)->getType())->getAddressSpace() + assert(cast(I.getArgOperand(0)->getType())->getAddressSpace() < 256 && - cast(I.getOperand(2)->getType())->getAddressSpace() + cast(I.getArgOperand(1)->getType())->getAddressSpace() < 256 && "Unknown address space"); - SDValue Op1 = getValue(I.getOperand(1)); - SDValue Op2 = getValue(I.getOperand(2)); - SDValue Op3 = getValue(I.getOperand(3)); - unsigned Align = cast(I.getOperand(4))->getZExtValue(); - bool isVol = cast(I.getOperand(5))->getZExtValue(); + SDValue Op1 = getValue(I.getArgOperand(0)); + SDValue Op2 = getValue(I.getArgOperand(1)); + SDValue Op3 = getValue(I.getArgOperand(2)); + unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + bool isVol = cast(I.getArgOperand(4))->getZExtValue(); // If the source and destination are known to not be aliases, we can // lower memmove as memcpy. uint64_t Size = -1ULL; if (ConstantSDNode *C = dyn_cast(Op3)) Size = C->getZExtValue(); - if (AA->alias(I.getOperand(1), Size, I.getOperand(2), Size) == + if (AA->alias(I.getArgOperand(0), Size, I.getArgOperand(1), Size) == AliasAnalysis::NoAlias) { DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - false, I.getOperand(1), 0, I.getOperand(2), 0)); + false, I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, - I.getOperand(1), 0, I.getOperand(2), 0)); + I.getArgOperand(0), 0, I.getArgOperand(1), 0)); return 0; } case Intrinsic::dbg_declare: { @@ -3908,7 +4049,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } else { bool createUndef = false; // FIXME : Why not use getValue() directly ? - SDValue &N = NodeMap[V]; + SDValue N = NodeMap[V]; + if (!N.getNode() && isa(V)) + // Check unused arguments map. + N = UnusedArgNodeMap[V]; if (N.getNode()) { if (!EmitFuncArgumentDbgValue(DI, V, Variable, Offset, N)) { SDV = DAG.getDbgValue(Variable, N.getNode(), @@ -3956,7 +4100,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_exception: { // Insert the EXCEPTIONADDR instruction. - assert(FuncInfo.MBBMap[I.getParent()]->isLandingPad() && + assert(FuncInfo.MBB->isLandingPad() && "Call to eh.exception not in landing pad!"); SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[1]; @@ -3968,7 +4112,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_selector: { - MachineBasicBlock *CallMBB = FuncInfo.MBBMap[I.getParent()]; + MachineBasicBlock *CallMBB = FuncInfo.MBB; MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); if (CallMBB->isLandingPad()) AddCatchInfo(I, &MMI, CallMBB); @@ -3978,13 +4122,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { #endif // FIXME: Mark exception selector register as live in. Hack for PR1508. unsigned Reg = TLI.getExceptionSelectorRegister(); - if (Reg) FuncInfo.MBBMap[I.getParent()]->addLiveIn(Reg); + if (Reg) FuncInfo.MBB->addLiveIn(Reg); } // Insert the EHSELECTION instruction. 
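The memmove hunk above asks alias analysis whether the source and destination can overlap and, if they provably cannot, emits the cheaper memcpy instead. The same idea for ordinary buffers, with the no-alias query approximated by a disjoint-range check on raw addresses (a simplification, not what AliasAnalysis actually does):

#include <cstdint>
#include <cstring>

// memcpy may be used only when the ranges cannot overlap; memmove is the
// safe fallback for everything else.
void copyPreferMemcpy(void *Dst, const void *Src, size_t Size) {
  uintptr_t D = reinterpret_cast<uintptr_t>(Dst);
  uintptr_t S = reinterpret_cast<uintptr_t>(Src);
  bool NoAlias = (D + Size <= S) || (S + Size <= D);
  if (NoAlias)
    std::memcpy(Dst, Src, Size);   // cheaper: no overlap handling needed
  else
    std::memmove(Dst, Src, Size);  // overlapping ranges must use memmove
}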
SDVTList VTs = DAG.getVTList(TLI.getPointerTy(), MVT::Other); SDValue Ops[2]; - Ops[0] = getValue(I.getOperand(1)); + Ops[0] = getValue(I.getArgOperand(0)); Ops[1] = getRoot(); SDValue Op = DAG.getNode(ISD::EHSELECTION, dl, VTs, Ops, 2); DAG.setRoot(Op.getValue(1)); @@ -3994,7 +4138,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_typeid_for: { // Find the type id for the given typeinfo. - GlobalVariable *GV = ExtractTypeInfo(I.getOperand(1)); + GlobalVariable *GV = ExtractTypeInfo(I.getArgOperand(0)); unsigned TypeID = DAG.getMachineFunction().getMMI().getTypeIDFor(GV); Res = DAG.getConstant(TypeID, MVT::i32); setValue(&I, Res); @@ -4007,15 +4151,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, MVT::Other, getControlRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); return 0; case Intrinsic::eh_unwind_init: DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - EVT VT = getValue(I.getOperand(1)).getValueType(); - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), dl, + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, TLI.getPointerTy()); SDValue Offset = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), @@ -4031,7 +4174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_callsite: { MachineModuleInfo &MMI = DAG.getMachineFunction().getMMI(); - ConstantInt *CI = dyn_cast(I.getOperand(1)); + ConstantInt *CI = dyn_cast(I.getArgOperand(0)); assert(CI && "Non-constant call site value in eh.sjlj.callsite!"); assert(MMI.getCurrentCallSite() == 0 && "Overlapping call sites!"); @@ -4040,13 +4183,13 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::eh_sjlj_setjmp: { setValue(&I, DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, MVT::i32, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } case Intrinsic::eh_sjlj_longjmp: { DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, getRoot(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)))); return 0; } @@ -4072,34 +4215,34 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } EVT DestVT = TLI.getValueType(I.getType()); - const Value *Op1 = I.getOperand(1); + const Value *Op1 = I.getArgOperand(0); Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), Code); setValue(&I, Res); return 0; } case Intrinsic::sqrt: setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getOperand(1)), - getValue(I.getOperand(2)), DAG)); + setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::sin: setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + 
getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cos: setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::log: visitLog(I); @@ -4121,14 +4264,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::convert_to_fp16: setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, - MVT::i16, getValue(I.getOperand(1)))); + MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, - MVT::f32, getValue(I.getOperand(1)))); + MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); return 0; } @@ -4143,23 +4286,23 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::bswap: setValue(&I, DAG.getNode(ISD::BSWAP, dl, - getValue(I.getOperand(1)).getValueType(), - getValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)))); return 0; case Intrinsic::cttz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTTZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTLZ, dl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { - SDValue Arg = getValue(I.getOperand(1)); + SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); return 0; @@ -4173,7 +4316,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::stackrestore: { - Res = getValue(I.getOperand(1)); + Res = getValue(I.getArgOperand(0)); DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); return 0; } @@ -4183,8 +4326,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { MachineFrameInfo *MFI = MF.getFrameInfo(); EVT PtrTy = TLI.getPointerTy(); - SDValue Src = getValue(I.getOperand(1)); // The guard's value. - AllocaInst *Slot = cast(I.getOperand(2)); + SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. + AllocaInst *Slot = cast(I.getArgOperand(1)); int FI = FuncInfo.StaticAllocaMap[Slot]; MFI->setStackProtectorIndex(FI); @@ -4201,14 +4344,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::objectsize: { // If we don't know by now, we're never going to know. 
- ConstantInt *CI = dyn_cast(I.getOperand(2)); + ConstantInt *CI = dyn_cast(I.getArgOperand(1)); assert(CI && "Non-constant type in __builtin_object_size?"); - SDValue Arg = getValue(I.getOperand(0)); + SDValue Arg = getValue(I.getCalledValue()); EVT Ty = Arg.getValueType(); - if (CI->getZExtValue() == 0) + if (CI->isZero()) Res = DAG.getConstant(-1ULL, Ty); else Res = DAG.getConstant(0, Ty); @@ -4221,14 +4364,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; case Intrinsic::init_trampoline: { - const Function *F = cast(I.getOperand(2)->stripPointerCasts()); + const Function *F = cast(I.getArgOperand(1)->stripPointerCasts()); SDValue Ops[6]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); - Ops[4] = DAG.getSrcValue(I.getOperand(1)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); + Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); Res = DAG.getNode(ISD::TRAMPOLINE, dl, @@ -4241,8 +4384,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::gcroot: if (GFI) { - const Value *Alloca = I.getOperand(1); - const Constant *TypeMap = cast(I.getOperand(2)); + const Value *Alloca = I.getArgOperand(0); + const Constant *TypeMap = cast(I.getArgOperand(1)); FrameIndexSDNode *FI = cast(getValue(Alloca).getNode()); GFI->addStackRoot(FI->getIndex(), TypeMap); @@ -4274,9 +4417,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::prefetch: { SDValue Ops[4]; Ops[0] = getRoot(); - Ops[1] = getValue(I.getOperand(1)); - Ops[2] = getValue(I.getOperand(2)); - Ops[3] = getValue(I.getOperand(3)); + Ops[1] = getValue(I.getArgOperand(0)); + Ops[2] = getValue(I.getArgOperand(1)); + Ops[3] = getValue(I.getArgOperand(2)); DAG.setRoot(DAG.getNode(ISD::PREFETCH, dl, MVT::Other, &Ops[0], 4)); return 0; } @@ -4285,7 +4428,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[6]; Ops[0] = getRoot(); for (int x = 1; x < 6; ++x) - Ops[x] = getValue(I.getOperand(x)); + Ops[x] = getValue(I.getArgOperand(x - 1)); DAG.setRoot(DAG.getNode(ISD::MEMBARRIER, dl, MVT::Other, &Ops[0], 6)); return 0; @@ -4294,12 +4437,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Root = getRoot(); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, getCurDebugLoc(), - getValue(I.getOperand(2)).getValueType().getSimpleVT(), + getValue(I.getArgOperand(1)).getValueType().getSimpleVT(), Root, - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - getValue(I.getOperand(3)), - I.getOperand(1)); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + getValue(I.getArgOperand(2)), + I.getArgOperand(0)); setValue(&I, L); DAG.setRoot(L.getValue(1)); return 0; @@ -4353,14 +4496,13 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Args.reserve(CS.arg_size()); // Check whether the function can return without sret-demotion. 
- SmallVector OutVTs; - SmallVector OutsFlags; + SmallVector Outs; SmallVector Offsets; - getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI, &Offsets); + GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), + Outs, TLI, &Offsets); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - FTy->isVarArg(), OutVTs, OutsFlags, DAG); + FTy->isVarArg(), Outs, FTy->getContext()); SDValue DemoteStackSlot; @@ -4453,7 +4595,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, ComputeValueVTs(TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; - unsigned NumValues = OutVTs.size(); + unsigned NumValues = Outs.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); @@ -4461,7 +4603,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second, + SDValue L = DAG.getLoad(Outs[i].VT, getCurDebugLoc(), Result.second, Add, NULL, Offsets[i], false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); @@ -4580,16 +4722,16 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, /// lowered like a normal call. bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) - if (I.getNumOperands() != 4) + if (I.getNumArgOperands() != 3) return false; - const Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + const Value *LHS = I.getArgOperand(0), *RHS = I.getArgOperand(1); if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() || - !I.getOperand(3)->getType()->isIntegerTy() || + !I.getArgOperand(2)->getType()->isIntegerTy() || !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast(I.getOperand(3)); + const ConstantInt *Size = dyn_cast(I.getArgOperand(2)); // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 @@ -4656,11 +4798,16 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { void SelectionDAGBuilder::visitCall(const CallInst &I) { + // Handle inline assembly differently. + if (isa(I.getCalledValue())) { + visitInlineAsm(&I); + return; + } + const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { if (F->isDeclaration()) { - const TargetIntrinsicInfo *II = TM.getIntrinsicInfo(); - if (II) { + if (const TargetIntrinsicInfo *II = TM.getIntrinsicInfo()) { if (unsigned IID = II->getIntrinsicID(F)) { RenameFn = visitIntrinsicCall(I, IID); if (!RenameFn) @@ -4679,51 +4826,51 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!F->hasLocalLinkage() && F->hasName()) { StringRef Name = F->getName(); if (Name == "copysign" || Name == "copysignf" || Name == "copysignl") { - if (I.getNumOperands() == 3 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && - I.getType() == I.getOperand(2)->getType()) { - SDValue LHS = getValue(I.getOperand(1)); - SDValue RHS = getValue(I.getOperand(2)); + if (I.getNumArgOperands() == 2 && // Basic sanity checks. 
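visitMemCmpCall above recognizes calls matching int memcmp(void*, void*, size_t) with a small constant size and, per the comments, rewrites the compared-against-zero form into integer loads and a single comparison. A portable sketch of the size-4 case; memcpy into a local stands in for the wide load so the example stays well-defined C++ rather than the type-punning cast written in the comment:

#include <cstdint>
#include <cstring>

// Equivalent of "memcmp(LHS, RHS, 4) != 0" lowered to one 32-bit compare.
bool memcmp4NotEqual(const void *LHS, const void *RHS) {
  uint32_t A, B;
  std::memcpy(&A, LHS, sizeof(A)); // stands in for the 32-bit load of *LHS
  std::memcpy(&B, RHS, sizeof(B)); // stands in for the 32-bit load of *RHS
  return A != B;
}

The rewrite shown in the comments applies when the result is only tested against zero; an ordering comparison cannot be reduced to a single integer inequality like this.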
+ I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && + I.getType() == I.getArgOperand(1)->getType()) { + SDValue LHS = getValue(I.getArgOperand(0)); + SDValue RHS = getValue(I.getArgOperand(1)); setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), LHS.getValueType(), LHS, RHS)); return; } } else if (Name == "fabs" || Name == "fabsf" || Name == "fabsl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType()) { - SDValue Tmp = getValue(I.getOperand(1)); + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType()) { + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FABS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sin" || Name == "sinf" || Name == "sinl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSIN, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "cos" || Name == "cosf" || Name == "cosl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FCOS, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; } } else if (Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") { - if (I.getNumOperands() == 2 && // Basic sanity checks. - I.getOperand(1)->getType()->isFloatingPointTy() && - I.getType() == I.getOperand(1)->getType() && + if (I.getNumArgOperands() == 1 && // Basic sanity checks. + I.getArgOperand(0)->getType()->isFloatingPointTy() && + I.getType() == I.getArgOperand(0)->getType() && I.onlyReadsMemory()) { - SDValue Tmp = getValue(I.getOperand(1)); + SDValue Tmp = getValue(I.getArgOperand(0)); setValue(&I, DAG.getNode(ISD::FSQRT, getCurDebugLoc(), Tmp.getValueType(), Tmp)); return; @@ -4733,14 +4880,11 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { return; } } - } else if (isa(I.getOperand(0))) { - visitInlineAsm(&I); - return; } - + SDValue Callee; if (!RenameFn) - Callee = getValue(I.getOperand(0)); + Callee = getValue(I.getCalledValue()); else Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); @@ -4749,210 +4893,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { LowerCallTo(&I, Callee, I.isTailCall()); } -/// getCopyFromRegs - Emit a series of CopyFromReg nodes that copies from -/// this value and returns the result as a ValueVT value. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. 
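The library-call fast paths above still map fabs, sin, cos and sqrt (and their f/l variants) onto single DAG nodes once the argument count, the floating-point types and, for the latter three, I.onlyReadsMemory() have been checked, now phrased via getArgOperand. A compact sketch of that kind of recognizer; the CallDesc struct and the enum are illustrative, not the real interfaces, and copysign follows the same pattern with two arguments:

#include <string>

enum class LibmOp { None, FSin, FCos, FSqrt, FAbs };

struct CallDesc {
  std::string CalleeName;
  unsigned NumArgs;
  bool ArgAndResultAreSameFPType; // models the type checks in the hunk
  bool OnlyReadsMemory;           // models I.onlyReadsMemory()
};

// Decide whether a call may be emitted as a single floating-point DAG node.
LibmOp classifyLibmCall(const CallDesc &C) {
  if (C.NumArgs != 1 || !C.ArgAndResultAreSameFPType)
    return LibmOp::None;
  if (C.CalleeName == "fabs" || C.CalleeName == "fabsf" || C.CalleeName == "fabsl")
    return LibmOp::FAbs;          // fabs is accepted without the purity check
  if (!C.OnlyReadsMemory)
    return LibmOp::None;          // mirrors the onlyReadsMemory() requirement
  if (C.CalleeName == "sin" || C.CalleeName == "sinf" || C.CalleeName == "sinl")
    return LibmOp::FSin;
  if (C.CalleeName == "cos" || C.CalleeName == "cosf" || C.CalleeName == "cosl")
    return LibmOp::FCos;
  if (C.CalleeName == "sqrt" || C.CalleeName == "sqrtf" || C.CalleeName == "sqrtl")
    return LibmOp::FSqrt;
  return LibmOp::None;
}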
-SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Assemble the legal parts into the final values. - SmallVector Values(ValueVTs.size()); - SmallVector Parts; - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - // Copy the legal parts from the registers. - EVT ValueVT = ValueVTs[Value]; - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - Parts.resize(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue P; - if (Flag == 0) { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT); - } else { - P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag); - *Flag = P.getValue(2); - } - - Chain = P.getValue(1); - - // If the source register was virtual and if we know something about it, - // add an assert node. - if (TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) && - RegisterVT.isInteger() && !RegisterVT.isVector()) { - unsigned SlotNo = Regs[Part+i]-TargetRegisterInfo::FirstVirtualRegister; - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - if (FLI.LiveOutRegInfo.size() > SlotNo) { - FunctionLoweringInfo::LiveOutInfo &LOI = FLI.LiveOutRegInfo[SlotNo]; - - unsigned RegSize = RegisterVT.getSizeInBits(); - unsigned NumSignBits = LOI.NumSignBits; - unsigned NumZeroBits = LOI.KnownZero.countLeadingOnes(); - - // FIXME: We capture more information than the dag can represent. For - // now, just use the tightest assertzext/assertsext possible. - bool isSExt = true; - EVT FromVT(MVT::Other); - if (NumSignBits == RegSize) - isSExt = true, FromVT = MVT::i1; // ASSERT SEXT 1 - else if (NumZeroBits >= RegSize-1) - isSExt = false, FromVT = MVT::i1; // ASSERT ZEXT 1 - else if (NumSignBits > RegSize-8) - isSExt = true, FromVT = MVT::i8; // ASSERT SEXT 8 - else if (NumZeroBits >= RegSize-8) - isSExt = false, FromVT = MVT::i8; // ASSERT ZEXT 8 - else if (NumSignBits > RegSize-16) - isSExt = true, FromVT = MVT::i16; // ASSERT SEXT 16 - else if (NumZeroBits >= RegSize-16) - isSExt = false, FromVT = MVT::i16; // ASSERT ZEXT 16 - else if (NumSignBits > RegSize-32) - isSExt = true, FromVT = MVT::i32; // ASSERT SEXT 32 - else if (NumZeroBits >= RegSize-32) - isSExt = false, FromVT = MVT::i32; // ASSERT ZEXT 32 - - if (FromVT != MVT::Other) - P = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl, - RegisterVT, P, DAG.getValueType(FromVT)); - } - } - - Parts[i] = P; - } - - Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), - NumRegs, RegisterVT, ValueVT); - Part += NumRegs; - Parts.clear(); - } - - return DAG.getNode(ISD::MERGE_VALUES, dl, - DAG.getVTList(&ValueVTs[0], ValueVTs.size()), - &Values[0], ValueVTs.size()); -} - -/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the -/// specified value into the registers specified by this object. This uses -/// Chain/Flag as the input and updates them for the output Chain/Flag. -/// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, - SDValue &Chain, SDValue *Flag) const { - // Get the list of the values's legal parts. 
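The RegsForValue::getCopyFromRegs body removed above (its callers now pass FuncInfo in, so the logic lives elsewhere after this patch) picked the tightest AssertSext or AssertZext it could justify from the live-out sign- and zero-bit counts of a virtual register. That cascade reads naturally as a small pure function; the sketch below mirrors the deleted branches, with the return type and names being mine:

#include <utility>

enum class FromWidth { None, I1, I8, I16, I32 };

// Given the register width and how many sign/zero bits are known to be
// correct, return whether a sign- or zero-extension assertion applies and
// from which source width. Mirrors the if/else chain in getCopyFromRegs.
std::pair<bool /*isSExt*/, FromWidth>
tightestAssert(unsigned RegSize, unsigned NumSignBits, unsigned NumZeroBits) {
  if (NumSignBits == RegSize)      return {true,  FromWidth::I1};  // ASSERT SEXT 1
  if (NumZeroBits >= RegSize - 1)  return {false, FromWidth::I1};  // ASSERT ZEXT 1
  if (NumSignBits > RegSize - 8)   return {true,  FromWidth::I8};
  if (NumZeroBits >= RegSize - 8)  return {false, FromWidth::I8};
  if (NumSignBits > RegSize - 16)  return {true,  FromWidth::I16};
  if (NumZeroBits >= RegSize - 16) return {false, FromWidth::I16};
  if (NumSignBits > RegSize - 32)  return {true,  FromWidth::I32};
  if (NumZeroBits >= RegSize - 32) return {false, FromWidth::I32};
  return {false, FromWidth::None}; // nothing tight enough is known
}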
- unsigned NumRegs = Regs.size(); - SmallVector Parts(NumRegs); - for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT ValueVT = ValueVTs[Value]; - unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; - - getCopyToParts(DAG, dl, - Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT); - Part += NumParts; - } - - // Copy the parts into the registers. - SmallVector Chains(NumRegs); - for (unsigned i = 0; i != NumRegs; ++i) { - SDValue Part; - if (Flag == 0) { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]); - } else { - Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag); - *Flag = Part.getValue(1); - } - - Chains[i] = Part.getValue(0); - } - - if (NumRegs == 1 || Flag) - // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is - // flagged to it. That is the CopyToReg nodes and the user are considered - // a single scheduling unit. If we create a TokenFactor and return it as - // chain, then the TokenFactor is both a predecessor (operand) of the - // user as well as a successor (the TF operands are flagged to the user). - // c1, f1 = CopyToReg - // c2, f2 = CopyToReg - // c3 = TokenFactor c1, c2 - // ... - // = op c3, ..., f2 - Chain = Chains[NumRegs-1]; - else - Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Chains[0], NumRegs); -} - -/// AddInlineAsmOperands - Add this value to the specified inlineasm node -/// operand list. This adds the code marker and includes the number of -/// values added into it. -void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, - unsigned MatchingIdx, - SelectionDAG &DAG, - std::vector &Ops) const { - unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size()); - if (HasMatching) - Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx); - SDValue Res = DAG.getTargetConstant(Flag, MVT::i32); - Ops.push_back(Res); - - for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { - unsigned NumRegs = TLI->getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; - for (unsigned i = 0; i != NumRegs; ++i) { - assert(Reg < Regs.size() && "Mismatch in # registers expected"); - Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); - } - } -} - -/// isAllocatableRegister - If the specified register is safe to allocate, -/// i.e. it isn't a stack pointer or some other special register, return the -/// register class for the register. Otherwise, return null. -static const TargetRegisterClass * -isAllocatableRegister(unsigned Reg, MachineFunction &MF, - const TargetLowering &TLI, - const TargetRegisterInfo *TRI) { - EVT FoundVT = MVT::Other; - const TargetRegisterClass *FoundRC = 0; - for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), - E = TRI->regclass_end(); RCI != E; ++RCI) { - EVT ThisVT = MVT::Other; - - const TargetRegisterClass *RC = *RCI; - // If none of the value types for this register class are valid, we - // can't use it. For example, 64-bit reg classes on 32-bit targets. - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (TLI.isTypeLegal(*I)) { - // If we have already found this register in a different register class, - // choose the one with the largest VT specified. For example, on - // PowerPC, we favor f64 register classes over f32. 
- if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { - ThisVT = *I; - break; - } - } - } - - if (ThisVT == MVT::Other) continue; - - // NOTE: This isn't ideal. In particular, this might allocate the - // frame pointer in functions that need it (due to them not being taken - // out of allocation, because a variable sized allocation hasn't been seen - // yet). This is a slight code pessimization, but should still work. - for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), - E = RC->allocation_order_end(MF); I != E; ++I) - if (*I == Reg) { - // We found a matching register class. Keep looking at others in case - // we find one with larger registers that this physreg is also in. - FoundRC = RC; - FoundVT = ThisVT; - break; - } - } - return FoundRC; -} - - namespace llvm { + /// AsmOperandInfo - This contains information for each constraint that we are /// lowering. class LLVM_LIBRARY_VISIBILITY SDISelAsmOperandInfo : @@ -5041,8 +4983,56 @@ private: Regs.insert(*Aliases); } }; + } // end llvm namespace. +/// isAllocatableRegister - If the specified register is safe to allocate, +/// i.e. it isn't a stack pointer or some other special register, return the +/// register class for the register. Otherwise, return null. +static const TargetRegisterClass * +isAllocatableRegister(unsigned Reg, MachineFunction &MF, + const TargetLowering &TLI, + const TargetRegisterInfo *TRI) { + EVT FoundVT = MVT::Other; + const TargetRegisterClass *FoundRC = 0; + for (TargetRegisterInfo::regclass_iterator RCI = TRI->regclass_begin(), + E = TRI->regclass_end(); RCI != E; ++RCI) { + EVT ThisVT = MVT::Other; + + const TargetRegisterClass *RC = *RCI; + // If none of the value types for this register class are valid, we + // can't use it. For example, 64-bit reg classes on 32-bit targets. + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (TLI.isTypeLegal(*I)) { + // If we have already found this register in a different register class, + // choose the one with the largest VT specified. For example, on + // PowerPC, we favor f64 register classes over f32. + if (FoundVT == MVT::Other || FoundVT.bitsLT(*I)) { + ThisVT = *I; + break; + } + } + } + + if (ThisVT == MVT::Other) continue; + + // NOTE: This isn't ideal. In particular, this might allocate the + // frame pointer in functions that need it (due to them not being taken + // out of allocation, because a variable sized allocation hasn't been seen + // yet). This is a slight code pessimization, but should still work. + for (TargetRegisterClass::iterator I = RC->allocation_order_begin(MF), + E = RC->allocation_order_end(MF); I != E; ++I) + if (*I == Reg) { + // We found a matching register class. Keep looking at others in case + // we find one with larger registers that this physreg is also in. + FoundRC = RC; + FoundVT = ThisVT; + break; + } + } + return FoundRC; +} /// GetRegistersForValue - Assign registers (virtual or physical) for the /// specified operand. 
We prefer to assign virtual registers, to allow the @@ -5154,7 +5144,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, } } - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5172,7 +5162,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (; NumRegs; --NumRegs) Regs.push_back(RegInfo.createVirtualRegister(RC)); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, RegVT, ValueVT); + OpInfo.AssignedRegs = RegsForValue(Regs, RegVT, ValueVT); return; } @@ -5215,7 +5205,7 @@ GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, for (unsigned i = RegStart; i != RegEnd; ++i) Regs.push_back(RegClassRegs[i]); - OpInfo.AssignedRegs = RegsForValue(TLI, Regs, *RC->vt_begin(), + OpInfo.AssignedRegs = RegsForValue(Regs, *RC->vt_begin(), OpInfo.ConstraintVT); OpInfo.MarkAllocatedRegs(isOutReg, isInReg, OutputRegs, InputRegs, *TRI); return; @@ -5332,7 +5322,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, hasMemory, &DAG); + TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. @@ -5406,6 +5396,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { const MDNode *SrcLoc = CS.getInstruction()->getMetadata("srcloc"); AsmNodeOperands.push_back(DAG.getMDNode(SrcLoc)); + // Remember the AlignStack bit as operand 3. + AsmNodeOperands.push_back(DAG.getTargetConstant(IA->isAlignStack() ? 1 : 0, + MVT::i1)); + // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. RegsForValue RetValRegs; @@ -5497,7 +5491,6 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } RegsForValue MatchedRegs; - MatchedRegs.TLI = &TLI; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); MatchedRegs.RegVTs.push_back(RegVT); @@ -5535,7 +5528,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { std::vector Ops; TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode[0], - hasMemory, Ops, DAG); + Ops, DAG); if (Ops.empty()) report_fatal_error("Invalid operand for inline asm constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5570,7 +5563,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty() || - !OpInfo.AssignedRegs.areValueTypesLegal()) + !OpInfo.AssignedRegs.areValueTypesLegal(TLI)) report_fatal_error("Couldn't allocate input reg for constraint '" + Twine(OpInfo.ConstraintCode) + "'!"); @@ -5595,7 +5588,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Finish up input operands. Set the input chain and add the flag last. - AsmNodeOperands[0] = Chain; + AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), @@ -5606,7 +5599,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. 
if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); // FIXME: Why don't we do this for inline asms with MRVs? @@ -5646,7 +5639,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, &Flag); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -5672,14 +5665,16 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { void SelectionDAGBuilder::visitVAStart(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { + const TargetData &TD = *TLI.getTargetData(); SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), getRoot(), getValue(I.getOperand(0)), - DAG.getSrcValue(I.getOperand(0))); + DAG.getSrcValue(I.getOperand(0)), + TD.getABITypeAlignment(I.getType())); setValue(&I, V); DAG.setRoot(V.getValue(1)); } @@ -5687,17 +5682,17 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(1)))); + getValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), MVT::Other, getRoot(), - getValue(I.getOperand(1)), - getValue(I.getOperand(2)), - DAG.getSrcValue(I.getOperand(1)), - DAG.getSrcValue(I.getOperand(2)))); + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), + DAG.getSrcValue(I.getArgOperand(0)), + DAG.getSrcValue(I.getArgOperand(1)))); } /// TargetLowering::LowerCallTo - This is the default LowerCallTo @@ -5715,6 +5710,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, DebugLoc dl) const { // Handle all of the outgoing arguments. SmallVector Outs; + SmallVector OutVals; for (unsigned i = 0, e = Args.size(); i != e; ++i) { SmallVector ValueVTs; ComputeValueVTs(*this, Args[i].Ty, ValueVTs); @@ -5768,13 +5764,15 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j], i < NumFixedArgs); + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + i < NumFixedArgs); if (NumParts > 1 && j == 0) MyFlags.Flags.setSplit(); else if (j != 0) MyFlags.Flags.setOrigAlign(1); Outs.push_back(MyFlags); + OutVals.push_back(Parts[j]); } } } @@ -5803,7 +5801,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy, SmallVector InVals; Chain = LowerCall(Chain, Callee, CallConv, isVarArg, isTailCall, - Outs, Ins, dl, DAG, InVals); + Outs, OutVals, Ins, dl, DAG, InVals); // Verify that the target's LowerCall behaved as expected. 
assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -5876,7 +5874,7 @@ SDValue TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { void SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { - SDValue Op = getValue(V); + SDValue Op = getNonRegisterValue(V); assert((Op.getOpcode() != ISD::CopyFromReg || cast(Op.getOperand(1))->getReg() != Reg) && "Copy from a reg to the same reg!"); @@ -5894,21 +5892,16 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // If this is the entry block, emit arguments. const Function &F = *LLVMBB->getParent(); SelectionDAG &DAG = SDB->DAG; - SDValue OldRoot = DAG.getRoot(); DebugLoc dl = SDB->getCurDebugLoc(); const TargetData *TD = TLI.getTargetData(); SmallVector Ins; // Check whether the function can return without sret-demotion. - SmallVector OutVTs; - SmallVector OutsFlags; - getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - OutVTs, OutsFlags, TLI); - FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo(); - - FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(), - OutVTs, OutsFlags, DAG); - if (!FLI.CanLowerReturn) { + SmallVector Outs; + GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), + Outs, TLI); + + if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); @@ -6002,7 +5995,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // Set up the argument values. unsigned i = 0; Idx = 1; - if (!FLI.CanLowerReturn) { + if (!FuncInfo->CanLowerReturn) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. SmallVector ValueVTs; @@ -6016,7 +6009,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); - FLI.DemoteRegister = SRetReg; + FuncInfo->DemoteRegister = SRetReg; NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6032,6 +6025,12 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { SmallVector ValueVTs; ComputeValueVTs(TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); + + // If this argument is unused then remember its value. It is used to generate + // debugging information. 
+ if (I->use_empty() && NumValues) + SDB->setUnusedArgValue(I, InVals[i]); + for (unsigned Value = 0; Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); @@ -6112,17 +6111,20 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { if (const Constant *C = dyn_cast(PHIOp)) { unsigned &RegOut = ConstantsOut[C]; if (RegOut == 0) { - RegOut = FuncInfo.CreateRegForValue(C); + RegOut = FuncInfo.CreateRegs(C->getType()); CopyValueToVirtualRegister(C, RegOut); } Reg = RegOut; } else { - Reg = FuncInfo.ValueMap[PHIOp]; - if (Reg == 0) { + DenseMap::iterator I = + FuncInfo.ValueMap.find(PHIOp); + if (I != FuncInfo.ValueMap.end()) + Reg = I->second; + else { assert(isa(PHIOp) && FuncInfo.StaticAllocaMap.count(cast(PHIOp)) && "Didn't codegen value into a register!??"); - Reg = FuncInfo.CreateRegForValue(PHIOp); + Reg = FuncInfo.CreateRegs(PHIOp->getType()); CopyValueToVirtualRegister(PHIOp, Reg); } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3fcd4b9dc437..46733d6db124 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -88,6 +88,10 @@ class SelectionDAGBuilder { DebugLoc CurDebugLoc; DenseMap NodeMap; + + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used + /// to preserve debug information for incoming arguments. + DenseMap UnusedArgNodeMap; public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch @@ -342,6 +346,8 @@ public: void visit(unsigned Opcode, const User &I); SDValue getValue(const Value *V); + SDValue getNonRegisterValue(const Value *V); + SDValue getValueImpl(const Value *V); void setValue(const Value *V, SDValue NewN) { SDValue &N = NodeMap[V]; @@ -349,6 +355,12 @@ public: N = NewN; } + void setUnusedArgValue(const Value *V, SDValue NewN) { + SDValue &N = UnusedArgNodeMap[V]; + assert(N.getNode() == 0 && "Already set a value for this node!"); + N = NewN; + } + void GetRegistersForValue(SDISelAsmOperandInfo &OpInfo, std::set &OutputRegs, std::set &InputRegs); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 65b8d4f65919..08ba5482f7d2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -14,7 +14,7 @@ #define DEBUG_TYPE "isel" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "FunctionLoweringInfo.h" +#include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/SelectionDAGISel.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/DebugInfo.h" @@ -171,7 +171,7 @@ TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, CodeGenOpt::Level OL) : MachineFunctionPass(&ID), TM(tm), TLI(*tm.getTargetLowering()), FuncInfo(new FunctionLoweringInfo(TLI)), - CurDAG(new SelectionDAG(tm, *FuncInfo)), + CurDAG(new SelectionDAG(tm)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), OptLevel(OL), @@ -244,7 +244,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); CurDAG->init(*MF); - FuncInfo->set(Fn, *MF, EnableFastISel); + FuncInfo->set(Fn, *MF); SDB->init(GFI, *AA); SelectAllBasicBlocks(Fn); @@ -300,7 +300,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { for 
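The UnusedArgNodeMap declared in the header hunk above records the lowered SDValue of incoming arguments that have no other uses, so that the dbg_value lowering earlier in this patch can still find a location when NodeMap has no entry. The lookup order is simply the primary map first, then the argument fallback; a small sketch with generic standard-library maps and invented names:

#include <optional>
#include <string>
#include <unordered_map>

struct DebugOnlyLowering {
  std::unordered_map<std::string, int> NodeMap;          // normal lowering results
  std::unordered_map<std::string, int> UnusedArgNodeMap; // kept only for debug info

  // Consult the primary map; for arguments that were never otherwise used,
  // fall back to the secondary map filled in at argument-lowering time.
  std::optional<int> lookup(const std::string &V, bool IsArgument) const {
    if (auto It = NodeMap.find(V); It != NodeMap.end())
      return It->second;
    if (IsArgument)
      if (auto It = UnusedArgNodeMap.find(V); It != UnusedArgNodeMap.end())
        return It->second;
    return std::nullopt; // no location available for this value
  }
};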
(MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { const TargetInstrDesc &TID = TM.getInstrInfo()->get(II->getOpcode()); - if (II->isInlineAsm() || (TID.isCall() && !TID.isReturn())) { + + // Operand 1 of an inline asm instruction indicates whether the asm + // needs stack or not. + if ((II->isInlineAsm() && II->getOperand(1).getImm()) || + (TID.isCall() && !TID.isReturn())) { MFI->setHasCalls(true); goto done; } @@ -312,6 +316,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there is a call to setjmp in the machine function. MF->setCallsSetJmp(FunctionCallsSetJmp(&Fn)); + // Replace forward-declared registers with the registers containing + // the desired value. + MachineRegisterInfo &MRI = MF->getRegInfo(); + for (DenseMap::iterator + I = FuncInfo->RegFixups.begin(), E = FuncInfo->RegFixups.end(); + I != E; ++I) { + unsigned From = I->first; + unsigned To = I->second; + // If To is also scheduled to be replaced, find what its ultimate + // replacement is. + for (;;) { + DenseMap::iterator J = + FuncInfo->RegFixups.find(To); + if (J == E) break; + To = J->second; + } + // Replace it. + MRI.replaceRegWith(From, To); + } + // Release function-specific state. SDB and CurDAG are already cleared // at this point. FuncInfo->clear(); @@ -319,10 +343,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { return true; } -MachineBasicBlock * -SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, - const BasicBlock *LLVMBB, - BasicBlock::const_iterator Begin, +void +SelectionDAGISel::SelectBasicBlock(BasicBlock::const_iterator Begin, BasicBlock::const_iterator End, bool &HadTailCall) { // Lower all of the non-terminator instructions. If a call is emitted @@ -337,7 +359,7 @@ SelectionDAGISel::SelectBasicBlock(MachineBasicBlock *BB, SDB->clear(); // Final step, emit the lowered DAG as machine code. - return CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } namespace { @@ -372,102 +394,6 @@ public: }; } -/// TrivialTruncElim - Eliminate some trivial nops that can result from -/// ShrinkDemandedOps: (trunc (ext n)) -> n. -static bool TrivialTruncElim(SDValue Op, - TargetLowering::TargetLoweringOpt &TLO) { - SDValue N0 = Op.getOperand(0); - EVT VT = Op.getValueType(); - if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::SIGN_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND) && - N0.getOperand(0).getValueType() == VT) { - return TLO.CombineTo(Op, N0.getOperand(0)); - } - return false; -} - -/// ShrinkDemandedOps - A late transformation pass that shrink expressions -/// using TargetLowering::TargetLoweringOpt::ShrinkDemandedOp. It converts -/// x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. -void SelectionDAGISel::ShrinkDemandedOps() { - SmallVector Worklist; - SmallPtrSet InWorklist; - - // Add all the dag nodes to the worklist. - Worklist.reserve(CurDAG->allnodes_size()); - for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), - E = CurDAG->allnodes_end(); I != E; ++I) { - Worklist.push_back(I); - InWorklist.insert(I); - } - - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); - while (!Worklist.empty()) { - SDNode *N = Worklist.pop_back_val(); - InWorklist.erase(N); - - if (N->use_empty() && N != CurDAG->getRoot().getNode()) { - // Deleting this node may make its operands dead, add them to the worklist - // if they aren't already there. 
- for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) - if (InWorklist.insert(N->getOperand(i).getNode())) - Worklist.push_back(N->getOperand(i).getNode()); - - CurDAG->DeleteNode(N); - continue; - } - - // Run ShrinkDemandedOp on scalar binary operations. - if (N->getNumValues() != 1 || - !N->getValueType(0).isSimple() || !N->getValueType(0).isInteger()) - continue; - - unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); - APInt Demanded = APInt::getAllOnesValue(BitWidth); - APInt KnownZero, KnownOne; - if (!TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, - KnownZero, KnownOne, TLO) && - (N->getOpcode() != ISD::TRUNCATE || - !TrivialTruncElim(SDValue(N, 0), TLO))) - continue; - - // Revisit the node. - assert(!InWorklist.count(N) && "Already in worklist"); - Worklist.push_back(N); - InWorklist.insert(N); - - // Replace the old value with the new one. - DEBUG(errs() << "\nShrinkDemandedOps replacing "; - TLO.Old.getNode()->dump(CurDAG); - errs() << "\nWith: "; - TLO.New.getNode()->dump(CurDAG); - errs() << '\n'); - - if (InWorklist.insert(TLO.New.getNode())) - Worklist.push_back(TLO.New.getNode()); - - SDOPsWorkListRemover DeadNodes(Worklist, InWorklist); - CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New, &DeadNodes); - - if (!TLO.Old.getNode()->use_empty()) continue; - - for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); - i != e; ++i) { - SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); - if (OpNode->hasOneUse()) { - // Add OpNode to the end of the list to revisit. - DeadNodes.RemoveFromWorklist(OpNode); - Worklist.push_back(OpNode); - InWorklist.insert(OpNode); - } - } - - DeadNodes.RemoveFromWorklist(TLO.Old.getNode()); - CurDAG->DeleteNode(TLO.Old.getNode()); - } -} - void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; @@ -522,7 +448,7 @@ void SelectionDAGISel::ComputeLiveOutVRegInfo() { } while (!Worklist.empty()); } -MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { +void SelectionDAGISel::CodeGenAndEmitDAG() { std::string GroupName; if (TimePassesIsEnabled) GroupName = "Instruction Selection and Scheduling"; @@ -531,23 +457,19 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { ViewDAGCombine2 || ViewDAGCombineLT || ViewISelDAGs || ViewSchedDAGs || ViewSUnitDAGs) BlockName = MF->getFunction()->getNameStr() + ":" + - BB->getBasicBlock()->getNameStr(); + FuncInfo->MBB->getBasicBlock()->getNameStr(); - DEBUG(dbgs() << "Initial selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Initial selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine1) CurDAG->viewGraph("dag-combine1 input for " + BlockName); // Run the DAG combiner in pre-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 1", GroupName); - CurDAG->Combine(Unrestricted, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 1", GroupName, TimePassesIsEnabled); CurDAG->Combine(Unrestricted, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized lowered selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized lowered selection DAG:\n"; CurDAG->dump()); // Second step, hack on the DAG until it only uses operations and types that // the target supports. 
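The timer hunks above and below all make the same change: the duplicated "if (TimePassesIsEnabled) { NamedRegionTimer T(...); work(); } else { work(); }" shape becomes a single scoped block that hands the enable flag straight to the timer, so the timed call is written exactly once. Below is a minimal sketch of that RAII idiom; ScopedRegionTimer and runCombineStep are illustrative stand-ins for this note only, not LLVM's Timer API.

#include <chrono>
#include <cstdio>

// Illustrative stand-in for a named region timer: it only arms itself when
// the enable flag is set, so callers no longer duplicate the timed call.
class ScopedRegionTimer {
  const char *Name;
  bool Enabled;
  std::chrono::steady_clock::time_point Start;
public:
  ScopedRegionTimer(const char *N, bool E) : Name(N), Enabled(E) {
    if (Enabled)
      Start = std::chrono::steady_clock::now();
  }
  ~ScopedRegionTimer() {
    if (!Enabled)
      return;
    long long NS = std::chrono::duration_cast<std::chrono::nanoseconds>(
                       std::chrono::steady_clock::now() - Start).count();
    std::fprintf(stderr, "%s: %lld ns\n", Name, NS);
  }
};

void runCombineStep(bool TimePassesIsEnabled) {
  {
    ScopedRegionTimer T("DAG Combining 1", TimePassesIsEnabled);
    // ... the timed work goes here, written exactly once ...
  }
}

In the patch itself the same shape appears repeatedly in CodeGenAndEmitDAG as NamedRegionTimer T("...", GroupName, TimePassesIsEnabled).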
@@ -555,44 +477,36 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { BlockName); bool Changed; - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization", GroupName); - Changed = CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeTypes(); } - DEBUG(dbgs() << "Type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Type-legalized selection DAG:\n"; CurDAG->dump()); if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize types", GroupName); - CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize types", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalTypes, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized type-legalized selection DAG:\n"; + CurDAG->dump()); } - if (TimePassesIsEnabled) { - NamedRegionTimer T("Vector Legalization", GroupName); - Changed = CurDAG->LegalizeVectors(); - } else { + { + NamedRegionTimer T("Vector Legalization", GroupName, TimePassesIsEnabled); Changed = CurDAG->LegalizeVectors(); } if (Changed) { - if (TimePassesIsEnabled) { - NamedRegionTimer T("Type Legalization 2", GroupName); - CurDAG->LegalizeTypes(); - } else { + { + NamedRegionTimer T("Type Legalization 2", GroupName, TimePassesIsEnabled); CurDAG->LegalizeTypes(); } @@ -600,95 +514,79 @@ MachineBasicBlock *SelectionDAGISel::CodeGenAndEmitDAG(MachineBasicBlock *BB) { CurDAG->viewGraph("dag-combine-lv input for " + BlockName); // Run the DAG combiner in post-type-legalize mode. - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining after legalize vectors", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining after legalize vectors", GroupName, + TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized vector-legalized selection DAG:\n"; + CurDAG->dump()); } if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName); - if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Legalization", GroupName); - CurDAG->Legalize(OptLevel); - } else { + { + NamedRegionTimer T("DAG Legalization", GroupName, TimePassesIsEnabled); CurDAG->Legalize(OptLevel); } - DEBUG(dbgs() << "Legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Legalized selection DAG:\n"; CurDAG->dump()); if (ViewDAGCombine2) CurDAG->viewGraph("dag-combine2 input for " + BlockName); // Run the DAG combiner in post-legalize mode. 
- if (TimePassesIsEnabled) { - NamedRegionTimer T("DAG Combining 2", GroupName); - CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); - } else { + { + NamedRegionTimer T("DAG Combining 2", GroupName, TimePassesIsEnabled); CurDAG->Combine(NoIllegalOperations, *AA, OptLevel); } - DEBUG(dbgs() << "Optimized legalized selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Optimized legalized selection DAG:\n"; CurDAG->dump()); - if (OptLevel != CodeGenOpt::None) { - ShrinkDemandedOps(); + if (OptLevel != CodeGenOpt::None) ComputeLiveOutVRegInfo(); - } if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Selection", GroupName); - DoInstructionSelection(); - } else { + { + NamedRegionTimer T("Instruction Selection", GroupName, TimePassesIsEnabled); DoInstructionSelection(); } - DEBUG(dbgs() << "Selected selection DAG:\n"); - DEBUG(CurDAG->dump()); + DEBUG(dbgs() << "Selected selection DAG:\n"; CurDAG->dump()); if (ViewSchedDAGs) CurDAG->viewGraph("scheduler input for " + BlockName); // Schedule machine code. ScheduleDAGSDNodes *Scheduler = CreateScheduler(); - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling", GroupName); - Scheduler->Run(CurDAG, BB, BB->end()); - } else { - Scheduler->Run(CurDAG, BB, BB->end()); + { + NamedRegionTimer T("Instruction Scheduling", GroupName, + TimePassesIsEnabled); + Scheduler->Run(CurDAG, FuncInfo->MBB, FuncInfo->InsertPt); } if (ViewSUnitDAGs) Scheduler->viewGraph(); // Emit machine code to BB. This can change 'BB' to the last block being // inserted into. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Creation", GroupName); - BB = Scheduler->EmitSchedule(); - } else { - BB = Scheduler->EmitSchedule(); + { + NamedRegionTimer T("Instruction Creation", GroupName, TimePassesIsEnabled); + + FuncInfo->MBB = Scheduler->EmitSchedule(); + FuncInfo->InsertPt = Scheduler->InsertPos; } // Free the scheduler state. - if (TimePassesIsEnabled) { - NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName); - delete Scheduler; - } else { + { + NamedRegionTimer T("Instruction Scheduling Cleanup", GroupName, + TimePassesIsEnabled); delete Scheduler; } // Free the SelectionDAG state, now that we're finished with it. CurDAG->clear(); - - return BB; } void SelectionDAGISel::DoInstructionSelection() { @@ -750,21 +648,22 @@ void SelectionDAGISel::DoInstructionSelection() { /// PrepareEHLandingPad - Emit an EH_LABEL, set up live-in registers, and /// do other setup for EH landing-pad blocks. -void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { +void SelectionDAGISel::PrepareEHLandingPad() { // Add a label to mark the beginning of the landing pad. Deletion of the // landing pad can thus be detected via the MachineModuleInfo. - MCSymbol *Label = MF->getMMI().addLandingPad(BB); + MCSymbol *Label = MF->getMMI().addLandingPad(FuncInfo->MBB); const TargetInstrDesc &II = TM.getInstrInfo()->get(TargetOpcode::EH_LABEL); - BuildMI(BB, SDB->getCurDebugLoc(), II).addSym(Label); + BuildMI(*FuncInfo->MBB, FuncInfo->InsertPt, SDB->getCurDebugLoc(), II) + .addSym(Label); // Mark exception register as live in. unsigned Reg = TLI.getExceptionAddressRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // Mark exception selector register as live in. 
Reg = TLI.getExceptionSelectorRegister(); - if (Reg) BB->addLiveIn(Reg); + if (Reg) FuncInfo->MBB->addLiveIn(Reg); // FIXME: Hack around an exception handling flaw (PR1508): the personality // function and list of typeids logically belong to the invoke (or, if you @@ -777,7 +676,7 @@ void SelectionDAGISel::PrepareEHLandingPad(MachineBasicBlock *BB) { // in exceptions not being caught because no typeids are associated with // the invoke. This may not be the only way things can go wrong, but it // is the only way we try to work around for the moment. - const BasicBlock *LLVMBB = BB->getBasicBlock(); + const BasicBlock *LLVMBB = FuncInfo->MBB->getBasicBlock(); const BranchInst *Br = dyn_cast(LLVMBB->getTerminator()); if (Br && Br->isUnconditional()) { // Critical edge? @@ -796,83 +695,100 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (EnableFastISel) - FastIS = TLI.createFastISel(*MF, FuncInfo->ValueMap, FuncInfo->MBBMap, - FuncInfo->StaticAllocaMap, - FuncInfo->PHINodesToUpdate -#ifndef NDEBUG - , FuncInfo->CatchInfoLost -#endif - ); + FastIS = TLI.createFastISel(*FuncInfo); // Iterate over all basic blocks in the function. for (Function::const_iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { const BasicBlock *LLVMBB = &*I; - MachineBasicBlock *BB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); - BasicBlock::const_iterator BI = Begin; + BasicBlock::const_iterator BI = End; + FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); + + // Setup an EH landing-pad block. + if (FuncInfo->MBB->isLandingPad()) + PrepareEHLandingPad(); + // Lower any arguments needed in this block if this is the entry block. if (LLVMBB == &Fn.getEntryBlock()) LowerArguments(LLVMBB); - // Setup an EH landing-pad block. - if (BB->isLandingPad()) - PrepareEHLandingPad(BB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { + FastIS->startNewBlock(); + // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { CurDAG->setRoot(SDB->getControlRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + + // If we inserted any instructions at the beginning, make a note of + // where they are, so we can be sure to emit subsequent instructions + // after them. + if (FuncInfo->InsertPt != FuncInfo->MBB->begin()) + FastIS->setLastLocalValue(llvm::prior(FuncInfo->InsertPt)); + else + FastIS->setLastLocalValue(0); } - FastIS->startNewBlock(BB); + // Do FastISel on as many instructions as possible. - for (; BI != End; ++BI) { + for (; BI != Begin; --BI) { + const Instruction *Inst = llvm::prior(BI); + + // If we no longer require this instruction, skip it. + if (!Inst->mayWriteToMemory() && + !isa(Inst) && + !isa(Inst) && + !FuncInfo->isExportedInst(Inst)) + continue; + + // Bottom-up: reset the insert pos at the top, after any local-value + // instructions. + FastIS->recomputeInsertPt(); + // Try to select the instruction with FastISel. - if (FastIS->SelectInstruction(BI)) + if (FastIS->SelectInstruction(Inst)) continue; // Then handle certain instructions as single-LLVM-Instruction blocks. 
- if (isa(BI)) { + if (isa(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel missed call: "; - BI->dump(); + Inst->dump(); } - if (!BI->getType()->isVoidTy() && !BI->use_empty()) { - unsigned &R = FuncInfo->ValueMap[BI]; + if (!Inst->getType()->isVoidTy() && !Inst->use_empty()) { + unsigned &R = FuncInfo->ValueMap[Inst]; if (!R) - R = FuncInfo->CreateRegForValue(BI); + R = FuncInfo->CreateRegs(Inst->getType()); } bool HadTailCall = false; - BB = SelectBasicBlock(BB, LLVMBB, BI, llvm::next(BI), HadTailCall); + SelectBasicBlock(Inst, BI, HadTailCall); // If the call was emitted as a tail call, we're done with the block. if (HadTailCall) { - BI = End; + --BI; break; } - // If the instruction was codegen'd with multiple blocks, - // inform the FastISel object where to resume inserting. - FastIS->setCurrentBlock(BB); continue; } // Otherwise, give up on FastISel for the rest of the block. // For now, be a little lenient about non-branch terminators. - if (!isa(BI) || isa(BI)) { + if (!isa(Inst) || isa(Inst)) { ++NumFastIselFailures; if (EnableFastISelVerbose || EnableFastISelAbort) { dbgs() << "FastISel miss: "; - BI->dump(); + Inst->dump(); } if (EnableFastISelAbort) // The "fast" selector couldn't handle something and bailed. @@ -881,17 +797,17 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } break; } + + FastIS->recomputeInsertPt(); } // Run SelectionDAG instruction selection on the remainder of the block // not handled by FastISel. If FastISel is not run, this is the entire // block. - if (BI != End) { - bool HadTailCall; - BB = SelectBasicBlock(BB, LLVMBB, BI, End, HadTailCall); - } + bool HadTailCall; + SelectBasicBlock(Begin, BI, HadTailCall); - FinishBasicBlock(BB); + FinishBasicBlock(); FuncInfo->PHINodesToUpdate.clear(); } @@ -899,11 +815,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } void -SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { +SelectionDAGISel::FinishBasicBlock() { DEBUG(dbgs() << "Total amount of phi nodes to update: " - << FuncInfo->PHINodesToUpdate.size() << "\n"); - DEBUG(for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) + << FuncInfo->PHINodesToUpdate.size() << "\n"; + for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) dbgs() << "Node " << i << " : (" << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); @@ -917,11 +833,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (!BB->isSuccessor(PHI->getParent())) + if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } return; } @@ -930,33 +846,35 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Parent; + FuncInfo->MBB = SDB->BitTestCases[i].Parent; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code - SDB->visitBitTestHeader(SDB->BitTestCases[i], BB); + SDB->visitBitTestHeader(SDB->BitTestCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); 
SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->MBB = SDB->BitTestCases[i].Cases[j].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code if (j+1 != ej) SDB->visitBitTestCase(SDB->BitTestCases[i].Cases[j+1].ThisBB, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); else SDB->visitBitTestCase(SDB->BitTestCases[i].Default, SDB->BitTestCases[i].Reg, SDB->BitTestCases[i].Cases[j], - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Update PHI Nodes @@ -1001,22 +919,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Lower header first, if it wasn't already lowered if (!SDB->JTCases[i].first.Emitted) { // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->MBB = SDB->JTCases[i].first.HeaderBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTableHeader(SDB->JTCases[i].second, SDB->JTCases[i].first, - BB); + FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); } // Set the current basic block to the mbb we wish to insert the code into - BB = SDB->JTCases[i].second.MBB; + FuncInfo->MBB = SDB->JTCases[i].second.MBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Emit the code SDB->visitJumpTable(SDB->JTCases[i].second); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - BB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); @@ -1034,11 +954,11 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); } // JT BB. Just iterate over successors here - if (BB->isSuccessor(PHIBB)) { + if (FuncInfo->MBB->isSuccessor(PHIBB)) { PHI->addOperand (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } } @@ -1050,10 +970,10 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (BB->isSuccessor(PHI->getParent())) { + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { PHI->addOperand( MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(BB)); + PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); } } @@ -1061,7 +981,8 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // additional DAGs necessary. for (unsigned i = 0, e = SDB->SwitchCases.size(); i != e; ++i) { // Set the current basic block to the mbb we wish to insert the code into - MachineBasicBlock *ThisBB = BB = SDB->SwitchCases[i].ThisBB; + MachineBasicBlock *ThisBB = FuncInfo->MBB = SDB->SwitchCases[i].ThisBB; + FuncInfo->InsertPt = FuncInfo->MBB->end(); // Determine the unique successors. SmallVector Succs; @@ -1071,21 +992,24 @@ SelectionDAGISel::FinishBasicBlock(MachineBasicBlock *BB) { // Emit the code. Note that this could result in ThisBB being split, so // we need to check for updates. 
- SDB->visitSwitchCase(SDB->SwitchCases[i], BB); + SDB->visitSwitchCase(SDB->SwitchCases[i], FuncInfo->MBB); CurDAG->setRoot(SDB->getRoot()); SDB->clear(); - ThisBB = CodeGenAndEmitDAG(BB); + CodeGenAndEmitDAG(); + ThisBB = FuncInfo->MBB; // Handle any PHI nodes in successors of this chunk, as if we were coming // from the original BB before switch expansion. Note that PHI nodes can // occur multiple times in PHINodesToUpdate. We have to be very careful to // handle them the right number of times. for (unsigned i = 0, e = Succs.size(); i != e; ++i) { - BB = Succs[i]; - // BB may have been removed from the CFG if a branch was constant folded. - if (ThisBB->isSuccessor(BB)) { - for (MachineBasicBlock::iterator Phi = BB->begin(); - Phi != BB->end() && Phi->isPHI(); + FuncInfo->MBB = Succs[i]; + FuncInfo->InsertPt = FuncInfo->MBB->end(); + // FuncInfo->MBB may have been removed from the CFG if a branch was + // constant folded. + if (ThisBB->isSuccessor(FuncInfo->MBB)) { + for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); + Phi != FuncInfo->MBB->end() && Phi->isPHI(); ++Phi) { // This value for this PHI node is recorded in PHINodesToUpdate. for (unsigned pn = 0; ; ++pn) { @@ -1205,6 +1129,7 @@ SelectInlineAsmMemoryOperands(std::vector &Ops) { Ops.push_back(InOps[InlineAsm::Op_InputChain]); // 0 Ops.push_back(InOps[InlineAsm::Op_AsmString]); // 1 Ops.push_back(InOps[InlineAsm::Op_MDNode]); // 2, !srcloc + Ops.push_back(InOps[InlineAsm::Op_IsAlignStack]); // 3 unsigned i = InlineAsm::Op_FirstOperand, e = InOps.size(); if (InOps[e-1].getValueType() == MVT::Flag) @@ -1701,7 +1626,7 @@ MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList, SDValue(Res, ResNumResults-1)); if ((EmitNodeInfo & OPFL_FlagOutput) != 0) - --ResNumResults; + --ResNumResults; // Move the chain reference if needed. 
if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 3786bd197b85..6cae804422ce 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -278,7 +278,7 @@ std::string ScheduleDAGSDNodes::getGraphNodeLabel(const SUnit *SU) const { FlaggedNodes.push_back(N); while (!FlaggedNodes.empty()) { O << DOTGraphTraits - ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); + ::getSimpleNodeLabel(FlaggedNodes.back(), DAG); FlaggedNodes.pop_back(); if (!FlaggedNodes.empty()) O << "\n "; diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 44a80d3362d1..4f3866956cac 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -20,6 +20,7 @@ #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/GlobalVariable.h" #include "llvm/DerivedTypes.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" @@ -261,6 +262,38 @@ static void InitLibcallNames(const char **Names) { Names[RTLIB::MEMMOVE] = "memmove"; Names[RTLIB::MEMSET] = "memset"; Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and-xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; 
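The new RTLIB entries above register one libcall name per access width for each __sync_* operation. As a rough illustration of how a name is picked out of such a size-indexed family, here is a small self-contained helper; getSyncFetchAndAddName is a hypothetical function written for this note, not part of the LLVM API.

#include <cassert>
#include <cstdio>

// Hypothetical helper: map an atomic access width in bytes to the
// corresponding __sync_fetch_and_add_N runtime routine name.
static const char *getSyncFetchAndAddName(unsigned SizeInBytes) {
  switch (SizeInBytes) {
  case 1: return "__sync_fetch_and_add_1";
  case 2: return "__sync_fetch_and_add_2";
  case 4: return "__sync_fetch_and_add_4";
  case 8: return "__sync_fetch_and_add_8";
  default:
    assert(0 && "unsupported atomic width");
    return 0;
  }
}

int main() {
  std::printf("%s\n", getSyncFetchAndAddName(4)); // prints __sync_fetch_and_add_4
  return 0;
}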
} /// InitLibcallCallingConvs - Set default libcall CallingConvs. @@ -546,9 +579,9 @@ TargetLowering::TargetLowering(const TargetMachine &tm, SchedPreferenceInfo = Sched::Latency; JumpBufSize = 0; JumpBufAlignment = 0; - IfCvtBlockSizeLimit = 2; - IfCvtDupBlockSizeLimit = 0; PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; InitLibcallNames(LibcallRoutineNames); InitCmpLibcallCCs(CmpLibcallCCs); @@ -578,9 +611,9 @@ bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT, - TargetLowering* TLI) { + unsigned &NumIntermediates, + EVT &RegisterVT, + TargetLowering *TLI) { // Figure out the right, legal destination reg to copy into. unsigned NumElts = VT.getVectorNumElements(); MVT EltTy = VT.getVectorElementType(); @@ -610,16 +643,12 @@ static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, EVT DestVT = TLI->getRegisterType(NewVT); RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) { - // Value is expanded, e.g. i64 -> i16. + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. return NumVectorRegs*(NewVT.getSizeInBits()/DestVT.getSizeInBits()); - } else { - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; - } - return 1; + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; } /// computeRegisterProperties - Once all of the register classes are added, @@ -705,39 +734,39 @@ void TargetLowering::computeRegisterProperties() { for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { MVT VT = (MVT::SimpleValueType)i; - if (!isTypeLegal(VT)) { - MVT IntermediateVT; - EVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - // Determine if there is a legal wider type. - bool IsLegalWiderType = false; - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && NElts != 1) { - TransformToType[i] = SVT; - ValueTypeActions.setTypeAction(VT, Promote); - IsLegalWiderType = true; - break; - } + if (isTypeLegal(VT)) continue; + + MVT IntermediateVT; + EVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + // Determine if there is a legal wider type. + bool IsLegalWiderType = false; + EVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + EVT SVT = (MVT::SimpleValueType)nVT; + if (isTypeSynthesizable(SVT) && SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && NElts != 1) { + TransformToType[i] = SVT; + ValueTypeActions.setTypeAction(VT, Promote); + IsLegalWiderType = true; + break; } - if (!IsLegalWiderType) { - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. 
The default action is to split. - TransformToType[i] = MVT::Other; - ValueTypeActions.setTypeAction(VT, Expand); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, Promote); - } + } + if (!IsLegalWiderType) { + EVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + ValueTypeActions.setTypeAction(VT, Expand); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, Promote); } } } @@ -811,6 +840,65 @@ unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, return 1; } +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(const Type* ReturnType, Attributes attr, + SmallVectorImpl &Outs, + const TargetLowering &TLI, + SmallVectorImpl *Offsets) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + unsigned Offset = 0; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr & Attribute::SExt) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr & Attribute::ZExt) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. + if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + unsigned PartSize = TLI.getTargetData()->getTypeAllocSize( + PartVT.getTypeForEVT(ReturnType->getContext())); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr & Attribute::InReg) + Flags.setInReg(); + + // Propagate extension type if any + if (attr & Attribute::SExt) + Flags.setSExt(); + else if (attr & Attribute::ZExt) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) { + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true)); + if (Offsets) { + Offsets->push_back(Offset); + Offset += PartSize; + } + } + } +} + /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate /// function arguments in the caller parameter area. This is the actual /// alignment, not its logarithm. @@ -1042,7 +1130,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. @@ -1076,7 +1164,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. 
- if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1101,7 +1189,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1498,13 +1586,17 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, break; } case ISD::AssertZext: { - EVT VT = cast(Op.getOperand(1))->getVT(); - APInt InMask = APInt::getLowBitsSet(BitWidth, - VT.getSizeInBits()); - if (SimplifyDemandedBits(Op.getOperand(0), InMask & NewMask, + // Demand all the bits of the input that are demanded in the output. + // The low bits are obvious; the high bits are demanded because we're + // asserting that they're zero here. + if (SimplifyDemandedBits(Op.getOperand(0), NewMask, KnownZero, KnownOne, TLO, Depth+1)) return true; assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + + EVT VT = cast(Op.getOperand(1))->getVT(); + APInt InMask = APInt::getLowBitsSet(BitWidth, + VT.getSizeInBits()); KnownZero |= ~InMask & NewMask; break; } @@ -1544,7 +1636,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -2346,7 +2438,6 @@ const char *TargetLowering::LowerXConstraint(EVT ConstraintVT) const{ /// vector. If it is invalid, don't add anything to Ops. void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, char ConstraintLetter, - bool hasMemory, std::vector &Ops, SelectionDAG &DAG) const { switch (ConstraintLetter) { @@ -2384,7 +2475,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op, if (ConstraintLetter != 'n') { int64_t Offs = GA->getOffset(); if (C) Offs += C->getZExtValue(); - Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(), + C->getDebugLoc(), Op.getValueType(), Offs)); return; } @@ -2507,18 +2599,18 @@ static unsigned getConstraintGenerality(TargetLowering::ConstraintType CT) { /// 'm' over 'r', for example. /// static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, - bool hasMemory, const TargetLowering &TLI, + const TargetLowering &TLI, SDValue Op, SelectionDAG *DAG) { assert(OpInfo.Codes.size() > 1 && "Doesn't have multiple constraint options"); unsigned BestIdx = 0; TargetLowering::ConstraintType BestType = TargetLowering::C_Unknown; int BestGenerality = -1; - + // Loop over the options, keeping track of the most general one. for (unsigned i = 0, e = OpInfo.Codes.size(); i != e; ++i) { TargetLowering::ConstraintType CType = TLI.getConstraintType(OpInfo.Codes[i]); - + // If this is an 'other' constraint, see if the operand is valid for it. // For example, on X86 we might have an 'rI' constraint. 
If the operand // is an integer in the range [0..31] we want to use I (saving a load @@ -2527,7 +2619,7 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, assert(OpInfo.Codes[i].size() == 1 && "Unhandled multi-letter 'other' constraint"); std::vector ResultOps; - TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], hasMemory, + TLI.LowerAsmOperandForConstraint(Op, OpInfo.Codes[i][0], ResultOps, *DAG); if (!ResultOps.empty()) { BestType = CType; @@ -2536,6 +2628,11 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, } } + // Things with matching constraints can only be registers, per gcc + // documentation. This mainly affects "g" constraints. + if (CType == TargetLowering::C_Memory && OpInfo.hasMatchingInput()) + continue; + // This constraint letter is more general than the previous one, use it. int Generality = getConstraintGenerality(CType); if (Generality > BestGenerality) { @@ -2554,7 +2651,6 @@ static void ChooseConstraint(TargetLowering::AsmOperandInfo &OpInfo, /// OpInfo.ConstraintCode and OpInfo.ConstraintType. void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, SDValue Op, - bool hasMemory, SelectionDAG *DAG) const { assert(!OpInfo.Codes.empty() && "Must have at least one constraint"); @@ -2563,7 +2659,7 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, OpInfo.ConstraintCode = OpInfo.Codes[0]; OpInfo.ConstraintType = getConstraintType(OpInfo.ConstraintCode); } else { - ChooseConstraint(OpInfo, hasMemory, *this, Op, DAG); + ChooseConstraint(OpInfo, *this, Op, DAG); } // 'X' matches anything. diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 5240bef5a5ff..6ab0cb03c065 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -31,6 +31,7 @@ #include "llvm/CodeGen/GCStrategy.h" #include "llvm/IntrinsicInst.h" #include "llvm/Module.h" +#include "llvm/Support/CallSite.h" #include "llvm/Support/IRBuilder.h" using namespace llvm; @@ -158,7 +159,8 @@ namespace { // Create a new invoke instruction. 
Args.clear(); - Args.append(CI->op_begin() + 1, CI->op_end()); + CallSite CS(CI); + Args.append(CS.arg_begin(), CS.arg_end()); InvokeInst *II = InvokeInst::Create(CI->getCalledValue(), NewBB, CleanupBB, @@ -194,7 +196,7 @@ Constant *ShadowStackGC::GetFrameMap(Function &F) { unsigned NumMeta = 0; SmallVector Metadata; for (unsigned I = 0; I != Roots.size(); ++I) { - Constant *C = cast(Roots[I].first->getOperand(2)); + Constant *C = cast(Roots[I].first->getArgOperand(1)); if (!C->isNullValue()) NumMeta = I + 1; Metadata.push_back(ConstantExpr::getBitCast(C, VoidPtr)); @@ -322,16 +324,16 @@ void ShadowStackGC::CollectRoots(Function &F) { assert(Roots.empty() && "Not cleaned up?"); - SmallVector,16> MetaRoots; + SmallVector, 16> MetaRoots; for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::iterator II = BB->begin(), E = BB->end(); II != E;) if (IntrinsicInst *CI = dyn_cast(II++)) if (Function *F = CI->getCalledFunction()) if (F->getIntrinsicID() == Intrinsic::gcroot) { - std::pair Pair = std::make_pair( - CI, cast(CI->getOperand(1)->stripPointerCasts())); - if (IsNullValue(CI->getOperand(2))) + std::pair Pair = std::make_pair( + CI, cast(CI->getArgOperand(0)->stripPointerCasts())); + if (IsNullValue(CI->getArgOperand(1))) Roots.push_back(Pair); else MetaRoots.push_back(Pair); diff --git a/lib/CodeGen/SimpleHazardRecognizer.h b/lib/CodeGen/SimpleHazardRecognizer.h deleted file mode 100644 index f69feaf9e570..000000000000 --- a/lib/CodeGen/SimpleHazardRecognizer.h +++ /dev/null @@ -1,89 +0,0 @@ -//=- llvm/CodeGen/SimpleHazardRecognizer.h - Scheduling Support -*- C++ -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the SimpleHazardRecognizer class, which -// implements hazard-avoidance heuristics for scheduling, based on the -// scheduling itineraries specified for the target. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H -#define LLVM_CODEGEN_SIMPLEHAZARDRECOGNIZER_H - -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" - -namespace llvm { - /// SimpleHazardRecognizer - A *very* simple hazard recognizer. It uses - /// a coarse classification and attempts to avoid that instructions of - /// a given class aren't grouped too densely together. - class SimpleHazardRecognizer : public ScheduleHazardRecognizer { - /// Class - A simple classification for SUnits. - enum Class { - Other, Load, Store - }; - - /// Window - The Class values of the most recently issued - /// instructions. - Class Window[8]; - - /// getClass - Classify the given SUnit. - Class getClass(const SUnit *SU) { - const MachineInstr *MI = SU->getInstr(); - const TargetInstrDesc &TID = MI->getDesc(); - if (TID.mayLoad()) - return Load; - if (TID.mayStore()) - return Store; - return Other; - } - - /// Step - Rotate the existing entries in Window and insert the - /// given class value in position as the most recent. 
- void Step(Class C) { - std::copy(Window+1, array_endof(Window), Window); - Window[array_lengthof(Window)-1] = C; - } - - public: - SimpleHazardRecognizer() : Window() { - Reset(); - } - - virtual HazardType getHazardType(SUnit *SU) { - Class C = getClass(SU); - if (C == Other) - return NoHazard; - unsigned Score = 0; - for (unsigned i = 0; i != array_lengthof(Window); ++i) - if (Window[i] == C) - Score += i + 1; - if (Score > array_lengthof(Window) * 2) - return Hazard; - return NoHazard; - } - - virtual void Reset() { - for (unsigned i = 0; i != array_lengthof(Window); ++i) - Window[i] = Other; - } - - virtual void EmitInstruction(SUnit *SU) { - Step(getClass(SU)); - } - - virtual void AdvanceCycle() { - Step(Other); - } - }; -} - -#endif diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index ed3c243ff3e4..e69d3e4fa78a 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -99,15 +99,23 @@ void SimpleRegisterCoalescing::getAnalysisUsage(AnalysisUsage &AU) const { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI) { + // Bail if there is no dst interval - can happen when merging physical subreg + // operations. + if (!li_->hasInterval(CP.getDstReg())) + return false; + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. Two options: either this value has @@ -119,7 +127,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getUseIndex(); LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - assert(ALR != IntA.end() && "Live range not found!"); + // The live range might not exist after fun with physreg coalescing. + if (ALR == IntA.end()) return false; VNInfo *AValNo = ALR->valno; // If it's re-defined by an early clobber somewhere in the live range, then // it's not safe to eliminate the copy. FIXME: This is a temporary workaround. @@ -145,26 +154,21 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. - unsigned SrcReg = li_->getVNInfoSourceReg(AValNo); - if (!SrcReg) return false; // Not defined by a copy. - - // If the value number is not defined by a copy instruction, ignore it. - - // If the source register comes from an interval other than IntB, we can't - // handle this. - if (SrcReg != IntB.reg) return false; + if (!CP.isCoalescable(AValNo->getCopy())) + return false; // Get the LiveRange in IntB that this value number starts with. 
LiveInterval::iterator ValLR = IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - assert(ValLR != IntB.end() && "Live range not found!"); + if (ValLR == IntB.end()) + return false; // Make sure that the end of the live range is inside the same block as // CopyMI. MachineInstr *ValLREndInst = li_->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || - ValLREndInst->getParent() != CopyMI->getParent()) return false; + if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + return false; // Okay, we now know that ValLR ends in the same block that the CopyMI // live-range starts. If there are no intervening live ranges between them in @@ -207,6 +211,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // physreg has sub-registers, update their live intervals as well. if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); SRLI.addRange(LiveRange(FillerStart, FillerEnd, SRLI.getNextValue(FillerStart, 0, true, @@ -216,7 +222,6 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, // Okay, merge "B1" into the same value number as "B0". if (BValNo != ValLR->valno) { - IntB.addKills(ValLR->valno, BValNo->kills); IntB.MergeValueNumberInto(BValNo, ValLR->valno); } DEBUG({ @@ -230,13 +235,12 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA, int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { ValLREndInst->getOperand(UIdx).setIsKill(false); - ValLR->valno->removeKill(FillerStart); } // If the copy instruction was killing the destination register before the // merge, find the last use and trim the live range. That will also add the // isKill marker. - if (ALR->valno->isKill(CopyIdx)) + if (ALR->end == CopyIdx) TrimLiveIntervalToLastUse(CopyUseIdx, CopyMI->getParent(), IntA, ALR); ++numExtends; @@ -304,23 +308,31 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) { /// /// This returns true if an interval was modified. /// -bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, - LiveInterval &IntB, +bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(const CoalescerPair &CP, MachineInstr *CopyMI) { - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - // FIXME: For now, only eliminate the copy by commuting its def when the // source register is a virtual register. We want to guard against cases // where the copy is a back edge copy and commuting the def lengthen the // live interval of the source register to the entire loop. - if (TargetRegisterInfo::isPhysicalRegister(IntA.reg)) + if (CP.isPhys() && CP.isFlipped()) + return false; + + // Bail if there is no dst interval. + if (!li_->hasInterval(CP.getDstReg())) return false; + SlotIndex CopyIdx = + li_->getInstructionIndex(CopyMI).getDefIndex(); + + LiveInterval &IntA = + li_->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); + LiveInterval &IntB = + li_->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); + // BValNo is a value number in B that is defined by a copy from A. 'B3' in // the example above. LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - assert(BLR != IntB.end() && "Live range not found!"); + if (BLR == IntB.end()) return false; VNInfo *BValNo = BLR->valno; // Get the location that B is defined at. 
Two options: either this value has @@ -342,6 +354,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, AValNo->isUnused() || AValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(AValNo->def); + if (!DefMI) + return false; const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isCommutable()) return false; @@ -380,7 +394,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // clobbers from the superreg. if (BHasSubRegs) for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) - if (HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) + if (li_->hasInterval(*SR) && + HasOtherReachingDefs(IntA, li_->getInterval(*SR), AValNo, 0)) return false; // If some of the uses of IntA.reg is already coalesced away, return false. @@ -413,7 +428,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, bool BHasPHIKill = BValNo->hasPHIKill(); SmallVector BDeadValNos; - VNInfo::KillSet BKills; std::map BExtend; // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. @@ -424,8 +438,6 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, // C = A // ... // = B - // - // then do not add kills of A to the newly created B interval. bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; if (Extended) BExtend[ALR->end] = BLR->end; @@ -448,34 +460,38 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); if (ULR == IntA.end() || ULR->valno != AValNo) continue; - UseMO.setReg(NewReg); + if (TargetRegisterInfo::isPhysicalRegister(NewReg)) + UseMO.substPhysReg(NewReg, *tri_); + else + UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; if (UseMO.isKill()) { if (Extended) UseMO.setIsKill(false); - else - BKills.push_back(UseIdx.getDefIndex()); } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) + if (UseMI->isCopy()) { + if (UseMI->getOperand(0).getReg() != IntB.reg || + UseMI->getOperand(0).getSubReg()) + continue; + } else if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){ + if (DstReg != IntB.reg || DstSubIdx) + continue; + } else continue; - if (DstReg == IntB.reg && DstSubIdx == 0) { - // This copy will become a noop. If it's defining a new val#, - // remove that val# as well. However this live range is being - // extended to the end of the existing live range defined by the copy. - SlotIndex DefIdx = UseIdx.getDefIndex(); - const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); - BHasPHIKill |= DLR->valno->hasPHIKill(); - assert(DLR->valno->def == DefIdx); - BDeadValNos.push_back(DLR->valno); - BExtend[DLR->start] = DLR->end; - JoinedCopies.insert(UseMI); - // If this is a kill but it's going to be removed, the last use - // of the same val# is the new kill. - if (UseMO.isKill()) - BKills.pop_back(); - } + // This copy will become a noop. If it's defining a new val#, + // remove that val# as well. However this live range is being + // extended to the end of the existing live range defined by the copy. 
+ SlotIndex DefIdx = UseIdx.getDefIndex(); + const LiveRange *DLR = IntB.getLiveRangeContaining(DefIdx); + if (!DLR) + continue; + BHasPHIKill |= DLR->valno->hasPHIKill(); + assert(DLR->valno->def == DefIdx); + BDeadValNos.push_back(DLR->valno); + BExtend[DLR->start] = DLR->end; + JoinedCopies.insert(UseMI); } // We need to insert a new liverange: [ALR.start, LastUse). It may be we can @@ -490,24 +506,21 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, VNInfo *DeadVNI = BDeadValNos[i]; if (BHasSubRegs) { for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { + if (!li_->hasInterval(*SR)) + continue; LiveInterval &SRLI = li_->getInterval(*SR); - const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def); - SRLI.removeValNo(SRLR->valno); + if (const LiveRange *SRLR = SRLI.getLiveRangeContaining(DeadVNI->def)) + SRLI.removeValNo(SRLR->valno); } } IntB.removeValNo(BDeadValNos[i]); } // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition - // is updated. Kills are also updated. + // is updated. VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; ValNo->setCopy(0); - for (unsigned j = 0, ee = ValNo->kills.size(); j != ee; ++j) { - if (ValNo->kills[j] != BLR->end) - BKills.push_back(ValNo->kills[j]); - } - ValNo->kills.clear(); for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; @@ -517,18 +530,7 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, if (EI != BExtend.end()) End = EI->second; IntB.addRange(LiveRange(AI->start, End, ValNo)); - - // If the IntB live range is assigned to a physical register, and if that - // physreg has sub-registers, update their live intervals as well. - if (BHasSubRegs) { - for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) { - LiveInterval &SRLI = li_->getInterval(*SR); - SRLI.MergeInClobberRange(*li_, AI->start, End, - li_->getVNInfoAllocator()); - } - } } - IntB.addKills(ValNo, BKills); ValNo->setHasPHIKill(BHasPHIKill); DEBUG({ @@ -621,7 +623,11 @@ SimpleRegisterCoalescing::TrimLiveIntervalToLastUse(SlotIndex CopyIdx, // of last use. LastUse->setIsKill(); removeRange(li, LastUseIdx.getDefIndex(), LR->end, li_, tri_); - LR->valno->addKill(LastUseIdx.getDefIndex()); + if (LastUseMI->isCopy()) { + MachineOperand &DefMO = LastUseMI->getOperand(0); + if (DefMO.getReg() == li.reg && !DefMO.getSubReg()) + DefMO.setIsDead(); + } unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*LastUseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && DstReg == li.reg && DstSubIdx == 0) { @@ -663,6 +669,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, ValNo->isUnused() || ValNo->hasPHIKill()) return false; MachineInstr *DefMI = li_->getInstructionFromIndex(ValNo->def); + assert(DefMI && "Defining instruction disappeared"); const TargetInstrDesc &TID = DefMI->getDesc(); if (!TID.isAsCheapAsAMove()) return false; @@ -701,33 +708,20 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, return false; } - SlotIndex DefIdx = CopyIdx.getDefIndex(); - const LiveRange *DLR= li_->getInterval(DstReg).getLiveRangeContaining(DefIdx); - DLR->valno->setCopy(0); - // Don't forget to update sub-register intervals. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } + RemoveCopyFlag(DstReg, CopyMI); // If copy kills the source register, find the last use and propagate // kill. bool checkForDeadDef = false; MachineBasicBlock *MBB = CopyMI->getParent(); - if (SrcLR->valno->isKill(DefIdx)) + if (SrcLR->end == CopyIdx.getDefIndex()) if (!TrimLiveIntervalToLastUse(CopyIdx, MBB, SrcInt, SrcLR)) { checkForDeadDef = true; } MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_); + tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, *tri_); MachineInstr *NewMI = prior(MII); if (checkForDeadDef) { @@ -747,24 +741,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, MachineOperand &MO = CopyMI->getOperand(i); if (MO.isReg() && MO.isImplicit()) NewMI->addOperand(MO); - if (MO.isDef() && li_->hasInterval(MO.getReg())) { - unsigned Reg = MO.getReg(); - const LiveRange *DLR = - li_->getInterval(Reg).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - // Handle subregs as well - if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) { - if (!li_->hasInterval(*SR)) - continue; - const LiveRange *DLR = - li_->getInterval(*SR).getLiveRangeContaining(DefIdx); - if (DLR && DLR->valno->getCopy() == CopyMI) - DLR->valno->setCopy(0); - } - } - } + if (MO.isDef()) + RemoveCopyFlag(MO.getReg(), CopyMI); } TransferImplicitOps(CopyMI, NewMI); @@ -783,84 +761,72 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt, /// being updated is not zero, make sure to set it to the correct physical /// subregister. void -SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, - unsigned SubIdx) { - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - if (DstIsPhys && SubIdx) { - // Figure out the real physical register we are updating with. - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } - - // Copy the register use-list before traversing it. We may be adding operands - // and invalidating pointers. - SmallVector, 32> reglist; - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg), - E = mri_->reg_end(); I != E; ++I) - reglist.push_back(std::make_pair(&*I, I.getOperandNo())); - - for (unsigned N=0; N != reglist.size(); ++N) { - MachineInstr *UseMI = reglist[N].first; - MachineOperand &O = UseMI->getOperand(reglist[N].second); - unsigned OldSubIdx = O.getSubReg(); +SimpleRegisterCoalescing::UpdateRegDefsUses(const CoalescerPair &CP) { + bool DstIsPhys = CP.isPhys(); + unsigned SrcReg = CP.getSrcReg(); + unsigned DstReg = CP.getDstReg(); + unsigned SubIdx = CP.getSubIdx(); + + for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(SrcReg); + MachineInstr *UseMI = I.skipInstruction();) { + // A PhysReg copy that won't be coalesced can perhaps be rematerialized + // instead. 
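The rewritten UpdateRegDefsUses that begins above walks the register's use list with skipInstruction(), advancing past the current instruction before any of its operands are rewritten, instead of snapshotting the whole list first as the old code did. A small sketch of the same advance-before-mutate pattern on a toy use list (the types and the identity-copy cleanup are invented for illustration):

    #include <cstdio>
    #include <list>

    // A toy "copy" instruction: DefReg = UseReg.
    struct ToyCopy { int DefReg, UseReg; };

    // Rewrite uses of OldReg to NewReg.  The iterator is advanced before the
    // current element is touched, so erasing that element (when the copy turns
    // into an identity copy) cannot break the traversal.
    static void rewriteUses(std::list<ToyCopy> &Uses, int OldReg, int NewReg) {
      for (std::list<ToyCopy>::iterator I = Uses.begin(); I != Uses.end();) {
        std::list<ToyCopy>::iterator Cur = I++;      // advance first
        if (Cur->UseReg == OldReg)
          Cur->UseReg = NewReg;
        if (Cur->UseReg == Cur->DefReg)
          Uses.erase(Cur);                           // now an identity copy, drop it
      }
    }

    int main() {
      std::list<ToyCopy> Uses;
      ToyCopy A = { 42, 1024 }, B = { 7, 3 };
      Uses.push_back(A);
      Uses.push_back(B);
      rewriteUses(Uses, 1024, 42);
      std::printf("remaining uses: %u\n", (unsigned)Uses.size());
      return 0;
    }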
if (DstIsPhys) { - unsigned UseDstReg = DstReg; - if (OldSubIdx) - UseDstReg = tri_->getSubReg(DstReg, OldSubIdx); - unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && - CopySrcSubIdx == 0 && - CopyDstSubIdx == 0 && - CopySrcReg != CopyDstReg && - CopySrcReg == SrcReg && CopyDstReg != UseDstReg) { - // If the use is a copy and it won't be coalesced away, and its source - // is defined by a trivial computation, try to rematerialize it instead. - if (!JoinedCopies.count(UseMI) && - ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, - CopyDstSubIdx, UseMI)) - continue; - } + CopySrcSubIdx == 0 && CopyDstSubIdx == 0 && + CopySrcReg != CopyDstReg && CopySrcReg == SrcReg && + CopyDstReg != DstReg && !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), CopyDstReg, 0, + UseMI)) + continue; - O.setReg(UseDstReg); - O.setSubReg(0); - if (OldSubIdx) { - // Def and kill of subregister of a virtual register actually defs and - // kills the whole register. Add imp-defs and imp-kills as needed. - if (O.isDef()) { - if(O.isDead()) - UseMI->addRegisterDead(DstReg, tri_, true); - else - UseMI->addRegisterDefined(DstReg, tri_); - } else if (!O.isUndef() && - (O.isKill() || - UseMI->isRegTiedToDefOperand(&O-&UseMI->getOperand(0)))) - UseMI->addRegisterKilled(DstReg, tri_, true); - } + if (UseMI->isCopy() && + !UseMI->getOperand(1).getSubReg() && + !UseMI->getOperand(0).getSubReg() && + UseMI->getOperand(1).getReg() == SrcReg && + UseMI->getOperand(0).getReg() != SrcReg && + UseMI->getOperand(0).getReg() != DstReg && + !JoinedCopies.count(UseMI) && + ReMaterializeTrivialDef(li_->getInterval(SrcReg), + UseMI->getOperand(0).getReg(), 0, UseMI)) + continue; + } - DEBUG({ - dbgs() << "\t\tupdated: "; - if (!UseMI->isDebugValue()) - dbgs() << li_->getInstructionIndex(UseMI) << "\t"; - dbgs() << *UseMI; - }); - continue; + SmallVector Ops; + bool Reads, Writes; + tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); + bool Kills = false, Deads = false; + + // Replace SrcReg with DstReg in all UseMI operands. + for (unsigned i = 0, e = Ops.size(); i != e; ++i) { + MachineOperand &MO = UseMI->getOperand(Ops[i]); + Kills |= MO.isKill(); + Deads |= MO.isDead(); + + if (DstIsPhys) + MO.substPhysReg(DstReg, *tri_); + else + MO.substVirtReg(DstReg, SubIdx, *tri_); } - // Sub-register indexes goes from small to large. e.g. - // RAX: 1 -> AL, 2 -> AX, 3 -> EAX - // EAX: 1 -> AL, 2 -> AX - // So RAX's sub-register 2 is AX, RAX's sub-regsiter 3 is EAX, whose - // sub-register 2 is also AX. - // - // FIXME: Properly compose subreg indices for all targets. - // - if (SubIdx && OldSubIdx && SubIdx != OldSubIdx) - ; - else if (SubIdx) - O.setSubReg(SubIdx); - O.setReg(DstReg); + // This instruction is a copy that will be removed. + if (JoinedCopies.count(UseMI)) + continue; + + if (SubIdx) { + // If UseMI was a simple SrcReg def, make sure we didn't turn it into a + // read-modify-write of DstReg. + if (Deads) + UseMI->addRegisterDead(DstReg, tri_); + else if (!Reads && Writes) + UseMI->addRegisterDefined(DstReg, tri_); + + // Kill flags apply to the whole physical register. 
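The loop above first collects every operand of UseMI that touches SrcReg, rewrites each one to DstReg via substPhysReg or substVirtReg, and remembers whether any operand carried kill or dead flags so they can be reapplied at full-register width afterwards. A self-contained sketch of that collect-then-substitute pass over toy operands (only the control flow mirrors the code; the types are invented):

    #include <cstdio>
    #include <vector>

    struct ToyOperand {
      int Reg;
      bool IsDef, IsKill, IsDead;
    };

    // Rewrite every operand that touches SrcReg so it refers to DstReg, and
    // report whether any rewritten operand carried a kill or dead flag.  The
    // real code then reapplies those flags to the whole register
    // (addRegisterKilled / addRegisterDead) when a sub-register index is in play.
    static void substituteReg(std::vector<ToyOperand> &Ops, int SrcReg,
                              int DstReg, bool &Kills, bool &Deads) {
      Kills = Deads = false;
      for (size_t i = 0, e = Ops.size(); i != e; ++i) {
        ToyOperand &MO = Ops[i];
        if (MO.Reg != SrcReg)
          continue;
        Kills |= MO.IsKill;
        Deads |= MO.IsDead;
        MO.Reg = DstReg;          // stands in for substPhysReg / substVirtReg
      }
    }

    int main() {
      std::vector<ToyOperand> Ops;
      ToyOperand Use = { 1024, false, true, false };   // reads reg 1024, kill
      ToyOperand Def = { 7, true, false, false };      // unrelated def
      Ops.push_back(Use);
      Ops.push_back(Def);
      bool Kills, Deads;
      substituteReg(Ops, 1024, 42, Kills, Deads);
      std::printf("reg=%d sawKill=%d sawDead=%d\n",
                  Ops[0].Reg, (int)Kills, (int)Deads);
      return 0;
    }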
+ if (DstIsPhys && Kills) + UseMI->addRegisterKilled(DstReg, tri_); + } DEBUG({ dbgs() << "\t\tupdated: "; @@ -869,15 +835,15 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, dbgs() << *UseMI; }); + // After updating the operand, check if the machine instruction has // become a copy. If so, update its val# information. - if (JoinedCopies.count(UseMI)) + const TargetInstrDesc &TID = UseMI->getDesc(); + if (DstIsPhys || TID.getNumDefs() != 1 || TID.getNumOperands() <= 2) continue; - const TargetInstrDesc &TID = UseMI->getDesc(); unsigned CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx; - if (TID.getNumDefs() == 1 && TID.getNumOperands() > 2 && - tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, + if (tii_->isMoveInstr(*UseMI, CopySrcReg, CopyDstReg, CopySrcSubIdx, CopyDstSubIdx) && CopySrcReg != CopyDstReg && (TargetRegisterInfo::isVirtualRegister(CopyDstReg) || @@ -945,6 +911,27 @@ bool SimpleRegisterCoalescing::RemoveDeadDef(LiveInterval &li, return removeIntervalIfEmpty(li, li_, tri_); } +void SimpleRegisterCoalescing::RemoveCopyFlag(unsigned DstReg, + const MachineInstr *CopyMI) { + SlotIndex DefIdx = li_->getInstructionIndex(CopyMI).getDefIndex(); + if (li_->hasInterval(DstReg)) { + LiveInterval &LI = li_->getInterval(DstReg); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } + if (!TargetRegisterInfo::isPhysicalRegister(DstReg)) + return; + for (const unsigned* AS = tri_->getAliasSet(DstReg); *AS; ++AS) { + if (!li_->hasInterval(*AS)) + continue; + LiveInterval &LI = li_->getInterval(*AS); + if (const LiveRange *LR = LI.getLiveRangeContaining(DefIdx)) + if (LR->valno->getCopy() == CopyMI) + LR->valno->setCopy(0); + } +} + /// PropagateDeadness - Propagate the dead marker to the instruction which /// defines the val#. static void PropagateDeadness(LiveInterval &li, MachineInstr *CopyMI, @@ -978,8 +965,8 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // Live-in to the function but dead. Remove it from entry live-in set. if (mf_->begin()->isLiveIn(li.reg)) mf_->begin()->removeLiveIn(li.reg); - const LiveRange *LR = li.getLiveRangeContaining(CopyIdx); - removeRange(li, LR->start, LR->end, li_, tri_); + if (const LiveRange *LR = li.getLiveRangeContaining(CopyIdx)) + removeRange(li, LR->start, LR->end, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } @@ -1017,147 +1004,12 @@ SimpleRegisterCoalescing::ShortenDeadCopySrcLiveRange(LiveInterval &li, // val#, then propagate the dead marker. PropagateDeadness(li, CopyMI, RemoveStart, li_, tri_); ++numDeadValNo; - - if (LR->valno->isKill(RemoveEnd)) - LR->valno->removeKill(RemoveEnd); } removeRange(li, RemoveStart, RemoveEnd, li_, tri_); return removeIntervalIfEmpty(li, li_, tri_); } -/// CanCoalesceWithImpDef - Returns true if the specified copy instruction -/// from an implicit def to another register can be coalesced away. -bool SimpleRegisterCoalescing::CanCoalesceWithImpDef(MachineInstr *CopyMI, - LiveInterval &li, - LiveInterval &ImpLi) const{ - if (!CopyMI->killsRegister(ImpLi.reg)) - return false; - // Make sure this is the only use. 
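The new RemoveCopyFlag above looks up whichever value of DstReg is live at the copy's def slot and, if that value was marked as defined by CopyMI, clears the marker; for a physical register the same is repeated for every alias. A standalone model of that lookup-and-clear with invented containers in place of LiveIntervals:

    #include <cstdio>
    #include <map>
    #include <vector>

    struct ToyValue { const void *DefiningCopy; };
    struct ToyRange { int Start, End; ToyValue *ValNo; };
    typedef std::map<int, std::vector<ToyRange> > IntervalMap;  // reg -> ranges

    // Find the range of Reg that contains Idx; if its value is marked as being
    // defined by CopyMI, clear that marker.  For a physical register the
    // caller would repeat this for every alias, just like RemoveCopyFlag.
    static void removeCopyFlag(IntervalMap &LIs, int Reg, int Idx,
                               const void *CopyMI) {
      IntervalMap::iterator It = LIs.find(Reg);
      if (It == LIs.end())
        return;                               // no interval for this register
      std::vector<ToyRange> &Ranges = It->second;
      for (size_t i = 0, e = Ranges.size(); i != e; ++i)
        if (Ranges[i].Start <= Idx && Idx < Ranges[i].End &&
            Ranges[i].ValNo->DefiningCopy == CopyMI)
          Ranges[i].ValNo->DefiningCopy = 0;
    }

    int main() {
      int CopyMI = 0;                  // stands in for the copy instruction
      ToyValue V;
      V.DefiningCopy = &CopyMI;
      IntervalMap LIs;
      ToyRange R = { 0, 10, &V };
      LIs[42].push_back(R);
      removeCopyFlag(LIs, 42, 4, &CopyMI);
      std::printf("cleared=%d\n", (int)(V.DefiningCopy == 0));
      return 0;
    }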
- for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(ImpLi.reg), - UE = mri_->use_end(); UI != UE;) { - MachineInstr *UseMI = &*UI; - ++UI; - if (CopyMI == UseMI || JoinedCopies.count(UseMI)) - continue; - return false; - } - return true; -} - - -/// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a -/// a virtual destination register with physical source register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use desity, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(DstInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(DstInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(DstInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - // If the virtual register live interval extends into a loop, turn down - // aggressiveness. - SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - const MachineLoop *L = loopInfo->getLoopFor(CopyMBB); - if (!L) { - // Let's see if the virtual register live interval extends into the loop. - LiveInterval::iterator DLR = DstInt.FindLiveRangeContaining(CopyIdx); - assert(DLR != DstInt.end() && "Live range not found!"); - DLR = DstInt.FindLiveRangeContaining(DLR->end.getNextSlot()); - if (DLR != DstInt.end()) { - CopyMBB = li_->getMBBFromIndex(DLR->start); - L = loopInfo->getLoopFor(CopyMBB); - } - } - - if (!L || Length <= Threshold) - return true; - - SlotIndex UseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); - MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); - if (loopInfo->getLoopFor(SMBB) != L) { - if (!loopInfo->isLoopHeader(CopyMBB)) - return false; - // If vr's live interval extends pass the loop header, do not join. - for (MachineBasicBlock::succ_iterator SI = CopyMBB->succ_begin(), - SE = CopyMBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock *SuccMBB = *SI; - if (SuccMBB == CopyMBB) - continue; - if (DstInt.overlaps(li_->getMBBStartIdx(SuccMBB), - li_->getMBBEndIdx(SuccMBB))) - return false; - } - } - return true; -} - -/// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a -/// copy from a virtual source register to a physical destination register. -bool -SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, - LiveInterval &SrcInt) { - // If the virtual register live interval is long but it has low use density, - // do not join them, instead mark the physical register as its allocation - // preference. - const TargetRegisterClass *RC = mri_->getRegClass(SrcInt.reg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(SrcInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(SrcInt.reg), - mri_->use_nodbg_end()) * Threshold < Length) - return false; - - if (SrcInt.empty()) - // Must be implicit_def. - return false; - - // If the virtual register live interval is defined or cross a loop, turn - // down aggressiveness. 
- SlotIndex CopyIdx = - li_->getInstructionIndex(CopyMI).getDefIndex(); - SlotIndex UseIdx = CopyIdx.getUseIndex(); - LiveInterval::iterator SLR = SrcInt.FindLiveRangeContaining(UseIdx); - assert(SLR != SrcInt.end() && "Live range not found!"); - SLR = SrcInt.FindLiveRangeContaining(SLR->start.getPrevSlot()); - if (SLR == SrcInt.end()) - return true; - MachineBasicBlock *SMBB = li_->getMBBFromIndex(SLR->start); - const MachineLoop *L = loopInfo->getLoopFor(SMBB); - - if (!L || Length <= Threshold) - return true; - - if (loopInfo->getLoopFor(CopyMBB) != L) { - if (SMBB != L->getLoopLatch()) - return false; - // If vr's live interval is extended from before the loop latch, do not - // join. - for (MachineBasicBlock::pred_iterator PI = SMBB->pred_begin(), - PE = SMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *PredMBB = *PI; - if (PredMBB == SMBB) - continue; - if (SrcInt.overlaps(li_->getMBBStartIdx(PredMBB), - li_->getMBBEndIdx(PredMBB))) - return false; - } - } - return true; -} /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. @@ -1203,157 +1055,6 @@ SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg, return true; } -/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual -/// register with a physical register, check if any of the virtual register -/// operand is a sub-register use or def. If so, make sure it won't result -/// in an illegal extract_subreg or insert_subreg instruction. e.g. -/// vr1024 = extract_subreg vr1025, 1 -/// ... -/// vr1024 = mov8rr AH -/// If vr1024 is coalesced with AH, the extract_subreg is now illegal since -/// AH does not have a super-reg whose sub-register 1 is AH. -bool -SimpleRegisterCoalescing::HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, - unsigned VirtReg, - unsigned PhysReg) { - for (MachineRegisterInfo::reg_iterator I = mri_->reg_begin(VirtReg), - E = mri_->reg_end(); I != E; ++I) { - MachineOperand &O = I.getOperand(); - if (O.isDebug()) - continue; - MachineInstr *MI = &*I; - if (MI == CopyMI || JoinedCopies.count(MI)) - continue; - unsigned SubIdx = O.getSubReg(); - if (SubIdx && !tri_->getSubReg(PhysReg, SubIdx)) - return true; - if (MI->isExtractSubreg()) { - SubIdx = MI->getOperand(2).getImm(); - if (O.isUse() && !tri_->getSubReg(PhysReg, SubIdx)) - return true; - if (O.isDef()) { - unsigned SrcReg = MI->getOperand(1).getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isPhysicalRegister(SrcReg) - ? tri_->getPhysicalRegisterRegClass(SrcReg) - : mri_->getRegClass(SrcReg); - if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC)) - return true; - } - } - if (MI->isInsertSubreg() || MI->isSubregToReg()) { - SubIdx = MI->getOperand(3).getImm(); - if (VirtReg == MI->getOperand(0).getReg()) { - if (!tri_->getSubReg(PhysReg, SubIdx)) - return true; - } else { - unsigned DstReg = MI->getOperand(0).getReg(); - const TargetRegisterClass *RC = - TargetRegisterInfo::isPhysicalRegister(DstReg) - ? tri_->getPhysicalRegisterRegClass(DstReg) - : mri_->getRegClass(DstReg); - if (!tri_->getMatchingSuperReg(PhysReg, SubIdx, RC)) - return true; - } - } - } - return false; -} - - -/// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce -/// an extract_subreg where dst is a physical register, e.g. 
-/// cl = EXTRACT_SUBREG reg1024, 1 -bool -SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg, - unsigned SrcReg, unsigned SubIdx, - unsigned &RealDstReg) { - const TargetRegisterClass *RC = mri_->getRegClass(SrcReg); - RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC); - if (!RealDstReg) { - DEBUG(dbgs() << "\tIncompatible source regclass: " - << "none of the super-registers of " << tri_->getName(DstReg) - << " are in " << RC->getName() << ".\n"); - return false; - } - - LiveInterval &RHS = li_->getInterval(SrcReg); - // For this type of EXTRACT_SUBREG, conservatively - // check if the live interval of the source register interfere with the - // actual super physical register we are trying to coalesce with. - if (li_->hasInterval(RealDstReg) && - RHS.overlaps(li_->getInterval(RealDstReg))) { - DEBUG({ - dbgs() << "\t\tInterfere with register "; - li_->getInterval(RealDstReg).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR) - // Do not check DstReg or its sub-register. JoinIntervals() will take care - // of that. - if (*SR != DstReg && - !tri_->isSubRegister(DstReg, *SR) && - li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - return true; -} - -/// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce -/// an insert_subreg where src is a physical register, e.g. -/// reg1024 = INSERT_SUBREG reg1024, c1, 0 -bool -SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg, - unsigned SrcReg, unsigned SubIdx, - unsigned &RealSrcReg) { - const TargetRegisterClass *RC = mri_->getRegClass(DstReg); - RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC); - if (!RealSrcReg) { - DEBUG(dbgs() << "\tIncompatible destination regclass: " - << "none of the super-registers of " << tri_->getName(SrcReg) - << " are in " << RC->getName() << ".\n"); - return false; - } - - LiveInterval &LHS = li_->getInterval(DstReg); - if (li_->hasInterval(RealSrcReg) && - LHS.overlaps(li_->getInterval(RealSrcReg))) { - DEBUG({ - dbgs() << "\t\tInterfere with register "; - li_->getInterval(RealSrcReg).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR) - // Do not check SrcReg or its sub-register. JoinIntervals() will take care - // of that. - if (*SR != SrcReg && - !tri_->isSubRegister(SrcReg, *SR) && - li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\t\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; // Not coalescable - } - return true; -} - -/// getRegAllocPreference - Return register allocation preference register. -/// -static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF, - MachineRegisterInfo *MRI, - const TargetRegisterInfo *TRI) { - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - return 0; - std::pair Hint = MRI->getRegAllocationHint(Reg); - return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF); -} /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. 
This returns true @@ -1369,354 +1070,97 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << li_->getInstructionIndex(CopyMI) << '\t' << *CopyMI); - unsigned SrcReg, DstReg, SrcSubIdx = 0, DstSubIdx = 0; - bool isExtSubReg = CopyMI->isExtractSubreg(); - bool isInsSubReg = CopyMI->isInsertSubreg(); - bool isSubRegToReg = CopyMI->isSubregToReg(); - unsigned SubIdx = 0; - if (isExtSubReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(0).getSubReg(); - SrcReg = CopyMI->getOperand(1).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getImm(); - } else if (isInsSubReg || isSubRegToReg) { - DstReg = CopyMI->getOperand(0).getReg(); - DstSubIdx = CopyMI->getOperand(3).getImm(); - SrcReg = CopyMI->getOperand(2).getReg(); - SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx && SrcSubIdx != DstSubIdx) { - // r1025 = INSERT_SUBREG r1025, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - DEBUG(dbgs() << "\tSource of insert_subreg or subreg_to_reg is already " - "coalesced to another register.\n"); - return false; // Not coalescable. - } - } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) { - // e.g. %reg16404:1 = MOV8rr %reg16412:2 - Again = true; - return false; // Not coalescable. - } - } else { - llvm_unreachable("Unrecognized copy instruction!"); + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; } // If they are already joined we continue. - if (SrcReg == DstReg) { + if (CP.getSrcReg() == CP.getDstReg()) { DEBUG(dbgs() << "\tCopy already coalesced.\n"); return false; // Not coalescable. } - bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); - bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); - - // If they are both physical registers, we cannot join them. - if (SrcIsPhys && DstIsPhys) { - DEBUG(dbgs() << "\tCan not coalesce physregs.\n"); - return false; // Not coalescable. - } - - // We only join virtual registers with allocatable physical registers. - if (SrcIsPhys && !allocatableRegs_[SrcReg]) { - DEBUG(dbgs() << "\tSrc reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - if (DstIsPhys && !allocatableRegs_[DstReg]) { - DEBUG(dbgs() << "\tDst reg is unallocatable physreg.\n"); - return false; // Not coalescable. - } - - // We cannot handle dual subreg indices and mismatched classes at the same - // time. - if (SrcSubIdx && DstSubIdx && differingRegisterClasses(SrcReg, DstReg)) { - DEBUG(dbgs() << "\tCannot handle subreg indices and mismatched classes.\n"); - return false; - } + DEBUG(dbgs() << "\tConsidering merging %reg" << CP.getSrcReg()); - // Check that a physical source register is compatible with dst regclass - if (SrcIsPhys) { - unsigned SrcSubReg = SrcSubIdx ? - tri_->getSubReg(SrcReg, SrcSubIdx) : SrcReg; - const TargetRegisterClass *DstRC = mri_->getRegClass(DstReg); - const TargetRegisterClass *DstSubRC = DstRC; - if (DstSubIdx) - DstSubRC = DstRC->getSubRegisterRegClass(DstSubIdx); - assert(DstSubRC && "Illegal subregister index"); - if (!DstSubRC->contains(SrcSubReg)) { - DEBUG(dbgs() << "\tIncompatible destination regclass: " - << "none of the super-registers of " - << tri_->getName(SrcSubReg) << " are in " - << DstSubRC->getName() << ".\n"); - return false; // Not coalescable. 
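JoinCopy now leans on CoalescerPair: setRegisters(CopyMI) pulls the source, destination and sub-register index out of any copy-like instruction, refuses pairs the coalescer cannot handle, and canonicalizes so that a physical register, if present, ends up on the destination side. A toy version of that normalization step (the struct and its helpers are invented and only mimic the queries used above):

    #include <cstdio>

    // Registers below some bound count as "physical" in this toy model.
    static const int FirstVirtReg = 1024;
    static bool isPhysical(int Reg) { return Reg < FirstVirtReg; }

    struct ToyCopy { int DstReg, SrcReg, SubIdx; };

    struct ToyCoalescerPair {
      int DstReg, SrcReg, SubIdx;
      bool Phys;

      // Returns false for pairs the coalescer refuses up front, for example a
      // copy between two physical registers.
      bool setRegisters(const ToyCopy &MI) {
        DstReg = MI.DstReg;
        SrcReg = MI.SrcReg;
        SubIdx = MI.SubIdx;
        if (isPhysical(SrcReg) && isPhysical(DstReg))
          return false;
        if (isPhysical(SrcReg)) {     // canonicalize: physreg on the dst side
          int T = SrcReg; SrcReg = DstReg; DstReg = T;
        }
        Phys = isPhysical(DstReg);
        return true;
      }
    };

    int main() {
      ToyCopy MI = { 2048, 5, 0 };    // virtual register = copy of physreg 5
      ToyCoalescerPair CP;
      if (CP.setRegisters(MI))
        std::printf("dst=%d src=%d phys=%d\n",
                    CP.DstReg, CP.SrcReg, (int)CP.Phys);
      return 0;
    }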
- } - } - - // Check that a physical dst register is compatible with source regclass - if (DstIsPhys) { - unsigned DstSubReg = DstSubIdx ? - tri_->getSubReg(DstReg, DstSubIdx) : DstReg; - const TargetRegisterClass *SrcRC = mri_->getRegClass(SrcReg); - const TargetRegisterClass *SrcSubRC = SrcRC; - if (SrcSubIdx) - SrcSubRC = SrcRC->getSubRegisterRegClass(SrcSubIdx); - assert(SrcSubRC && "Illegal subregister index"); - if (!SrcSubRC->contains(DstSubReg)) { - DEBUG(dbgs() << "\tIncompatible source regclass: " - << "none of the super-registers of " - << tri_->getName(DstSubReg) << " are in " - << SrcSubRC->getName() << ".\n"); - (void)DstSubReg; - return false; // Not coalescable. + // Enforce policies. + if (CP.isPhys()) { + DEBUG(dbgs() <<" with physreg %" << tri_->getName(CP.getDstReg()) << "\n"); + // Only coalesce to allocatable physreg. + if (!allocatableRegs_[CP.getDstReg()]) { + DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); + return false; // Not coalescable. } - } - - // Should be non-null only when coalescing to a sub-register class. - bool CrossRC = false; - const TargetRegisterClass *SrcRC= SrcIsPhys ? 0 : mri_->getRegClass(SrcReg); - const TargetRegisterClass *DstRC= DstIsPhys ? 0 : mri_->getRegClass(DstReg); - const TargetRegisterClass *NewRC = NULL; - unsigned RealDstReg = 0; - unsigned RealSrcReg = 0; - if (isExtSubReg || isInsSubReg || isSubRegToReg) { - SubIdx = CopyMI->getOperand(isExtSubReg ? 2 : 3).getImm(); - if (SrcIsPhys && isExtSubReg) { - // r1024 = EXTRACT_SUBREG EAX, 0 then r1024 is really going to be - // coalesced with AX. - unsigned DstSubIdx = CopyMI->getOperand(0).getSubReg(); - if (DstSubIdx) { - // r1024<2> = EXTRACT_SUBREG EAX, 2. Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (DstSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - SrcReg = tri_->getSubReg(SrcReg, SubIdx); - SubIdx = 0; - } else if (DstIsPhys && (isInsSubReg || isSubRegToReg)) { - // EAX = INSERT_SUBREG EAX, r1024, 0 - unsigned SrcSubIdx = CopyMI->getOperand(2).getSubReg(); - if (SrcSubIdx) { - // EAX = INSERT_SUBREG EAX, r1024<2>, 2 Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - if (SrcSubIdx != SubIdx) { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } else - DstReg = tri_->getSubReg(DstReg, SubIdx); - SubIdx = 0; - } else if ((DstIsPhys && isExtSubReg) || - (SrcIsPhys && (isInsSubReg || isSubRegToReg))) { - if (!isSubRegToReg && CopyMI->getOperand(1).getSubReg()) { - DEBUG(dbgs() << "\tSrc of extract_subreg already coalesced with reg" - << " of a super-class.\n"); - return false; // Not coalescable. - } - - // FIXME: The following checks are somewhat conservative. Perhaps a better - // way to implement this is to treat this as coalescing a vr with the - // super physical register. - if (isExtSubReg) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg)) - return false; // Not coalescable - } else { - if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) - return false; // Not coalescable - } - SubIdx = 0; - } else { - unsigned OldSubIdx = isExtSubReg ? CopyMI->getOperand(0).getSubReg() - : CopyMI->getOperand(2).getSubReg(); - if (OldSubIdx) { - if (OldSubIdx == SubIdx && !differingRegisterClasses(SrcReg, DstReg)) - // r1024<2> = EXTRACT_SUBREG r1025, 2. 
Then r1024 has already been - // coalesced to a larger register so the subreg indices cancel out. - // Also check if the other larger register is of the same register - // class as the would be resulting register. - SubIdx = 0; - else { - DEBUG(dbgs() << "\t Sub-register indices mismatch.\n"); - return false; // Not coalescable. - } - } - if (SubIdx) { - if (!DstIsPhys && !SrcIsPhys) { - if (isInsSubReg || isSubRegToReg) { - NewRC = tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx); - } else // extract_subreg { - NewRC = tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx); - } - if (!NewRC) { - DEBUG(dbgs() << "\t Conflicting sub-register indices.\n"); - return false; // Not coalescable - } + } else { + DEBUG({ + dbgs() << " with reg%" << CP.getDstReg(); + if (CP.getSubIdx()) + dbgs() << ":" << tri_->getSubRegIndexName(CP.getSubIdx()); + dbgs() << " to " << CP.getNewRC()->getName() << "\n"; + }); - if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { - DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - Again = true; // May be possible to coalesce later. - return false; - } - } - } - } else if (differingRegisterClasses(SrcReg, DstReg)) { - if (DisableCrossClassJoin) - return false; - CrossRC = true; - - // FIXME: What if the result of a EXTRACT_SUBREG is then coalesced - // with another? If it's the resulting destination register, then - // the subidx must be propagated to uses (but only those defined - // by the EXTRACT_SUBREG). If it's being coalesced into another - // register, it should be safe because register is assumed to have - // the register class of the super-register. - - // Process moves where one of the registers have a sub-register index. - MachineOperand *DstMO = CopyMI->findRegisterDefOperand(DstReg); - MachineOperand *SrcMO = CopyMI->findRegisterUseOperand(SrcReg); - SubIdx = DstMO->getSubReg(); - if (SubIdx) { - if (SrcMO->getSubReg()) - // FIXME: can we handle this? + // Avoid constraining virtual register regclass too much. + if (CP.isCrossClass()) { + if (DisableCrossClassJoin) { + DEBUG(dbgs() << "\tCross-class joins disabled.\n"); return false; - // This is not an insert_subreg but it looks like one. - // e.g. %reg1024:4 = MOV32rr %EAX - isInsSubReg = true; - if (SrcIsPhys) { - if (!CanJoinInsertSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealSrcReg)) - return false; // Not coalescable - SubIdx = 0; - } - } else { - SubIdx = SrcMO->getSubReg(); - if (SubIdx) { - // This is not a extract_subreg but it looks like one. - // e.g. %cl = MOV16rr %reg1024:1 - isExtSubReg = true; - if (DstIsPhys) { - if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx,RealDstReg)) - return false; // Not coalescable - SubIdx = 0; - } - } - } - - // Now determine the register class of the joined register. - if (!SrcIsPhys && !DstIsPhys) { - if (isExtSubReg) { - NewRC = - SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC; - } else if (isInsSubReg) { - NewRC = - SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC; - } else { - NewRC = getCommonSubClass(SrcRC, DstRC); - } - - if (!NewRC) { - DEBUG(dbgs() << "\tDisjoint regclasses: " - << SrcRC->getName() << ", " - << DstRC->getName() << ".\n"); - return false; // Not coalescable. } - - // If we are joining two virtual registers and the resulting register - // class is more restrictive (fewer register, smaller size). Check if it's - // worth doing the merge. 
- if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) { + if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), + mri_->getRegClass(CP.getSrcReg()), + mri_->getRegClass(CP.getDstReg()), + CP.getNewRC())) { DEBUG(dbgs() << "\tAvoid coalescing to constrained register class: " - << SrcRC->getName() << "/" - << DstRC->getName() << " -> " - << NewRC->getName() << ".\n"); - // Allow the coalescer to try again in case either side gets coalesced to - // a physical register that's compatible with the other side. e.g. - // r1024 = MOV32to32_ r1025 - // But later r1024 is assigned EAX then r1025 may be coalesced with EAX. + << CP.getNewRC()->getName() << ".\n"); Again = true; // May be possible to coalesce later. return false; } } - } - - // Will it create illegal extract_subreg / insert_subreg? - if (SrcIsPhys && HasIncompatibleSubRegDefUse(CopyMI, DstReg, SrcReg)) - return false; - if (DstIsPhys && HasIncompatibleSubRegDefUse(CopyMI, SrcReg, DstReg)) - return false; - - LiveInterval &SrcInt = li_->getInterval(SrcReg); - LiveInterval &DstInt = li_->getInterval(DstReg); - assert(SrcInt.reg == SrcReg && DstInt.reg == DstReg && - "Register mapping is horribly broken!"); - DEBUG({ - dbgs() << "\t\tInspecting "; - if (SrcRC) dbgs() << SrcRC->getName() << ": "; - SrcInt.print(dbgs(), tri_); - dbgs() << "\n\t\t and "; - if (DstRC) dbgs() << DstRC->getName() << ": "; - DstInt.print(dbgs(), tri_); - dbgs() << "\n"; - }); + // When possible, let DstReg be the larger interval. + if (!CP.getSubIdx() && li_->getInterval(CP.getSrcReg()).ranges.size() > + li_->getInterval(CP.getDstReg()).ranges.size()) + CP.flip(); + } + + // We need to be careful about coalescing a source physical register with a + // virtual register. Once the coalescing is done, it cannot be broken and + // these are not spillable! If the destination interval uses are far away, + // think twice about coalescing them! + // FIXME: Why are we skipping this test for partial copies? + // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. + if (!CP.isPartial() && CP.isPhys()) { + LiveInterval &JoinVInt = li_->getInterval(CP.getSrcReg()); + + // Don't join with physregs that have a ridiculous number of live + // ranges. The data structure performance is really bad when that + // happens. + if (li_->hasInterval(CP.getDstReg()) && + li_->getInterval(CP.getDstReg()).ranges.size() > 1000) { + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() + << "\tPhysical register live interval too complicated, abort!\n"); + return false; + } - // Save a copy of the virtual register live interval. We'll manually - // merge this into the "real" physical register live interval this is - // coalesced with. - OwningPtr SavedLI; - if (RealDstReg) - SavedLI.reset(li_->dupInterval(&SrcInt)); - else if (RealSrcReg) - SavedLI.reset(li_->dupInterval(&DstInt)); - - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg) { - // Check if it is necessary to propagate "isDead" property. - MachineOperand *mopd = CopyMI->findRegisterDefOperand(DstReg, false); - bool isDead = mopd->isDead(); - - // We need to be careful about coalescing a source physical register with a - // virtual register. Once the coalescing is done, it cannot be broken and - // these are not spillable! If the destination interval uses are far away, - // think twice about coalescing them! 
- if (!isDead && (SrcIsPhys || DstIsPhys)) { - // If the virtual register live interval is long but it has low use - // density, do not join them, instead mark the physical register as its - // allocation preference. - LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt; - LiveInterval &JoinPInt = SrcIsPhys ? SrcInt : DstInt; - unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg; - unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg; - - // Don't join with physregs that have a ridiculous number of live - // ranges. The data structure performance is really bad when that - // happens. - if (JoinPInt.ranges.size() > 1000) { - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() - << "\tPhysical register live interval too complicated, abort!\n"); - return false; - } + const TargetRegisterClass *RC = mri_->getRegClass(CP.getSrcReg()); + unsigned Threshold = allocatableRCRegs_[RC].count() * 2; + unsigned Length = li_->getApproximateInstructionCount(JoinVInt); + if (Length > Threshold && + std::distance(mri_->use_nodbg_begin(CP.getSrcReg()), + mri_->use_nodbg_end()) * Threshold < Length) { + // Before giving up coalescing, if definition of source is defined by + // trivial computation, try rematerializing it. + if (!CP.isFlipped() && + ReMaterializeTrivialDef(JoinVInt, CP.getDstReg(), 0, CopyMI)) + return true; - const TargetRegisterClass *RC = mri_->getRegClass(JoinVReg); - unsigned Threshold = allocatableRCRegs_[RC].count() * 2; - unsigned Length = li_->getApproximateInstructionCount(JoinVInt); - if (Length > Threshold && - std::distance(mri_->use_nodbg_begin(JoinVReg), - mri_->use_nodbg_end()) * Threshold < Length) { - // Before giving up coalescing, if definition of source is defined by - // trivial computation, try rematerializing it. - if (ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) - return true; - - mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); - ++numAborts; - DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); - Again = true; // May be possible to coalesce later. - return false; - } + mri_->setRegAllocationHint(CP.getSrcReg(), 0, CP.getDstReg()); + ++numAborts; + DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); + Again = true; // May be possible to coalesce later. + return false; } } @@ -1724,32 +1168,24 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Otherwise, if one of the intervals being joined is a physreg, this method // always canonicalizes DstInt to be it. The output "SrcInt" will not have // been modified, so we can use this information below to update aliases. - bool Swapped = false; - // If SrcInt is implicitly defined, it's safe to coalesce. - if (SrcInt.empty()) { - if (!CanCoalesceWithImpDef(CopyMI, DstInt, SrcInt)) { - // Only coalesce an empty interval (defined by implicit_def) with - // another interval which has a valno defined by the CopyMI and the CopyMI - // is a kill of the implicit def. - DEBUG(dbgs() << "\tNot profitable!\n"); - return false; - } - } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { + if (!JoinIntervals(CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try // rematerializing it. - if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - ReMaterializeTrivialDef(SrcInt, DstReg, DstSubIdx, CopyMI)) + if (!CP.isFlipped() && + ReMaterializeTrivialDef(li_->getInterval(CP.getSrcReg()), + CP.getDstReg(), 0, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. 
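The added block above throttles virtual-with-physical joins: a physreg interval that already has a huge number of ranges is rejected outright, and otherwise the virtual interval's approximate length is weighed against a threshold derived from the register class size and the number of non-debug uses, trying rematerialization before giving up. A sketch of that length-versus-density test using the constants visible in the code (everything else is invented):

    #include <cstdio>

    // Decide whether joining a virtual interval with a physreg looks
    // profitable.  Mirrors: Threshold = allocatable regs in class * 2;
    // reject when Length > Threshold and Uses * Threshold < Length.
    static bool looksProfitable(unsigned Length, unsigned Uses,
                                unsigned AllocatableRegsInClass) {
      unsigned Threshold = AllocatableRegsInClass * 2;
      if (Length > Threshold && Uses * Threshold < Length)
        return false;        // long, sparsely used interval: keep it virtual
      return true;
    }

    int main() {
      std::printf("%d\n", looksProfitable(200, 3, 8));  // 0: would tie down a physreg
      std::printf("%d\n", looksProfitable(10, 3, 8));   // 1: short interval, join it
      return 0;
    }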
- if (!isExtSubReg && !isInsSubReg && !isSubRegToReg && - (AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) || - RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) { - JoinedCopies.insert(CopyMI); - DEBUG(dbgs() << "\tTrivial!\n"); - return true; + if (!CP.isPartial()) { + if (AdjustCopiesBackFrom(CP, CopyMI) || + RemoveCopyByCommutingDef(CP, CopyMI)) { + JoinedCopies.insert(CopyMI); + DEBUG(dbgs() << "\tTrivial!\n"); + return true; + } } // Otherwise, we are unable to join the intervals. @@ -1758,86 +1194,32 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; } - LiveInterval *ResSrcInt = &SrcInt; - LiveInterval *ResDstInt = &DstInt; - if (Swapped) { - std::swap(SrcReg, DstReg); - std::swap(ResSrcInt, ResDstInt); - } - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "LiveInterval::join didn't work right!"); - - // If we're about to merge live ranges into a physical register live interval, - // we have to update any aliased register's live ranges to indicate that they - // have clobbered values for this range. - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - // If this is a extract_subreg where dst is a physical register, e.g. - // cl = EXTRACT_SUBREG reg1024, 1 - // then create and update the actual physical register allocated to RHS. - if (RealDstReg || RealSrcReg) { - LiveInterval &RealInt = - li_->getOrCreateInterval(RealDstReg ? RealDstReg : RealSrcReg); - for (LiveInterval::const_vni_iterator I = SavedLI->vni_begin(), - E = SavedLI->vni_end(); I != E; ++I) { - const VNInfo *ValNo = *I; - VNInfo *NewValNo = RealInt.getNextValue(ValNo->def, ValNo->getCopy(), - false, // updated at * - li_->getVNInfoAllocator()); - NewValNo->setFlags(ValNo->getFlags()); // * updated here. - RealInt.addKills(NewValNo, ValNo->kills); - RealInt.MergeValueInAsValue(*SavedLI, ValNo, NewValNo); - } - RealInt.weight += SavedLI->weight; - DstReg = RealDstReg ? RealDstReg : RealSrcReg; - } - - // Update the liveintervals of sub-registers. - for (const unsigned *AS = tri_->getSubRegisters(DstReg); *AS; ++AS) - li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, *ResSrcInt, - li_->getVNInfoAllocator()); - } - - // If this is a EXTRACT_SUBREG, make sure the result of coalescing is the - // larger super-register. - if ((isExtSubReg || isInsSubReg || isSubRegToReg) && - !SrcIsPhys && !DstIsPhys) { - if ((isExtSubReg && !Swapped) || - ((isInsSubReg || isSubRegToReg) && Swapped)) { - ResSrcInt->Copy(*ResDstInt, mri_, li_->getVNInfoAllocator()); - std::swap(SrcReg, DstReg); - std::swap(ResSrcInt, ResDstInt); - } - } - // Coalescing to a virtual register that is of a sub-register class of the // other. Make sure the resulting register is set to the right register class. - if (CrossRC) + if (CP.isCrossClass()) { ++numCrossRCs; - - // This may happen even if it's cross-rc coalescing. e.g. - // %reg1026 = SUBREG_TO_REG 0, %reg1037, 4 - // reg1026 -> GR64, reg1037 -> GR32_ABCD. The resulting register will have to - // be allocate a register from GR64_ABCD. - if (NewRC) - mri_->setRegClass(DstReg, NewRC); + mri_->setRegClass(CP.getDstReg(), CP.getNewRC()); + } // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); - UpdateRegDefsUses(SrcReg, DstReg, SubIdx); + UpdateRegDefsUses(CP); // If we have extended the live range of a physical register, make sure we // update live-in lists as well. 
- if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { - const LiveInterval &VRegInterval = li_->getInterval(SrcReg); + if (CP.isPhys()) { SmallVector BlockSeq; - for (LiveInterval::const_iterator I = VRegInterval.begin(), - E = VRegInterval.end(); I != E; ++I ) { + // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the + // ranges for this, and they are preserved. + LiveInterval &SrcInt = li_->getInterval(CP.getSrcReg()); + for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); + I != E; ++I ) { li_->findLiveInMBBs(I->start, I->end, BlockSeq); for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { MachineBasicBlock &block = *BlockSeq[idx]; - if (!block.isLiveIn(DstReg)) - block.addLiveIn(DstReg); + if (!block.isLiveIn(CP.getDstReg())) + block.addLiveIn(CP.getDstReg()); } BlockSeq.clear(); } @@ -1845,32 +1227,17 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // SrcReg is guarateed to be the register whose live interval that is // being merged. - li_->removeInterval(SrcReg); + li_->removeInterval(CP.getSrcReg()); // Update regalloc hint. - tri_->UpdateRegAllocHint(SrcReg, DstReg, *mf_); - - // Manually deleted the live interval copy. - if (SavedLI) { - SavedLI->clear(); - SavedLI.reset(); - } - - // If resulting interval has a preference that no longer fits because of subreg - // coalescing, just clear the preference. - unsigned Preference = getRegAllocPreference(ResDstInt->reg, *mf_, mri_, tri_); - if (Preference && (isExtSubReg || isInsSubReg || isSubRegToReg) && - TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { - const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); - if (!RC->contains(Preference)) - mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); - } + tri_->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *mf_); DEBUG({ - dbgs() << "\t\tJoined. Result = "; - ResDstInt->print(dbgs(), tri_); - dbgs() << "\n"; - }); + LiveInterval &DstInt = li_->getInterval(CP.getDstReg()); + dbgs() << "\tJoined. Result = "; + DstInt.print(dbgs(), tri_); + dbgs() << "\n"; + }); ++numJoins; return true; @@ -1927,263 +1294,53 @@ static unsigned ComputeUltimateVN(VNInfo *VNI, return ThisValNoAssignments[VN] = UltimateVN; } -static bool InVector(VNInfo *Val, const SmallVector &V) { - return std::find(V.begin(), V.end(), Val) != V.end(); -} - -static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) - ; - else if (MI->isExtractSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); - } else if (MI->isSubregToReg() || - MI->isInsertSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(2).getReg(); - } else - return false; - return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) && - (DstReg == DR || TRI->isSuperRegister(DR, DstReg)); -} - -/// RangeIsDefinedByCopyFromReg - Return true if the specified live range of -/// the specified live interval is defined by a copy from the specified -/// register. -bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, - LiveRange *LR, - unsigned Reg) { - unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); - if (SrcReg == Reg) - return true; - // FIXME: Do isPHIDef and isDefAccurate both need to be tested? 
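When the destination is a physical register, the loop above walks every range of the source interval, asks which basic blocks that range is live into, and adds DstReg to each such block's live-in list if it is not already there. A small standalone model of that propagation with invented block and range types:

    #include <cstdio>
    #include <set>
    #include <vector>

    struct ToyBlock { int Begin, End; std::set<int> LiveIns; };
    struct ToyRange { int Start, End; };

    // A block whose entry point falls strictly inside a range is live-in for
    // that range, so the register must be added to its live-in set.
    static void addLiveIns(std::vector<ToyBlock> &Blocks,
                           const std::vector<ToyRange> &Ranges, int Reg) {
      for (size_t r = 0, re = Ranges.size(); r != re; ++r)
        for (size_t b = 0, be = Blocks.size(); b != be; ++b)
          if (Ranges[r].Start < Blocks[b].Begin &&
              Blocks[b].Begin < Ranges[r].End)
            Blocks[b].LiveIns.insert(Reg);  // insert() ignores duplicates

    }

    int main() {
      std::vector<ToyBlock> Blocks(2);
      Blocks[0].Begin = 0;  Blocks[0].End = 10;
      Blocks[1].Begin = 10; Blocks[1].End = 20;
      std::vector<ToyRange> Ranges(1);
      Ranges[0].Start = 5; Ranges[0].End = 15;  // crosses into the second block
      addLiveIns(Blocks, Ranges, 42);
      std::printf("block1 live-in: %d\n", (int)Blocks[1].LiveIns.count(42));
      return 0;
    }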
- if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) && - TargetRegisterInfo::isPhysicalRegister(li.reg) && - *tri_->getSuperRegisters(li.reg)) { - // It's a sub-register live interval, we may not have precise information. - // Re-compute it. - MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start); - if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) { - // Cache computed info. - LR->valno->def = LR->start; - LR->valno->setCopy(DefMI); - return true; - } - } - return false; -} - - -/// ValueLiveAt - Return true if the LiveRange pointed to by the given -/// iterator, or any subsequent range with the same value number, -/// is live at the given point. -bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr, - LiveInterval::iterator LREnd, - SlotIndex defPoint) const { - for (const VNInfo *valno = LRItr->valno; - (LRItr != LREnd) && (LRItr->valno == valno); ++LRItr) { - if (LRItr->contains(defPoint)) - return true; - } - - return false; -} - - -/// SimpleJoin - Attempt to joint the specified interval into this one. The -/// caller of this method must guarantee that the RHS only contains a single -/// value number and that the RHS is not defined by a copy from this -/// interval. This returns false if the intervals are not joinable, or it -/// joins them and returns true. -bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ - assert(RHS.containsOneValue()); - - // Some number (potentially more than one) value numbers in the current - // interval may be defined as copies from the RHS. Scan the overlapping - // portions of the LHS and RHS, keeping track of this and looking for - // overlapping live ranges that are NOT defined as copies. If these exist, we - // cannot coalesce. - - LiveInterval::iterator LHSIt = LHS.begin(), LHSEnd = LHS.end(); - LiveInterval::iterator RHSIt = RHS.begin(), RHSEnd = RHS.end(); - - if (LHSIt->start < RHSIt->start) { - LHSIt = std::upper_bound(LHSIt, LHSEnd, RHSIt->start); - if (LHSIt != LHS.begin()) --LHSIt; - } else if (RHSIt->start < LHSIt->start) { - RHSIt = std::upper_bound(RHSIt, RHSEnd, LHSIt->start); - if (RHSIt != RHS.begin()) --RHSIt; - } - - SmallVector EliminatedLHSVals; - - while (1) { - // Determine if these live intervals overlap. - bool Overlaps = false; - if (LHSIt->start <= RHSIt->start) - Overlaps = LHSIt->end > RHSIt->start; - else - Overlaps = RHSIt->end > LHSIt->start; - - // If the live intervals overlap, there are two interesting cases: if the - // LHS interval is defined by a copy from the RHS, it's ok and we record - // that the LHS value # is the same as the RHS. If it's not, then we cannot - // coalesce these live ranges and we bail out. - if (Overlaps) { - // If we haven't already recorded that this value # is safe, check it. - if (!InVector(LHSIt->valno, EliminatedLHSVals)) { - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (LHSIt->valno->hasRedefByEC()) - return false; - // Copy from the RHS? - if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) - return false; // Nope, bail out. - - if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) - // Here is an interesting situation: - // BB1: - // vr1025 = copy vr1024 - // .. - // BB2: - // vr1024 = op - // = vr1025 - // Even though vr1025 is copied from vr1024, it's not safe to - // coalesce them since the live range of vr1025 intersects the - // def of vr1024. 
This happens because vr1025 is assigned the - // value of the previous iteration of vr1024. +/// JoinIntervals - Attempt to join these two intervals. On failure, this +/// returns false. +bool SimpleRegisterCoalescing::JoinIntervals(CoalescerPair &CP) { + LiveInterval &RHS = li_->getInterval(CP.getSrcReg()); + DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), tri_); dbgs() << "\n"; }); + + // If a live interval is a physical register, check for interference with any + // aliases. The interference check implemented here is a bit more conservative + // than the full interfeence check below. We allow overlapping live ranges + // only when one is a copy of the other. + if (CP.isPhys()) { + for (const unsigned *AS = tri_->getAliasSet(CP.getDstReg()); *AS; ++AS){ + if (!li_->hasInterval(*AS)) + continue; + const LiveInterval &LHS = li_->getInterval(*AS); + LiveInterval::const_iterator LI = LHS.begin(); + for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); + RI != RE; ++RI) { + LI = std::lower_bound(LI, LHS.end(), RI->start); + // Does LHS have an overlapping live range starting before RI? + if ((LI != LHS.begin() && LI[-1].end > RI->start) && + (RI->start != RI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(RI->start)))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; + }); return false; - EliminatedLHSVals.push_back(LHSIt->valno); - } - - // We know this entire LHS live range is okay, so skip it now. - if (++LHSIt == LHSEnd) break; - continue; - } + } - if (LHSIt->end < RHSIt->end) { - if (++LHSIt == LHSEnd) break; - } else { - // One interesting case to check here. It's possible that we have - // something like "X3 = Y" which defines a new value number in the LHS, - // and is the last use of this liverange of the RHS. In this case, we - // want to notice this copy (so that it gets coalesced away) even though - // the live ranges don't actually overlap. - if (LHSIt->start == RHSIt->end) { - if (InVector(LHSIt->valno, EliminatedLHSVals)) { - // We already know that this value number is going to be merged in - // if coalescing succeeds. Just skip the liverange. - if (++LHSIt == LHSEnd) break; - } else { - // If it's re-defined by an early clobber somewhere in the live range, - // then conservatively abort coalescing. - if (LHSIt->valno->hasRedefByEC()) + // Check that LHS ranges beginning in this range are copies. + for (; LI != LHS.end() && LI->start < RI->end; ++LI) { + if (LI->start != LI->valno->def || + !CP.isCoalescable(li_->getInstructionFromIndex(LI->start))) { + DEBUG({ + dbgs() << "\t\tInterference from alias: "; + LHS.print(dbgs(), tri_); + dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; + }); return false; - // Otherwise, if this is a copy from the RHS, mark it as being merged - // in. - if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { - if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) - // Here is an interesting situation: - // BB1: - // vr1025 = copy vr1024 - // .. - // BB2: - // vr1024 = op - // = vr1025 - // Even though vr1025 is copied from vr1024, it's not safe to - // coalesced them since live range of vr1025 intersects the - // def of vr1024. This happens because vr1025 is assigned the - // value of the previous iteration of vr1024. - return false; - EliminatedLHSVals.push_back(LHSIt->valno); - - // We know this entire LHS live range is okay, so skip it now. 
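The new physreg path in JoinIntervals shown above tolerates an overlap between RHS and an alias interval only when the overlapping range is itself defined by a coalescable copy; any other overlap is interference. Because both range lists are sorted, each RHS range needs only a std::lower_bound into the alias interval plus a short forward scan. A standalone sketch of the geometric part of that test on plain sorted ranges (the copy and value-number checks are omitted):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    struct ToyRange {
      int Start, End;
      bool operator<(int Idx) const { return Start < Idx; }  // for lower_bound
    };

    // Return true if any range of B overlaps a range of A.  Both vectors are
    // sorted by Start and ranges are half-open [Start, End).
    static bool overlaps(const std::vector<ToyRange> &A,
                         const std::vector<ToyRange> &B) {
      std::vector<ToyRange>::const_iterator LI = A.begin();
      for (size_t i = 0, e = B.size(); i != e; ++i) {
        LI = std::lower_bound(LI, A.end(), B[i].Start);
        // Does A have a range starting before B[i] that is still live there?
        if (LI != A.begin() && (LI - 1)->End > B[i].Start)
          return true;
        // Does A have a range starting inside B[i]?
        if (LI != A.end() && LI->Start < B[i].End)
          return true;
      }
      return false;
    }

    int main() {
      std::vector<ToyRange> A, B;
      ToyRange R;
      R.Start = 0;  R.End = 10; A.push_back(R);
      R.Start = 20; R.End = 30; A.push_back(R);
      R.Start = 12; R.End = 18; B.push_back(R);  // falls in the gap: no overlap
      std::printf("%d\n", (int)overlaps(A, B));
      return 0;
    }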
- if (++LHSIt == LHSEnd) break; } } } - - if (++RHSIt == RHSEnd) break; - } - } - - // If we got here, we know that the coalescing will be successful and that - // the value numbers in EliminatedLHSVals will all be merged together. Since - // the most common case is that EliminatedLHSVals has a single number, we - // optimize for it: if there is more than one value, we merge them all into - // the lowest numbered one, then handle the interval as if we were merging - // with one value number. - VNInfo *LHSValNo = NULL; - if (EliminatedLHSVals.size() > 1) { - // Loop through all the equal value numbers merging them into the smallest - // one. - VNInfo *Smallest = EliminatedLHSVals[0]; - for (unsigned i = 1, e = EliminatedLHSVals.size(); i != e; ++i) { - if (EliminatedLHSVals[i]->id < Smallest->id) { - // Merge the current notion of the smallest into the smaller one. - LHS.MergeValueNumberInto(Smallest, EliminatedLHSVals[i]); - Smallest = EliminatedLHSVals[i]; - } else { - // Merge into the smallest. - LHS.MergeValueNumberInto(EliminatedLHSVals[i], Smallest); - } } - LHSValNo = Smallest; - } else if (EliminatedLHSVals.empty()) { - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSuperRegisters(LHS.reg)) - // Imprecise sub-register information. Can't handle it. - return false; - llvm_unreachable("No copies from the RHS?"); - } else { - LHSValNo = EliminatedLHSVals[0]; - } - - // Okay, now that there is a single LHS value number that we're merging the - // RHS into, update the value number info for the LHS to indicate that the - // value number is defined where the RHS value number was. - const VNInfo *VNI = RHS.getValNumInfo(0); - LHSValNo->def = VNI->def; - LHSValNo->setCopy(VNI->getCopy()); - - // Okay, the final step is to loop over the RHS live intervals, adding them to - // the LHS. - if (VNI->hasPHIKill()) - LHSValNo->setHasPHIKill(true); - LHS.addKills(LHSValNo, VNI->kills); - LHS.MergeRangesInAsValue(RHS, LHSValNo); - - LHS.ComputeJoinedWeight(RHS); - - // Update regalloc hint if both are virtual registers. - if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && - TargetRegisterInfo::isVirtualRegister(RHS.reg)) { - std::pair RHSPref = mri_->getRegAllocationHint(RHS.reg); - std::pair LHSPref = mri_->getRegAllocationHint(LHS.reg); - if (RHSPref != LHSPref) - mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); } - // Update the liveintervals of sub-registers. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg)) - for (const unsigned *AS = tri_->getSubRegisters(LHS.reg); *AS; ++AS) - li_->getOrCreateInterval(*AS).MergeInClobberRanges(*li_, LHS, - li_->getVNInfoAllocator()); - - return true; -} - -/// JoinIntervals - Attempt to join these two intervals. On failure, this -/// returns false. Otherwise, if one of the intervals being joined is a -/// physreg, this method always canonicalizes LHS to be it. The output -/// "RHS" will not have been modified, so we can use this information -/// below to update aliases. -bool -SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, - bool &Swapped) { // Compute the final value assignment, assuming that the live ranges can be // coalesced. SmallVector LHSValNoAssignments; @@ -2192,203 +1349,87 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, DenseMap RHSValsDefinedFromLHS; SmallVector NewVNInfo; - // If a live interval is a physical register, conservatively check if any - // of its sub-registers is overlapping the live interval of the virtual - // register. 
If so, do not coalesce. - if (TargetRegisterInfo::isPhysicalRegister(LHS.reg) && - *tri_->getSubRegisters(LHS.reg)) { - // If it's coalescing a virtual register to a physical register, estimate - // its live interval length. This is the *cost* of scanning an entire live - // interval. If the cost is low, we'll do an exhaustive check instead. - - // If this is something like this: - // BB1: - // v1024 = op - // ... - // BB2: - // ... - // RAX = v1024 - // - // That is, the live interval of v1024 crosses a bb. Then we can't rely on - // less conservative check. It's possible a sub-register is defined before - // v1024 (or live in) and live out of BB1. - if (RHS.containsOneValue() && - li_->intervalIsInOneMBB(RHS) && - li_->getApproximateInstructionCount(RHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } else if (TargetRegisterInfo::isPhysicalRegister(RHS.reg) && - *tri_->getSubRegisters(RHS.reg)) { - if (LHS.containsOneValue() && - li_->getApproximateInstructionCount(LHS) <= 10) { - // Perform a more exhaustive check for some common cases. - if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies)) - return false; - } else { - for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR) - if (li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) { - DEBUG({ - dbgs() << "\tInterfere with sub-register "; - li_->getInterval(*SR).print(dbgs(), tri_); - }); - return false; - } - } - } + LiveInterval &LHS = li_->getOrCreateInterval(CP.getDstReg()); + DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), tri_); dbgs() << "\n"; }); - // Compute ultimate value numbers for the LHS and RHS values. - if (RHS.containsOneValue()) { - // Copies from a liveinterval with a single value are simple to handle and - // very common, handle the special case here. This is important, because - // often RHS is small and LHS is large (e.g. a physreg). - - // Find out if the RHS is defined as a copy from some value in the LHS. - int RHSVal0DefinedFromLHS = -1; - int RHSValID = -1; - VNInfo *RHSValNoInfo = NULL; - VNInfo *RHSValNoInfo0 = RHS.getValNumInfo(0); - unsigned RHSSrcReg = li_->getVNInfoSourceReg(RHSValNoInfo0); - if (RHSSrcReg == 0 || RHSSrcReg != LHS.reg) { - // If RHS is not defined as a copy from the LHS, we can use simpler and - // faster checks to see if the live ranges are coalescable. This joiner - // can't swap the LHS/RHS intervals though. - if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - return SimpleJoin(LHS, RHS); - } else { - RHSValNoInfo = RHSValNoInfo0; - } - } else { - // It was defined as a copy from the LHS, find out what value # it is. - RHSValNoInfo = - LHS.getLiveRangeContaining(RHSValNoInfo0->def.getPrevSlot())->valno; - RHSValID = RHSValNoInfo->id; - RHSVal0DefinedFromLHS = RHSValID; - } + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? 
+ continue; - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.resize(LHS.getNumValNums(), NULL); - - // Okay, *all* of the values in LHS that are defined as a copy from RHS - // should now get updated. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (unsigned LHSSrcReg = li_->getVNInfoSourceReg(VNI)) { - if (LHSSrcReg != RHS.reg) { - // If this is not a copy from the RHS, its value number will be - // unmodified by the coalescing. - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } else if (RHSValID == -1) { - // Otherwise, it is a copy from the RHS, and we don't already have a - // value# for it. Keep the current value number, but remember it. - LHSValNoAssignments[VN] = RHSValID = VN; - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } else { - // Otherwise, use the specified value #. - LHSValNoAssignments[VN] = RHSValID; - if (VN == (unsigned)RHSValID) { // Else this val# is dead. - NewVNInfo[VN] = RHSValNoInfo; - LHSValsDefinedFromRHS[VNI] = RHSValNoInfo0; - } - } - } else { - NewVNInfo[VN] = VNI; - LHSValNoAssignments[VN] = VN; - } - } + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - assert(RHSValID != -1 && "Didn't find value #?"); - RHSValNoAssignments[0] = RHSValID; - if (RHSVal0DefinedFromLHS != -1) { - // This path doesn't go through ComputeUltimateVN so just set - // it to anything. - RHSValsDefinedFromLHS[RHSValNoInfo0] = (VNInfo*)1; - } - } else { - // Loop over the value numbers of the LHS, seeing if any are defined from - // the RHS. - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // DstReg is known to be a register in the LHS interval. If the src is - // from the RHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != RHS.reg) - continue; + // Figure out the value # from the RHS. + LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + LHSValsDefinedFromRHS[VNI] = lr->valno; + } - // Figure out the value # from the RHS. - LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - LHSValsDefinedFromRHS[VNI] = lr->valno; - } + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? + continue; - // Loop over the value numbers of the RHS, seeing if any are defined from - // the LHS. - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - if (VNI->isUnused() || VNI->getCopy() == 0) // Src not defined by a copy? - continue; + // Never join with a register that has EarlyClobber redefs. + if (VNI->hasRedefByEC()) + return false; - // DstReg is known to be a register in the RHS interval. If the src is - // from the LHS interval, we can use its value #. 
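Both of these loops lean on the new CoalescerPair helper instead of the old raw SrcReg/DstReg bookkeeping: CP.isCoalescable(MI) answers whether a given copy instruction moves data between exactly the register pair (and sub-register index) being joined. Only getDstReg() and isCoalescable() appear in this hunk; the construction and seeding of the pair below are an assumption based on how the rewritten JoinCopy driver is expected to reach JoinIntervals, shown purely as a sketch:

    // Sketch only: driving JoinIntervals(CP) with a CoalescerPair.
    // The constructor and setRegisters() are assumed from the new
    // RegisterCoalescer code; this hunk itself only uses getDstReg()
    // and isCoalescable().
    CoalescerPair CP(*tii_, *tri_);
    if (!CP.setRegisters(CopyMI))
      return false;               // not a recognizable copy, nothing to join
    if (!JoinIntervals(CP))
      return false;               // interference: leave the copy in place
    // On success, li_->getOrCreateInterval(CP.getDstReg()) now also carries
    // the ranges that used to belong to the source register.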
- if (li_->getVNInfoSourceReg(VNI) != LHS.reg) - continue; + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (!CP.isCoalescable(VNI->getCopy())) + continue; - // Figure out the value # from the LHS. - LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); - assert(lr && "Cannot find live range"); - RHSValsDefinedFromLHS[VNI] = lr->valno; - } + // Figure out the value # from the LHS. + LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); + // The copy could be to an aliased physreg. + if (!lr) continue; + RHSValsDefinedFromLHS[VNI] = lr->valno; + } - LHSValNoAssignments.resize(LHS.getNumValNums(), -1); - RHSValNoAssignments.resize(RHS.getNumValNums(), -1); - NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); - for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - ComputeUltimateVN(VNI, NewVNInfo, - LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, - LHSValNoAssignments, RHSValNoAssignments); + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; } - for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); - i != e; ++i) { - VNInfo *VNI = *i; - unsigned VN = VNI->id; - if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) - continue; - // If this value number isn't a copy from the LHS, it's a new number. - if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { - NewVNInfo.push_back(VNI); - RHSValNoAssignments[VN] = NewVNInfo.size()-1; - continue; - } - ComputeUltimateVN(VNI, NewVNInfo, - RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, - RHSValNoAssignments, LHSValNoAssignments); - } + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); } // Armed with the mappings of LHS/RHS values to ultimate values, walk the @@ -2399,15 +1440,17 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, LiveInterval::const_iterator JE = RHS.end(); // Skip ahead until the first place of potential sharing. 
- if (I->start < J->start) { - I = std::upper_bound(I, IE, J->start); - if (I != LHS.begin()) --I; - } else if (J->start < I->start) { - J = std::upper_bound(J, JE, I->start); - if (J != RHS.begin()) --J; + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } } - while (1) { + while (I != IE && J != JE) { // Determine if these two live ranges overlap. bool Overlaps; if (I->start < J->start) { @@ -2429,13 +1472,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, return false; } - if (I->end < J->end) { + if (I->end < J->end) ++I; - if (I == IE) break; - } else { + else ++J; - if (J == JE) break; - } } // Update kill info. Some live ranges are extended due to copy coalescing. @@ -2443,10 +1483,8 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = LHSValsDefinedFromRHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned LHSValID = LHSValNoAssignments[VNI->id]; - NewVNInfo[LHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[LHSValID]->setHasPHIKill(true); - RHS.addKills(NewVNInfo[LHSValID], VNI->kills); } // Update kill info. Some live ranges are extended due to copy coalescing. @@ -2454,25 +1492,19 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, E = RHSValsDefinedFromLHS.end(); I != E; ++I) { VNInfo *VNI = I->first; unsigned RHSValID = RHSValNoAssignments[VNI->id]; - NewVNInfo[RHSValID]->removeKill(VNI->def); if (VNI->hasPHIKill()) NewVNInfo[RHSValID]->setHasPHIKill(true); - LHS.addKills(NewVNInfo[RHSValID], VNI->kills); } + if (LHSValNoAssignments.empty()) + LHSValNoAssignments.push_back(-1); + if (RHSValNoAssignments.empty()) + RHSValNoAssignments.push_back(-1); + // If we get here, we know that we can coalesce the live ranges. Ask the // intervals to coalesce themselves now. - if ((RHS.ranges.size() > LHS.ranges.size() && - TargetRegisterInfo::isVirtualRegister(LHS.reg)) || - TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - RHS.join(LHS, &RHSValNoAssignments[0], &LHSValNoAssignments[0], NewVNInfo, - mri_); - Swapped = true; - } else { - LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, - mri_); - Swapped = false; - } + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + mri_); return true; } @@ -2513,15 +1545,10 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB, // If this isn't a copy nor a extract_subreg, we can't join intervals. 
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; bool isInsUndef = false; - if (Inst->isExtractSubreg()) { + if (Inst->isCopy()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(1).getReg(); - } else if (Inst->isInsertSubreg()) { - DstReg = Inst->getOperand(0).getReg(); - SrcReg = Inst->getOperand(2).getReg(); - if (Inst->getOperand(1).isUndef()) - isInsUndef = true; - } else if (Inst->isInsertSubreg() || Inst->isSubregToReg()) { + } else if (Inst->isSubregToReg()) { DstReg = Inst->getOperand(0).getReg(); SrcReg = Inst->getOperand(2).getReg(); } else if (!tii_->isMoveInstr(*Inst, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) @@ -2650,6 +1677,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, E = mri_->use_nodbg_end(); I != E; ++I) { MachineOperand &Use = I.getOperand(); MachineInstr *UseMI = Use.getParent(); + if (UseMI->isIdentityCopy()) + continue; unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; if (tii_->isMoveInstr(*UseMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx) @@ -2680,7 +1709,8 @@ SimpleRegisterCoalescing::lastRegisterUse(SlotIndex Start, // Ignore identity copies. unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (!(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && + if (!MI->isIdentityCopy() && + !(tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) for (unsigned i = 0, NumOps = MI->getNumOperands(); i != NumOps; ++i) { MachineOperand &Use = MI->getOperand(i); @@ -2750,10 +1780,9 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Delete all coalesced copies. bool DoDelete = true; if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - assert((MI->isExtractSubreg() || MI->isInsertSubreg() || - MI->isSubregToReg()) && "Unrecognized copy instruction"); - DstReg = MI->getOperand(0).getReg(); - if (TargetRegisterInfo::isPhysicalRegister(DstReg)) + assert(MI->isCopyLike() && "Unrecognized copy instruction"); + SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) // Do not delete extract_subreg, insert_subreg of physical // registers unless the definition is dead. e.g. // %DO = INSERT_SUBREG %D0, %S0, 1 @@ -2762,7 +1791,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { DoDelete = false; } if (MI->allDefsAreDead()) { - LiveInterval &li = li_->getInterval(DstReg); + LiveInterval &li = li_->getInterval(SrcReg); if (!ShortenDeadCopySrcLiveRange(li, MI)) ShortenDeadCopyLiveRange(li, MI); DoDelete = true; @@ -2812,12 +1841,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // If the move will be an identity move delete it bool isMove= tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx); - if (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx) { + if (MI->isIdentityCopy() || + (isMove && SrcReg == DstReg && SrcSubIdx == DstSubIdx)) { if (li_->hasInterval(SrcReg)) { LiveInterval &RegInt = li_->getInterval(SrcReg); // If def of this move instruction is dead, remove its live range - // from the dstination register's live interval. - if (MI->registerDefIsDead(DstReg)) { + // from the destination register's live interval. + if (MI->allDefsAreDead()) { if (!ShortenDeadCopySrcLiveRange(RegInt, MI)) ShortenDeadCopyLiveRange(RegInt, MI); } @@ -2832,17 +1862,13 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { // Check for now unnecessary kill flags. 
if (li_->isNotInMIMap(MI)) continue; - SlotIndex UseIdx = li_->getInstructionIndex(MI).getUseIndex(); + SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || !MO.isKill()) continue; unsigned reg = MO.getReg(); if (!reg || !li_->hasInterval(reg)) continue; - LiveInterval &LI = li_->getInterval(reg); - const LiveRange *LR = LI.getLiveRangeContaining(UseIdx); - if (!LR || - (!LR->valno->isKill(UseIdx.getDefIndex()) && - LR->valno->def != UseIdx.getDefIndex())) + if (!li_->getInterval(reg).killedAt(DefIdx)) MO.setIsKill(false); } } diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 1be04f32aa69..e154da60affa 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -105,21 +105,12 @@ namespace llvm { /// possible to coalesce this interval, but it may be possible if other /// things get coalesced, then it returns true by reference in 'Again'. bool JoinCopy(CopyRec &TheCopy, bool &Again); - + /// JoinIntervals - Attempt to join these two intervals. On failure, this - /// returns false. Otherwise, if one of the intervals being joined is a - /// physreg, this method always canonicalizes DestInt to be it. The output - /// "SrcInt" will not have been modified, so we can use this information - /// below to update aliases. - bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped); - - /// SimpleJoin - Attempt to join the specified interval into this one. The - /// caller of this method must guarantee that the RHS only contains a single - /// value number and that the RHS is not defined by a copy from this - /// interval. This returns false if the intervals are not joinable, or it - /// joins them and returns true. - bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS); - + /// returns false. The output "SrcInt" will not have been modified, so we can + /// use this information below to update aliases. + bool JoinIntervals(CoalescerPair &CP); + /// Return true if the two specified registers belong to different register /// classes. The registers may be either phys or virt regs. bool differingRegisterClasses(unsigned RegA, unsigned RegB) const; @@ -128,8 +119,7 @@ namespace llvm { /// the source value number is defined by a copy from the destination reg /// see if we can merge these two destination reg valno# into a single /// value number, eliminating a copy. - bool AdjustCopiesBackFrom(LiveInterval &IntA, LiveInterval &IntB, - MachineInstr *CopyMI); + bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); /// HasOtherReachingDefs - Return true if there are definitions of IntB /// other than BValNo val# that can reach uses of AValno val# of IntA. @@ -140,8 +130,7 @@ namespace llvm { /// If the source value number is defined by a commutable instruction and /// its other operand is coalesced to the copy dest register, see if we /// can transform the copy into a noop by commuting the definition. 
- bool RemoveCopyByCommutingDef(LiveInterval &IntA, LiveInterval &IntB, - MachineInstr *CopyMI); + bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); /// TrimLiveIntervalToLastUse - If there is a last use in the same basic /// block as the copy instruction, trim the ive interval to the last use @@ -155,28 +144,6 @@ namespace llvm { bool ReMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, unsigned DstSubIdx, MachineInstr *CopyMI); - /// CanCoalesceWithImpDef - Returns true if the specified copy instruction - /// from an implicit def to another register can be coalesced away. - bool CanCoalesceWithImpDef(MachineInstr *CopyMI, - LiveInterval &li, LiveInterval &ImpLi) const; - - /// TurnCopiesFromValNoToImpDefs - The specified value# is defined by an - /// implicit_def and it is being removed. Turn all copies from this value# - /// into implicit_defs. - void TurnCopiesFromValNoToImpDefs(LiveInterval &li, VNInfo *VNI); - - /// isWinToJoinVRWithSrcPhysReg - Return true if it's worth while to join a - /// a virtual destination register with physical source register. - bool isWinToJoinVRWithSrcPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, LiveInterval &SrcInt); - - /// isWinToJoinVRWithDstPhysReg - Return true if it's worth while to join a - /// copy from a virtual source register to a physical destination register. - bool isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI, - MachineBasicBlock *CopyMBB, - LiveInterval &DstInt, LiveInterval &SrcInt); - /// isWinToJoinCrossClass - Return true if it's profitable to coalesce /// two virtual registers from different register classes. bool isWinToJoinCrossClass(unsigned SrcReg, @@ -185,43 +152,12 @@ namespace llvm { const TargetRegisterClass *DstRC, const TargetRegisterClass *NewRC); - /// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual - /// register with a physical register, check if any of the virtual register - /// operand is a sub-register use or def. If so, make sure it won't result - /// in an illegal extract_subreg or insert_subreg instruction. - bool HasIncompatibleSubRegDefUse(MachineInstr *CopyMI, - unsigned VirtReg, unsigned PhysReg); - - /// CanJoinExtractSubRegToPhysReg - Return true if it's possible to coalesce - /// an extract_subreg where dst is a physical register, e.g. - /// cl = EXTRACT_SUBREG reg1024, 1 - bool CanJoinExtractSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, - unsigned SubIdx, unsigned &RealDstReg); - - /// CanJoinInsertSubRegToPhysReg - Return true if it's possible to coalesce - /// an insert_subreg where src is a physical register, e.g. - /// reg1024 = INSERT_SUBREG reg1024, c1, 0 - bool CanJoinInsertSubRegToPhysReg(unsigned DstReg, unsigned SrcReg, - unsigned SubIdx, unsigned &RealDstReg); - - /// ValueLiveAt - Return true if the LiveRange pointed to by the given - /// iterator, or any subsequent range with the same value number, - /// is live at the given point. - bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd, - SlotIndex defPoint) const; - - /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of - /// the specified live interval is defined by a copy from the specified - /// register. - bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, - unsigned Reg); - /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. 
If DstReg is a /// physical register and the existing subregister number of the def / use /// being updated is not zero, make sure to set it to the correct physical /// subregister. - void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + void UpdateRegDefsUses(const CoalescerPair &CP); /// ShortenDeadCopyLiveRange - Shorten a live range defined by a dead copy. /// Return true if live interval is removed. @@ -238,6 +174,10 @@ namespace llvm { /// it as well. bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); + /// RemoveCopyFlag - If DstReg is no longer defined by CopyMI, clear the + /// VNInfo copy flag for DstReg and all aliases. + void RemoveCopyFlag(unsigned DstReg, const MachineInstr *CopyMI); + /// lastRegisterUse - Returns the last use of the specific register between /// cycles Start and End or NULL if there are no uses. MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End, diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 059e8d6c19aa..e90869d600dd 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -46,6 +46,8 @@ namespace { Constant *UnregisterFn; Constant *BuiltinSetjmpFn; Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; Constant *LSDAAddrFn; Value *PersonalityFn; Constant *SelectorFn; @@ -69,7 +71,7 @@ namespace { void insertCallSiteStore(Instruction *I, int Number, Value *CallSite); void markInvokeCallSite(InvokeInst *II, int InvokeNo, Value *CallSite, SwitchInst *CatchSwitch); - void splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes); + void splitLiveRangesAcrossInvokes(SmallVector &Invokes); bool insertSjLjEHSupport(Function &F); }; } // end anonymous namespace @@ -107,6 +109,8 @@ bool SjLjEHPass::doInitialization(Module &M) { PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); + StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); + StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); BuiltinSetjmpFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_setjmp); LSDAAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_sjlj_lsda); SelectorFn = Intrinsic::getDeclaration(&M, Intrinsic::eh_selector); @@ -175,8 +179,10 @@ static void MarkBlocksLiveIn(BasicBlock *BB, std::set &LiveBBs) { /// we spill into a stack location, guaranteeing that there is nothing live /// across the unwind edge. This process also splits all critical edges /// coming out of invoke's. +/// FIXME: Move this function to a common utility file (Local.cpp?) so +/// both SjLj and LowerInvoke can use it. void SjLjEHPass:: -splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { +splitLiveRangesAcrossInvokes(SmallVector &Invokes) { // First step, split all critical edges from invoke instructions. for (unsigned i = 0, e = Invokes.size(); i != e; ++i) { InvokeInst *II = Invokes[i]; @@ -198,16 +204,33 @@ splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { ++AfterAllocaInsertPt; for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end(); AI != E; ++AI) { - // This is always a no-op cast because we're casting AI to AI->getType() so - // src and destination types are identical. BitCast is the only possibility. - CastInst *NC = new BitCastInst( - AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); - AI->replaceAllUsesWith(NC); - // Normally its is forbidden to replace a CastInst's operand because it - // could cause the opcode to reflect an illegal conversion. 
However, we're - // replacing it here with the same value it was constructed with to simply - // make NC its user. - NC->setOperand(0, AI); + const Type *Ty = AI->getType(); + // Aggregate types can't be cast, but are legal argument types, so we have + // to handle them differently. We use an extract/insert pair as a + // lightweight method to achieve the same goal. + if (isa(Ty) || isa(Ty) || isa(Ty)) { + Instruction *EI = ExtractValueInst::Create(AI, 0, "",AfterAllocaInsertPt); + Instruction *NI = InsertValueInst::Create(AI, EI, 0); + NI->insertAfter(EI); + AI->replaceAllUsesWith(NI); + // Set the operand of the instructions back to the AllocaInst. + EI->setOperand(0, AI); + NI->setOperand(0, AI); + } else { + // This is always a no-op cast because we're casting AI to AI->getType() + // so src and destination types are identical. BitCast is the only + // possibility. + CastInst *NC = new BitCastInst( + AI, AI->getType(), AI->getName()+".tmp", AfterAllocaInsertPt); + AI->replaceAllUsesWith(NC); + // Set the operand of the cast instruction back to the AllocaInst. + // Normally it's forbidden to replace a CastInst's operand because it + // could cause the opcode to reflect an illegal conversion. However, + // we're replacing it here with the same value it was constructed with. + // We do this because the above replaceAllUsesWith() clobbered the + // operand, but we want this one to remain. + NC->setOperand(0, AI); + } } // Finally, scan the code looking for instructions with bad live ranges. @@ -266,6 +289,9 @@ splitLiveRangesLiveAcrossInvokes(SmallVector &Invokes) { } // If we decided we need a spill, do it. + // FIXME: Spilling this way is overkill, as it forces all uses of + // the value to be reloaded from the stack slot, even those that aren't + // in the unwind blocks. We should be more selective. if (NeedsSpill) { ++NumSpilled; DemoteRegToStack(*Inst, true); @@ -294,22 +320,34 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // If we don't have any invokes or unwinds, there's nothing to do. if (Unwinds.empty() && Invokes.empty()) return false; - // Find the eh.selector.* and eh.exception calls. We'll use the first - // eh.selector to determine the right personality function to use. For - // SJLJ, we always use the same personality for the whole function, - // not on a per-selector basis. + // Find the eh.selector.*, eh.exception and alloca calls. + // + // Remember any allocas() that aren't in the entry block, as the + // jmpbuf saved SP will need to be updated for them. + // + // We'll use the first eh.selector to determine the right personality + // function to use. For SJLJ, we always use the same personality for the + // whole function, not on a per-selector basis. // FIXME: That's a bit ugly. Better way? SmallVector EH_Selectors; SmallVector EH_Exceptions; - for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + SmallVector JmpbufUpdatePoints; + // Note: Skip the entry block since there's nothing there that interests + // us. eh.selector and eh.exception shouldn't ever be there, and we + // want to disregard any allocas that are there. 
+ for (Function::iterator BB = F.begin(), E = F.end(); ++BB != E;) { for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { if (CallInst *CI = dyn_cast(I)) { if (CI->getCalledFunction() == SelectorFn) { - if (!PersonalityFn) PersonalityFn = CI->getOperand(2); + if (!PersonalityFn) PersonalityFn = CI->getArgOperand(1); EH_Selectors.push_back(CI); } else if (CI->getCalledFunction() == ExceptionFn) { EH_Exceptions.push_back(CI); + } else if (CI->getCalledFunction() == StackRestoreFn) { + JmpbufUpdatePoints.push_back(CI); } + } else if (AllocaInst *AI = dyn_cast(I)) { + JmpbufUpdatePoints.push_back(AI); } } } @@ -329,7 +367,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // we spill into a stack location, guaranteeing that there is nothing live // across the unwind edge. This process also splits all critical edges // coming out of invoke's. - splitLiveRangesLiveAcrossInvokes(Invokes); + splitLiveRangesAcrossInvokes(Invokes); BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer @@ -419,7 +457,7 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { // Populate the Function Context // 1. LSDA address // 2. Personality function address - // 3. jmpbuf (save FP and call eh.sjlj.setjmp) + // 3. jmpbuf (save SP, FP and call eh.sjlj.setjmp) // LSDA address Idxs[0] = Zero; @@ -440,31 +478,41 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { new StoreInst(PersonalityFn, PersonalityFieldPtr, true, EntryBB->getTerminator()); - // Save the frame pointer. + // Save the frame pointer. Idxs[1] = ConstantInt::get(Int32Ty, 5); - Value *FieldPtr + Value *JBufPtr = GetElementPtrInst::Create(FunctionContext, Idxs, Idxs+2, "jbuf_gep", EntryBB->getTerminator()); Idxs[1] = ConstantInt::get(Int32Ty, 0); - Value *ElemPtr = - GetElementPtrInst::Create(FieldPtr, Idxs, Idxs+2, "jbuf_fp_gep", + Value *FramePtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_fp_gep", EntryBB->getTerminator()); Value *Val = CallInst::Create(FrameAddrFn, ConstantInt::get(Int32Ty, 0), "fp", EntryBB->getTerminator()); - new StoreInst(Val, ElemPtr, true, EntryBB->getTerminator()); - // Call the setjmp instrinsic. It fills in the rest of the jmpbuf + new StoreInst(Val, FramePtr, true, EntryBB->getTerminator()); + + // Save the stack pointer. + Idxs[1] = ConstantInt::get(Int32Ty, 2); + Value *StackPtr = + GetElementPtrInst::Create(JBufPtr, Idxs, Idxs+2, "jbuf_sp_gep", + EntryBB->getTerminator()); + + Val = CallInst::Create(StackAddrFn, "sp", EntryBB->getTerminator()); + new StoreInst(Val, StackPtr, true, EntryBB->getTerminator()); + + // Call the setjmp instrinsic. It fills in the rest of the jmpbuf. Value *SetjmpArg = - CastInst::Create(Instruction::BitCast, FieldPtr, + CastInst::Create(Instruction::BitCast, JBufPtr, Type::getInt8PtrTy(F.getContext()), "", EntryBB->getTerminator()); Value *DispatchVal = CallInst::Create(BuiltinSetjmpFn, SetjmpArg, "dispatch", EntryBB->getTerminator()); - // check the return value of the setjmp. non-zero goes to dispatcher + // check the return value of the setjmp. non-zero goes to dispatcher. Value *IsNormal = new ICmpInst(EntryBB->getTerminator(), ICmpInst::ICMP_EQ, DispatchVal, Zero, "notunwind"); @@ -509,6 +557,16 @@ bool SjLjEHPass::insertSjLjEHSupport(Function &F) { Unwinds[i]->eraseFromParent(); } + // Following any allocas not in the entry block, update the saved SP + // in the jmpbuf to the new value. 
+ for (unsigned i = 0, e = JmpbufUpdatePoints.size(); i != e; ++i) { + Instruction *AI = JmpbufUpdatePoints[i]; + Instruction *StackAddr = CallInst::Create(StackAddrFn, "sp"); + StackAddr->insertAfter(AI); + Instruction *StoreStackAddr = new StoreInst(StackAddr, StackPtr, true); + StoreStackAddr->insertAfter(StackAddr); + } + // Finally, for any returns from this function, if this function contains an // invoke, add a call to unregister the function context. for (unsigned i = 0, e = Returns.size(); i != e; ++i) diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 6110ef5d2f05..7a227cf02d57 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -213,9 +213,11 @@ void SlotIndexes::dump() const { // Print a SlotIndex to a raw_ostream. void SlotIndex::print(raw_ostream &os) const { - os << getIndex(); + os << entry().getIndex(); if (isPHI()) os << "*"; + else + os << "LudS"[getSlot()]; } // Dump a SlotIndex to stderr. diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index a7b2efe11825..56bcb2824ae8 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -14,18 +14,20 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; namespace { - enum SpillerName { trivial, standard, splitting }; + enum SpillerName { trivial, standard, splitting, inline_ }; } static cl::opt @@ -35,6 +37,7 @@ spillerOpt("spiller", cl::values(clEnumVal(trivial, "trivial spiller"), clEnumVal(standard, "default spiller"), clEnumVal(splitting, "splitting spiller"), + clEnumValN(inline_, "inline", "inline spiller"), clEnumValEnd), cl::init(standard)); @@ -53,8 +56,8 @@ protected: const TargetInstrInfo *tii; const TargetRegisterInfo *tri; VirtRegMap *vrm; - - /// Construct a spiller base. + + /// Construct a spiller base. SpillerBase(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : mf(mf), lis(lis), vrm(vrm) { @@ -67,7 +70,8 @@ protected: /// Add spill ranges for every use/def of the live interval, inserting loads /// immediately before each use, and stores after each def. No folding or /// remat is attempted. - std::vector trivialSpillEverywhere(LiveInterval *li) { + void trivialSpillEverywhere(LiveInterval *li, + std::vector &newIntervals) { DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); assert(li->weight != HUGE_VALF && @@ -78,8 +82,6 @@ protected: DEBUG(dbgs() << "Trivial spill everywhere of reg" << li->reg << "\n"); - std::vector added; - const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned ss = vrm->assignVirt2StackSlot(li->reg); @@ -96,7 +98,7 @@ protected: do { ++regItr; } while (regItr != mri->reg_end() && (&*regItr == mi)); - + // Collect uses & defs for this instr. SmallVector indices; bool hasUse = false; @@ -116,7 +118,7 @@ protected: vrm->assignVirt2StackSlot(newVReg, ss); LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); newLI->weight = HUGE_VALF; - + // Update the reg operands & kill flags. 
for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; @@ -136,10 +138,10 @@ protected: MachineInstr *loadInstr(prior(miItr)); SlotIndex loadIndex = lis->InsertMachineInstrInMaps(loadInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, loadInstr); SlotIndex endIndex = loadIndex.getNextIndex(); VNInfo *loadVNI = newLI->getNextValue(loadIndex, 0, true, lis->getVNInfoAllocator()); - loadVNI->addKill(endIndex); newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); } @@ -150,17 +152,15 @@ protected: MachineInstr *storeInstr(llvm::next(miItr)); SlotIndex storeIndex = lis->InsertMachineInstrInMaps(storeInstr).getDefIndex(); + vrm->addSpillSlotUse(ss, storeInstr); SlotIndex beginIndex = storeIndex.getPrevIndex(); VNInfo *storeVNI = newLI->getNextValue(beginIndex, 0, true, lis->getVNInfoAllocator()); - storeVNI->addKill(storeIndex); newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); } - added.push_back(newLI); + newIntervals.push_back(newLI); } - - return added; } }; @@ -176,11 +176,12 @@ public: TrivialSpiller(MachineFunction *mf, LiveIntervals *lis, VirtRegMap *vrm) : SpillerBase(mf, lis, vrm) {} - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex*) { + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &, + SlotIndex*) { // Ignore spillIs - we don't use it. - return trivialSpillEverywhere(li); + trivialSpillEverywhere(li, newIntervals); } }; @@ -200,10 +201,13 @@ public: : lis(lis), loopInfo(loopInfo), vrm(vrm) {} /// Falls back on LiveIntervals::addIntervalsForSpills. - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex*) { - return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex*) { + std::vector added = + lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm); + newIntervals.insert(newIntervals.end(), added.begin(), added.end()); } }; @@ -214,7 +218,7 @@ namespace { /// When a call to spill is placed this spiller will first try to break the /// interval up into its component values (one new interval per value). /// If this fails, or if a call is placed to spill a previously split interval -/// then the spiller falls back on the standard spilling mechanism. +/// then the spiller falls back on the standard spilling mechanism. 
class SplittingSpiller : public StandardSpiller { public: SplittingSpiller(MachineFunction *mf, LiveIntervals *lis, @@ -226,22 +230,21 @@ public: tri = mf->getTarget().getRegisterInfo(); } - std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex *earliestStart) { - - if (worthTryingToSplit(li)) { - return tryVNISplit(li, earliestStart); - } - // else - return StandardSpiller::spill(li, spillIs, earliestStart); + void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestStart) { + if (worthTryingToSplit(li)) + tryVNISplit(li, earliestStart); + else + StandardSpiller::spill(li, newIntervals, spillIs, earliestStart); } private: MachineRegisterInfo *mri; const TargetInstrInfo *tii; - const TargetRegisterInfo *tri; + const TargetRegisterInfo *tri; DenseSet alreadySplit; bool worthTryingToSplit(LiveInterval *li) const { @@ -258,18 +261,18 @@ private: SmallVector vnis; std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis)); - + for (SmallVectorImpl::iterator vniItr = vnis.begin(), vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) { VNInfo *vni = *vniItr; - - // Skip unused VNIs, or VNIs with no kills. - if (vni->isUnused() || vni->kills.empty()) + + // Skip unused VNIs. + if (vni->isUnused()) continue; DEBUG(dbgs() << " Extracted Val #" << vni->id << " as "); LiveInterval *splitInterval = extractVNI(li, vni); - + if (splitInterval != 0) { DEBUG(dbgs() << *splitInterval << "\n"); added.push_back(splitInterval); @@ -281,12 +284,12 @@ private: } else { DEBUG(dbgs() << "0\n"); } - } + } DEBUG(dbgs() << "Original LI: " << *li << "\n"); // If there original interval still contains some live ranges - // add it to added and alreadySplit. + // add it to added and alreadySplit. if (!li->empty()) { added.push_back(li); alreadySplit.insert(li); @@ -302,16 +305,15 @@ private: /// Extract the given value number from the interval. LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const { assert(vni->isDefAccurate() || vni->isPHIDef()); - assert(!vni->kills.empty()); - // Create a new vreg and live interval, copy VNI kills & ranges over. + // Create a new vreg and live interval, copy VNI ranges over. const TargetRegisterClass *trc = mri->getRegClass(li->reg); unsigned newVReg = mri->createVirtualRegister(trc); vrm->grow(); LiveInterval *newLI = &lis->getOrCreateInterval(newVReg); VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator()); - // Start by copying all live ranges in the VN to the new interval. + // Start by copying all live ranges in the VN to the new interval. for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end(); rItr != rEnd; ++rItr) { if (rItr->valno == vni) { @@ -319,7 +321,7 @@ private: } } - // Erase the old VNI & ranges. + // Erase the old VNI & ranges. li->removeValNo(vni); // Collect all current uses of the register belonging to the given VNI. @@ -336,15 +338,13 @@ private: // Insert a copy at the start of the MBB. The range proceeding the // copy will be attached to the original LiveInterval. 
MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def); - tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = defMBB->begin(); - copyMI->addRegisterKilled(li->reg, tri); + MachineInstr *copyMI = BuildMI(*defMBB, defMBB->begin(), DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB), 0, false, lis->getVNInfoAllocator()); phiDefVNI->setIsPHIDef(true); - phiDefVNI->addKill(copyIdx.getDefIndex()); li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI)); LiveRange *oldPHIDefRange = newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB)); @@ -367,8 +367,8 @@ private: newVNI->setIsPHIDef(false); // not a PHI def anymore. newVNI->setIsDefAccurate(true); } else { - // non-PHI def. Rename the def. If it's two-addr that means renaming the use - // and inserting a new copy too. + // non-PHI def. Rename the def. If it's two-addr that means renaming the + // use and inserting a new copy too. MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def); // We'll rename this now, so we can remove it from uses. uses.erase(defInst); @@ -384,38 +384,26 @@ private: twoAddrUseIsUndef = true; } } - + SlotIndex defIdx = lis->getInstructionIndex(defInst); newVNI->def = defIdx.getDefIndex(); if (isTwoAddr && !twoAddrUseIsUndef) { MachineBasicBlock *defMBB = defInst->getParent(); - tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst)); + MachineInstr *copyMI = BuildMI(*defMBB, defInst, DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - copyMI->addRegisterKilled(li->reg, tri); LiveRange *origUseRange = li->getLiveRangeContaining(newVNI->def.getUseIndex()); - VNInfo *origUseVNI = origUseRange->valno; origUseRange->end = copyIdx.getDefIndex(); - bool updatedKills = false; - for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) { - if (origUseVNI->kills[k] == defIdx.getDefIndex()) { - origUseVNI->kills[k] = copyIdx.getDefIndex(); - updatedKills = true; - break; - } - } - assert(updatedKills && "Failed to update VNI kill list."); VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI, true, lis->getVNInfoAllocator()); - copyVNI->addKill(defIdx.getDefIndex()); LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI); newLI->addRange(copyRange); - } + } } - + for (std::set::iterator usesItr = uses.begin(), usesEnd = uses.end(); usesItr != usesEnd; ++usesItr) { @@ -435,7 +423,7 @@ private: // Check if this instr is two address. unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg); bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx); - + // Rename uses (and defs for two-address instrs). for (unsigned i = 0; i < useInst->getNumOperands(); ++i) { MachineOperand &mo = useInst->getOperand(i); @@ -451,10 +439,9 @@ private: // reg. 
MachineBasicBlock *useMBB = useInst->getParent(); MachineBasicBlock::iterator useItr(useInst); - tii->copyRegToReg(*useMBB, llvm::next(useItr), li->reg, newVReg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = llvm::next(useItr); - copyMI->addRegisterKilled(newVReg, tri); + MachineInstr *copyMI = BuildMI(*useMBB, llvm::next(useItr), DebugLoc(), + tii->get(TargetOpcode::COPY), newVReg) + .addReg(li->reg, RegState::Kill); SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); // Change the old two-address defined range & vni to start at @@ -470,56 +457,44 @@ private: VNInfo *copyVNI = newLI->getNextValue(useIdx.getDefIndex(), 0, true, lis->getVNInfoAllocator()); - copyVNI->addKill(copyIdx.getDefIndex()); LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI); newLI->addRange(copyRange); } } - - // Iterate over any PHI kills - we'll need to insert new copies for them. - for (VNInfo::KillSet::iterator - killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end(); - killItr != killEnd; ++killItr) { - SlotIndex killIdx(*killItr); - if (killItr->isPHI()) { - MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); - LiveRange *oldKillRange = - newLI->getLiveRangeContaining(killIdx); - - assert(oldKillRange != 0 && "No kill range?"); - - tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(), - li->reg, newVReg, trc, trc, - DebugLoc()); - MachineInstr *copyMI = prior(killMBB->getFirstTerminator()); - copyMI->addRegisterKilled(newVReg, tri); - SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - // Save the current end. We may need it to add a new range if the - // current range runs of the end of the MBB. - SlotIndex newKillRangeEnd = oldKillRange->end; - oldKillRange->end = copyIdx.getDefIndex(); + // Iterate over any PHI kills - we'll need to insert new copies for them. + for (LiveInterval::iterator LRI = newLI->begin(), LRE = newLI->end(); + LRI != LRE; ++LRI) { + if (LRI->valno != newVNI || LRI->end.isPHI()) + continue; + SlotIndex killIdx = LRI->end; + MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx); + MachineInstr *copyMI = BuildMI(*killMBB, killMBB->getFirstTerminator(), + DebugLoc(), tii->get(TargetOpcode::COPY), + li->reg) + .addReg(newVReg, RegState::Kill); + SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI); - if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) { - assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) && - "PHI kill range doesn't reach kill-block end. Not sane."); - newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), - newKillRangeEnd, newVNI)); - } + // Save the current end. We may need it to add a new range if the + // current range runs of the end of the MBB. + SlotIndex newKillRangeEnd = LRI->end; + LRI->end = copyIdx.getDefIndex(); - *killItr = oldKillRange->end; - VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), - copyMI, true, - lis->getVNInfoAllocator()); - newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB)); - newKillVNI->setHasPHIKill(true); - li->addRange(LiveRange(copyIdx.getDefIndex(), - lis->getMBBEndIdx(killMBB), - newKillVNI)); + if (newKillRangeEnd != lis->getMBBEndIdx(killMBB)) { + assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB) && + "PHI kill range doesn't reach kill-block end. 
Not sane."); + newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB), + newKillRangeEnd, newVNI)); } + VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(), + copyMI, true, + lis->getVNInfoAllocator()); + newKillVNI->setHasPHIKill(true); + li->addRange(LiveRange(copyIdx.getDefIndex(), + lis->getMBBEndIdx(killMBB), + newKillVNI)); } - newVNI->setHasPHIKill(false); return newLI; @@ -530,6 +505,13 @@ private: } // end anonymous namespace +namespace llvm { +Spiller *createInlineSpiller(MachineFunction*, + LiveIntervals*, + const MachineLoopInfo*, + VirtRegMap*); +} + llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, const MachineLoopInfo *loopInfo, VirtRegMap *vrm) { @@ -538,5 +520,6 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis, case trivial: return new TrivialSpiller(mf, lis, vrm); case standard: return new StandardSpiller(lis, loopInfo, vrm); case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); + case inline_: return createInlineSpiller(mf, lis, loopInfo, vrm); } } diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h index dda52e871fea..450447b3933a 100644 --- a/lib/CodeGen/Spiller.h +++ b/lib/CodeGen/Spiller.h @@ -33,11 +33,19 @@ namespace llvm { public: virtual ~Spiller() = 0; - /// Spill the given live range. The method used will depend on the Spiller - /// implementation selected. - virtual std::vector spill(LiveInterval *li, - SmallVectorImpl &spillIs, - SlotIndex *earliestIndex = 0) = 0; + /// spill - Spill the given live interval. The method used will depend on + /// the Spiller implementation selected. + /// + /// @param li The live interval to be spilled. + /// @param spillIs A list of intervals that are about to be spilled, + /// and so cannot be used for remat etc. + /// @param newIntervals The newly created intervals will be appended here. + /// @param earliestIndex The earliest point for splitting. (OK, it's another + /// pointer to the allocator guts). + virtual void spill(LiveInterval *li, + std::vector &newIntervals, + SmallVectorImpl &spillIs, + SlotIndex *earliestIndex = 0) = 0; }; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 8a6a727a1f97..ca5c28ce010c 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -136,7 +136,7 @@ bool StackProtector::RequiresStackProtector() const { bool StackProtector::InsertStackProtectors() { BasicBlock *FailBB = 0; // The basic block to jump to if check fails. AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Constant *StackGuardVar = 0; // The stack guard variable. + Value *StackGuardVar = 0; // The stack guard variable. 
for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { BasicBlock *BB = I++; @@ -153,9 +153,17 @@ bool StackProtector::InsertStackProtectors() { // StackGuard = load __stack_chk_guard // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) // - PointerType *PtrTy = PointerType::getUnqual( - Type::getInt8Ty(RI->getContext())); - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + const PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, + PointerType::get(PtrTy, AddressSpace)); + } else { + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } BasicBlock &Entry = F->getEntryBlock(); Instruction *InsPt = &Entry.front(); diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index 7f3b452f0a5a..eff3c33e3daa 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -19,6 +19,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -508,8 +509,7 @@ bool StackSlotColoring::PropagateBackward(MachineBasicBlock::iterator MII, // Abort the use is actually a sub-register def. We don't have enough // information to figure out if it is really legal. - if (MO.getSubReg() || MII->isExtractSubreg() || - MII->isInsertSubreg() || MII->isSubregToReg()) + if (MO.getSubReg() || MII->isSubregToReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -571,7 +571,7 @@ bool StackSlotColoring::PropagateForward(MachineBasicBlock::iterator MII, // Abort the use is actually a sub-register use. We don't have enough // information to figure out if it is really legal. 
- if (MO.getSubReg() || MII->isExtractSubreg()) + if (MO.getSubReg()) return false; const TargetRegisterClass *RC = TID.OpInfo[i].getRegClass(TRI); @@ -610,8 +610,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumLoadElim; } else { - TII->copyRegToReg(*MBB, MI, DstReg, Reg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), + DstReg).addReg(Reg); ++NumRegRepl; } @@ -627,8 +627,8 @@ StackSlotColoring::UnfoldAndRewriteInstruction(MachineInstr *MI, int OldFI, DEBUG(MI->dump()); ++NumStoreElim; } else { - TII->copyRegToReg(*MBB, MI, Reg, SrcReg, RC, RC, - MI->getDebugLoc()); + BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(TargetOpcode::COPY), Reg) + .addReg(SrcReg); ++NumRegRepl; } diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 142398cc1642..59315cf67282 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterCoalescer.h" @@ -695,9 +696,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert copy from curr.second to a temporary at // the Phi defining curr.second MachineBasicBlock::iterator PI = MRI.getVRegDef(curr.second); - TII->copyRegToReg(*PI->getParent(), PI, t, - curr.second, RC, RC, DebugLoc()); - + BuildMI(*PI->getParent(), PI, DebugLoc(), TII->get(TargetOpcode::COPY), + t).addReg(curr.second); DEBUG(dbgs() << "Inserted copy from " << curr.second << " to " << t << "\n"); @@ -712,8 +712,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, } // Insert copy from map[curr.first] to curr.second - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), curr.second, - map[curr.first], RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), curr.second).addReg(map[curr.first]); map[curr.first] = curr.second; DEBUG(dbgs() << "Inserted copy from " << curr.first << " to " << curr.second << "\n"); @@ -761,8 +761,8 @@ void StrongPHIElimination::ScheduleCopies(MachineBasicBlock* MBB, // Insert a copy from dest to a new temporary t at the end of b unsigned t = MF->getRegInfo().createVirtualRegister(RC); - TII->copyRegToReg(*MBB, MBB->getFirstTerminator(), t, - curr.second, RC, RC, DebugLoc()); + BuildMI(*MBB, MBB->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), t).addReg(curr.second); map[curr.second] = t; MachineBasicBlock::iterator TI = MBB->getFirstTerminator(); @@ -830,9 +830,6 @@ void StrongPHIElimination::InsertCopies(MachineDomTreeNode* MDTN, LiveInterval& Int = LI.getInterval(I->getOperand(i).getReg()); VNInfo* FirstVN = *Int.vni_begin(); FirstVN->setHasPHIKill(false); - if (I->getOperand(i).isKill()) - FirstVN->addKill(LI.getInstructionIndex(I).getUseIndex()); - LiveRange LR (LI.getMBBStartIdx(I->getParent()), LI.getInstructionIndex(I).getUseIndex().getNextSlot(), FirstVN); @@ -959,9 +956,8 @@ bool StrongPHIElimination::runOnMachineFunction(MachineFunction &Fn) { } else { // Insert a last-minute copy if a conflict was detected. 
const TargetInstrInfo *TII = Fn.getTarget().getInstrInfo(); - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(I->first); - TII->copyRegToReg(*SI->second, SI->second->getFirstTerminator(), - I->first, SI->first, RC, RC, DebugLoc()); + BuildMI(*SI->second, SI->second->getFirstTerminator(), DebugLoc(), + TII->get(TargetOpcode::COPY), I->first).addReg(SI->first); LI.renumber(); diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index f2e2a76f00eb..075db803bd23 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/Target/TargetInstrInfo.h" @@ -559,11 +560,9 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC,RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first).addReg(CopyInfos[i].second)); } NumInstrDups += TailBB->size() - 1; // subtract one for removed branch @@ -618,11 +617,10 @@ TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF, } MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { - const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first); - TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first, - CopyInfos[i].second, RC, RC, DebugLoc()); - MachineInstr *CopyMI = prior(Loc); - Copies.push_back(CopyMI); + Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), + TII->get(TargetOpcode::COPY), + CopyInfos[i].first) + .addReg(CopyInfos[i].second)); } } else { // No PHIs to worry about, just splice the instructions over. diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 0ad6619ac4fd..cdacb98e0e88 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -13,6 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/ADT/SmallVector.h" @@ -21,11 +22,34 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PostRAHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void +TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + + // Remove all the old successors of MBB from the CFG. 
+ while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_begin()); + + // Remove all the dead instructions from the end of MBB. + MBB->erase(Tail, MBB->end()); + + // If MBB isn't immediately before MBB, insert a branch to it. + if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) + InsertBranch(*MBB, NewDest, 0, SmallVector(), + Tail->getDebugLoc()); + MBB->addSuccessor(NewDest); +} + // commuteInstruction - The default implementation of this method just exchanges // the two operands returned by findCommutedOpIndices. MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, @@ -136,17 +160,9 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig, - const TargetRegisterInfo *TRI) const { + const TargetRegisterInfo &TRI) const { MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MachineOperand &MO = MI->getOperand(0); - if (TargetRegisterInfo::isVirtualRegister(DestReg)) { - MO.setReg(DestReg); - MO.setSubReg(SubIdx); - } else if (SubIdx) { - MO.setReg(TRI->getSubReg(DestReg, SubIdx)); - } else { - MO.setReg(DestReg); - } + MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); MBB.insert(I, MI); } @@ -175,6 +191,47 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { return FnSize; } +// If the COPY instruction in MI can be folded to a stack operation, return +// the register class to use. +static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, + unsigned FoldIdx) { + assert(MI->isCopy() && "MI must be a COPY instruction"); + if (MI->getNumOperands() != 2) + return 0; + assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); + + const MachineOperand &FoldOp = MI->getOperand(FoldIdx); + const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); + + if (FoldOp.getSubReg() || LiveOp.getSubReg()) + return 0; + + unsigned FoldReg = FoldOp.getReg(); + unsigned LiveReg = LiveOp.getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && + "Cannot fold physregs"); + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); + + if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + return RC->contains(LiveOp.getReg()) ? RC : 0; + + const TargetRegisterClass *LiveRC = MRI.getRegClass(LiveReg); + if (RC == LiveRC || RC->hasSubClass(LiveRC)) + return RC; + + // FIXME: Allow folding when register classes are memory compatible. + return 0; +} + +bool TargetInstrInfoImpl:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); +} + /// foldMemoryOperand - Attempt to fold a load or store of the specified stack /// slot into the specified machine instruction for the specified operand(s). /// If this is possible, a new instruction is returned with the specified @@ -182,10 +239,9 @@ TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const { /// removing the old instruction and adding the new one in the instruction /// stream. 
 MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
-                                   MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
                                    const SmallVectorImpl<unsigned> &Ops,
-                                   int FrameIndex) const {
+                                   int FI) const {
   unsigned Flags = 0;
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     if (MI->getOperand(Ops[i]).isDef())
@@ -193,34 +249,56 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
     else
       Flags |= MachineMemOperand::MOLoad;
 
+  MachineBasicBlock *MBB = MI->getParent();
+  assert(MBB && "foldMemoryOperand needs an inserted instruction");
+  MachineFunction &MF = *MBB->getParent();
+
   // Ask the target to do the actual folding.
-  MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FrameIndex);
-  if (!NewMI) return 0;
+  if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+    // Add a memory operand, foldMemoryOperandImpl doesn't do that.
+    assert((!(Flags & MachineMemOperand::MOStore) ||
+            NewMI->getDesc().mayStore()) &&
+           "Folded a def to a non-store!");
+    assert((!(Flags & MachineMemOperand::MOLoad) ||
+            NewMI->getDesc().mayLoad()) &&
+           "Folded a use to a non-load!");
+    const MachineFrameInfo &MFI = *MF.getFrameInfo();
+    assert(MFI.getObjectOffset(FI) != -1);
+    MachineMemOperand *MMO =
+      MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
+                              Flags, /*Offset=*/0,
+                              MFI.getObjectSize(FI),
+                              MFI.getObjectAlignment(FI));
+    NewMI->addMemOperand(MF, MMO);
-  assert((!(Flags & MachineMemOperand::MOStore) ||
-          NewMI->getDesc().mayStore()) &&
-         "Folded a def to a non-store!");
-  assert((!(Flags & MachineMemOperand::MOLoad) ||
-          NewMI->getDesc().mayLoad()) &&
-         "Folded a use to a non-load!");
-  const MachineFrameInfo &MFI = *MF.getFrameInfo();
-  assert(MFI.getObjectOffset(FrameIndex) != -1);
-  MachineMemOperand *MMO =
-    MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIndex),
-                            Flags, /*Offset=*/0,
-                            MFI.getObjectSize(FrameIndex),
-                            MFI.getObjectAlignment(FrameIndex));
-  NewMI->addMemOperand(MF, MMO);
+    // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI.
+    return MBB->insert(MI, NewMI);
+  }
-  return NewMI;
+  // Straight COPY may fold as load/store.
+  if (!MI->isCopy() || Ops.size() != 1)
+    return 0;
+
+  const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]);
+  if (!RC)
+    return 0;
+
+  const MachineOperand &MO = MI->getOperand(1-Ops[0]);
+  MachineBasicBlock::iterator Pos = MI;
+  const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo();
+
+  if (Flags == MachineMemOperand::MOStore)
+    storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI);
+  else
+    loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI);
+  return --Pos;
 }
 
 /// foldMemoryOperand - Same as the previous version except it allows folding
 /// of any load and store from / to any address, not just from a specific
 /// stack slot.
 MachineInstr*
-TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
-                                   MachineInstr* MI,
+TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
                                    const SmallVectorImpl<unsigned> &Ops,
                                    MachineInstr* LoadMI) const {
   assert(LoadMI->getDesc().canFoldAsLoad() && "LoadMI isn't foldable!");
@@ -228,11 +306,15 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF,
   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
     assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!");
 #endif
+  MachineBasicBlock &MBB = *MI->getParent();
+  MachineFunction &MF = *MBB.getParent();
 
   // Ask the target to do the actual folding.
MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); if (!NewMI) return 0; + NewMI = MBB.insert(MI, NewMI); + // Copy the memoperands from the load to the folded instruction. NewMI->setMemRefs(LoadMI->memoperands_begin(), LoadMI->memoperands_end()); @@ -240,11 +322,9 @@ TargetInstrInfo::foldMemoryOperand(MachineFunction &MF, return NewMI; } -bool -TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * - MI, - AliasAnalysis * - AA) const { +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { const MachineFunction &MF = *MI->getParent()->getParent(); const MachineRegisterInfo &MRI = MF.getRegInfo(); const TargetMachine &TM = MF.getTarget(); @@ -324,3 +404,31 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr * // Everything checked out. return true; } + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const{ + // Terminators and labels can't be scheduled around. + if (MI->getDesc().isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) + return true; + + return false; +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. 
+ScheduleHazardRecognizer *TargetInstrInfoImpl:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { + return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 71ad3fb6f99f..a80cfc4b256f 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -825,32 +825,32 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, TargetLoweringObjectFile::Initialize(Ctx, TM); TextSection = getContext().getCOFFSection(".text", - MCSectionCOFF::IMAGE_SCN_CNT_CODE | - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_CODE | + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getText()); DataSection = getContext().getCOFFSection(".data", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); ReadOnlySection = getContext().getCOFFSection(".rdata", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); StaticCtorSection = getContext().getCOFFSection(".ctors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); StaticDtorSection = getContext().getCOFFSection(".dtors", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // FIXME: We're emitting LSDA info into a readonly section on COFF, even @@ -859,76 +859,76 @@ void TargetLoweringObjectFileCOFF::Initialize(MCContext &Ctx, // adjusted or this should be a data section. LSDASection = getContext().getCOFFSection(".gcc_except_table", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getReadOnly()); EHFrameSection = getContext().getCOFFSection(".eh_frame", - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE, + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, SectionKind::getDataRel()); // Debug info. 
DwarfAbbrevSection = getContext().getCOFFSection(".debug_abbrev", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfInfoSection = getContext().getCOFFSection(".debug_info", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLineSection = getContext().getCOFFSection(".debug_line", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfFrameSection = getContext().getCOFFSection(".debug_frame", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubNamesSection = getContext().getCOFFSection(".debug_pubnames", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfPubTypesSection = getContext().getCOFFSection(".debug_pubtypes", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfStrSection = getContext().getCOFFSection(".debug_str", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfLocSection = getContext().getCOFFSection(".debug_loc", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfARangesSection = getContext().getCOFFSection(".debug_aranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfRangesSection = getContext().getCOFFSection(".debug_ranges", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DwarfMacroInfoSection = getContext().getCOFFSection(".debug_macinfo", - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE | - MCSectionCOFF::IMAGE_SCN_MEM_READ, + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); DrectveSection = getContext().getCOFFSection(".drectve", - MCSectionCOFF::IMAGE_SCN_LNK_INFO, + COFF::IMAGE_SCN_LNK_INFO, SectionKind::getMetadata()); } @@ -936,27 +936,27 @@ static unsigned getCOFFSectionFlags(SectionKind K) { unsigned Flags = 0; - if (!K.isMetadata()) + if (K.isMetadata()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_DISCARDABLE; + COFF::IMAGE_SCN_MEM_DISCARDABLE; else if (K.isText()) Flags |= - MCSectionCOFF::IMAGE_SCN_MEM_EXECUTE | - MCSectionCOFF::IMAGE_SCN_CNT_CODE; + COFF::IMAGE_SCN_MEM_EXECUTE | + COFF::IMAGE_SCN_CNT_CODE; else if (K.isBSS ()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; else if (K.isReadOnly()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - 
MCSectionCOFF::IMAGE_SCN_MEM_READ; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ; else if (K.isWriteable()) Flags |= - MCSectionCOFF::IMAGE_SCN_CNT_INITIALIZED_DATA | - MCSectionCOFF::IMAGE_SCN_MEM_READ | - MCSectionCOFF::IMAGE_SCN_MEM_WRITE; + COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; return Flags; } @@ -995,10 +995,10 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, unsigned Characteristics = getCOFFSectionFlags(Kind); - Characteristics |= MCSectionCOFF::IMAGE_SCN_LNK_COMDAT; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; return getContext().getCOFFSection(Name.str(), Characteristics, - MCSectionCOFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); + COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH, Kind); } if (Kind.isText()) diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index 3d10dc13d20b..564914373bb5 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -33,6 +33,7 @@ #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -381,7 +382,7 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, DstReg = 0; unsigned SrcSubIdx, DstSubIdx; if (!TII->isMoveInstr(MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) { - if (MI.isExtractSubreg()) { + if (MI.isCopy()) { DstReg = MI.getOperand(0).getReg(); SrcReg = MI.getOperand(1).getReg(); } else if (MI.isInsertSubreg()) { @@ -897,6 +898,108 @@ TryInstructionTransform(MachineBasicBlock::iterator &mi, } } } + + // If this is an instruction with a load folded into it, try unfolding + // the load, e.g. avoid this: + // movq %rdx, %rcx + // addq (%rax), %rcx + // in favor of this: + // movq (%rax), %rcx + // addq %rdx, %rcx + // because it's preferable to schedule a load than a register copy. + if (TID.mayLoad() && !regBKilled) { + // Determine if a load can be unfolded. + unsigned LoadRegIndex; + unsigned NewOpc = + TII->getOpcodeAfterMemoryUnfold(mi->getOpcode(), + /*UnfoldLoad=*/true, + /*UnfoldStore=*/false, + &LoadRegIndex); + if (NewOpc != 0) { + const TargetInstrDesc &UnfoldTID = TII->get(NewOpc); + if (UnfoldTID.getNumDefs() == 1) { + MachineFunction &MF = *mbbi->getParent(); + + // Unfold the load. + DEBUG(dbgs() << "2addr: UNFOLDING: " << *mi); + const TargetRegisterClass *RC = + UnfoldTID.OpInfo[LoadRegIndex].getRegClass(TRI); + unsigned Reg = MRI->createVirtualRegister(RC); + SmallVector NewMIs; + if (!TII->unfoldMemoryOperand(MF, mi, Reg, + /*UnfoldLoad=*/true,/*UnfoldStore=*/false, + NewMIs)) { + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + return false; + } + assert(NewMIs.size() == 2 && + "Unfolded a load into multiple instructions!"); + // The load was previously folded, so this is the only use. + NewMIs[1]->addRegisterKilled(Reg, TRI); + + // Tentatively insert the instructions into the block so that they + // look "normal" to the transformation logic. + mbbi->insert(mi, NewMIs[0]); + mbbi->insert(mi, NewMIs[1]); + + DEBUG(dbgs() << "2addr: NEW LOAD: " << *NewMIs[0] + << "2addr: NEW INST: " << *NewMIs[1]); + + // Transform the instruction, now that it no longer has a load. 
+ unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); + unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); + MachineBasicBlock::iterator NewMI = NewMIs[1]; + bool TransformSuccess = + TryInstructionTransform(NewMI, mi, mbbi, + NewSrcIdx, NewDstIdx, Dist); + if (TransformSuccess || + NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + // Success, or at least we made an improvement. Keep the unfolded + // instructions and discard the original. + if (LV) { + for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) { + MachineOperand &MO = mi->getOperand(i); + if (MO.isReg() && MO.getReg() != 0 && + TargetRegisterInfo::isVirtualRegister(MO.getReg())) { + if (MO.isUse()) { + if (MO.isKill()) { + if (NewMIs[0]->killsRegister(MO.getReg())) + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[0]); + else { + assert(NewMIs[1]->killsRegister(MO.getReg()) && + "Kill missing after load unfold!"); + LV->replaceKillInstruction(MO.getReg(), mi, NewMIs[1]); + } + } + } else if (LV->removeVirtualRegisterDead(MO.getReg(), mi)) { + if (NewMIs[1]->registerDefIsDead(MO.getReg())) + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[1]); + else { + assert(NewMIs[0]->registerDefIsDead(MO.getReg()) && + "Dead flag missing after load unfold!"); + LV->addVirtualRegisterDead(MO.getReg(), NewMIs[0]); + } + } + } + } + LV->addVirtualRegisterKilled(Reg, NewMIs[1]); + } + mi->eraseFromParent(); + mi = NewMIs[1]; + if (TransformSuccess) + return true; + } else { + // Transforming didn't eliminate the tie and didn't lead to an + // improvement. Clean up the unfolded instructions and keep the + // original. + DEBUG(dbgs() << "2addr: ABANDONING UNFOLD\n"); + NewMIs[0]->eraseFromParent(); + NewMIs[1]->eraseFromParent(); + } + } + } + } + return false; } @@ -1047,14 +1150,12 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){ DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n"); unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg(); - TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI); + TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, *TRI); ReMatRegs.set(regB); ++NumReMats; } else { - bool Emitted = TII->copyRegToReg(*mbbi, mi, regA, regB, rc, rc, - mi->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); + BuildMI(*mbbi, mi, mi->getDebugLoc(), TII->get(TargetOpcode::COPY), + regA).addReg(regB); } MachineBasicBlock::iterator prevMI = prior(mi); @@ -1104,12 +1205,30 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { } } } - + + // Schedule the source copy / remat inserted to form two-address + // instruction. FIXME: Does it matter the distance map may not be + // accurate after it's scheduled? + TII->scheduleTwoAddrSource(prior(mi), mi, *TRI); + MadeChange = true; DEBUG(dbgs() << "\t\trewrite to:\t" << *mi); } + // Rewrite INSERT_SUBREG as COPY now that we no longer need SSA form. + if (mi->isInsertSubreg()) { + // From %reg = INSERT_SUBREG %reg, %subreg, subidx + // To %reg:subidx = COPY %subreg + unsigned SubIdx = mi->getOperand(3).getImm(); + mi->RemoveOperand(3); + assert(mi->getOperand(0).getSubReg() == 0 && "Unexpected subreg idx"); + mi->getOperand(0).setSubReg(SubIdx); + mi->RemoveOperand(1); + mi->setDesc(TII->get(TargetOpcode::COPY)); + DEBUG(dbgs() << "\t\tconvert to:\t" << *mi); + } + // Clear TiedOperands here instead of at the top of the loop // since most instructions do not have tied operands. 
TiedOperands.clear(); @@ -1136,14 +1255,13 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) { static void UpdateRegSequenceSrcs(unsigned SrcReg, unsigned DstReg, unsigned SubIdx, - MachineRegisterInfo *MRI) { + MachineRegisterInfo *MRI, + const TargetRegisterInfo &TRI) { for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), RE = MRI->reg_end(); RI != RE; ) { MachineOperand &MO = RI.getOperand(); ++RI; - MO.setReg(DstReg); - assert(MO.getSubReg() == 0); - MO.setSubReg(SubIdx); + MO.substVirtReg(DstReg, SubIdx, TRI); } } @@ -1165,55 +1283,102 @@ TwoAddressInstructionPass::CoalesceExtSubRegs(SmallVector &Srcs, if (!Seen.insert(SrcReg)) continue; - // If there are no other uses than extract_subreg which feed into + // Check that the instructions are all in the same basic block. + MachineInstr *SrcDefMI = MRI->getVRegDef(SrcReg); + MachineInstr *DstDefMI = MRI->getVRegDef(DstReg); + if (SrcDefMI->getParent() != DstDefMI->getParent()) + continue; + + // If there are no other uses than copies which feed into // the reg_sequence, then we might be able to coalesce them. bool CanCoalesce = true; - SmallVector SubIndices; + SmallVector SrcSubIndices, DstSubIndices; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg), UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; - if (!UseMI->isExtractSubreg() || - UseMI->getOperand(0).getReg() != DstReg) { + if (!UseMI->isCopy() || UseMI->getOperand(0).getReg() != DstReg) { CanCoalesce = false; break; } - SubIndices.push_back(UseMI->getOperand(2).getImm()); + SrcSubIndices.push_back(UseMI->getOperand(1).getSubReg()); + DstSubIndices.push_back(UseMI->getOperand(0).getSubReg()); } - if (!CanCoalesce || SubIndices.size() < 2) + if (!CanCoalesce || SrcSubIndices.size() < 2) continue; - std::sort(SubIndices.begin(), SubIndices.end()); - unsigned NewSubIdx = 0; - if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices, - NewSubIdx)) { - bool Proceed = true; - if (NewSubIdx) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - // FIXME: If the sub-registers do not combine to the whole - // super-register, i.e. NewSubIdx != 0, and any of the use has a - // sub-register index, then abort the coalescing attempt. - if (MO.getSubReg()) { - Proceed = false; - break; - } - MO.setReg(DstReg); - MO.setSubReg(NewSubIdx); - } - if (Proceed) - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - MO.setReg(DstReg); - if (NewSubIdx) - MO.setSubReg(NewSubIdx); - } + // Check that the source subregisters can be combined. + std::sort(SrcSubIndices.begin(), SrcSubIndices.end()); + unsigned NewSrcSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(SrcReg), SrcSubIndices, + NewSrcSubIdx)) + continue; + + // Check that the destination subregisters can also be combined. + std::sort(DstSubIndices.begin(), DstSubIndices.end()); + unsigned NewDstSubIdx = 0; + if (!TRI->canCombineSubRegIndices(MRI->getRegClass(DstReg), DstSubIndices, + NewDstSubIdx)) + continue; + + // If neither source nor destination can be combined to the full register, + // just give up. This could be improved if it ever matters. 
+ if (NewSrcSubIdx != 0 && NewDstSubIdx != 0) + continue; + + // Now that we know that all the uses are extract_subregs and that those + // subregs can somehow be combined, scan all the extract_subregs again to + // make sure the subregs are in the right order and can be composed. + MachineInstr *SomeMI = 0; + CanCoalesce = true; + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineInstr *UseMI = &*UI; + assert(UseMI->isCopy()); + unsigned DstSubIdx = UseMI->getOperand(0).getSubReg(); + unsigned SrcSubIdx = UseMI->getOperand(1).getSubReg(); + assert(DstSubIdx != 0 && "missing subreg from RegSequence elimination"); + if ((NewDstSubIdx == 0 && + TRI->composeSubRegIndices(NewSrcSubIdx, DstSubIdx) != SrcSubIdx) || + (NewSrcSubIdx == 0 && + TRI->composeSubRegIndices(NewDstSubIdx, SrcSubIdx) != DstSubIdx)) { + CanCoalesce = false; + break; + } + // Keep track of one of the uses. + SomeMI = UseMI; + } + if (!CanCoalesce) + continue; + + // Insert a copy to replace the original. + MachineBasicBlock::iterator InsertLoc = SomeMI; + MachineInstr *CopyMI = BuildMI(*SomeMI->getParent(), SomeMI, + SomeMI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, NewDstSubIdx) + .addReg(SrcReg, 0, NewSrcSubIdx); + + // Remove all the old extract instructions. + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SrcReg), + UE = MRI->use_nodbg_end(); UI != UE; ) { + MachineInstr *UseMI = &*UI; + ++UI; + if (UseMI == CopyMI) + continue; + assert(UseMI->isCopy()); + // Move any kills to the new copy or extract instruction. + if (UseMI->getOperand(1).isKill()) { + CopyMI->getOperand(1).setIsKill(); + if (LV) + // Update live variables + LV->replaceKillInstruction(SrcReg, UseMI, &*CopyMI); } + UseMI->eraseFromParent(); + } } } @@ -1268,15 +1433,13 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { } IsImpDef = false; - // Remember EXTRACT_SUBREG sources. These might be candidate for - // coalescing. - if (DefMI->isExtractSubreg()) + // Remember COPY sources. These might be candidate for coalescing. + if (DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) RealSrcs.push_back(DefMI->getOperand(1).getReg()); - if (!Seen.insert(SrcReg) || - MI->getParent() != DefMI->getParent() || - !MI->getOperand(i).isKill() || - HasOtherRegSequenceUses(SrcReg, MI, MRI)) { + bool isKill = MI->getOperand(i).isKill(); + if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent() || + !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI)) { // REG_SEQUENCE cannot have duplicated operands, add a copy. // Also add an copy if the source is live-in the block. We don't want // to end up with a partial-redef of a livein, e.g. @@ -1292,30 +1455,23 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { // If the REG_SEQUENCE doesn't kill its source, keeping live variables // correctly up to date becomes very difficult. Insert a copy. 
// - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - unsigned NewReg = MRI->createVirtualRegister(RC); MachineBasicBlock::iterator InsertLoc = MI; - bool Emitted = - TII->copyRegToReg(*MI->getParent(), InsertLoc, NewReg, SrcReg, RC, RC, - MI->getDebugLoc()); - (void)Emitted; - assert(Emitted && "Unable to issue a copy instruction!\n"); - MI->getOperand(i).setReg(NewReg); - if (MI->getOperand(i).isKill()) { - MachineBasicBlock::iterator CopyMI = prior(InsertLoc); - MachineOperand *KillMO = CopyMI->findRegisterUseOperand(SrcReg); - KillMO->setIsKill(); - if (LV) - // Update live variables - LV->replaceKillInstruction(SrcReg, MI, &*CopyMI); - } + MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, + MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, MI->getOperand(i+1).getImm()) + .addReg(SrcReg, getKillRegState(isKill)); + MI->getOperand(i).setReg(0); + if (LV && isKill) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); + DEBUG(dbgs() << "Inserted: " << *CopyMI); } } for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { unsigned SrcReg = MI->getOperand(i).getReg(); + if (!SrcReg) continue; unsigned SubIdx = MI->getOperand(i+1).getImm(); - UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI); + UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); } if (IsImpDef) { @@ -1328,8 +1484,11 @@ bool TwoAddressInstructionPass::EliminateRegSequences() { MI->eraseFromParent(); } - // Try coalescing some EXTRACT_SUBREG instructions. - CoalesceExtSubRegs(RealSrcs, DstReg); + // Try coalescing some EXTRACT_SUBREG instructions. This can create + // INSERT_SUBREG instructions that must have flags added by + // LiveIntervalAnalysis, so only run it when LiveVariables is available. + if (LV) + CoalesceExtSubRegs(RealSrcs, DstReg); } RegSequences.clear(); diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp index 871d83628ac1..57a1500e6e9d 100644 --- a/lib/CodeGen/VirtRegRewriter.cpp +++ b/lib/CodeGen/VirtRegRewriter.cpp @@ -667,8 +667,7 @@ static void ReMaterialize(MachineBasicBlock &MBB, assert(TID.getNumDefs() == 1 && "Don't know how to remat instructions that define > 1 values!"); #endif - TII->reMaterialize(MBB, MII, DestReg, - ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI); + TII->reMaterialize(MBB, MII, DestReg, 0, ReMatDefMI, *TRI); MachineInstr *NewMI = prior(MII); for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { MachineOperand &MO = NewMI->getOperand(i); @@ -769,7 +768,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB, I = PhysRegsAvailable.begin(), E = PhysRegsAvailable.end(); I != E; ++I) { unsigned Reg = I->first; - const TargetRegisterClass* RC = TRI->getPhysicalRegisterRegClass(Reg); + const TargetRegisterClass* RC = TRI->getMinimalPhysRegClass(Reg); // FIXME: A temporary workaround. We can't reuse available value if it's // not safe to move the def of the virtual register's class. e.g. // X86::RFP* register classes. Do not add it as a live-in. 
@@ -1022,7 +1021,7 @@ static unsigned FindFreeRegister(MachineBasicBlock::iterator MII, for (unsigned i = 0, e = Kills.size(); i != e; ++i) { unsigned Kill = Kills[i]; if (!Defs[Kill] && !Uses[Kill] && - TRI->getPhysicalRegisterRegClass(Kill) == RC) + RC->contains(Kill)) return Kill; } for (unsigned i = 0, e = LocalUses.size(); i != e; ++i) { @@ -1410,25 +1409,25 @@ OptimizeByUnfold(MachineBasicBlock::iterator &MII, if (TII->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { assert(NewMIs.size() == 1); MachineInstr *NewMI = NewMIs.back(); + MBB->insert(MII, NewMI); NewMIs.clear(); int Idx = NewMI->findRegisterUseOperandIdx(VirtReg, false); assert(Idx != -1); SmallVector Ops; Ops.push_back(Idx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, NewMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(NewMI, Ops, SS); + NewMI->eraseFromParent(); if (FoldedMI) { VRM->addSpillSlotUse(SS, FoldedMI); if (!VRM->hasPhys(UnfoldVR)) VRM->assignVirt2Phys(UnfoldVR, UnfoldPR); VRM->virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); - MII = MBB->insert(MII, FoldedMI); + MII = FoldedMI; InvalidateKills(MI, TRI, RegKills, KillOps); VRM->RemoveMachineInstrFromMaps(&MI); MBB->erase(&MI); - MF.DeleteMachineInstr(NewMI); return true; } - MF.DeleteMachineInstr(NewMI); } } @@ -1480,7 +1479,6 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, if (MII == MBB->begin() || !MII->killsRegister(SrcReg)) return false; - MachineFunction &MF = *MBB->getParent(); MachineInstr &MI = *MII; MachineBasicBlock::iterator DefMII = prior(MII); MachineInstr *DefMI = DefMII; @@ -1511,11 +1509,12 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *CommutedMI = TII->commuteInstruction(DefMI, true); if (!CommutedMI) return false; + MBB->insert(MII, CommutedMI); SmallVector Ops; Ops.push_back(NewDstIdx); - MachineInstr *FoldedMI = TII->foldMemoryOperand(MF, CommutedMI, Ops, SS); + MachineInstr *FoldedMI = TII->foldMemoryOperand(CommutedMI, Ops, SS); // Not needed since foldMemoryOperand returns new MI. - MF.DeleteMachineInstr(CommutedMI); + CommutedMI->eraseFromParent(); if (!FoldedMI) return false; @@ -1528,7 +1527,7 @@ CommuteToFoldReload(MachineBasicBlock::iterator &MII, MachineInstr *StoreMI = MII; VRM->addSpillSlotUse(SS, StoreMI); VRM->virtFolded(VirtReg, StoreMI, VirtRegMap::isMod); - MII = MBB->insert(MII, FoldedMI); // Update MII to backtrack. + MII = FoldedMI; // Update MII to backtrack. // Delete all 3 old instructions. InvalidateKills(*ReloadMI, TRI, RegKills, KillOps); @@ -1704,7 +1703,7 @@ bool LocalRewriter::InsertEmergencySpills(MachineInstr *MI) { std::vector &EmSpills = VRM->getEmergencySpills(MI); for (unsigned i = 0, e = EmSpills.size(); i != e; ++i) { unsigned PhysReg = EmSpills[i]; - const TargetRegisterClass *RC = TRI->getPhysicalRegisterRegClass(PhysReg); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(PhysReg); assert(RC && "Unable to determine register class!"); int SS = VRM->getEmergencySpillSlot(RC); if (UsedSS.count(SS)) @@ -1759,7 +1758,6 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, bool DoReMat = VRM->isReMaterialized(VirtReg); int SSorRMId = DoReMat ? 
VRM->getReMatId(VirtReg) : VRM->getStackSlot(VirtReg); - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); if (InReg == Phys) { // If the value is already available in the expected register, save @@ -1793,20 +1791,16 @@ bool LocalRewriter::InsertRestores(MachineInstr *MI, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(MII, MBB->begin(), Phys, TRI, DoReMat, SSorRMId, TII, *MBB->getParent()); - - TII->copyRegToReg(*MBB, InsertLoc, Phys, InReg, RC, RC, - MI->getDebugLoc()); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), Phys) + .addReg(InReg, RegState::Kill); // This invalidates Phys. Spills.ClobberPhysReg(Phys); // Remember it's available. Spills.addAvailable(SSorRMId, Phys); - // Mark is killed. - MachineInstr *CopyMI = prior(InsertLoc); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); - MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); UpdateKills(*CopyMI, TRI, RegKills, KillOps); DEBUG(dbgs() << '\t' << *CopyMI); @@ -2013,7 +2007,7 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // = EXTRACT_SUBREG fi#1 // fi#1 is available in EDI, but it cannot be reused because it's not in // the right register file. - if (PhysReg && !AvoidReload && (SubIdx || MI.isExtractSubreg())) { + if (PhysReg && !AvoidReload && SubIdx) { const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); if (!RC->contains(PhysReg)) PhysReg = 0; @@ -2034,6 +2028,18 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, CanReuse = !ReusedOperands.isClobbered(PhysReg) && Spills.canClobberPhysReg(PhysReg); } + // If this is an asm, and PhysReg is used elsewhere as an earlyclobber + // operand, we can't also use it as an input. (Outputs always come + // before inputs, so we can stop looking at i.) 
+ if (MI.isInlineAsm()) { + for (unsigned k=0; kgetPhys(VirtReg); assert(DesignatedReg && "Must map virtreg to physreg!"); @@ -2136,7 +2144,6 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, continue; } - const TargetRegisterClass* RC = MRI->getRegClass(VirtReg); MRI->setPhysRegUsed(DesignatedReg); ReusedOperands.markClobbered(DesignatedReg); @@ -2144,11 +2151,9 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, MachineBasicBlock::iterator InsertLoc = ComputeReloadLoc(&MI, MBB->begin(), PhysReg, TRI, DoReMat, SSorRMId, TII, MF); - - TII->copyRegToReg(*MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC, - MI.getDebugLoc()); - - MachineInstr *CopyMI = prior(InsertLoc); + MachineInstr *CopyMI = BuildMI(*MBB, InsertLoc, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY), + DesignatedReg).addReg(PhysReg); CopyMI->setAsmPrinterFlag(MachineInstr::ReloadReuse); UpdateKills(*CopyMI, TRI, RegKills, KillOps); @@ -2269,27 +2274,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, if (unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SS)) { DEBUG(dbgs() << "Promoted Load To Copy: " << MI); if (DestReg != InReg) { - const TargetRegisterClass *RC = MRI->getRegClass(VirtReg); - TII->copyRegToReg(*MBB, &MI, DestReg, InReg, RC, RC, - MI.getDebugLoc()); MachineOperand *DefMO = MI.findRegisterDefOperand(DestReg); - unsigned SubIdx = DefMO->getSubReg(); + MachineInstr *CopyMI = BuildMI(*MBB, &MI, MI.getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DestReg, RegState::Define, DefMO->getSubReg()) + .addReg(InReg, RegState::Kill); // Revisit the copy so we make sure to notice the effects of the // operation on the destreg (either needing to RA it if it's // virtual or needing to clobber any values if it's physical). - NextMII = &MI; - --NextMII; // backtrack to the copy. + NextMII = CopyMI; NextMII->setAsmPrinterFlag(MachineInstr::ReloadReuse); - // Propagate the sub-register index over. - if (SubIdx) { - DefMO = NextMII->findRegisterDefOperand(DestReg); - DefMO->setSubReg(SubIdx); - } - - // Mark is killed. - MachineOperand *KillOpnd = NextMII->findRegisterUseOperand(InReg); - KillOpnd->setIsKill(); - BackTracked = true; } else { DEBUG(dbgs() << "Removing now-noop copy: " << MI); @@ -2430,6 +2424,24 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Also check if it's copying from an "undef", if so, we can't // eliminate this or else the undef marker is lost and it will // confuses the scavenger. This is extremely rare. + if (MI.isIdentityCopy() && !MI.getOperand(1).isUndef() && + MI.getNumOperands() == 2) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + SmallVector KillRegs; + InvalidateKills(MI, TRI, RegKills, KillOps, &KillRegs); + if (MO.isDead() && !KillRegs.empty()) { + // Source register or an implicit super/sub-register use is killed. + assert(TRI->regsOverlap(KillRegs[0], MI.getOperand(0).getReg())); + // Last def is now dead. + TransferDeadness(MI.getOperand(1).getReg(), RegKills, KillOps); + } + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && Src == Dst && SrcSR == DstSR && @@ -2519,6 +2531,16 @@ LocalRewriter::RewriteMBB(LiveIntervals *LIs, // Check to see if this is a noop copy. If so, eliminate the // instruction before considering the dest reg to be changed. 
+ if (MI.isIdentityCopy()) { + ++NumDCE; + DEBUG(dbgs() << "Removing now-noop copy: " << MI); + InvalidateKills(MI, TRI, RegKills, KillOps); + VRM->RemoveMachineInstrFromMaps(&MI); + MBB->erase(&MI); + Erased = true; + UpdateKills(*LastStore, TRI, RegKills, KillOps); + goto ProcessNextInst; + } { unsigned Src, Dst, SrcSR, DstSR; if (TII->isMoveInstr(MI, Src, Dst, SrcSR, DstSR) && -- cgit v1.2.3
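Note on the recurring idiom in this patch: the removed TII->copyRegToReg() hook is
replaced throughout (TailDuplication, TwoAddressInstructionPass, VirtRegRewriter,
among others) by directly emitting a target-independent COPY with BuildMI. The
sketch below illustrates that idiom against the LLVM C++ API of this revision; the
helper name insertRegCopy and its parameters are illustrative only, not part of the
patch.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/DebugLoc.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetOpcodes.h"

using namespace llvm;

// Emit "DstReg = COPY SrcReg" before InsertPt. The generic COPY is lowered to a
// real target move (or coalesced away) later, so callers no longer look up a
// register class or check a success flag as they did with copyRegToReg().
static MachineInstr *insertRegCopy(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator InsertPt,
                                   DebugLoc DL, const TargetInstrInfo *TII,
                                   unsigned DstReg, unsigned SrcReg,
                                   bool SrcIsKill) {
  return BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), DstReg)
           .addReg(SrcReg, getKillRegState(SrcIsKill));
}

The same builder pattern appears in the hunks above; only the insertion point, the
kill flag, and an optional sub-register index on either operand differ.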